From fe7852d2ae53bc789d6f844b8dcfefad0b432edd Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 23 Jan 2025 22:26:59 -0600 Subject: [PATCH 1/9] update pip devcontainers to UCX 1.18 (#4890) Contributes to https://github.com/rapidsai/build-planning/issues/138 Updates to using UCX 1.18 in pip devcontainers here. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - https://github.com/jakirkham URL: https://github.com/rapidsai/cugraph/pull/4890 --- .devcontainer/cuda11.8-pip/devcontainer.json | 2 +- .devcontainer/cuda12.5-pip/devcontainer.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index f2abf493e12..0a6119fc825 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 7024c0f0a22..fe402024e29 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ From 09f18b4730373c7875d1a375d121198b3752c5dd Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 27 Jan 2025 10:04:59 -0600 Subject: [PATCH 2/9] remove ucx-proc dependency (#4894) Contributes to https://github.com/rapidsai/build-planning/issues/142 `ucx-proc` has been unnecessary for conda environments since UCX 1.14, and RAPIDS currently supports UCX 1.15+. 
This proposes removing that dependency from conda packages and environments here. See the linked issue for more details. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cugraph/pull/4894 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 1 - conda/environments/all_cuda-125_arch-x86_64.yaml | 1 - conda/recipes/cugraph/meta.yaml | 1 - conda/recipes/libcugraph/meta.yaml | 2 -- dependencies.yaml | 1 - 5 files changed, 6 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index f7f64a45b20..1a7e2b77662 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -70,7 +70,6 @@ dependencies: - thriftpy2>=0.4.15,!=0.5.0,!=0.5.1 - torchdata - torchmetrics -- ucx-proc=*=gpu - ucx-py==0.42.*,>=0.0.0a0 - wheel name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 14e53a5c668..83126df9194 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -75,7 +75,6 @@ dependencies: - thriftpy2>=0.4.15,!=0.5.0,!=0.5.1 - torchdata - torchmetrics -- ucx-proc=*=gpu - ucx-py==0.42.*,>=0.0.0a0 - wheel name: all_cuda-125_arch-x86_64 diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 7bc8b7f06e1..d25c3be881e 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -90,7 +90,6 @@ requirements: - raft-dask ={{ minor_version }} - rapids-dask-dependency ={{ minor_version }} - requests - - ucx-proc=*=gpu - ucx-py {{ ucx_py_version }} tests: diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 0ff163ed737..400e0e5f226 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ 
b/conda/recipes/libcugraph/meta.yaml @@ -72,7 +72,6 @@ requirements: - libraft ={{ minor_version }} - librmm ={{ minor_version }} - nccl {{ nccl_version }} - - ucx-proc=*=gpu - rapids-build-backend>=0.3.1,<0.4.0.dev0 outputs: @@ -113,7 +112,6 @@ outputs: - libraft ={{ minor_version }} - librmm ={{ minor_version }} - nccl {{ nccl_version }} - - ucx-proc=*=gpu about: home: https://rapids.ai/ dev_url: https://github.com/rapidsai/cugraph diff --git a/dependencies.yaml b/dependencies.yaml index ba1bc20addb..02fa03cff70 100755 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -410,7 +410,6 @@ dependencies: - fsspec>=0.6.0 - requests - nccl>=2.19 - - ucx-proc=*=gpu - output_types: pyproject packages: # cudf uses fsspec but is protocol independent. cugraph From b64b04fde6bab4ac1c810269c2101eff0e41cabd Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Tue, 28 Jan 2025 07:28:49 +0100 Subject: [PATCH 3/9] Replace all uses of `thrust::optional` with `cuda::std::optional` (#4891) `thrust::optional` is being deprecated and will be removed in a future version of CCCL Authors: - Michael Schellenberger Costa (https://github.com/miscco) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/4891 --- .../detail/decompress_edge_partition.cuh | 3 +- .../cugraph/edge_partition_device_view.cuh | 84 ++++++++------- ...ge_partition_edge_property_device_view.cuh | 6 +- ...artition_endpoint_property_device_view.cuh | 13 +-- cpp/include/cugraph/edge_property.hpp | 8 +- cpp/include/cugraph/edge_src_dst_property.hpp | 10 +- cpp/include/cugraph/utilities/misc_utils.cuh | 10 +- .../betweenness_centrality_impl.cuh | 19 ++-- .../approx_weighted_matching_impl.cuh | 15 +-- cpp/src/community/detail/common_methods.cuh | 8 +- .../detail/maximal_independent_moves.cuh | 4 +- cpp/src/community/detail/refine_impl.cuh | 3 +- cpp/src/community/ecg_impl.cuh | 6 +- 
.../community/edge_triangle_count_impl.cuh | 7 +- cpp/src/community/k_truss_impl.cuh | 43 ++++---- cpp/src/community/triangle_count_impl.cuh | 25 ++--- cpp/src/components/mis_impl.cuh | 4 +- cpp/src/components/vertex_coloring_impl.cuh | 11 +- .../weakly_connected_components_impl.cuh | 42 ++++---- cpp/src/cores/core_number_impl.cuh | 14 +-- cpp/src/lookup/lookup_src_dst_impl.cuh | 20 ++-- .../detail/extract_transform_v_frontier_e.cuh | 44 ++++---- cpp/src/prims/detail/nbr_intersection.cuh | 102 +++++++++--------- cpp/src/prims/detail/partition_v_frontier.cuh | 4 +- .../prims/detail/per_v_transform_reduce_e.cuh | 44 ++++---- .../sample_and_compute_local_nbr_indices.cuh | 24 ++--- .../prims/detail/transform_v_frontier_e.cuh | 18 ++-- cpp/src/prims/extract_transform_e.cuh | 8 +- ...xtract_transform_v_frontier_outgoing_e.cuh | 8 +- cpp/src/prims/fill_edge_property.cuh | 7 +- ..._v_pair_transform_dst_nbr_intersection.cuh | 61 +++++------ ...r_v_random_select_transform_outgoing_e.cuh | 18 ++-- ...m_reduce_dst_key_aggregated_outgoing_e.cuh | 57 +++++----- cpp/src/prims/transform_e.cuh | 23 ++-- ...t_nbr_intersection_of_e_endpoints_by_v.cuh | 8 +- cpp/src/prims/transform_reduce_e.cuh | 22 ++-- .../transform_reduce_e_by_src_dst_key.cuh | 42 ++++---- ...rm_reduce_v_frontier_outgoing_e_by_dst.cuh | 28 ++--- cpp/src/prims/update_v_frontier.cuh | 22 ++-- .../detail/gather_one_hop_edgelist_impl.cuh | 46 ++++---- .../detail/prepare_next_frontier_impl.cuh | 5 +- .../remove_visited_vertices_from_frontier.cuh | 5 +- ...sited_vertices_from_frontier_sg_v32_e32.cu | 5 +- ...sited_vertices_from_frontier_sg_v64_e64.cu | 5 +- cpp/src/sampling/detail/sample_edges.cuh | 12 +-- cpp/src/sampling/detail/sampling_utils.hpp | 4 +- .../shuffle_and_organize_output_impl.cuh | 5 +- cpp/src/sampling/neighbor_sampling_impl.hpp | 7 +- cpp/src/sampling/random_walks.cuh | 8 +- cpp/src/sampling/random_walks_impl.cuh | 34 +++--- cpp/src/sampling/rw_traversals.hpp | 33 +++--- 
.../sampling_post_processing_impl.cuh | 65 +++++------ cpp/src/structure/graph_view_impl.cuh | 27 ++--- cpp/src/structure/induced_subgraph_impl.cuh | 27 ++--- cpp/src/traversal/bfs_impl.cuh | 31 ++++-- cpp/src/traversal/k_hop_nbrs_impl.cuh | 14 +-- .../traversal/od_shortest_distances_impl.cuh | 20 ++-- cpp/src/traversal/sssp_impl.cuh | 28 ++--- .../components/mg_vertex_coloring_test.cu | 8 +- cpp/tests/components/vertex_coloring_test.cu | 8 +- cpp/tests/prims/mg_count_if_e.cu | 25 ++--- cpp/tests/prims/mg_extract_transform_e.cu | 14 +-- ...extract_transform_v_frontier_outgoing_e.cu | 10 +- ...er_v_random_select_transform_outgoing_e.cu | 30 +++--- ...rm_reduce_dst_key_aggregated_outgoing_e.cu | 16 +-- ..._v_transform_reduce_incoming_outgoing_e.cu | 6 +- cpp/tests/prims/mg_transform_e.cu | 10 +- cpp/tests/prims/mg_transform_reduce_e.cu | 9 +- .../mg_transform_reduce_e_by_src_dst_key.cu | 14 +-- ...orm_reduce_v_frontier_outgoing_e_by_dst.cu | 20 ++-- cpp/tests/prims/result_compare.cuh | 27 ++--- .../sampling_post_processing_validate.cu | 48 +++++---- .../utilities/property_generator_kernels.cuh | 11 +- 73 files changed, 798 insertions(+), 734 deletions(-) diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh index c94f456f215..b31624da840 100644 --- a/cpp/include/cugraph/detail/decompress_edge_partition.cuh +++ b/cpp/include/cugraph/detail/decompress_edge_partition.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index 628c3cc10cc..b887309bf6f 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,10 +26,10 @@ #include #include +#include #include #include #include -#include #include #include #include @@ -43,7 +43,7 @@ namespace cugraph { namespace detail { template -__device__ thrust::optional major_hypersparse_idx_from_major_nocheck_impl( +__device__ cuda::std::optional major_hypersparse_idx_from_major_nocheck_impl( raft::device_span dcs_nzd_vertices, vertex_t major) { // we can avoid binary search (and potentially improve performance) if we add an auxiliary array @@ -51,10 +51,10 @@ __device__ thrust::optional major_hypersparse_idx_from_major_nocheck_i auto it = thrust::lower_bound(thrust::seq, dcs_nzd_vertices.begin(), dcs_nzd_vertices.end(), major); return it != dcs_nzd_vertices.end() - ? (*it == major ? thrust::optional{static_cast( + ? (*it == major ? 
cuda::std::optional{static_cast( thrust::distance(dcs_nzd_vertices.begin(), it))} - : thrust::nullopt) - : thrust::nullopt; + : cuda::std::nullopt) + : cuda::std::nullopt; } template @@ -490,7 +490,7 @@ class edge_partition_device_view_t major_hypersparse_first() const noexcept + __host__ __device__ cuda::std::optional major_hypersparse_first() const noexcept { return major_hypersparse_first_; } @@ -528,15 +528,16 @@ class edge_partition_device_view_t major_idx_from_major_nocheck(vertex_t major) const noexcept + __device__ cuda::std::optional major_idx_from_major_nocheck( + vertex_t major) const noexcept { if (major_hypersparse_first_ && (major >= *major_hypersparse_first_)) { auto major_hypersparse_idx = detail::major_hypersparse_idx_from_major_nocheck_impl(*dcs_nzd_vertices_, major); return major_hypersparse_idx - ? thrust::make_optional((*major_hypersparse_first_ - major_range_first_) + - *major_hypersparse_idx) - : thrust::nullopt; + ? cuda::std::make_optional((*major_hypersparse_first_ - major_range_first_) + + *major_hypersparse_idx) + : cuda::std::nullopt; } else { return major - major_range_first_; } @@ -554,23 +555,23 @@ class edge_partition_device_view_t major_hypersparse_idx_from_major_nocheck( + __device__ cuda::std::optional major_hypersparse_idx_from_major_nocheck( vertex_t major) const noexcept { if (dcs_nzd_vertices_) { return detail::major_hypersparse_idx_from_major_nocheck_impl(*dcs_nzd_vertices_, major); } else { - return thrust::nullopt; + return cuda::std::nullopt; } } // major_hypersparse_idx: index within the hypersparse segment - __device__ thrust::optional major_from_major_hypersparse_idx_nocheck( + __device__ cuda::std::optional major_from_major_hypersparse_idx_nocheck( vertex_t major_hypersparse_idx) const noexcept { return dcs_nzd_vertices_ - ? thrust::optional{(*dcs_nzd_vertices_)[major_hypersparse_idx]} - : thrust::nullopt; + ? 
cuda::std::optional{(*dcs_nzd_vertices_)[major_hypersparse_idx]} + : cuda::std::nullopt; } __host__ __device__ vertex_t minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept @@ -578,36 +579,36 @@ class edge_partition_device_view_t> for consistency (see - // dcs_nzd_range_bitmap()) - __host__ __device__ thrust::optional dcs_nzd_vertices() const + // FIXME: better return cuda::std::optional> for consistency + // (see dcs_nzd_range_bitmap()) + __host__ __device__ cuda::std::optional dcs_nzd_vertices() const { - return dcs_nzd_vertices_ ? thrust::optional{(*dcs_nzd_vertices_).data()} - : thrust::nullopt; + return dcs_nzd_vertices_ ? cuda::std::optional{(*dcs_nzd_vertices_).data()} + : cuda::std::nullopt; } - __host__ __device__ thrust::optional dcs_nzd_vertex_count() const + __host__ __device__ cuda::std::optional dcs_nzd_vertex_count() const { return dcs_nzd_vertices_ - ? thrust::optional{static_cast((*dcs_nzd_vertices_).size())} - : thrust::nullopt; + ? cuda::std::optional{static_cast((*dcs_nzd_vertices_).size())} + : cuda::std::nullopt; } - __host__ __device__ thrust::optional> dcs_nzd_range_bitmap() + __host__ __device__ cuda::std::optional> dcs_nzd_range_bitmap() const { return dcs_nzd_range_bitmap_ - ? thrust::make_optional>( + ? 
cuda::std::make_optional>( (*dcs_nzd_range_bitmap_).data(), (*dcs_nzd_range_bitmap_).size()) - : thrust::nullopt; + : cuda::std::nullopt; } private: // should be trivially copyable to device - thrust::optional> dcs_nzd_vertices_{thrust::nullopt}; - thrust::optional> dcs_nzd_range_bitmap_{thrust::nullopt}; - thrust::optional major_hypersparse_first_{thrust::nullopt}; + cuda::std::optional> dcs_nzd_vertices_{cuda::std::nullopt}; + cuda::std::optional> dcs_nzd_range_bitmap_{cuda::std::nullopt}; + cuda::std::optional major_hypersparse_first_{cuda::std::nullopt}; vertex_t major_range_first_{0}; vertex_t major_range_last_{0}; @@ -790,10 +791,10 @@ class edge_partition_device_view_t major_hypersparse_first() const noexcept + __host__ __device__ cuda::std::optional major_hypersparse_first() const noexcept { assert(false); - return thrust::nullopt; + return cuda::std::nullopt; } __host__ __device__ constexpr vertex_t major_range_first() const noexcept { return vertex_t{0}; } @@ -823,7 +824,8 @@ class edge_partition_device_view_t major_idx_from_major_nocheck(vertex_t major) const noexcept + __device__ cuda::std::optional major_idx_from_major_nocheck( + vertex_t major) const noexcept { return major_offset_from_major_nocheck(major); } @@ -834,19 +836,19 @@ class edge_partition_device_view_t major_hypersparse_idx_from_major_nocheck( + __device__ cuda::std::optional major_hypersparse_idx_from_major_nocheck( vertex_t major) const noexcept { assert(false); - return thrust::nullopt; + return cuda::std::nullopt; } // major_hypersparse_idx: index within the hypersparse segment - __device__ thrust::optional major_from_major_hypersparse_idx_nocheck( + __device__ cuda::std::optional major_from_major_hypersparse_idx_nocheck( vertex_t major_hypersparse_idx) const noexcept { assert(false); - return thrust::nullopt; + return cuda::std::nullopt; } __host__ __device__ vertex_t minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept @@ -854,14 +856,14 @@ class 
edge_partition_device_view_t dcs_nzd_vertices() const + __host__ __device__ cuda::std::optional dcs_nzd_vertices() const { - return thrust::nullopt; + return cuda::std::nullopt; } - __host__ __device__ thrust::optional dcs_nzd_vertex_count() const + __host__ __device__ cuda::std::optional dcs_nzd_vertex_count() const { - return thrust::nullopt; + return cuda::std::nullopt; } private: diff --git a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh index 4b324bcf348..061c4108f4d 100644 --- a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh @@ -21,8 +21,8 @@ #include #include +#include #include -#include namespace cugraph { @@ -182,7 +182,7 @@ template class edge_partition_edge_dummy_property_device_view_t { public: using edge_type = edge_t; - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; static constexpr bool is_packed_bool = false; static constexpr bool has_packed_bool_element = false; @@ -194,7 +194,7 @@ class edge_partition_edge_dummy_property_device_view_t { { } - __device__ auto get(edge_t offset) const { return thrust::nullopt; } + __device__ auto get(edge_t offset) const { return cuda::std::nullopt; } }; } // namespace detail diff --git a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh index f86675e5572..e5600848db6 100644 --- a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh @@ -23,12 +23,12 @@ #include +#include #include #include #include #include #include -#include namespace cugraph { @@ -184,9 +184,10 @@ class edge_partition_endpoint_property_device_view_t { } private: - thrust::optional> keys_{thrust::nullopt}; - thrust::optional> 
key_chunk_start_offsets_{thrust::nullopt}; - thrust::optional key_chunk_size_{thrust::nullopt}; + cuda::std::optional> keys_{cuda::std::nullopt}; + cuda::std::optional> key_chunk_start_offsets_{ + cuda::std::nullopt}; + cuda::std::optional key_chunk_size_{cuda::std::nullopt}; ValueIterator value_first_{}; vertex_t range_first_{}; @@ -214,7 +215,7 @@ template class edge_partition_endpoint_dummy_property_device_view_t { public: using vertex_type = vertex_t; - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; static constexpr bool is_packed_bool = false; static constexpr bool has_packed_bool_element = false; @@ -230,7 +231,7 @@ class edge_partition_endpoint_dummy_property_device_view_t { { } - __device__ auto get(vertex_t offset) const { return thrust::nullopt; } + __device__ auto get(vertex_t offset) const { return cuda::std::nullopt; } }; } // namespace detail diff --git a/cpp/include/cugraph/edge_property.hpp b/cpp/include/cugraph/edge_property.hpp index d46d4e52fd4..11041f504f3 100644 --- a/cpp/include/cugraph/edge_property.hpp +++ b/cpp/include/cugraph/edge_property.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,8 +22,8 @@ #include +#include #include -#include #include #include @@ -63,7 +63,7 @@ class edge_property_view_t { class edge_dummy_property_view_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; using value_iterator = void*; }; @@ -155,7 +155,7 @@ class edge_property_t { class edge_dummy_property_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; auto view() const { return edge_dummy_property_view_t{}; } }; diff --git a/cpp/include/cugraph/edge_src_dst_property.hpp b/cpp/include/cugraph/edge_src_dst_property.hpp index d27f6856428..f7096ce32fa 100644 --- a/cpp/include/cugraph/edge_src_dst_property.hpp +++ b/cpp/include/cugraph/edge_src_dst_property.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,9 +24,9 @@ #include #include +#include #include #include -#include #include #include @@ -365,7 +365,7 @@ class edge_minor_property_t { class edge_endpoint_dummy_property_view_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; using value_iterator = void*; }; @@ -557,14 +557,14 @@ class edge_dst_property_t { class edge_src_dummy_property_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; auto view() const { return detail::edge_endpoint_dummy_property_view_t{}; } }; class edge_dst_dummy_property_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; auto view() const { return detail::edge_endpoint_dummy_property_view_t{}; } }; diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 91a349007da..b25dd9a41f4 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,11 +23,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -87,15 +87,15 @@ std::tuple, std::vector> compute_offset_aligned_ } template -thrust::optional to_thrust_optional(std::optional val) +cuda::std::optional to_thrust_optional(std::optional val) { - thrust::optional ret{thrust::nullopt}; + cuda::std::optional ret{cuda::std::nullopt}; if (val) { ret = *val; } return ret; } template -std::optional to_std_optional(thrust::optional val) +std::optional to_std_optional(cuda::std::optional val) { std::optional ret{std::nullopt}; if (val) { ret = *val; } diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh index 88ef3987a03..4eb57b621ea 100644 --- a/cpp/src/centrality/betweenness_centrality_impl.cuh +++ b/cpp/src/centrality/betweenness_centrality_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,8 +36,8 @@ #include +#include #include -#include #include // @@ -52,10 +52,11 @@ struct brandes_e_op_t { const vertex_t invalid_distance_{std::numeric_limits::max()}; template - __device__ thrust::optional operator()( + __device__ cuda::std::optional operator()( vertex_t, vertex_t, value_t src_sigma, vertex_t dst_distance, ignore_t) const { - return (dst_distance == invalid_distance_) ? thrust::make_optional(src_sigma) : thrust::nullopt; + return (dst_distance == invalid_distance_) ? 
cuda::std::make_optional(src_sigma) + : cuda::std::nullopt; } }; @@ -64,7 +65,7 @@ struct extract_edge_e_op_t { vertex_t d{}; template - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( vertex_t src, vertex_t dst, thrust::tuple src_props, @@ -72,8 +73,8 @@ struct extract_edge_e_op_t { weight_t edge_centrality) const { return ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1))) - ? thrust::optional>{thrust::make_tuple(src, dst)} - : thrust::nullopt; + ? cuda::std::optional>{thrust::make_tuple(src, dst)} + : cuda::std::nullopt; } }; @@ -153,8 +154,8 @@ std::tuple, rmm::device_uvector> brandes_b thrust::make_zip_iterator(distances.begin(), sigmas.begin()), [hop] __device__(auto v, auto old_values, auto v_sigma) { return thrust::make_tuple( - thrust::make_optional(bucket_idx_next), - thrust::make_optional(thrust::make_tuple(hop + 1, v_sigma))); + cuda::std::make_optional(bucket_idx_next), + cuda::std::make_optional(thrust::make_tuple(hop + 1, v_sigma))); }); vertex_frontier.bucket(bucket_idx_cur).clear(); diff --git a/cpp/src/community/approx_weighted_matching_impl.cuh b/cpp/src/community/approx_weighted_matching_impl.cuh index 869ed4e7ae6..57bf9d50b9a 100644 --- a/cpp/src/community/approx_weighted_matching_impl.cuh +++ b/cpp/src/community/approx_weighted_matching_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -66,7 +66,8 @@ std::tuple, weight_t> approximate_weighted_matchin cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { return !(src == dst); }, edge_masks_even.mutable_view()); @@ -130,7 +131,7 @@ std::tuple, weight_t> approximate_weighted_matchin graph_view_t::is_multi_gpu ? src_key_cache.view() : detail::edge_major_property_view_t(local_vertices.begin()), - [] __device__(auto, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wt) { + [] __device__(auto, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto wt) { return thrust::make_tuple(wt, dst); }, thrust::make_tuple(weight_t{0.0}, invalid_partner), @@ -314,7 +315,7 @@ std::tuple, weight_t> approximate_weighted_matchin dst_match_flags.view(), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) { return !((is_src_matched == true) || (is_dst_matched == true)); }, edge_masks_odd.mutable_view()); @@ -327,7 +328,7 @@ std::tuple, weight_t> approximate_weighted_matchin vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) { return !((is_src_matched == true) || (is_dst_matched == true)); }, edge_masks_odd.mutable_view()); @@ -346,7 +347,7 @@ std::tuple, weight_t> approximate_weighted_matchin dst_match_flags.view(), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + auto src, auto dst, 
auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) { return !((is_src_matched == true) || (is_dst_matched == true)); }, edge_masks_even.mutable_view()); @@ -359,7 +360,7 @@ std::tuple, weight_t> approximate_weighted_matchin vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) { return !((is_src_matched == true) || (is_dst_matched == true)); }, edge_masks_even.mutable_view()); diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index 18fb3fdb251..d37a8864e68 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,11 +30,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -140,7 +140,7 @@ struct cluster_update_op_t { template struct return_edge_weight_t { __device__ auto operator()( - vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, weight_t w) const + vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, weight_t w) const { return w; } @@ -150,7 +150,7 @@ struct return_edge_weight_t { template struct return_one_t { __device__ auto operator()( - vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) const { return 1.0; } diff --git a/cpp/src/community/detail/maximal_independent_moves.cuh b/cpp/src/community/detail/maximal_independent_moves.cuh index 85892f711ba..70b812d687a 100644 --- a/cpp/src/community/detail/maximal_independent_moves.cuh +++ b/cpp/src/community/detail/maximal_independent_moves.cuh @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,13 +32,13 @@ #include #include #include -#include #include #include #include #include #include +#include namespace cugraph { diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 01a68a3a0d7..6038fcc6f27 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -48,6 +47,8 @@ #include #include +#include + CUCO_DECLARE_BITWISE_COMPARABLE(float) CUCO_DECLARE_BITWISE_COMPARABLE(double) // FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. 
diff --git a/cpp/src/community/ecg_impl.cuh b/cpp/src/community/ecg_impl.cuh index d01b13f0b35..100efdb025d 100644 --- a/cpp/src/community/ecg_impl.cuh +++ b/cpp/src/community/ecg_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,6 +31,8 @@ #include +#include + namespace cugraph { namespace detail { @@ -106,7 +108,7 @@ std::tuple, size_t, weight_t> ecg( edge_dst_dummy_property_t{}.view(), view_concat(*edge_weight_view, modified_edge_weights.view()), [min_weight, ensemble_size = static_cast(ensemble_size)] __device__( - auto, auto, thrust::nullopt_t, thrust::nullopt_t, auto edge_properties) { + auto, auto, cuda::std::nullopt_t, cuda::std::nullopt_t, auto edge_properties) { auto e_weight = thrust::get<0>(edge_properties); auto e_frequency = thrust::get<1>(edge_properties); return min_weight + (e_weight - min_weight) * e_frequency / ensemble_size; diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index fbf47615dbe..b210bcacf35 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -28,6 +28,7 @@ #include +#include #include #include #include @@ -353,9 +354,9 @@ edge_property_t, edge_t> edge_t num_edges = edgelist_srcs.size(), num_triangles = num_triangles.data()] __device__(auto src, auto dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) { + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) { auto pair = thrust::make_tuple(src, dst); // Find its position in 'edges' diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 2b712a6de77..bd497b9c58c 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -31,12 +31,12 @@ #include 
+#include #include #include #include #include #include -#include #include #include #include @@ -47,47 +47,52 @@ namespace { template struct exclude_self_loop_t { - __device__ thrust::optional> operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + __device__ cuda::std::optional> operator()( + vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { return src != dst - ? thrust::optional>{thrust::make_tuple(src, dst)} - : thrust::nullopt; + ? cuda::std::optional>{thrust::make_tuple(src, dst)} + : cuda::std::nullopt; } }; template struct extract_low_to_high_degree_weighted_edges_t { - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, weight_t wgt) const { return (src_out_degree < dst_out_degree) - ? thrust::optional>{thrust::make_tuple( + ? cuda::std::optional>{thrust::make_tuple( src, dst, wgt)} : (((src_out_degree == dst_out_degree) && (src < dst) /* tie-breaking using vertex ID */) - ? thrust::optional< + ? cuda::std::optional< thrust::tuple>{thrust::make_tuple( src, dst, wgt)} - : thrust::nullopt); + : cuda::std::nullopt); } }; template struct extract_low_to_high_degree_edges_t { - __device__ thrust::optional> operator()(vertex_t src, - vertex_t dst, - edge_t src_out_degree, - edge_t dst_out_degree, - thrust::nullopt_t) const + __device__ cuda::std::optional> operator()( + vertex_t src, + vertex_t dst, + edge_t src_out_degree, + edge_t dst_out_degree, + cuda::std::nullopt_t) const { return (src_out_degree < dst_out_degree) - ? thrust::optional>{thrust::make_tuple(src, dst)} + ? cuda::std::optional>{thrust::make_tuple(src, dst)} : (((src_out_degree == dst_out_degree) && (src < dst) /* tie-breaking using vertex ID */) - ? thrust::optional>{thrust::make_tuple(src, - dst)} - : thrust::nullopt); + ? 
cuda::std::optional>{thrust::make_tuple(src, + dst)} + : cuda::std::nullopt); } }; @@ -343,7 +348,7 @@ k_truss(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), edge_triangle_counts.view(), - [k] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + [k] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) { return count >= k - 2; }, edge_mask.mutable_view(), diff --git a/cpp/src/community/triangle_count_impl.cuh b/cpp/src/community/triangle_count_impl.cuh index 100451f06f3..6718d08cdd7 100644 --- a/cpp/src/community/triangle_count_impl.cuh +++ b/cpp/src/community/triangle_count_impl.cuh @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -64,19 +64,20 @@ struct is_two_or_greater_t { template struct extract_low_to_high_degree_edges_t { - __device__ thrust::optional> operator()(vertex_t src, - vertex_t dst, - edge_t src_out_degree, - edge_t dst_out_degree, - thrust::nullopt_t) const + __device__ cuda::std::optional> operator()( + vertex_t src, + vertex_t dst, + edge_t src_out_degree, + edge_t dst_out_degree, + cuda::std::nullopt_t) const { return (src_out_degree < dst_out_degree) - ? thrust::optional>{thrust::make_tuple(src, dst)} + ? cuda::std::optional>{thrust::make_tuple(src, dst)} : (((src_out_degree == dst_out_degree) && (src < dst) /* tie-breaking using vertex ID */) - ? thrust::optional>{thrust::make_tuple(src, - dst)} - : thrust::nullopt); + ? 
cuda::std::optional>{thrust::make_tuple(src, + dst)} + : cuda::std::nullopt); } }; @@ -85,8 +86,8 @@ struct intersection_op_t { __device__ thrust::tuple operator()( vertex_t, vertex_t, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, raft::device_span intersection) const { return thrust::make_tuple(static_cast(intersection.size()), diff --git a/cpp/src/components/mis_impl.cuh b/cpp/src/components/mis_impl.cuh index b593c639946..2714d7e3d63 100644 --- a/cpp/src/components/mis_impl.cuh +++ b/cpp/src/components/mis_impl.cuh @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,13 +32,13 @@ #include #include #include -#include #include #include #include #include #include +#include namespace cugraph { diff --git a/cpp/src/components/vertex_coloring_impl.cuh b/cpp/src/components/vertex_coloring_impl.cuh index fa7fb1f6099..7ad06d12027 100644 --- a/cpp/src/components/vertex_coloring_impl.cuh +++ b/cpp/src/components/vertex_coloring_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,6 +24,8 @@ #include #include +#include + namespace cugraph { namespace detail { @@ -52,7 +54,8 @@ rmm::device_uvector vertex_coloring( edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { return !(src == dst); // mask out self-loop }, edge_masks_even.mutable_view()); @@ -119,7 +122,7 @@ rmm::device_uvector vertex_coloring( is_vertex_in_mis.begin(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [color_id] __device__( - auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { + auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, cuda::std::nullopt_t) { return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); }, edge_masks_odd.mutable_view()); @@ -140,7 +143,7 @@ rmm::device_uvector vertex_coloring( is_vertex_in_mis.begin(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [color_id] __device__( - auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { + auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, cuda::std::nullopt_t) { return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); }, edge_masks_even.mutable_view()); diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh index e791f4dcad3..46db347e0bc 100644 --- a/cpp/src/components/weakly_connected_components_impl.cuh +++ b/cpp/src/components/weakly_connected_components_impl.cuh @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -45,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -189,11 +189,11 @@ struct e_op_t { EdgeIterator edge_buffer_first{}; size_t* num_edge_inserts{}; - __device__ 
thrust::optional operator()(thrust::tuple tagged_src, - vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const + __device__ cuda::std::optional operator()(thrust::tuple tagged_src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { auto tag = thrust::get<1>(tagged_src); auto dst_offset = dst - dst_first; @@ -207,8 +207,8 @@ struct e_op_t { *(edge_buffer_first + edge_idx) = tag >= old ? thrust::make_tuple(tag, old) : thrust::make_tuple(old, tag); } - return old == invalid_component_id::value ? thrust::optional{tag} - : thrust::nullopt; + return old == invalid_component_id::value ? cuda::std::optional{tag} + : cuda::std::nullopt; } }; @@ -231,9 +231,10 @@ struct v_op_t { size_t bucket_idx_conflict{}; // relevant only if GraphViewType::is_multi_gpu is true template - __device__ std::enable_if_t, thrust::optional>> - operator()(thrust::tuple tagged_v, int /* v_val */) const + __device__ + std::enable_if_t, cuda::std::optional>> + operator()(thrust::tuple tagged_v, int /* v_val */) const { auto tag = thrust::get<1>(tagged_v); auto v_offset = @@ -242,22 +243,23 @@ struct v_op_t { auto old = invalid_component_id::value; bool success = v_component.compare_exchange_strong(old, tag, cuda::std::memory_order_relaxed); if (!success && (old != tag)) { // conflict - return thrust::make_tuple(thrust::optional{bucket_idx_conflict}, - thrust::optional{std::byte{0}} /* dummy */); + return thrust::make_tuple(cuda::std::optional{bucket_idx_conflict}, + cuda::std::optional{std::byte{0}} /* dummy */); } else { return thrust::make_tuple( - success ? thrust::optional{bucket_idx_next} : thrust::nullopt, - success ? thrust::optional{std::byte{0}} /* dummy */ : thrust::nullopt); + success ? cuda::std::optional{bucket_idx_next} : cuda::std::nullopt, + success ? 
cuda::std::optional{std::byte{0}} /* dummy */ : cuda::std::nullopt); } } template - __device__ std::enable_if_t, thrust::optional>> - operator()(thrust::tuple /* tagged_v */, int /* v_val */) const + __device__ + std::enable_if_t, cuda::std::optional>> + operator()(thrust::tuple /* tagged_v */, int /* v_val */) const { - return thrust::make_tuple(thrust::optional{bucket_idx_next}, - thrust::optional{std::byte{0}} /* dummy */); + return thrust::make_tuple(cuda::std::optional{bucket_idx_next}, + cuda::std::optional{std::byte{0}} /* dummy */); } }; diff --git a/cpp/src/cores/core_number_impl.cuh b/cpp/src/cores/core_number_impl.cuh index a2b6f6430f0..f1ff0912002 100644 --- a/cpp/src/cores/core_number_impl.cuh +++ b/cpp/src/cores/core_number_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ #include +#include #include #include #include @@ -35,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -53,10 +53,10 @@ struct e_op_t { size_t k{}; edge_t delta{}; - __device__ thrust::optional operator()( - vertex_t, vertex_t, thrust::nullopt_t, edge_t dst_val, thrust::nullopt_t) const + __device__ cuda::std::optional operator()( + vertex_t, vertex_t, cuda::std::nullopt_t, edge_t dst_val, cuda::std::nullopt_t) const { - return dst_val >= k ? thrust::optional{delta} : thrust::nullopt; + return dst_val >= k ? cuda::std::optional{delta} : cuda::std::nullopt; } }; @@ -251,8 +251,8 @@ void core_number(raft::handle_t const& handle, auto new_core_number = v_val >= pushed_val ? v_val - pushed_val : edge_t{0}; new_core_number = new_core_number < (k - delta) ? (k - delta) : new_core_number; new_core_number = new_core_number < k_first ? 
edge_t{0} : new_core_number; - return thrust::make_tuple(thrust::optional{bucket_idx_next}, - thrust::optional{new_core_number}); + return thrust::make_tuple(cuda::std::optional{bucket_idx_next}, + cuda::std::optional{new_core_number}); }); } diff --git a/cpp/src/lookup/lookup_src_dst_impl.cuh b/cpp/src/lookup/lookup_src_dst_impl.cuh index 45bbf870d80..dd03e621022 100644 --- a/cpp/src/lookup/lookup_src_dst_impl.cuh +++ b/cpp/src/lookup/lookup_src_dst_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,8 @@ #include +#include + namespace cugraph { template @@ -370,7 +372,7 @@ EdgeTypeAndIdToSrcDstLookupContainerType build_edge_id_and_type_to_src_dst_looku EdgeIdInputWrapper edge_id_view, EdgeTypeInputWrapper edge_type_view) { - static_assert(!std::is_same_v, + static_assert(!std::is_same_v, "Can not create edge id lookup table without edge ids"); using vertex_t = typename GraphViewType::vertex_type; @@ -411,17 +413,17 @@ EdgeTypeAndIdToSrcDstLookupContainerType build_edge_id_and_type_to_src_dst_looku cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), view_concat(edge_id_view, edge_type_view), - cuda::proclaim_return_type>>( + cuda::proclaim_return_type>>( [key_func = cugraph::detail::compute_gpu_id_from_ext_edge_id_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto, auto, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, thrust::tuple id_and_type) { - return thrust::optional>{thrust::make_tuple( + return cuda::std::optional>{thrust::make_tuple( key_func(thrust::get<0>(id_and_type)), thrust::get<1>(id_and_type))}; })); @@ -518,9 +520,9 @@ EdgeTypeAndIdToSrcDstLookupContainerType build_edge_id_and_type_to_src_dst_looku cugraph::edge_src_dummy_property_t{}.view(), 
cugraph::edge_dst_dummy_property_t{}.view(), edge_type_view, - cuda::proclaim_return_type>( - [] __device__(auto, auto, thrust::nullopt_t, thrust::nullopt_t, edge_type_t et) { - return thrust::optional{et}; + cuda::proclaim_return_type>( + [] __device__(auto, auto, cuda::std::nullopt_t, cuda::std::nullopt_t, edge_type_t et) { + return cuda::std::optional{et}; })); thrust::sort(handle.get_thrust_policy(), edge_types.begin(), edge_types.end()); diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index 2b89d214fd7..9816753852f 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -47,7 +48,6 @@ #include #include #include -#include #include #include @@ -138,7 +138,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, size_t* buffer_idx_ptr, @@ -238,7 +238,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( if (edge_partition_e_mask) { for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if (i < static_cast(num_edges_this_warp)) 
{ auto key_idx_this_warp = static_cast(thrust::distance( @@ -261,7 +261,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( } } else { for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if (i < static_cast(num_edges_this_warp)) { auto key_idx_this_warp = static_cast(thrust::distance( @@ -304,7 +304,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, size_t* buffer_idx_ptr, @@ -359,7 +359,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree( if (edge_partition_e_mask) { for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if ((i < static_cast(local_degree)) && ((*edge_partition_e_mask).get(local_edge_offset + i))) { e_op_result = call_e_op(i); @@ -370,7 +370,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree( } } else { for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if (i < static_cast(local_degree)) { e_op_result = call_e_op(i); } warp_push_buffer_elements( @@ -400,7 +400,7 @@ __global__ static void extract_transform_v_frontier_e_high_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper 
edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, size_t* buffer_idx_ptr, @@ -429,7 +429,7 @@ __global__ static void extract_transform_v_frontier_e_high_degree( ((static_cast(num_edges) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); while (idx < rounded_up_num_edges) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if (idx < num_edges) { auto key_idx = thrust::distance( key_local_degree_offsets.begin() + 1, @@ -494,7 +494,7 @@ void extract_transform_v_frontier_e_edge_partition( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, OptionalOutputKeyIterator output_key_first, OptionalOutputValueIterator output_value_first, raft::device_span count /* size = 1 */, @@ -665,21 +665,21 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, EdgeOp>::type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, edge_partition_endpoint_dummy_property_device_view_t, edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, edge_partition_endpoint_dummy_property_device_view_t, edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, 
detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -692,12 +692,12 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, static_assert(!std::is_same_v); static_assert( std::is_same_v && - !std::is_same_v, - thrust::optional>, - std::conditional_t, - thrust::optional, - thrust::optional>>>); + std::conditional_t< + !std::is_same_v && !std::is_same_v, + cuda::std::optional>, + std::conditional_t, + cuda::std::optional, + cuda::std::optional>>>); constexpr bool try_bitmap = GraphViewType::is_multi_gpu && std::is_same_v && KeyBucketType::is_sorted_unique; @@ -1401,10 +1401,10 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(partition_idx)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, partition_idx) - : thrust::nullopt; + : cuda::std::nullopt; size_t num_streams_per_loop{1}; if (stream_pool_indices) { assert((*stream_pool_indices).size() >= num_concurrent_loops); diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 847c1db6937..d21a8153dc6 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -49,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -106,7 +106,7 @@ struct update_rx_major_local_degree_t { int minor_comm_size{}; edge_partition_device_view_t edge_partition{}; - thrust::optional> + cuda::std::optional> edge_partition_e_mask{}; size_t reordered_idx_first{}; @@ -155,7 +155,7 @@ struct update_rx_major_local_nbrs_t { edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; - thrust::optional> + cuda::std::optional> edge_partition_e_mask{}; size_t reordered_idx_first{}; @@ -214,7 +214,7 @@ struct update_rx_major_local_nbrs_t { if (local_degree > 0) { if (edge_partition_e_mask) { auto mask_first = (*edge_partition_e_mask).value_first(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto input_first = thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()); copy_if_mask_set(input_first, @@ -233,7 +233,7 @@ struct update_rx_major_local_nbrs_t { local_degree); } } else { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto input_first = thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()) + edge_offset; @@ -278,7 +278,7 @@ struct pick_min_degree_t { raft::device_span second_element_offsets{}; edge_partition_device_view_t edge_partition{}; - thrust::optional> + cuda::std::optional> edge_partition_e_mask{}; __device__ edge_t operator()(thrust::tuple pair) const @@ -413,7 +413,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; - thrust::optional> + cuda::std::optional> edge_partition_e_mask{}; VertexPairIterator vertex_pair_first; @@ -430,7 +430,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto pair = *(vertex_pair_first + i); vertex_t const* 
indices0{}; - std::conditional_t, + std::conditional_t, edge_property_value_t const*, void*> edge_property_values0{}; @@ -439,7 +439,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_t local_degree0{0}; if constexpr (std::is_same_v) { indices0 = edge_partition.indices(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_property_values0 = edge_partition_e_value_input.value_first(); } @@ -468,7 +468,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } } else { indices0 = first_element_indices.begin(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_property_values0 = first_element_edge_property_values; } @@ -478,7 +478,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } vertex_t const* indices1{}; - std::conditional_t, + std::conditional_t, edge_property_value_t const*, void*> edge_property_values1{}; @@ -487,7 +487,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_t local_degree1{0}; if constexpr (std::is_same_v) { indices1 = edge_partition.indices(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_property_values1 = edge_partition_e_value_input.value_first(); } @@ -516,7 +516,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } } else { indices1 = second_element_indices.begin(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_property_values1 = second_element_edge_property_values; } @@ -618,7 +618,7 @@ struct gatherv_indices_t { // in a single warp (better optimize if this becomes a performance bottleneck) for (int j = 0; j < minor_comm_size; ++j) { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto zipped_gathered_begin = thrust::make_zip_iterator( thrust::make_tuple(gathered_intersection_indices.begin(), gathered_nbr_intersection_e_property_values0, @@ -664,7 +664,7 @@ struct gatherv_indices_t { // number of groups" is recommended for 
load-balancing. template std::conditional_t< - !std::is_same_v, + !std::is_same_v, std::tuple, rmm::device_uvector, rmm::device_uvector, @@ -684,7 +684,7 @@ nbr_intersection(raft::handle_t const& handle, using edge_property_value_t = typename EdgeValueInputIterator::value_type; using edge_partition_e_input_device_view_t = - std::conditional_t, + std::conditional_t, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -692,16 +692,16 @@ nbr_intersection(raft::handle_t const& handle, edge_property_value_t>>; using optional_property_buffer_value_type = - std::conditional_t, + std::conditional_t, edge_property_value_t, void>; using optional_property_buffer_view_t = - std::conditional_t, + std::conditional_t, edge_property_value_t const*, void*>; using optional_property_buffer_mutable_view_t = - std::conditional_t, + std::conditional_t, edge_property_value_t*, void*>; @@ -907,11 +907,11 @@ nbr_intersection(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail:: edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = (i == size_t{0}) ? 
size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1]; @@ -950,7 +950,7 @@ nbr_intersection(raft::handle_t const& handle, optional_property_buffer_mutable_view_t optional_local_e_property_values{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { local_e_property_values_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), handle.get_stream()); optional_local_e_property_values = local_e_property_values_for_rx_majors.data(); @@ -964,11 +964,11 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_e_input_device_view_t(edge_value_input, i); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail:: edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = @@ -1045,7 +1045,7 @@ nbr_intersection(raft::handle_t const& handle, std::tie(major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { std::tie(major_e_property_values, std::ignore) = shuffle_values(major_comm, local_e_property_values_for_rx_majors.begin(), @@ -1132,16 +1132,18 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions()); - [[maybe_unused]] std::conditional_t, - std::vector>, - std::byte /* dummy */> + [[maybe_unused]] std::conditional_t< + !std::is_same_v, + std::vector>, + std::byte /* dummy */> edge_partition_nbr_intersection_e_property_values0{}; - [[maybe_unused]] std::conditional_t, - std::vector>, - std::byte /* dummy */> + [[maybe_unused]] std::conditional_t< + !std::is_same_v, + std::vector>, + std::byte /* dummy */> 
edge_partition_nbr_intersection_e_property_values1{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_partition_nbr_intersection_e_property_values0.reserve( graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_e_property_values1.reserve( @@ -1198,10 +1200,10 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_e_input_device_view_t(edge_value_input, i); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); @@ -1249,7 +1251,7 @@ nbr_intersection(raft::handle_t const& handle, optional_property_buffer_mutable_view_t rx_v_pair_optional_nbr_intersection_e_property_values1{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { rx_v_pair_nbr_intersection_e_property_values0.resize( rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); rx_v_pair_nbr_intersection_e_property_values1.resize( @@ -1264,7 +1266,7 @@ nbr_intersection(raft::handle_t const& handle, if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { optional_property_buffer_view_t optional_major_e_property_values{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { optional_major_e_property_values = major_e_property_values.data(); } @@ -1309,7 +1311,7 @@ nbr_intersection(raft::handle_t const& handle, CUGRAPH_FAIL("unimplemented."); } - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { rx_v_pair_nbr_intersection_indices.resize( thrust::distance(rx_v_pair_nbr_intersection_indices.begin(), thrust::remove(handle.get_thrust_policy(), @@ -1515,7 +1517,7 @@ nbr_intersection(raft::handle_t const& handle, rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr 
(!std::is_same_v) { device_multicast_sendrecv(minor_comm, rx_v_pair_nbr_intersection_e_property_values0.begin(), rx_v_pair_nbr_intersection_index_tx_counts, @@ -1548,7 +1550,7 @@ nbr_intersection(raft::handle_t const& handle, gathered_nbr_intersection_e_property_values1.size(), handle.get_stream()); } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), @@ -1598,7 +1600,7 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.push_back(std::move(combined_nbr_intersection_sizes)); edge_partition_nbr_intersection_indices.push_back( std::move(combined_nbr_intersection_indices)); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_partition_nbr_intersection_e_property_values0.push_back( std::move(combined_nbr_intersection_e_property_values0)); edge_partition_nbr_intersection_e_property_values1.push_back( @@ -1612,7 +1614,7 @@ nbr_intersection(raft::handle_t const& handle, num_nbr_intersection_indices += edge_partition_nbr_intersection_indices[i].size(); } nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), handle.get_stream()); nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), @@ -1631,7 +1633,7 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_indices[i].end(), nbr_intersection_indices.begin() + index_offset); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { thrust::copy(handle.get_thrust_policy(), edge_partition_nbr_intersection_e_property_values0[i].begin(), edge_partition_nbr_intersection_e_property_values0[i].end(), @@ -1660,10 +1662,10 @@ nbr_intersection(raft::handle_t const& handle, auto edge_partition_e_value_input = 
edge_partition_e_input_device_view_t(edge_value_input, 0); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, 0) - : thrust::nullopt; + : cuda::std::nullopt; rmm::device_uvector nbr_intersection_sizes( input_size, @@ -1699,7 +1701,7 @@ nbr_intersection(raft::handle_t const& handle, optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values0{}; optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values1{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), handle.get_stream()); nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), @@ -1770,7 +1772,7 @@ nbr_intersection(raft::handle_t const& handle, size_t{1} << 27, static_cast(thrust::distance(nbr_intersection_indices.begin() + num_scanned, nbr_intersection_indices.end()))); - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { num_copied += static_cast(thrust::distance( tmp_indices.begin() + num_copied, thrust::copy_if(handle.get_thrust_policy(), @@ -1804,12 +1806,12 @@ nbr_intersection(raft::handle_t const& handle, num_scanned += this_scan_size; } nbr_intersection_indices = std::move(tmp_indices); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { nbr_intersection_e_property_values0 = std::move(tmp_property_values0); nbr_intersection_e_property_values1 = std::move(tmp_property_values1); } #else - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { nbr_intersection_indices.resize( thrust::distance(nbr_intersection_indices.begin(), thrust::remove(handle.get_thrust_policy(), @@ -1845,7 +1847,7 @@ nbr_intersection(raft::handle_t const& handle, // 5. 
Return - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { return std::make_tuple(std::move(nbr_intersection_offsets), std::move(nbr_intersection_indices)); diff --git a/cpp/src/prims/detail/partition_v_frontier.cuh b/cpp/src/prims/detail/partition_v_frontier.cuh index 018960d9a54..f5249aec304 100644 --- a/cpp/src/prims/detail/partition_v_frontier.cuh +++ b/cpp/src/prims/detail/partition_v_frontier.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,14 +35,12 @@ #include #include #include -#include #include #include #include #include #include -#include #include namespace cugraph { diff --git a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh index c521774a50d..1e47bb53a9e 100644 --- a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh +++ b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -54,7 +55,6 @@ #include #include #include -#include #include #include #include @@ -265,7 +265,7 @@ __global__ static void per_v_transform_reduce_e_hypersparse( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultValueOutputIteratorOrWrapper result_value_output, EdgeOp e_op, T init /* relevant only if update_major == true */, @@ -296,7 +296,7 @@ __global__ static void per_v_transform_reduce_e_hypersparse( while (idx < key_count) { key_t key{}; vertex_t major{}; - thrust::optional major_idx{}; + cuda::std::optional major_idx{}; if constexpr (use_input_key) { key = *(key_first + idx); major = 
thrust_tuple_get_or_identity(key); @@ -402,7 +402,7 @@ __global__ static void per_v_transform_reduce_e_low_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultValueOutputIteratorOrWrapper result_value_output, EdgeOp e_op, T init /* relevant only if update_major == true */, @@ -512,7 +512,7 @@ __global__ static void per_v_transform_reduce_e_mid_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultValueOutputIteratorOrWrapper result_value_output, EdgeOp e_op, T init /* relevant only if update_major == true */, @@ -596,7 +596,7 @@ __global__ static void per_v_transform_reduce_e_mid_degree( ((static_cast(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { - thrust::optional e_op_result{thrust::nullopt}; + cuda::std::optional e_op_result{cuda::std::nullopt}; if ((i < static_cast(local_degree)) && (*edge_partition_e_mask).get(edge_offset + i) && call_pred_op(i)) { e_op_result = call_e_op(i); @@ -630,7 +630,7 @@ __global__ static void per_v_transform_reduce_e_mid_degree( ((static_cast(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { - thrust::optional e_op_result{thrust::nullopt}; + cuda::std::optional e_op_result{cuda::std::nullopt}; if (i < static_cast(local_degree) && call_pred_op(i)) { e_op_result = call_e_op(i); } @@ -699,7 +699,7 @@ __global__ static void 
per_v_transform_reduce_e_high_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultValueOutputIteratorOrWrapper result_value_output, EdgeOp e_op, T init /* relevant only if update_major == true */, @@ -790,7 +790,7 @@ __global__ static void per_v_transform_reduce_e_high_degree( per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) * per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size; for (size_t i = threadIdx.x; i < rounded_up_local_degree; i += blockDim.x) { - thrust::optional e_op_result{thrust::nullopt}; + cuda::std::optional e_op_result{cuda::std::nullopt}; if ((i < static_cast(local_degree)) && (*edge_partition_e_mask).get(edge_offset + i) && call_pred_op(i)) { e_op_result = call_e_op(i); @@ -835,7 +835,7 @@ __global__ static void per_v_transform_reduce_e_high_degree( per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) * per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size; for (size_t i = threadIdx.x; i < rounded_up_local_degree; i += blockDim.x) { - thrust::optional e_op_result{thrust::nullopt}; + cuda::std::optional e_op_result{cuda::std::nullopt}; if ((i < static_cast(local_degree)) && call_pred_op(i)) { e_op_result = call_e_op(i); } @@ -1141,7 +1141,7 @@ void per_v_transform_reduce_e_edge_partition( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultValueOutputIteratorOrWrapper output_buffer, EdgeOp e_op, T major_init, @@ -1415,21 +1415,21 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, typename 
iterator_value_type_or_default_t::value_type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -1519,10 +1519,10 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(static_cast(minor_comm_rank))); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, static_cast(minor_comm_rank)) - : thrust::nullopt; + : cuda::std::nullopt; std::optional> edge_partition_stream_pool_indices{std::nullopt}; if (local_vertex_partition_segment_offsets && (handle.get_stream_pool_size() >= max_segments)) { @@ -1737,10 +1737,10 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, sorted_unique_key_first, sorted_unique_nzd_key_last, deg1_v_first = (filter_input_key && graph_view.use_dcs()) - ? thrust::make_optional(graph_view.local_vertex_partition_range_first() + - (*local_vertex_partition_segment_offsets)[3] + - *((*hypersparse_degree_offsets).rbegin() + 1)) - : thrust::nullopt, + ? 
cuda::std::make_optional(graph_view.local_vertex_partition_range_first() + + (*local_vertex_partition_segment_offsets)[3] + + *((*hypersparse_degree_offsets).rbegin() + 1)) + : cuda::std::nullopt, vertex_partition_range_first = graph_view.local_vertex_partition_range_first()] __device__(size_t i) { if (i == 0) { @@ -3102,10 +3102,10 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(partition_idx)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, partition_idx) - : thrust::nullopt; + : cuda::std::nullopt; size_t num_streams_per_loop{1}; if (stream_pool_indices) { assert((*stream_pool_indices).size() >= num_concurrent_loops); diff --git a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh index dd0da77851b..3e38b85f105 100644 --- a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh +++ b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,12 +38,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -474,10 +474,10 @@ compute_valid_local_nbr_count_inclusive_sums( graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_major_first = aggregate_local_frontier_major_first + local_frontier_displacements[i]; @@ -1255,10 +1255,10 @@ compute_aggregate_local_frontier_local_degrees( graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_major_first = aggregate_local_frontier_major_first + local_frontier_displacements[i]; @@ -1501,9 +1501,9 @@ rmm::device_uvector convert_to_unmasked_local thrust::make_counting_iterator(size_t{0}), cuda::proclaim_return_type( [K, - key_indices = key_indices ? thrust::make_optional>( + key_indices = key_indices ? cuda::std::make_optional>( (*key_indices).data(), (*key_indices).size()) - : thrust::nullopt] __device__(size_t i) { + : cuda::std::nullopt] __device__(size_t i) { return key_indices ? (*key_indices)[i] : i / K; })); auto pair_first = thrust::make_zip_iterator(local_nbr_indices.begin(), sample_major_idx_first); @@ -1513,10 +1513,10 @@ rmm::device_uvector convert_to_unmasked_local graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_major_first = aggregate_local_frontier_major_first + local_frontier_displacements[i]; @@ -1863,10 +1863,10 @@ biased_sample_and_compute_local_nbr_indices( sample_local_random_numbers.data() + local_frontier_sample_offsets[i], local_frontier_sample_offsets[i + 1] - local_frontier_sample_offsets[i]), key_indices = - key_indices ? thrust::make_optional>( + key_indices ? 
cuda::std::make_optional>( (*key_indices).data() + local_frontier_sample_offsets[i], local_frontier_sample_offsets[i + 1] - local_frontier_sample_offsets[i]) - : thrust::nullopt, + : cuda::std::nullopt, key_idx_to_unique_key_idx = raft::device_span(aggregate_local_frontier_key_idx_to_unique_key_idx.data() + local_frontier_displacements[i], diff --git a/cpp/src/prims/detail/transform_v_frontier_e.cuh b/cpp/src/prims/detail/transform_v_frontier_e.cuh index 5ebcddfe8da..8f414391596 100644 --- a/cpp/src/prims/detail/transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/transform_v_frontier_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,9 +29,9 @@ #include +#include #include #include -#include #include #include @@ -389,21 +389,21 @@ auto transform_v_frontier_e(raft::handle_t const& handle, static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -424,10 +424,10 @@ auto 
transform_v_frontier_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_key_first = aggregate_local_frontier_key_first + local_frontier_displacements[i]; @@ -470,10 +470,10 @@ auto transform_v_frontier_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_key_first = aggregate_local_frontier_key_first + local_frontier_displacements[i]; diff --git a/cpp/src/prims/extract_transform_e.cuh b/cpp/src/prims/extract_transform_e.cuh index 5741c98d90e..bb003aa8747 100644 --- a/cpp/src/prims/extract_transform_e.cuh +++ b/cpp/src/prims/extract_transform_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,9 +69,9 @@ namespace cugraph { * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not * access edge property values). * @param e_op Quinary operator takes edge source, edge destination, property values for the source, - * property values for the destination, and property values for the edge and returns thrust::nullopt - * (if the return value is to be discarded) or a valid @p e_op output to be extracted and - * accumulated. 
+ * property values for the destination, and property values for the edge and returns + * cuda::std::nullopt (if the return value is to be discarded) or a valid @p e_op output to be + * extracted and accumulated. * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return Dataframe buffer object storing extracted and accumulated valid @p e_op return values. */ diff --git a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh index ba227b263bc..46984d6b4e5 100644 --- a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh +++ b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,9 +57,9 @@ namespace cugraph { * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not * access edge property values). * @param e_op Quinary operator takes edge source, edge destination, property values for the source, - * property values for the destination, and property values for the edge and returns thrust::nullopt - * (if the return value is to be discarded) or a valid @p e_op output to be extracted and - * accumulated. + * property values for the destination, and property values for the edge and returns + * cuda::std::nullopt (if the return value is to be discarded) or a valid @p e_op output to be + * extracted and accumulated. * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return Dataframe buffer object storing extracted and accumulated valid @p e_op return values. 
*/ diff --git a/cpp/src/prims/fill_edge_property.cuh b/cpp/src/prims/fill_edge_property.cuh index 54d0c454ec2..3e1383707a2 100644 --- a/cpp/src/prims/fill_edge_property.cuh +++ b/cpp/src/prims/fill_edge_property.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include +#include #include #include @@ -50,10 +51,10 @@ void fill_edge_property(raft::handle_t const& handle, for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; if constexpr (cugraph::has_packed_bool_element< std::remove_reference_t, diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index f03e8f54fb2..728c1eac2bd 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,6 +32,7 @@ #include +#include #include #include #include @@ -43,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -109,7 +109,7 @@ struct call_intersection_op_t { typename GraphViewType::edge_type, GraphViewType::is_multi_gpu> edge_partition{}; - thrust::optional> unique_vertices; + cuda::std::optional> unique_vertices; VertexValueInputIterator vertex_property_first; IntersectionOp intersection_op{}; size_t const* nbr_offsets{nullptr}; @@ -135,17 +135,17 @@ struct call_intersection_op_t { auto intersection = raft::device_span( nbr_indices + nbr_offsets[i], nbr_indices + nbr_offsets[i + 1]); - std::conditional_t, + std::conditional_t, raft::device_span, std::byte /* dummy */> property_values0{}; - std::conditional_t, + std::conditional_t, raft::device_span, std::byte /* dummy */> property_values1{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { property_values0 = raft::device_span( nbr_intersection_property_values0 + nbr_offsets[i], nbr_intersection_property_values0 + +nbr_offsets[i + 1]); @@ -392,7 +392,7 @@ void per_v_pair_transform_dst_nbr_intersection( [[maybe_unused]] rmm::device_uvector r_nbr_intersection_property_values1(size_t{0}, handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { std::tie(intersection_offsets, intersection_indices, r_nbr_intersection_property_values0, @@ -430,7 +430,7 @@ void per_v_pair_transform_dst_nbr_intersection( VertexPairIterator, VertexPairValueOutputIterator>{ edge_partition, - thrust::make_optional>( + cuda::std::make_optional>( (*sorted_unique_vertices).data(), (*sorted_unique_vertices).size()), vertex_value_input_for_sorted_unique_vertices_first, intersection_op, @@ -442,28 +442,29 @@ void per_v_pair_transform_dst_nbr_intersection( vertex_pair_first, vertex_pair_value_output_first}); } else { - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - 
thrust::make_counting_iterator(this_chunk_size), - detail::call_intersection_op_t< - GraphViewType, - VertexValueInputIterator, - typename decltype(r_nbr_intersection_property_values0)::const_pointer, - IntersectionOp, - decltype(chunk_vertex_pair_index_first), - VertexPairIterator, - VertexPairValueOutputIterator>{ - edge_partition, - thrust::optional>{thrust::nullopt}, - vertex_value_input_first, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - r_nbr_intersection_property_values0.data(), - r_nbr_intersection_property_values1.data(), - chunk_vertex_pair_index_first, - vertex_pair_first, - vertex_pair_value_output_first}); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(this_chunk_size), + detail::call_intersection_op_t< + GraphViewType, + VertexValueInputIterator, + typename decltype(r_nbr_intersection_property_values0)::const_pointer, + IntersectionOp, + decltype(chunk_vertex_pair_index_first), + VertexPairIterator, + VertexPairValueOutputIterator>{ + edge_partition, + cuda::std::optional>{cuda::std::nullopt}, + vertex_value_input_first, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + r_nbr_intersection_property_values0.data(), + r_nbr_intersection_property_values1.data(), + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); } chunk_vertex_pair_index_first += this_chunk_size; diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 30706632ad2..812e0a9b926 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,11 +35,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -93,7 +93,7 @@ struct transform_local_nbr_indices_t { using edge_t = typename GraphViewType::edge_type; edge_partition_device_view_t edge_partition{}; - thrust::optional local_key_indices{thrust::nullopt}; + cuda::std::optional local_key_indices{cuda::std::nullopt}; KeyIterator key_first{}; LocalNbrIdxIterator local_nbr_idx_first{}; EdgePartitionSrcValueInputWrapper edge_partition_src_value_input; @@ -101,7 +101,7 @@ struct transform_local_nbr_indices_t { EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input; EdgeOp e_op{}; edge_t invalid_idx{}; - thrust::optional invalid_value{thrust::nullopt}; + cuda::std::optional invalid_value{cuda::std::nullopt}; size_t K{}; __device__ T operator()(size_t i) const @@ -241,21 +241,21 @@ per_v_random_select_transform_e(raft::handle_t const& handle, using key_buffer_t = dataframe_buffer_type_t; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, edge_partition_endpoint_dummy_property_device_view_t, edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, edge_partition_endpoint_dummy_property_device_view_t, edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -430,7 +430,7 @@ 
per_v_random_select_transform_e(raft::handle_t const& handle, EdgeOp, T>{ edge_partition, - thrust::make_optional(edge_partition_sample_key_index_first), + cuda::std::make_optional(edge_partition_sample_key_index_first), edge_partition_key_list_first, edge_partition_sample_local_nbr_index_first, edge_partition_src_value_input, @@ -454,7 +454,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, edge_partition_e_input_device_view_t, EdgeOp, T>{edge_partition, - thrust::nullopt, + cuda::std::nullopt, edge_partition_key_list_first, edge_partition_sample_local_nbr_index_first, edge_partition_src_value_input, diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index c13816242bc..4ebda9d42c4 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -49,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -89,15 +89,16 @@ struct tuple_to_minor_comm_rank_t { int minor_comm_size{}; template - __device__ std::enable_if_t, int> operator()( + __device__ std::enable_if_t, int> + operator()( thrust::tuple val /* major, minor key, edge value */) const { return key_func(thrust::get<1>(val)) % minor_comm_size; } template - __device__ std::enable_if_t, int> operator()( - thrust::tuple val /* major, minor key */) const + __device__ std::enable_if_t, int> + operator()(thrust::tuple val /* major, minor key */) const { return key_func(thrust::get<1>(val)) % minor_comm_size; } @@ -123,13 +124,13 @@ template struct call_key_aggregated_e_op_t { EdgePartitionDeviceView edge_partition{}; - thrust::optional edge_major_value_map{}; + cuda::std::optional edge_major_value_map{}; EdgePartitionMajorValueInputWrapper edge_partition_major_value_input{}; EdgeMinorKeyValueMap edge_minor_key_value_map{}; KeyAggregatedEdgeOp key_aggregated_e_op{}; template - __device__ std::enable_if_t, e_op_result_t> + __device__ std::enable_if_t, e_op_result_t> operator()(thrust::tuple val /* major, minor key, aggregated edge value */) const { @@ -145,7 +146,7 @@ struct call_key_aggregated_e_op_t { } template - __device__ std::enable_if_t, e_op_result_t> + __device__ std::enable_if_t, e_op_result_t> operator()(thrust::tuple val /* major, minor key */) const { auto major = thrust::get<0>(val); @@ -155,7 +156,7 @@ struct call_key_aggregated_e_op_t { : edge_partition_major_value_input.get( edge_partition.major_offset_from_major_nocheck(major)); return key_aggregated_e_op( - major, minor_key, major_val, edge_minor_key_value_map.find(minor_key), thrust::nullopt); + major, minor_key, major_val, edge_minor_key_value_map.find(minor_key), cuda::std::nullopt); } }; @@ -284,16 +285,16 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( using edge_value_t = 
typename EdgeValueInputWrapper::value_type; using kv_pair_value_t = typename KVStoreViewType::value_type; using optional_edge_value_buffer_value_type = - std::conditional_t, edge_value_t, void>; + std::conditional_t, edge_value_t, void>; static_assert( - std::is_same_v || std::is_arithmetic_v, + std::is_same_v || std::is_arithmetic_v, "Currently only scalar values are supported, should be extended to support thrust::tuple of " "arithmetic types and void (for dummy property values) to be consistent with other " "primitives."); // this will also require a custom edge value aggregation op. using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, @@ -303,7 +304,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( vertex_t, typename EdgeDstKeyInputWrapper::value_iterator>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -315,7 +316,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto total_global_mem = handle.get_device_properties().totalGlobalMem; size_t element_size = sizeof(vertex_t) * 2; // major + minor keys - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); if constexpr (is_thrust_tuple_of_arithmetic::value) { element_size += sum_thrust_tuple_element_sizes(); @@ -323,7 +324,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( element_size += sizeof(edge_value_t); } } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); if constexpr (is_thrust_tuple_of_arithmetic::value) { element_size += 
sum_thrust_tuple_element_sizes(); @@ -350,10 +351,10 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_src_value_input = edge_partition_src_input_device_view_t(edge_src_value_input, i); @@ -472,7 +473,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( 1, handle.get_stream()); handle.sync_stream(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { detail::copy_if_mask_set( handle, thrust::make_zip_iterator(minor_key_first, @@ -505,7 +506,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( (offsets_with_mask ? (*offsets_with_mask).data() : edge_partition.offsets()) + h_vertex_offsets[j], detail::rebase_offset_t{h_edge_offsets[j]}); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { cub::DeviceSegmentedSort::SortPairs( static_cast(nullptr), tmp_storage_bytes, @@ -536,7 +537,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( if (tmp_storage_bytes > d_tmp_storage.size()) { d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { cub::DeviceSegmentedSort::SortPairs( d_tmp_storage.data(), tmp_storage_bytes, @@ -573,7 +574,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( thrust::make_zip_iterator(unreduced_majors.begin(), unreduced_minor_keys.begin()); auto output_key_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { reduced_size += thrust::distance(output_key_first + reduced_size, thrust::get<0>(thrust::reduce_by_key( @@ -626,7 +627,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto const 
minor_comm_size = minor_comm.get_size(); rmm::device_uvector d_tx_value_counts(0, handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto triplet_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin(), @@ -782,7 +783,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( tmp_minor_keys.resize(0, handle.get_stream()); tmp_minor_keys.shrink_to_fit(handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { std::tie(rx_key_aggregated_edge_values, std::ignore) = shuffle_values(minor_comm, detail::get_optional_dataframe_buffer_begin( @@ -795,7 +796,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( detail::shrink_to_fit_optional_dataframe_buffer( tmp_key_aggregated_edge_values, handle.get_stream()); } else { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto triplet_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin(), @@ -820,7 +821,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( } auto key_pair_first = thrust::make_zip_iterator(rx_majors.begin(), rx_minor_keys.begin()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { if (rx_majors.size() > mem_frugal_threshold) { // trade-off parallelism to lower peak memory auto second_first = @@ -956,15 +957,15 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto major_value_map_device_view = (GraphViewType::is_multi_gpu && edge_src_value_input.keys()) - ? thrust::make_optionalview())>>(multi_gpu_major_value_map_ptr->view()) - : thrust::nullopt; + : cuda::std::nullopt; std::conditional_t, detail::kv_cuco_store_find_device_view_t> dst_key_value_map_device_view( GraphViewType::is_multi_gpu ? 
multi_gpu_minor_key_value_map_ptr->view() : kv_store_view); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto triplet_first = thrust::make_zip_iterator( tmp_majors.begin(), tmp_minor_keys.begin(), diff --git a/cpp/src/prims/transform_e.cuh b/cpp/src/prims/transform_e.cuh index aaa2703f1ae..d5d64f708ba 100644 --- a/cpp/src/prims/transform_e.cuh +++ b/cpp/src/prims/transform_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ #include +#include #include #include #include @@ -273,21 +274,21 @@ void transform_e(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -306,10 +307,10 @@ void transform_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; edge_partition_src_input_device_view_t edge_partition_src_value_input{}; edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; @@ -464,21 +465,21 @@ void transform_e(raft::handle_t const& handle, std::is_same_v>); using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -541,10 +542,10 @@ void transform_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; if (do_expensive_check) { CUGRAPH_EXPECTS( diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index c938b10fbbb..eef34938c57 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,6 +32,7 @@ #include +#include #include #include #include @@ -43,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -249,14 +249,14 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( using weight_t = float; // dummy using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, diff --git a/cpp/src/prims/transform_reduce_e.cuh b/cpp/src/prims/transform_reduce_e.cuh index 43722550c58..1e45fea0608 100644 --- a/cpp/src/prims/transform_reduce_e.cuh +++ b/cpp/src/prims/transform_reduce_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,11 +35,11 @@ #include +#include #include #include #include #include -#include #include #include #include @@ -68,7 +68,7 @@ __global__ static void transform_reduce_e_hypersparse( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultIterator result_iter /* size 1 */, EdgeOp e_op) { @@ -162,7 +162,7 @@ __global__ static void transform_reduce_e_low_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultIterator result_iter /* size 1 */, EdgeOp e_op) { @@ -251,7 +251,7 @@ __global__ static void transform_reduce_e_mid_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultIterator result_iter /* size 1 */, EdgeOp e_op) { @@ -329,7 +329,7 @@ __global__ static void transform_reduce_e_high_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultIterator result_iter /* size 1 */, EdgeOp e_op) { @@ -445,21 +445,21 @@ T transform_reduce_e(raft::handle_t const& handle, using edge_t = typename 
GraphViewType::edge_type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -486,10 +486,10 @@ T transform_reduce_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; edge_partition_src_input_device_view_t edge_partition_src_value_input{}; edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; diff --git a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh index 3abce6f8bd5..8786336bd10 100644 --- a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh +++ b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,10 +31,10 @@ #include +#include #include #include #include -#include #include #include #include @@ -108,7 +108,7 @@ __global__ static void transform_reduce_by_src_dst_key_hypersparse( EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, EdgePartitionEdgeMaskWrapper edge_partition_e_mask, - thrust::optional> + cuda::std::optional> edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, @@ -198,7 +198,7 @@ __global__ static void transform_reduce_by_src_dst_key_low_degree( EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, EdgePartitionEdgeMaskWrapper edge_partition_e_mask, - thrust::optional> + cuda::std::optional> edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, @@ -284,7 +284,7 @@ __global__ static void transform_reduce_by_src_dst_key_mid_degree( EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, EdgePartitionEdgeMaskWrapper edge_partition_e_mask, - thrust::optional> + cuda::std::optional> edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, @@ -383,7 +383,7 @@ __global__ static void transform_reduce_by_src_dst_key_high_degree( EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, EdgePartitionEdgeMaskWrapper edge_partition_e_mask, - thrust::optional> + cuda::std::optional> edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, @@ -520,21 +520,21 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, 
detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -556,10 +556,10 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; rmm::device_uvector tmp_keys(0, handle.get_stream()); std::optional> edge_offsets_with_mask{std::nullopt}; @@ -627,9 +627,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -650,9 +650,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? 
cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -673,9 +673,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -695,9 +695,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -719,9 +719,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); diff --git a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh index 87f590f571f..884079d103d 100644 --- a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh +++ b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -52,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -86,7 +86,7 @@ template && !std::is_same_v, thrust::tuple, std::conditional_t, key_t, payload_t>>> @@ -106,7 +106,7 @@ struct transform_reduce_v_frontier_call_e_op_t { thrust::get<1>(*e_op_result)); } } else { - return thrust::nullopt; + return cuda::std::nullopt; } } }; @@ -121,7 +121,7 @@ struct update_keep_flag_t { raft::device_span keep_flags{}; key_t v_range_first{}; InputKeyIterator input_key_first{}; - thrust::optional invalid_input_key{}; + cuda::std::optional invalid_input_key{}; __device__ void operator()(size_t i) const { @@ -1058,10 +1058,10 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; if constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); @@ -1110,9 +1110,9 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, * @brief Iterate over outgoing edges from the current vertex frontier and reduce valid edge functor * outputs by (tagged-)destination ID. * - * Edge functor outputs are thrust::optional objects and invalid if thrust::nullopt. Vertices are - * assumed to be tagged if KeyBucketType::key_type is a tuple of a vertex type and a tag - * type (KeyBucketType::key_type is identical to a vertex type otherwise). + * Edge functor outputs are cuda::std::optional objects and invalid if cuda::std::nullopt. 
Vertices + * are assumed to be tagged if KeyBucketType::key_type is a tuple of a vertex type and a tag type + * (KeyBucketType::key_type is identical to a vertex type otherwise). * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam KeyBucketType Type of the vertex frontier bucket class which abstracts the @@ -1141,10 +1141,10 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not * access edge property values). * @param e_op Quinary operator takes edge (tagged-)source, edge destination, property values for - * the source, destination, and edge and returns 1) thrust::nullopt (if invalid and to be - * discarded); 2) dummy (but valid) thrust::optional object (e.g. - * thrust::optional{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type is - * void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be + * the source, destination, and edge and returns 1) cuda::std::nullopt (if invalid and to be + * discarded); 2) dummy (but valid) cuda::std::optional object (e.g. + * cuda::std::optional{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type + * is void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be * reduced using the @p reduce_op (if vertices are not tagged and ReduceOp::value_type is not void); * or 5) a tuple of a tag and a value to be reduced (if vertices are tagged and ReduceOp::value_type * is not void). diff --git a/cpp/src/prims/update_v_frontier.cuh b/cpp/src/prims/update_v_frontier.cuh index a9b0a6b823b..0516ebc7d31 100644 --- a/cpp/src/prims/update_v_frontier.cuh +++ b/cpp/src/prims/update_v_frontier.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -157,10 +157,10 @@ struct check_invalid_bucket_idx_t { * graph_view.local_vertex_partition_range_size(). * @param v_op Ternary operator that takes (tagged-)vertex ID, *(@p vertex_value_input_first + i) * (where i is [0, @p graph_view.local_vertex_partition_range_size())) and the payload value for the - * (tagged-)vertex ID and returns a tuple of 1) a thrust::optional object optionally storing a - * bucket index and 2) a thrust::optional object optionally storing a new vertex property value. If - * the first element of the returned tuple is thrust::nullopt, this (tagged-)vertex won't be - * inserted to the vertex frontier. If the second element is thrust::nullopt, the vertex property + * (tagged-)vertex ID and returns a tuple of 1) a cuda::std::optional object optionally storing a + * bucket index and 2) a cuda::std::optional object optionally storing a new vertex property value. + * If the first element of the returned tuple is cuda::std::nullopt, this (tagged-)vertex won't be + * inserted to the vertex frontier. If the second element is cuda::std::nullopt, the vertex property * value for this vertex won't be updated. Note that it is currently undefined behavior if there are * multiple tagged-vertices with the same vertex ID (but with different tags) AND @p v_op results on * the tagged-vertices with the same vertex ID have more than one valid new vertex property values. @@ -286,13 +286,13 @@ void update_v_frontier(raft::handle_t const& handle, * graph_view.local_vertex_partition_range_size(). 
* @param v_op Binary operator that takes (tagged-)vertex ID, and *(@p vertex_value_input_first + i) * (where i is [0, @p graph_view.local_vertex_partition_range_size())) and returns a tuple of 1) a - * thrust::optional object optionally storing a bucket index and 2) a thrust::optional object + * cuda::std::optional object optionally storing a bucket index and 2) a cuda::std::optional object * optionally storing a new vertex property value. If the first element of the returned tuple is - * thrust::nullopt, this (tagged-)vertex won't be inserted to the vertex frontier. If the second - * element is thrust::nullopt, the vertex property value for this vertex won't be updated. Note that - * it is currently undefined behavior if there are multiple tagged-vertices with the same vertex ID - * (but with different tags) AND @p v_op results on the tagged-vertices with the same vertex ID have - * more than one valid new vertex property values. + * cuda::std::nullopt, this (tagged-)vertex won't be inserted to the vertex frontier. If the second + * element is cuda::std::nullopt, the vertex property value for this vertex won't be updated. Note + * that it is currently undefined behavior if there are multiple tagged-vertices with the same + * vertex ID (but with different tags) AND @p v_op results on the tagged-vertices with the same + * vertex ID have more than one valid new vertex property values. 
*/ template -#include +#include #include namespace cugraph { @@ -40,8 +40,8 @@ struct return_edges_with_properties_e_op { template auto __host__ __device__ operator()(key_t optionally_tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, EdgeProperties edge_properties) const { static_assert(std::is_same_v || @@ -51,43 +51,43 @@ struct return_edges_with_properties_e_op { if constexpr (std::is_same_v) { vertex_t src{optionally_tagged_src}; - if constexpr (std::is_same_v) { - return thrust::make_optional(thrust::make_tuple(src, dst)); + if constexpr (std::is_same_v) { + return cuda::std::make_optional(thrust::make_tuple(src, dst)); } else if constexpr (std::is_arithmetic::value) { - return thrust::make_optional(thrust::make_tuple(src, dst, edge_properties)); + return cuda::std::make_optional(thrust::make_tuple(src, dst, edge_properties)); } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value && (thrust::tuple_size::value == 2)) { - return thrust::make_optional(thrust::make_tuple( + return cuda::std::make_optional(thrust::make_tuple( src, dst, thrust::get<0>(edge_properties), thrust::get<1>(edge_properties))); } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value && (thrust::tuple_size::value == 3)) { - return thrust::make_optional(thrust::make_tuple(src, - dst, - thrust::get<0>(edge_properties), - thrust::get<1>(edge_properties), - thrust::get<2>(edge_properties))); + return cuda::std::make_optional(thrust::make_tuple(src, + dst, + thrust::get<0>(edge_properties), + thrust::get<1>(edge_properties), + thrust::get<2>(edge_properties))); } } else if constexpr (std::is_same_v>) { vertex_t src{thrust::get<0>(optionally_tagged_src)}; int32_t label{thrust::get<1>(optionally_tagged_src)}; src = thrust::get<0>(optionally_tagged_src); - if constexpr (std::is_same_v) { - return thrust::make_optional(thrust::make_tuple(src, dst, label)); + if constexpr (std::is_same_v) { + return 
cuda::std::make_optional(thrust::make_tuple(src, dst, label)); } else if constexpr (std::is_arithmetic::value) { - return thrust::make_optional(thrust::make_tuple(src, dst, edge_properties, label)); + return cuda::std::make_optional(thrust::make_tuple(src, dst, edge_properties, label)); } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value && (thrust::tuple_size::value == 2)) { - return thrust::make_optional(thrust::make_tuple( + return cuda::std::make_optional(thrust::make_tuple( src, dst, thrust::get<0>(edge_properties), thrust::get<1>(edge_properties), label)); } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value && (thrust::tuple_size::value == 3)) { - return thrust::make_optional(thrust::make_tuple(src, - dst, - thrust::get<0>(edge_properties), - thrust::get<1>(edge_properties), - thrust::get<2>(edge_properties), - label)); + return cuda::std::make_optional(thrust::make_tuple(src, + dst, + thrust::get<0>(edge_properties), + thrust::get<1>(edge_properties), + thrust::get<2>(edge_properties), + label)); } } } diff --git a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh index 5c04d628f09..2a6136fb96a 100644 --- a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh +++ b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,11 +29,12 @@ #include -#include #include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh index f6793c4a157..9ce8edbb9c1 100644 --- a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh +++ b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,10 +20,11 @@ #include #include -#include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu index 89634253ee7..35c2fc5abb1 100644 --- a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu +++ b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,10 +21,11 @@ #include #include -#include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu index 41cb7413bc4..b627431d53f 100644 --- a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu +++ b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,10 +21,11 @@ #include #include -#include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/sample_edges.cuh b/cpp/src/sampling/detail/sample_edges.cuh index 0c670c6507e..a4e228522aa 100644 --- a/cpp/src/sampling/detail/sample_edges.cuh +++ b/cpp/src/sampling/detail/sample_edges.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ #include -#include +#include #include #include @@ -41,12 +41,12 @@ struct sample_edges_op_t { template auto __host__ __device__ operator()(vertex_t src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, EdgeProperties edge_properties) const { // FIXME: A solution using thrust_tuple_cat would be more flexible here - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { return thrust::make_tuple(src, dst); } else if constexpr (std::is_arithmetic::value) { return thrust::make_tuple(src, dst, edge_properties); @@ -68,7 +68,7 @@ struct sample_edges_op_t { template struct sample_edge_biases_op_t { auto __host__ __device__ - operator()(vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, bias_t bias) const + operator()(vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, bias_t bias) const { return bias; } diff --git a/cpp/src/sampling/detail/sampling_utils.hpp b/cpp/src/sampling/detail/sampling_utils.hpp index 17eb8dd0873..71387eb0e63 100644 --- a/cpp/src/sampling/detail/sampling_utils.hpp +++ b/cpp/src/sampling/detail/sampling_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ #include -#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh b/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh index 391dd99b1df..ce4888e3359 100644 --- a/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh +++ b/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,10 +32,11 @@ #include -#include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index bbc0fbc17af..b759e479bc6 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -31,6 +31,7 @@ #include +#include #include namespace cugraph { @@ -123,9 +124,9 @@ neighbor_sample_impl(raft::handle_t const& handle, *edge_type_view, [valid_edge_type = i] __device__(auto src, auto dst, - thrust::nullopt_t, - thrust::nullopt_t, - /*thrust::nullopt_t*/ auto edge_type) { + cuda::std::nullopt_t, + cuda::std::nullopt_t, + /*cuda::std::nullopt_t*/ auto edge_type) { return edge_type == valid_edge_type; }, edge_mask.mutable_view(), diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 0b1d9dcdb56..440c0c7a6ec 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -43,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -216,8 +216,8 @@ struct col_indx_extract_t { ptr_d_coalesced_v = original::raw_const_ptr(d_coalesced_src_v), row_offsets = row_offsets_, col_indices = col_indices_, - values = values_ ? thrust::optional{*values_} - : thrust::nullopt] __device__(auto indx, auto col_indx) { + values = values_ ? cuda::std::optional{*values_} + : cuda::std::nullopt] __device__(auto indx, auto col_indx) { auto delta = ptr_d_sizes[indx] - 1; auto v_indx = ptr_d_coalesced_v[indx * max_depth + delta]; auto start_row = row_offsets[v_indx]; diff --git a/cpp/src/sampling/random_walks_impl.cuh b/cpp/src/sampling/random_walks_impl.cuh index 6c10fc473f3..fbf0836dac5 100644 --- a/cpp/src/sampling/random_walks_impl.cuh +++ b/cpp/src/sampling/random_walks_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,7 +39,7 @@ #include -#include +#include #include #include @@ -54,14 +54,14 @@ template struct sample_edges_op_t { template __device__ std::enable_if_t, vertex_t> operator()( - vertex_t, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + vertex_t, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) const { return dst; } template __device__ std::enable_if_t, thrust::tuple> operator()( - vertex_t, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, W w) const + vertex_t, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, W w) const { return thrust::make_tuple(dst, w); } @@ -69,8 +69,8 @@ struct sample_edges_op_t { template struct biased_random_walk_e_bias_op_t { - __device__ bias_t - operator()(vertex_t, vertex_t, bias_t src_out_weight_sum, thrust::nullopt_t, bias_t weight) const + __device__ bias_t operator()( + vertex_t, vertex_t, bias_t src_out_weight_sum, cuda::std::nullopt_t, bias_t weight) const { return weight / src_out_weight_sum; } @@ -79,7 +79,7 @@ struct biased_random_walk_e_bias_op_t { template struct biased_sample_edges_op_t { __device__ thrust::tuple operator()( - vertex_t, vertex_t dst, weight_t, thrust::nullopt_t, weight_t weight) const + vertex_t, vertex_t dst, weight_t, cuda::std::nullopt_t, weight_t weight) const { return thrust::make_tuple(dst, weight); } @@ -99,9 +99,9 @@ struct node2vec_random_walk_e_bias_op_t { __device__ std::enable_if_t, bias_t> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { // Check tag (prev vert) for destination if (dst == thrust::get<1>(tagged_src)) { return 1.0 / p_; } @@ -126,8 +126,8 @@ struct node2vec_random_walk_e_bias_op_t { __device__ std::enable_if_t, bias_t> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + 
cuda::std::nullopt_t, W) const { // Check tag (prev vert) for destination @@ -155,9 +155,9 @@ struct node2vec_sample_edges_op_t { __device__ std::enable_if_t, vertex_t> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { return dst; } @@ -166,8 +166,8 @@ struct node2vec_sample_edges_op_t { __device__ std::enable_if_t, thrust::tuple> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, W w) const { return thrust::make_tuple(dst, w); diff --git a/cpp/src/sampling/rw_traversals.hpp b/cpp/src/sampling/rw_traversals.hpp index 2c5658b32a5..d371b4141e0 100644 --- a/cpp/src/sampling/rw_traversals.hpp +++ b/cpp/src/sampling/rw_traversals.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,12 +27,12 @@ #include #include +#include #include #include #include #include #include -#include #include #include @@ -138,7 +138,7 @@ struct uniform_selector_t { { } - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( vertex_t src_v, real_t rnd_val, vertex_t = 0 /* not used*/, @@ -146,7 +146,7 @@ struct uniform_selector_t { bool = false /* not used*/) const { auto crt_out_deg = ptr_d_cache_out_degs_[src_v]; - if (crt_out_deg == 0) return thrust::nullopt; // src_v is a sink + if (crt_out_deg == 0) return cuda::std::nullopt; // src_v is a sink vertex_t v_indx = static_cast(rnd_val >= 1.0 ? crt_out_deg - 1 : rnd_val * crt_out_deg); @@ -156,7 +156,8 @@ struct uniform_selector_t { auto weight_value = (values_ == nullptr ? 
weight_t{1} : values_[start_row + col_indx]); // account for un-weighted graphs - return thrust::optional{thrust::make_tuple(col_indices_[start_row + col_indx], weight_value)}; + return cuda::std::optional{ + thrust::make_tuple(col_indices_[start_row + col_indx], weight_value)}; } private: @@ -211,7 +212,7 @@ struct biased_selector_t { // Sum(weights(neighborhood(src_v))) are pre-computed and // stored in ptr_d_sum_weights_ (too expensive to check, here); // - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( vertex_t src_v, real_t rnd_val, vertex_t = 0 /* not used*/, @@ -223,7 +224,7 @@ struct biased_selector_t { auto col_indx_begin = row_offsets_[src_v]; auto col_indx_end = row_offsets_[src_v + 1]; - if (col_indx_begin == col_indx_end) return thrust::nullopt; // src_v is a sink + if (col_indx_begin == col_indx_end) return cuda::std::nullopt; // src_v is a sink auto col_indx = col_indx_begin; auto prev_col_indx = col_indx; @@ -234,7 +235,7 @@ struct biased_selector_t { run_sum_w += values_[col_indx]; prev_col_indx = col_indx; } - return thrust::optional{ + return cuda::std::optional{ thrust::make_tuple(col_indices_[prev_col_indx], values_[prev_col_indx])}; } @@ -293,9 +294,9 @@ struct node2vec_selector_t { q_(q), coalesced_alpha_{ (max_degree > 0) && (num_paths > 0) && (ptr_alpha != nullptr) - ? thrust::optional>{thrust::make_tuple( + ? 
cuda::std::optional>{thrust::make_tuple( max_degree, num_paths, ptr_alpha)} - : thrust::nullopt} + : cuda::std::nullopt} { } @@ -324,7 +325,7 @@ struct node2vec_selector_t { } } - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( vertex_t src_v, real_t rnd_val, vertex_t prev_v, edge_t path_index, bool start_path) const { auto const offset_indx_begin = row_offsets_[src_v]; @@ -333,7 +334,7 @@ struct node2vec_selector_t { weight_t sum_scaled_weights{0}; auto offset_indx = offset_indx_begin; - if (offset_indx_begin == offset_indx_end) return thrust::nullopt; // src_v is a sink + if (offset_indx_begin == offset_indx_end) return cuda::std::nullopt; // src_v is a sink // for 1st vertex in path just use biased random selection: // @@ -359,7 +360,7 @@ struct node2vec_selector_t { run_sum_w += crt_weight; prev_offset_indx = offset_indx; } - return thrust::optional{ + return cuda::std::optional{ thrust::make_tuple(col_indices_[prev_offset_indx], values_ == nullptr ? weight_t{1} : values_[prev_offset_indx])}; } @@ -402,7 +403,7 @@ struct node2vec_selector_t { run_sum_w += ptr_d_scaled_weights[start_alpha_offset + nghbr_indx]; prev_offset_indx = offset_indx; } - return thrust::optional{ + return cuda::std::optional{ thrust::make_tuple(col_indices_[prev_offset_indx], values_ == nullptr ? weight_t{1} : values_[prev_offset_indx])}; @@ -435,7 +436,7 @@ struct node2vec_selector_t { run_sum_w += scaled_weight; prev_offset_indx = offset_indx; } - return thrust::optional{ + return cuda::std::optional{ thrust::make_tuple(col_indices_[prev_offset_indx], values_ == nullptr ? 
weight_t{1} : values_[prev_offset_indx])}; } @@ -459,7 +460,7 @@ struct node2vec_selector_t { // this is information related to a scratchpad buffer, used as cache, hence mutable; // (necessary, because get_strategy() is const) // - mutable thrust::optional> + mutable cuda::std::optional> coalesced_alpha_; // tuple }; diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh index 151350dad6d..ef1a31400f7 100644 --- a/cpp/src/sampling/sampling_post_processing_impl.cuh +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -51,9 +52,9 @@ namespace { template struct edge_order_t { - thrust::optional> edgelist_label_offsets{thrust::nullopt}; - thrust::optional> edgelist_edge_types{thrust::nullopt}; - thrust::optional> edgelist_hops{thrust::nullopt}; + cuda::std::optional> edgelist_label_offsets{cuda::std::nullopt}; + cuda::std::optional> edgelist_edge_types{cuda::std::nullopt}; + cuda::std::optional> edgelist_hops{cuda::std::nullopt}; raft::device_span edgelist_majors{}; raft::device_span edgelist_minors{}; @@ -99,8 +100,8 @@ struct edge_order_t { template struct is_first_triplet_in_run_t { - thrust::optional> edgelist_label_offsets{thrust::nullopt}; - thrust::optional> edgelist_hops{thrust::nullopt}; + cuda::std::optional> edgelist_label_offsets{cuda::std::nullopt}; + cuda::std::optional> edgelist_hops{cuda::std::nullopt}; raft::device_span edgelist_majors{}; __device__ bool operator()(size_t i) const @@ -142,7 +143,7 @@ struct compute_label_index_t { template struct optionally_compute_label_index_t { - thrust::optional> edgelist_label_offsets{thrust::nullopt}; + cuda::std::optional> edgelist_label_offsets{cuda::std::nullopt}; __device__ label_index_t operator()(size_t i) const { @@ -2370,9 +2371,9 @@ heterogeneous_renumber_sampled_edgelist( cuda::proclaim_return_type( [edgelist_label_offsets = 
detail::to_thrust_optional(edgelist_label_offsets), edge_types = edgelist_edge_types - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edgelist_edge_types).data(), (*edgelist_edge_types).size()) - : thrust::nullopt, + : cuda::std::nullopt, renumber_map = raft::device_span(segment_sorted_edge_id_renumber_map.data(), segment_sorted_edge_id_renumber_map.size()), @@ -2499,17 +2500,17 @@ sort_sampled_edge_tuples(raft::handle_t const& handle, handle.get_stream()); thrust::sequence(handle.get_thrust_policy(), indices.begin(), indices.end(), size_t{0}); edge_order_t edge_order_comp{ - edgelist_label_offsets ? thrust::make_optional>( + edgelist_label_offsets ? cuda::std::make_optional>( (*edgelist_label_offsets).data() + h_label_offsets[i], (h_label_offsets[i + 1] - h_label_offsets[i]) + 1) - : thrust::nullopt, + : cuda::std::nullopt, edgelist_edge_types && use_edge_type_as_sort_key - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edgelist_edge_types).data() + h_edge_offsets[i], indices.size()) - : thrust::nullopt, - edgelist_hops ? thrust::make_optional>( + : cuda::std::nullopt, + edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data() + h_edge_offsets[i], indices.size()) - : thrust::nullopt, + : cuda::std::nullopt, raft::device_span(edgelist_majors.data() + h_edge_offsets[i], indices.size()), raft::device_span(edgelist_minors.data() + h_edge_offsets[i], indices.size())}; @@ -2686,8 +2687,8 @@ renumber_and_compress_sampled_edgelist( auto label_index_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), optionally_compute_label_index_t{ - edgelist_label_offsets ? thrust::make_optional(*edgelist_label_offsets) - : thrust::nullopt}); + edgelist_label_offsets ? 
cuda::std::make_optional(*edgelist_label_offsets) + : cuda::std::nullopt}); auto input_key_first = thrust::make_zip_iterator(label_index_first, (*edgelist_hops).begin()); rmm::device_uvector unique_key_label_indices(min_vertices.size(), handle.get_stream()); @@ -2781,9 +2782,9 @@ renumber_and_compress_sampled_edgelist( thrust::make_counting_iterator(edgelist_majors.size()), is_first_triplet_in_run_t{ detail::to_thrust_optional(edgelist_label_offsets), - edgelist_hops ? thrust::make_optional>( + edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, raft::device_span( edgelist_majors.data(), edgelist_majors.size())}); // number of unique ((label), (hop), major) triplets @@ -2910,15 +2911,15 @@ renumber_and_compress_sampled_edgelist( major_vertex_counts.begin(), major_vertex_counts.end(), [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), - edgelist_hops = edgelist_hops ? thrust::make_optional>( + edgelist_hops = edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, edgelist_majors = raft::device_span(edgelist_majors.data(), edgelist_majors.size()), seed_vertices = renumbered_seed_vertices - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*renumbered_seed_vertices).data(), (*renumbered_seed_vertices).size()) - : thrust::nullopt, + : cuda::std::nullopt, seed_vertex_label_offsets = detail::to_thrust_optional(seed_vertex_label_offsets), num_hops, compress_per_hop] __device__(size_t i) { @@ -3045,9 +3046,9 @@ renumber_and_compress_sampled_edgelist( [major_vertex_counts = raft::device_span(major_vertex_counts.data(), major_vertex_counts.size()), minor_vertex_counts = minor_vertex_counts - ? thrust::make_optional>( + ? 
cuda::std::make_optional>( (*minor_vertex_counts).data(), (*minor_vertex_counts).size()) - : thrust::nullopt, + : cuda::std::nullopt, num_hops, compress_per_hop] __device__(size_t i) { auto vertex_count = major_vertex_counts[i]; @@ -3310,9 +3311,9 @@ renumber_and_sort_sampled_edgelist( (*edgelist_label_hop_offsets).begin(), cuda::proclaim_return_type( [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), - edgelist_hops = edgelist_hops ? thrust::make_optional>( + edgelist_hops = edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, num_hops, num_edges = edgelist_majors.size()] __device__(size_t i) { size_t start_offset{0}; @@ -3500,12 +3501,12 @@ heterogeneous_renumber_and_sort_sampled_edgelist( cuda::proclaim_return_type( [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), edgelist_edge_types = edgelist_edge_types - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edgelist_edge_types).data(), (*edgelist_edge_types).size()) - : thrust::nullopt, - edgelist_hops = edgelist_hops ? thrust::make_optional>( + : cuda::std::nullopt, + edgelist_hops = edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, num_edge_types, num_hops, num_edges = edgelist_majors.size()] __device__(size_t i) { @@ -3653,9 +3654,9 @@ sort_sampled_edgelist(raft::handle_t const& handle, (*edgelist_label_hop_offsets).begin(), cuda::proclaim_return_type( [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), - edgelist_hops = edgelist_hops ? thrust::make_optional>( + edgelist_hops = edgelist_hops ? 
cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, num_hops, num_edges = edgelist_majors.size()] __device__(size_t i) { size_t start_offset{0}; diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 31de9b1e5d3..f526a6788e1 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -47,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -126,8 +126,8 @@ rmm::device_uvector compute_major_degrees( partition.vertex_partition_range_first(major_range_vertex_partition_id); auto offsets = edge_partition_offsets[i]; - auto masks = - edge_partition_masks ? thrust::make_optional((*edge_partition_masks)[i]) : thrust::nullopt; + auto masks = edge_partition_masks ? cuda::std::make_optional((*edge_partition_masks)[i]) + : cuda::std::nullopt; auto segment_offset_size_per_partition = edge_partition_segment_offsets.size() / static_cast(minor_comm_size); auto num_local_degrees = @@ -202,7 +202,8 @@ rmm::device_uvector compute_major_degrees( handle.get_thrust_policy(), degrees.begin(), degrees.end(), - [offsets, masks = masks ? thrust::make_optional(*masks) : thrust::nullopt] __device__(auto i) { + [offsets, + masks = masks ? cuda::std::make_optional(*masks) : cuda::std::nullopt] __device__(auto i) { auto local_degree = offsets[i + 1] - offsets[i]; if (masks) { local_degree = @@ -842,10 +843,10 @@ graph_view_t(this->local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; thrust::transform(handle.get_thrust_policy(), sorted_edge_first + edge_partition_offsets[i], sorted_edge_first + edge_partition_offsets[i + 1], @@ -913,10 +914,10 @@ graph_view_t(this->local_edge_partition_view()); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, 0) - : thrust::nullopt; + : cuda::std::nullopt; thrust::transform( handle.get_thrust_policy(), edge_first, @@ -987,10 +988,10 @@ graph_view_t(this->local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; thrust::transform( handle.get_thrust_policy(), sorted_edge_first + edge_partition_offsets[i], @@ -1058,10 +1059,10 @@ graph_view_t(this->local_edge_partition_view()); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, 0) - : thrust::nullopt; + : cuda::std::nullopt; thrust::transform( handle.get_thrust_policy(), edge_first, diff --git a/cpp/src/structure/induced_subgraph_impl.cuh b/cpp/src/structure/induced_subgraph_impl.cuh index 3822055b037..e319dc03bbb 100644 --- a/cpp/src/structure/induced_subgraph_impl.cuh +++ b/cpp/src/structure/induced_subgraph_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,6 +34,7 @@ #include +#include #include #include #include @@ -44,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -58,7 +58,7 @@ namespace detail { template struct induced_subgraph_weighted_edge_op { - using return_type = thrust::optional>; + using return_type = cuda::std::optional>; raft::device_span dst_subgraph_offsets; raft::device_span dst_subgraph_vertices; @@ -74,15 +74,15 @@ struct induced_subgraph_weighted_edge_op { dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph], dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph + 1], dst) - ? thrust::make_optional( + ? cuda::std::make_optional( thrust::make_tuple(thrust::get<0>(tagged_src), dst, wgt, subgraph)) - : thrust::nullopt; + : cuda::std::nullopt; } }; template struct induced_subgraph_unweighted_edge_op { - using return_type = thrust::optional>; + using return_type = cuda::std::optional>; raft::device_span dst_subgraph_offsets; raft::device_span dst_subgraph_vertices; @@ -91,15 +91,16 @@ struct induced_subgraph_unweighted_edge_op { vertex_t dst, property_t sv, property_t dv, - thrust::nullopt_t) const + cuda::std::nullopt_t) const { size_t subgraph = thrust::get<1>(tagged_src); return thrust::binary_search(thrust::seq, dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph], dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph + 1], dst) - ? thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), dst, subgraph)) - : thrust::nullopt; + ? cuda::std::make_optional( + thrust::make_tuple(thrust::get<0>(tagged_src), dst, subgraph)) + : cuda::std::nullopt; } }; @@ -203,8 +204,8 @@ extract_induced_subgraphs( dst_subgraph_vertices = raft::device_span(dst_subgraph_vertices_v.data(), dst_subgraph_vertices_v.size()); - // 3. Call extract_transform_v_frontier_outgoing_e with a functor that returns thrust::nullopt if - // the destination vertex has a property of 0, return the edge if the destination vertex has a + // 3. 
Call extract_transform_v_frontier_outgoing_e with a functor that returns cuda::std::nullopt + // if the destination vertex has a property of 0, return the edge if the destination vertex has a // property of 1 vertex_frontier_t vertex_frontier(handle, 1); @@ -233,7 +234,7 @@ extract_induced_subgraphs( edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), *edge_weight_view, - detail::induced_subgraph_weighted_edge_op{ + detail::induced_subgraph_weighted_edge_op{ dst_subgraph_offsets, dst_subgraph_vertices}, do_expensive_check); @@ -253,7 +254,7 @@ extract_induced_subgraphs( edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), edge_dummy_property_t{}.view(), - detail::induced_subgraph_unweighted_edge_op{ + detail::induced_subgraph_unweighted_edge_op{ dst_subgraph_offsets, dst_subgraph_vertices}, do_expensive_check); diff --git a/cpp/src/traversal/bfs_impl.cuh b/cpp/src/traversal/bfs_impl.cuh index ba40db1f085..2c2674f5bbe 100644 --- a/cpp/src/traversal/bfs_impl.cuh +++ b/cpp/src/traversal/bfs_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,6 +31,7 @@ #include +#include #include #include #include @@ -40,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -78,20 +78,26 @@ struct topdown_e_op_t { detail::edge_partition_endpoint_property_device_view_t visited_flags{}; vertex_t dst_first{}; - __device__ thrust::optional operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + __device__ cuda::std::optional operator()(vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { auto dst_offset = dst - dst_first; auto old = prev_visited_flags.get(dst_offset); if (!old) { old = visited_flags.atomic_or(dst_offset, true); } - return old ? thrust::nullopt : thrust::optional{src}; + return old ? cuda::std::nullopt : cuda::std::optional{src}; } }; template struct bottomup_e_op_t { - __device__ vertex_t operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + __device__ vertex_t operator()(vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { return dst; } @@ -103,8 +109,11 @@ struct bottomup_pred_op_t { prev_visited_flags{}; // visited in the previous iterations vertex_t dst_first{}; - __device__ bool operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + __device__ bool operator()(vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { return prev_visited_flags.get(dst - dst_first); } @@ -260,10 +269,10 @@ void bfs(raft::handle_t const& handle, auto edge_mask_view = graph_view.edge_mask_view(); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, partition_idx) - : thrust::nullopt; + : cuda::std::nullopt; auto high_and_mid_degree_segment_size = (*segment_offsets)[2]; // compute local degrees for high & mid degree segments only, for // low & hypersparse segments, use low_degree_threshold * diff --git a/cpp/src/traversal/k_hop_nbrs_impl.cuh b/cpp/src/traversal/k_hop_nbrs_impl.cuh index 44fa21a5252..be462720e51 100644 --- a/cpp/src/traversal/k_hop_nbrs_impl.cuh +++ b/cpp/src/traversal/k_hop_nbrs_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,13 +30,13 @@ #include +#include #include #include #include #include #include #include -#include #include #include @@ -48,11 +48,11 @@ namespace { template struct e_op_t { - __device__ thrust::optional operator()(thrust::tuple tagged_src, - vertex_t, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const + __device__ cuda::std::optional operator()(thrust::tuple tagged_src, + vertex_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { return thrust::get<1>(tagged_src); } diff --git a/cpp/src/traversal/od_shortest_distances_impl.cuh b/cpp/src/traversal/od_shortest_distances_impl.cuh index b3cd0d57c67..ffe706ca45d 100644 --- a/cpp/src/traversal/od_shortest_distances_impl.cuh +++ b/cpp/src/traversal/od_shortest_distances_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,12 +37,12 @@ #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -133,11 +133,11 @@ struct e_op_t { weight_t cutoff{}; weight_t invalid_distance{}; - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, weight_t w) const { aggregate_vi_t aggregator{num_origins}; @@ -150,9 +150,9 @@ struct e_op_t { auto dst_val = key_to_dist_map.find(aggregator(thrust::make_tuple(dst, origin_idx))); if (dst_val != invalid_distance) { threshold = dst_val < threshold ? dst_val : threshold; } return (new_distance < threshold) - ? thrust::optional>{thrust::make_tuple(origin_idx, - new_distance)} - : thrust::nullopt; + ? cuda::std::optional>{thrust::make_tuple(origin_idx, + new_distance)} + : cuda::std::nullopt; } }; @@ -644,8 +644,8 @@ rmm::device_uvector od_shortest_distances( thrust::tuple, weight_t, vertex_t, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, weight_t, e_op_t> e_op_wrapper{e_op}; diff --git a/cpp/src/traversal/sssp_impl.cuh b/cpp/src/traversal/sssp_impl.cuh index 3429672b151..8006bbf4063 100644 --- a/cpp/src/traversal/sssp_impl.cuh +++ b/cpp/src/traversal/sssp_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,11 +32,11 @@ #include +#include #include #include #include #include -#include #include #include @@ -52,8 +52,8 @@ struct e_op_t { weight_t const* distances{}; weight_t cutoff{}; - __device__ thrust::optional> operator()( - vertex_t src, vertex_t dst, weight_t src_val, thrust::nullopt_t, weight_t w) const + __device__ cuda::std::optional> operator()( + vertex_t src, vertex_t dst, weight_t src_val, cuda::std::nullopt_t, weight_t w) const { auto push = true; auto new_distance = src_val + w; @@ -65,9 +65,9 @@ struct e_op_t { threshold = old_distance < threshold ? old_distance : threshold; } if (new_distance >= threshold) { push = false; } - return push ? thrust::optional>{thrust::make_tuple( + return push ? cuda::std::optional>{thrust::make_tuple( new_distance, src)} - : thrust::nullopt; + : cuda::std::nullopt; } }; @@ -223,11 +223,11 @@ void sssp(raft::handle_t const& handle, auto new_dist = thrust::get<0>(pushed_val); auto update = (new_dist < v_val); return thrust::make_tuple( - update ? thrust::optional{new_dist < near_far_threshold ? bucket_idx_next_near - : bucket_idx_far} - : thrust::nullopt, - update ? thrust::optional>{pushed_val} - : thrust::nullopt); + update ? cuda::std::optional{new_dist < near_far_threshold ? bucket_idx_next_near + : bucket_idx_far} + : cuda::std::nullopt, + update ? cuda::std::optional>{pushed_val} + : cuda::std::nullopt); }); vertex_frontier.bucket(bucket_idx_cur_near).clear(); @@ -250,9 +250,9 @@ void sssp(raft::handle_t const& handle, auto dist = *(distances + vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)); return dist >= old_near_far_threshold - ? thrust::optional{dist < near_far_threshold ? bucket_idx_cur_near - : bucket_idx_far} - : thrust::nullopt; + ? cuda::std::optional{dist < near_far_threshold ? 
bucket_idx_cur_near + : bucket_idx_far} + : cuda::std::nullopt; }); near_size = vertex_frontier.bucket(bucket_idx_cur_near).aggregate_size(); far_size = vertex_frontier.bucket(bucket_idx_far).aggregate_size(); diff --git a/cpp/tests/components/mg_vertex_coloring_test.cu b/cpp/tests/components/mg_vertex_coloring_test.cu index 17327e35c97..89b1df9264e 100644 --- a/cpp/tests/components/mg_vertex_coloring_test.cu +++ b/cpp/tests/components/mg_vertex_coloring_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,6 +34,8 @@ #include +#include + #include #include @@ -130,7 +132,7 @@ class Tests_MGGraphColoring : cugraph::detail::edge_minor_property_view_t( d_colors.data(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + [] __device__(auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) { if ((src != dst) && (src_color == dst_color)) { return uint8_t{1}; } else { @@ -168,7 +170,7 @@ class Tests_MGGraphColoring d_colors.begin(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [renumber_map = (*mg_renumber_map).data()] __device__( - auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) { if ((src != dst) && (src_color == dst_color)) { return vertex_t{1}; } else { diff --git a/cpp/tests/components/vertex_coloring_test.cu b/cpp/tests/components/vertex_coloring_test.cu index fed64f272d7..cf55146c5ba 100644 --- a/cpp/tests/components/vertex_coloring_test.cu +++ b/cpp/tests/components/vertex_coloring_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,6 +30,8 @@ #include +#include + #include #include @@ -107,7 +109,7 @@ class Tests_SGGraphColoring cugraph::detail::edge_minor_property_view_t(d_colors.data(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + [] __device__(auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) { if ((src != dst) && (src_color == dst_color)) { return uint8_t{1}; } else { @@ -142,7 +144,7 @@ class Tests_SGGraphColoring vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [renumber_map = (*sg_renumber_map).data()] __device__( - auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) { if ((src != dst) && (src_color == dst_color)) { return vertex_t{1}; } else { diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index 63a785fb182..8796383f45d 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,11 +37,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -127,15 +127,16 @@ class Tests_MGCountIfE hr_timer.start("MG count_if_e"); } - auto result = count_if_e( - *handle_, - mg_graph_view, - mg_src_prop.view(), - mg_dst_prop.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto row, auto col, auto src_property, auto dst_property, thrust::nullopt_t) { - return src_property < dst_property; - }); + auto result = + count_if_e(*handle_, + mg_graph_view, + mg_src_prop.view(), + mg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto row, auto col, auto src_property, auto dst_property, cuda::std::nullopt_t) { + return src_property < dst_property; + }); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -179,7 +180,7 @@ class Tests_MGCountIfE sg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto row, auto col, auto src_property, auto dst_property, thrust::nullopt_t) { + auto row, auto col, auto src_property, auto dst_property, cuda::std::nullopt_t) { return src_property < dst_property; }); ASSERT_TRUE(expected_result == result); diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index d3d6524cbdb..27e3f471c5b 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,11 +40,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -65,12 +65,12 @@ struct e_op_t { std::is_same_v>); using return_type = - thrust::optional, - thrust::tuple, - thrust::tuple>>; + cuda::std::optional, + thrust::tuple, + thrust::tuple>>; __device__ return_type operator()( - vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, thrust::nullopt_t) const + vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, cuda::std::nullopt_t) const { auto output_payload = static_cast(1); if (src_val < dst_val) { @@ -82,7 +82,7 @@ struct e_op_t { src, dst, thrust::get<0>(output_payload), thrust::get<1>(output_payload)); } } else { - return thrust::nullopt; + return cuda::std::nullopt; } } }; diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index a8393d84e43..0c625da0a6d 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,11 +39,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -65,7 +65,7 @@ struct e_op_t { static_assert(std::is_same_v || std::is_same_v>); - using return_type = thrust::optional, std::conditional_t, thrust::tuple, @@ -78,7 +78,7 @@ struct e_op_t { vertex_t dst, property_t src_val, property_t dst_val, - thrust::nullopt_t) const + cuda::std::nullopt_t) const { auto output_payload = static_cast(1); if (src_val < dst_val) { @@ -109,7 +109,7 @@ struct e_op_t { } } } else { - return thrust::nullopt; + return cuda::std::nullopt; } } }; diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu index 386fce24a87..30a53cd15a4 100644 --- a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -41,9 +41,9 @@ #include +#include #include #include -#include #include #include @@ -53,7 +53,7 @@ template struct e_bias_op_t { __device__ bias_t - operator()(vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, bias_t bias) const + operator()(vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, bias_t bias) const { return bias; } @@ -65,8 +65,11 @@ struct e_op_t { cugraph::to_thrust_tuple(property_t{}), cugraph::to_thrust_tuple(property_t{}))); - __device__ result_t operator()( - vertex_t src, vertex_t dst, property_t src_prop, property_t dst_prop, thrust::nullopt_t) const + __device__ result_t operator()(vertex_t src, + vertex_t dst, + property_t src_prop, + property_t dst_prop, + cuda::std::nullopt_t) const { if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value) { static_assert(thrust::tuple_size::value == size_t{2}); @@ -401,18 +404,19 @@ class Tests_MGPerVRandomSelectTransformOutgoingE thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(mg_aggregate_frontier_vertices.size()), [frontier_vertex_first = mg_aggregate_frontier_vertices.begin(), - sample_offsets = mg_aggregate_sample_offsets ? thrust::make_optional( + sample_offsets = mg_aggregate_sample_offsets ? cuda::std::make_optional( (*mg_aggregate_sample_offsets).data()) - : thrust::nullopt, + : cuda::std::nullopt, sample_e_op_result_first = cugraph::get_dataframe_buffer_begin(mg_aggregate_sample_e_op_results), sg_offsets = sg_offsets.begin(), sg_indices = sg_indices.begin(), - sg_biases = sg_biases ? thrust::make_optional((*sg_biases).begin()) : thrust::nullopt, - K = prims_usecase.K, + sg_biases = + sg_biases ? cuda::std::make_optional((*sg_biases).begin()) : cuda::std::nullopt, + K = prims_usecase.K, with_replacement = prims_usecase.with_replacement, - invalid_value = - invalid_value ? thrust::make_optional(*invalid_value) : thrust::nullopt, + invalid_value = invalid_value ? 
cuda::std::make_optional(*invalid_value) + : cuda::std::nullopt, property_transform = cugraph::test::detail::vertex_property_transform{ hash_bin_count}] __device__(size_t i) { @@ -461,8 +465,8 @@ class Tests_MGPerVRandomSelectTransformOutgoingE auto sg_nbr_first = sg_indices + *(sg_offsets + sg_src); auto sg_nbr_last = sg_indices + *(sg_offsets + (sg_src + vertex_t{1})); auto sg_nbr_bias_first = - sg_biases ? thrust::make_optional((*sg_biases) + *(sg_offsets + sg_src)) - : thrust::nullopt; + sg_biases ? cuda::std::make_optional((*sg_biases) + *(sg_offsets + sg_src)) + : cuda::std::nullopt; if (sg_src != v) { return true; } if (sg_nbr_bias_first) { diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu index 3dd256544b4..040e0a6d716 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,12 +40,12 @@ #include #include +#include #include #include #include #include #include -#include #include #include @@ -218,7 +218,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), mg_dst_key.view(), mg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::plus{}, cugraph::get_dataframe_buffer_begin(mg_results[i])); @@ -245,7 +245,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), mg_dst_key.view(), mg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::elementwise_minimum{}, cugraph::get_dataframe_buffer_begin(mg_results[i])); @@ -272,7 +272,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), mg_dst_key.view(), mg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::elementwise_maximum{}, cugraph::get_dataframe_buffer_begin(mg_results[i])); @@ -414,7 +414,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), sg_dst_key.view(), sg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::plus{}, cugraph::get_dataframe_buffer_begin(global_result)); @@ -441,7 +441,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), sg_dst_key.view(), sg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::elementwise_minimum{}, cugraph::get_dataframe_buffer_begin(global_result)); @@ -468,7 +468,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), sg_dst_key.view(), sg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, 
cugraph::reduce_op::elementwise_maximum{}, cugraph::get_dataframe_buffer_begin(global_result)); diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index 41830b3017c..57d77f6c4bd 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,12 +40,12 @@ #include #include +#include #include #include #include #include #include -#include #include #include @@ -62,7 +62,7 @@ struct e_op_t { vertex_t dst, result_t src_property, result_t dst_property, - thrust::nullopt_t) const + cuda::std::nullopt_t) const { if (src_property < dst_property) { return src_property; diff --git a/cpp/tests/prims/mg_transform_e.cu b/cpp/tests/prims/mg_transform_e.cu index 3984c7cd86b..c94637cc657 100644 --- a/cpp/tests/prims/mg_transform_e.cu +++ b/cpp/tests/prims/mg_transform_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,10 +37,10 @@ #include +#include #include #include #include -#include #include #include @@ -177,7 +177,8 @@ class Tests_MGTransformE mg_src_prop.view(), mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -192,7 +193,8 @@ class Tests_MGTransformE mg_src_prop.view(), mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index e290f05e9e4..b5dcfaa7aa7 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,11 +38,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -138,7 +138,8 @@ class Tests_MGTransformReduceE mg_src_prop.view(), mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -189,7 +190,7 @@ class Tests_MGTransformReduceE sg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { diff --git a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu index b050e314a15..830b48acade 100644 --- a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu +++ b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,11 +39,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -149,7 +149,8 @@ class Tests_MGTransformReduceEBySrcDstKey mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), mg_src_key.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -179,7 +180,8 @@ class Tests_MGTransformReduceEBySrcDstKey mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), mg_dst_key.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -279,7 +281,7 @@ class Tests_MGTransformReduceEBySrcDstKey cugraph::edge_dummy_property_t{}.view(), sg_src_key.view(), [] __device__( - auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -301,7 +303,7 @@ class Tests_MGTransformReduceEBySrcDstKey cugraph::edge_dummy_property_t{}.view(), sg_dst_key.view(), [] __device__( - auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index 085077017b3..acc89491e56 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -1,5 
+1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,11 +38,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -62,25 +62,25 @@ struct e_op_t { vertex_t dst, property_t src_val, property_t dst_val, - thrust::nullopt_t) const + cuda::std::nullopt_t) const { if constexpr (std::is_same_v) { if constexpr (std::is_same_v) { - return src_val < dst_val ? thrust::optional{std::byte{0}} /* dummy */ - : thrust::nullopt; + return src_val < dst_val ? cuda::std::optional{std::byte{0}} /* dummy */ + : cuda::std::nullopt; } else { - return src_val < dst_val ? thrust::optional{static_cast(1)} - : thrust::nullopt; + return src_val < dst_val ? cuda::std::optional{static_cast(1)} + : cuda::std::nullopt; } } else { auto tag = thrust::get<1>(optionally_tagged_src); if constexpr (std::is_same_v) { - return src_val < dst_val ? thrust::optional{tag} : thrust::nullopt; + return src_val < dst_val ? cuda::std::optional{tag} : cuda::std::nullopt; } else { return src_val < dst_val - ? thrust::optional>{thrust::make_tuple( + ? cuda::std::optional>{thrust::make_tuple( tag, static_cast(1))} - : thrust::nullopt; + : cuda::std::nullopt; } } } diff --git a/cpp/tests/prims/result_compare.cuh b/cpp/tests/prims/result_compare.cuh index 5a1abb90e3c..7ee87d402cd 100644 --- a/cpp/tests/prims/result_compare.cuh +++ b/cpp/tests/prims/result_compare.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,8 +19,8 @@ #include +#include #include -#include #include #include @@ -36,7 +36,7 @@ namespace detail { template __host__ __device__ bool compare_arithmetic_scalar(T val0, T val1, - thrust::optional threshold_ratio) + cuda::std::optional threshold_ratio) { if (threshold_ratio) { return std::abs(val0 - val1) <= (std::max(std::abs(val0), std::abs(val1)) * *threshold_ratio); @@ -58,15 +58,16 @@ struct comparator { return detail::compare_arithmetic_scalar( t0, t1, - std::is_floating_point_v ? thrust::optional{threshold_ratio} : thrust::nullopt); + std::is_floating_point_v ? cuda::std::optional{threshold_ratio} : cuda::std::nullopt); } else { - auto val0 = thrust::get<0>(t0); - auto val1 = thrust::get<0>(t1); - auto passed = detail::compare_arithmetic_scalar( - val0, - val1, - std::is_floating_point_v ? thrust::optional{threshold_ratio} - : thrust::nullopt); + auto val0 = thrust::get<0>(t0); + auto val1 = thrust::get<0>(t1); + auto passed = + detail::compare_arithmetic_scalar(val0, + val1, + std::is_floating_point_v + ? cuda::std::optional{threshold_ratio} + : cuda::std::nullopt); if (!passed) return false; if constexpr (thrust::tuple_size::value >= 2) { @@ -76,8 +77,8 @@ struct comparator { detail::compare_arithmetic_scalar(val0, val1, std::is_floating_point_v - ? thrust::optional{threshold_ratio} - : thrust::nullopt); + ? cuda::std::optional{threshold_ratio} + : cuda::std::nullopt); if (!passed) return false; } if constexpr (thrust::tuple_size::value >= 3) { diff --git a/cpp/tests/sampling/detail/sampling_post_processing_validate.cu b/cpp/tests/sampling/detail/sampling_post_processing_validate.cu index a0babc3b921..ac0523bbce7 100644 --- a/cpp/tests/sampling/detail/sampling_post_processing_validate.cu +++ b/cpp/tests/sampling/detail/sampling_post_processing_validate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -383,26 +384,27 @@ bool compare_heterogeneous_edgelist( this_label_org_sorted_indices.begin(), this_label_org_sorted_indices.end(), [edge_types = org_edgelist_edge_types - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*org_edgelist_edge_types).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, - hops = org_edgelist_hops ? thrust::make_optional>( + : cuda::std::nullopt, + hops = org_edgelist_hops ? cuda::std::make_optional>( (*org_edgelist_hops).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, srcs = raft::device_span(org_edgelist_srcs.data() + label_start_offset, label_end_offset - label_start_offset), dsts = raft::device_span(org_edgelist_dsts.data() + label_start_offset, label_end_offset - label_start_offset), - weights = org_edgelist_weights ? thrust::make_optional>( + weights = org_edgelist_weights ? cuda::std::make_optional>( (*org_edgelist_weights).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, - edge_ids = org_edgelist_edge_ids ? thrust::make_optional>( - (*org_edgelist_edge_ids).data() + label_start_offset, - label_end_offset - label_start_offset) - : thrust::nullopt] __device__(size_t l_idx, size_t r_idx) { + : cuda::std::nullopt, + edge_ids = org_edgelist_edge_ids + ? 
cuda::std::make_optional>( + (*org_edgelist_edge_ids).data() + label_start_offset, + label_end_offset - label_start_offset) + : cuda::std::nullopt] __device__(size_t l_idx, size_t r_idx) { edge_type_t l_edge_type{0}; edge_type_t r_edge_type{0}; if (edge_types) { @@ -673,15 +675,15 @@ bool compare_heterogeneous_edgelist( raft::device_span(this_edge_type_unrenumbered_edgelist_dsts.data(), this_edge_type_unrenumbered_edgelist_dsts.size()), weights = renumbered_edgelist_weights - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*renumbered_edgelist_weights).data() + edge_type_start_offset, edge_type_end_offset - edge_type_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, edge_ids = renumbered_edgelist_edge_ids - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*renumbered_edgelist_edge_ids).data() + edge_type_start_offset, edge_type_end_offset - edge_type_start_offset) - : thrust::nullopt] __device__(size_t l_idx, size_t r_idx) { + : cuda::std::nullopt] __device__(size_t l_idx, size_t r_idx) { vertex_t l_src = srcs[l_idx]; vertex_t r_src = srcs[r_idx]; @@ -721,15 +723,15 @@ bool compare_heterogeneous_edgelist( raft::device_span(org_edgelist_dsts.data() + label_start_offset, label_end_offset - label_start_offset), org_weights = org_edgelist_weights - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*org_edgelist_weights).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, org_edge_ids = org_edgelist_edge_ids - ? thrust::make_optional>( + ? 
cuda::std::make_optional>( (*org_edgelist_edge_ids).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, unrenumbered_srcs = raft::device_span(this_edge_type_unrenumbered_edgelist_srcs.data(), this_edge_type_unrenumbered_edgelist_srcs.size()), @@ -738,16 +740,16 @@ bool compare_heterogeneous_edgelist( this_edge_type_unrenumbered_edgelist_dsts.size()), unrenumbered_weights = renumbered_edgelist_weights - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*renumbered_edgelist_weights).data() + edge_type_start_offset, edge_type_end_offset - edge_type_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, unrenumbered_edge_ids = unrenumbered_edgelist_edge_ids - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*unrenumbered_edgelist_edge_ids).data(), (*unrenumbered_edgelist_edge_ids).size()) - : thrust:: + : cuda::std:: nullopt] __device__(size_t org_idx /* from label_start_offset */, size_t unrenumbered_idx /* from edge_type_start_offset */) { diff --git a/cpp/tests/utilities/property_generator_kernels.cuh b/cpp/tests/utilities/property_generator_kernels.cuh index 78b22e0dac2..5c4bc00cdfa 100644 --- a/cpp/tests/utilities/property_generator_kernels.cuh +++ b/cpp/tests/utilities/property_generator_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,7 +19,7 @@ #include "prims/update_edge_src_dst_property.cuh" #include "utilities/property_generator_utilities.hpp" -#include +#include #include #include @@ -69,8 +69,11 @@ template struct edge_property_transform { int32_t mod{}; - constexpr __device__ property_t operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + constexpr __device__ property_t operator()(vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { static_assert(cugraph::is_thrust_tuple_of_arithmetic::value || std::is_arithmetic_v); From 9e3a45722042d07083846de1fb39cbd348c75e90 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Wed, 29 Jan 2025 14:34:23 -0800 Subject: [PATCH 4/9] [BUG] Output Edge Labels in the Distributed Sampler (#4898) We currently do not output edge labels in the distributed sampler, which breaks some link prediction workflows where the graph contains pre-labeled edges. This PR adds support for that so these workflows can be enabled. Authors: - Alex Barghi (https://github.com/alexbarghi-nv) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4898 --- .../cugraph/gnn/data_loading/dist_sampler.py | 37 ++++++++++++++++--- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py index 2edafe95716..6ac4ca142ee 100644 --- a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py +++ b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py @@ -223,6 +223,7 @@ def __get_call_groups( input_id: TensorType, seeds_per_call: int, assume_equal_input_size: bool = False, + label: Optional[TensorType] = None, ): torch = import_optional("torch") @@ -231,6 +232,8 @@ def __get_call_groups( # many batches. 
seeds_call_groups = torch.split(seeds, seeds_per_call, dim=-1) index_call_groups = torch.split(input_id, seeds_per_call, dim=-1) + if label is not None: + label_call_groups = torch.split(label, seeds_per_call, dim=-1) # Need to add empties to the list of call groups to handle the case # where not all ranks have the same number of call groups. This @@ -251,8 +254,16 @@ def __get_call_groups( [torch.tensor([], dtype=torch.int64, device=input_id.device)] * (int(num_call_groups) - len(index_call_groups)) ) + if label is not None: + label_call_groups = list(label_call_groups) + ( + [torch.tensor([], dtype=label.dtype, device=label.device)] + * (int(num_call_groups) - len(label_call_groups)) + ) - return seeds_call_groups, index_call_groups + if label is not None: + return seeds_call_groups, index_call_groups, label_call_groups + else: + return seeds_call_groups, index_call_groups def sample_from_nodes( self, @@ -344,7 +355,7 @@ def sample_from_nodes( def __sample_from_edges_func( self, call_id: int, - current_seeds_and_ix: Tuple["torch.Tensor", "torch.Tensor"], + current_seeds_and_ix: Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"], batch_id_start: int, batch_size: int, batches_per_call: int, @@ -353,7 +364,7 @@ def __sample_from_edges_func( ) -> Union[None, Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]]: torch = import_optional("torch") - current_seeds, current_ix = current_seeds_and_ix + current_seeds, current_ix, current_label = current_seeds_and_ix num_seed_edges = current_ix.numel() # The index gets stored as-is regardless of what makes it into @@ -468,6 +479,7 @@ def __sample_from_edges_func( random_state=random_state, ) minibatch_dict["input_index"] = current_ix.cuda() + minibatch_dict["input_label"] = current_label.cuda() minibatch_dict["input_offsets"] = input_offsets minibatch_dict[ "edge_inverse" @@ -505,6 +517,7 @@ def sample_from_edges( random_state: int = 62, assume_equal_input_size: bool = False, input_id: Optional[TensorType] = None, + 
input_label: Optional[TensorType] = None, ) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]: """ Performs sampling starting from seed edges. @@ -527,6 +540,10 @@ def sample_from_edges( Input ids corresponding to the original batch tensor, if it was permuted prior to calling this function. If present, will be saved with the samples. + input_label: Optional[TensorType] + Input labels corresponding to the input seeds. Typically used + for link prediction sampling. If present, will be saved with + the samples. Generally not compatible with negative sampling. """ torch = import_optional("torch") @@ -545,12 +562,20 @@ def sample_from_edges( local_num_batches, assume_equal_input_size=assume_equal_input_size ) - edges_call_groups, index_call_groups = self.__get_call_groups( + groups = self.__get_call_groups( edges, input_id, actual_seed_edges_per_call, assume_equal_input_size=input_size_is_equal, + label=input_label, ) + if len(groups) == 2: + edges_call_groups, index_call_groups = groups + label_call_groups = [torch.tensor([], dtype=torch.int32)] * len( + edges_call_groups + ) + else: + edges_call_groups, index_call_groups, label_call_groups = groups sample_args = [ batch_id_start, @@ -563,14 +588,14 @@ def sample_from_edges( if self.__writer is None: # Buffered sampling return BufferedSampleReader( - zip(edges_call_groups, index_call_groups), + zip(edges_call_groups, index_call_groups, label_call_groups), self.__sample_from_edges_func, *sample_args, ) else: # Unbuffered sampling for i, current_seeds_and_ix in enumerate( - zip(edges_call_groups, index_call_groups) + zip(edges_call_groups, index_call_groups, label_call_groups) ): sample_args[0] = self.__sample_from_edges_func( i, From 5a41b41549e2afa5efbdf5b19f7601013a2450f0 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Thu, 30 Jan 2025 00:48:08 -0500 Subject: [PATCH 5/9] Update MG negative sampling to return random samples distributed as specified (#4885) 
Modifies the new negative sampling interface so that, when called from MG, each rank specifies how many samples it wishes to receive, and the samples are randomly distributed across the calling GPUs. Marked breaking as it changes the C++ interface... although nothing uses it yet. Closes #4672 Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/4885 --- cpp/include/cugraph/sampling_functions.hpp | 5 +- cpp/src/detail/permute_range.cuh | 4 +- cpp/src/detail/permute_range_v32.cu | 19 +- cpp/src/detail/permute_range_v64.cu | 19 +- cpp/src/sampling/negative_sampling_impl.cuh | 240 ++++++++++++++++---- cpp/tests/sampling/mg_negative_sampling.cpp | 34 +-- 6 files changed, 210 insertions(+), 111 deletions(-) diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp index 35c51c1ea6d..0ccf49ddfb6 100644 --- a/cpp/include/cugraph/sampling_functions.hpp +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -1152,7 +1152,10 @@ lookup_endpoints_from_edge_ids_and_types( * @param dst_biases Optional bias for randomly selecting destination vertices. If std::nullopt * vertices will be selected uniformly. In multi-GPU environment the biases should be partitioned * based on the vertex partitions. - * @param num_samples Number of negative samples to generate + * @param num_samples Number of negative samples to generate. In SG mode this represents the total + * number of samples to generate. In MG mode, each GPU will provide the number of samples desired + * on that GPU. The total number of samples in MG mode will be the sum of these values; the + * resulting samples will be randomly distributed across the ranks.
* @param remove_duplicates If true, remove duplicate samples * @param remove_existing_edges If true, remove samples that are actually edges in the graph * @param exact_number_of_samples If true, repeat generation until we get the exact number of diff --git a/cpp/src/detail/permute_range.cuh b/cpp/src/detail/permute_range.cuh index c7cd57c2048..a9d1b27f52d 100644 --- a/cpp/src/detail/permute_range.cuh +++ b/cpp/src/detail/permute_range.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,7 +58,7 @@ rmm::device_uvector permute_range(raft::handle_t const& handle, sub_range_sizes.begin(), sub_range_sizes.end(), sub_range_sizes.begin(), global_start); CUGRAPH_EXPECTS( sub_range_sizes[comm_rank] == local_range_start, - "Invalid input arguments: a rage must have contiguous and non-overlapping values"); + "Invalid input arguments: a range must have contiguous and non-overlapping values"); } rmm::device_uvector permuted_integers(local_range_size, handle.get_stream()); diff --git a/cpp/src/detail/permute_range_v32.cu b/cpp/src/detail/permute_range_v32.cu index 6a7bc059901..91d23487f03 100644 --- a/cpp/src/detail/permute_range_v32.cu +++ b/cpp/src/detail/permute_range_v32.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,23 +16,6 @@ #include "detail/permute_range.cuh" -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include - namespace cugraph { namespace detail { diff --git a/cpp/src/detail/permute_range_v64.cu b/cpp/src/detail/permute_range_v64.cu index ad7daf16419..a6dbc9a72ae 100644 --- a/cpp/src/detail/permute_range_v64.cu +++ b/cpp/src/detail/permute_range_v64.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,23 +16,6 @@ #include "detail/permute_range.cuh" -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include - namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/negative_sampling_impl.cuh b/cpp/src/sampling/negative_sampling_impl.cuh index 541eda67860..9aedc5dfc35 100644 --- a/cpp/src/sampling/negative_sampling_impl.cuh +++ b/cpp/src/sampling/negative_sampling_impl.cuh @@ -16,8 +16,11 @@ #pragma once +#include "cugraph/detail/collect_comm_wrapper.hpp" +#include "cugraph/utilities/device_comm.hpp" #include "prims/reduce_v.cuh" #include "prims/update_edge_src_dst_property.cuh" +#include "thrust/iterator/zip_iterator.h" #include "utilities/collect_comm.cuh" #include @@ -26,6 +29,10 @@ #include #include +#include +#include +#include + #include #include @@ -37,6 +44,8 @@ #include #include +#include + namespace cugraph { namespace detail { @@ -265,11 +274,19 @@ std::tuple, rmm::device_uvector> negativ bool exact_number_of_samples, bool do_expensive_check) { - rmm::device_uvector src(0, handle.get_stream()); - rmm::device_uvector dst(0, handle.get_stream()); + rmm::device_uvector srcs(0, handle.get_stream()); + rmm::device_uvector dsts(0, handle.get_stream()); // Optimistically assume we can do 
this in one pass - size_t samples_in_this_batch = num_samples; + size_t total_samples{num_samples}; + std::vector samples_per_gpu; + + if constexpr (multi_gpu) { + samples_per_gpu = host_scalar_allgather(handle.get_comms(), num_samples, handle.get_stream()); + total_samples = std::reduce(samples_per_gpu.begin(), samples_per_gpu.end()); + } + + size_t samples_in_this_batch = total_samples; // Normalize the biases and (for MG) determine how the biases are // distributed across the GPUs. @@ -298,16 +315,16 @@ std::tuple, rmm::device_uvector> negativ : 0); } - auto batch_src = create_local_samples( + auto batch_srcs = create_local_samples( handle, rng_state, graph_view, normalized_src_biases, gpu_src_biases, samples_in_this_batch); - auto batch_dst = create_local_samples( + auto batch_dsts = create_local_samples( handle, rng_state, graph_view, normalized_dst_biases, gpu_dst_biases, samples_in_this_batch); if constexpr (multi_gpu) { auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - std::tie(batch_src, - batch_dst, + std::tie(batch_srcs, + batch_dsts, std::ignore, std::ignore, std::ignore, @@ -320,8 +337,8 @@ std::tuple, rmm::device_uvector> negativ int32_t, int32_t>( handle, - std::move(batch_src), - std::move(batch_dst), + std::move(batch_srcs), + std::move(batch_dsts), std::nullopt, std::nullopt, std::nullopt, @@ -333,42 +350,43 @@ std::tuple, rmm::device_uvector> negativ if (remove_existing_edges) { auto has_edge_flags = graph_view.has_edge(handle, - raft::device_span{batch_src.data(), batch_src.size()}, - raft::device_span{batch_dst.data(), batch_dst.size()}, + raft::device_span{batch_srcs.data(), batch_srcs.size()}, + raft::device_span{batch_dsts.data(), batch_dsts.size()}, do_expensive_check); - auto begin_iter = thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()); + auto begin_iter = thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()); auto new_end = thrust::remove_if(handle.get_thrust_policy(), begin_iter, 
- begin_iter + batch_src.size(), + begin_iter + batch_srcs.size(), has_edge_flags.begin(), thrust::identity()); - batch_src.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); - batch_dst.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); + batch_srcs.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); + batch_dsts.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); } if (remove_duplicates) { thrust::sort(handle.get_thrust_policy(), - thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), - thrust::make_zip_iterator(batch_src.end(), batch_dst.end())); + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), + thrust::make_zip_iterator(batch_srcs.end(), batch_dsts.end())); - auto new_end = thrust::unique(handle.get_thrust_policy(), - thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), - thrust::make_zip_iterator(batch_src.end(), batch_dst.end())); + auto new_end = + thrust::unique(handle.get_thrust_policy(), + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), + thrust::make_zip_iterator(batch_srcs.end(), batch_dsts.end())); - size_t new_size = - thrust::distance(thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), new_end); + size_t new_size = thrust::distance( + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), new_end); - if (src.size() > 0) { - rmm::device_uvector new_src(src.size() + new_size, handle.get_stream()); - rmm::device_uvector new_dst(dst.size() + new_size, handle.get_stream()); + if (srcs.size() > 0) { + rmm::device_uvector new_src(srcs.size() + new_size, handle.get_stream()); + rmm::device_uvector new_dst(dsts.size() + new_size, handle.get_stream()); thrust::merge(handle.get_thrust_policy(), - thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), new_end, - thrust::make_zip_iterator(src.begin(), dst.begin()), - 
thrust::make_zip_iterator(src.end(), dst.end()), + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + thrust::make_zip_iterator(srcs.end(), dsts.end()), thrust::make_zip_iterator(new_src.begin(), new_dst.begin())); new_end = thrust::unique(handle.get_thrust_policy(), @@ -378,32 +396,32 @@ std::tuple, rmm::device_uvector> negativ new_size = thrust::distance(thrust::make_zip_iterator(new_src.begin(), new_dst.begin()), new_end); - src = std::move(new_src); - dst = std::move(new_dst); + srcs = std::move(new_src); + dsts = std::move(new_dst); } else { - src = std::move(batch_src); - dst = std::move(batch_dst); + srcs = std::move(batch_srcs); + dsts = std::move(batch_dsts); } - src.resize(new_size, handle.get_stream()); - dst.resize(new_size, handle.get_stream()); - } else if (src.size() > 0) { - size_t current_end = src.size(); + srcs.resize(new_size, handle.get_stream()); + dsts.resize(new_size, handle.get_stream()); + } else if (srcs.size() > 0) { + size_t current_end = srcs.size(); - src.resize(src.size() + batch_src.size(), handle.get_stream()); - dst.resize(dst.size() + batch_dst.size(), handle.get_stream()); + srcs.resize(srcs.size() + batch_srcs.size(), handle.get_stream()); + dsts.resize(dsts.size() + batch_dsts.size(), handle.get_stream()); thrust::copy(handle.get_thrust_policy(), - thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), - thrust::make_zip_iterator(batch_src.end(), batch_dst.end()), - thrust::make_zip_iterator(src.begin(), dst.begin()) + current_end); + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), + thrust::make_zip_iterator(batch_srcs.end(), batch_dsts.end()), + thrust::make_zip_iterator(srcs.begin(), dsts.begin()) + current_end); } else { - src = std::move(batch_src); - dst = std::move(batch_dst); + srcs = std::move(batch_srcs); + dsts = std::move(batch_dsts); } if (exact_number_of_samples) { - size_t current_sample_size = src.size(); + size_t current_sample_size = srcs.size(); if constexpr (multi_gpu) { 
current_sample_size = cugraph::host_scalar_allreduce( handle.get_comms(), current_sample_size, raft::comms::op_t::SUM, handle.get_stream()); @@ -412,16 +430,142 @@ std::tuple, rmm::device_uvector> negativ // FIXME: We could oversample and discard the unnecessary samples // to reduce the number of iterations in the outer loop, but it seems like // exact_number_of_samples is an edge case not worth optimizing for at this time. - samples_in_this_batch = num_samples - current_sample_size; + samples_in_this_batch = total_samples - current_sample_size; } else { samples_in_this_batch = 0; } } - src.shrink_to_fit(handle.get_stream()); - dst.shrink_to_fit(handle.get_stream()); + srcs.shrink_to_fit(handle.get_stream()); + dsts.shrink_to_fit(handle.get_stream()); + + if constexpr (multi_gpu) { + auto const& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + // Randomly shuffle the samples so that each gpu gets their + // desired number of samples + + if (!exact_number_of_samples) { + // If we didn't force generating the exact number of samples, + // we might have fewer samples than requested. We need to + // accommodate this situation. For now we'll just + // uniformly(-ish) reduce the requested size. 
+ size_t total_extracted = host_scalar_allreduce( + handle.get_comms(), srcs.size(), raft::comms::op_t::SUM, handle.get_stream()); + size_t reduction = total_samples - total_extracted; + + while (reduction > 0) { + size_t est_reduction_per_gpu = (reduction + comm_size - 1) / comm_size; + for (size_t i = 0; i < samples_per_gpu.size(); ++i) { + if (samples_per_gpu[i] > est_reduction_per_gpu) { + samples_per_gpu[i] -= est_reduction_per_gpu; + reduction -= est_reduction_per_gpu; + } else { + reduction -= samples_per_gpu[i]; + samples_per_gpu[i] = 0; + } + + if (reduction < est_reduction_per_gpu) est_reduction_per_gpu = reduction; + } + } + num_samples = samples_per_gpu[comm_rank]; + } + + // Mimic the logic of permute_range... + // + // 1) Randomly assign each entry to a GPU + // 2) Count how many are assigned to each GPU + // 3) Allgatherv (allgather?) to give each GPU a count for how many entries are destined for + // that GPU 4) Identify extras/deficits for each GPU, arbitrarily adjust counts to make correct + // 5) Shuffle accordingly + // + rmm::device_uvector gpu_assignment(srcs.size(), handle.get_stream()); + + cugraph::detail::uniform_random_fill(handle.get_stream(), + gpu_assignment.data(), + gpu_assignment.size(), + int{0}, + int{comm_size}, + rng_state); + + thrust::sort_by_key(handle.get_thrust_policy(), + gpu_assignment.begin(), + gpu_assignment.end(), + thrust::make_zip_iterator(srcs.begin(), dsts.begin())); + + rmm::device_uvector d_send_counts(comm_size, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + d_send_counts.begin(), + d_send_counts.end(), + [gpu_assignment_span = raft::device_span{ + gpu_assignment.data(), gpu_assignment.size()}] __device__(size_t i) { + auto begin = thrust::lower_bound( + thrust::seq, gpu_assignment_span.begin(), gpu_assignment_span.end(), static_cast(i)); + auto end = + thrust::upper_bound(thrust::seq, begin, gpu_assignment_span.end(), static_cast(i)); + return thrust::distance(begin, end); + }); + + 
std::vector tx_value_counts(comm_size, 0); + raft::update_host( + tx_value_counts.data(), d_send_counts.data(), d_send_counts.size(), handle.get_stream()); + + std::forward_as_tuple(std::tie(srcs, dsts), std::ignore) = + cugraph::shuffle_values(handle.get_comms(), + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + tx_value_counts, + handle.get_stream()); + + rmm::device_uvector fractional_random_numbers(srcs.size(), handle.get_stream()); + + cugraph::detail::uniform_random_fill(handle.get_stream(), + fractional_random_numbers.data(), + fractional_random_numbers.size(), + float{0.0}, + float{1.0}, + rng_state); + thrust::sort_by_key(handle.get_thrust_policy(), + fractional_random_numbers.begin(), + fractional_random_numbers.end(), + thrust::make_zip_iterator(srcs.begin(), dsts.begin())); + + size_t nr_extras{0}; + size_t nr_deficits{0}; + if (srcs.size() > num_samples) { + nr_extras = srcs.size() - static_cast(num_samples); + } else { + nr_deficits = static_cast(num_samples) - srcs.size(); + } + + auto extra_srcs = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(srcs.data() + num_samples, nr_extras)); + // nr_extras > 0 ? nr_extras : 0)); + auto extra_dsts = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(dsts.data() + num_samples, nr_extras)); + // nr_extras > 0 ? 
nr_extras : 0)); + + srcs.resize(num_samples, handle.get_stream()); + dsts.resize(num_samples, handle.get_stream()); + auto deficits = + cugraph::host_scalar_allgather(handle.get_comms(), nr_deficits, handle.get_stream()); + + std::exclusive_scan(deficits.begin(), deficits.end(), deficits.begin(), vertex_t{0}); + + raft::copy(srcs.data() + num_samples - nr_deficits, + extra_srcs.begin() + deficits[comm_rank], + nr_deficits, + handle.get_stream()); + + raft::copy(dsts.data() + num_samples - nr_deficits, + extra_dsts.begin() + deficits[comm_rank], + nr_deficits, + handle.get_stream()); + } - return std::make_tuple(std::move(src), std::move(dst)); + return std::make_tuple(std::move(srcs), std::move(dsts)); } } // namespace cugraph diff --git a/cpp/tests/sampling/mg_negative_sampling.cpp b/cpp/tests/sampling/mg_negative_sampling.cpp index 7c64bb7fbbb..eb9f4fbb394 100644 --- a/cpp/tests/sampling/mg_negative_sampling.cpp +++ b/cpp/tests/sampling/mg_negative_sampling.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "cugraph/utilities/host_scalar_comm.hpp" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" #include "utilities/property_generator_utilities.hpp" @@ -85,8 +86,9 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParamview()); } - size_t num_samples = - graph_view.compute_number_of_edges(*handle_) * negative_sampling_usecase.sample_multiplier; + size_t num_samples = graph_view.compute_number_of_edges(*handle_) * + negative_sampling_usecase.sample_multiplier / + handle_->get_comms().get_size(); rmm::device_uvector src_bias_v(0, handle_->get_stream()); rmm::device_uvector dst_bias_v(0, handle_->get_stream()); @@ -150,26 +152,8 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParam{src_out.data(), src_out.size()}, raft::device_span{dst_out.data(), dst_out.size()}); - // TODO: Move this to validation_utilities... - auto h_vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - rmm::device_uvector d_vertex_partition_range_lasts( - h_vertex_partition_range_lasts.size(), handle_->get_stream()); - raft::update_device(d_vertex_partition_range_lasts.data(), - h_vertex_partition_range_lasts.data(), - h_vertex_partition_range_lasts.size(), - handle_->get_stream()); - - size_t error_count = cugraph::test::count_edges_on_wrong_int_gpu( - *handle_, - raft::device_span{src_out.data(), src_out.size()}, - raft::device_span{dst_out.data(), dst_out.size()}, - raft::device_span{d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()}); - - ASSERT_EQ(error_count, 0) << "generate edges out of range > 0"; - if ((negative_sampling_usecase.remove_duplicates) && (src_out.size() > 0)) { - error_count = cugraph::test::count_duplicate_vertex_pairs_sorted( + size_t error_count = cugraph::test::count_duplicate_vertex_pairs_sorted( *handle_, raft::device_span{src_out.data(), src_out.size()}, raft::device_span{dst_out.data(), dst_out.size()}); @@ -184,7 +168,7 @@ class 
Tests_MGNegative_Sampling : public ::testing::TestWithParam( *handle_, graph_view, std::nullopt, std::nullopt, std::nullopt, std::nullopt); - error_count = cugraph::test::count_intersection( + size_t error_count = cugraph::test::count_intersection( *handle_, raft::device_span{graph_src.data(), graph_src.size()}, raft::device_span{graph_dst.data(), graph_dst.size()}, @@ -202,7 +186,9 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParamget_comms(), src_out.size(), raft::comms::op_t::SUM, handle_->get_stream()); - ASSERT_EQ(sz, num_samples) << "Expected exact number of samples"; + size_t aggregate_sample_count = cugraph::host_scalar_allreduce( + handle_->get_comms(), num_samples, raft::comms::op_t::SUM, handle_->get_stream()); + ASSERT_EQ(sz, aggregate_sample_count) << "Expected exact number of samples"; } // TBD: How do we determine if we have properly reflected the biases? From ef450140bcd5f52b376dc84cb8c26c5c753d9eb3 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 30 Jan 2025 06:55:40 +0100 Subject: [PATCH 6/9] Fix issues when building with upcoming cccl (#4888) We are removing a lot of deprecated thrust features, so replace them by the equivalent `cuda::std` ones Authors: - Michael Schellenberger Costa (https://github.com/miscco) Approvers: - Bradley Dice (https://github.com/bdice) - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/4888 --- cpp/include/cugraph/utilities/shuffle_comm.cuh | 12 ++++++------ cpp/src/prims/property_op_utils.cuh | 6 ++---- cpp/src/traversal/extract_bfs_paths_impl.cuh | 5 +++-- cpp/tests/utilities/check_utilities.hpp | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index 98fa2cb1706..d173cc08a1c 100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -145,21 +145,21 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const& comm, template struct key_group_id_less_t { - KeyToGroupIdOp key_to_group_id_op{}; + KeyToGroupIdOp key_to_group_id_op; int pivot{}; __device__ bool operator()(key_type k) const { return key_to_group_id_op(k) < pivot; } }; template struct value_group_id_less_t { - ValueToGroupIdOp value_to_group_id_op{}; + ValueToGroupIdOp value_to_group_id_op; int pivot{}; __device__ bool operator()(value_type v) const { return value_to_group_id_op(v) < pivot; } }; template struct kv_pair_group_id_less_t { - KeyToGroupIdOp key_to_group_id_op{}; + KeyToGroupIdOp key_to_group_id_op; int pivot{}; __device__ bool operator()(thrust::tuple t) const { @@ -169,14 +169,14 @@ struct kv_pair_group_id_less_t { template struct value_group_id_greater_equal_t { - ValueToGroupIdOp value_to_group_id_op{}; + ValueToGroupIdOp value_to_group_id_op; int pivot{}; __device__ bool operator()(value_type v) const { return value_to_group_id_op(v) >= pivot; } }; template struct kv_pair_group_id_greater_equal_t { - KeyToGroupIdOp key_to_group_id_op{}; + KeyToGroupIdOp key_to_group_id_op; int pivot{}; __device__ bool operator()(thrust::tuple t) const { diff --git a/cpp/src/prims/property_op_utils.cuh b/cpp/src/prims/property_op_utils.cuh index 04ad22cbf71..2cab42c1dc8 100644 --- a/cpp/src/prims/property_op_utils.cuh +++ b/cpp/src/prims/property_op_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -127,9 +127,7 @@ template typename Op> struct property_op : public Op {}; template typename Op> -struct property_op, Op> - : public thrust:: - binary_function, thrust::tuple, thrust::tuple> { +struct property_op, Op> { using Type = thrust::tuple; private: diff --git a/cpp/src/traversal/extract_bfs_paths_impl.cuh b/cpp/src/traversal/extract_bfs_paths_impl.cuh index d228460bec3..d0a7979d14b 100644 --- a/cpp/src/traversal/extract_bfs_paths_impl.cuh +++ b/cpp/src/traversal/extract_bfs_paths_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,6 +30,7 @@ #include +#include #include #include #include @@ -52,7 +53,7 @@ template struct compute_max { vertex_t __device__ operator()(vertex_t lhs, vertex_t rhs) { - return thrust::max(lhs, rhs); + return cuda::std::max(lhs, rhs); } }; diff --git a/cpp/tests/utilities/check_utilities.hpp b/cpp/tests/utilities/check_utilities.hpp index a22d95c87de..6974d14be04 100644 --- a/cpp/tests/utilities/check_utilities.hpp +++ b/cpp/tests/utilities/check_utilities.hpp @@ -97,7 +97,7 @@ struct device_nearly_equal { bool __device__ operator()(type_t lhs, type_t rhs) const { return std::abs(lhs - rhs) < - cuda::std::max(thrust::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + cuda::std::max(cuda::std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); } }; From 4baef585cd0a932ade6b307564c6eebf41ffe61d Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Thu, 30 Jan 2025 22:13:05 +0100 Subject: [PATCH 7/9] Optimize K-Truss (#4742) This PR introduces several optimizations to speed up K-Truss. In fact, our K-Truss implementation computes the intersection of all edges regardless of whether they are weak or not, which can be very expensive if only a few edges need to be invalidated.
By running `nbr_intersection` on the weak edges, this considerably improves the runtime. Authors: - Joseph Nke (https://github.com/jnke2016) - Ralph Liu (https://github.com/nv-rliu) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/4742 --- cpp/src/community/k_truss_impl.cuh | 810 +++++++++++++++++++++-------- 1 file changed, 585 insertions(+), 225 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index bd497b9c58c..25a1cf63f4d 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -19,6 +19,7 @@ #include "prims/extract_transform_e.cuh" #include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" +#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include "prims/update_edge_src_dst_property.cuh" @@ -32,6 +33,7 @@ #include #include +#include #include #include #include @@ -43,6 +45,59 @@ namespace cugraph { +template +struct extract_weak_edges { + edge_t k{}; + __device__ cuda::std::optional> operator()( + vertex_t src, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, edge_t count) const + { + // No need to process edges with count == 0 + return ((count < k - 2) && (count != 0)) + ? 
cuda::std::optional>{thrust::make_tuple(src, dst)} + : cuda::std::nullopt; + } +}; + +template +struct is_k_or_greater_t { + edge_t k{}; + __device__ bool operator()(edge_t core_number) const { return core_number >= edge_t{k}; } +}; + +template +struct extract_triangles_endpoints { + size_t chunk_start{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + raft::device_span weak_srcs{}; + raft::device_span weak_dsts{}; + + __device__ thrust::tuple operator()(edge_t i) const + { + auto itr = thrust::upper_bound( + thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); + + auto endpoints = thrust::make_tuple(weak_srcs[chunk_start + idx], // p + weak_dsts[chunk_start + idx], // q + intersection_indices[i] // r + ); + + auto p = weak_srcs[chunk_start + idx]; + auto q = weak_dsts[chunk_start + idx]; + auto r = intersection_indices[i]; + // Re-order the endpoints such that p < q < r in order to identify duplicate triangles + // which will cause overcompensation. comparing the vertex IDs is cheaper than comparing the + // degrees (d(p) < d(q) < d(r)) which will be done once in the latter stage to retrieve the + // direction of the edges once the triplet dependency is broken. + if (p > q) cuda::std::swap(p, q); + if (p > r) cuda::std::swap(p, r); + if (q > r) cuda::std::swap(q, r); + + return thrust::make_tuple(p, q, r); + } +}; + namespace { template @@ -60,39 +115,48 @@ struct exclude_self_loop_t { } }; -template -struct extract_low_to_high_degree_weighted_edges_t { - __device__ cuda::std::optional> operator()( - vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, weight_t wgt) const - { - return (src_out_degree < dst_out_degree) - ? cuda::std::optional>{thrust::make_tuple( - src, dst, wgt)} - : (((src_out_degree == dst_out_degree) && - (src < dst) /* tie-breaking using vertex ID */) - ? 
cuda::std::optional< - thrust::tuple>{thrust::make_tuple( - src, dst, wgt)} - : cuda::std::nullopt); - } -}; - template -struct extract_low_to_high_degree_edges_t { - __device__ cuda::std::optional> operator()( +struct extract_low_to_high_degree_edges_from_endpoints_t { + raft::device_span srcs{}; + raft::device_span dsts{}; + raft::device_span count{}; + __device__ cuda::std::optional> operator()( vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, cuda::std::nullopt_t) const { - return (src_out_degree < dst_out_degree) - ? cuda::std::optional>{thrust::make_tuple(src, dst)} - : (((src_out_degree == dst_out_degree) && - (src < dst) /* tie-breaking using vertex ID */) - ? cuda::std::optional>{thrust::make_tuple(src, - dst)} - : cuda::std::nullopt); + // FIXME: Not the most efficient way because the entire edgelist is scan just to find + // the direction of the edges + auto itr = thrust::lower_bound(thrust::seq, + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + thrust::make_zip_iterator(srcs.end(), dsts.end()), + thrust::make_tuple(src, dst)); + + if ((itr != thrust::make_zip_iterator(srcs.end(), dsts.end())) && + (*itr == thrust::make_tuple(src, dst))) { + auto idx = thrust::distance(thrust::make_zip_iterator(srcs.begin(), dsts.begin()), itr); + + if (src_out_degree < dst_out_degree) { + return cuda::std::optional>{ + thrust::make_tuple(src, dst, count[idx])}; + } else if (dst_out_degree < src_out_degree) { + return cuda::std::optional>{ + thrust::make_tuple(dst, src, count[idx])}; + } else { + if ((src_out_degree == dst_out_degree) && (src < dst) /* tie-breaking using vertex ID */) { + return cuda::std::optional>{ + thrust::make_tuple(src, dst, count[idx])}; + } else if ((src_out_degree == dst_out_degree) && + (src > dst) /* tie-breaking using vertex ID */) { + return cuda::std::optional>{ + thrust::make_tuple(dst, src, count[idx])}; + } + } + } else { + return cuda::std::nullopt; + } } }; @@ -119,246 +183,543 @@ 
k_truss(raft::handle_t const& handle, // nothing to do } - std::optional> modified_graph{std::nullopt}; - std::optional> modified_graph_view{std::nullopt}; - std::optional> renumber_map{std::nullopt}; - std::optional, weight_t>> - edge_weight{std::nullopt}; - std::optional> wgts{std::nullopt}; - - if (graph_view.count_self_loops(handle) > edge_t{0}) { - auto [srcs, dsts] = extract_transform_e(handle, - graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_dummy_property_t{}.view(), - exclude_self_loop_t{}); - - if constexpr (multi_gpu) { - std::tie( - srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } + // 2. Exclude self-loops and edges that do not belong to (k-1)-core - std::tie(*modified_graph, std::ignore, std::ignore, std::ignore, renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::nullopt, - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + auto cur_graph_view = graph_view; + auto unmasked_cur_graph_view = cur_graph_view; - modified_graph_view = (*modified_graph).view(); - } + if (unmasked_cur_graph_view.has_edge_mask()) { unmasked_cur_graph_view.clear_edge_mask(); } + // mask for self-loops and edges not part of k-1 core + cugraph::edge_property_t undirected_mask(handle); + { + // 2.1 Exclude self-loops - // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core + if (cur_graph_view.count_self_loops(handle) > edge_t{0}) { + // 2.1. Exclude self-loops - { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - - auto vertex_partition_range_lasts = - renumber_map - ? 
std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; - - rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), - handle.get_stream()); - core_number( - handle, cur_graph_view, core_numbers.data(), k_core_degree_type_t::OUT, size_t{2}, size_t{2}); - - raft::device_span core_number_span{core_numbers.data(), core_numbers.size()}; - - auto [srcs, dsts, wgts] = k_core(handle, - cur_graph_view, - edge_weight_view, - k - 1, - std::make_optional(k_core_degree_type_t::OUT), - std::make_optional(core_number_span)); - - if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } + cugraph::edge_property_t self_loop_edge_mask(handle, + cur_graph_view); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, self_loop_edge_mask.mutable_view(), false); - std::optional> tmp_renumber_map{std::nullopt}; - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = - create_graph_from_edgelist( + transform_e( handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto, auto, auto) { return src != dst; }, + self_loop_edge_mask.mutable_view()); + + undirected_mask = std::move(self_loop_edge_mask); + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(undirected_mask.view()); + } - modified_graph_view = (*modified_graph).view(); + // 2.2 Find (k-1)-core and exclude edges that do not 
belong to (k-1)-core + { + rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), + handle.get_stream()); + core_number(handle, + cur_graph_view, + core_numbers.data(), + k_core_degree_type_t::OUT, + size_t{2}, + size_t{2}); + + edge_src_property_t edge_src_in_k_minus_1_cores( + handle, cur_graph_view); + edge_dst_property_t edge_dst_in_k_minus_1_cores( + handle, cur_graph_view); + auto in_k_minus_1_core_first = + thrust::make_transform_iterator(core_numbers.begin(), is_k_or_greater_t{k - 1}); + rmm::device_uvector in_k_minus_1_core_flags(core_numbers.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + in_k_minus_1_core_first, + in_k_minus_1_core_first + core_numbers.size(), + in_k_minus_1_core_flags.begin()); + update_edge_src_property(handle, + cur_graph_view, + in_k_minus_1_core_flags.begin(), + edge_src_in_k_minus_1_cores.mutable_view()); + update_edge_dst_property(handle, + cur_graph_view, + in_k_minus_1_core_flags.begin(), + edge_dst_in_k_minus_1_cores.mutable_view()); + + cugraph::edge_property_t in_k_minus_1_core_edge_mask( + handle, cur_graph_view); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, in_k_minus_1_core_edge_mask.mutable_view(), false); + + transform_e( + handle, + cur_graph_view, + edge_src_in_k_minus_1_cores.view(), + edge_dst_in_k_minus_1_cores.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto, auto, auto src_in_k_minus_1_core, auto dst_in_k_minus_1_core, auto) { + return src_in_k_minus_1_core && dst_in_k_minus_1_core; + }, + in_k_minus_1_core_edge_mask.mutable_view()); - if (renumber_map) { // collapse renumber_map - unrenumber_int_vertices(handle, - (*tmp_renumber_map).data(), - (*tmp_renumber_map).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts); + undirected_mask = std::move(in_k_minus_1_core_edge_mask); + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(undirected_mask.view()); } - - 
renumber_map = std::move(tmp_renumber_map); } // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. - { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - - auto vertex_partition_range_lasts = - renumber_map - ? std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; + edge_src_property_t edge_src_out_degrees(handle, + cur_graph_view); + edge_dst_property_t edge_dst_out_degrees(handle, + cur_graph_view); + cugraph::edge_property_t, bool> dodg_mask( + handle, cur_graph_view); + { auto out_degrees = cur_graph_view.compute_out_degrees(handle); - edge_src_property_t edge_src_out_degrees(handle, - cur_graph_view); - edge_dst_property_t edge_dst_out_degrees(handle, - cur_graph_view); update_edge_src_property( handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); update_edge_dst_property( handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - - edge_weight_view = - edge_weight ? 
std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; - if (edge_weight_view) { - std::tie(srcs, dsts, wgts) = extract_transform_e( - handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - *edge_weight_view, - extract_low_to_high_degree_weighted_edges_t{}); - } else { - std::tie(srcs, dsts) = - extract_transform_e(handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - edge_dummy_property_t{}.view(), - extract_low_to_high_degree_edges_t{}); - } - - if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } - - std::optional> tmp_renumber_map{std::nullopt}; - - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - true); - - modified_graph_view = (*modified_graph).view(); - if (renumber_map) { // collapse renumber_map - unrenumber_int_vertices(handle, - (*tmp_renumber_map).data(), - (*tmp_renumber_map).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts); - } - renumber_map = std::move(tmp_renumber_map); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, dodg_mask.mutable_view(), bool{false}); + + cugraph::transform_e( + handle, + cur_graph_view, + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_out_degree, auto dst_out_degree, auto) { + return (src_out_degree < dst_out_degree) ? 
true + : ((src_out_degree == dst_out_degree) && + (src < dst) /* tie-breaking using vertex ID */) + ? true + : false; + }, + dodg_mask.mutable_view(), + do_expensive_check); + + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(dodg_mask.view()); } // 4. Compute triangle count using nbr_intersection and unroll weak edges { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; + // Mask self loops and edges not being part of k-1 core + auto weak_edges_mask = std::move(undirected_mask); - edge_weight_view = - edge_weight ? std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; + auto edge_triangle_counts = + edge_triangle_count(handle, cur_graph_view, false); - cugraph::edge_property_t edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); + cugraph::edge_bucket_t edgelist_weak(handle); + cugraph::edge_bucket_t edges_to_decrement_count(handle); + size_t prev_chunk_size = 0; // FIXME: Add support for chunking while (true) { - // FIXME: This approach is very expensive when invalidating only few edges per iteration - // and should be address. 
- auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view); + // Extract weak edges + auto [weak_edgelist_srcs, weak_edgelist_dsts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + extract_weak_edges{k}); + + auto weak_edgelist_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto weak_edgelist_last = + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); + + thrust::sort(handle.get_thrust_policy(), weak_edgelist_first, weak_edgelist_last); + + // Perform nbr_intersection of the weak edges from the undirected + // graph view + cur_graph_view.clear_edge_mask(); + + // Attach the weak edge mask + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto [intersection_offsets, intersection_indices] = per_v_pair_dst_nbr_intersection( + handle, cur_graph_view, weak_edgelist_first, weak_edgelist_last, do_expensive_check); + + // This array stores (p, q, r) which are endpoints for the triangles with weak edges + + auto triangles_endpoints = + allocate_dataframe_buffer>( + intersection_indices.size(), handle.get_stream()); + + // Extract endpoints for triangles with weak edges + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints), + extract_triangles_endpoints{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + auto unique_triangle_end = 
thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + auto num_unique_triangles = thrust::distance( // Triangles are represented by their endpoints + get_dataframe_buffer_begin(triangles_endpoints), + unique_triangle_end); + + resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream()); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + // Shuffle the edges with respect to the undirected graph view to the GPU + // owning edge (p, q). 
Remember that the triplet (p, q, r) is ordered based on the + // vertex ID and not the degree so (p, q) might not be an edge in the DODG but is + // surely an edge in the undirected graph + std::tie(triangles_endpoints, std::ignore) = groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + unique_triangle_end = thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + num_unique_triangles = + thrust::distance(get_dataframe_buffer_begin(triangles_endpoints), unique_triangle_end); + resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream()); + } + + auto edgelist_to_update_count = allocate_dataframe_buffer>( + 3 * num_unique_triangles, handle.get_stream()); + + // The order no longer matters since duplicated triangles have been removed + // Flatten the endpoints to a list of edges. 
+ thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), + get_dataframe_buffer_begin(edgelist_to_update_count), + [num_unique_triangles, + triangles_endpoints = + get_dataframe_buffer_begin(triangles_endpoints)] __device__(auto idx) { + auto idx_triangle = idx % num_unique_triangles; + auto idx_vertex_in_triangle = idx / num_unique_triangles; + auto triangle = (triangles_endpoints + idx_triangle).get_iterator_tuple(); + vertex_t src; + vertex_t dst; + + if (idx_vertex_in_triangle == 0) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<1>(triangle)); + } + + if (idx_vertex_in_triangle == 1) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<2>(triangle)); + } + + if (idx_vertex_in_triangle == 2) { + src = *(thrust::get<1>(triangle)); + dst = *(thrust::get<2>(triangle)); + } + + return thrust::make_tuple(src, dst); + }); + + if constexpr (multi_gpu) { + std::tie(std::get<0>(edgelist_to_update_count), + std::get<1>(edgelist_to_update_count), + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(edgelist_to_update_count)), + std::move(std::get<1>(edgelist_to_update_count)), + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto unique_pair_count = + thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( + unique_pair_count, handle.get_stream()); + + rmm::device_uvector 
decrease_count(unique_pair_count, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + thrust::make_constant_iterator(size_t{1}), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count.begin(), + thrust::equal_to>{}); + + std::tie(std::get<0>(vertex_pair_buffer_unique), + std::get<1>(vertex_pair_buffer_unique), + decrease_count) = + extract_transform_e( + handle, + cur_graph_view, + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + edge_dummy_property_t{}.view(), + extract_low_to_high_degree_edges_from_endpoints_t{ + raft::device_span(std::get<0>(vertex_pair_buffer_unique).data(), + std::get<0>(vertex_pair_buffer_unique).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_unique).data(), + std::get<1>(vertex_pair_buffer_unique).size()), + raft::device_span(decrease_count.data(), decrease_count.size())}); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + std::forward_as_tuple(std::tie(std::get<0>(vertex_pair_buffer_unique), + std::get<1>(vertex_pair_buffer_unique), + decrease_count), + std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + 
thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin(), + decrease_count.begin()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end(), + decrease_count.end()), + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } + + thrust::sort_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + get_dataframe_buffer_end(vertex_pair_buffer_unique), + decrease_count.begin()); + + // Update count of weak edges + edges_to_decrement_count.clear(); + + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).begin()); + + cur_graph_view.clear_edge_mask(); + // Check for edge existence on the directed graph view + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + // Update count of weak edges from the DODG view + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_decrement_count, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [edge_buffer_first = + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin()), + edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end()), + decrease_count = raft::device_span( + decrease_count.data(), decrease_count.size())] __device__(auto src, + auto dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + edge_t count) { + auto itr_pair = 
thrust::lower_bound( + thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); + auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); + count -= decrease_count[idx_pair]; + + return count; + }, + edge_triangle_counts.mutable_view(), + do_expensive_check); + + edgelist_weak.clear(); + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end())); + + edgelist_weak.insert( + weak_edgelist_srcs.begin(), weak_edgelist_srcs.end(), weak_edgelist_dsts.begin()); - // Mask all the edges that have k - 2 count + // Get undirected graph view + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); cugraph::transform_e( handle, cur_graph_view, + edgelist_weak, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [k] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) { - return count >= k - 2; + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { + return false; }, - edge_mask.mutable_view(), - false); + weak_edges_mask.mutable_view(), + do_expensive_check); + + edgelist_weak.clear(); + + // shuffle the edges if multi_gpu + if constexpr (multi_gpu) { + std::tie(weak_edgelist_dsts, + weak_edgelist_srcs, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(weak_edgelist_dsts), + std::move(weak_edgelist_srcs), + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + 
thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), + thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end())); + + edgelist_weak.insert( + weak_edgelist_dsts.begin(), weak_edgelist_dsts.end(), weak_edgelist_srcs.begin()); - cur_graph_view.attach_edge_mask(edge_mask.view()); + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { + return false; + }, + weak_edges_mask.mutable_view(), + do_expensive_check); + + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } + + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); } + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + cugraph::transform_e( + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) { + return count == 0 ? false : true; + }, + dodg_mask.mutable_view(), + do_expensive_check); + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; @@ -367,11 +728,10 @@ k_truss(raft::handle_t const& handle, decompress_to_edgelist( handle, cur_graph_view, - edge_weight_view ? 
std::make_optional(*edge_weight_view) : std::nullopt, + edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - std::make_optional( - raft::device_span((*renumber_map).data(), (*renumber_map).size()))); + std::optional>{std::nullopt}); std::tie(edgelist_srcs, edgelist_dsts, From 1d542ef3d802e37ffd4b9984a85caedea6ce8384 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Jan 2025 22:20:23 -0600 Subject: [PATCH 8/9] Build and test with CUDA 12.8.0 (#4896) This PR uses CUDA 12.8.0 to build and test. xref: https://github.com/rapidsai/build-planning/issues/139 Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cugraph/pull/4896 --- .../devcontainer.json | 8 ++--- .../devcontainer.json | 12 +++---- .github/workflows/build.yaml | 20 +++++------ .github/workflows/pr.yaml | 34 +++++++++---------- .github/workflows/test.yaml | 10 +++--- .../trigger-breaking-change-alert.yaml | 2 +- ..._64.yaml => all_cuda-128_arch-x86_64.yaml} | 4 +-- dependencies.yaml | 8 +++-- python/cugraph/pyproject.toml | 2 +- python/libcugraph/pyproject.toml | 4 +-- python/pylibcugraph/pyproject.toml | 2 +- 11 files changed, 55 insertions(+), 51 deletions(-) rename .devcontainer/{cuda12.5-conda => cuda12.8-conda}/devcontainer.json (91%) rename .devcontainer/{cuda12.5-pip => cuda12.8-pip}/devcontainer.json (88%) rename conda/environments/{all_cuda-125_arch-x86_64.yaml => all_cuda-128_arch-x86_64.yaml} (96%) diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json similarity index 91% rename from .devcontainer/cuda12.5-conda/devcontainer.json rename to .devcontainer/cuda12.8-conda/devcontainer.json index 3ed6fa9c37b..ad36130c6a0 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.8-conda/devcontainer.json @@ -3,7 +3,7 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": 
"${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "conda", "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { @@ -20,7 +20,7 @@ "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent", @@ -29,7 +29,7 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json similarity index 88% rename from .devcontainer/cuda12.5-pip/devcontainer.json rename to .devcontainer/cuda12.8-pip/devcontainer.json index fe402024e29..a2955b81a60 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.8-pip/devcontainer.json @@ -3,20 +3,20 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.8-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { - "version": "12.5", + "version": "12.8", 
"installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, @@ -28,7 +28,7 @@ "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent", @@ -36,7 +36,7 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 7348cf33c1d..b6a6156ecc1 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0 with: build_type: ${{ 
inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -47,7 +47,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-libcugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -80,7 +80,7 @@ jobs: wheel-publish-libcugraph: needs: wheel-build-libcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -91,7 +91,7 @@ jobs: wheel-build-pylibcugraph: needs: wheel-build-libcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -101,7 +101,7 @@ jobs: wheel-publish-pylibcugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -112,7 +112,7 @@ jobs: wheel-build-cugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -122,7 +122,7 @@ jobs: wheel-publish-cugraph: needs: wheel-build-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e48f2e11acd..e4e09fbd279 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -31,7 +31,7 @@ jobs: - telemetry-setup - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda-12.8.0 if: always() with: needs: ${{ toJSON(needs) }} @@ -59,7 +59,7 @@ jobs: changed-files: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@cuda-12.8.0 with: files_yaml: | test_cpp: @@ -90,28 +90,28 @@ jobs: checks: secrets: inherit needs: 
telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda-12.8.0 with: enable_check_generated_files: false ignored_pr_jobs: telemetry-summarize conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0 with: build_type: pull-request node_type: cpu32 conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0 with: build_type: pull-request enable_check_symbols: true @@ -119,20 +119,20 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0 with: build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request conda-notebook-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + 
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks with: build_type: pull-request @@ -143,7 +143,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -153,7 +153,7 @@ jobs: wheel-build-libcugraph: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -162,14 +162,14 @@ jobs: wheel-build-pylibcugraph: needs: wheel-build-libcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: pull-request script: ci/build_wheel_pylibcugraph.sh wheel-tests-pylibcugraph: needs: [wheel-build-pylibcugraph, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -177,14 +177,14 @@ jobs: wheel-build-cugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 with: build_type: pull-request script: ci/build_wheel_cugraph.sh wheel-tests-cugraph: 
needs: [wheel-build-cugraph, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -192,10 +192,10 @@ jobs: devcontainer: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda-12.8.0 with: arch: '["amd64"]' - cuda: '["12.5"]' + cuda: '["12.8"]' node_type: cpu32 build_command: | sccache -z; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4aa698c987f..a5c1e9a823f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibcugraph: secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibcugraph.sh wheel-tests-cugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 01dd2436beb..07f0f83cc92 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda-12.8.0 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml similarity index 96% rename from conda/environments/all_cuda-125_arch-x86_64.yaml rename to conda/environments/all_cuda-128_arch-x86_64.yaml index 83126df9194..22c5f594a42 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=12.6.2,<13.0a0 -- cuda-version=12.5 +- cuda-version=12.8 - cudf==25.2.*,>=0.0.0a0 - cupy>=12.0.0 - cxx-compiler @@ -77,4 +77,4 @@ dependencies: - torchmetrics - ucx-py==0.42.*,>=0.0.0a0 - wheel -name: all_cuda-125_arch-x86_64 +name: all_cuda-128_arch-x86_64 diff --git a/dependencies.yaml 
b/dependencies.yaml index 02fa03cff70..5419be9beda 100755 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 +3,7 @@ files: all: output: [conda] matrix: - cuda: ["11.8", "12.5"] + cuda: ["11.8", "12.8"] arch: [x86_64] includes: - checks @@ -279,6 +279,10 @@ dependencies: cuda: "12.5" packages: - cuda-version=12.5 + - matrix: + cuda: "12.8" + packages: + - cuda-version=12.8 cuda: specific: - output_types: [conda] @@ -300,7 +304,7 @@ dependencies: - cuda-nvtx common_build: common: - - output_types: [conda, pyproject] + - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4,!=3.30.0 - ninja diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml index dfe3b085fdf..060d4ee1e99 100644 --- a/python/cugraph/pyproject.toml +++ b/python/cugraph/pyproject.toml @@ -72,7 +72,7 @@ build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["cugraph"] diff --git a/python/libcugraph/pyproject.toml b/python/libcugraph/pyproject.toml index a6191e28000..9d85bfa5dac 100644 --- a/python/libcugraph/pyproject.toml +++ b/python/libcugraph/pyproject.toml @@ -53,14 +53,14 @@ select = [ ] # detect when package size grows significantly -max_allowed_size_compressed = '1.2G' +max_allowed_size_compressed = '1.4G' [tool.scikit-build] build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["libcugraph"] wheel.install-dir = "libcugraph" diff --git a/python/pylibcugraph/pyproject.toml b/python/pylibcugraph/pyproject.toml index ac124e1fd5f..3c50a79bfa3 100644 --- a/python/pylibcugraph/pyproject.toml +++ b/python/pylibcugraph/pyproject.toml @@ -57,7 +57,7 @@ build-dir = "build/{wheel_tag}" 
cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["pylibcugraph"] From dc435cfcc9636f02f22ce81362c4ccf617a6d12d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 31 Jan 2025 05:27:41 -0800 Subject: [PATCH 9/9] Revert CUDA 12.8 shared workflow branch changes (#4906) This PR points the shared workflow branches back to the default 25.02 branches. xref: https://github.com/rapidsai/build-planning/issues/139 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cugraph/pull/4906 --- .github/workflows/build.yaml | 20 ++++++------ .github/workflows/pr.yaml | 32 +++++++++---------- .github/workflows/test.yaml | 10 +++--- .../trigger-breaking-change-alert.yaml | 2 +- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index b6a6156ecc1..7348cf33c1d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -47,7 +47,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda-12.8.0 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-libcugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -80,7 +80,7 @@ jobs: wheel-publish-libcugraph: needs: wheel-build-libcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -91,7 +91,7 @@ jobs: wheel-build-pylibcugraph: needs: wheel-build-libcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -101,7 +101,7 @@ jobs: wheel-publish-pylibcugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -112,7 +112,7 @@ jobs: wheel-build-cugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -122,7 +122,7 @@ jobs: wheel-publish-cugraph: needs: wheel-build-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e4e09fbd279..8e3134b896e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -31,7 +31,7 @@ jobs: - telemetry-setup - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} @@ -59,7 +59,7 @@ jobs: changed-files: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: files_yaml: | test_cpp: @@ -90,28 +90,28 @@ jobs: checks: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false ignored_pr_jobs: telemetry-summarize conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: pull-request node_type: cpu32 conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: pull-request enable_check_symbols: true @@ -119,20 +119,20 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request conda-notebook-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks with: build_type: pull-request @@ -143,7 +143,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -153,7 +153,7 @@ jobs: wheel-build-libcugraph: needs: checks secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -162,14 +162,14 @@ jobs: wheel-build-pylibcugraph: needs: wheel-build-libcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_pylibcugraph.sh wheel-tests-pylibcugraph: needs: [wheel-build-pylibcugraph, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -177,14 +177,14 @@ jobs: wheel-build-cugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_cugraph.sh wheel-tests-cugraph: needs: [wheel-build-cugraph, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -192,7 +192,7 @@ jobs: devcontainer: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda-12.8.0 + uses: 
rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' cuda: '["12.8"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a5c1e9a823f..4aa698c987f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibcugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibcugraph.sh wheel-tests-cugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git 
a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 07f0f83cc92..01dd2436beb 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda-12.8.0 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }}