From 8fb86fba90e23f657751d372c6e08055754f907a Mon Sep 17 00:00:00 2001 From: Naim Date: Sat, 9 Mar 2024 02:14:49 +0100 Subject: [PATCH 1/6] Make vertex and edge shuffling function public --- cpp/include/cugraph/graph_partition_utils.cuh | 71 +++++++++++++++++++ cpp/src/community/detail/common_methods.cuh | 3 +- cpp/src/community/detail/refine_impl.cuh | 6 +- cpp/src/detail/collect_local_vertex_values.cu | 3 +- cpp/src/detail/graph_partition_utils.cuh | 45 ------------ cpp/src/detail/shuffle_vertex_pairs.cu | 3 +- cpp/src/detail/shuffle_vertices.cu | 12 ++-- ...m_reduce_dst_key_aggregated_outgoing_e.cuh | 5 +- .../transform_reduce_e_by_src_dst_key.cuh | 3 +- .../create_graph_from_edgelist_impl.cuh | 5 +- cpp/src/structure/relabel_impl.cuh | 3 +- cpp/src/structure/renumber_edgelist_impl.cuh | 13 ++-- cpp/src/structure/renumber_utils_impl.cuh | 3 +- cpp/tests/utilities/csv_file_utilities.cu | 3 +- .../utilities/matrix_market_file_utilities.cu | 5 +- 15 files changed, 110 insertions(+), 73 deletions(-) create mode 100644 cpp/include/cugraph/graph_partition_utils.cuh diff --git a/cpp/include/cugraph/graph_partition_utils.cuh b/cpp/include/cugraph/graph_partition_utils.cuh new file mode 100644 index 0000000000..792858f92f --- /dev/null +++ b/cpp/include/cugraph/graph_partition_utils.cuh @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +#include + +namespace cugraph { + +template +struct compute_gpu_id_from_ext_vertex_t { + int comm_size{0}; + int major_comm_size{0}; + int minor_comm_size{0}; + + __host__ __device__ int operator()(vertex_t v) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto vertex_partition_id = static_cast(hash_func(v) % comm_size); + return partition_manager::compute_global_comm_rank_from_vertex_partition_id( + major_comm_size, minor_comm_size, vertex_partition_id); + } +}; + +template +struct compute_gpu_id_from_ext_edge_endpoints_t { + int comm_size{0}; + int major_comm_size{0}; + int minor_comm_size{0}; + + __host__ __device__ int operator()(vertex_t major, vertex_t minor) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto major_vertex_partition_id = static_cast(hash_func(major) % comm_size); + auto minor_vertex_partition_id = static_cast(hash_func(minor) % comm_size); + auto major_comm_rank = major_vertex_partition_id % major_comm_size; + auto minor_comm_rank = minor_vertex_partition_id / major_comm_size; + return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks( + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); + } + + __host__ __device__ int operator()( + thrust::tuple pair /* major, minor */) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto major_vertex_partition_id = static_cast(hash_func(thrust::get<0>(pair)) % comm_size); + auto minor_vertex_partition_id = static_cast(hash_func(thrust::get<1>(pair)) % comm_size); + auto major_comm_rank = major_vertex_partition_id % major_comm_size; + auto minor_comm_rank = minor_vertex_partition_id / major_comm_size; + return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks( + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); + } +}; + +} // namespace cugraph diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index fe0a415db3..34c5ffcb75 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -278,7 +279,7 @@ rmm::device_uvector update_clustering_by_delta_modularity( auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ + cugraph::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ comm_size, major_comm_size, minor_comm_size}; kv_store_t cluster_key_weight_map( diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index b767ce7d8b..862c6cde12 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -178,7 +178,7 @@ refine_clustering( auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ + cugraph::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ comm_size, major_comm_size, minor_comm_size}; vertex_louvain_cluster_weights = @@ -460,7 +460,7 @@ refine_clustering( major_comm_size, minor_comm_size}; - // cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ + // cugraph::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ // comm_size, major_comm_size, minor_comm_size}; louvain_of_leiden_keys_used_in_edge_reduction = @@ -850,7 +850,7 @@ refine_clustering( major_comm_size, minor_comm_size}; - // cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ + // cugraph::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ // comm_size, major_comm_size, minor_comm_size}; lovain_of_leiden_cluster_keys = diff --git a/cpp/src/detail/collect_local_vertex_values.cu b/cpp/src/detail/collect_local_vertex_values.cu index 2726e33b92..511346552a 100644 --- a/cpp/src/detail/collect_local_vertex_values.cu +++ b/cpp/src/detail/collect_local_vertex_values.cu @@ -17,6 +17,7 @@ #include "detail/graph_partition_utils.cuh" #include +#include #include #include @@ -50,7 +51,7 @@ rmm::device_uvector collect_local_vertex_values_from_ext_vertex_value_p d_vertices.begin(), d_vertices.end(), d_values.begin(), - cugraph::detail::compute_gpu_id_from_ext_vertex_t{ + cugraph::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}, handle.get_stream()); } diff --git a/cpp/src/detail/graph_partition_utils.cuh b/cpp/src/detail/graph_partition_utils.cuh index 957436459c..423e9383e9 100644 --- a/cpp/src/detail/graph_partition_utils.cuh +++ b/cpp/src/detail/graph_partition_utils.cuh @@ -35,21 +35,6 @@ namespace cugraph { namespace detail { -template -struct compute_gpu_id_from_ext_vertex_t { - int comm_size{0}; - int major_comm_size{0}; - int minor_comm_size{0}; - - __host__ __device__ int operator()(vertex_t v) const - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto vertex_partition_id = static_cast(hash_func(v) % comm_size); - return partition_manager::compute_global_comm_rank_from_vertex_partition_id( - major_comm_size, minor_comm_size, vertex_partition_id); - } -}; - template struct compute_gpu_id_from_int_vertex_t { raft::device_span vertex_partition_range_lasts{}; @@ -91,36 +76,6 @@ struct compute_vertex_partition_id_from_int_vertex_t { } }; -template -struct compute_gpu_id_from_ext_edge_endpoints_t { - int comm_size{0}; - int major_comm_size{0}; - int minor_comm_size{0}; - - __host__ __device__ int operator()(vertex_t major, vertex_t minor) const - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto major_vertex_partition_id = static_cast(hash_func(major) % comm_size); - auto minor_vertex_partition_id = static_cast(hash_func(minor) % comm_size); - auto major_comm_rank = major_vertex_partition_id % major_comm_size; - auto minor_comm_rank = minor_vertex_partition_id / major_comm_size; - return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks( - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); - } - - __host__ __device__ int operator()( - thrust::tuple pair /* major, minor */) const - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto major_vertex_partition_id = static_cast(hash_func(thrust::get<0>(pair)) % comm_size); - auto minor_vertex_partition_id = static_cast(hash_func(thrust::get<1>(pair)) % comm_size); - auto major_comm_rank = major_vertex_partition_id % major_comm_size; - auto minor_comm_rank = minor_vertex_partition_id / major_comm_size; - return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks( - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); - } -}; - template struct compute_gpu_id_from_int_edge_endpoints_t { raft::device_span vertex_partition_range_lasts{}; diff --git a/cpp/src/detail/shuffle_vertex_pairs.cu b/cpp/src/detail/shuffle_vertex_pairs.cu index 33a7834f5f..74036f890b 100644 --- a/cpp/src/detail/shuffle_vertex_pairs.cu +++ b/cpp/src/detail/shuffle_vertex_pairs.cu @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -308,7 +309,7 @@ shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( std::move(weights), std::move(edge_ids), std::move(edge_types), - cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ + cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}); } diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/detail/shuffle_vertices.cu index be6875f107..5bc056eb52 100644 --- a/cpp/src/detail/shuffle_vertices.cu +++ b/cpp/src/detail/shuffle_vertices.cu @@ -16,6 +16,7 @@ #include "detail/graph_partition_utils.cuh" #include +#include #include #include @@ -73,11 +74,10 @@ rmm::device_uvector shuffle_ext_vertices_to_local_gpu_by_vertex_partit auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - return shuffle_vertices_by_gpu_id_impl( - handle, - std::move(vertices), - cugraph::detail::compute_gpu_id_from_ext_vertex_t{ - comm_size, major_comm_size, minor_comm_size}); + return shuffle_vertices_by_gpu_id_impl(handle, + std::move(vertices), + cugraph::compute_gpu_id_from_ext_vertex_t{ + comm_size, major_comm_size, minor_comm_size}); } template @@ -97,7 +97,7 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( handle, std::move(vertices), std::move(values), - cugraph::detail::compute_gpu_id_from_ext_vertex_t{ + cugraph::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}); } diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 3b25ae5077..5b6331b0f2 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -205,7 +206,7 @@ struct reduce_with_init_t { * destinations assigned to this process in multi-GPU). Use cugraph::edge_dst_property_t::view(). * Use update_edge_dst_property to fill the wrapper. * @param map_unique_key_first Iterator pointing to the first (inclusive) key in (key, value) pairs - * (assigned to this process in multi-GPU, `cugraph::detail::compute_gpu_id_from_ext_vertex_t` is + * (assigned to this process in multi-GPU, `cugraph::compute_gpu_id_from_ext_vertex_t` is * used to map keys to processes). (Key, value) pairs may be provided by * transform_reduce_by_src_key_e() or transform_reduce_by_dst_key_e(). * @param map_unique_key_last Iterator pointing to the last (exclusive) key in (key, value) pairs @@ -730,7 +731,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( collect_values_for_unique_keys(handle, kv_store_view, std::move(unique_minor_keys), - cugraph::detail::compute_gpu_id_from_ext_vertex_t{ + cugraph::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}); if constexpr (KVStoreViewType::binary_search) { diff --git a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh index 4220308507..60da4d1f87 100644 --- a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh +++ b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -564,7 +565,7 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, tmp_keys.end(), get_dataframe_buffer_begin(tmp_value_buffer), [key_func = - detail::compute_gpu_id_from_ext_vertex_t{ + cugraph::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto val) { return key_func(val); }, diff --git a/cpp/src/structure/create_graph_from_edgelist_impl.cuh b/cpp/src/structure/create_graph_from_edgelist_impl.cuh index 1c15842982..fb409f8433 100644 --- a/cpp/src/structure/create_graph_from_edgelist_impl.cuh +++ b/cpp/src/structure/create_graph_from_edgelist_impl.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -148,7 +149,7 @@ void expensive_check_edgelist(raft::handle_t const& handle, (*vertices).end(), [comm_rank, key_func = - detail::compute_gpu_id_from_ext_vertex_t{ + cugraph::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto val) { return key_func(val) != comm_rank; }) == 0, @@ -163,7 +164,7 @@ void expensive_check_edgelist(raft::handle_t const& handle, edge_first + edgelist_majors.size(), [comm_rank, gpu_id_key_func = - detail::compute_gpu_id_from_ext_edge_endpoints_t{ + cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto e) { return (gpu_id_key_func(e) != comm_rank); }) == 0, diff --git a/cpp/src/structure/relabel_impl.cuh b/cpp/src/structure/relabel_impl.cuh index ce18ec893d..d265871b0a 100644 --- a/cpp/src/structure/relabel_impl.cuh +++ b/cpp/src/structure/relabel_impl.cuh @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -67,7 +68,7 @@ void relabel(raft::handle_t const& handle, auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - auto key_func = detail::compute_gpu_id_from_ext_vertex_t{ + auto key_func = cugraph::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}; // find unique old labels (to be relabeled) diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh index 41f81d72ab..b133ac5fe5 100644 --- a/cpp/src/structure/renumber_edgelist_impl.cuh +++ b/cpp/src/structure/renumber_edgelist_impl.cuh @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -78,7 +79,7 @@ template struct find_unused_id_t { raft::device_span sorted_local_vertices{}; size_t num_workers{}; - compute_gpu_id_from_ext_vertex_t gpu_id_op{}; + cugraph::compute_gpu_id_from_ext_vertex_t gpu_id_op{}; int comm_rank{}; vertex_t invalid_id{}; @@ -199,7 +200,7 @@ std::optional find_locally_unused_ext_vertex_id( auto num_workers = std::min(static_cast(handle.get_device_properties().multiProcessorCount) * size_t{1024}, sorted_local_vertices.size() + size_t{1}); - auto gpu_id_op = compute_gpu_id_from_ext_vertex_t{int{1}, int{1}, int{1}}; + auto gpu_id_op = cugraph::compute_gpu_id_from_ext_vertex_t{int{1}, int{1}, int{1}}; if (multi_gpu && (handle.get_comms().get_size() > int{1})) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -207,8 +208,8 @@ std::optional find_locally_unused_ext_vertex_id( auto const major_comm_size = major_comm.get_size(); auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - gpu_id_op = - compute_gpu_id_from_ext_vertex_t{comm_size, major_comm_size, minor_comm_size}; + gpu_id_op = cugraph::compute_gpu_id_from_ext_vertex_t{ + comm_size, major_comm_size, minor_comm_size}; } auto unused_id = thrust::transform_reduce( handle.get_thrust_policy(), @@ -665,7 +666,7 @@ void expensive_check_edgelist( minor_comm_rank, i, gpu_id_key_func = - detail::compute_gpu_id_from_ext_edge_endpoints_t{ + cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}, local_edge_partition_id_key_func = detail::compute_local_edge_partition_id_from_ext_edge_endpoints_t{ @@ -708,7 +709,7 @@ void expensive_check_edgelist( (*sorted_local_vertices).end(), [comm_rank, key_func = - detail::compute_gpu_id_from_ext_vertex_t{ + cugraph::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto val) { return key_func(val) != comm_rank; }) == 0, diff --git a/cpp/src/structure/renumber_utils_impl.cuh b/cpp/src/structure/renumber_utils_impl.cuh index 3efa58d963..90f3e3dff6 100644 --- a/cpp/src/structure/renumber_utils_impl.cuh +++ b/cpp/src/structure/renumber_utils_impl.cuh @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -405,7 +406,7 @@ void renumber_ext_vertices(raft::handle_t const& handle, collect_values_for_unique_keys(handle, local_renumber_map.view(), std::move(sorted_unique_ext_vertices), - detail::compute_gpu_id_from_ext_vertex_t{ + cugraph::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}); renumber_map_ptr = std::make_unique>( diff --git a/cpp/tests/utilities/csv_file_utilities.cu b/cpp/tests/utilities/csv_file_utilities.cu index d801b18cf0..5afa4e3920 100644 --- a/cpp/tests/utilities/csv_file_utilities.cu +++ b/cpp/tests/utilities/csv_file_utilities.cu @@ -18,6 +18,7 @@ #include "utilities/test_utilities.hpp" #include +#include #include #include @@ -219,7 +220,7 @@ read_edgelist_from_csv_file(raft::handle_t const& handle, auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - auto edge_key_func = cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ + auto edge_key_func = cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}; size_t number_of_local_edges{}; if (d_edgelist_weights) { diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index b76e9fdf8c..b901dda5fd 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -18,6 +18,7 @@ #include "utilities/test_utilities.hpp" #include +#include #include #include @@ -330,7 +331,7 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - auto vertex_key_func = cugraph::detail::compute_gpu_id_from_ext_vertex_t{ + auto vertex_key_func = cugraph::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}; d_vertices.resize( thrust::distance(d_vertices.begin(), @@ -342,7 +343,7 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, handle.get_stream()); d_vertices.shrink_to_fit(handle.get_stream()); - auto edge_key_func = cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ + auto edge_key_func = cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}; size_t number_of_local_edges{}; if (d_edgelist_weights) { From a934252f611b352801b076746c695d9aa756c333 Mon Sep 17 00:00:00 2001 From: Naim Date: Sun, 10 Mar 2024 16:07:52 +0100 Subject: [PATCH 2/6] Revert "Make vertex and edge shuffling function public" This reverts commit 8fb86fba90e23f657751d372c6e08055754f907a. --- cpp/include/cugraph/graph_partition_utils.cuh | 71 ------------------- cpp/src/community/detail/common_methods.cuh | 3 +- cpp/src/community/detail/refine_impl.cuh | 6 +- cpp/src/detail/collect_local_vertex_values.cu | 3 +- cpp/src/detail/graph_partition_utils.cuh | 45 ++++++++++++ cpp/src/detail/shuffle_vertex_pairs.cu | 3 +- cpp/src/detail/shuffle_vertices.cu | 12 ++-- ...m_reduce_dst_key_aggregated_outgoing_e.cuh | 5 +- .../transform_reduce_e_by_src_dst_key.cuh | 3 +- .../create_graph_from_edgelist_impl.cuh | 5 +- cpp/src/structure/relabel_impl.cuh | 3 +- cpp/src/structure/renumber_edgelist_impl.cuh | 13 ++-- cpp/src/structure/renumber_utils_impl.cuh | 3 +- cpp/tests/utilities/csv_file_utilities.cu | 3 +- .../utilities/matrix_market_file_utilities.cu | 5 +- 15 files changed, 73 insertions(+), 110 deletions(-) delete mode 100644 cpp/include/cugraph/graph_partition_utils.cuh diff --git a/cpp/include/cugraph/graph_partition_utils.cuh b/cpp/include/cugraph/graph_partition_utils.cuh deleted file mode 100644 index 792858f92f..0000000000 --- a/cpp/include/cugraph/graph_partition_utils.cuh +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include - -#include - -namespace cugraph { - -template -struct compute_gpu_id_from_ext_vertex_t { - int comm_size{0}; - int major_comm_size{0}; - int minor_comm_size{0}; - - __host__ __device__ int operator()(vertex_t v) const - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto vertex_partition_id = static_cast(hash_func(v) % comm_size); - return partition_manager::compute_global_comm_rank_from_vertex_partition_id( - major_comm_size, minor_comm_size, vertex_partition_id); - } -}; - -template -struct compute_gpu_id_from_ext_edge_endpoints_t { - int comm_size{0}; - int major_comm_size{0}; - int minor_comm_size{0}; - - __host__ __device__ int operator()(vertex_t major, vertex_t minor) const - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto major_vertex_partition_id = static_cast(hash_func(major) % comm_size); - auto minor_vertex_partition_id = static_cast(hash_func(minor) % comm_size); - auto major_comm_rank = major_vertex_partition_id % major_comm_size; - auto minor_comm_rank = minor_vertex_partition_id / major_comm_size; - return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks( - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); - } - - __host__ __device__ int operator()( - thrust::tuple pair /* major, minor */) const - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto major_vertex_partition_id = static_cast(hash_func(thrust::get<0>(pair)) % comm_size); - auto minor_vertex_partition_id = static_cast(hash_func(thrust::get<1>(pair)) % comm_size); - auto major_comm_rank = major_vertex_partition_id % major_comm_size; - auto minor_comm_rank = minor_vertex_partition_id / major_comm_size; - return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks( - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); - } -}; - -} // namespace cugraph diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index 34c5ffcb75..fe0a415db3 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -28,7 +28,6 @@ #include #include -#include #include #include @@ -279,7 +278,7 @@ rmm::device_uvector update_clustering_by_delta_modularity( auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - cugraph::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ + cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ comm_size, major_comm_size, minor_comm_size}; kv_store_t cluster_key_weight_map( diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 862c6cde12..b767ce7d8b 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -178,7 +178,7 @@ refine_clustering( auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - cugraph::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ + cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ comm_size, major_comm_size, minor_comm_size}; vertex_louvain_cluster_weights = @@ -460,7 +460,7 @@ refine_clustering( major_comm_size, minor_comm_size}; - // cugraph::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ + // cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ // comm_size, major_comm_size, minor_comm_size}; louvain_of_leiden_keys_used_in_edge_reduction = @@ -850,7 +850,7 @@ refine_clustering( major_comm_size, minor_comm_size}; - // cugraph::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ + // cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ // comm_size, major_comm_size, minor_comm_size}; lovain_of_leiden_cluster_keys = diff --git a/cpp/src/detail/collect_local_vertex_values.cu b/cpp/src/detail/collect_local_vertex_values.cu index 511346552a..2726e33b92 100644 --- a/cpp/src/detail/collect_local_vertex_values.cu +++ b/cpp/src/detail/collect_local_vertex_values.cu @@ -17,7 +17,6 @@ #include "detail/graph_partition_utils.cuh" #include -#include #include #include @@ -51,7 +50,7 @@ rmm::device_uvector collect_local_vertex_values_from_ext_vertex_value_p d_vertices.begin(), d_vertices.end(), d_values.begin(), - cugraph::compute_gpu_id_from_ext_vertex_t{ + cugraph::detail::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}, handle.get_stream()); } diff --git a/cpp/src/detail/graph_partition_utils.cuh b/cpp/src/detail/graph_partition_utils.cuh index 423e9383e9..957436459c 100644 --- a/cpp/src/detail/graph_partition_utils.cuh +++ b/cpp/src/detail/graph_partition_utils.cuh @@ -35,6 +35,21 @@ namespace cugraph { namespace detail { +template +struct compute_gpu_id_from_ext_vertex_t { + int comm_size{0}; + int major_comm_size{0}; + int minor_comm_size{0}; + + __host__ __device__ int operator()(vertex_t v) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto vertex_partition_id = static_cast(hash_func(v) % comm_size); + return partition_manager::compute_global_comm_rank_from_vertex_partition_id( + major_comm_size, minor_comm_size, vertex_partition_id); + } +}; + template struct compute_gpu_id_from_int_vertex_t { raft::device_span vertex_partition_range_lasts{}; @@ -76,6 +91,36 @@ struct compute_vertex_partition_id_from_int_vertex_t { } }; +template +struct compute_gpu_id_from_ext_edge_endpoints_t { + int comm_size{0}; + int major_comm_size{0}; + int minor_comm_size{0}; + + __host__ __device__ int operator()(vertex_t major, vertex_t minor) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto major_vertex_partition_id = static_cast(hash_func(major) % comm_size); + auto minor_vertex_partition_id = static_cast(hash_func(minor) % comm_size); + auto major_comm_rank = major_vertex_partition_id % major_comm_size; + auto minor_comm_rank = minor_vertex_partition_id / major_comm_size; + return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks( + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); + } + + __host__ __device__ int operator()( + thrust::tuple pair /* major, minor */) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto major_vertex_partition_id = static_cast(hash_func(thrust::get<0>(pair)) % comm_size); + auto minor_vertex_partition_id = static_cast(hash_func(thrust::get<1>(pair)) % comm_size); + auto major_comm_rank = major_vertex_partition_id % major_comm_size; + auto minor_comm_rank = minor_vertex_partition_id / major_comm_size; + return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks( + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); + } +}; + template struct compute_gpu_id_from_int_edge_endpoints_t { raft::device_span vertex_partition_range_lasts{}; diff --git a/cpp/src/detail/shuffle_vertex_pairs.cu b/cpp/src/detail/shuffle_vertex_pairs.cu index 74036f890b..33a7834f5f 100644 --- a/cpp/src/detail/shuffle_vertex_pairs.cu +++ b/cpp/src/detail/shuffle_vertex_pairs.cu @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -309,7 +308,7 @@ shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( std::move(weights), std::move(edge_ids), std::move(edge_types), - cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ + cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}); } diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/detail/shuffle_vertices.cu index 5bc056eb52..be6875f107 100644 --- a/cpp/src/detail/shuffle_vertices.cu +++ b/cpp/src/detail/shuffle_vertices.cu @@ -16,7 +16,6 @@ #include "detail/graph_partition_utils.cuh" #include -#include #include #include @@ -74,10 +73,11 @@ rmm::device_uvector shuffle_ext_vertices_to_local_gpu_by_vertex_partit auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - return shuffle_vertices_by_gpu_id_impl(handle, - std::move(vertices), - cugraph::compute_gpu_id_from_ext_vertex_t{ - comm_size, major_comm_size, minor_comm_size}); + return shuffle_vertices_by_gpu_id_impl( + handle, + std::move(vertices), + cugraph::detail::compute_gpu_id_from_ext_vertex_t{ + comm_size, major_comm_size, minor_comm_size}); } template @@ -97,7 +97,7 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( handle, std::move(vertices), std::move(values), - cugraph::compute_gpu_id_from_ext_vertex_t{ + cugraph::detail::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}); } diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 5b6331b0f2..3b25ae5077 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -206,7 +205,7 @@ struct reduce_with_init_t { * destinations assigned to this process in multi-GPU). Use cugraph::edge_dst_property_t::view(). * Use update_edge_dst_property to fill the wrapper. * @param map_unique_key_first Iterator pointing to the first (inclusive) key in (key, value) pairs - * (assigned to this process in multi-GPU, `cugraph::compute_gpu_id_from_ext_vertex_t` is + * (assigned to this process in multi-GPU, `cugraph::detail::compute_gpu_id_from_ext_vertex_t` is * used to map keys to processes). (Key, value) pairs may be provided by * transform_reduce_by_src_key_e() or transform_reduce_by_dst_key_e(). * @param map_unique_key_last Iterator pointing to the last (exclusive) key in (key, value) pairs @@ -731,7 +730,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( collect_values_for_unique_keys(handle, kv_store_view, std::move(unique_minor_keys), - cugraph::compute_gpu_id_from_ext_vertex_t{ + cugraph::detail::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}); if constexpr (KVStoreViewType::binary_search) { diff --git a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh index 60da4d1f87..4220308507 100644 --- a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh +++ b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -565,7 +564,7 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, tmp_keys.end(), get_dataframe_buffer_begin(tmp_value_buffer), [key_func = - cugraph::compute_gpu_id_from_ext_vertex_t{ + detail::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto val) { return key_func(val); }, diff --git a/cpp/src/structure/create_graph_from_edgelist_impl.cuh b/cpp/src/structure/create_graph_from_edgelist_impl.cuh index fb409f8433..1c15842982 100644 --- a/cpp/src/structure/create_graph_from_edgelist_impl.cuh +++ b/cpp/src/structure/create_graph_from_edgelist_impl.cuh @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -149,7 +148,7 @@ void expensive_check_edgelist(raft::handle_t const& handle, (*vertices).end(), [comm_rank, key_func = - cugraph::compute_gpu_id_from_ext_vertex_t{ + detail::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto val) { return key_func(val) != comm_rank; }) == 0, @@ -164,7 +163,7 @@ void expensive_check_edgelist(raft::handle_t const& handle, edge_first + edgelist_majors.size(), [comm_rank, gpu_id_key_func = - cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ + detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto e) { return (gpu_id_key_func(e) != comm_rank); }) == 0, diff --git a/cpp/src/structure/relabel_impl.cuh b/cpp/src/structure/relabel_impl.cuh index d265871b0a..ce18ec893d 100644 --- a/cpp/src/structure/relabel_impl.cuh +++ b/cpp/src/structure/relabel_impl.cuh @@ -20,7 +20,6 @@ #include #include -#include #include #include #include @@ -68,7 +67,7 @@ void relabel(raft::handle_t const& handle, auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - auto key_func = cugraph::compute_gpu_id_from_ext_vertex_t{ + auto key_func = detail::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}; // find unique old labels (to be relabeled) diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh index b133ac5fe5..41f81d72ab 100644 --- a/cpp/src/structure/renumber_edgelist_impl.cuh +++ b/cpp/src/structure/renumber_edgelist_impl.cuh @@ -20,7 +20,6 @@ #include #include -#include #include #include #include @@ -79,7 +78,7 @@ template struct find_unused_id_t { raft::device_span sorted_local_vertices{}; size_t num_workers{}; - cugraph::compute_gpu_id_from_ext_vertex_t gpu_id_op{}; + compute_gpu_id_from_ext_vertex_t gpu_id_op{}; int comm_rank{}; vertex_t invalid_id{}; @@ -200,7 +199,7 @@ std::optional find_locally_unused_ext_vertex_id( auto num_workers = std::min(static_cast(handle.get_device_properties().multiProcessorCount) * size_t{1024}, sorted_local_vertices.size() + size_t{1}); - auto gpu_id_op = cugraph::compute_gpu_id_from_ext_vertex_t{int{1}, int{1}, int{1}}; + auto gpu_id_op = compute_gpu_id_from_ext_vertex_t{int{1}, int{1}, int{1}}; if (multi_gpu && (handle.get_comms().get_size() > int{1})) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -208,8 +207,8 @@ std::optional find_locally_unused_ext_vertex_id( auto const major_comm_size = major_comm.get_size(); auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - gpu_id_op = cugraph::compute_gpu_id_from_ext_vertex_t{ - comm_size, major_comm_size, minor_comm_size}; + gpu_id_op = + compute_gpu_id_from_ext_vertex_t{comm_size, major_comm_size, minor_comm_size}; } auto unused_id = thrust::transform_reduce( handle.get_thrust_policy(), @@ -666,7 +665,7 @@ void expensive_check_edgelist( minor_comm_rank, i, gpu_id_key_func = - cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ + detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}, local_edge_partition_id_key_func = detail::compute_local_edge_partition_id_from_ext_edge_endpoints_t{ @@ -709,7 +708,7 @@ void expensive_check_edgelist( (*sorted_local_vertices).end(), [comm_rank, key_func = - cugraph::compute_gpu_id_from_ext_vertex_t{ + detail::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto val) { return key_func(val) != comm_rank; }) == 0, diff --git a/cpp/src/structure/renumber_utils_impl.cuh b/cpp/src/structure/renumber_utils_impl.cuh index 90f3e3dff6..3efa58d963 100644 --- a/cpp/src/structure/renumber_utils_impl.cuh +++ b/cpp/src/structure/renumber_utils_impl.cuh @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -406,7 +405,7 @@ void renumber_ext_vertices(raft::handle_t const& handle, collect_values_for_unique_keys(handle, local_renumber_map.view(), std::move(sorted_unique_ext_vertices), - cugraph::compute_gpu_id_from_ext_vertex_t{ + detail::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}); renumber_map_ptr = std::make_unique>( diff --git a/cpp/tests/utilities/csv_file_utilities.cu b/cpp/tests/utilities/csv_file_utilities.cu index 5afa4e3920..d801b18cf0 100644 --- a/cpp/tests/utilities/csv_file_utilities.cu +++ b/cpp/tests/utilities/csv_file_utilities.cu @@ -18,7 +18,6 @@ #include "utilities/test_utilities.hpp" #include -#include #include #include @@ -220,7 +219,7 @@ read_edgelist_from_csv_file(raft::handle_t const& handle, auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - auto edge_key_func = cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ + auto edge_key_func = cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}; size_t number_of_local_edges{}; if (d_edgelist_weights) { diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index b901dda5fd..b76e9fdf8c 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -18,7 +18,6 @@ #include "utilities/test_utilities.hpp" #include -#include #include #include @@ -331,7 +330,7 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - auto vertex_key_func = cugraph::compute_gpu_id_from_ext_vertex_t{ + auto vertex_key_func = cugraph::detail::compute_gpu_id_from_ext_vertex_t{ comm_size, major_comm_size, minor_comm_size}; d_vertices.resize( thrust::distance(d_vertices.begin(), @@ -343,7 +342,7 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, handle.get_stream()); d_vertices.shrink_to_fit(handle.get_stream()); - auto edge_key_func = cugraph::compute_gpu_id_from_ext_edge_endpoints_t{ + auto edge_key_func = cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, major_comm_size, minor_comm_size}; size_t number_of_local_edges{}; if (d_edgelist_weights) { From 29e71b05e0582a12b2cb61f9afd4b4e394c2bdd1 Mon Sep 17 00:00:00 2001 From: Naim Date: Mon, 11 Mar 2024 04:21:20 +0100 Subject: [PATCH 3/6] Make external vertex and edge shuffling function public --- cpp/include/cugraph/graph_functions.hpp | 56 ++++++++++++++++++++ cpp/src/detail/shuffle_vertex_pairs.cu | 65 +++++++++++++++++++++++ cpp/src/detail/shuffle_vertices.cu | 70 +++++++++++++++++++++++++ 3 files changed, 191 insertions(+) diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 90425f86be..6fd87f1192 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -1052,4 +1052,60 @@ remove_multi_edges(raft::handle_t const& handle, std::optional>&& edgelist_edge_types, bool keep_min_value_edge = false); +/** + * @brief Shuffle external vertex ids and values to the proper GPU. + * * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam value_t Type of values. Needs to an integral or floating point type. + * + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices List of vertex ids + * @param values List of values + * @return Tuple of vectors storing vertex ids and values mapped to this GPU. + */ +template +std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +/** + * @brief Shuffle external vertex ids to the proper GPU. + * * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices List of vertex ids + * @return Vector of vertex ids mapped to this GPU. + */ +template +rmm::device_uvector shuffle_external_vertices(raft::handle_t const& handle, + rmm::device_uvector&& vertices); + +/** + * @brief Shuffle external edges to the proper GPU. + * * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weight. Currently float and double are supported. + * + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edge_srcs List of source vertex ids + * @param edge_dsts List of destination vertex ids + * @param edge_weights Optional list of edge weights + * @return Tuple of vectors storing edge sources, destinations and optional weights mapped to this + * GPU. + */ +template +std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + } // namespace cugraph diff --git a/cpp/src/detail/shuffle_vertex_pairs.cu b/cpp/src/detail/shuffle_vertex_pairs.cu index 33a7834f5f..62e09ad93d 100644 --- a/cpp/src/detail/shuffle_vertex_pairs.cu +++ b/cpp/src/detail/shuffle_vertex_pairs.cu @@ -519,4 +519,69 @@ shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( std::vector const& vertex_partition_range_lasts); } // namespace detail + +template +std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + + auto const minor_comm_size = minor_comm.get_size(); + + std::tie(edge_srcs, edge_dsts, edge_weights, std::ignore, std::ignore) = + cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< + vertex_t, + vertex_t, + weight_t, + int32_t>(handle, + std::move(edge_srcs), + std::move(edge_dsts), + std::move(edge_weights), + std::nullopt, + std::nullopt); + + return std::make_tuple(std::move(edge_srcs), std::move(edge_dsts), std::move(edge_weights)); +} + +template std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_wgts); + +template std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_wgts); + +template std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_wgts); + +template std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_wgts); + } // namespace cugraph diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/detail/shuffle_vertices.cu index be6875f107..c01251b134 100644 --- a/cpp/src/detail/shuffle_vertices.cu +++ b/cpp/src/detail/shuffle_vertices.cu @@ -249,4 +249,74 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& values); } // namespace detail + +template +std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values) +{ + return detail::shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, std::move(vertices), std::move(values)); +} + +template std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertices_and_values(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template +rmm::device_uvector shuffle_external_vertices(raft::handle_t const& handle, + rmm::device_uvector&& vertices) +{ + return detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning(handle, + std::move(vertices)); +} + +template rmm::device_uvector shuffle_external_vertices( + raft::handle_t const& handle, rmm::device_uvector&& d_vertices); + +template rmm::device_uvector shuffle_external_vertices( + raft::handle_t const& handle, rmm::device_uvector&& d_vertices); + } // namespace cugraph From 4a9ad9977c2243948c4401b658c63c2d31c599b5 Mon Sep 17 00:00:00 2001 From: Naim Date: Mon, 11 Mar 2024 21:52:09 +0100 Subject: [PATCH 4/6] Address PR comments part-1 --- cpp/include/cugraph/graph_functions.hpp | 44 +++++----- cpp/src/detail/shuffle_vertex_pairs.cu | 103 ++++++++++++++++-------- cpp/src/detail/shuffle_vertices.cu | 60 +++++++------- 3 files changed, 125 insertions(+), 82 deletions(-) diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 6fd87f1192..5229e536dd 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -1053,36 +1053,36 @@ remove_multi_edges(raft::handle_t const& handle, bool keep_min_value_edge = false); /** - * @brief Shuffle external vertex ids and values to the proper GPU. + * @brief Shuffle external vertex ids to the proper GPU. * * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @tparam value_t Type of values. Needs to an integral or floating point type. * * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param vertices List of vertex ids - * @param values List of values - * @return Tuple of vectors storing vertex ids and values mapped to this GPU. + * @return Vector of vertex ids mapped to this GPU. */ -template -std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +template +rmm::device_uvector shuffle_external_vertices(raft::handle_t const& handle, + rmm::device_uvector&& vertices); /** - * @brief Shuffle external vertex ids to the proper GPU. + * @brief Shuffle external vertex ids and values to the proper GPU. * * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam value_t Type of values. Needs to an integral or floating point type. * * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param vertices List of vertex ids - * @return Vector of vertex ids mapped to this GPU. + * @param values List of values + * @return Tuple of vectors storing vertex ids and values mapped to this GPU. */ -template -rmm::device_uvector shuffle_external_vertices(raft::handle_t const& handle, - rmm::device_uvector&& vertices); +template +std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); /** * @brief Shuffle external edges to the proper GPU. @@ -1096,16 +1096,22 @@ rmm::device_uvector shuffle_external_vertices(raft::handle_t const& ha * @param edge_srcs List of source vertex ids * @param edge_dsts List of destination vertex ids * @param edge_weights Optional list of edge weights - * @return Tuple of vectors storing edge sources, destinations and optional weights mapped to this - * GPU. + * @param edge_ids Optional list of edge ids + * @param edge_types Optional list of edge types + * @return Tuple of vectors storing edge sources, destinations, optional weights, + * optional edge ids, optional edge types mapped to this GPU. */ -template +template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>, + std::optional>> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& edge_srcs, rmm::device_uvector&& edge_dsts, - std::optional>&& edge_weights); + std::optional>&& edge_weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); } // namespace cugraph diff --git a/cpp/src/detail/shuffle_vertex_pairs.cu b/cpp/src/detail/shuffle_vertex_pairs.cu index 62e09ad93d..b473796aa9 100644 --- a/cpp/src/detail/shuffle_vertex_pairs.cu +++ b/cpp/src/detail/shuffle_vertex_pairs.cu @@ -520,68 +520,105 @@ shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( } // namespace detail -template +template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>, + std::optional>> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& edge_srcs, rmm::device_uvector&& edge_dsts, - std::optional>&& edge_weights) + std::optional>&& edge_weights, + std::optional>&& edge_ids, + std::optional>&& edge_types) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); auto const major_comm_size = major_comm.get_size(); auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - std::tie(edge_srcs, edge_dsts, edge_weights, std::ignore, std::ignore) = - cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< - vertex_t, - vertex_t, - weight_t, - int32_t>(handle, - std::move(edge_srcs), - std::move(edge_dsts), - std::move(edge_weights), - std::nullopt, - std::nullopt); - - return std::make_tuple(std::move(edge_srcs), std::move(edge_dsts), std::move(edge_weights)); + return detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(edge_srcs), + std::move(edge_dsts), + std::move(edge_weights), + std::move(edge_ids), + std::move(edge_types)); } template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>, + std::optional>> shuffle_external_edges(raft::handle_t const& handle, - rmm::device_uvector&& edge_srcs, - rmm::device_uvector&& edge_dsts, - std::optional>&& edge_wgts); + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>, + std::optional>> shuffle_external_edges(raft::handle_t const& handle, - rmm::device_uvector&& edge_srcs, - rmm::device_uvector&& edge_dsts, - std::optional>&& edge_wgts); + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>, + std::optional>> shuffle_external_edges(raft::handle_t const& handle, - rmm::device_uvector&& edge_srcs, - rmm::device_uvector&& edge_dsts, - std::optional>&& edge_wgts); + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>, + std::optional>> shuffle_external_edges(raft::handle_t const& handle, - rmm::device_uvector&& edge_srcs, - rmm::device_uvector&& edge_dsts, - std::optional>&& edge_wgts); + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); } // namespace cugraph diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/detail/shuffle_vertices.cu index c01251b134..b396201f50 100644 --- a/cpp/src/detail/shuffle_vertices.cu +++ b/cpp/src/detail/shuffle_vertices.cu @@ -252,58 +252,58 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values) +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values) { return detail::shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( handle, std::move(vertices), std::move(values)); } template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); template std::tuple, rmm::device_uvector> -shuffle_external_vertices_and_values(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values); +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); template rmm::device_uvector shuffle_external_vertices(raft::handle_t const& handle, From 289bcbadabe1108b044cc1d9528ee85e92489cb9 Mon Sep 17 00:00:00 2001 From: Naim Date: Mon, 11 Mar 2024 22:13:58 +0100 Subject: [PATCH 5/6] Move shuffle_vertex_pairs.cu and shuffle_vertices.cu to utilities --- cpp/CMakeLists.txt | 4 ++-- cpp/src/{detail => utilities}/shuffle_vertex_pairs.cu | 0 cpp/src/{detail => utilities}/shuffle_vertices.cu | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename cpp/src/{detail => utilities}/shuffle_vertex_pairs.cu (100%) rename cpp/src/{detail => utilities}/shuffle_vertices.cu (100%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a3392627fb..3131404712 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -186,9 +186,9 @@ endif() # which should give us a better parallel schedule. set(CUGRAPH_SOURCES - src/detail/shuffle_vertices.cu + src/utilities/shuffle_vertices.cu src/detail/permute_range.cu - src/detail/shuffle_vertex_pairs.cu + src/utilities/shuffle_vertex_pairs.cu src/detail/collect_local_vertex_values.cu src/detail/groupby_and_count.cu src/detail/collect_comm_wrapper.cu diff --git a/cpp/src/detail/shuffle_vertex_pairs.cu b/cpp/src/utilities/shuffle_vertex_pairs.cu similarity index 100% rename from cpp/src/detail/shuffle_vertex_pairs.cu rename to cpp/src/utilities/shuffle_vertex_pairs.cu diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/utilities/shuffle_vertices.cu similarity index 100% rename from cpp/src/detail/shuffle_vertices.cu rename to cpp/src/utilities/shuffle_vertices.cu From aaba770445351ed1a409142de33cfa8d4abe9f25 Mon Sep 17 00:00:00 2001 From: Naim Date: Mon, 11 Mar 2024 22:28:38 +0100 Subject: [PATCH 6/6] Fix doc string --- cpp/include/cugraph/graph_functions.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 5229e536dd..6d4470e825 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -1054,7 +1054,7 @@ remove_multi_edges(raft::handle_t const& handle, /** * @brief Shuffle external vertex ids to the proper GPU. - * * + * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -1068,9 +1068,10 @@ rmm::device_uvector shuffle_external_vertices(raft::handle_t const& ha /** * @brief Shuffle external vertex ids and values to the proper GPU. - * * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @tparam value_t Type of values. Needs to an integral or floating point type. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam value_t Type of values. currently supported types are int32_t, + * int64_t, size_t, float and double. * * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. @@ -1086,7 +1087,7 @@ shuffle_external_vertex_value_pairs(raft::handle_t const& handle, /** * @brief Shuffle external edges to the proper GPU. - * * + * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weight. Currently float and double are supported.