From 22ae61f213102c695ad00946621a06c8834cc117 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 25 Aug 2023 15:53:11 -0700 Subject: [PATCH 1/6] Use thread_index_type to avoid out of bounds accesses --- cpp/src/join/conditional_join_kernels.cuh | 34 ++++++++++++----------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/cpp/src/join/conditional_join_kernels.cuh b/cpp/src/join/conditional_join_kernels.cuh index dc455ad9cef..d3b180ccd8d 100644 --- a/cpp/src/join/conditional_join_kernels.cuh +++ b/cpp/src/join/conditional_join_kernels.cuh @@ -67,20 +67,21 @@ __global__ void compute_conditional_join_output_size( &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; std::size_t thread_counter{0}; - cudf::size_type const start_idx = threadIdx.x + blockIdx.x * block_size; - cudf::size_type const stride = block_size * gridDim.x; - cudf::size_type const left_num_rows = left_table.num_rows(); - cudf::size_type const right_num_rows = right_table.num_rows(); - auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); - auto const inner_num_rows = (swap_tables ? left_num_rows : right_num_rows); + cudf::thread_index_type const start_idx = threadIdx.x + blockIdx.x * block_size; + cudf::thread_index_type const stride = thread_index_type{block_size} * gridDim.x; + cudf::thread_index_type const left_num_rows = left_table.num_rows(); + cudf::thread_index_type const right_num_rows = right_table.num_rows(); + auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); + auto const inner_num_rows = (swap_tables ? left_num_rows : right_num_rows); auto evaluator = cudf::ast::detail::expression_evaluator( left_table, right_table, device_expression_data); - for (cudf::size_type outer_row_index = start_idx; outer_row_index < outer_num_rows; + for (cudf::thread_index_type outer_row_index = start_idx; outer_row_index < outer_num_rows; outer_row_index += stride) { bool found_match = false; - for (cudf::size_type inner_row_index = 0; inner_row_index < inner_num_rows; inner_row_index++) { + for (cudf::thread_index_type inner_row_index = 0; inner_row_index < inner_num_rows; + ++inner_row_index) { auto output_dest = cudf::ast::detail::value_expression_result(); auto const left_row_index = swap_tables ? inner_row_index : outer_row_index; auto const right_row_index = swap_tables ? outer_row_index : inner_row_index; @@ -161,18 +162,18 @@ __global__ void conditional_join(table_device_view left_table, auto thread_intermediate_storage = &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; - int const warp_id = threadIdx.x / detail::warp_size; - int const lane_id = threadIdx.x % detail::warp_size; - cudf::size_type const left_num_rows = left_table.num_rows(); - cudf::size_type const right_num_rows = right_table.num_rows(); - auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); - auto const inner_num_rows = (swap_tables ? left_num_rows : right_num_rows); + int const warp_id = threadIdx.x / detail::warp_size; + int const lane_id = threadIdx.x % detail::warp_size; + cudf::thread_index_type const left_num_rows = left_table.num_rows(); + cudf::thread_index_type const right_num_rows = right_table.num_rows(); + cudf::thread_index_type const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); + cudf::thread_index_type const inner_num_rows = (swap_tables ? left_num_rows : right_num_rows); if (0 == lane_id) { current_idx_shared[warp_id] = 0; } __syncwarp(); - cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; + cudf::thread_index_type outer_row_index = threadIdx.x + blockIdx.x * block_size; unsigned int const activemask = __ballot_sync(0xffff'ffffu, outer_row_index < outer_num_rows); @@ -181,7 +182,8 @@ __global__ void conditional_join(table_device_view left_table, if (outer_row_index < outer_num_rows) { bool found_match = false; - for (size_type inner_row_index(0); inner_row_index < inner_num_rows; ++inner_row_index) { + for (thread_index_type inner_row_index(0); inner_row_index < inner_num_rows; + ++inner_row_index) { auto output_dest = cudf::ast::detail::value_expression_result(); auto const left_row_index = swap_tables ? inner_row_index : outer_row_index; auto const right_row_index = swap_tables ? outer_row_index : inner_row_index; From 4d822f98de0d451b2e4e042e1c491b3975d69850 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 28 Aug 2023 09:57:06 -0700 Subject: [PATCH 2/6] Address reviews --- cpp/src/join/conditional_join_kernels.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/join/conditional_join_kernels.cuh b/cpp/src/join/conditional_join_kernels.cuh index d3b180ccd8d..e832c38e59b 100644 --- a/cpp/src/join/conditional_join_kernels.cuh +++ b/cpp/src/join/conditional_join_kernels.cuh @@ -67,7 +67,7 @@ __global__ void compute_conditional_join_output_size( &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; std::size_t thread_counter{0}; - cudf::thread_index_type const start_idx = threadIdx.x + blockIdx.x * block_size; + auto const start_idx = cudf::detail::grid_1d::global_thread_id(); cudf::thread_index_type const stride = thread_index_type{block_size} * gridDim.x; cudf::thread_index_type const left_num_rows = left_table.num_rows(); cudf::thread_index_type const right_num_rows = right_table.num_rows(); @@ -173,7 +173,7 @@ __global__ void conditional_join(table_device_view left_table, __syncwarp(); - cudf::thread_index_type outer_row_index = threadIdx.x + blockIdx.x * block_size; + auto outer_row_index = cudf::detail::grid_1d::global_thread_id(); unsigned int const activemask = __ballot_sync(0xffff'ffffu, outer_row_index < outer_num_rows); From 7934679867a3dfa3b31098ed30c90944d6eed867 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Sep 2023 09:50:09 -0700 Subject: [PATCH 3/6] PR reviews --- cpp/src/join/conditional_join_kernels.cuh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/join/conditional_join_kernels.cuh b/cpp/src/join/conditional_join_kernels.cuh index e832c38e59b..350384552cd 100644 --- a/cpp/src/join/conditional_join_kernels.cuh +++ b/cpp/src/join/conditional_join_kernels.cuh @@ -67,8 +67,9 @@ __global__ void compute_conditional_join_output_size( &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; std::size_t thread_counter{0}; - auto const start_idx = cudf::detail::grid_1d::global_thread_id(); - cudf::thread_index_type const stride = thread_index_type{block_size} * gridDim.x; + auto const start_idx = cudf::detail::grid_1d::global_thread_id(); + cudf::thread_index_type const stride = cudf::detail::grid_1d::grid_stride(); + ; cudf::thread_index_type const left_num_rows = left_table.num_rows(); cudf::thread_index_type const right_num_rows = right_table.num_rows(); auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); @@ -82,9 +83,9 @@ __global__ void compute_conditional_join_output_size( bool found_match = false; for (cudf::thread_index_type inner_row_index = 0; inner_row_index < inner_num_rows; ++inner_row_index) { - auto output_dest = cudf::ast::detail::value_expression_result(); - auto const left_row_index = swap_tables ? inner_row_index : outer_row_index; - auto const right_row_index = swap_tables ? outer_row_index : inner_row_index; + auto output_dest = cudf::ast::detail::value_expression_result(); + cudf::size_type const left_row_index = swap_tables ? inner_row_index : outer_row_index; + cudf::size_type const right_row_index = swap_tables ? outer_row_index : inner_row_index; evaluator.evaluate( output_dest, left_row_index, right_row_index, 0, thread_intermediate_storage); if (output_dest.is_valid() && output_dest.value()) { From fd887f70488a17a0416e4f06a7816abf5ba942fd Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 7 Sep 2023 11:36:19 -0700 Subject: [PATCH 4/6] Apply suggestions from code review --- cpp/src/join/conditional_join_kernels.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/join/conditional_join_kernels.cuh b/cpp/src/join/conditional_join_kernels.cuh index 350384552cd..1f79cb754a7 100644 --- a/cpp/src/join/conditional_join_kernels.cuh +++ b/cpp/src/join/conditional_join_kernels.cuh @@ -68,7 +68,7 @@ __global__ void compute_conditional_join_output_size( std::size_t thread_counter{0}; auto const start_idx = cudf::detail::grid_1d::global_thread_id(); - cudf::thread_index_type const stride = cudf::detail::grid_1d::grid_stride(); + auto const stride = cudf::detail::grid_1d::grid_stride(); ; cudf::thread_index_type const left_num_rows = left_table.num_rows(); cudf::thread_index_type const right_num_rows = right_table.num_rows(); From 9b9a22cffbd0765f18c038daff5467606be2f496 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 7 Sep 2023 11:51:47 -0700 Subject: [PATCH 5/6] Formatting --- cpp/src/join/conditional_join_kernels.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/join/conditional_join_kernels.cuh b/cpp/src/join/conditional_join_kernels.cuh index 1f79cb754a7..77dfba7aeaa 100644 --- a/cpp/src/join/conditional_join_kernels.cuh +++ b/cpp/src/join/conditional_join_kernels.cuh @@ -67,8 +67,8 @@ __global__ void compute_conditional_join_output_size( &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; std::size_t thread_counter{0}; - auto const start_idx = cudf::detail::grid_1d::global_thread_id(); - auto const stride = cudf::detail::grid_1d::grid_stride(); + auto const start_idx = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); ; cudf::thread_index_type const left_num_rows = left_table.num_rows(); cudf::thread_index_type const right_num_rows = right_table.num_rows(); From 907c793b486941cc8591224d506dcaff2049ba35 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 7 Sep 2023 12:08:31 -0700 Subject: [PATCH 6/6] One more fix --- cpp/src/join/conditional_join_kernels.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/join/conditional_join_kernels.cuh b/cpp/src/join/conditional_join_kernels.cuh index 77dfba7aeaa..f665aba698f 100644 --- a/cpp/src/join/conditional_join_kernels.cuh +++ b/cpp/src/join/conditional_join_kernels.cuh @@ -69,7 +69,7 @@ __global__ void compute_conditional_join_output_size( std::size_t thread_counter{0}; auto const start_idx = cudf::detail::grid_1d::global_thread_id(); auto const stride = cudf::detail::grid_1d::grid_stride(); - ; + cudf::thread_index_type const left_num_rows = left_table.num_rows(); cudf::thread_index_type const right_num_rows = right_table.num_rows(); auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows);