Skip to content

Commit

Permalink
Fix integer overflow in compiled binaryop (#17354)
Browse files Browse the repository at this point in the history
For large columns, the computed stride might end up overflowing size_type. To fix this, use the grid_1d helper. See also #10368.

- Closes #17353

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - David Wendt (https://github.com/davidwendt)
  - Tianyu Liu (https://github.com/kingcrimsontianyu)
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Nghia Truong (https://github.com/ttnghia)

URL: #17354
  • Loading branch information
wence- authored Nov 19, 2024
1 parent 9c5cd81 commit c7bfa77
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 12 deletions.
19 changes: 7 additions & 12 deletions cpp/src/binaryop/compiled/binary_ops.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_view.hpp>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/unary.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -253,16 +253,11 @@ struct binary_op_double_device_dispatcher {
template <typename Functor>
CUDF_KERNEL void for_each_kernel(cudf::size_type size, Functor f)
{
int tid = threadIdx.x;
int blkid = blockIdx.x;
int blksz = blockDim.x;
int gridsz = gridDim.x;

int start = tid + blkid * blksz;
int step = blksz * gridsz;
auto start = cudf::detail::grid_1d::global_thread_id();
auto const stride = cudf::detail::grid_1d::grid_stride();

#pragma unroll
for (cudf::size_type i = start; i < size; i += step) {
for (auto i = start; i < size; i += stride) {
f(i);
}
}
Expand All @@ -282,9 +277,9 @@ void for_each(rmm::cuda_stream_view stream, cudf::size_type size, Functor f)
int min_grid_size;
CUDF_CUDA_TRY(
cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, for_each_kernel<decltype(f)>));
// 2 elements per thread.
int const grid_size = util::div_rounding_up_safe(size, 2 * block_size);
for_each_kernel<<<grid_size, block_size, 0, stream.value()>>>(size, std::forward<Functor&&>(f));
auto grid = cudf::detail::grid_1d(size, block_size, 2 /* elements_per_thread */);
for_each_kernel<<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
size, std::forward<Functor&&>(f));
}

template <class BinaryOperator>
Expand Down
22 changes: 22 additions & 0 deletions cpp/tests/binaryop/binop-compiled-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
#include <cudf_test/testing_main.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/aggregation.hpp>
#include <cudf/binaryop.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/reduction.hpp>
#include <cudf/types.hpp>

#include <thrust/iterator/counting_iterator.h>
Expand Down Expand Up @@ -820,4 +822,24 @@ TEST_F(BinaryOperationCompiledTest_NullOpsString, NullMin_Vector_Vector)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->view());
}

TEST(BinaryOperationCompiledTest, LargeColumnNoOverflow)
{
cudf::size_type num_rows{1'799'989'091};
auto big = cudf::make_column_from_scalar(
cudf::numeric_scalar<cudf::id_to_type<cudf::type_id::INT8>>{10, true}, num_rows);
auto small = cudf::make_column_from_scalar(
cudf::numeric_scalar<cudf::id_to_type<cudf::type_id::INT8>>{1, true}, num_rows);

auto mask = cudf::binary_operation(big->view(),
small->view(),
cudf::binary_operator::GREATER,
cudf::data_type{cudf::type_id::BOOL8});

auto agg = cudf::make_sum_aggregation<cudf::reduce_aggregation>();
auto result =
cudf::reduce(mask->view(), *agg, cudf::data_type{cudf::type_to_id<cudf::size_type>()});
auto got = static_cast<cudf::numeric_scalar<cudf::size_type>*>(result.get())->value();
EXPECT_EQ(num_rows, got);
}

CUDF_TEST_PROGRAM_MAIN()

0 comments on commit c7bfa77

Please sign in to comment.