Skip to content
/ cudf Public
forked from rapidsai/cudf

Commit

Permalink
Fix integer overflow in compiled binaryop
Browse files Browse the repository at this point in the history
For large columns, the computed stride might end up overflowing
size_type. To fix this, use the grid_1d helper. See also rapidsai#10368.

- Closes rapidsai#17353
  • Loading branch information
wence- committed Nov 18, 2024
1 parent e4de8e4 commit 28908e4
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 11 deletions.
18 changes: 7 additions & 11 deletions cpp/src/binaryop/compiled/binary_ops.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_view.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/unary.hpp>

Expand Down Expand Up @@ -253,16 +254,11 @@ struct binary_op_double_device_dispatcher {
template <typename Functor>
CUDF_KERNEL void for_each_kernel(cudf::size_type size, Functor f)
{
int tid = threadIdx.x;
int blkid = blockIdx.x;
int blksz = blockDim.x;
int gridsz = gridDim.x;

int start = tid + blkid * blksz;
int step = blksz * gridsz;
auto start = cudf::detail::grid_1d::global_thread_id();
auto const stride = cudf::detail::grid_1d::grid_stride();

#pragma unroll
for (cudf::size_type i = start; i < size; i += step) {
for (auto i = start; i < size; i += stride) {
f(i);
}
}
Expand All @@ -282,9 +278,9 @@ void for_each(rmm::cuda_stream_view stream, cudf::size_type size, Functor f)
int min_grid_size;
CUDF_CUDA_TRY(
cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, for_each_kernel<decltype(f)>));
// 2 elements per thread.
int const grid_size = util::div_rounding_up_safe(size, 2 * block_size);
for_each_kernel<<<grid_size, block_size, 0, stream.value()>>>(size, std::forward<Functor&&>(f));
auto grid = cudf::detail::grid_1d(size, block_size, 2 /* elements_per_thread */);
for_each_kernel<<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
size, std::forward<Functor&&>(f));
}

template <class BinaryOperator>
Expand Down
22 changes: 22 additions & 0 deletions cpp/tests/binaryop/binop-compiled-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
#include <cudf_test/testing_main.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/aggregation.hpp>
#include <cudf/binaryop.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/reduction.hpp>
#include <cudf/types.hpp>

#include <thrust/iterator/counting_iterator.h>
Expand Down Expand Up @@ -820,4 +822,24 @@ TEST_F(BinaryOperationCompiledTest_NullOpsString, NullMin_Vector_Vector)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->view());
}

TEST(BinaryOperationCompiledTest, LargeColumnNoOverflow)
{
cudf::size_type num_rows{1799989091};
auto big = cudf::make_column_from_scalar(
cudf::numeric_scalar<cudf::id_to_type<cudf::type_id::INT8>>{10, true}, num_rows);
auto small = cudf::make_column_from_scalar(
cudf::numeric_scalar<cudf::id_to_type<cudf::type_id::INT8>>{1, true}, num_rows);

auto mask = cudf::binary_operation(big->view(),
small->view(),
cudf::binary_operator::GREATER,
cudf::data_type{cudf::type_id::BOOL8});

auto agg = cudf::make_sum_aggregation<cudf::reduce_aggregation>();
auto result =
cudf::reduce(mask->view(), *agg, cudf::data_type{cudf::type_to_id<cudf::size_type>()});
auto got = static_cast<cudf::numeric_scalar<cudf::size_type>*>(result.get())->value();
EXPECT_EQ(num_rows, got);
}

CUDF_TEST_PROGRAM_MAIN()

0 comments on commit 28908e4

Please sign in to comment.