Skip to content

Commit

Permalink
Fix some possible thread-id overflow calculations (#17473)
Browse files Browse the repository at this point in the history
Fixes thread-id calculations and usages that could overflow the `int32` or `size_type` types.
Reference #10368

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Shruti Shivakumar (https://github.com/shrshi)

URL: #17473
  • Loading branch information
davidwendt authored Dec 11, 2024
1 parent 3801e74 commit 63c5a38
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 12 deletions.
11 changes: 6 additions & 5 deletions cpp/include/cudf/detail/copy_if_else.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ __launch_bounds__(block_size) CUDF_KERNEL
mutable_column_device_view out,
size_type* __restrict__ const valid_count)
{
auto tidx = cudf::detail::grid_1d::global_thread_id<block_size>();
auto const stride = cudf::detail::grid_1d::grid_stride<block_size>();
int const warp_id = tidx / cudf::detail::warp_size;
size_type const warps_per_grid = gridDim.x * block_size / cudf::detail::warp_size;
auto tidx = cudf::detail::grid_1d::global_thread_id<block_size>();

auto const stride = cudf::detail::grid_1d::grid_stride<block_size>();
auto const warp_id = tidx / cudf::detail::warp_size;
auto const warps_per_grid = stride / cudf::detail::warp_size;

// begin/end indices for the column data
size_type const begin = 0;
Expand All @@ -60,7 +61,7 @@ __launch_bounds__(block_size) CUDF_KERNEL

// lane id within the current warp
constexpr size_type leader_lane{0};
int const lane_id = threadIdx.x % cudf::detail::warp_size;
auto const lane_id = threadIdx.x % cudf::detail::warp_size;

size_type warp_valid_count{0};

Expand Down
7 changes: 4 additions & 3 deletions cpp/src/partitioning/partitioning.cu
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ CUDF_KERNEL void compute_row_partition_numbers(row_hasher_t the_hasher,
auto const stride = cudf::detail::grid_1d::grid_stride();

// Initialize local histogram
size_type partition_number = threadIdx.x;
thread_index_type partition_number = threadIdx.x;
while (partition_number < num_partitions) {
shared_partition_sizes[partition_number] = 0;
partition_number += blockDim.x;
Expand Down Expand Up @@ -207,7 +207,7 @@ CUDF_KERNEL void compute_row_output_locations(size_type* __restrict__ row_partit
extern __shared__ size_type shared_partition_offsets[];

// Initialize array of this blocks offsets from global array
size_type partition_number = threadIdx.x;
thread_index_type partition_number = threadIdx.x;
while (partition_number < num_partitions) {
shared_partition_offsets[partition_number] =
block_partition_offsets[partition_number * gridDim.x + blockIdx.x];
Expand Down Expand Up @@ -303,7 +303,8 @@ CUDF_KERNEL void copy_block_partitions(InputIter input_iter,

// Fetch the offset in the output buffer of each partition in this thread
// block
for (size_type ipartition = threadIdx.x; ipartition < num_partitions; ipartition += blockDim.x) {
for (thread_index_type ipartition = threadIdx.x; ipartition < num_partitions;
ipartition += blockDim.x) {
partition_offset_global[ipartition] =
scanned_block_partition_sizes[ipartition * gridDim.x + blockIdx.x];
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/quantiles/tdigest/tdigest_aggregation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ CUDF_KERNEL void generate_cluster_limits_kernel(int delta,
size_type const* group_cluster_offsets,
bool has_nulls)
{
int const tid = threadIdx.x + blockIdx.x * blockDim.x;
auto const tid = cudf::detail::grid_1d::global_thread_id();

auto const group_index = tid;
if (group_index >= num_groups) { return; }
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/transform/jit/kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ CUDF_KERNEL void kernel(cudf::size_type size, TypeOut* out_data, TypeIn* in_data
{
// cannot use global_thread_id utility due to a JIT build issue by including
// the `cudf/detail/utilities/cuda.cuh` header
thread_index_type const start = threadIdx.x + blockIdx.x * blockDim.x;
thread_index_type const stride = blockDim.x * gridDim.x;
auto const block_size = static_cast<thread_index_type>(blockDim.x);
thread_index_type const start = threadIdx.x + blockIdx.x * block_size;
thread_index_type const stride = block_size * gridDim.x;

for (auto i = start; i < static_cast<thread_index_type>(size); i += stride) {
GENERIC_UNARY_OP(&out_data[i], in_data[i]);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/transform/row_bit_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ CUDF_KERNEL void compute_segment_sizes(device_span<column_device_view const> col
size_type max_branch_depth)
{
extern __shared__ row_span thread_branch_stacks[];
int const tid = threadIdx.x + blockIdx.x * blockDim.x;
auto const tid = static_cast<size_type>(cudf::detail::grid_1d::global_thread_id());

auto const num_segments = static_cast<size_type>(output.size());
if (tid >= num_segments) { return; }
Expand Down

0 comments on commit 63c5a38

Please sign in to comment.