From fcf7c34606ac1533f154041139fa31c0268effdd Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 18 Dec 2024 14:40:57 +0000 Subject: [PATCH] No need for int64s --- cpp/src/rolling/detail/rolling.cuh | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh index a11348eb02f..4c54f125223 100644 --- a/cpp/src/rolling/detail/rolling.cuh +++ b/cpp/src/rolling/detail/rolling.cuh @@ -1034,16 +1034,17 @@ __launch_bounds__(block_size) CUDF_KERNEL size_type warp_valid_count{0}; - int64_t const num_rows = input.size(); - auto active_threads = __ballot_sync(0xffff'ffffu, i < num_rows); + auto const num_rows = input.size(); + auto active_threads = __ballot_sync(0xffff'ffffu, i < num_rows); while (i < num_rows) { - // to prevent overflow issues when computing bounds use int64_t - int64_t const preceding_window = preceding_window_begin[i]; - int64_t const following_window = following_window_begin[i]; + // The caller is required to provide window bounds that will + // result in indexing that is in-bounds for the column. Therefore all + // of these calculations cannot overflow and we can do everything + // in size_type arithmetic. Moreover, we require that start <= + // end, i.e., the window is never "reversed" though it may be empty. + auto const preceding_window = preceding_window_begin[i]; + auto const following_window = following_window_begin[i]; - // compute bounds. It is required that these calculations produce - // a value that fits in size_type and is in-bounds for the input - // column. Moreover, we require start <= end. size_type const start = i - preceding_window + 1; size_type const end = i + following_window + 1;