From db2b1d10a82ec63d4278feca9c92d96a586e51d6 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Thu, 17 Oct 2024 22:10:42 +0000 Subject: [PATCH 1/7] Allow templated begin value to `counting_transform_iterator` --- cpp/include/cudf/detail/iterator.cuh | 4 ++-- cpp/src/io/parquet/reader_impl_preprocess.cu | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 4349e1b70fd..b46d50314a2 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -66,8 +66,8 @@ namespace detail { * @param f The unary function to apply to the counting iterator. * @return A transform iterator that applies `f` to a counting iterator */ -template -CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(cudf::size_type start, +template +CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(CountingIterType start, UnaryFunction f) { return thrust::make_transform_iterator(thrust::make_counting_iterator(start), f); diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 5138a92ac14..d8f7d339a7f 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1629,10 +1629,7 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num get_page_nesting_size{ d_cols_info.data(), max_depth, subpass.pages.size(), subpass.pages.device_begin()}); - // Manually create a int64_t `key_start` compatible counting_transform_iterator to avoid - // implicit casting to size_type. - auto const reduction_keys = thrust::make_transform_iterator( - thrust::make_counting_iterator(key_start), get_reduction_key{subpass.pages.size()}); + auto const reduction_keys = cudf::detail::make_counting_transform_iterator(key_start, get_reduction_key{subpass.pages.size()}); // Find the size of each column thrust::reduce_by_key(rmm::exec_policy_nosync(_stream), From ac82d73d46942acc3ca21e6769e9d4f7aed1a2e4 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Thu, 17 Oct 2024 22:19:52 +0000 Subject: [PATCH 2/7] style fix --- cpp/src/io/parquet/reader_impl_preprocess.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index d8f7d339a7f..f4d1d036ecd 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1629,7 +1629,8 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num get_page_nesting_size{ d_cols_info.data(), max_depth, subpass.pages.size(), subpass.pages.device_begin()}); - auto const reduction_keys = cudf::detail::make_counting_transform_iterator(key_start, get_reduction_key{subpass.pages.size()}); + auto const reduction_keys = cudf::detail::make_counting_transform_iterator( + key_start, get_reduction_key{subpass.pages.size()}); // Find the size of each column thrust::reduce_by_key(rmm::exec_policy_nosync(_stream), From ed31c32580baa5f6d413e12b8cc22a97c522e7b8 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:33:49 +0000 Subject: [PATCH 3/7] Force int32_t bound for counting transform iterator --- cpp/include/cudf/detail/iterator.cuh | 11 ++++++++--- .../cudf/detail/utilities/batched_memset.hpp | 4 ++-- .../cudf/strings/detail/strings_children.cuh | 17 +++++++++-------- cpp/src/io/parquet/page_data.cu | 7 ++++--- cpp/src/io/parquet/reader_impl_preprocess.cu | 6 ++++-- cpp/tests/io/orc_chunked_reader_test.cu | 9 +++++---- 6 files changed, 32 insertions(+), 22 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index b46d50314a2..b4a4f01522e 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -49,7 +49,8 @@ namespace cudf { namespace detail { /** * @brief Convenience wrapper for creating a `thrust::transform_iterator` over a - * `thrust::counting_iterator`. + * `thrust::counting_iterator` of type smaller than or equal to int32_t in size. + * * * Example: * @code{.cpp} @@ -62,11 +63,15 @@ namespace detail { * iter[n] == n * n * @endcode * - * @param start The starting value of the counting iterator + * @param start The starting value of the counting iterator (must be int32_t or smaller type). * @param f The unary function to apply to the counting iterator. * @return A transform iterator that applies `f` to a counting iterator */ -template +template and + std::numeric_limits::max() <= + std::numeric_limits::max()>> CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(CountingIterType start, UnaryFunction f) { diff --git a/cpp/include/cudf/detail/utilities/batched_memset.hpp b/cpp/include/cudf/detail/utilities/batched_memset.hpp index 75f738f7529..78be5b91248 100644 --- a/cpp/include/cudf/detail/utilities/batched_memset.hpp +++ b/cpp/include/cudf/detail/utilities/batched_memset.hpp @@ -53,8 +53,8 @@ void batched_memset(std::vector> const& bufs, cudf::detail::make_device_uvector_async(bufs, stream, cudf::get_current_device_resource_ref()); // get a vector with the sizes of all buffers - auto sizes = cudf::detail::make_counting_transform_iterator( - static_cast(0), + auto sizes = thrust::make_transform_iterator( + thrust::counting_iterator(0), cuda::proclaim_return_type( [gpu_bufs = gpu_bufs.data()] __device__(std::size_t i) { return gpu_bufs[i].size(); })); diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index fb0b25cf9f1..de2f1770e28 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -65,19 +65,20 @@ rmm::device_uvector make_chars_buffer(column_view const& offsets, auto chars_data = rmm::device_uvector(chars_size, stream, mr); auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets); - auto const src_ptrs = cudf::detail::make_counting_transform_iterator( - 0u, cuda::proclaim_return_type([begin] __device__(uint32_t idx) { + auto const src_ptrs = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type([begin] __device__(uint32_t idx) { // Due to a bug in cub (https://github.com/NVIDIA/cccl/issues/586), // we have to use `const_cast` to remove `const` qualifier from the source pointer. // This should be fine as long as we only read but not write anything to the source. return reinterpret_cast(const_cast(begin[idx].first)); })); - auto const src_sizes = cudf::detail::make_counting_transform_iterator( - 0u, cuda::proclaim_return_type([begin] __device__(uint32_t idx) { - return begin[idx].second; - })); - auto const dst_ptrs = cudf::detail::make_counting_transform_iterator( - 0u, + auto const src_sizes = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [begin] __device__(uint32_t idx) { return begin[idx].second; })); + auto const dst_ptrs = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), cuda::proclaim_return_type([offsets = d_offsets, output = chars_data.data()] __device__( uint32_t idx) { return output + offsets[idx]; })); diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index b3276c81c1f..0d24fa4236f 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -21,6 +21,7 @@ #include +#include #include namespace cudf::io::parquet::detail { @@ -476,9 +477,9 @@ void WriteFinalOffsets(host_span offsets, auto d_src_data = cudf::detail::make_device_uvector_async( offsets, stream, cudf::get_current_device_resource_ref()); // Iterator for the source (scalar) data - auto src_iter = cudf::detail::make_counting_transform_iterator( - static_cast(0), - cuda::proclaim_return_type( + auto src_iter = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( [src = d_src_data.begin()] __device__(std::size_t i) { return src + i; })); // Copy buffer addresses to device and create an iterator diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index f4d1d036ecd..60e35465793 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1629,8 +1629,10 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num get_page_nesting_size{ d_cols_info.data(), max_depth, subpass.pages.size(), subpass.pages.device_begin()}); - auto const reduction_keys = cudf::detail::make_counting_transform_iterator( - key_start, get_reduction_key{subpass.pages.size()}); + // Manually create a size_t `key_start` compatible counting_transform_iterator. + auto const reduction_keys = + thrust::make_transform_iterator(thrust::make_counting_iterator(key_start), + get_reduction_key{subpass.pages.size()}); // Find the size of each column thrust::reduce_by_key(rmm::exec_policy_nosync(_stream), diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 8ad1fea649d..5f1aea71f73 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1358,10 +1358,11 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) int64_t constexpr total_rows = num_rows * num_reps; static_assert(total_rows > std::numeric_limits::max()); - auto const it = cudf::detail::make_counting_transform_iterator(0l, [num_rows](int64_t i) { - return (i % num_rows) % static_cast(std::numeric_limits::max() / 2); - }); - auto const col = data_col(it, it + num_rows); + auto const it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), [num_rows](int64_t i) { + return (i % num_rows) % static_cast(std::numeric_limits::max() / 2); + }); + auto const col = data_col(it, it + num_rows); auto const chunk_table = cudf::table_view{{col}}; std::vector data_buffer; From 5a27bc5745906c5c532a835b008f88de461d5a68 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Fri, 18 Oct 2024 14:19:34 -0700 Subject: [PATCH 4/7] Apply suggestions from code review Co-authored-by: Yunsong Wang --- cpp/include/cudf/detail/iterator.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index b4a4f01522e..77f049fa9aa 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -49,7 +49,7 @@ namespace cudf { namespace detail { /** * @brief Convenience wrapper for creating a `thrust::transform_iterator` over a - * `thrust::counting_iterator` of type smaller than or equal to int32_t in size. + * `thrust::counting_iterator` within the range [0, INT_MAX]. * * * Example: @@ -69,9 +69,9 @@ namespace detail { */ template and - std::numeric_limits::max() <= - std::numeric_limits::max()>> + typename = cuda::std::enable_if_t and + cuda::std::numeric_limits::max() <= + cuda::std::numeric_limits::max()>> CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(CountingIterType start, UnaryFunction f) { From 201d76173f302093f910c912038bf4ae24ab52d5 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Fri, 18 Oct 2024 21:27:14 +0000 Subject: [PATCH 5/7] style fix and cuda std headers --- cpp/include/cudf/detail/iterator.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 77f049fa9aa..4607d921043 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -38,13 +38,13 @@ #include #include +#include +#include #include #include #include #include -#include - namespace cudf { namespace detail { /** @@ -70,8 +70,8 @@ namespace detail { template and - cuda::std::numeric_limits::max() <= - cuda::std::numeric_limits::max()>> + cuda::std::numeric_limits::max() <= + cuda::std::numeric_limits::max()>> CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(CountingIterType start, UnaryFunction f) { From 0024a64b3a57799bb6273afc5ee97879909d76f9 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Fri, 18 Oct 2024 21:28:12 +0000 Subject: [PATCH 6/7] Apply suggestion from reviews --- cpp/include/cudf/detail/iterator.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 4607d921043..9eefb90756e 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -63,7 +63,7 @@ namespace detail { * iter[n] == n * n * @endcode * - * @param start The starting value of the counting iterator (must be int32_t or smaller type). + * @param start The starting value of the counting iterator (must be size_type or smaller type). * @param f The unary function to apply to the counting iterator. * @return A transform iterator that applies `f` to a counting iterator */ From 50f252e896214b926c5fbff6b635de0b9af5842d Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Mon, 21 Oct 2024 17:24:15 +0000 Subject: [PATCH 7/7] Use static_assert instead of enable_if --- cpp/include/cudf/detail/iterator.cuh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 9eefb90756e..30f36d6a5da 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -67,14 +67,17 @@ namespace detail { * @param f The unary function to apply to the counting iterator. * @return A transform iterator that applies `f` to a counting iterator */ -template and - cuda::std::numeric_limits::max() <= - cuda::std::numeric_limits::max()>> +template CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(CountingIterType start, UnaryFunction f) { + // Check if the `start` for counting_iterator is of size_type or a smaller integral type + static_assert( + cuda::std::is_integral_v and + cuda::std::numeric_limits::digits <= + cuda::std::numeric_limits::digits, + "The `start` for the counting_transform_iterator must be size_type or smaller type"); + return thrust::make_transform_iterator(thrust::make_counting_iterator(start), f); }