Skip to content

Commit

Permalink
Add compile time check to ensure the counting_iterator type in `counting_transform_iterator` fits in `size_type` (#17118)
Browse files Browse the repository at this point in the history

This PR adds a compile-time check to enforce that the `start` argument to `cudf::detail::counting_transform_iterator`, which is used to determine the type of the `counting_iterator`, is of a type that fits in `int32_t` (aka `size_type`). The PR also modifies the instances of `counting_transform_iterator` that need to work with `counting_iterator`s of a type wider than `int32_t` to use manually created transform iterators built directly with thrust.

More context in this [comment](https://github.com/rapidsai/cudf/pull/17059/files#r1803925659).

Authors:
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Yunsong Wang (https://github.com/PointKernel)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Tianyu Liu (https://github.com/kingcrimsontianyu)

URL: #17118
  • Loading branch information
mhaseeb123 authored Oct 22, 2024
1 parent 69ca387 commit 13de3c1
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 27 deletions.
20 changes: 14 additions & 6 deletions cpp/include/cudf/detail/iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,19 @@
#include <cudf/scalar/scalar_device_view.cuh>

#include <cuda/std/optional>
#include <cuda/std/type_traits>
#include <cuda/std/utility>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/pair.h>

#include <utility>

namespace cudf {
namespace detail {
/**
* @brief Convenience wrapper for creating a `thrust::transform_iterator` over a
* `thrust::counting_iterator`.
* `thrust::counting_iterator` within the range [0, INT_MAX].
*
*
* Example:
* @code{.cpp}
Expand All @@ -62,14 +63,21 @@ namespace detail {
* iter[n] == n * n
* @endcode
*
* @param start The starting value of the counting iterator
* @param start The starting value of the counting iterator (must be size_type or smaller type).
* @param f The unary function to apply to the counting iterator.
* @return A transform iterator that applies `f` to a counting iterator
*/
template <typename UnaryFunction>
CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(cudf::size_type start,
template <typename CountingIterType, typename UnaryFunction>
CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(CountingIterType start,
                                                              UnaryFunction f)
{
  // Compile-time guard: `start` determines the counting iterator's value type,
  // which must be an integral type no wider than cudf::size_type (int32_t).
  // Wider index types must build a thrust::transform_iterator manually instead.
  using start_limits     = cuda::std::numeric_limits<CountingIterType>;
  using size_type_limits = cuda::std::numeric_limits<cudf::size_type>;
  static_assert(
    cuda::std::is_integral_v<CountingIterType> and
      start_limits::digits <= size_type_limits::digits,
    "The `start` for the counting_transform_iterator must be size_type or smaller type");

  return thrust::make_transform_iterator(thrust::make_counting_iterator(start), f);
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/utilities/batched_memset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ void batched_memset(std::vector<cudf::device_span<T>> const& bufs,
cudf::detail::make_device_uvector_async(bufs, stream, cudf::get_current_device_resource_ref());

// get a vector with the sizes of all buffers
auto sizes = cudf::detail::make_counting_transform_iterator(
static_cast<std::size_t>(0),
auto sizes = thrust::make_transform_iterator(
thrust::counting_iterator<std::size_t>(0),
cuda::proclaim_return_type<std::size_t>(
[gpu_bufs = gpu_bufs.data()] __device__(std::size_t i) { return gpu_bufs[i].size(); }));

Expand Down
17 changes: 9 additions & 8 deletions cpp/include/cudf/strings/detail/strings_children.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,20 @@ rmm::device_uvector<char> make_chars_buffer(column_view const& offsets,
auto chars_data = rmm::device_uvector<char>(chars_size, stream, mr);
auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets);

auto const src_ptrs = cudf::detail::make_counting_transform_iterator(
0u, cuda::proclaim_return_type<void*>([begin] __device__(uint32_t idx) {
auto const src_ptrs = thrust::make_transform_iterator(
thrust::make_counting_iterator<uint32_t>(0),
cuda::proclaim_return_type<void*>([begin] __device__(uint32_t idx) {
// Due to a bug in cub (https://github.com/NVIDIA/cccl/issues/586),
// we have to use `const_cast` to remove `const` qualifier from the source pointer.
// This should be fine as long as we only read but not write anything to the source.
return reinterpret_cast<void*>(const_cast<char*>(begin[idx].first));
}));
auto const src_sizes = cudf::detail::make_counting_transform_iterator(
0u, cuda::proclaim_return_type<size_type>([begin] __device__(uint32_t idx) {
return begin[idx].second;
}));
auto const dst_ptrs = cudf::detail::make_counting_transform_iterator(
0u,
auto const src_sizes = thrust::make_transform_iterator(
thrust::make_counting_iterator<uint32_t>(0),
cuda::proclaim_return_type<size_type>(
[begin] __device__(uint32_t idx) { return begin[idx].second; }));
auto const dst_ptrs = thrust::make_transform_iterator(
thrust::make_counting_iterator<uint32_t>(0),
cuda::proclaim_return_type<char*>([offsets = d_offsets, output = chars_data.data()] __device__(
uint32_t idx) { return output + offsets[idx]; }));

Expand Down
7 changes: 4 additions & 3 deletions cpp/src/io/parquet/page_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <rmm/exec_policy.hpp>

#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>

namespace cudf::io::parquet::detail {
Expand Down Expand Up @@ -476,9 +477,9 @@ void WriteFinalOffsets(host_span<size_type const> offsets,
auto d_src_data = cudf::detail::make_device_uvector_async(
offsets, stream, cudf::get_current_device_resource_ref());
// Iterator for the source (scalar) data
auto src_iter = cudf::detail::make_counting_transform_iterator(
static_cast<std::size_t>(0),
cuda::proclaim_return_type<size_type*>(
auto src_iter = thrust::make_transform_iterator(
thrust::make_counting_iterator<std::size_t>(0),
cuda::proclaim_return_type<cudf::size_type*>(
[src = d_src_data.begin()] __device__(std::size_t i) { return src + i; }));

// Copy buffer addresses to device and create an iterator
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/io/parquet/reader_impl_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1629,10 +1629,10 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num
get_page_nesting_size{
d_cols_info.data(), max_depth, subpass.pages.size(), subpass.pages.device_begin()});

// Manually create a int64_t `key_start` compatible counting_transform_iterator to avoid
// implicit casting to size_type.
auto const reduction_keys = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_t>(key_start), get_reduction_key{subpass.pages.size()});
// Manually create a size_t `key_start` compatible counting_transform_iterator.
auto const reduction_keys =
thrust::make_transform_iterator(thrust::make_counting_iterator<std::size_t>(key_start),
get_reduction_key{subpass.pages.size()});

// Find the size of each column
thrust::reduce_by_key(rmm::exec_policy_nosync(_stream),
Expand Down
9 changes: 5 additions & 4 deletions cpp/tests/io/orc_chunked_reader_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1358,10 +1358,11 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow)
int64_t constexpr total_rows = num_rows * num_reps;
static_assert(total_rows > std::numeric_limits<cudf::size_type>::max());

auto const it = cudf::detail::make_counting_transform_iterator(0l, [num_rows](int64_t i) {
return (i % num_rows) % static_cast<int64_t>(std::numeric_limits<data_type>::max() / 2);
});
auto const col = data_col(it, it + num_rows);
auto const it = thrust::make_transform_iterator(
thrust::make_counting_iterator<int64_t>(0), [num_rows](int64_t i) {
return (i % num_rows) % static_cast<int64_t>(std::numeric_limits<data_type>::max() / 2);
});
auto const col = data_col(it, it + num_rows);
auto const chunk_table = cudf::table_view{{col}};

std::vector<char> data_buffer;
Expand Down

0 comments on commit 13de3c1

Please sign in to comment.