Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add compile time check to ensure the counting_iterator type in counting_transform_iterator fits in size_type #17118

17 changes: 11 additions & 6 deletions cpp/include/cudf/detail/iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,19 @@
#include <cudf/scalar/scalar_device_view.cuh>

#include <cuda/std/optional>
#include <cuda/std/type_traits>
#include <cuda/std/utility>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/pair.h>

#include <utility>

namespace cudf {
namespace detail {
/**
* @brief Convenience wrapper for creating a `thrust::transform_iterator` over a
* `thrust::counting_iterator`.
* `thrust::counting_iterator` within the range [0, INT_MAX].
*
*
* Example:
* @code{.cpp}
Expand All @@ -62,12 +63,16 @@ namespace detail {
* iter[n] == n * n
* @endcode
*
* @param start The starting value of the counting iterator
* @param start The starting value of the counting iterator (must be `size_type` or a smaller integral type).
* @param f The unary function to apply to the counting iterator.
* @return A transform iterator that applies `f` to a counting iterator
*/
template <typename UnaryFunction>
CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(cudf::size_type start,
template <typename CountingIterType,
typename UnaryFunction,
typename = cuda::std::enable_if_t<cuda::std::is_integral_v<CountingIterType> and
cuda::std::numeric_limits<CountingIterType>::max() <=
cuda::std::numeric_limits<cudf::size_type>::max()>>
mhaseeb123 marked this conversation as resolved.
Show resolved Hide resolved
CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(CountingIterType start,
UnaryFunction f)
{
return thrust::make_transform_iterator(thrust::make_counting_iterator(start), f);
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/utilities/batched_memset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ void batched_memset(std::vector<cudf::device_span<T>> const& bufs,
cudf::detail::make_device_uvector_async(bufs, stream, cudf::get_current_device_resource_ref());

// get a vector with the sizes of all buffers
auto sizes = cudf::detail::make_counting_transform_iterator(
static_cast<std::size_t>(0),
auto sizes = thrust::make_transform_iterator(
thrust::counting_iterator<std::size_t>(0),
cuda::proclaim_return_type<std::size_t>(
[gpu_bufs = gpu_bufs.data()] __device__(std::size_t i) { return gpu_bufs[i].size(); }));

Expand Down
17 changes: 9 additions & 8 deletions cpp/include/cudf/strings/detail/strings_children.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,20 @@ rmm::device_uvector<char> make_chars_buffer(column_view const& offsets,
auto chars_data = rmm::device_uvector<char>(chars_size, stream, mr);
auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets);

auto const src_ptrs = cudf::detail::make_counting_transform_iterator(
0u, cuda::proclaim_return_type<void*>([begin] __device__(uint32_t idx) {
auto const src_ptrs = thrust::make_transform_iterator(
thrust::make_counting_iterator<uint32_t>(0),
cuda::proclaim_return_type<void*>([begin] __device__(uint32_t idx) {
// Due to a bug in cub (https://github.com/NVIDIA/cccl/issues/586),
// we have to use `const_cast` to remove `const` qualifier from the source pointer.
// This should be fine as long as we only read but not write anything to the source.
return reinterpret_cast<void*>(const_cast<char*>(begin[idx].first));
}));
auto const src_sizes = cudf::detail::make_counting_transform_iterator(
0u, cuda::proclaim_return_type<size_type>([begin] __device__(uint32_t idx) {
return begin[idx].second;
}));
auto const dst_ptrs = cudf::detail::make_counting_transform_iterator(
0u,
auto const src_sizes = thrust::make_transform_iterator(
thrust::make_counting_iterator<uint32_t>(0),
cuda::proclaim_return_type<size_type>(
[begin] __device__(uint32_t idx) { return begin[idx].second; }));
auto const dst_ptrs = thrust::make_transform_iterator(
thrust::make_counting_iterator<uint32_t>(0),
cuda::proclaim_return_type<char*>([offsets = d_offsets, output = chars_data.data()] __device__(
uint32_t idx) { return output + offsets[idx]; }));

Expand Down
7 changes: 4 additions & 3 deletions cpp/src/io/parquet/page_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <rmm/exec_policy.hpp>

#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>

namespace cudf::io::parquet::detail {
Expand Down Expand Up @@ -476,9 +477,9 @@ void WriteFinalOffsets(host_span<size_type const> offsets,
auto d_src_data = cudf::detail::make_device_uvector_async(
offsets, stream, cudf::get_current_device_resource_ref());
// Iterator for the source (scalar) data
auto src_iter = cudf::detail::make_counting_transform_iterator(
static_cast<std::size_t>(0),
cuda::proclaim_return_type<size_type*>(
auto src_iter = thrust::make_transform_iterator(
thrust::make_counting_iterator<std::size_t>(0),
cuda::proclaim_return_type<cudf::size_type*>(
[src = d_src_data.begin()] __device__(std::size_t i) { return src + i; }));

// Copy buffer addresses to device and create an iterator
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/io/parquet/reader_impl_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1629,10 +1629,10 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num
get_page_nesting_size{
d_cols_info.data(), max_depth, subpass.pages.size(), subpass.pages.device_begin()});

// Manually create a int64_t `key_start` compatible counting_transform_iterator to avoid
// implicit casting to size_type.
auto const reduction_keys = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_t>(key_start), get_reduction_key{subpass.pages.size()});
// Manually build the equivalent of a counting_transform_iterator with a `size_t`-typed `key_start`.
auto const reduction_keys =
thrust::make_transform_iterator(thrust::make_counting_iterator<std::size_t>(key_start),
get_reduction_key{subpass.pages.size()});

// Find the size of each column
thrust::reduce_by_key(rmm::exec_policy_nosync(_stream),
Expand Down
9 changes: 5 additions & 4 deletions cpp/tests/io/orc_chunked_reader_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1358,10 +1358,11 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow)
int64_t constexpr total_rows = num_rows * num_reps;
static_assert(total_rows > std::numeric_limits<cudf::size_type>::max());

auto const it = cudf::detail::make_counting_transform_iterator(0l, [num_rows](int64_t i) {
return (i % num_rows) % static_cast<int64_t>(std::numeric_limits<data_type>::max() / 2);
});
auto const col = data_col(it, it + num_rows);
auto const it = thrust::make_transform_iterator(
thrust::make_counting_iterator<int64_t>(0), [num_rows](int64_t i) {
return (i % num_rows) % static_cast<int64_t>(std::numeric_limits<data_type>::max() / 2);
});
auto const col = data_col(it, it + num_rows);
auto const chunk_table = cudf::table_view{{col}};

std::vector<char> data_buffer;
Expand Down
Loading