Skip to content

Commit ab3fbdf

Browse files
committed
Merge branch-25.02 into branch-25.04
2 parents d1bd468 + 551e452 commit ab3fbdf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+358
-184
lines changed

conda/environments/all_cuda-118_arch-x86_64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ dependencies:
7070
- polars>=1.11,<1.18
7171
- pre-commit
7272
- ptxcompiler
73-
- pyarrow>=14.0.0,<19.0.0a0
73+
- pyarrow>=14.0.0,<20.0.0a0
7474
- pydata-sphinx-theme>=0.15.4
7575
- pynvml>=12.0.0,<13.0.0a0
7676
- pytest-benchmark

conda/environments/all_cuda-125_arch-x86_64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ dependencies:
6767
- pandoc
6868
- polars>=1.11,<1.18
6969
- pre-commit
70-
- pyarrow>=14.0.0,<19.0.0a0
70+
- pyarrow>=14.0.0,<20.0.0a0
7171
- pydata-sphinx-theme>=0.15.4
7272
- pynvjitlink>=0.0.0a0
7373
- pynvml>=12.0.0,<13.0.0a0

cpp/include/cudf/detail/utilities/integer_utils.hpp

+10-8
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ CUDF_HOST_DEVICE constexpr S round_up_safe(S number_to_round, S modulus)
7373
* `modulus` is positive and does not check for overflow.
7474
*/
7575
template <typename S>
76-
constexpr S round_down_safe(S number_to_round, S modulus) noexcept
76+
CUDF_HOST_DEVICE constexpr S round_down_safe(S number_to_round, S modulus) noexcept
7777
{
7878
auto remainder = number_to_round % modulus;
7979
auto rounded_down = number_to_round - remainder;
@@ -113,24 +113,26 @@ CUDF_HOST_DEVICE constexpr S round_up_unsafe(S number_to_round, S modulus) noexc
113113
* the result will be incorrect
114114
*/
115115
template <typename S, typename T>
116-
constexpr S div_rounding_up_unsafe(S const& dividend, T const& divisor) noexcept
116+
CUDF_HOST_DEVICE constexpr S div_rounding_up_unsafe(S const& dividend, T const& divisor) noexcept
117117
{
118118
return (dividend + divisor - 1) / divisor;
119119
}
120120

121121
namespace detail {
122122
template <typename I>
123-
constexpr I div_rounding_up_safe(std::integral_constant<bool, false>,
124-
I dividend,
125-
I divisor) noexcept
123+
CUDF_HOST_DEVICE constexpr I div_rounding_up_safe(cuda::std::integral_constant<bool, false>,
124+
I dividend,
125+
I divisor) noexcept
126126
{
127127
// TODO: This could probably be implemented faster
128128
return (dividend > divisor) ? 1 + div_rounding_up_unsafe(dividend - divisor, divisor)
129129
: (dividend > 0);
130130
}
131131

132132
template <typename I>
133-
constexpr I div_rounding_up_safe(std::integral_constant<bool, true>, I dividend, I divisor) noexcept
133+
CUDF_HOST_DEVICE constexpr I div_rounding_up_safe(cuda::std::integral_constant<bool, true>,
134+
I dividend,
135+
I divisor) noexcept
134136
{
135137
auto quotient = dividend / divisor;
136138
auto remainder = dividend % divisor;
@@ -156,9 +158,9 @@ constexpr I div_rounding_up_safe(std::integral_constant<bool, true>, I dividend,
156158
* the non-integral division `dividend/divisor`
157159
*/
158160
template <typename I>
159-
constexpr I div_rounding_up_safe(I dividend, I divisor) noexcept
161+
CUDF_HOST_DEVICE constexpr I div_rounding_up_safe(I dividend, I divisor) noexcept
160162
{
161-
using i_is_a_signed_type = std::integral_constant<bool, std::is_signed_v<I>>;
163+
using i_is_a_signed_type = cuda::std::integral_constant<bool, cuda::std::is_signed_v<I>>;
162164
return detail::div_rounding_up_safe(i_is_a_signed_type{}, dividend, divisor);
163165
}
164166

cpp/include/cudf/fixed_point/temporary.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -54,7 +54,7 @@ auto to_string(T value) -> std::string
5454
}
5555

5656
template <typename T>
57-
constexpr auto abs(T value)
57+
CUDF_HOST_DEVICE constexpr auto abs(T value)
5858
{
5959
return value >= 0 ? value : -value;
6060
}
@@ -72,7 +72,7 @@ CUDF_HOST_DEVICE inline auto max(T lhs, T rhs)
7272
}
7373

7474
template <typename BaseType>
75-
constexpr auto exp10(int32_t exponent)
75+
CUDF_HOST_DEVICE constexpr auto exp10(int32_t exponent)
7676
{
7777
BaseType value = 1;
7878
while (exponent > 0)

cpp/include/cudf/io/text/detail/multistate.hpp

+9-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@
1818

1919
#include <cudf/utilities/export.hpp>
2020

21+
#include <cuda/functional>
22+
2123
#include <cstdint>
2224

2325
namespace CUDF_EXPORT cudf {
@@ -45,7 +47,7 @@ struct multistate {
4547
*
4648
* @note: The behavior of this function is undefined if size() >= max_segment_count
4749
*/
48-
constexpr void enqueue(uint8_t head, uint8_t tail)
50+
CUDF_HOST_DEVICE constexpr void enqueue(uint8_t head, uint8_t tail)
4951
{
5052
_heads |= (head & 0xFu) << (_size * 4);
5153
_tails |= (tail & 0xFu) << (_size * 4);
@@ -55,17 +57,17 @@ struct multistate {
5557
/**
5658
* @brief Gets the number of segments this multistate represents
5759
*/
58-
[[nodiscard]] constexpr uint8_t size() const { return _size; }
60+
[[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t size() const { return _size; }
5961

6062
/**
6163
* @brief Gets the highest (____, tail] value this multistate represents
6264
*/
63-
[[nodiscard]] constexpr uint8_t max_tail() const
65+
[[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t max_tail() const
6466
{
6567
uint8_t maximum = 0;
6668

6769
for (uint8_t i = 0; i < _size; i++) {
68-
maximum = std::max(maximum, get_tail(i));
70+
maximum = cuda::std::max(maximum, get_tail(i));
6971
}
7072

7173
return maximum;
@@ -74,15 +76,15 @@ struct multistate {
7476
/**
7577
* @brief Gets the Nth (head, ____] value state this multistate represents
7678
*/
77-
[[nodiscard]] constexpr uint8_t get_head(uint8_t idx) const
79+
[[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t get_head(uint8_t idx) const
7880
{
7981
return (_heads >> (idx * 4)) & 0xFu;
8082
}
8183

8284
/**
8385
* @brief Gets the Nth (____, tail] value state this multistate represents
8486
*/
85-
[[nodiscard]] constexpr uint8_t get_tail(uint8_t idx) const
87+
[[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t get_tail(uint8_t idx) const
8688
{
8789
return (_tails >> (idx * 4)) & 0xFu;
8890
}

cpp/include/cudf/strings/detail/convert/fixed_point.cuh

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
1717

1818
#include <cudf/fixed_point/temporary.hpp>
1919

20+
#include <cuda/std/limits>
2021
#include <cuda/std/optional>
2122
#include <cuda/std/type_traits>
2223
#include <thrust/pair.h>
@@ -46,7 +47,7 @@ __device__ inline thrust::pair<UnsignedDecimalType, int32_t> parse_integer(
4647
// highest value where another decimal digit cannot be appended without an overflow;
4748
// this preserves the most digits when scaling the final result for this type
4849
constexpr UnsignedDecimalType decimal_max =
49-
(std::numeric_limits<UnsignedDecimalType>::max() - 9L) / 10L;
50+
(cuda::std::numeric_limits<UnsignedDecimalType>::max() - 9L) / 10L;
5051

5152
__uint128_t value = 0; // for checking overflow
5253
int32_t exp_offset = 0;
@@ -90,7 +91,8 @@ __device__ inline thrust::pair<UnsignedDecimalType, int32_t> parse_integer(
9091
template <bool check_only = false>
9192
__device__ cuda::std::optional<int32_t> parse_exponent(char const* iter, char const* iter_end)
9293
{
93-
constexpr uint32_t exponent_max = static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
94+
constexpr uint32_t exponent_max =
95+
static_cast<uint32_t>(cuda::std::numeric_limits<int32_t>::max());
9496

9597
// get optional exponent sign
9698
int32_t const exp_sign = [&iter] {

cpp/include/cudf/strings/detail/convert/fixed_point_to_string.cuh

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
1717

1818
#include <cudf/strings/detail/convert/int_to_string.cuh>
1919

20+
#include <cuda/std/functional>
21+
2022
namespace cudf::strings::detail {
2123

2224
/**
@@ -33,7 +35,7 @@ __device__ inline int32_t fixed_point_string_size(__int128_t const& value, int32
3335
auto const abs_value = numeric::detail::abs(value);
3436
auto const exp_ten = numeric::detail::exp10<__int128_t>(-scale);
3537
auto const fraction = count_digits(abs_value % exp_ten);
36-
auto const num_zeros = std::max(0, (-scale - fraction));
38+
auto const num_zeros = cuda::std::max(0, (-scale - fraction));
3739
return static_cast<int32_t>(value < 0) + // sign if negative
3840
count_digits(abs_value / exp_ten) + // integer
3941
1 + // decimal point
@@ -66,7 +68,7 @@ __device__ inline void fixed_point_to_string(__int128_t const& value, int32_t sc
6668
if (value < 0) *out_ptr++ = '-'; // add sign
6769
auto const abs_value = numeric::detail::abs(value);
6870
auto const exp_ten = numeric::detail::exp10<__int128_t>(-scale);
69-
auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten)));
71+
auto const num_zeros = cuda::std::max(0, (-scale - count_digits(abs_value % exp_ten)));
7072

7173
out_ptr += integer_to_string(abs_value / exp_ten, out_ptr); // add the integer part
7274
*out_ptr++ = '.'; // add decimal point

cpp/include/cudf/strings/detail/convert/int_to_string.cuh

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -67,7 +67,7 @@ __device__ inline size_type integer_to_string(IntegerType value, char* d_buffer)
6767
* @return size_type number of digits in input value
6868
*/
6969
template <typename IntegerType>
70-
constexpr size_type count_digits(IntegerType value)
70+
__device__ constexpr size_type count_digits(IntegerType value)
7171
{
7272
if (value == 0) return 1;
7373
bool const is_negative = cuda::std::is_signed<IntegerType>() ? (value < 0) : false;

cpp/include/cudf/strings/detail/strings_children.cuh

+18-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -41,6 +41,21 @@ namespace cudf {
4141
namespace strings {
4242
namespace detail {
4343

44+
template <typename Iter>
45+
struct string_offsets_fn {
46+
Iter _begin;
47+
size_type _strings_count;
48+
constexpr string_offsets_fn(Iter begin, size_type strings_count)
49+
: _begin{begin}, _strings_count{strings_count}
50+
{
51+
}
52+
53+
__device__ constexpr size_type operator()(size_type idx) const noexcept
54+
{
55+
return idx < _strings_count ? static_cast<size_type>(_begin[idx]) : size_type{0};
56+
};
57+
};
58+
4459
/**
4560
* @brief Gather characters to create a strings column using the given string-index pair iterator
4661
*
@@ -133,11 +148,8 @@ std::pair<std::unique_ptr<column>, int64_t> make_offsets_child_column(
133148
// using exclusive-scan technically requires strings_count+1 input values even though
134149
// the final input value is never used.
135150
// The input iterator is wrapped here to allow the 'last value' to be safely read.
136-
auto map_fn = cuda::proclaim_return_type<size_type>(
137-
[begin, strings_count] __device__(size_type idx) -> size_type {
138-
return idx < strings_count ? static_cast<size_type>(begin[idx]) : size_type{0};
139-
});
140-
auto input_itr = cudf::detail::make_counting_transform_iterator(0, map_fn);
151+
auto input_itr =
152+
cudf::detail::make_counting_transform_iterator(0, string_offsets_fn{begin, strings_count});
141153
// Use the sizes-to-offsets iterator to compute the total number of elements
142154
auto const total_bytes =
143155
cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets, stream);

cpp/src/io/csv/datetime.cuh

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -197,7 +197,7 @@ __inline__ __device__ cuda::std::chrono::hh_mm_ss<duration_ms> extract_time_of_d
197197
/**
198198
* @brief Checks whether `c` is decimal digit
199199
*/
200-
constexpr bool is_digit(char c) { return c >= '0' and c <= '9'; }
200+
__device__ constexpr bool is_digit(char c) { return c >= '0' and c <= '9'; }
201201

202202
/**
203203
* @brief Parses a datetime string and computes the corresponding timestamp.

cpp/src/io/json/write_json.cu

+51-19
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -376,6 +376,48 @@ std::unique_ptr<column> struct_to_strings(table_view const& strings_columns,
376376
{});
377377
}
378378

379+
struct scatter_fn {
380+
column_device_view _col;
381+
size_type* _d_strview_offsets;
382+
string_view* _d_strviews;
383+
size_type const* _labels;
384+
size_type const* _list_offsets;
385+
column_device_view _d_strings_children;
386+
string_view _element_seperator;
387+
string_view _element_narep;
388+
389+
scatter_fn(column_device_view col,
390+
size_type* d_strview_offsets,
391+
string_view* d_strviews,
392+
size_type const* labels,
393+
size_type const* list_offsets,
394+
column_device_view d_strings_children,
395+
string_view const element_separator,
396+
string_view const element_narep) noexcept
397+
: _col{col},
398+
_d_strview_offsets{d_strview_offsets},
399+
_d_strviews{d_strviews},
400+
_labels{labels},
401+
_list_offsets{list_offsets},
402+
_d_strings_children{d_strings_children},
403+
_element_seperator{element_separator},
404+
_element_narep{element_narep}
405+
{
406+
}
407+
408+
__device__ void operator()(size_type idx) const
409+
{
410+
auto const label = _labels[idx];
411+
auto const sublist_index = idx - _list_offsets[label];
412+
auto const strview_index = _d_strview_offsets[label] + sublist_index * 2 + 1;
413+
// value or na_rep
414+
auto const strview = _d_strings_children.element<cudf::string_view>(idx);
415+
_d_strviews[strview_index] = _d_strings_children.is_null(idx) ? _element_narep : strview;
416+
// separator
417+
if (sublist_index != 0) { _d_strviews[strview_index - 1] = _element_seperator; }
418+
}
419+
};
420+
379421
/**
380422
* @brief Concatenates a list of strings columns into a single strings column.
381423
*
@@ -461,24 +503,14 @@ std::unique_ptr<column> join_list_of_strings(lists_column_view const& lists_stri
461503
thrust::for_each(rmm::exec_policy_nosync(stream),
462504
thrust::make_counting_iterator<size_type>(0),
463505
thrust::make_counting_iterator<size_type>(num_strings),
464-
[col = *col_device_view,
465-
d_strview_offsets = d_strview_offsets.begin(),
466-
d_strviews = d_strviews.begin(),
467-
labels = labels->view().begin<size_type>(),
468-
list_offsets = offsets.begin<size_type>(),
469-
d_strings_children = *d_strings_children,
470-
element_separator,
471-
element_narep] __device__(auto idx) {
472-
auto const label = labels[idx];
473-
auto const sublist_index = idx - list_offsets[label];
474-
auto const strview_index = d_strview_offsets[label] + sublist_index * 2 + 1;
475-
// value or na_rep
476-
auto const strview = d_strings_children.element<cudf::string_view>(idx);
477-
d_strviews[strview_index] =
478-
d_strings_children.is_null(idx) ? element_narep : strview;
479-
// separator
480-
if (sublist_index != 0) { d_strviews[strview_index - 1] = element_separator; }
481-
});
506+
scatter_fn{*col_device_view,
507+
d_strview_offsets.data(),
508+
d_strviews.data(),
509+
labels->view().data<size_type>(),
510+
offsets.data<size_type>(),
511+
*d_strings_children,
512+
element_separator,
513+
element_narep});
482514

483515
auto joined_col = make_strings_column(d_strviews, string_view{nullptr, 0}, stream, mr);
484516

0 commit comments

Comments
 (0)