From bba150c9581dee689dabd2d44f81241fa21306ee Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 5 Nov 2024 10:12:05 -0800 Subject: [PATCH 01/59] Implement host udf aggregation Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 101 +++++++++++------- .../cudf/detail/aggregation/aggregation.hpp | 35 ++++++ cpp/src/aggregation/aggregation.cpp | 23 ++++ cpp/src/groupby/sort/aggregate.cpp | 17 +++ cpp/src/reductions/reductions.cpp | 3 + 5 files changed, 142 insertions(+), 37 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index f5f514d26d9..6fed35d8f2b 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -18,6 +18,9 @@ #include #include +#include + +#include #include #include @@ -84,43 +87,44 @@ class aggregation { * @brief Possible aggregation operations */ enum Kind { - SUM, ///< sum reduction - PRODUCT, ///< product reduction - MIN, ///< min reduction - MAX, ///< max reduction - COUNT_VALID, ///< count number of valid elements - COUNT_ALL, ///< count number of elements - ANY, ///< any reduction - ALL, ///< all reduction - SUM_OF_SQUARES, ///< sum of squares reduction - MEAN, ///< arithmetic mean reduction - M2, ///< sum of squares of differences from the mean - VARIANCE, ///< variance - STD, ///< standard deviation - MEDIAN, ///< median reduction - QUANTILE, ///< compute specified quantile(s) - ARGMAX, ///< Index of max element - ARGMIN, ///< Index of min element - NUNIQUE, ///< count number of unique elements - NTH_ELEMENT, ///< get the nth element - ROW_NUMBER, ///< get row-number of current index (relative to rolling window) - EWMA, ///< get exponential weighted moving average at current index - RANK, ///< get rank of current index - COLLECT_LIST, ///< collect values into a list - COLLECT_SET, ///< collect values into a list without duplicate entries - LEAD, ///< window function, accesses row at specified offset following current row - LAG, ///< window function, 
accesses row at specified offset preceding current row - PTX, ///< PTX UDF based reduction - CUDA, ///< CUDA UDF based reduction - MERGE_LISTS, ///< merge multiple lists values into one list - MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries - MERGE_M2, ///< merge partial values of M2 aggregation, - COVARIANCE, ///< covariance between two sets of elements - CORRELATION, ///< correlation between two sets of elements - TDIGEST, ///< create a tdigest from a set of input values - MERGE_TDIGEST, ///< create a tdigest by merging multiple tdigests together - HISTOGRAM, ///< compute frequency of each element - MERGE_HISTOGRAM ///< merge partial values of HISTOGRAM aggregation, + SUM, ///< sum reduction + PRODUCT, ///< product reduction + MIN, ///< min reduction + MAX, ///< max reduction + COUNT_VALID, ///< count number of valid elements + COUNT_ALL, ///< count number of elements + ANY, ///< any reduction + ALL, ///< all reduction + SUM_OF_SQUARES, ///< sum of squares reduction + MEAN, ///< arithmetic mean reduction + M2, ///< sum of squares of differences from the mean + VARIANCE, ///< variance + STD, ///< standard deviation + MEDIAN, ///< median reduction + QUANTILE, ///< compute specified quantile(s) + ARGMAX, ///< Index of max element + ARGMIN, ///< Index of min element + NUNIQUE, ///< count number of unique elements + NTH_ELEMENT, ///< get the nth element + ROW_NUMBER, ///< get row-number of current index (relative to rolling window) + EWMA, ///< get exponential weighted moving average at current index + RANK, ///< get rank of current index + COLLECT_LIST, ///< collect values into a list + COLLECT_SET, ///< collect values into a list without duplicate entries + LEAD, ///< window function, accesses row at specified offset following current row + LAG, ///< window function, accesses row at specified offset preceding current row + PTX, ///< PTX UDF based reduction + CUDA, ///< CUDA UDF based reduction + MERGE_LISTS, ///< merge multiple 
lists values into one list + MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries + MERGE_M2, ///< merge partial values of M2 aggregation, + COVARIANCE, ///< covariance between two sets of elements + CORRELATION, ///< correlation between two sets of elements + TDIGEST, ///< create a tdigest from a set of input values + MERGE_TDIGEST, ///< create a tdigest by merging multiple tdigests together + HISTOGRAM, ///< compute frequency of each element + MERGE_HISTOGRAM, ///< merge partial values of HISTOGRAM aggregation + HOST_UDF ///< host side UDF aggregation }; aggregation() = delete; @@ -770,5 +774,28 @@ std::unique_ptr make_tdigest_aggregation(int max_centroids = 1000); template std::unique_ptr make_merge_tdigest_aggregation(int max_centroids = 1000); +// We should pass as many parameters as possible to this function pointer, +// thus the UDF can have anything it needs to perform its operations. +// Currently (modify if needed): +// column_view const& input, +// cudf::device_span group_offsets, +// cudf::device_span group_labels, +// size_type num_groups, +// int max_centroids, +// rmm::cuda_stream_view stream, +// rmm::device_async_resource_ref mr +using host_udf_func_type = std::function(column_view const&, + device_span, + device_span, + size_type, + rmm::cuda_stream_view, + rmm::device_async_resource_ref)>; +/** + * @brief make_host_udf_aggregation + * @return + */ +template +std::unique_ptr make_host_udf_aggregation(host_udf_func_type udf_func_); + /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 6661a461b8b..aec73496475 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -104,6 +105,8 @@ class simple_aggregations_collector { // Declares the interface 
for the simple class tdigest_aggregation const& agg); virtual std::vector> visit( data_type col_type, class merge_tdigest_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class host_udf_aggregation const& agg); }; class aggregation_finalizer { // Declares the interface for the finalizer @@ -144,6 +147,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class tdigest_aggregation const& agg); virtual void visit(class merge_tdigest_aggregation const& agg); virtual void visit(class ewma_aggregation const& agg); + virtual void visit(class host_udf_aggregation const& agg); }; /** @@ -1186,6 +1190,30 @@ class merge_tdigest_aggregation final : public groupby_aggregation, public reduc void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; +/** + * @brief + */ +class host_udf_aggregation final : public groupby_aggregation, public reduce_aggregation { + public: + host_udf_func_type host_udf_ptr; + + explicit host_udf_aggregation(host_udf_func_type host_udf_ptr_) + : aggregation{HOST_UDF}, host_udf_ptr{std::move(host_udf_ptr_)} + { + } + + [[nodiscard]] std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + /** * @brief Sentinel value used for `ARGMAX` aggregation. 
* @@ -1462,6 +1490,11 @@ struct target_type_impl +struct target_type_impl { + using type = struct_view; +}; + /** * @brief Helper alias to get the accumulator type for performing aggregation * `k` on elements of type `Source` @@ -1579,6 +1612,8 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind return f.template operator()(std::forward(args)...); case aggregation::EWMA: return f.template operator()(std::forward(args)...); + case aggregation::HOST_UDF: + return f.template operator()(std::forward(args)...); default: { #ifndef __CUDA_ARCH__ CUDF_FAIL("Unsupported aggregation."); diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index a60a7f63882..64c877bb505 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -237,6 +237,12 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } +std::vector> simple_aggregations_collector::visit( + data_type col_type, host_udf_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + // aggregation_finalizer ---------------------------------------- void aggregation_finalizer::visit(aggregation const& agg) {} @@ -410,6 +416,11 @@ void aggregation_finalizer::visit(merge_tdigest_aggregation const& agg) visit(static_cast(agg)); } +void aggregation_finalizer::visit(host_udf_aggregation const& agg) +{ + visit(static_cast(agg)); +} + } // namespace detail std::vector> aggregation::get_simple_aggregations( @@ -917,6 +928,18 @@ make_merge_tdigest_aggregation(int max_centroids); template CUDF_EXPORT std::unique_ptr make_merge_tdigest_aggregation(int max_centroids); +template +std::unique_ptr make_host_udf_aggregation(host_udf_func_type udf_func_) +{ + return std::make_unique(udf_func_); +} +template CUDF_EXPORT std::unique_ptr make_host_udf_aggregation( + host_udf_func_type); +template CUDF_EXPORT std::unique_ptr + make_host_udf_aggregation(host_udf_func_type); +template 
CUDF_EXPORT std::unique_ptr + make_host_udf_aggregation(host_udf_func_type); + namespace detail { namespace { struct target_type_functor { diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 3041e261945..7ae621573c1 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -791,6 +791,23 @@ void aggregate_result_functor::operator()(aggregatio mr)); } +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + // TODO: Add a name string to the aggregation so that we can look up different host UDFs. + if (cache.has_result(values, agg)) { return; } + auto const udf_ptr = dynamic_cast(agg).host_udf_ptr; + CUDF_EXPECTS(udf_ptr != nullptr, "errrrrrrrrr"); + cache.add_result(values, + agg, + udf_ptr(get_grouped_values(), + helper.group_offsets(stream), + helper.group_labels(stream), + helper.num_groups(stream), + stream, + mr)); +} + } // namespace detail // Sort-based groupby diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 75ebc078930..94c8722ccbe 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -144,6 +144,9 @@ struct reduce_dispatch_functor { auto td_agg = static_cast(agg); return tdigest::detail::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr); } + case aggregation::HOST_UDF: { + CUDF_FAIL("Host UDF aggregation is not implemented in `reduction`"); + } default: CUDF_FAIL("Unsupported reduction operator"); } } From 04e2bdaa1daf399d9c2a387f8d902dd646fe3d0a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 5 Nov 2024 10:56:00 -0800 Subject: [PATCH 02/59] Add test Signed-off-by: Nghia Truong --- cpp/tests/CMakeLists.txt | 39 +--------- cpp/tests/groupby/host_udf_tests.cu | 111 ++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 38 deletions(-) create mode 100644 cpp/tests/groupby/host_udf_tests.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 
23632f6fbba..abfb9da22f1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -122,44 +122,7 @@ ConfigureTest(TIMESTAMPS_TEST wrappers/timestamps_test.cu) # * groupby tests --------------------------------------------------------------------------------- ConfigureTest( GROUPBY_TEST - groupby/argmin_tests.cpp - groupby/argmax_tests.cpp - groupby/collect_list_tests.cpp - groupby/collect_set_tests.cpp - groupby/correlation_tests.cpp - groupby/count_scan_tests.cpp - groupby/count_tests.cpp - groupby/covariance_tests.cpp - groupby/groupby_test_util.cpp - groupby/groups_tests.cpp - groupby/histogram_tests.cpp - groupby/keys_tests.cpp - groupby/lists_tests.cpp - groupby/m2_tests.cpp - groupby/min_tests.cpp - groupby/max_scan_tests.cpp - groupby/max_tests.cpp - groupby/mean_tests.cpp - groupby/median_tests.cpp - groupby/merge_m2_tests.cpp - groupby/merge_lists_tests.cpp - groupby/merge_sets_tests.cpp - groupby/min_scan_tests.cpp - groupby/nth_element_tests.cpp - groupby/nunique_tests.cpp - groupby/product_scan_tests.cpp - groupby/product_tests.cpp - groupby/quantile_tests.cpp - groupby/rank_scan_tests.cpp - groupby/replace_nulls_tests.cpp - groupby/shift_tests.cpp - groupby/std_tests.cpp - groupby/structs_tests.cpp - groupby/sum_of_squares_tests.cpp - groupby/sum_scan_tests.cpp - groupby/sum_tests.cpp - groupby/tdigest_tests.cu - groupby/var_tests.cpp + groupby/host_udf_tests.cu GPUS 1 PERCENT 100 ) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu new file mode 100644 index 00000000000..2c263c5da88 --- /dev/null +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +using namespace cudf::test::iterators; + +struct test : public cudf::test::BaseFixture {}; + +std::unique_ptr double_sqr(cudf::column_view const& values, + cudf::device_span group_offsets, + cudf::device_span group_labels, + cudf::size_type num_groups, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto output = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT32}, values.size(), cudf::mask_state::UNALLOCATED, stream); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.size()), + output->mutable_view().begin(), + [values = values.begin()] __device__(int idx) -> int { + return 2 * values[idx] * values[idx]; + }); + return output; +} + +std::unique_ptr triple_sqr(cudf::column_view const& values, + cudf::device_span group_offsets, + cudf::device_span group_labels, + cudf::size_type num_groups, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto output = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT32}, values.size(), cudf::mask_state::UNALLOCATED, stream); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.size()), + output->mutable_view().begin(), + [values = values.begin()] __device__(int idx) -> int { + return 3 * values[idx] * values[idx]; + }); + return output; +} + +TEST_F(test, double_sqr) +{ 
+ cudf::test::fixed_width_column_wrapper keys{1, 1, 1, 1, 1}; + cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4}; + + auto agg = cudf::make_host_udf_aggregation(double_sqr); + std::vector requests; + requests.emplace_back(); + requests[0].values = vals; + requests[0].aggregations.push_back(std::move(agg)); + cudf::groupby::groupby gb_obj( + cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); + + auto result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); + + // Got output: 0,2,8,18,32 + cudf::test::print(*result.second[0].results[0]); +} + +TEST_F(test, triple_sqr) +{ + cudf::test::fixed_width_column_wrapper keys{1, 1, 1, 1, 1}; + cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4}; + + auto agg = cudf::make_host_udf_aggregation(triple_sqr); + std::vector requests; + requests.emplace_back(); + requests[0].values = vals; + requests[0].aggregations.push_back(std::move(agg)); + cudf::groupby::groupby gb_obj( + cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); + + auto result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); + + // Got output: 0,3,12,27,48 + cudf::test::print(*result.second[0].results[0]); +} From 5f7ab2b56d3ed6691ecf0d62ef3f484101d5c552 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 5 Nov 2024 11:35:50 -0800 Subject: [PATCH 03/59] Change example to compute aggregation on each group Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 54 ++++++++++++++++++----------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 2c263c5da88..0cc657b2c51 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -27,13 +27,14 @@ #include +#include #include -#include using namespace cudf::test::iterators; struct test : public cudf::test::BaseFixture {}; +// For each group: compute (group_idx + 1)* values^2 * 2 
std::unique_ptr double_sqr(cudf::column_view const& values, cudf::device_span group_offsets, cudf::device_span group_labels, @@ -43,16 +44,22 @@ std::unique_ptr double_sqr(cudf::column_view const& values, { auto output = cudf::make_numeric_column( cudf::data_type{cudf::type_id::INT32}, values.size(), cudf::mask_state::UNALLOCATED, stream); - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(values.size()), - output->mutable_view().begin(), - [values = values.begin()] __device__(int idx) -> int { - return 2 * values[idx] * values[idx]; - }); + thrust::for_each(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_groups), + [output = output->mutable_view().begin(), + values = values.begin(), + group_offsets] __device__(int idx) -> int { + auto start = group_offsets[idx]; + auto end = group_offsets[idx + 1]; + for (int i = start; i < end; ++i) { + output[i] = (idx + 1) * 2 * values[i] * values[i]; + } + }); return output; } +// For each group: compute (group_idx + 1)* values^2 * 3 std::unique_ptr triple_sqr(cudf::column_view const& values, cudf::device_span group_offsets, cudf::device_span group_labels, @@ -62,20 +69,25 @@ std::unique_ptr triple_sqr(cudf::column_view const& values, { auto output = cudf::make_numeric_column( cudf::data_type{cudf::type_id::INT32}, values.size(), cudf::mask_state::UNALLOCATED, stream); - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(values.size()), - output->mutable_view().begin(), - [values = values.begin()] __device__(int idx) -> int { - return 3 * values[idx] * values[idx]; - }); + thrust::for_each(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_groups), + [output = output->mutable_view().begin(), + values = values.begin(), + group_offsets] __device__(int idx) -> int { + auto start = group_offsets[idx]; + 
auto end = group_offsets[idx + 1]; + for (int i = start; i < end; ++i) { + output[i] = (idx + 1) * 3 * values[i] * values[i]; + } + }); return output; } TEST_F(test, double_sqr) { - cudf::test::fixed_width_column_wrapper keys{1, 1, 1, 1, 1}; - cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4}; + cudf::test::fixed_width_column_wrapper keys{1, 2, 3, 1, 2, 3}; + cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4, 5}; auto agg = cudf::make_host_udf_aggregation(double_sqr); std::vector requests; @@ -87,14 +99,14 @@ TEST_F(test, double_sqr) auto result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); - // Got output: 0,2,8,18,32 + // Got output: 0,18,4,64,24,150 cudf::test::print(*result.second[0].results[0]); } TEST_F(test, triple_sqr) { - cudf::test::fixed_width_column_wrapper keys{1, 1, 1, 1, 1}; - cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4}; + cudf::test::fixed_width_column_wrapper keys{1, 2, 3, 1, 2, 3}; + cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4, 5}; auto agg = cudf::make_host_udf_aggregation(triple_sqr); std::vector requests; @@ -106,6 +118,6 @@ TEST_F(test, triple_sqr) auto result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); - // Got output: 0,3,12,27,48 + // Got output: 0,27,6,96,36,225 cudf::test::print(*result.second[0].results[0]); } From 57674e15060b3f1e90f0c7a3c256127c2fdfb4d5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Nov 2024 15:55:40 -0800 Subject: [PATCH 04/59] Add `host_udf_base` class Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 193 +++++++++++++++++++++---------- 1 file changed, 132 insertions(+), 61 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 6fed35d8f2b..b397b4d2f4b 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -24,6 +24,9 @@ #include #include +#include +#include +#include #include /** @@ -87,44 +90,44 @@ class aggregation { * @brief 
Possible aggregation operations */ enum Kind { - SUM, ///< sum reduction - PRODUCT, ///< product reduction - MIN, ///< min reduction - MAX, ///< max reduction - COUNT_VALID, ///< count number of valid elements - COUNT_ALL, ///< count number of elements - ANY, ///< any reduction - ALL, ///< all reduction - SUM_OF_SQUARES, ///< sum of squares reduction - MEAN, ///< arithmetic mean reduction - M2, ///< sum of squares of differences from the mean - VARIANCE, ///< variance - STD, ///< standard deviation - MEDIAN, ///< median reduction - QUANTILE, ///< compute specified quantile(s) - ARGMAX, ///< Index of max element - ARGMIN, ///< Index of min element - NUNIQUE, ///< count number of unique elements - NTH_ELEMENT, ///< get the nth element - ROW_NUMBER, ///< get row-number of current index (relative to rolling window) - EWMA, ///< get exponential weighted moving average at current index - RANK, ///< get rank of current index - COLLECT_LIST, ///< collect values into a list - COLLECT_SET, ///< collect values into a list without duplicate entries - LEAD, ///< window function, accesses row at specified offset following current row - LAG, ///< window function, accesses row at specified offset preceding current row - PTX, ///< PTX UDF based reduction - CUDA, ///< CUDA UDF based reduction - MERGE_LISTS, ///< merge multiple lists values into one list - MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries - MERGE_M2, ///< merge partial values of M2 aggregation, - COVARIANCE, ///< covariance between two sets of elements - CORRELATION, ///< correlation between two sets of elements - TDIGEST, ///< create a tdigest from a set of input values - MERGE_TDIGEST, ///< create a tdigest by merging multiple tdigests together - HISTOGRAM, ///< compute frequency of each element - MERGE_HISTOGRAM, ///< merge partial values of HISTOGRAM aggregation - HOST_UDF ///< host side UDF aggregation + SUM, ///< sum reduction + PRODUCT, ///< product reduction + MIN, ///< 
min reduction + MAX, ///< max reduction + COUNT_VALID, ///< count number of valid elements + COUNT_ALL, ///< count number of elements + ANY, ///< any reduction + ALL, ///< all reduction + SUM_OF_SQUARES, ///< sum of squares reduction + MEAN, ///< arithmetic mean reduction + M2, ///< sum of squares of differences from the mean + VARIANCE, ///< variance + STD, ///< standard deviation + MEDIAN, ///< median reduction + QUANTILE, ///< compute specified quantile(s) + ARGMAX, ///< Index of max element + ARGMIN, ///< Index of min element + NUNIQUE, ///< count number of unique elements + NTH_ELEMENT, ///< get the nth element + ROW_NUMBER, ///< get row-number of current index (relative to rolling window) + EWMA, ///< get exponential weighted moving average at current index + RANK, ///< get rank of current index + COLLECT_LIST, ///< collect values into a list + COLLECT_SET, ///< collect values into a list without duplicate entries + LEAD, ///< window function, accesses row at specified offset following current row + LAG, ///< window function, accesses row at specified offset preceding current row + PTX, ///< PTX based UDF aggregation + CUDA, ///< CUDA based UDF aggregation + HOST_UDF, ///< host based UDF aggregation + MERGE_LISTS, ///< merge multiple lists values into one list + MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries + MERGE_M2, ///< merge partial values of M2 aggregation, + COVARIANCE, ///< covariance between two sets of elements + CORRELATION, ///< correlation between two sets of elements + TDIGEST, ///< create a tdigest from a set of input values + MERGE_TDIGEST, ///< create a tdigest by merging multiple tdigests together + HISTOGRAM, ///< compute frequency of each element + MERGE_HISTOGRAM ///< merge partial values of HISTOGRAM aggregation }; aggregation() = delete; @@ -603,6 +606,97 @@ std::unique_ptr make_udf_aggregation(udf_type type, std::string const& user_defined_aggregator, data_type output_type); +/** + * @brief The 
base class for HOST_UDF implementation. + * + * The users need to derive from this base class, defining their own implementation for a UDF + * function as well as all the required data from libcudf to perform its operations. + */ +struct host_udf_base { + host_udf_base() = default; + virtual ~host_udf_base() = default; + + /** + * @brief Define the data that may be needed for computing the aggregation. + * + * Each derived HOST_UDF class may need a different set of intermediate aggregation data (such as + * sorted values, sorted keys, group offsets etc). It is inefficient to evaluate and pass down all + * these data at once. As such, the derived HOST_UDF class will define a set of data that it + * needs, and only such requested data will be evaluated. + */ + // TODO: add more + enum class input_kind { OUTPUT_DTYPE, GROUPED_VALUES, GROUPED_OFFSETS, GROUPED_LABELS }; + + /** + * @brief Return a set of data kind that is needed for computing the aggregation on the derived + * HOST_UDF class. + * + * This set is used by libcudf to determine which data need to be evaluated and pass down to the + * instance of the derived HOST_UDF class at runtime. + * + * @return A set of `input_kind` enum. + */ + [[nodiscard]] virtual std::unordered_set get_required_data() const = 0; + + /** + * Aggregation data that is needed for performing UDF computation. + */ + using input_data = std::variant, size_type, data_type>; + + /** + * Output type of the UDF class. It can be either a scalar (for reduction) or a column + * (for groupby) aggregations. + */ + using output_type = std::variant, std::unique_ptr>; + + /** + * @brief Perform UDF aggregation computation. 
+ * + * @param agg_data The aggregation data needed for performing all computation + * @param stream The CUDA stream to use for any kernel launches + * @param mr Device memory resource to use for any allocations + * @return The computed cudf column as the result of UDF aggregation + */ + [[nodiscard]] virtual output_type operator()( + std::unordered_map const& agg_data, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) = 0; + + /** + * @brief Compares two HOST_UDF objects for equality. + * + * @param other The other HOST_UDF object to compare with + * @return True if the two objects are equal + */ + [[nodiscard]] virtual bool is_equal(host_udf_base const& other) const = 0; + + /** + * @brief Computes the hash value of the HOST_UDF object. + * + * @return The hash value of the object + */ + [[nodiscard]] virtual size_t do_hash() const = 0; + + /** + * @pure @brief Clones the HOST_UDF object. + * + * A class derived from `host_udf_base` should not store too much data so that its instances + * remain lightweight for efficient cloning. + * + * @return A copy of the HOST_UDF object + */ + [[nodiscard]] virtual std::unique_ptr clone() const = 0; +}; + +/** + * @brief Factory to create a HOST_UDF aggregation + * + * @param host_udf An instance of a class derived from `host_udf_base` to perform UDF aggregation + * @return A HOST_UDF aggregation object + */ +template +std::unique_ptr make_host_udf_aggregation(std::unique_ptr host_udf); + /** * @brief Factory to create a MERGE_LISTS aggregation. * @@ -774,28 +868,5 @@ std::unique_ptr make_tdigest_aggregation(int max_centroids = 1000); template std::unique_ptr make_merge_tdigest_aggregation(int max_centroids = 1000); -// We should pass as many parameters as possible to this function pointer, -// thus the UDF can have anything it needs to perform its operations. 
-// Currently (modify if needed): -// column_view const& input, -// cudf::device_span group_offsets, -// cudf::device_span group_labels, -// size_type num_groups, -// int max_centroids, -// rmm::cuda_stream_view stream, -// rmm::device_async_resource_ref mr -using host_udf_func_type = std::function(column_view const&, - device_span, - device_span, - size_type, - rmm::cuda_stream_view, - rmm::device_async_resource_ref)>; -/** - * @brief make_host_udf_aggregation - * @return - */ -template -std::unique_ptr make_host_udf_aggregation(host_udf_func_type udf_func_); - /** @} */ // end of group } // namespace CUDF_EXPORT cudf From 47c7a7cfad036fa170957d10041f531cee969fa4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Nov 2024 15:59:26 -0800 Subject: [PATCH 05/59] Rename variable Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index b397b4d2f4b..71767ed9a66 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -658,7 +658,7 @@ struct host_udf_base { * @return The computed cudf column as the result of UDF aggregation */ [[nodiscard]] virtual output_type operator()( - std::unordered_map const& agg_data, + std::unordered_map const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) = 0; From 5e6017a9178a4642a2553e6ce22969fb159f23d1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 13:47:29 -0800 Subject: [PATCH 06/59] Rewrite docs Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 87 ++++++++++++++++++++------------ cpp/include/cudf/reduction.hpp | 5 -- 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 71767ed9a66..cc818848214 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -24,6 +24,7 @@ #include 
#include +#include #include #include #include @@ -76,6 +77,11 @@ enum class rank_percentage : int32_t { ONE_NORMALIZED ///< (rank - 1) / (count - 1) }; +/** + * @brief Enum to describe scan operation type. + */ +enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; + /** * @brief Abstract base class for specifying the desired aggregation in an * `aggregation_request`. @@ -607,55 +613,70 @@ std::unique_ptr make_udf_aggregation(udf_type type, data_type output_type); /** - * @brief The base class for HOST_UDF implementation. + * @brief The base class for host-based UDF implementation. * - * The users need to derive from this base class, defining their own implementation for a UDF - * function as well as all the required data from libcudf to perform its operations. + * An actual implementation of host-based UDF needs to be derived from this base class, defining + * its own operations as well as all the required input data to the aggregation. */ struct host_udf_base { host_udf_base() = default; virtual ~host_udf_base() = default; /** - * @brief Define the data that may be needed for computing the aggregation. + * @brief Define the possible data that may be needed in the derived class for its operations. * - * Each derived HOST_UDF class may need a different set of intermediate aggregation data (such as - * sorted values, sorted keys, group offsets etc). It is inefficient to evaluate and pass down all - * these data at once. As such, the derived HOST_UDF class will define a set of data that it - * needs, and only such requested data will be evaluated. + * Each derived host-based UDF class may need a different set of input data (such as sorted + * values, group labels, group offsets etc). It is inefficient to evaluate and pass down all these + * data at once from libcudf. A solution for that is, the derived class defines a subset of data + * that it needs and only such data will be evaluated. 
*/ - // TODO: add more - enum class input_kind { OUTPUT_DTYPE, GROUPED_VALUES, GROUPED_OFFSETS, GROUPED_LABELS }; + enum class input_kind { + INPUT_VALUES, // the input values column + NULL_POLICY, // to control null handling, used in scan + OUTPUT_DTYPE, // output data type, used in reduction + INIT_VALUE, // initial value for reduction + SCAN_TYPE, // used in scan aggregations + OFFSETS, // offsets for sort-based groupby or segmented reduction + GROUP_LABELS, // group labels used in sort-based groupby + SORTED_GROUPED_VALUES, // the input values grouped according to the input `keys` and + // sorted within each group, used in sort-based groupby + GROUPED_VALUES, // the input values grouped according to the input `keys` for which the + // values within each group maintain their original order, + // used in sort-based groupby + NUM_GROUPS // number of groups, used in sort-based groupby + }; /** - * @brief Return a set of data kind that is needed for computing the aggregation on the derived - * HOST_UDF class. - * - * This set is used by libcudf to determine which data need to be evaluated and pass down to the - * instance of the derived HOST_UDF class at runtime. + * @brief Return a set of data kind that is needed for computing the aggregation. * * @return A set of `input_kind` enum. */ - [[nodiscard]] virtual std::unordered_set get_required_data() const = 0; + [[nodiscard]] virtual std::unordered_set const& get_required_data() const = 0; /** - * Aggregation data that is needed for performing UDF computation. + * Aggregation data that is needed for computing the aggregation. */ - using input_data = std::variant, size_type, data_type>; + using input_data = std::variant>, + scan_type, + device_span, + size_type>; /** - * Output type of the UDF class. It can be either a scalar (for reduction) or a column - * (for groupby) aggregations. + * Output type of the aggregation. It can be either a scalar (for reduction) or a column + * (for groupby) aggregation. 
*/ using output_type = std::variant, std::unique_ptr>; /** - * @brief Perform UDF aggregation computation. + * @brief Perform the main computation for the host-based UDF. * - * @param agg_data The aggregation data needed for performing all computation + * @param input The input data needed for performing all computation * @param stream The CUDA stream to use for any kernel launches * @param mr Device memory resource to use for any allocations - * @return The computed cudf column as the result of UDF aggregation + * @return The output result of the aggregation */ [[nodiscard]] virtual output_type operator()( std::unordered_map const& input, @@ -663,35 +684,35 @@ struct host_udf_base { rmm::device_async_resource_ref mr) = 0; /** - * @brief Compares two HOST_UDF objects for equality. + * @brief Compares two instances of the derived class for equality. * - * @param other The other HOST_UDF object to compare with - * @return True if the two object are equal + * @param other The other derived class's instance to compare with + * @return True if the two instances are equal */ [[nodiscard]] virtual bool is_equal(host_udf_base const& other) const = 0; /** - * @brief Computes the hash value of the HOST_UDF object. + * @brief Computes hash value of the derived class's instance. * - * @return The hash value of the object + * @return The hash value of the instance */ - [[nodiscard]] virtual size_t do_hash() const = 0; + [[nodiscard]] virtual std::size_t do_hash() const = 0; /** - * @pure @brief Clones the HOST_UDF object. + * @pure @brief Clones the instance. * * A class derived from `host_udf_base` should not store too much data such that its instances * remain lightweight for efficient cloning. 
* - * @return A copy of the HOST_UDF object + * @return A new instance cloned from this */ - [[nodiscard]] virtual std::unique_ptr clone() const = 0; + [[nodiscard]] virtual std::unique_ptr clone() const = 0; }; /** * @brief Factory to create a HOST_UDF aggregation * - * @param host_udf An instance of a class derived from `host_udf_base` to perform UDF aggregation + * @param host_udf An instance of a class derived from `host_udf_base` to perform aggregation * @return A HOST_UDF aggregation object */ template diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index 41be2e70cc3..d72f85d7e28 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -30,11 +30,6 @@ namespace CUDF_EXPORT cudf { * @file */ -/** - * @brief Enum to describe scan operation type - */ -enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; - /** * @brief Computes the reduction of the values in all rows of a column. * From 174678f9e321915b715e01e59d9f2fc896fa91d9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 14:30:06 -0800 Subject: [PATCH 07/59] Implement `host_udf_aggregation` Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 2 +- .../cudf/detail/aggregation/aggregation.hpp | 71 ++++++++++++------- cpp/src/groupby/sort/aggregate.cpp | 66 ++++++++++++----- 3 files changed, 94 insertions(+), 45 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index cc818848214..02e9f336089 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -651,7 +651,7 @@ struct host_udf_base { * * @return A set of `input_kind` enum. */ - [[nodiscard]] virtual std::unordered_set const& get_required_data() const = 0; + [[nodiscard]] virtual std::unordered_set const& get_required_data_kinds() const = 0; /** * Aggregation data that is needed for computing the aggregation. 
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index aec73496475..cc7416958bb 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -89,6 +89,8 @@ class simple_aggregations_collector { // Declares the interface for the simple class lead_lag_aggregation const& agg); virtual std::vector> visit(data_type col_type, class udf_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class host_udf_aggregation const& agg); virtual std::vector> visit(data_type col_type, class merge_lists_aggregation const& agg); virtual std::vector> visit(data_type col_type, @@ -105,8 +107,6 @@ class simple_aggregations_collector { // Declares the interface for the simple class tdigest_aggregation const& agg); virtual std::vector> visit( data_type col_type, class merge_tdigest_aggregation const& agg); - virtual std::vector> visit(data_type col_type, - class host_udf_aggregation const& agg); }; class aggregation_finalizer { // Declares the interface for the finalizer @@ -138,6 +138,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class collect_set_aggregation const& agg); virtual void visit(class lead_lag_aggregation const& agg); virtual void visit(class udf_aggregation const& agg); + virtual void visit(class host_udf_aggregation const& agg); virtual void visit(class merge_lists_aggregation const& agg); virtual void visit(class merge_sets_aggregation const& agg); virtual void visit(class merge_m2_aggregation const& agg); @@ -147,7 +148,6 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class tdigest_aggregation const& agg); virtual void visit(class merge_tdigest_aggregation const& agg); virtual void visit(class ewma_aggregation const& agg); - virtual void visit(class host_udf_aggregation const& agg); }; /** @@ -964,6 +964,47 @@ class 
udf_aggregation final : public rolling_aggregation { } }; +/** + * @brief Derived class for specifying a custom aggregation specified in host-based UDF. + */ +class host_udf_aggregation final : public groupby_aggregation, public reduce_aggregation { + public: + std::unique_ptr udf_ptr; + + host_udf_aggregation() = delete; + host_udf_aggregation(host_udf_aggregation const&) = delete; + + explicit host_udf_aggregation(std::unique_ptr&& udf_ptr_) + : aggregation{HOST_UDF}, udf_ptr{std::move(udf_ptr_)} + { + CUDF_EXPECTS(udf_ptr != nullptr, "Invalid host-based UDF instance."); + } + + [[nodiscard]] bool is_equal(aggregation const& _other) const override + { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return udf_ptr->is_equal(*other.udf_ptr); + } + + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ udf_ptr->do_hash(); + } + + [[nodiscard]] std::unique_ptr clone() const override + { + return std::make_unique(udf_ptr->clone()); + } + + std::vector> get_simple_aggregations( + data_type col_type, simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + /** * @brief Derived aggregation class for specifying MERGE_LISTS aggregation */ @@ -1190,30 +1231,6 @@ class merge_tdigest_aggregation final : public groupby_aggregation, public reduc void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; -/** - * @brief - */ -class host_udf_aggregation final : public groupby_aggregation, public reduce_aggregation { - public: - host_udf_func_type host_udf_ptr; - - explicit host_udf_aggregation(host_udf_func_type host_udf_ptr_) - : aggregation{HOST_UDF}, host_udf_ptr{std::move(host_udf_ptr_)} - { - } - - [[nodiscard]] std::unique_ptr clone() const override - { - return std::make_unique(*this); - } - 
std::vector> get_simple_aggregations( - data_type col_type, simple_aggregations_collector& collector) const override - { - return collector.visit(col_type, *this); - } - void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } -}; - /** * @brief Sentinel value used for `ARGMAX` aggregation. * diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 7ae621573c1..1fbd9b1e402 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -446,6 +446,55 @@ void aggregate_result_functor::operator()(aggregation lists_column_view{collect_result->view()}, nulls_equal, nans_equal, stream, mr)); } +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(values, agg)) { return; } + + auto const& udf_ptr = dynamic_cast(agg).udf_ptr; + auto const& data_kinds = udf_ptr->get_required_data_kinds(); + + // Do not cache udf_input, as the actual input data may change from run to run. 
+ std::unordered_map udf_input; + for (auto const kind : data_kinds) { + switch (kind) { + case cudf::host_udf_base::input_kind::INPUT_VALUES: { + udf_input.emplace(kind, values); + break; + } + + case cudf::host_udf_base::input_kind::OFFSETS: { + udf_input.emplace(kind, helper.group_offsets(stream)); + break; + } + + case cudf::host_udf_base::input_kind::GROUP_LABELS: { + udf_input.emplace(kind, helper.group_labels(stream)); + break; + } + + case cudf::host_udf_base::input_kind::SORTED_GROUPED_VALUES: { + udf_input.emplace(kind, get_sorted_values()); + break; + } + + case cudf::host_udf_base::input_kind::GROUPED_VALUES: { + udf_input.emplace(kind, get_grouped_values()); + break; + } + + case cudf::host_udf_base::input_kind::NUM_GROUPS: { + udf_input.emplace(kind, helper.num_groups(stream)); + break; + } + + default: CUDF_FAIL("Unsupported data kind in sort-based groupby aggregation.") + } + } + + cache.add_result(values, agg, udf_ptr(udf_input, stream, mr)); +} + /** * @brief Perform merging for the lists that correspond to the same key value. * @@ -791,23 +840,6 @@ void aggregate_result_functor::operator()(aggregatio mr)); } -template <> -void aggregate_result_functor::operator()(aggregation const& agg) -{ - // TODO: Add a name string to the aggregation so that we can look up different host UDFs. 
- if (cache.has_result(values, agg)) { return; } - auto const udf_ptr = dynamic_cast(agg).host_udf_ptr; - CUDF_EXPECTS(udf_ptr != nullptr, "errrrrrrrrr"); - cache.add_result(values, - agg, - udf_ptr(get_grouped_values(), - helper.group_offsets(stream), - helper.group_labels(stream), - helper.num_groups(stream), - stream, - mr)); -} - } // namespace detail // Sort-based groupby From cee28f6a2b1083d53fa257feb3ce316ce1d4c6c4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 15:00:57 -0800 Subject: [PATCH 08/59] Change the `host_udf_base` interface Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 02e9f336089..a7c5b910b9d 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -683,6 +683,16 @@ struct host_udf_base { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) = 0; + /** + * @brief Get the output when the input values is empty. + * + * This is needed since libcudf tries to avoid unnecessarily evaluating the intermediate data when + * the input values is empty. + * + * @return The output result of the aggregation when input values is empty + */ + [[nodiscard]] virtual output_type get_empty_output() const = 0; + /** * @brief Compares two instances of the derived class for equality. 
* From 0da498838381f25b26293db3c63d21827d730242 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 15:11:27 -0800 Subject: [PATCH 09/59] Remove `target_type_impl` for `HOST_UDF` Signed-off-by: Nghia Truong --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index cc7416958bb..7fa182c5eb8 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1507,11 +1507,6 @@ struct target_type_impl -struct target_type_impl { - using type = struct_view; -}; - /** * @brief Helper alias to get the accumulator type for performing aggregation * `k` on elements of type `Source` From c9c9ee6f0c87425d22fc211b4d765c0fa6607f76 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 15:13:33 -0800 Subject: [PATCH 10/59] Rewrite comments Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index a7c5b910b9d..ccbc6bbccf5 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -686,8 +686,8 @@ struct host_udf_base { /** * @brief Get the output when the input values is empty. * - * This is needed since libcudf tries to avoid unnecessarily evaluating the intermediate data when - * the input values is empty. + * This may be called in the situations that libcudf tries to avoid unnecessarily evaluating the + * intermediate data when the input values is empty. 
* * @return The output result of the aggregation when input values is empty */ From 227016bec9d95ea942313979a9a26135239e5953 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 15:13:46 -0800 Subject: [PATCH 11/59] Construct empty output when the input is empty Signed-off-by: Nghia Truong --- cpp/src/groupby/groupby.cu | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 6eb82618e2a..e8826339dc9 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -140,6 +140,11 @@ struct empty_column_constructor { return empty_like(values); } + if constexpr (k == aggregation::Kind::HOST_UDF) { + auto const& udf_ptr = dynamic_cast(agg).udf_ptr; + return udf_ptr->get_empty_output(); + } + return make_empty_column(target_type(values.type(), k)); } }; From 15732cf2045fbf93496cff19b8de07ba826b58b8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 15:31:05 -0800 Subject: [PATCH 12/59] Implement `HOST_UDF` for reduction Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 2 +- .../cudf/detail/aggregation/aggregation.hpp | 2 +- cpp/src/groupby/sort/aggregate.cpp | 4 +-- cpp/src/reductions/reductions.cpp | 35 +++++++++++++++++-- 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index ccbc6bbccf5..42c6e4604f1 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -631,7 +631,7 @@ struct host_udf_base { * that it needs and only such data will be evaluated. 
*/ enum class input_kind { - INPUT_VALUES, // the input values column + INPUT_VALUES, // the input values column, may be used in any aggregation NULL_POLICY, // to control null handling, used in scan OUTPUT_DTYPE, // output data type, used in reduction INIT_VALUE, // initial value for reduction diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 7fa182c5eb8..c63e2b405e3 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -969,7 +969,7 @@ class udf_aggregation final : public rolling_aggregation { */ class host_udf_aggregation final : public groupby_aggregation, public reduce_aggregation { public: - std::unique_ptr udf_ptr; + std::unique_ptr const udf_ptr; host_udf_aggregation() = delete; host_udf_aggregation(host_udf_aggregation const&) = delete; diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 1fbd9b1e402..88b7af6bf60 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -488,11 +488,11 @@ void aggregate_result_functor::operator()(aggregation con break; } - default: CUDF_FAIL("Unsupported data kind in sort-based groupby aggregation.") + default: CUDF_FAIL("Unsupported data kind in host-based UDF groupby aggregation."); } } - cache.add_result(values, agg, udf_ptr(udf_input, stream, mr)); + cache.add_result(values, agg, std::get>(udf_ptr(udf_input, stream, mr))); } /** diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 94c8722ccbe..8638952aa8a 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -145,7 +145,34 @@ struct reduce_dispatch_functor { return tdigest::detail::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr); } case aggregation::HOST_UDF: { - CUDF_FAIL("Host UDF aggregation is not implemented in `reduction`"); + auto const& udf_ptr = 
dynamic_cast(agg).udf_ptr; + auto const& data_kinds = udf_ptr->get_required_data_kinds(); + + // Do not cache udf_input, as the actual input data may change from run to run. + std::unordered_map + udf_input; + for (auto const kind : data_kinds) { + switch (kind) { + case cudf::host_udf_base::input_kind::INPUT_VALUES: { + udf_input.emplace(kind, values); + break; + } + + case cudf::host_udf_base::input_kind::OUTPUT_DTYPE: { + udf_input.emplace(kind, output_dtype); + break; + } + + case cudf::host_udf_base::input_kind::INIT_VALUE: { + udf_input.emplace(kind, init); + break; + } + + default: CUDF_FAIL("Unsupported data kind in host-based UDF reduction."); + } + } + + return std::get>(udf_ptr(udf_input, stream, mr)); } default: CUDF_FAIL("Unsupported reduction operator"); } @@ -164,9 +191,11 @@ std::unique_ptr reduce(column_view const& col, cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || - agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { + agg.kind == aggregation::ANY || agg.kind == aggregation::ALL || + agg.kind == aggregation::HOST_UDF)) { CUDF_FAIL( - "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types"); + "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, ALL, and HOST_UDF " + "aggregation types"); } // Returns default scalar if input column is empty or all null From ee28be811c809880653ac3fd922ede4c38b60890 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 15:38:14 -0800 Subject: [PATCH 13/59] Implement `HOST_UDF` for segmented reduction Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 8 ++-- cpp/src/reductions/segmented/reductions.cpp | 46 ++++++++++++++++++++- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 42c6e4604f1..c43ecc179d4 100644 
--- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -632,11 +632,11 @@ struct host_udf_base { */ enum class input_kind { INPUT_VALUES, // the input values column, may be used in any aggregation - NULL_POLICY, // to control null handling, used in scan OUTPUT_DTYPE, // output data type, used in reduction INIT_VALUE, // initial value for reduction + NULL_POLICY, // to control null handling, used in segmented reduction and scan SCAN_TYPE, // used in scan aggregations - OFFSETS, // offsets for sort-based groupby or segmented reduction + OFFSETS, // offsets for segmented reduction or sort-based groupby GROUP_LABELS, // group labels used in sort-based groupby SORTED_GROUPED_VALUES, // the input values grouped according to the input `keys` and // sorted within each group, used in sort-based groupby @@ -657,16 +657,16 @@ struct host_udf_base { * Aggregation data that is needed for computing the aggregation. */ using input_data = std::variant>, + null_policy, scan_type, device_span, size_type>; /** * Output type of the aggregation. It can be either a scalar (for reduction) or a column - * (for groupby) aggregation. + * (for segmented reduction or groupby) aggregation. */ using output_type = std::variant, std::unique_ptr>; diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index c4f6c135dde..db509ddd906 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -95,6 +95,46 @@ struct segmented_reduce_dispatch_functor { } case segmented_reduce_aggregation::NUNIQUE: return segmented_nunique(col, offsets, null_handling, stream, mr); + case aggregation::HOST_UDF: { + auto const& udf_ptr = dynamic_cast(agg).udf_ptr; + auto const& data_kinds = udf_ptr->get_required_data_kinds(); + + // Do not cache udf_input, as the actual input data may change from run to run. 
+ std::unordered_map + udf_input; + for (auto const kind : data_kinds) { + switch (kind) { + case cudf::host_udf_base::input_kind::INPUT_VALUES: { + udf_input.emplace(kind, values); + break; + } + + case cudf::host_udf_base::input_kind::OUTPUT_DTYPE: { + udf_input.emplace(kind, output_dtype); + break; + } + + case cudf::host_udf_base::input_kind::INIT_VALUE: { + udf_input.emplace(kind, init); + break; + } + + case cudf::host_udf_base::input_kind::NULL_POLICY: { + udf_input.emplace(kind, null_handling); + break; + } + + case cudf::host_udf_base::input_kind::OFFSETS: { + udf_input.emplace(kind, offsets); + break; + } + + default: CUDF_FAIL("Unsupported data kind in host-based UDF segmented reduction."); + } + } + + return std::get>(udf_ptr(udf_input, stream, mr)); + } default: CUDF_FAIL("Unsupported aggregation type."); } } @@ -114,9 +154,11 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || - agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { + agg.kind == aggregation::ANY || agg.kind == aggregation::ALL || + agg.kind == aggregation::HOST_UDF)) { CUDF_FAIL( - "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types"); + "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, ALL, and HOST_UDF " + "aggregation types"); } CUDF_EXPECTS(offsets.size() > 0, "`offsets` should have at least 1 element."); From 754ee585d2c7dd506cce63e313c3f5b154a79cae Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 16:17:32 -0800 Subject: [PATCH 14/59] Implementing tests Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 60 +++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 
0cc657b2c51..d9233fc6384 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include @@ -24,6 +22,7 @@ #include #include +#include #include @@ -34,7 +33,60 @@ using namespace cudf::test::iterators; struct test : public cudf::test::BaseFixture {}; -// For each group: compute (group_idx + 1)* values^2 * 2 +/** + * @brief A host-based UDF implementation. + * + * The aggregations perform the following computation: + * - For reduction: compute `sum(value^2, for value in group)`. + * - For segmented reduction: compute `group_size * sum(value^2, for value in group)`. + * - For groupby: compute `(group_label + 1) * sum(value^2, for value in group)`. 
+ */ +template +struct special_sum : cudf::host_udf_base { + [[nodiscard]] std::unordered_set const& get_required_data_kinds() const override + { + static std::unordered_set const required_data_kinds = + [&] -> std::unordered_set { + if constexpr (std::is_same_v) { + return {input_kind::INPUT_VALUES, input_kind::OUTPUT_DTYPE, input_kind::INIT_VALUE}; + } else if constexpr (std::is_same_v) { + return {input_kind::INPUT_VALUES, + input_kind::OUTPUT_DTYPE, + input_kind::INIT_VALUE, + input_kind::NULL_POLICY, + input_kind::OFFSETS}; + } else if constexpr (std::is_same_v) { + return { + input_kind::OFFSETS, + input_kind::GROUP_LABELS, + input_kind::GROUPED_VALUES, + input_kind::NUM_GROUPS, + }; + } else { + CUDF_FAIL("Unsupported agregation type."); + return {}; + } + }(); + + return required_data_kinds; + } + + [[nodiscard]] output_type operator()(std::unordered_map const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) override + { + } + + [[nodiscard]] output_type get_empty_output() const override + { + // + } + + [[nodiscard]] bool is_equal(host_udf_base const& other) const override; + [[nodiscard]] std::size_t do_hash() const override; + [[nodiscard]] std::unique_ptr clone() const override; +}; + std::unique_ptr double_sqr(cudf::column_view const& values, cudf::device_span group_offsets, cudf::device_span group_labels, From 5a7ea455aad8196469d2f682f641330f73cbe779 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 22 Nov 2024 10:53:40 -0800 Subject: [PATCH 15/59] Fix error Signed-off-by: Nghia Truong --- cpp/src/reductions/reductions.cpp | 2 +- cpp/src/reductions/segmented/reductions.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 8638952aa8a..aba987007a0 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -154,7 +154,7 @@ struct reduce_dispatch_functor { for (auto const kind : data_kinds) { 
switch (kind) { case cudf::host_udf_base::input_kind::INPUT_VALUES: { - udf_input.emplace(kind, values); + udf_input.emplace(kind, col); break; } diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index db509ddd906..33dcc8eae4c 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -105,7 +105,7 @@ struct segmented_reduce_dispatch_functor { for (auto const kind : data_kinds) { switch (kind) { case cudf::host_udf_base::input_kind::INPUT_VALUES: { - udf_input.emplace(kind, values); + udf_input.emplace(kind, col); break; } From e0999bbb1eaceb13a28dfa29c691a84b1dc18b49 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 22 Nov 2024 14:13:55 -0800 Subject: [PATCH 16/59] Change `host_udf_base` interface Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 16 +++++++++++----- cpp/src/groupby/groupby.cu | 2 +- cpp/src/groupby/sort/aggregate.cpp | 5 ----- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index c43ecc179d4..ec7a17e49df 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -640,10 +640,9 @@ struct host_udf_base { GROUP_LABELS, // group labels used in sort-based groupby SORTED_GROUPED_VALUES, // the input values grouped according to the input `keys` and // sorted within each group, used in sort-based groupby - GROUPED_VALUES, // the input values grouped according to the input `keys` for which the + GROUPED_VALUES // the input values grouped according to the input `keys` for which the // values within each group maintain their original order, // used in sort-based groupby - NUM_GROUPS // number of groups, used in sort-based groupby }; /** @@ -661,8 +660,7 @@ struct host_udf_base { std::optional>, null_policy, scan_type, - device_span, - size_type>; + device_span>; /** * Output type of the aggregation. 
It can be either a scalar (for reduction) or a column @@ -689,9 +687,17 @@ struct host_udf_base { * This may be called in the situations that libcudf tries to avoid unnecessarily evaluating the * intermediate data when the input values is empty. * + * @param output_dtype The expected output data type for reduction (if specified) + * @param init The initial value for reduction (if specified) + * @param stream The CUDA stream to use for any kernel launches + * @param mr Device memory resource to use for any allocations * @return The output result of the aggregation when input values is empty */ - [[nodiscard]] virtual output_type get_empty_output() const = 0; + [[nodiscard]] virtual output_type get_empty_output( + std::optional output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const = 0; /** * @brief Compares two instances of the derived class for equality. diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index e8826339dc9..260181eb895 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -142,7 +142,7 @@ struct empty_column_constructor { if constexpr (k == aggregation::Kind::HOST_UDF) { auto const& udf_ptr = dynamic_cast(agg).udf_ptr; - return udf_ptr->get_empty_output(); + return udf_ptr->get_empty_output(std::nullopt, std::nullopt, stream, mr); } return make_empty_column(target_type(values.type(), k)); diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 88b7af6bf60..d27ec7ee0d7 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -483,11 +483,6 @@ void aggregate_result_functor::operator()(aggregation con break; } - case cudf::host_udf_base::input_kind::NUM_GROUPS: { - udf_input.emplace(kind, helper.num_groups(stream)); - break; - } - default: CUDF_FAIL("Unsupported data kind in host-based UDF groupby aggregation."); } } From 52e0acd90d485961aa2db27ec0087f543cbc7dc6 Mon Sep 17 00:00:00 2001 
From: Nghia Truong Date: Fri, 22 Nov 2024 14:14:29 -0800 Subject: [PATCH 17/59] Implement `test_udf_simple_type` Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 334 ++++++++++++++++++++++------ 1 file changed, 267 insertions(+), 67 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index d9233fc6384..193a04e7cb9 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -20,14 +20,22 @@ #include #include +#include #include #include +#include +#include #include +#include +#include +#include #include -#include #include +#include +#include +#include using namespace cudf::test::iterators; @@ -37,12 +45,22 @@ struct test : public cudf::test::BaseFixture {}; * @brief A host-based UDF implementation. * * The aggregations perform the following computation: - * - For reduction: compute `sum(value^2, for value in group)`. - * - For segmented reduction: compute `group_size * sum(value^2, for value in group)`. - * - For groupby: compute `(group_label + 1) * sum(value^2, for value in group)`. + * - For reduction: compute `sum(value^2, for value in group)` (this is sum of squared). + * - For segmented reduction: compute `segment_size * sum(value^2, for value in group)`. + * - For groupby: compute `(group_idx + 1) * sum(value^2, for value in group)`. + * + * In addition, for segmented reduction, if null_policy is set to `INCLUDE`, the null values are + * replaced with an initial value if it is provided. 
*/ template -struct special_sum : cudf::host_udf_base { +class test_udf_simple_type : cudf::host_udf_base { + static_assert(std::is_same_v || + std::is_same_v || + std::is_same_v); + + public: + test_udf_simple_type() = default; + [[nodiscard]] std::unordered_set const& get_required_data_kinds() const override { static std::unordered_set const required_data_kinds = @@ -55,16 +73,8 @@ struct special_sum : cudf::host_udf_base { input_kind::INIT_VALUE, input_kind::NULL_POLICY, input_kind::OFFSETS}; - } else if constexpr (std::is_same_v) { - return { - input_kind::OFFSETS, - input_kind::GROUP_LABELS, - input_kind::GROUPED_VALUES, - input_kind::NUM_GROUPS, - }; } else { - CUDF_FAIL("Unsupported agregation type."); - return {}; + return {input_kind::OFFSETS, input_kind::GROUP_LABELS, input_kind::GROUPED_VALUES}; } }(); @@ -75,66 +85,256 @@ struct special_sum : cudf::host_udf_base { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) override { + if constexpr (std::is_same_v) { + auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); + auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); + return cudf::double_type_dispatcher( + values.type(), output_dtype, reduce_fn{}, input, stream, mr); + } else if constexpr (std::is_same_v) { + auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); + auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); + return cudf::double_type_dispatcher( + values.type(), output_dtype, segmented_reduce_fn{}, input, stream, mr); + } else { + auto const& values = std::get(input.at(input_kind::GROUPED_VALUES)); + return cudf::type_dispatcher(values.type(), groupby_fn{}, input, stream, mr); + } } - [[nodiscard]] output_type get_empty_output() const override + [[nodiscard]] output_type get_empty_output( + [[maybe_unused]] std::optional output_dtype, + [[maybe_unused]] std::optional> init, + [[maybe_unused]] rmm::cuda_stream_view stream, + [[maybe_unused]] 
rmm::mr::device_memory_resource* mr) const override { - // + if constexpr (std::is_same_v || + std::is_same_v) { + CUDF_EXPECTS(output_dtype.has_value(), + "Data type for the reduction result must be specified."); + if (init.has_value() && init.value().get().is_valid(stream)) { + CUDF_EXPECTS(output_dtype.value() == init.value().get().type(), + "Data type for reduction result must be the same as init value."); + return std::make_unique(init.value().get()); + } + return cudf::make_default_constructed_scalar(output_dtype.value(), stream, mr); + } else { + return cudf::make_empty_column( + cudf::data_type{cudf::type_to_id()}); + } } - [[nodiscard]] bool is_equal(host_udf_base const& other) const override; - [[nodiscard]] std::size_t do_hash() const override; - [[nodiscard]] std::unique_ptr clone() const override; -}; + [[nodiscard]] bool is_equal(host_udf_base const& other) const override + { + // Just check if the other object is also instance of the same derived class. + return dynamic_cast(&other) != nullptr; + } -std::unique_ptr double_sqr(cudf::column_view const& values, - cudf::device_span group_offsets, - cudf::device_span group_labels, - cudf::size_type num_groups, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto output = cudf::make_numeric_column( - cudf::data_type{cudf::type_id::INT32}, values.size(), cudf::mask_state::UNALLOCATED, stream); - thrust::for_each(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_groups), - [output = output->mutable_view().begin(), - values = values.begin(), - group_offsets] __device__(int idx) -> int { - auto start = group_offsets[idx]; - auto end = group_offsets[idx + 1]; - for (int i = start; i < end; ++i) { - output[i] = (idx + 1) * 2 * values[i] * values[i]; - } - }); - return output; -} + [[nodiscard]] std::size_t do_hash() const override + { + return std::hash{}({"test_udf_simple_type"}); + } -// For each group: compute (group_idx + 1)* 
values^2 * 3 -std::unique_ptr triple_sqr(cudf::column_view const& values, - cudf::device_span group_offsets, - cudf::device_span group_labels, - cudf::size_type num_groups, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto output = cudf::make_numeric_column( - cudf::data_type{cudf::type_id::INT32}, values.size(), cudf::mask_state::UNALLOCATED, stream); - thrust::for_each(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_groups), - [output = output->mutable_view().begin(), - values = values.begin(), - group_offsets] __device__(int idx) -> int { - auto start = group_offsets[idx]; - auto end = group_offsets[idx + 1]; - for (int i = start; i < end; ++i) { - output[i] = (idx + 1) * 3 * values[i] * values[i]; - } - }); - return output; -} + [[nodiscard]] std::unique_ptr clone() const override + { + return std::make_unique(); + } + + private: + struct reduce_fn { + template () || !cudf::is_numeric())> + output_type operator()(Args...) 
const + { + CUDF_FAIL("Unsupported input type."); + } + + template () && cudf::is_numeric())> + output_type operator()(std::unordered_map const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); + auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); + auto const input_init_value = + std::get>>( + input.at(input_kind::INIT_VALUE)); + + if (values.size() == 0) { + return get_empty_output(output_dtype, input_init_value, stream, mr); + } + + auto const init_value = [&] -> OutputType { + if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { + CUDF_EXPECTS(output_dtype == input_init_value.value().get().type(), + "Data type for reduction result must be the same as init value."); + auto const numeric_init_scalar = + dynamic_cast(&input_init_value.value().get()); + CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); + return static_cast(numeric_init_scalar->value(stream)); + } + return OutputType{0}; + }(); + + auto const values_dv_ptr = cudf::column_device_view::create(values, stream); + auto const result = thrust::transform_reduce( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.size()), + [values = *values_dv_ptr] __device__(cudf::size_type idx) -> OutputType { + if (values.is_null(idx)) { return OutputType{0}; } + auto const val = static_cast(values.element(idx)); + return val * val; + }, + init_value, + thrust::plus<>{}); + + auto output = cudf::make_numeric_scalar(output_dtype, stream, mr); + static_cast*>(output.get())->set_value(result, stream); + return output; + } + }; + + struct segmented_reduce_fn { + template () || !cudf::is_numeric())> + output_type operator()(Args...) 
const + { + CUDF_FAIL("Unsupported input type."); + } + + template () && cudf::is_numeric())> + output_type operator()(std::unordered_map const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); + auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); + auto const input_init_value = + std::get>>( + input.at(input_kind::INIT_VALUE)); + + if (values.size() == 0) { + return get_empty_output(output_dtype, input_init_value, stream, mr); + } + + auto const init_value = [&] -> OutputType { + if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { + CUDF_EXPECTS(output_dtype == input_init_value.value().get().type(), + "Data type for reduction result must be the same as init value."); + auto const numeric_init_scalar = + dynamic_cast(&input_init_value.value().get()); + CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); + return static_cast(numeric_init_scalar->value(stream)); + } + return OutputType{0}; + }(); + + auto const null_handling = std::get(input.at(input_kind::NULL_POLICY)); + auto const offsets = + std::get>(input.at(input_kind::OFFSETS)); + CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); + auto const num_segments = offsets.size() - 1; + + auto const values_dv_ptr = cudf::column_device_view::create(values, stream); + auto output = cudf::make_numeric_column( + output_dtype, num_segments, cudf::mask_state::UNALLOCATED, stream); + rmm::device_uvector validity(num_segments, stream); + + auto const result = thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_segments), + thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()), + [values = *values_dv_ptr, init_value, null_handling, offsets] __device__( + cudf::size_type idx) -> thrust::tuple { + auto const start = offsets[idx]; + auto const 
end = offsets[idx + 1]; + if (start == end) { return {OutputType{0}, false}; } + + auto sum = init_value; + for (auto i = start; i < end; ++i) { + if (values.is_null(i)) { + if (null_handling == cudf::null_policy::INCLUDE) { sum += init_value * init_value; } + continue; + } + auto const val = static_cast(values.element(i)); + sum += val * val; + } + auto const segment_size = end - start; + return {segment_size * sum, true}; + }); + auto [null_mask, null_count] = + cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); + if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } + return output; + } + }; + + struct groupby_fn { + using OutputType = double; + + template ())> + output_type operator()(Args...) const + { + CUDF_FAIL("Unsupported input type."); + } + + template ())> + output_type operator()(std::unordered_map const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + auto const& values = std::get(input.at(input_kind::GROUPED_VALUES)); + if (values.size() == 0) { return get_empty_output(std::nullopt, std::nullopt, stream, mr); } + + auto const offsets = + std::get>(input.at(input_kind::OFFSETS)); + CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); + auto const num_groups = offsets.size() - 1; + auto const group_indices = + std::get>(input.at(input_kind::GROUP_LABELS)); + + auto const values_dv_ptr = cudf::column_device_view::create(values, stream); + auto output = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, + num_groups, + cudf::mask_state::UNALLOCATED, + stream); + rmm::device_uvector validity(num_groups, stream); + + auto const result = thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_groups), + thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()), + [values = *values_dv_ptr, offsets, group_indices] __device__( + cudf::size_type idx) -> 
thrust::tuple { + auto const start = offsets[idx]; + auto const end = offsets[idx + 1]; + if (start == end) { return {OutputType{0}, false}; } + + auto sum = OutputType{0}; + for (auto i = start; i < end; ++i) { + if (values.is_null(i)) { continue; } + auto const val = static_cast(values.element(i)); + sum += val * val; + } + return {(group_indices[idx] + 1) * sum, true}; + }); + auto [null_mask, null_count] = + cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); + if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } + return output; + } + }; +}; TEST_F(test, double_sqr) { From a1b568b25391886621b8a2abfb7b2d2ff82a80f7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 22 Nov 2024 14:25:43 -0800 Subject: [PATCH 18/59] Implement a simple test Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 52 ++++++++--------------------- 1 file changed, 14 insertions(+), 38 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 193a04e7cb9..b9f8bed13e3 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -37,10 +38,6 @@ #include #include -using namespace cudf::test::iterators; - -struct test : public cudf::test::BaseFixture {}; - /** * @brief A host-based UDF implementation. * @@ -53,7 +50,7 @@ struct test : public cudf::test::BaseFixture {}; * replaced with an initial value if it is provided. 
*/ template -class test_udf_simple_type : cudf::host_udf_base { +class test_udf_simple_type : public cudf::host_udf_base { static_assert(std::is_same_v || std::is_same_v || std::is_same_v); @@ -336,40 +333,19 @@ class test_udf_simple_type : cudf::host_udf_base { }; }; -TEST_F(test, double_sqr) -{ - cudf::test::fixed_width_column_wrapper keys{1, 2, 3, 1, 2, 3}; - cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4, 5}; +// using namespace cudf::test::iterators; +using int32s_col = cudf::test::fixed_width_column_wrapper; - auto agg = cudf::make_host_udf_aggregation(double_sqr); - std::vector requests; - requests.emplace_back(); - requests[0].values = vals; - requests[0].aggregations.push_back(std::move(agg)); - cudf::groupby::groupby gb_obj( - cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); +struct HostUDFReductionTest : cudf::test::BaseFixture {}; - auto result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); - - // Got output: 0,18,4,64,24,150 - cudf::test::print(*result.second[0].results[0]); -} - -TEST_F(test, triple_sqr) +TEST_F(HostUDFReductionTest, SimpleInput) { - cudf::test::fixed_width_column_wrapper keys{1, 2, 3, 1, 2, 3}; - cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4, 5}; - - auto agg = cudf::make_host_udf_aggregation(triple_sqr); - std::vector requests; - requests.emplace_back(); - requests[0].values = vals; - requests[0].aggregations.push_back(std::move(agg)); - cudf::groupby::groupby gb_obj( - cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); - - auto result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); - - // Got output: 0,27,6,96,36,225 - cudf::test::print(*result.second[0].results[0]); + int32s_col vals{0, 1, 2, 3, 4, 5}; + + auto agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + auto const reduced = cudf::reduce(vals, agg, cudf::data_type{cudf::type_id::INT64}); + auto const result = + 
static_cast*>(reduced.get())->value(cudf::get_default_stream()); + printf("Result: %ld\n", result); } From 7ec2dd9e4f55c47136268e067a3e66385ac47cf0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 22 Nov 2024 14:59:48 -0800 Subject: [PATCH 19/59] Fix compile issues Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 4 +- cpp/src/aggregation/aggregation.cpp | 10 ++--- cpp/src/groupby/groupby.cu | 48 +++++++++++++-------- cpp/src/groupby/sort/aggregate.cpp | 15 ++++--- cpp/src/reductions/reductions.cpp | 2 +- cpp/src/reductions/segmented/reductions.cpp | 2 +- 6 files changed, 48 insertions(+), 33 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index ec7a17e49df..3c7626d7675 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -679,7 +679,7 @@ struct host_udf_base { [[nodiscard]] virtual output_type operator()( std::unordered_map const& input, rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) = 0; + rmm::device_async_resource_ref mr) const = 0; /** * @brief Get the output when the input values is empty. @@ -697,7 +697,7 @@ struct host_udf_base { std::optional output_dtype, std::optional> init, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const = 0; + rmm::device_async_resource_ref mr) const = 0; /** * @brief Compares two instances of the derived class for equality. 
diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 64c877bb505..b4726c61ec8 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -929,16 +929,16 @@ template CUDF_EXPORT std::unique_ptr make_merge_tdigest_aggregation(int max_centroids); template -std::unique_ptr make_host_udf_aggregation(host_udf_func_type udf_func_) +std::unique_ptr make_host_udf_aggregation(std::unique_ptr&& udf_ptr_) { - return std::make_unique(udf_func_); + return std::make_unique(std::move(udf_ptr_)); } template CUDF_EXPORT std::unique_ptr make_host_udf_aggregation( - host_udf_func_type); + std::unique_ptr&&); template CUDF_EXPORT std::unique_ptr - make_host_udf_aggregation(host_udf_func_type); +make_host_udf_aggregation(std::unique_ptr&&); template CUDF_EXPORT std::unique_ptr - make_host_udf_aggregation(host_udf_func_type); +make_host_udf_aggregation(std::unique_ptr&&); namespace detail { namespace { diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 260181eb895..ac6ec75c7a7 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -99,6 +99,8 @@ namespace { struct empty_column_constructor { column_view values; aggregation const& agg; + rmm::cuda_stream_view stream; + rmm::device_async_resource_ref mr; template std::unique_ptr operator()() const @@ -142,7 +144,8 @@ struct empty_column_constructor { if constexpr (k == aggregation::Kind::HOST_UDF) { auto const& udf_ptr = dynamic_cast(agg).udf_ptr; - return udf_ptr->get_empty_output(std::nullopt, std::nullopt, stream, mr); + return std::get>( + udf_ptr->get_empty_output(std::nullopt, std::nullopt, stream, mr)); } return make_empty_column(target_type(values.type(), k)); @@ -151,25 +154,30 @@ struct empty_column_constructor { /// Make an empty table with appropriate types for requested aggs template -auto empty_results(host_span requests) +auto empty_results(host_span requests, + rmm::cuda_stream_view stream, + 
rmm::device_async_resource_ref mr) { std::vector empty_results; - std::transform( - requests.begin(), requests.end(), std::back_inserter(empty_results), [](auto const& request) { - std::vector> results; - - std::transform( - request.aggregations.begin(), - request.aggregations.end(), - std::back_inserter(results), - [&request](auto const& agg) { - return cudf::detail::dispatch_type_and_aggregation( - request.values.type(), agg->kind, empty_column_constructor{request.values, *agg}); - }); - - return aggregation_result{std::move(results)}; - }); + std::transform(requests.begin(), + requests.end(), + std::back_inserter(empty_results), + [stream, mr](auto const& request) { + std::vector> results; + + std::transform(request.aggregations.begin(), + request.aggregations.end(), + std::back_inserter(results), + [&request, stream, mr](auto const& agg) { + return cudf::detail::dispatch_type_and_aggregation( + request.values.type(), + agg->kind, + empty_column_constructor{request.values, *agg, stream, mr}); + }); + + return aggregation_result{std::move(results)}; + }); return empty_results; } @@ -218,7 +226,7 @@ std::pair, std::vector> groupby::aggr verify_valid_requests(requests); - if (_keys.num_rows() == 0) { return {empty_like(_keys), empty_results(requests)}; } + if (_keys.num_rows() == 0) { return {empty_like(_keys), empty_results(requests, stream, mr)}; } return dispatch_aggregation(requests, stream, mr); } @@ -236,7 +244,9 @@ std::pair, std::vector> groupby::scan verify_valid_requests(requests); - if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); } + if (_keys.num_rows() == 0) { + return std::pair(empty_like(_keys), empty_results(requests, cudf::get_default_stream(), mr)); + } return sort_scan(requests, cudf::get_default_stream(), mr); } diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index d27ec7ee0d7..e7c1e00930d 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ 
b/cpp/src/groupby/sort/aggregate.cpp @@ -487,7 +487,8 @@ void aggregate_result_functor::operator()(aggregation con } } - cache.add_result(values, agg, std::get>(udf_ptr(udf_input, stream, mr))); + cache.add_result( + values, agg, std::get>((*udf_ptr)(udf_input, stream, mr))); } /** @@ -660,8 +661,10 @@ void aggregate_result_functor::operator()(aggregation c column_view_with_common_nulls(values.child(0), values.child(1), stream); auto mean_agg = make_mean_aggregation(); - aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()(*mean_agg); - aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()(*mean_agg); + aggregate_result_functor(values_child0, helper, cache, stream, mr) + .operator()(*mean_agg); + aggregate_result_functor(values_child1, helper, cache, stream, mr) + .operator()(*mean_agg); auto const mean0 = cache.get_result(values_child0, *mean_agg); auto const mean1 = cache.get_result(values_child1, *mean_agg); @@ -709,8 +712,10 @@ void aggregate_result_functor::operator()(aggregation column_view_with_common_nulls(values.child(0), values.child(1), stream); auto std_agg = make_std_aggregation(); - aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()(*std_agg); - aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()(*std_agg); + aggregate_result_functor(values_child0, helper, cache, stream, mr) + .operator()(*std_agg); + aggregate_result_functor(values_child1, helper, cache, stream, mr) + .operator()(*std_agg); // Compute covariance here to avoid repeated computation of mean & count auto cov_agg = make_covariance_aggregation(corr_agg._min_periods); diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index aba987007a0..aefd9e1e24f 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -172,7 +172,7 @@ struct reduce_dispatch_functor { } } - return std::get>(udf_ptr(udf_input, stream, mr)); + return 
std::get>((*udf_ptr)(udf_input, stream, mr)); } default: CUDF_FAIL("Unsupported reduction operator"); } diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 33dcc8eae4c..449152b0423 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -133,7 +133,7 @@ struct segmented_reduce_dispatch_functor { } } - return std::get>(udf_ptr(udf_input, stream, mr)); + return std::get>((*udf_ptr)(udf_input, stream, mr)); } default: CUDF_FAIL("Unsupported aggregation type."); } From 237bb722e9086f53688409f94e7e8279b8bb67bc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 22 Nov 2024 15:06:16 -0800 Subject: [PATCH 20/59] Fix test Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 164 +++++++++++++++++----------- 1 file changed, 102 insertions(+), 62 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index b9f8bed13e3..80d9401397a 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -61,7 +61,7 @@ class test_udf_simple_type : public cudf::host_udf_base { [[nodiscard]] std::unordered_set const& get_required_data_kinds() const override { static std::unordered_set const required_data_kinds = - [&] -> std::unordered_set { + [&]() -> std::unordered_set { if constexpr (std::is_same_v) { return {input_kind::INPUT_VALUES, input_kind::OUTPUT_DTYPE, input_kind::INIT_VALUE}; } else if constexpr (std::is_same_v) { @@ -80,21 +80,21 @@ class test_udf_simple_type : public cudf::host_udf_base { [[nodiscard]] output_type operator()(std::unordered_map const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) override + rmm::device_async_resource_ref mr) const override { if constexpr (std::is_same_v) { auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); return 
cudf::double_type_dispatcher( - values.type(), output_dtype, reduce_fn{}, input, stream, mr); + values.type(), output_dtype, reduce_fn{this}, input, stream, mr); } else if constexpr (std::is_same_v) { auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); return cudf::double_type_dispatcher( - values.type(), output_dtype, segmented_reduce_fn{}, input, stream, mr); + values.type(), output_dtype, segmented_reduce_fn{this}, input, stream, mr); } else { auto const& values = std::get(input.at(input_kind::GROUPED_VALUES)); - return cudf::type_dispatcher(values.type(), groupby_fn{}, input, stream, mr); + return cudf::type_dispatcher(values.type(), groupby_fn{this}, input, stream, mr); } } @@ -102,7 +102,7 @@ class test_udf_simple_type : public cudf::host_udf_base { [[maybe_unused]] std::optional output_dtype, [[maybe_unused]] std::optional> init, [[maybe_unused]] rmm::cuda_stream_view stream, - [[maybe_unused]] rmm::mr::device_memory_resource* mr) const override + [[maybe_unused]] rmm::device_async_resource_ref mr) const override { if constexpr (std::is_same_v || std::is_same_v) { @@ -133,11 +133,14 @@ class test_udf_simple_type : public cudf::host_udf_base { [[nodiscard]] std::unique_ptr clone() const override { - return std::make_unique(); + return std::make_unique(); } private: struct reduce_fn { + // Store pointer to the parent class so we can call its functions. 
+ test_udf_simple_type const* parent; + template () && cudf::is_numeric())> output_type operator()(std::unordered_map const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); @@ -161,15 +164,15 @@ class test_udf_simple_type : public cudf::host_udf_base { input.at(input_kind::INIT_VALUE)); if (values.size() == 0) { - return get_empty_output(output_dtype, input_init_value, stream, mr); + return parent->get_empty_output(output_dtype, input_init_value, stream, mr); } - auto const init_value = [&] -> OutputType { + auto const init_value = [&]() -> OutputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { CUDF_EXPECTS(output_dtype == input_init_value.value().get().type(), "Data type for reduction result must be the same as init value."); auto const numeric_init_scalar = - dynamic_cast(&input_init_value.value().get()); + dynamic_cast const*>(&input_init_value.value().get()); CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); return static_cast(numeric_init_scalar->value(stream)); } @@ -177,25 +180,35 @@ class test_udf_simple_type : public cudf::host_udf_base { }(); auto const values_dv_ptr = cudf::column_device_view::create(values, stream); - auto const result = thrust::transform_reduce( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(values.size()), - [values = *values_dv_ptr] __device__(cudf::size_type idx) -> OutputType { - if (values.is_null(idx)) { return OutputType{0}; } - auto const val = static_cast(values.element(idx)); - return val * val; - }, - init_value, - thrust::plus<>{}); + auto const result = + thrust::transform_reduce(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + 
thrust::make_counting_iterator(values.size()), + transform_fn{*values_dv_ptr}, + init_value, + thrust::plus<>{}); auto output = cudf::make_numeric_scalar(output_dtype, stream, mr); static_cast*>(output.get())->set_value(result, stream); return output; } + + template + struct transform_fn { + cudf::column_device_view values; + OutputType __device__ operator()(cudf::size_type idx) const + { + if (values.is_null(idx)) { return OutputType{0}; } + auto const val = static_cast(values.element(idx)); + return val * val; + } + }; }; struct segmented_reduce_fn { + // Store pointer to the parent class so we can call its functions. + test_udf_simple_type const* parent; + template () && cudf::is_numeric())> output_type operator()(std::unordered_map const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); @@ -219,15 +232,15 @@ class test_udf_simple_type : public cudf::host_udf_base { input.at(input_kind::INIT_VALUE)); if (values.size() == 0) { - return get_empty_output(output_dtype, input_init_value, stream, mr); + return parent->get_empty_output(output_dtype, input_init_value, stream, mr); } - auto const init_value = [&] -> OutputType { + auto const init_value = [&]() -> OutputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { CUDF_EXPECTS(output_dtype == input_init_value.value().get().type(), "Data type for reduction result must be the same as init value."); auto const numeric_init_scalar = - dynamic_cast(&input_init_value.value().get()); + dynamic_cast const*>(&input_init_value.value().get()); CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); return static_cast(numeric_init_scalar->value(stream)); } @@ -250,32 +263,44 @@ class test_udf_simple_type : public cudf::host_udf_base { 
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_segments), thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()), - [values = *values_dv_ptr, init_value, null_handling, offsets] __device__( - cudf::size_type idx) -> thrust::tuple { - auto const start = offsets[idx]; - auto const end = offsets[idx + 1]; - if (start == end) { return {OutputType{0}, false}; } - - auto sum = init_value; - for (auto i = start; i < end; ++i) { - if (values.is_null(i)) { - if (null_handling == cudf::null_policy::INCLUDE) { sum += init_value * init_value; } - continue; - } - auto const val = static_cast(values.element(i)); - sum += val * val; - } - auto const segment_size = end - start; - return {segment_size * sum, true}; - }); + transform_fn{*values_dv_ptr, offsets, init_value, null_handling}); auto [null_mask, null_count] = cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } return output; } + + template + struct transform_fn { + cudf::column_device_view values; + cudf::device_span offsets; + OutputType init_value; + cudf::null_policy null_handling; + + thrust::tuple __device__ operator()(cudf::size_type idx) const + { + auto const start = offsets[idx]; + auto const end = offsets[idx + 1]; + if (start == end) { return {OutputType{0}, false}; } + + auto sum = init_value; + for (auto i = start; i < end; ++i) { + if (values.is_null(i)) { + if (null_handling == cudf::null_policy::INCLUDE) { sum += init_value * init_value; } + continue; + } + auto const val = static_cast(values.element(i)); + sum += val * val; + } + auto const segment_size = end - start; + return {segment_size * sum, true}; + } + }; }; struct groupby_fn { + // Store pointer to the parent class so we can call its functions. 
+ test_udf_simple_type const* parent; using OutputType = double; template ())> @@ -287,10 +312,12 @@ class test_udf_simple_type : public cudf::host_udf_base { template ())> output_type operator()(std::unordered_map const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + rmm::device_async_resource_ref mr) const { auto const& values = std::get(input.at(input_kind::GROUPED_VALUES)); - if (values.size() == 0) { return get_empty_output(std::nullopt, std::nullopt, stream, mr); } + if (values.size() == 0) { + return parent->get_empty_output(std::nullopt, std::nullopt, stream, mr); + } auto const offsets = std::get>(input.at(input_kind::OFFSETS)); @@ -311,25 +338,34 @@ class test_udf_simple_type : public cudf::host_udf_base { thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_groups), thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()), - [values = *values_dv_ptr, offsets, group_indices] __device__( - cudf::size_type idx) -> thrust::tuple { - auto const start = offsets[idx]; - auto const end = offsets[idx + 1]; - if (start == end) { return {OutputType{0}, false}; } - - auto sum = OutputType{0}; - for (auto i = start; i < end; ++i) { - if (values.is_null(i)) { continue; } - auto const val = static_cast(values.element(i)); - sum += val * val; - } - return {(group_indices[idx] + 1) * sum, true}; - }); + transform_fn{*values_dv_ptr, offsets, group_indices}); auto [null_mask, null_count] = cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } return output; } + + template + struct transform_fn { + cudf::column_device_view values; + cudf::device_span offsets; + cudf::device_span group_indices; + + thrust::tuple __device__ operator()(cudf::size_type idx) const + { + auto const start = offsets[idx]; + auto const end = offsets[idx + 1]; + if (start == end) { return {OutputType{0}, false}; 
} + + auto sum = OutputType{0}; + for (auto i = start; i < end; ++i) { + if (values.is_null(i)) { continue; } + auto const val = static_cast(values.element(i)); + sum += val * val; + } + return {(group_indices[idx] + 1) * sum, true}; + } + }; }; }; @@ -342,9 +378,13 @@ TEST_F(HostUDFReductionTest, SimpleInput) { int32s_col vals{0, 1, 2, 3, 4, 5}; - auto agg = cudf::make_host_udf_aggregation( + auto agg = cudf::make_host_udf_aggregation( std::make_unique>()); - auto const reduced = cudf::reduce(vals, agg, cudf::data_type{cudf::type_id::INT64}); + auto const reduced = cudf::reduce(vals, + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); auto const result = static_cast*>(reduced.get())->value(cudf::get_default_stream()); printf("Result: %ld\n", result); From b5b8f5b1221749d29dd1ab72843c605a27cc4640 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 22 Nov 2024 15:23:58 -0800 Subject: [PATCH 21/59] Remove `init` value from `get_empty_output` Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 9 +++------ cpp/src/groupby/groupby.cu | 3 +-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 3c7626d7675..1546043f5cb 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -688,16 +688,13 @@ struct host_udf_base { * intermediate data when the input values is empty. 
* * @param output_dtype The expected output data type for reduction (if specified) - * @param init The initial value for reduction (if specified) * @param stream The CUDA stream to use for any kernel launches * @param mr Device memory resource to use for any allocations * @return The output result of the aggregation when input values is empty */ - [[nodiscard]] virtual output_type get_empty_output( - std::optional output_dtype, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const = 0; + [[nodiscard]] virtual output_type get_empty_output(std::optional output_dtype, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const = 0; /** * @brief Compares two instances of the derived class for equality. diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index ac6ec75c7a7..16c1c89565c 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -144,8 +144,7 @@ struct empty_column_constructor { if constexpr (k == aggregation::Kind::HOST_UDF) { auto const& udf_ptr = dynamic_cast(agg).udf_ptr; - return std::get>( - udf_ptr->get_empty_output(std::nullopt, std::nullopt, stream, mr)); + return std::get>(udf_ptr->get_empty_output(std::nullopt, stream, mr)); } return make_empty_column(target_type(values.type(), k)); From bfec6a22a77a1673596ce4901c65e7d31817cd29 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 22 Nov 2024 15:25:38 -0800 Subject: [PATCH 22/59] Fix test Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 80d9401397a..a92b344a540 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -50,12 +50,11 @@ * replaced with an initial value if it is provided. 
*/ template -class test_udf_simple_type : public cudf::host_udf_base { +struct test_udf_simple_type : cudf::host_udf_base { static_assert(std::is_same_v || std::is_same_v || std::is_same_v); - public: test_udf_simple_type() = default; [[nodiscard]] std::unordered_set const& get_required_data_kinds() const override @@ -100,7 +99,6 @@ class test_udf_simple_type : public cudf::host_udf_base { [[nodiscard]] output_type get_empty_output( [[maybe_unused]] std::optional output_dtype, - [[maybe_unused]] std::optional> init, [[maybe_unused]] rmm::cuda_stream_view stream, [[maybe_unused]] rmm::device_async_resource_ref mr) const override { @@ -108,11 +106,6 @@ class test_udf_simple_type : public cudf::host_udf_base { std::is_same_v) { CUDF_EXPECTS(output_dtype.has_value(), "Data type for the reduction result must be specified."); - if (init.has_value() && init.value().get().is_valid(stream)) { - CUDF_EXPECTS(output_dtype.value() == init.value().get().type(), - "Data type for reduction result must be the same as init value."); - return std::make_unique(init.value().get()); - } return cudf::make_default_constructed_scalar(output_dtype.value(), stream, mr); } else { return cudf::make_empty_column( @@ -136,7 +129,6 @@ class test_udf_simple_type : public cudf::host_udf_base { return std::make_unique(); } - private: struct reduce_fn { // Store pointer to the parent class so we can call its functions. 
test_udf_simple_type const* parent; @@ -163,9 +155,7 @@ class test_udf_simple_type : public cudf::host_udf_base { std::get>>( input.at(input_kind::INIT_VALUE)); - if (values.size() == 0) { - return parent->get_empty_output(output_dtype, input_init_value, stream, mr); - } + if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } auto const init_value = [&]() -> OutputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { @@ -231,9 +221,7 @@ class test_udf_simple_type : public cudf::host_udf_base { std::get>>( input.at(input_kind::INIT_VALUE)); - if (values.size() == 0) { - return parent->get_empty_output(output_dtype, input_init_value, stream, mr); - } + if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } auto const init_value = [&]() -> OutputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { @@ -315,9 +303,7 @@ class test_udf_simple_type : public cudf::host_udf_base { rmm::device_async_resource_ref mr) const { auto const& values = std::get(input.at(input_kind::GROUPED_VALUES)); - if (values.size() == 0) { - return parent->get_empty_output(std::nullopt, std::nullopt, stream, mr); - } + if (values.size() == 0) { return parent->get_empty_output(std::nullopt, stream, mr); } auto const offsets = std::get>(input.at(input_kind::OFFSETS)); From 3bc9ae345566f65a29ca17f21d82a392e015195e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 22 Nov 2024 15:38:38 -0800 Subject: [PATCH 23/59] Fix compile issues Signed-off-by: Nghia Truong --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 2 +- cpp/src/aggregation/aggregation.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index c63e2b405e3..bd1a47b0061 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ 
b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -974,7 +974,7 @@ class host_udf_aggregation final : public groupby_aggregation, public reduce_agg host_udf_aggregation() = delete; host_udf_aggregation(host_udf_aggregation const&) = delete; - explicit host_udf_aggregation(std::unique_ptr&& udf_ptr_) + explicit host_udf_aggregation(std::unique_ptr udf_ptr_) : aggregation{HOST_UDF}, udf_ptr{std::move(udf_ptr_)} { CUDF_EXPECTS(udf_ptr != nullptr, "Invalid host-based UDF instance."); diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index b4726c61ec8..3774e519283 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -929,16 +929,16 @@ template CUDF_EXPORT std::unique_ptr make_merge_tdigest_aggregation(int max_centroids); template -std::unique_ptr make_host_udf_aggregation(std::unique_ptr&& udf_ptr_) +std::unique_ptr make_host_udf_aggregation(std::unique_ptr udf_ptr_) { return std::make_unique(std::move(udf_ptr_)); } template CUDF_EXPORT std::unique_ptr make_host_udf_aggregation( - std::unique_ptr&&); + std::unique_ptr); template CUDF_EXPORT std::unique_ptr -make_host_udf_aggregation(std::unique_ptr&&); + make_host_udf_aggregation(std::unique_ptr); template CUDF_EXPORT std::unique_ptr -make_host_udf_aggregation(std::unique_ptr&&); + make_host_udf_aggregation(std::unique_ptr); namespace detail { namespace { From 26be26249b6691e658a0cd4d8ccd4a2dd31327fb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 23 Nov 2024 21:06:44 -0800 Subject: [PATCH 24/59] Enable `segmented_reduce_aggregation` Signed-off-by: Nghia Truong --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 4 +++- cpp/src/aggregation/aggregation.cpp | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index bd1a47b0061..c3cdbcd8321 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ 
b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -967,7 +967,9 @@ class udf_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying a custom aggregation specified in host-based UDF. */ -class host_udf_aggregation final : public groupby_aggregation, public reduce_aggregation { +class host_udf_aggregation final : public groupby_aggregation, + public reduce_aggregation, + public segmented_reduce_aggregation { public: std::unique_ptr const udf_ptr; diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 3774e519283..e70724ab2f0 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -939,6 +939,8 @@ template CUDF_EXPORT std::unique_ptr make_host_udf_aggregation(std::unique_ptr); template CUDF_EXPORT std::unique_ptr make_host_udf_aggregation(std::unique_ptr); +template CUDF_EXPORT std::unique_ptr + make_host_udf_aggregation(std::unique_ptr); namespace detail { namespace { From 6b3e3f72a258669b824cb52d522b7df1f908e482 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 23 Nov 2024 22:03:47 -0800 Subject: [PATCH 25/59] Implement test for `segmented_reduce` Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 197 +++++++++++++++++++++++----- 1 file changed, 165 insertions(+), 32 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index a92b344a540..7cdea681257 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -129,6 +129,20 @@ struct test_udf_simple_type : cudf::host_udf_base { return std::make_unique(); } + // For faster compile times, we only support a few input/output types. + template + static constexpr bool is_valid_input_t() + { + return std::is_same_v || std::is_same_v; + } + + // For faster compile times, we only support a few input/output types. 
+ template + static constexpr bool is_valid_output_t() + { + return std::is_same_v; + } + struct reduce_fn { // Store pointer to the parent class so we can call its functions. test_udf_simple_type const* parent; @@ -136,7 +150,7 @@ struct test_udf_simple_type : cudf::host_udf_base { template () || !cudf::is_numeric())> + CUDF_ENABLE_IF(!is_valid_input_t() || !is_valid_output_t())> output_type operator()(Args...) const { CUDF_FAIL("Unsupported input type."); @@ -144,7 +158,7 @@ struct test_udf_simple_type : cudf::host_udf_base { template () && cudf::is_numeric())> + CUDF_ENABLE_IF(is_valid_input_t() && is_valid_output_t())> output_type operator()(std::unordered_map const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const @@ -157,16 +171,14 @@ struct test_udf_simple_type : cudf::host_udf_base { if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } - auto const init_value = [&]() -> OutputType { + auto const init_value = [&]() -> InputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { - CUDF_EXPECTS(output_dtype == input_init_value.value().get().type(), - "Data type for reduction result must be the same as init value."); auto const numeric_init_scalar = - dynamic_cast const*>(&input_init_value.value().get()); + dynamic_cast const*>(&input_init_value.value().get()); CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); - return static_cast(numeric_init_scalar->value(stream)); + return numeric_init_scalar->value(stream); } - return OutputType{0}; + return InputType{0}; }(); auto const values_dv_ptr = cudf::column_device_view::create(values, stream); @@ -175,7 +187,7 @@ struct test_udf_simple_type : cudf::host_udf_base { thrust::make_counting_iterator(0), thrust::make_counting_iterator(values.size()), transform_fn{*values_dv_ptr}, - init_value, + static_cast(init_value), thrust::plus<>{}); auto output = 
cudf::make_numeric_scalar(output_dtype, stream, mr); @@ -202,7 +214,7 @@ struct test_udf_simple_type : cudf::host_udf_base { template () || !cudf::is_numeric())> + CUDF_ENABLE_IF(!is_valid_input_t() || !is_valid_output_t())> output_type operator()(Args...) const { CUDF_FAIL("Unsupported input type."); @@ -210,7 +222,7 @@ struct test_udf_simple_type : cudf::host_udf_base { template () && cudf::is_numeric())> + CUDF_ENABLE_IF(is_valid_input_t() && is_valid_output_t())> output_type operator()(std::unordered_map const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const @@ -223,35 +235,34 @@ struct test_udf_simple_type : cudf::host_udf_base { if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } - auto const init_value = [&]() -> OutputType { + auto const init_value = [&]() -> InputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { - CUDF_EXPECTS(output_dtype == input_init_value.value().get().type(), - "Data type for reduction result must be the same as init value."); auto const numeric_init_scalar = - dynamic_cast const*>(&input_init_value.value().get()); + dynamic_cast const*>(&input_init_value.value().get()); CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); - return static_cast(numeric_init_scalar->value(stream)); + return numeric_init_scalar->value(stream); } - return OutputType{0}; + return InputType{0}; }(); auto const null_handling = std::get(input.at(input_kind::NULL_POLICY)); auto const offsets = std::get>(input.at(input_kind::OFFSETS)); CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); - auto const num_segments = offsets.size() - 1; + auto const num_segments = static_cast(offsets.size()) - 1; auto const values_dv_ptr = cudf::column_device_view::create(values, stream); auto output = cudf::make_numeric_column( output_dtype, num_segments, cudf::mask_state::UNALLOCATED, stream); rmm::device_uvector validity(num_segments, 
stream); - auto const result = thrust::transform( + thrust::transform( rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_segments), thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()), - transform_fn{*values_dv_ptr, offsets, init_value, null_handling}); + transform_fn{ + *values_dv_ptr, offsets, static_cast(init_value), null_handling}); auto [null_mask, null_count] = cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } @@ -281,7 +292,7 @@ struct test_udf_simple_type : cudf::host_udf_base { sum += val * val; } auto const segment_size = end - start; - return {segment_size * sum, true}; + return {static_cast(segment_size) * sum, true}; } }; }; @@ -308,7 +319,7 @@ struct test_udf_simple_type : cudf::host_udf_base { auto const offsets = std::get>(input.at(input_kind::OFFSETS)); CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); - auto const num_groups = offsets.size() - 1; + auto const num_groups = static_cast(offsets.size()) - 1; auto const group_indices = std::get>(input.at(input_kind::GROUP_LABELS)); @@ -319,7 +330,7 @@ struct test_udf_simple_type : cudf::host_udf_base { stream); rmm::device_uvector validity(num_groups, stream); - auto const result = thrust::transform( + thrust::transform( rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_groups), @@ -349,29 +360,151 @@ struct test_udf_simple_type : cudf::host_udf_base { auto const val = static_cast(values.element(i)); sum += val * val; } - return {(group_indices[idx] + 1) * sum, true}; + return {static_cast((group_indices[idx] + 1)) * sum, true}; } }; }; }; -// using namespace cudf::test::iterators; -using int32s_col = cudf::test::fixed_width_column_wrapper; +using doubles_col = cudf::test::fixed_width_column_wrapper; +using int32s_col = cudf::test::fixed_width_column_wrapper; 
+using int64s_col = cudf::test::fixed_width_column_wrapper; -struct HostUDFReductionTest : cudf::test::BaseFixture {}; +struct HostUDFImplementationTest : cudf::test::BaseFixture {}; -TEST_F(HostUDFReductionTest, SimpleInput) +TEST_F(HostUDFImplementationTest, ReductionSimpleInput) { - int32s_col vals{0, 1, 2, 3, 4, 5}; - - auto agg = cudf::make_host_udf_aggregation( + auto const vals = doubles_col{0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; + auto const agg = cudf::make_host_udf_aggregation( std::make_unique>()); auto const reduced = cudf::reduce(vals, *agg, cudf::data_type{cudf::type_id::INT64}, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + EXPECT_TRUE(reduced->is_valid()); + EXPECT_EQ(cudf::type_id::INT64, reduced->type().id()); auto const result = static_cast*>(reduced.get())->value(cudf::get_default_stream()); - printf("Result: %ld\n", result); + auto constexpr expected = 55; // 0^2 + 1^2 + 2^2 + 3^2 + 4^2 + 5^2 = 55 + EXPECT_EQ(expected, result); +} + +TEST_F(HostUDFImplementationTest, ReductionEmptyInput) +{ + auto const vals = doubles_col{}; + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + auto const reduced = cudf::reduce(vals, + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + EXPECT_FALSE(reduced->is_valid()); + EXPECT_EQ(cudf::type_id::INT64, reduced->type().id()); +} + +TEST_F(HostUDFImplementationTest, SegmentedReductionSimpleInput) +{ + auto const vals = doubles_col{ + {0.0, 0.0 /*null*/, 2.0, 3.0, 0.0 /*null*/, 5.0, 0.0 /*null*/, 0.0 /*null*/, 8.0, 9.0}, + {true, false, true, true, false, true, false, false, true, true}}; + auto const offsets = int32s_col{0, 3, 5, 10}.release(); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + + // Test without init_value. 
+ { + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init_value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + // When null_policy is set to `INCLUDE`, the null values are replaced with the init_value. + // Since init_value is not given, it is set to 0. + // [ 3 * (0^2 + init^2 + 2^2), 2 * (3^2 + init^2), 5 * (5^2 + init^2 + init^2 + 8^2 + 9^2) ] + auto const expected = int64s_col{12, 18, 850}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with init value, and include nulls. + { + auto const init_scalar = cudf::make_fixed_width_scalar(3.0); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + *init_scalar, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + // When null_policy is set to `INCLUDE`, the null values are replaced with the init_value. + // [ 3 * (3 + 0^2 + 3^2 + 2^2), 2 * (3 + 3^2 + 3^2), 5 * (3 + 5^2 + 3^2 + 3^2 + 8^2 + 9^2) ] + auto const expected = int64s_col{48, 42, 955}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with init value, and exclude nulls. 
+ { + auto const init_scalar = cudf::make_fixed_width_scalar(3.0); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::EXCLUDE, + *init_scalar, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + // [ 3 * (3 + 0^2 + 2^2), 2 * (3 + 3^2), 5 * (3 + 5^2 + 8^2 + 9^2) ] + auto const expected = int64s_col{21, 24, 865}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } +} + +TEST_F(HostUDFImplementationTest, SegmentedReductionEmptySegments) +{ + auto const vals = int32s_col{}; + auto const offsets = int32s_col{0, 0, 0, 0}.release(); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init_value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + auto const expected = int64s_col{{0, 0, 0, 0}, {false, false, false, false}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(HostUDFImplementationTest, SegmentedReductionEmptyInput) +{ + auto const vals = int32s_col{}; + // Cannot be empty due to a bug in the libcudf: https://github.com/rapidsai/cudf/issues/17433. 
+ auto const offsets = int32s_col{0}.release(); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init_value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + auto const expected = int64s_col{}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } From 3d505da666587dee91d6e8abf8d45b46cb2d32c5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 23 Nov 2024 22:14:11 -0800 Subject: [PATCH 26/59] Fix empty output Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 7cdea681257..d8645d3d008 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -102,11 +102,14 @@ struct test_udf_simple_type : cudf::host_udf_base { [[maybe_unused]] rmm::cuda_stream_view stream, [[maybe_unused]] rmm::device_async_resource_ref mr) const override { - if constexpr (std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same_v) { CUDF_EXPECTS(output_dtype.has_value(), "Data type for the reduction result must be specified."); return cudf::make_default_constructed_scalar(output_dtype.value(), stream, mr); + } else if constexpr (std::is_same_v) { + CUDF_EXPECTS(output_dtype.has_value(), + "Data type for the reduction result must be specified."); + return cudf::make_empty_column(output_dtype.value()); } else { return cudf::make_empty_column( cudf::data_type{cudf::type_to_id()}); From 9d1ac9a37b842fec27ef6d20c28aa3160714d32d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 23 Nov 2024 22:19:17 -0800 Subject: [PATCH 27/59] Fix empty input handling Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 
22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index d8645d3d008..bd53e39e177 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -232,12 +232,24 @@ struct test_udf_simple_type : cudf::host_udf_base { { auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); + auto const offsets = + std::get>(input.at(input_kind::OFFSETS)); + CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); + auto const num_segments = static_cast(offsets.size()) - 1; + + if (values.size() == 0) { + if (num_segments <= 0) { + return parent->get_empty_output(output_dtype, stream, mr); + } else { + return cudf::make_numeric_column( + output_dtype, num_segments, cudf::mask_state::ALL_NULL, stream, mr); + } + } + auto const input_init_value = std::get>>( input.at(input_kind::INIT_VALUE)); - if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } - auto const init_value = [&]() -> InputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { auto const numeric_init_scalar = @@ -249,10 +261,6 @@ struct test_udf_simple_type : cudf::host_udf_base { }(); auto const null_handling = std::get(input.at(input_kind::NULL_POLICY)); - auto const offsets = - std::get>(input.at(input_kind::OFFSETS)); - CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); - auto const num_segments = static_cast(offsets.size()) - 1; auto const values_dv_ptr = cudf::column_device_view::create(values, stream); auto output = cudf::make_numeric_column( @@ -488,7 +496,7 @@ TEST_F(HostUDFImplementationTest, SegmentedReductionEmptySegments) std::nullopt, // init_value cudf::get_default_stream(), cudf::get_current_device_resource_ref()); - auto const expected = int64s_col{{0, 0, 0, 0}, {false, false, false, false}}; + auto const 
expected = int64s_col{{0, 0, 0}, {false, false, false}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } From 3aefaf33bb0601e4dfbaf83e8dc2a1a3d2f3abee Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 23 Nov 2024 22:21:48 -0800 Subject: [PATCH 28/59] Fix comment Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index bd53e39e177..373cf5e86a6 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -424,7 +424,7 @@ TEST_F(HostUDFImplementationTest, SegmentedReductionSimpleInput) auto const agg = cudf::make_host_udf_aggregation( std::make_unique>()); - // Test without init_value. + // Test without init value. { auto const result = cudf::segmented_reduce( vals, @@ -432,12 +432,12 @@ TEST_F(HostUDFImplementationTest, SegmentedReductionSimpleInput) *agg, cudf::data_type{cudf::type_id::INT64}, cudf::null_policy::INCLUDE, - std::nullopt, // init_value + std::nullopt, // init value cudf::get_default_stream(), cudf::get_current_device_resource_ref()); - // When null_policy is set to `INCLUDE`, the null values are replaced with the init_value. - // Since init_value is not given, it is set to 0. + // When null_policy is set to `INCLUDE`, the null values are replaced with the init value. + // Since init value is not given, it is set to 0. // [ 3 * (0^2 + init^2 + 2^2), 2 * (3^2 + init^2), 5 * (5^2 + init^2 + init^2 + 8^2 + 9^2) ] auto const expected = int64s_col{12, 18, 850}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); @@ -456,7 +456,7 @@ TEST_F(HostUDFImplementationTest, SegmentedReductionSimpleInput) cudf::get_default_stream(), cudf::get_current_device_resource_ref()); - // When null_policy is set to `INCLUDE`, the null values are replaced with the init_value. + // When null_policy is set to `INCLUDE`, the null values are replaced with the init value. 
// [ 3 * (3 + 0^2 + 3^2 + 2^2), 2 * (3 + 3^2 + 3^2), 5 * (3 + 5^2 + 3^2 + 3^2 + 8^2 + 9^2) ] auto const expected = int64s_col{48, 42, 955}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); @@ -493,7 +493,7 @@ TEST_F(HostUDFImplementationTest, SegmentedReductionEmptySegments) *agg, cudf::data_type{cudf::type_id::INT64}, cudf::null_policy::INCLUDE, - std::nullopt, // init_value + std::nullopt, // init value cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expected = int64s_col{{0, 0, 0}, {false, false, false}}; @@ -513,7 +513,7 @@ TEST_F(HostUDFImplementationTest, SegmentedReductionEmptyInput) *agg, cudf::data_type{cudf::type_id::INT64}, cudf::null_policy::INCLUDE, - std::nullopt, // init_value + std::nullopt, // init value cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expected = int64s_col{}; From 9b61fe3e50d44307a6bc5061c4c6546b140c9039 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 24 Nov 2024 10:06:18 -0800 Subject: [PATCH 29/59] Rename tests Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 373cf5e86a6..f224e42c2fb 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -381,9 +381,9 @@ using doubles_col = cudf::test::fixed_width_column_wrapper; using int32s_col = cudf::test::fixed_width_column_wrapper; using int64s_col = cudf::test::fixed_width_column_wrapper; -struct HostUDFImplementationTest : cudf::test::BaseFixture {}; +struct HostUDFExampleTest : cudf::test::BaseFixture {}; -TEST_F(HostUDFImplementationTest, ReductionSimpleInput) +TEST_F(HostUDFExampleTest, ReductionSimpleInput) { auto const vals = doubles_col{0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; auto const agg = cudf::make_host_udf_aggregation( @@ -401,7 +401,7 @@ TEST_F(HostUDFImplementationTest, ReductionSimpleInput) 
EXPECT_EQ(expected, result); } -TEST_F(HostUDFImplementationTest, ReductionEmptyInput) +TEST_F(HostUDFExampleTest, ReductionEmptyInput) { auto const vals = doubles_col{}; auto const agg = cudf::make_host_udf_aggregation( @@ -415,7 +415,7 @@ TEST_F(HostUDFImplementationTest, ReductionEmptyInput) EXPECT_EQ(cudf::type_id::INT64, reduced->type().id()); } -TEST_F(HostUDFImplementationTest, SegmentedReductionSimpleInput) +TEST_F(HostUDFExampleTest, SegmentedReductionSimpleInput) { auto const vals = doubles_col{ {0.0, 0.0 /*null*/, 2.0, 3.0, 0.0 /*null*/, 5.0, 0.0 /*null*/, 0.0 /*null*/, 8.0, 9.0}, @@ -481,7 +481,7 @@ TEST_F(HostUDFImplementationTest, SegmentedReductionSimpleInput) } } -TEST_F(HostUDFImplementationTest, SegmentedReductionEmptySegments) +TEST_F(HostUDFExampleTest, SegmentedReductionEmptySegments) { auto const vals = int32s_col{}; auto const offsets = int32s_col{0, 0, 0, 0}.release(); @@ -500,7 +500,7 @@ TEST_F(HostUDFImplementationTest, SegmentedReductionEmptySegments) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } -TEST_F(HostUDFImplementationTest, SegmentedReductionEmptyInput) +TEST_F(HostUDFExampleTest, SegmentedReductionEmptyInput) { auto const vals = int32s_col{}; // Cannot be empty due to a bug in the libcudf: https://github.com/rapidsai/cudf/issues/17433. From b87e2a73f0623ba817965f337c9e5523666b29f6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 24 Nov 2024 15:44:52 -0800 Subject: [PATCH 30/59] Fix groupby type Signed-off-by: Nghia Truong --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index c3cdbcd8321..a9bf3f94e5f 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1509,6 +1509,12 @@ struct target_type_impl +struct target_type_impl { + // Just a placeholder. The actual return type is unknown. 
+ using type = struct_view; +}; + /** * @brief Helper alias to get the accumulator type for performing aggregation * `k` on elements of type `Source` From 697993bad157167671c3dd80562050b7f353d42d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 24 Nov 2024 15:58:23 -0800 Subject: [PATCH 31/59] Add test `GroupbySimpleInput` Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index f224e42c2fb..4fd67fc4dce 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -371,7 +371,7 @@ struct test_udf_simple_type : cudf::host_udf_base { auto const val = static_cast(values.element(i)); sum += val * val; } - return {static_cast((group_indices[idx] + 1)) * sum, true}; + return {static_cast((group_indices[start] + 1)) * sum, true}; } }; }; @@ -519,3 +519,29 @@ TEST_F(HostUDFExampleTest, SegmentedReductionEmptyInput) auto const expected = int64s_col{}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } + +TEST_F(HostUDFExampleTest, GroupbySimpleInput) +{ + auto const keys = int32s_col{0, 1, 2, 0, 1, 2, 0, 1, 2, 0}; + auto const vals = doubles_col{ + {0.0, 0.0 /*null*/, 2.0, 3.0, 0.0 /*null*/, 5.0, 0.0 /*null*/, 0.0 /*null*/, 8.0, 9.0}, + {true, false, true, true, false, true, false, false, true, true}}; + auto agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + + std::vector requests; + requests.emplace_back(); + requests[0].values = vals; + requests[0].aggregations.push_back(std::move(agg)); + cudf::groupby::groupby gb_obj( + cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); + + auto const grp_result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); + auto const& result = grp_result.second[0].results[0]; + + // Output type of groupby is double. 
+ // Values grouped by keys: [ {0, 3, null, 9}, {null, null, null}, {2, 5, 8} ] + // [ 1 * (0^2 + 3^2 + 9^2), 0, 3 * (2^2 + 5^2 + 8^2) ] + auto const expected = doubles_col{90, 0, 279}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} From 7a817547ccee29f2ee42f655c68c31caffb2a05a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 24 Nov 2024 20:55:17 -0800 Subject: [PATCH 32/59] Add the ability to call other aggregations Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 98 ++++++++++++--- cpp/src/groupby/sort/aggregate.cpp | 114 +++++++++-------- cpp/src/reductions/reductions.cpp | 47 ++++--- cpp/src/reductions/segmented/reductions.cpp | 71 ++++++----- cpp/tests/groupby/host_udf_tests.cu | 131 +++++++++++++------- 5 files changed, 290 insertions(+), 171 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 1546043f5cb..84dc2ce616e 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -623,14 +623,14 @@ struct host_udf_base { virtual ~host_udf_base() = default; /** - * @brief Define the possible data that may be needed in the derived class for its operations. + * @brief Define the intermediate data that may be needed in the derived class for its operations. * - * Each derived host-based UDF class may need a different set of input data (such as sorted + * Each derived host-based UDF class may need a different set of intermediate data (such as sorted * values, group labels, group offsets etc). It is inefficient to evaluate and pass down all these * data at once from libcudf. A solution for that is, the derived class defines a subset of data * that it needs and only such data will be evaluated. 
*/ - enum class input_kind { + enum class intermediate_data_kind { INPUT_VALUES, // the input values column, may be used in any aggregation OUTPUT_DTYPE, // output data type, used in reduction INIT_VALUE, // initial value for reduction @@ -646,25 +646,88 @@ struct host_udf_base { }; /** - * @brief Return a set of data kind that is needed for computing the aggregation. + * @brief The possible data kind that may be needed in the derived class for its operations. * - * @return A set of `input_kind` enum. + * Such data can be either intermediate data, or the results of other aggregations. */ - [[nodiscard]] virtual std::unordered_set const& get_required_data_kinds() const = 0; + struct input_data_kind { + using value_type = std::variant>; + value_type value; + + input_data_kind() = default; + input_data_kind(input_data_kind&&) = default; + input_data_kind(input_data_kind const& other) : value{copy_value(other.value)} {} + input_data_kind(intermediate_data_kind value_) : value{value_} {} + + template + input_data_kind(std::unique_ptr value_) : value{std::move(value_)} + { + static_assert(std::is_same_v || std::is_same_v); + } + + static value_type copy_value(value_type const& value) + { + if (std::holds_alternative(value)) { + return std::get(value); + } + return std::get>(value)->clone(); + } + + struct hash { + std::size_t operator()(input_data_kind const& kind) const + { + if (std::holds_alternative(kind.value)) { + return std::hash{}(static_cast(std::get(kind.value))); + } + return std::get>(kind.value)->do_hash(); + } + }; + + struct equal_to { + bool operator()(input_data_kind const& lhs, input_data_kind const& rhs) const + { + if (std::holds_alternative(lhs.value) != + std::holds_alternative(rhs.value)) { + return false; + } + if (std::holds_alternative(lhs.value)) { + return std::get(lhs.value) == + std::get(rhs.value); + } + return std::get>(lhs.value)->is_equal( + *std::get>(rhs.value)); + } + }; + }; + + using data_kind_set = + std::unordered_set; + + /** + * 
@brief Return a set of data kinds that is needed for computing the aggregation. + * + * @return A set of `input_data_kind`. + */ + [[nodiscard]] virtual data_kind_set get_required_data_kinds() const = 0; /** - * Aggregation data that is needed for computing the aggregation. + * @brief Type of the data that is passed to the derived class for computing aggregation. */ - using input_data = std::variant>, - null_policy, - scan_type, - device_span>; + using input_data_type = std::variant>, + null_policy, + scan_type, + device_span>; + + using host_udf_input_map = std::unordered_map; /** * Output type of the aggregation. It can be either a scalar (for reduction) or a column - * (for segmented reduction or groupby) aggregation. + * (for segmented reduction or groupby aggregations). */ using output_type = std::variant, std::unique_ptr>; @@ -676,10 +739,9 @@ struct host_udf_base { * @param mr Device memory resource to use for any allocations * @return The output result of the aggregation */ - [[nodiscard]] virtual output_type operator()( - std::unordered_map const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const = 0; + [[nodiscard]] virtual output_type operator()(host_udf_input_map const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const = 0; /** * @brief Get the output when the input values is empty. 
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index e7c1e00930d..c42560661a1 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -446,51 +446,6 @@ void aggregate_result_functor::operator()(aggregation lists_column_view{collect_result->view()}, nulls_equal, nans_equal, stream, mr)); } -template <> -void aggregate_result_functor::operator()(aggregation const& agg) -{ - if (cache.has_result(values, agg)) { return; } - - auto const& udf_ptr = dynamic_cast(agg).udf_ptr; - auto const& data_kinds = udf_ptr->get_required_data_kinds(); - - // Do not cache udf_input, as the actual input data may change from run to run. - std::unordered_map udf_input; - for (auto const kind : data_kinds) { - switch (kind) { - case cudf::host_udf_base::input_kind::INPUT_VALUES: { - udf_input.emplace(kind, values); - break; - } - - case cudf::host_udf_base::input_kind::OFFSETS: { - udf_input.emplace(kind, helper.group_offsets(stream)); - break; - } - - case cudf::host_udf_base::input_kind::GROUP_LABELS: { - udf_input.emplace(kind, helper.group_labels(stream)); - break; - } - - case cudf::host_udf_base::input_kind::SORTED_GROUPED_VALUES: { - udf_input.emplace(kind, get_sorted_values()); - break; - } - - case cudf::host_udf_base::input_kind::GROUPED_VALUES: { - udf_input.emplace(kind, get_grouped_values()); - break; - } - - default: CUDF_FAIL("Unsupported data kind in host-based UDF groupby aggregation."); - } - } - - cache.add_result( - values, agg, std::get>((*udf_ptr)(udf_input, stream, mr))); -} - /** * @brief Perform merging for the lists that correspond to the same key value. 
* @@ -661,10 +616,8 @@ void aggregate_result_functor::operator()(aggregation c column_view_with_common_nulls(values.child(0), values.child(1), stream); auto mean_agg = make_mean_aggregation(); - aggregate_result_functor(values_child0, helper, cache, stream, mr) - .operator()(*mean_agg); - aggregate_result_functor(values_child1, helper, cache, stream, mr) - .operator()(*mean_agg); + aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()(*mean_agg); + aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()(*mean_agg); auto const mean0 = cache.get_result(values_child0, *mean_agg); auto const mean1 = cache.get_result(values_child1, *mean_agg); @@ -712,10 +665,8 @@ void aggregate_result_functor::operator()(aggregation column_view_with_common_nulls(values.child(0), values.child(1), stream); auto std_agg = make_std_aggregation(); - aggregate_result_functor(values_child0, helper, cache, stream, mr) - .operator()(*std_agg); - aggregate_result_functor(values_child1, helper, cache, stream, mr) - .operator()(*std_agg); + aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()(*std_agg); + aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()(*std_agg); // Compute covariance here to avoid repeated computation of mean & count auto cov_agg = make_covariance_aggregation(corr_agg._min_periods); @@ -840,6 +791,63 @@ void aggregate_result_functor::operator()(aggregatio mr)); } +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(values, agg)) { return; } + + auto const& udf_ptr = dynamic_cast(agg).udf_ptr; + auto const& data_kinds = udf_ptr->get_required_data_kinds(); + + // Do not cache udf_input, as the actual input data may change from run to run. 
+ cudf::host_udf_base::host_udf_input_map udf_input; + for (auto const& kind : data_kinds) { + if (std::holds_alternative(kind.value)) { + auto const intermediate_kind = + std::get(kind.value); + switch (intermediate_kind) { + case cudf::host_udf_base::intermediate_data_kind::INPUT_VALUES: { + udf_input.emplace(kind, values); + break; + } + + case cudf::host_udf_base::intermediate_data_kind::OFFSETS: { + udf_input.emplace(kind, helper.group_offsets(stream)); + break; + } + + case cudf::host_udf_base::intermediate_data_kind::GROUP_LABELS: { + udf_input.emplace(kind, helper.group_labels(stream)); + break; + } + + case cudf::host_udf_base::intermediate_data_kind::SORTED_GROUPED_VALUES: { + udf_input.emplace(kind, get_sorted_values()); + break; + } + + case cudf::host_udf_base::intermediate_data_kind::GROUPED_VALUES: { + udf_input.emplace(kind, get_grouped_values()); + break; + } + + default: CUDF_FAIL("Unsupported data kind in host-based UDF groupby aggregation."); + } + } else { // `kind` is another aggregation + auto other_agg = std::get>(kind.value)->clone(); + cudf::detail::aggregation_dispatcher(other_agg->kind, *this, *other_agg); + auto result = cache.get_result(values, *other_agg); + udf_input.emplace(std::move(other_agg), std::move(result)); + } + } + + auto output = (*udf_ptr)(udf_input, stream, mr); + CUDF_EXPECTS(std::holds_alternative>(output), + "Invalid output type from HOST_UDF groupby aggregation."); + + cache.add_result(values, agg, std::get>(std::move(output))); +} + } // namespace detail // Sort-based groupby diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index aefd9e1e24f..e5fe7bb515b 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -149,31 +149,38 @@ struct reduce_dispatch_functor { auto const& data_kinds = udf_ptr->get_required_data_kinds(); // Do not cache udf_input, as the actual input data may change from run to run. 
- std::unordered_map - udf_input; - for (auto const kind : data_kinds) { - switch (kind) { - case cudf::host_udf_base::input_kind::INPUT_VALUES: { - udf_input.emplace(kind, col); - break; - } + cudf::host_udf_base::host_udf_input_map udf_input; + for (auto const& kind : data_kinds) { + if (std::holds_alternative(kind.value)) { + auto const intermediate_kind = + std::get(kind.value); + switch (intermediate_kind) { + case cudf::host_udf_base::intermediate_data_kind::INPUT_VALUES: { + udf_input.emplace(kind, col); + break; + } - case cudf::host_udf_base::input_kind::OUTPUT_DTYPE: { - udf_input.emplace(kind, output_dtype); - break; - } + case cudf::host_udf_base::intermediate_data_kind::OUTPUT_DTYPE: { + udf_input.emplace(kind, output_dtype); + break; + } - case cudf::host_udf_base::input_kind::INIT_VALUE: { - udf_input.emplace(kind, init); - break; - } + case cudf::host_udf_base::intermediate_data_kind::INIT_VALUE: { + udf_input.emplace(kind, init); + break; + } - default: CUDF_FAIL("Unsupported data kind in host-based UDF reduction."); + default: CUDF_FAIL("Unsupported data kind in host-based UDF reduction."); + } + } else { + CUDF_FAIL("Reduction aggregation does not support calling other aggregations."); } } - - return std::get>((*udf_ptr)(udf_input, stream, mr)); - } + auto output = (*udf_ptr)(udf_input, stream, mr); + CUDF_EXPECTS(std::holds_alternative>(output), + "Invalid output type from HOST_UDF reduction."); + return std::get>(std::move(output)); + } // case aggregation::HOST_UDF default: CUDF_FAIL("Unsupported reduction operator"); } } diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 449152b0423..56baea33981 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -100,41 +100,48 @@ struct segmented_reduce_dispatch_functor { auto const& data_kinds = udf_ptr->get_required_data_kinds(); // Do not cache udf_input, as the actual input data may change 
from run to run. - std::unordered_map - udf_input; - for (auto const kind : data_kinds) { - switch (kind) { - case cudf::host_udf_base::input_kind::INPUT_VALUES: { - udf_input.emplace(kind, col); - break; + cudf::host_udf_base::host_udf_input_map udf_input; + for (auto const& kind : data_kinds) { + if (std::holds_alternative(kind.value)) { + auto const intermediate_kind = + std::get(kind.value); + switch (intermediate_kind) { + case cudf::host_udf_base::intermediate_data_kind::INPUT_VALUES: { + udf_input.emplace(kind, col); + break; + } + + case cudf::host_udf_base::intermediate_data_kind::OUTPUT_DTYPE: { + udf_input.emplace(kind, output_dtype); + break; + } + + case cudf::host_udf_base::intermediate_data_kind::INIT_VALUE: { + udf_input.emplace(kind, init); + break; + } + + case cudf::host_udf_base::intermediate_data_kind::NULL_POLICY: { + udf_input.emplace(kind, null_handling); + break; + } + + case cudf::host_udf_base::intermediate_data_kind::OFFSETS: { + udf_input.emplace(kind, offsets); + break; + } + + default: CUDF_FAIL("Unsupported data kind in host-based UDF segmented reduction."); } - - case cudf::host_udf_base::input_kind::OUTPUT_DTYPE: { - udf_input.emplace(kind, output_dtype); - break; - } - - case cudf::host_udf_base::input_kind::INIT_VALUE: { - udf_input.emplace(kind, init); - break; - } - - case cudf::host_udf_base::input_kind::NULL_POLICY: { - udf_input.emplace(kind, null_handling); - break; - } - - case cudf::host_udf_base::input_kind::OFFSETS: { - udf_input.emplace(kind, offsets); - break; - } - - default: CUDF_FAIL("Unsupported data kind in host-based UDF segmented reduction."); + } else { + CUDF_FAIL("Reduction aggregation does not support calling other aggregations."); } } - - return std::get>((*udf_ptr)(udf_input, stream, mr)); - } + auto output = (*udf_ptr)(udf_input, stream, mr); + CUDF_EXPECTS(std::holds_alternative>(output), + "Invalid output type from HOST_UDF segmented reduction."); + return std::get>(std::move(output)); + } // case 
aggregation::HOST_UDF default: CUDF_FAIL("Unsupported aggregation type."); } } diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 4fd67fc4dce..b9179709573 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -44,7 +44,7 @@ * The aggregations perform the following computation: * - For reduction: compute `sum(value^2, for value in group)` (this is sum of squared). * - For segmented reduction: compute `segment_size * sum(value^2, for value in group)`. - * - For groupby: compute `(group_idx + 1) * sum(value^2, for value in group)`. + * - For groupby: compute `(group_idx + 1) * group_sum_of_squares - group_max * group_sum`. * * In addition, for segmented reduction, if null_policy is set to `INCLUDE`, the null values are * replaced with an initial value if it is provided. @@ -57,42 +57,48 @@ struct test_udf_simple_type : cudf::host_udf_base { test_udf_simple_type() = default; - [[nodiscard]] std::unordered_set const& get_required_data_kinds() const override + [[nodiscard]] data_kind_set get_required_data_kinds() const override { - static std::unordered_set const required_data_kinds = - [&]() -> std::unordered_set { - if constexpr (std::is_same_v) { - return {input_kind::INPUT_VALUES, input_kind::OUTPUT_DTYPE, input_kind::INIT_VALUE}; - } else if constexpr (std::is_same_v) { - return {input_kind::INPUT_VALUES, - input_kind::OUTPUT_DTYPE, - input_kind::INIT_VALUE, - input_kind::NULL_POLICY, - input_kind::OFFSETS}; - } else { - return {input_kind::OFFSETS, input_kind::GROUP_LABELS, input_kind::GROUPED_VALUES}; - } - }(); - - return required_data_kinds; + if constexpr (std::is_same_v) { + return {intermediate_data_kind::INPUT_VALUES, + intermediate_data_kind::OUTPUT_DTYPE, + intermediate_data_kind::INIT_VALUE}; + } else if constexpr (std::is_same_v) { + return {intermediate_data_kind::INPUT_VALUES, + intermediate_data_kind::OUTPUT_DTYPE, + intermediate_data_kind::INIT_VALUE, + 
intermediate_data_kind::NULL_POLICY, + intermediate_data_kind::OFFSETS}; + } else { + return {intermediate_data_kind::OFFSETS, + intermediate_data_kind::GROUP_LABELS, + intermediate_data_kind::GROUPED_VALUES, + cudf::make_max_aggregation(), + cudf::make_sum_aggregation()}; + } } - [[nodiscard]] output_type operator()(std::unordered_map const& input, + [[nodiscard]] output_type operator()(host_udf_input_map const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const override { if constexpr (std::is_same_v) { - auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); - auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); + auto const& values = + std::get(input.at(intermediate_data_kind::INPUT_VALUES)); + auto const output_dtype = + std::get(input.at(intermediate_data_kind::OUTPUT_DTYPE)); return cudf::double_type_dispatcher( values.type(), output_dtype, reduce_fn{this}, input, stream, mr); } else if constexpr (std::is_same_v) { - auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); - auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); + auto const& values = + std::get(input.at(intermediate_data_kind::INPUT_VALUES)); + auto const output_dtype = + std::get(input.at(intermediate_data_kind::OUTPUT_DTYPE)); return cudf::double_type_dispatcher( values.type(), output_dtype, segmented_reduce_fn{this}, input, stream, mr); } else { - auto const& values = std::get(input.at(input_kind::GROUPED_VALUES)); + auto const& values = + std::get(input.at(intermediate_data_kind::GROUPED_VALUES)); return cudf::type_dispatcher(values.type(), groupby_fn{this}, input, stream, mr); } } @@ -162,15 +168,17 @@ struct test_udf_simple_type : cudf::host_udf_base { template () && is_valid_output_t())> - output_type operator()(std::unordered_map const& input, + output_type operator()(host_udf_input_map const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - auto const& values = 
std::get(input.at(input_kind::INPUT_VALUES)); - auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); + auto const& values = + std::get(input.at(intermediate_data_kind::INPUT_VALUES)); + auto const output_dtype = + std::get(input.at(intermediate_data_kind::OUTPUT_DTYPE)); auto const input_init_value = std::get>>( - input.at(input_kind::INIT_VALUE)); + input.at(intermediate_data_kind::INIT_VALUE)); if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } @@ -226,14 +234,16 @@ struct test_udf_simple_type : cudf::host_udf_base { template () && is_valid_output_t())> - output_type operator()(std::unordered_map const& input, + output_type operator()(host_udf_input_map const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - auto const& values = std::get(input.at(input_kind::INPUT_VALUES)); - auto const output_dtype = std::get(input.at(input_kind::OUTPUT_DTYPE)); - auto const offsets = - std::get>(input.at(input_kind::OFFSETS)); + auto const& values = + std::get(input.at(intermediate_data_kind::INPUT_VALUES)); + auto const output_dtype = + std::get(input.at(intermediate_data_kind::OUTPUT_DTYPE)); + auto const offsets = std::get>( + input.at(intermediate_data_kind::OFFSETS)); CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); auto const num_segments = static_cast(offsets.size()) - 1; @@ -248,7 +258,7 @@ struct test_udf_simple_type : cudf::host_udf_base { auto const input_init_value = std::get>>( - input.at(input_kind::INIT_VALUE)); + input.at(intermediate_data_kind::INIT_VALUE)); auto const init_value = [&]() -> InputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { @@ -260,7 +270,8 @@ struct test_udf_simple_type : cudf::host_udf_base { return InputType{0}; }(); - auto const null_handling = std::get(input.at(input_kind::NULL_POLICY)); + auto const null_handling = + std::get(input.at(intermediate_data_kind::NULL_POLICY)); auto const values_dv_ptr = 
cudf::column_device_view::create(values, stream); auto output = cudf::make_numeric_column( @@ -312,6 +323,10 @@ struct test_udf_simple_type : cudf::host_udf_base { // Store pointer to the parent class so we can call its functions. test_udf_simple_type const* parent; using OutputType = double; + template + using MaxType = cudf::detail::target_type_t; + template + using SumType = cudf::detail::target_type_t; template ())> output_type operator()(Args...) const @@ -320,19 +335,24 @@ struct test_udf_simple_type : cudf::host_udf_base { } template ())> - output_type operator()(std::unordered_map const& input, + output_type operator()(host_udf_input_map const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - auto const& values = std::get(input.at(input_kind::GROUPED_VALUES)); + auto const& values = + std::get(input.at(intermediate_data_kind::GROUPED_VALUES)); if (values.size() == 0) { return parent->get_empty_output(std::nullopt, stream, mr); } - auto const offsets = - std::get>(input.at(input_kind::OFFSETS)); + auto const offsets = std::get>( + input.at(intermediate_data_kind::OFFSETS)); CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); - auto const num_groups = static_cast(offsets.size()) - 1; - auto const group_indices = - std::get>(input.at(input_kind::GROUP_LABELS)); + auto const num_groups = static_cast(offsets.size()) - 1; + auto const group_indices = std::get>( + input.at(intermediate_data_kind::GROUP_LABELS)); + auto const group_max = std::get( + input.at(cudf::make_max_aggregation())); + auto const group_sum = std::get( + input.at(cudf::make_sum_aggregation())); auto const values_dv_ptr = cudf::column_device_view::create(values, stream); auto output = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, @@ -346,7 +366,11 @@ struct test_udf_simple_type : cudf::host_udf_base { thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_groups), thrust::make_zip_iterator(output->mutable_view().begin(), 
validity.begin()), - transform_fn{*values_dv_ptr, offsets, group_indices}); + transform_fn{*values_dv_ptr, + offsets, + group_indices, + group_max.begin>(), + group_sum.begin>()}); auto [null_mask, null_count] = cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } @@ -358,6 +382,8 @@ struct test_udf_simple_type : cudf::host_udf_base { cudf::column_device_view values; cudf::device_span offsets; cudf::device_span group_indices; + MaxType const* group_max; + SumType const* group_sum; thrust::tuple __device__ operator()(cudf::size_type idx) const { @@ -365,13 +391,19 @@ struct test_udf_simple_type : cudf::host_udf_base { auto const end = offsets[idx + 1]; if (start == end) { return {OutputType{0}, false}; } - auto sum = OutputType{0}; + auto sum_sqr = OutputType{0}; + bool has_valid{false}; for (auto i = start; i < end; ++i) { if (values.is_null(i)) { continue; } + has_valid = true; auto const val = static_cast(values.element(i)); - sum += val * val; + sum_sqr += val * val; } - return {static_cast((group_indices[start] + 1)) * sum, true}; + + if (!has_valid) { return {OutputType{0}, false}; } + return {static_cast(group_indices[start] + 1) * sum_sqr - + static_cast(group_max[idx]) * static_cast(group_sum[idx]), + true}; } }; }; @@ -541,7 +573,10 @@ TEST_F(HostUDFExampleTest, GroupbySimpleInput) // Output type of groupby is double. 
// Values grouped by keys: [ {0, 3, null, 9}, {null, null, null}, {2, 5, 8} ] - // [ 1 * (0^2 + 3^2 + 9^2), 0, 3 * (2^2 + 5^2 + 8^2) ] - auto const expected = doubles_col{90, 0, 279}; + // Group sum_sqr: [ 90, null, 93 ] + // Group max: [ 9, null, 8 ] + // Group sum: [ 12, null, 15 ] + // Output: [ 1 * 90 - 9 * 12, null, 3 * 93 - 8 * 15 ] + auto const expected = doubles_col{{-18, 0, 159}, {true, false, true}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } From 1b8fb9263ec9947cb528f6b8821f40e6105bd9e4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 25 Nov 2024 12:56:55 -0800 Subject: [PATCH 33/59] Add anonymous namespace Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index b9179709573..9fe967a10cc 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -38,6 +38,7 @@ #include #include +namespace { /** * @brief A host-based UDF implementation. 
* @@ -409,6 +410,8 @@ struct test_udf_simple_type : cudf::host_udf_base { }; }; +} // namespace + using doubles_col = cudf::test::fixed_width_column_wrapper; using int32s_col = cudf::test::fixed_width_column_wrapper; using int64s_col = cudf::test::fixed_width_column_wrapper; From 91489c129c7d71d5d7c7890eb13119d2ce098f69 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 11:09:48 -0800 Subject: [PATCH 34/59] Refactor Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 178 ++++++++++++------ .../cudf/detail/aggregation/aggregation.hpp | 2 +- cpp/src/groupby/sort/aggregate.cpp | 68 ++++--- cpp/src/reductions/reductions.cpp | 48 +++-- cpp/src/reductions/segmented/reductions.cpp | 69 ++++--- cpp/tests/groupby/host_udf_tests.cu | 62 +++--- 6 files changed, 246 insertions(+), 181 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 84dc2ce616e..b19336d7dfc 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -623,76 +624,140 @@ struct host_udf_base { virtual ~host_udf_base() = default; /** - * @brief Define the intermediate data that may be needed in the derived class for its operations. + * @brief Define the possible data needed for reduction. + */ + enum class reduction_data_attribute : int32_t { + INPUT_VALUES, ///< The input values column + OUTPUT_DTYPE, ///< Data type for the output result + INIT_VALUE ///< Initial value + }; + + /** + * @brief Define the possible data needed for segmented reduction. + */ + enum class segmented_reduction_data_attribute : int32_t { + INPUT_VALUES, ///< The input values column + OUTPUT_DTYPE, ///< Data type for the output result + INIT_VALUE, ///< Initial value + NULL_POLICY, ///< To control null handling + OFFSETS ///< The offsets defining segments + }; + + /** + * @brief Define the possible data needed for groupby aggregations. 
* - * Each derived host-based UDF class may need a different set of intermediate data (such as sorted - * values, group labels, group offsets etc). It is inefficient to evaluate and pass down all these - * data at once from libcudf. A solution for that is, the derived class defines a subset of data - * that it needs and only such data will be evaluated. + * Note that only sort-based groupby aggregations are supported. */ - enum class intermediate_data_kind { - INPUT_VALUES, // the input values column, may be used in any aggregation - OUTPUT_DTYPE, // output data type, used in reduction - INIT_VALUE, // initial value for reduction - NULL_POLICY, // to control null handling, used in segmented reduction and scan - SCAN_TYPE, // used in scan aggregations - OFFSETS, // offsets for segmented reduction or sort-based groupby - GROUP_LABELS, // group labels used in sort-based groupby - SORTED_GROUPED_VALUES, // the input values grouped according to the input `keys` and - // sorted within each group, used in sort-based groupby - GROUPED_VALUES // the input values grouped according to the input `keys` for which the - // values within each group maintain their original order, - // used in sort-based groupby + enum class groupby_data_attribute : int32_t { + INPUT_VALUES, ///< The input values column + GROUPED_VALUES, ///< The input values grouped according to the input `keys` for which the + ///< values within each group maintain their original order + SORTED_GROUPED_VALUES, ///< The input values grouped according to the input `keys` and + ///< sorted within each group + GROUP_OFFSETS, ///< The offsets separating groups + GROUP_LABELS ///< Group labels (which is also the same as group indices) }; /** - * @brief The possible data kind that may be needed in the derived class for its operations. + * @brief The possible data that may be needed in the derived class for its operations. 
+ * + * Such data can be either intermediate data such as sorted values or group labels etc, or the + * results of other aggregations. * - * Such data can be either intermediate data, or the results of other aggregations. + * Each derived host-based UDF class may need a different set of data. It is inefficient to + * evaluate and pass down all these possible data at once from libcudf. A solution for that is, + * the derived class can define a subset of data that it needs and libcudf will evaluate + * and pass down only data requested from that set. */ - struct input_data_kind { - using value_type = std::variant>; + struct data_attribute { + using value_type = std::variant>; value_type value; - input_data_kind() = default; - input_data_kind(input_data_kind&&) = default; - input_data_kind(input_data_kind const& other) : value{copy_value(other.value)} {} - input_data_kind(intermediate_data_kind value_) : value{value_} {} + data_attribute() = default; + data_attribute(data_attribute&&) = default; - template - input_data_kind(std::unique_ptr value_) : value{std::move(value_)} + // Copy constructor is needed to be used as keys in set and map. + // Since the `value` contains `unique_ptr`, we need to define the copy operator for it. 
+ data_attribute(data_attribute const& other) : value{copy_value(other.value)} {} + + template || + std::is_same_v || + std::is_same_v)> + data_attribute(T value_) : value{value_} { - static_assert(std::is_same_v || std::is_same_v); + } + + template || + std::is_same_v)> + data_attribute(std::unique_ptr value_) : value{std::move(value_)} + { + if constexpr (std::is_same_v) { + CUDF_EXPECTS( + dynamic_cast(std::get>(value).get()) != nullptr, + "Requesting results from other aggregations is only supported in groupby " + "aggregations."); + } + CUDF_EXPECTS(std::get>(value) != nullptr, + "Invalid aggregation request."); } static value_type copy_value(value_type const& value) { - if (std::holds_alternative(value)) { - return std::get(value); + if (std::holds_alternative(value)) { + return std::get(value); + } + if (std::holds_alternative(value)) { + return std::get(value); + } + if (std::holds_alternative(value)) { + return std::get(value); } return std::get>(value)->clone(); } struct hash { - std::size_t operator()(input_data_kind const& kind) const + std::size_t operator()(data_attribute const& attr) const { - if (std::holds_alternative(kind.value)) { - return std::hash{}(static_cast(std::get(kind.value))); - } - return std::get>(kind.value)->do_hash(); + auto const& value = attr.value; + auto const hash_value = [&] { + if (std::holds_alternative(value)) { + return std::hash{}(static_cast(std::get(value))); + } + if (std::holds_alternative(value)) { + return std::hash{}( + static_cast(std::get(value))); + } + if (std::holds_alternative(value)) { + return std::hash{}(static_cast(std::get(value))); + } + return std::get>(value)->do_hash(); + }(); + return value.index() ^ hash_value; } }; struct equal_to { - bool operator()(input_data_kind const& lhs, input_data_kind const& rhs) const + bool operator()(data_attribute const& lhs, data_attribute const& rhs) const { - if (std::holds_alternative(lhs.value) != - std::holds_alternative(rhs.value)) { - return false; + auto 
const& lhs_val = lhs.value; + auto const& rhs_val = rhs.value; + if (lhs_val.index() != rhs_val.index()) { return false; } + if (std::holds_alternative(lhs_val)) { + return std::get(lhs_val) == + std::get(rhs_val); + } + if (std::holds_alternative(lhs_val)) { + return std::get(lhs_val) == + std::get(rhs_val); } - if (std::holds_alternative(lhs.value)) { - return std::get(lhs.value) == - std::get(rhs.value); + if (std::holds_alternative(lhs_val)) { + return std::get(lhs_val) == + std::get(rhs_val); } return std::get>(lhs.value)->is_equal( *std::get>(rhs.value)); @@ -700,15 +765,18 @@ struct host_udf_base { }; }; - using data_kind_set = - std::unordered_set; + using input_data_attributes = + std::unordered_set; /** - * @brief Return a set of data kinds that is needed for computing the aggregation. + * @brief Return a set of attributes for the data that is needed for computing the aggregation. + * + * If this function is not overridden, all the data attributes (except results from other + * aggregations in groupby) are assumed to be needed. * - * @return A set of `input_data_kind`. + * @return A set of `data_attribute`. */ - [[nodiscard]] virtual data_kind_set get_required_data_kinds() const = 0; + [[nodiscard]] virtual input_data_attributes get_required_data() const { return {}; } /** * @brief Type of the data that is passed to the derived class for computing aggregation. @@ -717,16 +785,16 @@ struct host_udf_base { data_type, std::optional>, null_policy, - scan_type, device_span>; - using host_udf_input_map = std::unordered_map; + /** + * @brief Input to the aggregation, mapping from each data attribute to its actual data. + */ + using host_udf_input = std:: + unordered_map; /** - * Output type of the aggregation. It can be either a scalar (for reduction) or a column + * @brief Output type of the aggregation. It can be either a scalar (for reduction) or a column * (for segmented reduction or groupby aggregations). 
*/ using output_type = std::variant, std::unique_ptr>; @@ -739,7 +807,7 @@ struct host_udf_base { * @param mr Device memory resource to use for any allocations * @return The output result of the aggregation */ - [[nodiscard]] virtual output_type operator()(host_udf_input_map const& input, + [[nodiscard]] virtual output_type operator()(host_udf_input const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const = 0; diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index a9bf3f94e5f..f9de5c455b0 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -971,7 +971,7 @@ class host_udf_aggregation final : public groupby_aggregation, public reduce_aggregation, public segmented_reduce_aggregation { public: - std::unique_ptr const udf_ptr; + std::unique_ptr udf_ptr; host_udf_aggregation() = delete; host_udf_aggregation(host_udf_aggregation const&) = delete; diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index c42560661a1..acf4fb854db 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -616,8 +616,10 @@ void aggregate_result_functor::operator()(aggregation c column_view_with_common_nulls(values.child(0), values.child(1), stream); auto mean_agg = make_mean_aggregation(); - aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()(*mean_agg); - aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()(*mean_agg); + aggregate_result_functor(values_child0, helper, cache, stream, mr) + .operator()(*mean_agg); + aggregate_result_functor(values_child1, helper, cache, stream, mr) + .operator()(*mean_agg); auto const mean0 = cache.get_result(values_child0, *mean_agg); auto const mean1 = cache.get_result(values_child1, *mean_agg); @@ -665,8 +667,10 @@ void aggregate_result_functor::operator()(aggregation 
column_view_with_common_nulls(values.child(0), values.child(1), stream); auto std_agg = make_std_aggregation(); - aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()(*std_agg); - aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()(*std_agg); + aggregate_result_functor(values_child0, helper, cache, stream, mr) + .operator()(*std_agg); + aggregate_result_functor(values_child1, helper, cache, stream, mr) + .operator()(*std_agg); // Compute covariance here to avoid repeated computation of mean & count auto cov_agg = make_covariance_aggregation(corr_agg._min_periods); @@ -796,45 +800,48 @@ void aggregate_result_functor::operator()(aggregation con { if (cache.has_result(values, agg)) { return; } - auto const& udf_ptr = dynamic_cast(agg).udf_ptr; - auto const& data_kinds = udf_ptr->get_required_data_kinds(); + auto const& udf_ptr = dynamic_cast(agg).udf_ptr; + auto data_attrs = udf_ptr->get_required_data(); + if (data_attrs.empty()) { // empty means everything + data_attrs = {host_udf_base::groupby_data_attribute::INPUT_VALUES, + host_udf_base::groupby_data_attribute::GROUPED_VALUES, + host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES, + host_udf_base::groupby_data_attribute::GROUP_OFFSETS, + host_udf_base::groupby_data_attribute::GROUP_LABELS}; + } // Do not cache udf_input, as the actual input data may change from run to run. 
- cudf::host_udf_base::host_udf_input_map udf_input; - for (auto const& kind : data_kinds) { - if (std::holds_alternative(kind.value)) { - auto const intermediate_kind = - std::get(kind.value); - switch (intermediate_kind) { - case cudf::host_udf_base::intermediate_data_kind::INPUT_VALUES: { - udf_input.emplace(kind, values); + host_udf_base::host_udf_input udf_input; + for (auto const& attr : data_attrs) { + CUDF_EXPECTS(std::holds_alternative(attr.value) || + std::holds_alternative>(attr.value), + "Invalid input data attribute for HOST_UDF groupby aggregation."); + if (std::holds_alternative(attr.value)) { + switch (std::get(attr.value)) { + case host_udf_base::groupby_data_attribute::INPUT_VALUES: { + udf_input.emplace(attr, values); break; } - - case cudf::host_udf_base::intermediate_data_kind::OFFSETS: { - udf_input.emplace(kind, helper.group_offsets(stream)); + case host_udf_base::groupby_data_attribute::GROUPED_VALUES: { + udf_input.emplace(attr, get_grouped_values()); break; } - - case cudf::host_udf_base::intermediate_data_kind::GROUP_LABELS: { - udf_input.emplace(kind, helper.group_labels(stream)); + case host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES: { + udf_input.emplace(attr, get_sorted_values()); break; } - - case cudf::host_udf_base::intermediate_data_kind::SORTED_GROUPED_VALUES: { - udf_input.emplace(kind, get_sorted_values()); + case host_udf_base::groupby_data_attribute::GROUP_OFFSETS: { + udf_input.emplace(attr, helper.group_offsets(stream)); break; } - - case cudf::host_udf_base::intermediate_data_kind::GROUPED_VALUES: { - udf_input.emplace(kind, get_grouped_values()); + case host_udf_base::groupby_data_attribute::GROUP_LABELS: { + udf_input.emplace(attr, helper.group_labels(stream)); break; } - - default: CUDF_FAIL("Unsupported data kind in host-based UDF groupby aggregation."); + default:; } - } else { // `kind` is another aggregation - auto other_agg = std::get>(kind.value)->clone(); + } else { // data is result from another 
aggregation + auto other_agg = std::get>(attr.value)->clone(); cudf::detail::aggregation_dispatcher(other_agg->kind, *this, *other_agg); auto result = cache.get_result(values, *other_agg); udf_input.emplace(std::move(other_agg), std::move(result)); @@ -844,7 +851,6 @@ void aggregate_result_functor::operator()(aggregation con auto output = (*udf_ptr)(udf_input, stream, mr); CUDF_EXPECTS(std::holds_alternative>(output), "Invalid output type from HOST_UDF groupby aggregation."); - cache.add_result(values, agg, std::get>(std::move(output))); } diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index e5fe7bb515b..4b76096b5f6 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -146,34 +146,32 @@ struct reduce_dispatch_functor { } case aggregation::HOST_UDF: { auto const& udf_ptr = dynamic_cast(agg).udf_ptr; - auto const& data_kinds = udf_ptr->get_required_data_kinds(); + auto data_attrs = udf_ptr->get_required_data(); + if (data_attrs.empty()) { // empty means everything + data_attrs = {host_udf_base::reduction_data_attribute::INPUT_VALUES, + host_udf_base::reduction_data_attribute::OUTPUT_DTYPE, + host_udf_base::reduction_data_attribute::INIT_VALUE}; + } // Do not cache udf_input, as the actual input data may change from run to run. 
- cudf::host_udf_base::host_udf_input_map udf_input; - for (auto const& kind : data_kinds) { - if (std::holds_alternative(kind.value)) { - auto const intermediate_kind = - std::get(kind.value); - switch (intermediate_kind) { - case cudf::host_udf_base::intermediate_data_kind::INPUT_VALUES: { - udf_input.emplace(kind, col); - break; - } - - case cudf::host_udf_base::intermediate_data_kind::OUTPUT_DTYPE: { - udf_input.emplace(kind, output_dtype); - break; - } - - case cudf::host_udf_base::intermediate_data_kind::INIT_VALUE: { - udf_input.emplace(kind, init); - break; - } - - default: CUDF_FAIL("Unsupported data kind in host-based UDF reduction."); + host_udf_base::host_udf_input udf_input; + for (auto const& attr : data_attrs) { + CUDF_EXPECTS(std::holds_alternative(attr.value), + "Invalid input data attribute for HOST_UDF reduction."); + switch (std::get(attr.value)) { + case host_udf_base::reduction_data_attribute::INPUT_VALUES: { + udf_input.emplace(attr, col); + break; + } + case host_udf_base::reduction_data_attribute::OUTPUT_DTYPE: { + udf_input.emplace(attr, output_dtype); + break; + } + case host_udf_base::reduction_data_attribute::INIT_VALUE: { + udf_input.emplace(attr, init); + break; } - } else { - CUDF_FAIL("Reduction aggregation does not support calling other aggregations."); + default:; } } auto output = (*udf_ptr)(udf_input, stream, mr); diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 56baea33981..4226be6710d 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -97,44 +97,43 @@ struct segmented_reduce_dispatch_functor { return segmented_nunique(col, offsets, null_handling, stream, mr); case aggregation::HOST_UDF: { auto const& udf_ptr = dynamic_cast(agg).udf_ptr; - auto const& data_kinds = udf_ptr->get_required_data_kinds(); + auto data_attrs = udf_ptr->get_required_data(); + if (data_attrs.empty()) { // empty means everything + 
data_attrs = {host_udf_base::segmented_reduction_data_attribute::INPUT_VALUES, + host_udf_base::segmented_reduction_data_attribute::OUTPUT_DTYPE, + host_udf_base::segmented_reduction_data_attribute::INIT_VALUE, + host_udf_base::segmented_reduction_data_attribute::NULL_POLICY, + host_udf_base::segmented_reduction_data_attribute::OFFSETS}; + } // Do not cache udf_input, as the actual input data may change from run to run. - cudf::host_udf_base::host_udf_input_map udf_input; - for (auto const& kind : data_kinds) { - if (std::holds_alternative(kind.value)) { - auto const intermediate_kind = - std::get(kind.value); - switch (intermediate_kind) { - case cudf::host_udf_base::intermediate_data_kind::INPUT_VALUES: { - udf_input.emplace(kind, col); - break; - } - - case cudf::host_udf_base::intermediate_data_kind::OUTPUT_DTYPE: { - udf_input.emplace(kind, output_dtype); - break; - } - - case cudf::host_udf_base::intermediate_data_kind::INIT_VALUE: { - udf_input.emplace(kind, init); - break; - } - - case cudf::host_udf_base::intermediate_data_kind::NULL_POLICY: { - udf_input.emplace(kind, null_handling); - break; - } - - case cudf::host_udf_base::intermediate_data_kind::OFFSETS: { - udf_input.emplace(kind, offsets); - break; - } - - default: CUDF_FAIL("Unsupported data kind in host-based UDF segmented reduction."); + host_udf_base::host_udf_input udf_input; + for (auto const& attr : data_attrs) { + CUDF_EXPECTS( + std::holds_alternative(attr.value), + "Invalid input data attribute for HOST_UDF segmented reduction."); + switch (std::get(attr.value)) { + case host_udf_base::segmented_reduction_data_attribute::INPUT_VALUES: { + udf_input.emplace(attr, col); + break; + } + case host_udf_base::segmented_reduction_data_attribute::OUTPUT_DTYPE: { + udf_input.emplace(attr, output_dtype); + break; + } + case host_udf_base::segmented_reduction_data_attribute::INIT_VALUE: { + udf_input.emplace(attr, init); + break; + } + case 
host_udf_base::segmented_reduction_data_attribute::NULL_POLICY: { + udf_input.emplace(attr, null_handling); + break; + } + case host_udf_base::segmented_reduction_data_attribute::OFFSETS: { + udf_input.emplace(attr, offsets); + break; } - } else { - CUDF_FAIL("Reduction aggregation does not support calling other aggregations."); + default:; } } auto output = (*udf_ptr)(udf_input, stream, mr); diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 9fe967a10cc..39bacdf5c76 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -58,48 +58,42 @@ struct test_udf_simple_type : cudf::host_udf_base { test_udf_simple_type() = default; - [[nodiscard]] data_kind_set get_required_data_kinds() const override + [[nodiscard]] input_data_attributes get_required_data() const override { - if constexpr (std::is_same_v) { - return {intermediate_data_kind::INPUT_VALUES, - intermediate_data_kind::OUTPUT_DTYPE, - intermediate_data_kind::INIT_VALUE}; - } else if constexpr (std::is_same_v) { - return {intermediate_data_kind::INPUT_VALUES, - intermediate_data_kind::OUTPUT_DTYPE, - intermediate_data_kind::INIT_VALUE, - intermediate_data_kind::NULL_POLICY, - intermediate_data_kind::OFFSETS}; + if constexpr (std::is_same_v || + std::is_same_v) { + // Empty set, which means we need everything. 
+ return {}; } else { - return {intermediate_data_kind::OFFSETS, - intermediate_data_kind::GROUP_LABELS, - intermediate_data_kind::GROUPED_VALUES, + return {groupby_data_attribute::GROUPED_VALUES, + groupby_data_attribute::GROUP_OFFSETS, + groupby_data_attribute::GROUP_LABELS, cudf::make_max_aggregation(), cudf::make_sum_aggregation()}; } } - [[nodiscard]] output_type operator()(host_udf_input_map const& input, + [[nodiscard]] output_type operator()(host_udf_input const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const override { if constexpr (std::is_same_v) { auto const& values = - std::get(input.at(intermediate_data_kind::INPUT_VALUES)); + std::get(input.at(reduction_data_attribute::INPUT_VALUES)); auto const output_dtype = - std::get(input.at(intermediate_data_kind::OUTPUT_DTYPE)); + std::get(input.at(reduction_data_attribute::OUTPUT_DTYPE)); return cudf::double_type_dispatcher( values.type(), output_dtype, reduce_fn{this}, input, stream, mr); } else if constexpr (std::is_same_v) { auto const& values = - std::get(input.at(intermediate_data_kind::INPUT_VALUES)); + std::get(input.at(segmented_reduction_data_attribute::INPUT_VALUES)); auto const output_dtype = - std::get(input.at(intermediate_data_kind::OUTPUT_DTYPE)); + std::get(input.at(segmented_reduction_data_attribute::OUTPUT_DTYPE)); return cudf::double_type_dispatcher( values.type(), output_dtype, segmented_reduce_fn{this}, input, stream, mr); } else { auto const& values = - std::get(input.at(intermediate_data_kind::GROUPED_VALUES)); + std::get(input.at(groupby_data_attribute::GROUPED_VALUES)); return cudf::type_dispatcher(values.type(), groupby_fn{this}, input, stream, mr); } } @@ -169,17 +163,17 @@ struct test_udf_simple_type : cudf::host_udf_base { template () && is_valid_output_t())> - output_type operator()(host_udf_input_map const& input, + output_type operator()(host_udf_input const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { auto 
const& values = - std::get(input.at(intermediate_data_kind::INPUT_VALUES)); + std::get(input.at(reduction_data_attribute::INPUT_VALUES)); auto const output_dtype = - std::get(input.at(intermediate_data_kind::OUTPUT_DTYPE)); + std::get(input.at(reduction_data_attribute::OUTPUT_DTYPE)); auto const input_init_value = std::get>>( - input.at(intermediate_data_kind::INIT_VALUE)); + input.at(reduction_data_attribute::INIT_VALUE)); if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } @@ -235,16 +229,16 @@ struct test_udf_simple_type : cudf::host_udf_base { template () && is_valid_output_t())> - output_type operator()(host_udf_input_map const& input, + output_type operator()(host_udf_input const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { auto const& values = - std::get(input.at(intermediate_data_kind::INPUT_VALUES)); + std::get(input.at(segmented_reduction_data_attribute::INPUT_VALUES)); auto const output_dtype = - std::get(input.at(intermediate_data_kind::OUTPUT_DTYPE)); + std::get(input.at(segmented_reduction_data_attribute::OUTPUT_DTYPE)); auto const offsets = std::get>( - input.at(intermediate_data_kind::OFFSETS)); + input.at(segmented_reduction_data_attribute::OFFSETS)); CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); auto const num_segments = static_cast(offsets.size()) - 1; @@ -259,7 +253,7 @@ struct test_udf_simple_type : cudf::host_udf_base { auto const input_init_value = std::get>>( - input.at(intermediate_data_kind::INIT_VALUE)); + input.at(segmented_reduction_data_attribute::INIT_VALUE)); auto const init_value = [&]() -> InputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { @@ -272,7 +266,7 @@ struct test_udf_simple_type : cudf::host_udf_base { }(); auto const null_handling = - std::get(input.at(intermediate_data_kind::NULL_POLICY)); + std::get(input.at(segmented_reduction_data_attribute::NULL_POLICY)); auto const values_dv_ptr = 
cudf::column_device_view::create(values, stream); auto output = cudf::make_numeric_column( @@ -336,20 +330,20 @@ struct test_udf_simple_type : cudf::host_udf_base { } template ())> - output_type operator()(host_udf_input_map const& input, + output_type operator()(host_udf_input const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { auto const& values = - std::get(input.at(intermediate_data_kind::GROUPED_VALUES)); + std::get(input.at(groupby_data_attribute::GROUPED_VALUES)); if (values.size() == 0) { return parent->get_empty_output(std::nullopt, stream, mr); } auto const offsets = std::get>( - input.at(intermediate_data_kind::OFFSETS)); + input.at(groupby_data_attribute::GROUP_OFFSETS)); CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); auto const num_groups = static_cast(offsets.size()) - 1; auto const group_indices = std::get>( - input.at(intermediate_data_kind::GROUP_LABELS)); + input.at(groupby_data_attribute::GROUP_LABELS)); auto const group_max = std::get( input.at(cudf::make_max_aggregation())); auto const group_sum = std::get( From b597192288a79454a55fa44f2f6c2230cfb8d2a9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 11:12:00 -0800 Subject: [PATCH 35/59] Revert cmake Signed-off-by: Nghia Truong --- cpp/tests/CMakeLists.txt | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a702f983854..165b91dbfc0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -121,7 +121,45 @@ ConfigureTest(TIMESTAMPS_TEST wrappers/timestamps_test.cu) # * groupby tests --------------------------------------------------------------------------------- ConfigureTest( GROUPBY_TEST + groupby/argmin_tests.cpp + groupby/argmax_tests.cpp + groupby/collect_list_tests.cpp + groupby/collect_set_tests.cpp + groupby/correlation_tests.cpp + groupby/count_scan_tests.cpp + groupby/count_tests.cpp + groupby/covariance_tests.cpp + 
groupby/groupby_test_util.cpp + groupby/groups_tests.cpp + groupby/histogram_tests.cpp groupby/host_udf_tests.cu + groupby/keys_tests.cpp + groupby/lists_tests.cpp + groupby/m2_tests.cpp + groupby/min_tests.cpp + groupby/max_scan_tests.cpp + groupby/max_tests.cpp + groupby/mean_tests.cpp + groupby/median_tests.cpp + groupby/merge_m2_tests.cpp + groupby/merge_lists_tests.cpp + groupby/merge_sets_tests.cpp + groupby/min_scan_tests.cpp + groupby/nth_element_tests.cpp + groupby/nunique_tests.cpp + groupby/product_scan_tests.cpp + groupby/product_tests.cpp + groupby/quantile_tests.cpp + groupby/rank_scan_tests.cpp + groupby/replace_nulls_tests.cpp + groupby/shift_tests.cpp + groupby/std_tests.cpp + groupby/structs_tests.cpp + groupby/sum_of_squares_tests.cpp + groupby/sum_scan_tests.cpp + groupby/sum_tests.cpp + groupby/tdigest_tests.cu + groupby/var_tests.cpp GPUS 1 PERCENT 100 ) From 26c3ec4b695d227e7bbb86b897b0215b9224d7ff Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 11:24:15 -0800 Subject: [PATCH 36/59] Fix style Signed-off-by: Nghia Truong --- cpp/src/groupby/sort/aggregate.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index acf4fb854db..4e2166ddc1e 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -616,10 +616,8 @@ void aggregate_result_functor::operator()(aggregation c column_view_with_common_nulls(values.child(0), values.child(1), stream); auto mean_agg = make_mean_aggregation(); - aggregate_result_functor(values_child0, helper, cache, stream, mr) - .operator()(*mean_agg); - aggregate_result_functor(values_child1, helper, cache, stream, mr) - .operator()(*mean_agg); + aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()(*mean_agg); + aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()(*mean_agg); auto const mean0 = 
cache.get_result(values_child0, *mean_agg); auto const mean1 = cache.get_result(values_child1, *mean_agg); @@ -667,10 +665,8 @@ void aggregate_result_functor::operator()(aggregation column_view_with_common_nulls(values.child(0), values.child(1), stream); auto std_agg = make_std_aggregation(); - aggregate_result_functor(values_child0, helper, cache, stream, mr) - .operator()(*std_agg); - aggregate_result_functor(values_child1, helper, cache, stream, mr) - .operator()(*std_agg); + aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()(*std_agg); + aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()(*std_agg); // Compute covariance here to avoid repeated computation of mean & count auto cov_agg = make_covariance_aggregation(corr_agg._min_periods); From 9c168e5aa67eed8f8de4018b2f5d3d5b26dd8beb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 11:38:31 -0800 Subject: [PATCH 37/59] Add docs Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index b19336d7dfc..42bb3c6e8a2 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -670,6 +670,9 @@ struct host_udf_base { * and pass down only data requested from that set. */ struct data_attribute { + /** + * @brief Hold all possible data types for the input of the derived class. + */ using value_type = std::variant(value)) { @@ -720,6 +726,9 @@ struct host_udf_base { return std::get>(value)->clone(); } + /** + * @brief Hash functor for `data_attribute`. + */ struct hash { std::size_t operator()(data_attribute const& attr) const { @@ -741,6 +750,9 @@ struct host_udf_base { } }; + /** + * @brief Equality comparison functor for `data_attribute`. 
+ */ struct equal_to { bool operator()(data_attribute const& lhs, data_attribute const& rhs) const { @@ -765,6 +777,9 @@ struct host_udf_base { }; }; + /** + * @brief Set of attributes for the data that is needed for computing the aggregation. + */ using input_data_attributes = std::unordered_set; @@ -779,7 +794,8 @@ struct host_udf_base { [[nodiscard]] virtual input_data_attributes get_required_data() const { return {}; } /** - * @brief Type of the data that is passed to the derived class for computing aggregation. + * @brief Hold all possible types of the data that is passed to the derived class for computing + * aggregation. */ using input_data_type = std::variant Date: Tue, 26 Nov 2024 11:51:12 -0800 Subject: [PATCH 38/59] Fix docs Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 42bb3c6e8a2..43771127778 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -679,13 +679,19 @@ struct host_udf_base { std::unique_ptr>; value_type value; - data_attribute() = default; - data_attribute(data_attribute&&) = default; + data_attribute() = default; ///< Default constructor + data_attribute(data_attribute&&) = default; ///< Move constructor - // Copy constructor is needed to be used as keys in set and map. - // Since the `value` contains `unique_ptr`, we need to define the copy operator for it. + /** + * @brief Copy constructor. + * @param other The other data attribute to copy from. + */ data_attribute(data_attribute const& other) : value{copy_value(other.value)} {} + /** + * @brief Construct a new data attribute from aggregation attributes. + * @param value_ An aggregation attribute + */ template || std::is_same_v || @@ -694,6 +700,10 @@ struct host_udf_base { { } + /** + * @brief Construct a new data attribute from another aggregation request. 
+ * @param value_ An aggregation request + */ template || std::is_same_v)> @@ -711,6 +721,8 @@ struct host_udf_base { /** * @brief Copy the value, used in copy constructor. + * @param value The value to copy + * @return The copied value */ static value_type copy_value(value_type const& value) { @@ -794,8 +806,8 @@ struct host_udf_base { [[nodiscard]] virtual input_data_attributes get_required_data() const { return {}; } /** - * @brief Hold all possible types of the data that is passed to the derived class for computing - * aggregation. + * @brief Hold all possible types of the data that is passed to the derived class for executing + * the aggregation. */ using input_data_type = std::variant Date: Tue, 26 Nov 2024 11:59:56 -0800 Subject: [PATCH 39/59] Still fix docs Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 43771127778..6d7327b1f3f 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -677,7 +677,8 @@ struct host_udf_base { segmented_reduction_data_attribute, groupby_data_attribute, std::unique_ptr>; - value_type value; + value_type value; ///< The actual data attribute, wrapped in this struct + ///< as a wrapper is needed to define hash and equal_to functors. data_attribute() = default; ///< Default constructor data_attribute(data_attribute&&) = default; ///< Move constructor @@ -742,6 +743,11 @@ struct host_udf_base { * @brief Hash functor for `data_attribute`. */ struct hash { + /** + * @brief Compute the hash value of a data attribute. + * @param attr The data attribute to hash + * @return The hash value of the data attribute + */ std::size_t operator()(data_attribute const& attr) const { auto const& value = attr.value; @@ -766,6 +772,12 @@ struct host_udf_base { * @brief Equality comparison functor for `data_attribute`. 
*/ struct equal_to { + /** + * @brief Check if two data attributes are equal. + * @param lhs The left-hand side data attribute + * @param rhs The right-hand side data attribute + * @return True if the two data attributes are equal + */ bool operator()(data_attribute const& lhs, data_attribute const& rhs) const { auto const& lhs_val = lhs.value; From d289528029235304822b4521364aa827071238b6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 13:34:59 -0800 Subject: [PATCH 40/59] Implement Java & JNI for `HostUDFAggregation` Signed-off-by: Nghia Truong --- .../main/java/ai/rapids/cudf/Aggregation.java | 61 ++++++++++++++++--- java/src/main/native/src/AggregationJni.cpp | 47 +++++++++++--- 2 files changed, 92 insertions(+), 16 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index 379750bb0b7..e69f1d5449b 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -62,15 +62,16 @@ enum Kind { LAG(23), PTX(24), CUDA(25), - M2(26), - MERGE_M2(27), - RANK(28), - DENSE_RANK(29), - PERCENT_RANK(30), - TDIGEST(31), // This can take a delta argument for accuracy level - MERGE_TDIGEST(32), // This can take a delta argument for accuracy level - HISTOGRAM(33), - MERGE_HISTOGRAM(34); + HOST_UDF(26), + M2(27), + MERGE_M2(28), + RANK(29), + DENSE_RANK(30), + PERCENT_RANK(31), + TDIGEST(32), // This can take a delta argument for accuracy level + MERGE_TDIGEST(33), // This can take a delta argument for accuracy level + HISTOGRAM(34), + MERGE_HISTOGRAM(35); final int nativeId; @@ -385,6 +386,38 @@ public boolean equals(Object other) { } } + static final class HostUDFAggregation extends Aggregation { + private final long udfNativeHandle; + private final long udfNativeHashCode; + + private HostUDFAggregation(long udfNativeHandle, long udfNativeHashCode) { + super(Kind.HOST_UDF); + this.udfNativeHandle = udfNativeHandle; + 
this.udfNativeHashCode = udfNativeHashCode; + } + + @Override + long createNativeInstance() { + return Aggregation.createHostUDFAgg(udfNativeHandle); + } + + @Override + public int hashCode() { + return 31 * kind.hashCode() + Long.hashCode(udfNativeHashCode); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } else if (other instanceof HostUDFAggregation) { + HostUDFAggregation o = (HostUDFAggregation) other; + return Aggregation.areHostUDFsEqual(udfNativeHandle, o.udfNativeHandle); + } + return false; + } + } + protected final Kind kind; protected Aggregation(Kind kind) { @@ -990,4 +1023,14 @@ static MergeHistogramAggregation mergeHistogram() { * Create a TDigest aggregation. */ private static native long createTDigestAgg(int kind, int delta); + + /** + * Create a HOST_UDF aggregation. + */ + private static native long createHostUDFAgg(long udfNativeHandle); + + /** + * Compare two host UDFs to see if they are equal. + */ + private static native boolean areHostUDFsEqual(long lhsNativeHandle, long rhsNativeHandle); } diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index c40f1c55500..77ab75ccb43 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -80,25 +80,28 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createNoParamAgg(JNIEnv* // case 23: LAG // case 24: PTX // case 25: CUDA - case 26: // M2 + // case 26: HOST_UDF + case 27: // M2 return cudf::make_m2_aggregation(); - case 27: // MERGE_M2 + case 28: // MERGE_M2 return cudf::make_merge_m2_aggregation(); - case 28: // RANK + case 29: // RANK return cudf::make_rank_aggregation( cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE); - case 29: // DENSE_RANK + case 30: // DENSE_RANK return cudf::make_rank_aggregation( cudf::rank_method::DENSE, {}, cudf::null_policy::INCLUDE); - case 30: // ANSI SQL PERCENT_RANK + case 31: // ANSI SQL 
PERCENT_RANK return cudf::make_rank_aggregation(cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE, {}, cudf::rank_percentage::ONE_NORMALIZED); - case 33: // HISTOGRAM + // case 32: TDIGEST + // case 33: MERGE_TDIGEST + case 34: // HISTOGRAM return cudf::make_histogram_aggregation(); - case 34: // MERGE_HISTOGRAM + case 35: // MERGE_HISTOGRAM return cudf::make_merge_histogram_aggregation(); default: throw std::logic_error("Unsupported No Parameter Aggregation Operation"); @@ -296,4 +299,34 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createMergeSetsAgg(JNIEn CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createHostUDFAgg(JNIEnv* env, + jclass class_object, + jlong udf_native_handle) +{ + JNI_NULL_CHECK(env, udf_native_handle, "udf_native_handle is null", 0); + try { + cudf::jni::auto_set_device(env); + auto udf_ptr = reinterpret_cast(udf_native_handle)->clone(); + auto output = cudf::make_host_udf_aggregation(std::move(udf_ptr)); + return reinterpret_cast(output.release()); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_areHostUDFsEqual(JNIEnv* env, + jclass class_object, + jlong lhs_native_handle, + jlong rhs_native_handle) +{ + JNI_NULL_CHECK(env, lhs_native_handle, "lhs_native_handle is null", 0); + JNI_NULL_CHECK(env, rhs_native_handle, "rhs_native_handle is null", 0); + try { + cudf::jni::auto_set_device(env); + auto const lhs_udf_ptr = reinterpret_cast(lhs_native_handle); + auto const rhs_udf_ptr = reinterpret_cast(rhs_native_handle); + return lhs_udf_ptr->is_equal(*rhs_udf_ptr); + } + CATCH_STD(env, 0); +} + } // extern "C" From a5133e6f31c9b2276a026cbd84c24b8a3c8df918 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 14:20:21 -0800 Subject: [PATCH 41/59] Fix instantiating code --- cpp/include/cudf/aggregation.hpp | 2 +- java/src/main/native/src/AggregationJni.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 6d7327b1f3f..571be214180 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -896,7 +896,7 @@ struct host_udf_base { * @param host_udf An instance of a class derived from `host_udf_base` to perform aggregation * @return A HOST_UDF aggregation object */ -template +template std::unique_ptr make_host_udf_aggregation(std::unique_ptr host_udf); /** diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index 77ab75ccb43..68c9bf62162 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -306,8 +306,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createHostUDFAgg(JNIEnv* JNI_NULL_CHECK(env, udf_native_handle, "udf_native_handle is null", 0); try { cudf::jni::auto_set_device(env); - auto udf_ptr = reinterpret_cast(udf_native_handle)->clone(); - auto output = cudf::make_host_udf_aggregation(std::move(udf_ptr)); + auto const udf_ptr = reinterpret_cast(udf_native_handle); + auto output = cudf::make_host_udf_aggregation(udf_ptr->clone()); return reinterpret_cast(output.release()); } CATCH_STD(env, 0); From 00434720efa2dd1e40b9ab3dbd1bb49a479665f0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 14:52:15 -0800 Subject: [PATCH 42/59] Remove unused headers Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 39bacdf5c76..90a841bfb73 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -17,13 +17,11 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include From f190fd8f522c611f1ef34e50b4aa13756db1f794 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 14:56:28 -0800 Subject: [PATCH 
43/59] Fix style Signed-off-by: Nghia Truong --- java/src/main/native/src/AggregationJni.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index 68c9bf62162..c46019bba68 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -307,7 +307,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createHostUDFAgg(JNIEnv* try { cudf::jni::auto_set_device(env); auto const udf_ptr = reinterpret_cast(udf_native_handle); - auto output = cudf::make_host_udf_aggregation(udf_ptr->clone()); + auto output = cudf::make_host_udf_aggregation(udf_ptr->clone()); return reinterpret_cast(output.release()); } CATCH_STD(env, 0); From 4d559cf9070bb2c7325b7f7e6bc661193905b593 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 15:36:03 -0800 Subject: [PATCH 44/59] Add unit tests Signed-off-by: Nghia Truong --- cpp/tests/CMakeLists.txt | 1 + cpp/tests/groupby/host_udf_example_tests.cu | 576 ++++++++++++++++++ cpp/tests/groupby/host_udf_tests.cu | 619 +++++--------------- 3 files changed, 712 insertions(+), 484 deletions(-) create mode 100644 cpp/tests/groupby/host_udf_example_tests.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 165b91dbfc0..5e26f17e446 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -132,6 +132,7 @@ ConfigureTest( groupby/groupby_test_util.cpp groupby/groups_tests.cpp groupby/histogram_tests.cpp + groupby/host_udf_example_tests.cu groupby/host_udf_tests.cu groupby/keys_tests.cpp groupby/lists_tests.cpp diff --git a/cpp/tests/groupby/host_udf_example_tests.cu b/cpp/tests/groupby/host_udf_example_tests.cu new file mode 100644 index 00000000000..385dc8d92f0 --- /dev/null +++ b/cpp/tests/groupby/host_udf_example_tests.cu @@ -0,0 +1,576 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace { +/** + * @brief A host-based UDF implementation. + * + * The aggregations perform the following computation: + * - For reduction: compute `sum(value^2, for value in group)` (this is sum of squared). + * - For segmented reduction: compute `segment_size * sum(value^2, for value in group)`. + * - For groupby: compute `(group_idx + 1) * group_sum_of_squares - group_max * group_sum`. + * + * In addition, for segmented reduction, if null_policy is set to `INCLUDE`, the null values are + * replaced with an initial value if it is provided. + */ +template +struct test_udf_simple_type : cudf::host_udf_base { + static_assert(std::is_same_v || + std::is_same_v || + std::is_same_v); + + test_udf_simple_type() = default; + + [[nodiscard]] input_data_attributes get_required_data() const override + { + if constexpr (std::is_same_v || + std::is_same_v) { + // Empty set, which means we need everything. 
+ return {}; + } else { + return {groupby_data_attribute::GROUPED_VALUES, + groupby_data_attribute::GROUP_OFFSETS, + groupby_data_attribute::GROUP_LABELS, + cudf::make_max_aggregation(), + cudf::make_sum_aggregation()}; + } + } + + [[nodiscard]] output_type operator()(host_udf_input const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const override + { + if constexpr (std::is_same_v) { + auto const& values = + std::get(input.at(reduction_data_attribute::INPUT_VALUES)); + auto const output_dtype = + std::get(input.at(reduction_data_attribute::OUTPUT_DTYPE)); + return cudf::double_type_dispatcher( + values.type(), output_dtype, reduce_fn{this}, input, stream, mr); + } else if constexpr (std::is_same_v) { + auto const& values = + std::get(input.at(segmented_reduction_data_attribute::INPUT_VALUES)); + auto const output_dtype = + std::get(input.at(segmented_reduction_data_attribute::OUTPUT_DTYPE)); + return cudf::double_type_dispatcher( + values.type(), output_dtype, segmented_reduce_fn{this}, input, stream, mr); + } else { + auto const& values = + std::get(input.at(groupby_data_attribute::GROUPED_VALUES)); + return cudf::type_dispatcher(values.type(), groupby_fn{this}, input, stream, mr); + } + } + + [[nodiscard]] output_type get_empty_output( + [[maybe_unused]] std::optional output_dtype, + [[maybe_unused]] rmm::cuda_stream_view stream, + [[maybe_unused]] rmm::device_async_resource_ref mr) const override + { + if constexpr (std::is_same_v) { + CUDF_EXPECTS(output_dtype.has_value(), + "Data type for the reduction result must be specified."); + return cudf::make_default_constructed_scalar(output_dtype.value(), stream, mr); + } else if constexpr (std::is_same_v) { + CUDF_EXPECTS(output_dtype.has_value(), + "Data type for the reduction result must be specified."); + return cudf::make_empty_column(output_dtype.value()); + } else { + return cudf::make_empty_column( + cudf::data_type{cudf::type_to_id()}); + } + } + + [[nodiscard]] bool 
is_equal(host_udf_base const& other) const override + { + // Just check if the other object is also instance of the same derived class. + return dynamic_cast(&other) != nullptr; + } + + [[nodiscard]] std::size_t do_hash() const override + { + return std::hash{}({"test_udf_simple_type"}); + } + + [[nodiscard]] std::unique_ptr clone() const override + { + return std::make_unique(); + } + + // For faster compile times, we only support a few input/output types. + template + static constexpr bool is_valid_input_t() + { + return std::is_same_v || std::is_same_v; + } + + // For faster compile times, we only support a few input/output types. + template + static constexpr bool is_valid_output_t() + { + return std::is_same_v; + } + + struct reduce_fn { + // Store pointer to the parent class so we can call its functions. + test_udf_simple_type const* parent; + + template () || !is_valid_output_t())> + output_type operator()(Args...) const + { + CUDF_FAIL("Unsupported input type."); + } + + template () && is_valid_output_t())> + output_type operator()(host_udf_input const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const + { + auto const& values = + std::get(input.at(reduction_data_attribute::INPUT_VALUES)); + auto const output_dtype = + std::get(input.at(reduction_data_attribute::OUTPUT_DTYPE)); + auto const input_init_value = + std::get>>( + input.at(reduction_data_attribute::INIT_VALUE)); + + if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } + + auto const init_value = [&]() -> InputType { + if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { + auto const numeric_init_scalar = + dynamic_cast const*>(&input_init_value.value().get()); + CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); + return numeric_init_scalar->value(stream); + } + return InputType{0}; + }(); + + auto const values_dv_ptr = cudf::column_device_view::create(values, 
stream); + auto const result = + thrust::transform_reduce(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.size()), + transform_fn{*values_dv_ptr}, + static_cast(init_value), + thrust::plus<>{}); + + auto output = cudf::make_numeric_scalar(output_dtype, stream, mr); + static_cast*>(output.get())->set_value(result, stream); + return output; + } + + template + struct transform_fn { + cudf::column_device_view values; + OutputType __device__ operator()(cudf::size_type idx) const + { + if (values.is_null(idx)) { return OutputType{0}; } + auto const val = static_cast(values.element(idx)); + return val * val; + } + }; + }; + + struct segmented_reduce_fn { + // Store pointer to the parent class so we can call its functions. + test_udf_simple_type const* parent; + + template () || !is_valid_output_t())> + output_type operator()(Args...) const + { + CUDF_FAIL("Unsupported input type."); + } + + template () && is_valid_output_t())> + output_type operator()(host_udf_input const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const + { + auto const& values = + std::get(input.at(segmented_reduction_data_attribute::INPUT_VALUES)); + auto const output_dtype = + std::get(input.at(segmented_reduction_data_attribute::OUTPUT_DTYPE)); + auto const offsets = std::get>( + input.at(segmented_reduction_data_attribute::OFFSETS)); + CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); + auto const num_segments = static_cast(offsets.size()) - 1; + + if (values.size() == 0) { + if (num_segments <= 0) { + return parent->get_empty_output(output_dtype, stream, mr); + } else { + return cudf::make_numeric_column( + output_dtype, num_segments, cudf::mask_state::ALL_NULL, stream, mr); + } + } + + auto const input_init_value = + std::get>>( + input.at(segmented_reduction_data_attribute::INIT_VALUE)); + + auto const init_value = [&]() -> InputType { + if (input_init_value.has_value() && 
input_init_value.value().get().is_valid(stream)) { + auto const numeric_init_scalar = + dynamic_cast const*>(&input_init_value.value().get()); + CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); + return numeric_init_scalar->value(stream); + } + return InputType{0}; + }(); + + auto const null_handling = + std::get(input.at(segmented_reduction_data_attribute::NULL_POLICY)); + + auto const values_dv_ptr = cudf::column_device_view::create(values, stream); + auto output = cudf::make_numeric_column( + output_dtype, num_segments, cudf::mask_state::UNALLOCATED, stream); + rmm::device_uvector validity(num_segments, stream); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_segments), + thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()), + transform_fn{ + *values_dv_ptr, offsets, static_cast(init_value), null_handling}); + auto [null_mask, null_count] = + cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); + if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } + return output; + } + + template + struct transform_fn { + cudf::column_device_view values; + cudf::device_span offsets; + OutputType init_value; + cudf::null_policy null_handling; + + thrust::tuple __device__ operator()(cudf::size_type idx) const + { + auto const start = offsets[idx]; + auto const end = offsets[idx + 1]; + if (start == end) { return {OutputType{0}, false}; } + + auto sum = init_value; + for (auto i = start; i < end; ++i) { + if (values.is_null(i)) { + if (null_handling == cudf::null_policy::INCLUDE) { sum += init_value * init_value; } + continue; + } + auto const val = static_cast(values.element(i)); + sum += val * val; + } + auto const segment_size = end - start; + return {static_cast(segment_size) * sum, true}; + } + }; + }; + + struct groupby_fn { + // Store pointer to the parent class so we can 
call its functions. + test_udf_simple_type const* parent; + using OutputType = double; + template + using MaxType = cudf::detail::target_type_t; + template + using SumType = cudf::detail::target_type_t; + + template ())> + output_type operator()(Args...) const + { + CUDF_FAIL("Unsupported input type."); + } + + template ())> + output_type operator()(host_udf_input const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const + { + auto const& values = + std::get(input.at(groupby_data_attribute::GROUPED_VALUES)); + if (values.size() == 0) { return parent->get_empty_output(std::nullopt, stream, mr); } + + auto const offsets = std::get>( + input.at(groupby_data_attribute::GROUP_OFFSETS)); + CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); + auto const num_groups = static_cast(offsets.size()) - 1; + auto const group_indices = std::get>( + input.at(groupby_data_attribute::GROUP_LABELS)); + auto const group_max = std::get( + input.at(cudf::make_max_aggregation())); + auto const group_sum = std::get( + input.at(cudf::make_sum_aggregation())); + + auto const values_dv_ptr = cudf::column_device_view::create(values, stream); + auto output = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, + num_groups, + cudf::mask_state::UNALLOCATED, + stream); + rmm::device_uvector validity(num_groups, stream); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_groups), + thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()), + transform_fn{*values_dv_ptr, + offsets, + group_indices, + group_max.begin>(), + group_sum.begin>()}); + auto [null_mask, null_count] = + cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); + if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } + return output; + } + + template + struct transform_fn { + cudf::column_device_view values; + cudf::device_span 
offsets; + cudf::device_span group_indices; + MaxType const* group_max; + SumType const* group_sum; + + thrust::tuple __device__ operator()(cudf::size_type idx) const + { + auto const start = offsets[idx]; + auto const end = offsets[idx + 1]; + if (start == end) { return {OutputType{0}, false}; } + + auto sum_sqr = OutputType{0}; + bool has_valid{false}; + for (auto i = start; i < end; ++i) { + if (values.is_null(i)) { continue; } + has_valid = true; + auto const val = static_cast(values.element(i)); + sum_sqr += val * val; + } + + if (!has_valid) { return {OutputType{0}, false}; } + return {static_cast(group_indices[start] + 1) * sum_sqr - + static_cast(group_max[idx]) * static_cast(group_sum[idx]), + true}; + } + }; + }; +}; + +} // namespace + +using doubles_col = cudf::test::fixed_width_column_wrapper; +using int32s_col = cudf::test::fixed_width_column_wrapper; +using int64s_col = cudf::test::fixed_width_column_wrapper; + +struct HostUDFExampleTest : cudf::test::BaseFixture {}; + +TEST_F(HostUDFExampleTest, ReductionSimpleInput) +{ + auto const vals = doubles_col{0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + auto const reduced = cudf::reduce(vals, + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + EXPECT_TRUE(reduced->is_valid()); + EXPECT_EQ(cudf::type_id::INT64, reduced->type().id()); + auto const result = + static_cast*>(reduced.get())->value(cudf::get_default_stream()); + auto constexpr expected = 55; // 0^2 + 1^2 + 2^2 + 3^2 + 4^2 + 5^2 = 55 + EXPECT_EQ(expected, result); +} + +TEST_F(HostUDFExampleTest, ReductionEmptyInput) +{ + auto const vals = doubles_col{}; + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + auto const reduced = cudf::reduce(vals, + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + 
EXPECT_FALSE(reduced->is_valid()); + EXPECT_EQ(cudf::type_id::INT64, reduced->type().id()); +} + +TEST_F(HostUDFExampleTest, SegmentedReductionSimpleInput) +{ + auto const vals = doubles_col{ + {0.0, 0.0 /*null*/, 2.0, 3.0, 0.0 /*null*/, 5.0, 0.0 /*null*/, 0.0 /*null*/, 8.0, 9.0}, + {true, false, true, true, false, true, false, false, true, true}}; + auto const offsets = int32s_col{0, 3, 5, 10}.release(); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + + // Test without init value. + { + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + // When null_policy is set to `INCLUDE`, the null values are replaced with the init value. + // Since init value is not given, it is set to 0. + // [ 3 * (0^2 + init^2 + 2^2), 2 * (3^2 + init^2), 5 * (5^2 + init^2 + init^2 + 8^2 + 9^2) ] + auto const expected = int64s_col{12, 18, 850}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with init value, and include nulls. + { + auto const init_scalar = cudf::make_fixed_width_scalar(3.0); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + *init_scalar, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + // When null_policy is set to `INCLUDE`, the null values are replaced with the init value. + // [ 3 * (3 + 0^2 + 3^2 + 2^2), 2 * (3 + 3^2 + 3^2), 5 * (3 + 5^2 + 3^2 + 3^2 + 8^2 + 9^2) ] + auto const expected = int64s_col{48, 42, 955}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with init value, and exclude nulls. 
+ { + auto const init_scalar = cudf::make_fixed_width_scalar(3.0); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::EXCLUDE, + *init_scalar, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + // [ 3 * (3 + 0^2 + 2^2), 2 * (3 + 3^2), 5 * (3 + 5^2 + 8^2 + 9^2) ] + auto const expected = int64s_col{21, 24, 865}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } +} + +TEST_F(HostUDFExampleTest, SegmentedReductionEmptySegments) +{ + auto const vals = int32s_col{}; + auto const offsets = int32s_col{0, 0, 0, 0}.release(); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + auto const expected = int64s_col{{0, 0, 0}, {false, false, false}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(HostUDFExampleTest, SegmentedReductionEmptyInput) +{ + auto const vals = int32s_col{}; + // Cannot be empty due to a bug in the libcudf: https://github.com/rapidsai/cudf/issues/17433. 
+ auto const offsets = int32s_col{0}.release(); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + auto const expected = int64s_col{}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(HostUDFExampleTest, GroupbySimpleInput) +{ + auto const keys = int32s_col{0, 1, 2, 0, 1, 2, 0, 1, 2, 0}; + auto const vals = doubles_col{ + {0.0, 0.0 /*null*/, 2.0, 3.0, 0.0 /*null*/, 5.0, 0.0 /*null*/, 0.0 /*null*/, 8.0, 9.0}, + {true, false, true, true, false, true, false, false, true, true}}; + auto agg = cudf::make_host_udf_aggregation( + std::make_unique>()); + + std::vector requests; + requests.emplace_back(); + requests[0].values = vals; + requests[0].aggregations.push_back(std::move(agg)); + cudf::groupby::groupby gb_obj( + cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); + + auto const grp_result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); + auto const& result = grp_result.second[0].results[0]; + + // Output type of groupby is double. 
+ // Values grouped by keys: [ {0, 3, null, 9}, {null, null, null}, {2, 5, 8} ] + // Group sum_sqr: [ 90, null, 93 ] + // Group max: [ 9, null, 8 ] + // Group sum: [ 12, null, 15 ] + // Output: [ 1 * 90 - 9 * 12, null, 3 * 93 - 8 * 15 ] + auto const expected = doubles_col{{-18, 0, 159}, {true, false, true}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 90a841bfb73..e7551f17b71 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -17,83 +17,138 @@ #include #include #include -#include -#include #include -#include -#include #include #include #include -#include - -#include -#include - -#include -#include -#include -#include namespace { /** - * @brief A host-based UDF implementation. - * - * The aggregations perform the following computation: - * - For reduction: compute `sum(value^2, for value in group)` (this is sum of squared). - * - For segmented reduction: compute `segment_size * sum(value^2, for value in group)`. - * - For groupby: compute `(group_idx + 1) * group_sum_of_squares - group_max * group_sum`. - * - * In addition, for segmented reduction, if null_policy is set to `INCLUDE`, the null values are - * replaced with an initial value if it is provided. + * @brief A host-based UDF implementation used for unit tests. */ -template -struct test_udf_simple_type : cudf::host_udf_base { +template +struct host_udf_unit_test : cudf::host_udf_base { static_assert(std::is_same_v || std::is_same_v || std::is_same_v); - test_udf_simple_type() = default; - - [[nodiscard]] input_data_attributes get_required_data() const override + input_data_attributes input_attrs; + host_udf_unit_test(input_data_attributes input_attrs_ = {}) : input_attrs(std::move(input_attrs_)) { - if constexpr (std::is_same_v || - std::is_same_v) { - // Empty set, which means we need everything. 
- return {}; - } else { - return {groupby_data_attribute::GROUPED_VALUES, - groupby_data_attribute::GROUP_OFFSETS, - groupby_data_attribute::GROUP_LABELS, - cudf::make_max_aggregation(), - cudf::make_sum_aggregation()}; - } } + [[nodiscard]] input_data_attributes get_required_data() const override { return input_attrs; } + + // This is the main testing function, which checks for the correctness of input data. + // The rests are just to satisfy the interface. [[nodiscard]] output_type operator()(host_udf_input const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const override { + SCOPED_TRACE("Original line of failure: " + std::to_string(test_location)); + + input_data_attributes check_attrs = input_attrs; if constexpr (std::is_same_v) { - auto const& values = - std::get(input.at(reduction_data_attribute::INPUT_VALUES)); - auto const output_dtype = - std::get(input.at(reduction_data_attribute::OUTPUT_DTYPE)); - return cudf::double_type_dispatcher( - values.type(), output_dtype, reduce_fn{this}, input, stream, mr); + if (check_attrs.empty()) { + check_attrs = input_data_attributes{reduction_data_attribute::INPUT_VALUES, + reduction_data_attribute::OUTPUT_DTYPE, + reduction_data_attribute::INIT_VALUE}; + } + EXPECT_EQ(input.size(), check_attrs.size()); + for (auto const attr : check_attrs) { + EXPECT_TRUE(input.count(attr) > 0); + EXPECT_TRUE(std::holds_alternative(attr.value)); + switch (auto const attr_val = std::get(attr.value)) { + case reduction_data_attribute::INPUT_VALUES: + EXPECT_TRUE(std::holds_alternative(input.at(attr))); + break; + case reduction_data_attribute::OUTPUT_DTYPE: + EXPECT_TRUE(std::holds_alternative(input.at(attr))); + break; + case reduction_data_attribute::INIT_VALUE: + EXPECT_TRUE( + std::holds_alternative>>( + input.at(attr))); + break; + default:; + } + } } else if constexpr (std::is_same_v) { - auto const& values = - std::get(input.at(segmented_reduction_data_attribute::INPUT_VALUES)); - auto const 
output_dtype = - std::get(input.at(segmented_reduction_data_attribute::OUTPUT_DTYPE)); - return cudf::double_type_dispatcher( - values.type(), output_dtype, segmented_reduce_fn{this}, input, stream, mr); + if (check_attrs.empty()) { + check_attrs = input_data_attributes{segmented_reduction_data_attribute::INPUT_VALUES, + segmented_reduction_data_attribute::OUTPUT_DTYPE, + segmented_reduction_data_attribute::INIT_VALUE, + segmented_reduction_data_attribute::NULL_POLICY, + segmented_reduction_data_attribute::OFFSETS}; + } + EXPECT_EQ(input.size(), check_attrs.size()); + for (auto const attr : check_attrs) { + EXPECT_TRUE(input.count(attr) > 0); + EXPECT_TRUE(std::holds_alternative(attr.value)); + switch (auto const attr_val = std::get(attr.value)) { + case segmented_reduction_data_attribute::INPUT_VALUES: + EXPECT_TRUE(std::holds_alternative(input.at(attr))); + break; + case segmented_reduction_data_attribute::OUTPUT_DTYPE: + EXPECT_TRUE(std::holds_alternative(input.at(attr))); + break; + case segmented_reduction_data_attribute::INIT_VALUE: + EXPECT_TRUE( + std::holds_alternative>>( + input.at(attr))); + break; + case segmented_reduction_data_attribute::NULL_POLICY: + EXPECT_TRUE(std::holds_alternative(input.at(attr))); + break; + case segmented_reduction_data_attribute::OFFSETS: + EXPECT_TRUE( + std::holds_alternative>(input.at(attr))); + break; + default:; + } + } } else { - auto const& values = - std::get(input.at(groupby_data_attribute::GROUPED_VALUES)); - return cudf::type_dispatcher(values.type(), groupby_fn{this}, input, stream, mr); + if (check_attrs.empty()) { + check_attrs = input_data_attributes{groupby_data_attribute::INPUT_VALUES, + groupby_data_attribute::GROUPED_VALUES, + groupby_data_attribute::SORTED_GROUPED_VALUES, + groupby_data_attribute::GROUP_OFFSETS, + groupby_data_attribute::GROUP_LABELS}; + } + EXPECT_EQ(input.size(), check_attrs.size()); + for (auto const attr : check_attrs) { + EXPECT_TRUE(input.count(attr) > 0); + 
EXPECT_TRUE(std::holds_alternative(attr.value) || + std::holds_alternative>(attr.value)); + if (std::holds_alternative(attr.value)) { + switch (auto const attr_val = std::get(attr.value)) { + case groupby_data_attribute::INPUT_VALUES: + EXPECT_TRUE(std::holds_alternative(input.at(attr))); + break; + case groupby_data_attribute::GROUPED_VALUES: + EXPECT_TRUE(std::holds_alternative(input.at(attr))); + break; + case groupby_data_attribute::SORTED_GROUPED_VALUES: + EXPECT_TRUE(std::holds_alternative(input.at(attr))); + break; + case groupby_data_attribute::GROUP_OFFSETS: + EXPECT_TRUE( + std::holds_alternative>(input.at(attr))); + break; + case groupby_data_attribute::GROUP_LABELS: + EXPECT_TRUE( + std::holds_alternative>(input.at(attr))); + break; + default:; + } + } else { // std::holds_alternative>(attr.value) + EXPECT_TRUE(std::holds_alternative>(input.at(attr))); + } + } } + + return get_empty_output(std::nullopt, stream, mr); } [[nodiscard]] output_type get_empty_output( @@ -102,419 +157,48 @@ struct test_udf_simple_type : cudf::host_udf_base { [[maybe_unused]] rmm::device_async_resource_ref mr) const override { if constexpr (std::is_same_v) { - CUDF_EXPECTS(output_dtype.has_value(), - "Data type for the reduction result must be specified."); - return cudf::make_default_constructed_scalar(output_dtype.value(), stream, mr); + return cudf::make_fixed_width_scalar(0, stream, mr); } else if constexpr (std::is_same_v) { - CUDF_EXPECTS(output_dtype.has_value(), - "Data type for the reduction result must be specified."); - return cudf::make_empty_column(output_dtype.value()); + return cudf::make_empty_column(cudf::data_type{cudf::type_id::INT32}); } else { - return cudf::make_empty_column( - cudf::data_type{cudf::type_to_id()}); + return cudf::make_empty_column(cudf::data_type{cudf::type_id::INT32}); } } - [[nodiscard]] bool is_equal(host_udf_base const& other) const override - { - // Just check if the other object is also instance of the same derived class. 
- return dynamic_cast(&other) != nullptr; - } - - [[nodiscard]] std::size_t do_hash() const override - { - return std::hash{}({"test_udf_simple_type"}); - } - + [[nodiscard]] bool is_equal(host_udf_base const& other) const override { return true; } + [[nodiscard]] std::size_t do_hash() const override { return 0; } [[nodiscard]] std::unique_ptr clone() const override { - return std::make_unique(); + return std::make_unique(); } - - // For faster compile times, we only support a few input/output types. - template - static constexpr bool is_valid_input_t() - { - return std::is_same_v || std::is_same_v; - } - - // For faster compile times, we only support a few input/output types. - template - static constexpr bool is_valid_output_t() - { - return std::is_same_v; - } - - struct reduce_fn { - // Store pointer to the parent class so we can call its functions. - test_udf_simple_type const* parent; - - template () || !is_valid_output_t())> - output_type operator()(Args...) const - { - CUDF_FAIL("Unsupported input type."); - } - - template () && is_valid_output_t())> - output_type operator()(host_udf_input const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const - { - auto const& values = - std::get(input.at(reduction_data_attribute::INPUT_VALUES)); - auto const output_dtype = - std::get(input.at(reduction_data_attribute::OUTPUT_DTYPE)); - auto const input_init_value = - std::get>>( - input.at(reduction_data_attribute::INIT_VALUE)); - - if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } - - auto const init_value = [&]() -> InputType { - if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { - auto const numeric_init_scalar = - dynamic_cast const*>(&input_init_value.value().get()); - CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); - return numeric_init_scalar->value(stream); - } - return InputType{0}; - }(); - - auto const values_dv_ptr = 
cudf::column_device_view::create(values, stream); - auto const result = - thrust::transform_reduce(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(values.size()), - transform_fn{*values_dv_ptr}, - static_cast(init_value), - thrust::plus<>{}); - - auto output = cudf::make_numeric_scalar(output_dtype, stream, mr); - static_cast*>(output.get())->set_value(result, stream); - return output; - } - - template - struct transform_fn { - cudf::column_device_view values; - OutputType __device__ operator()(cudf::size_type idx) const - { - if (values.is_null(idx)) { return OutputType{0}; } - auto const val = static_cast(values.element(idx)); - return val * val; - } - }; - }; - - struct segmented_reduce_fn { - // Store pointer to the parent class so we can call its functions. - test_udf_simple_type const* parent; - - template () || !is_valid_output_t())> - output_type operator()(Args...) const - { - CUDF_FAIL("Unsupported input type."); - } - - template () && is_valid_output_t())> - output_type operator()(host_udf_input const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const - { - auto const& values = - std::get(input.at(segmented_reduction_data_attribute::INPUT_VALUES)); - auto const output_dtype = - std::get(input.at(segmented_reduction_data_attribute::OUTPUT_DTYPE)); - auto const offsets = std::get>( - input.at(segmented_reduction_data_attribute::OFFSETS)); - CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); - auto const num_segments = static_cast(offsets.size()) - 1; - - if (values.size() == 0) { - if (num_segments <= 0) { - return parent->get_empty_output(output_dtype, stream, mr); - } else { - return cudf::make_numeric_column( - output_dtype, num_segments, cudf::mask_state::ALL_NULL, stream, mr); - } - } - - auto const input_init_value = - std::get>>( - input.at(segmented_reduction_data_attribute::INIT_VALUE)); - - auto const init_value = [&]() -> InputType { - if 
(input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { - auto const numeric_init_scalar = - dynamic_cast const*>(&input_init_value.value().get()); - CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction."); - return numeric_init_scalar->value(stream); - } - return InputType{0}; - }(); - - auto const null_handling = - std::get(input.at(segmented_reduction_data_attribute::NULL_POLICY)); - - auto const values_dv_ptr = cudf::column_device_view::create(values, stream); - auto output = cudf::make_numeric_column( - output_dtype, num_segments, cudf::mask_state::UNALLOCATED, stream); - rmm::device_uvector validity(num_segments, stream); - - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_segments), - thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()), - transform_fn{ - *values_dv_ptr, offsets, static_cast(init_value), null_handling}); - auto [null_mask, null_count] = - cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); - if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } - return output; - } - - template - struct transform_fn { - cudf::column_device_view values; - cudf::device_span offsets; - OutputType init_value; - cudf::null_policy null_handling; - - thrust::tuple __device__ operator()(cudf::size_type idx) const - { - auto const start = offsets[idx]; - auto const end = offsets[idx + 1]; - if (start == end) { return {OutputType{0}, false}; } - - auto sum = init_value; - for (auto i = start; i < end; ++i) { - if (values.is_null(i)) { - if (null_handling == cudf::null_policy::INCLUDE) { sum += init_value * init_value; } - continue; - } - auto const val = static_cast(values.element(i)); - sum += val * val; - } - auto const segment_size = end - start; - return {static_cast(segment_size) * sum, true}; - } - }; - }; - - struct groupby_fn { - // Store pointer 
to the parent class so we can call its functions. - test_udf_simple_type const* parent; - using OutputType = double; - template - using MaxType = cudf::detail::target_type_t; - template - using SumType = cudf::detail::target_type_t; - - template ())> - output_type operator()(Args...) const - { - CUDF_FAIL("Unsupported input type."); - } - - template ())> - output_type operator()(host_udf_input const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const - { - auto const& values = - std::get(input.at(groupby_data_attribute::GROUPED_VALUES)); - if (values.size() == 0) { return parent->get_empty_output(std::nullopt, stream, mr); } - - auto const offsets = std::get>( - input.at(groupby_data_attribute::GROUP_OFFSETS)); - CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); - auto const num_groups = static_cast(offsets.size()) - 1; - auto const group_indices = std::get>( - input.at(groupby_data_attribute::GROUP_LABELS)); - auto const group_max = std::get( - input.at(cudf::make_max_aggregation())); - auto const group_sum = std::get( - input.at(cudf::make_sum_aggregation())); - - auto const values_dv_ptr = cudf::column_device_view::create(values, stream); - auto output = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, - num_groups, - cudf::mask_state::UNALLOCATED, - stream); - rmm::device_uvector validity(num_groups, stream); - - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_groups), - thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()), - transform_fn{*values_dv_ptr, - offsets, - group_indices, - group_max.begin>(), - group_sum.begin>()}); - auto [null_mask, null_count] = - cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity<>{}, stream, mr); - if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } - return output; - } - - template - struct transform_fn { - cudf::column_device_view 
values; - cudf::device_span offsets; - cudf::device_span group_indices; - MaxType const* group_max; - SumType const* group_sum; - - thrust::tuple __device__ operator()(cudf::size_type idx) const - { - auto const start = offsets[idx]; - auto const end = offsets[idx + 1]; - if (start == end) { return {OutputType{0}, false}; } - - auto sum_sqr = OutputType{0}; - bool has_valid{false}; - for (auto i = start; i < end; ++i) { - if (values.is_null(i)) { continue; } - has_valid = true; - auto const val = static_cast(values.element(i)); - sum_sqr += val * val; - } - - if (!has_valid) { return {OutputType{0}, false}; } - return {static_cast(group_indices[start] + 1) * sum_sqr - - static_cast(group_max[idx]) * static_cast(group_sum[idx]), - true}; - } - }; - }; }; } // namespace -using doubles_col = cudf::test::fixed_width_column_wrapper; -using int32s_col = cudf::test::fixed_width_column_wrapper; -using int64s_col = cudf::test::fixed_width_column_wrapper; +using int32s_col = cudf::test::fixed_width_column_wrapper; -struct HostUDFExampleTest : cudf::test::BaseFixture {}; +struct HostUDFTest : cudf::test::BaseFixture {}; -TEST_F(HostUDFExampleTest, ReductionSimpleInput) +TEST_F(HostUDFTest, ReductionAllInput) { - auto const vals = doubles_col{0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; + auto const vals = int32s_col{1, 2, 3}; auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>()); - auto const reduced = cudf::reduce(vals, - *agg, - cudf::data_type{cudf::type_id::INT64}, - cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); - EXPECT_TRUE(reduced->is_valid()); - EXPECT_EQ(cudf::type_id::INT64, reduced->type().id()); - auto const result = - static_cast*>(reduced.get())->value(cudf::get_default_stream()); - auto constexpr expected = 55; // 0^2 + 1^2 + 2^2 + 3^2 + 4^2 + 5^2 = 55 - EXPECT_EQ(expected, result); + std::make_unique>()); + [[maybe_unused]] auto const reduced = cudf::reduce(vals, + *agg, + cudf::data_type{cudf::type_id::INT64}, + 
cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); } -TEST_F(HostUDFExampleTest, ReductionEmptyInput) +TEST_F(HostUDFTest, SegmentedReductionAllInput) { - auto const vals = doubles_col{}; - auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>()); - auto const reduced = cudf::reduce(vals, - *agg, - cudf::data_type{cudf::type_id::INT64}, - cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); - EXPECT_FALSE(reduced->is_valid()); - EXPECT_EQ(cudf::type_id::INT64, reduced->type().id()); -} - -TEST_F(HostUDFExampleTest, SegmentedReductionSimpleInput) -{ - auto const vals = doubles_col{ - {0.0, 0.0 /*null*/, 2.0, 3.0, 0.0 /*null*/, 5.0, 0.0 /*null*/, 0.0 /*null*/, 8.0, 9.0}, - {true, false, true, true, false, true, false, false, true, true}}; + auto const vals = int32s_col{1, 2, 3}; auto const offsets = int32s_col{0, 3, 5, 10}.release(); auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>()); + std::make_unique>()); - // Test without init value. - { - auto const result = cudf::segmented_reduce( - vals, - cudf::device_span(offsets->view().begin(), offsets->size()), - *agg, - cudf::data_type{cudf::type_id::INT64}, - cudf::null_policy::INCLUDE, - std::nullopt, // init value - cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); - - // When null_policy is set to `INCLUDE`, the null values are replaced with the init value. - // Since init value is not given, it is set to 0. - // [ 3 * (0^2 + init^2 + 2^2), 2 * (3^2 + init^2), 5 * (5^2 + init^2 + init^2 + 8^2 + 9^2) ] - auto const expected = int64s_col{12, 18, 850}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); - } - - // Test with init value, and include nulls. 
- { - auto const init_scalar = cudf::make_fixed_width_scalar(3.0); - auto const result = cudf::segmented_reduce( - vals, - cudf::device_span(offsets->view().begin(), offsets->size()), - *agg, - cudf::data_type{cudf::type_id::INT64}, - cudf::null_policy::INCLUDE, - *init_scalar, - cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); - - // When null_policy is set to `INCLUDE`, the null values are replaced with the init value. - // [ 3 * (3 + 0^2 + 3^2 + 2^2), 2 * (3 + 3^2 + 3^2), 5 * (3 + 5^2 + 3^2 + 3^2 + 8^2 + 9^2) ] - auto const expected = int64s_col{48, 42, 955}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); - } - - // Test with init value, and exclude nulls. - { - auto const init_scalar = cudf::make_fixed_width_scalar(3.0); - auto const result = cudf::segmented_reduce( - vals, - cudf::device_span(offsets->view().begin(), offsets->size()), - *agg, - cudf::data_type{cudf::type_id::INT64}, - cudf::null_policy::EXCLUDE, - *init_scalar, - cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); - - // [ 3 * (3 + 0^2 + 2^2), 2 * (3 + 3^2), 5 * (3 + 5^2 + 8^2 + 9^2) ] - auto const expected = int64s_col{21, 24, 865}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); - } -} - -TEST_F(HostUDFExampleTest, SegmentedReductionEmptySegments) -{ - auto const vals = int32s_col{}; - auto const offsets = int32s_col{0, 0, 0, 0}.release(); - auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>()); - auto const result = cudf::segmented_reduce( + [[maybe_unused]] auto const result = cudf::segmented_reduce( vals, cudf::device_span(offsets->view().begin(), offsets->size()), *agg, @@ -523,38 +207,14 @@ TEST_F(HostUDFExampleTest, SegmentedReductionEmptySegments) std::nullopt, // init value cudf::get_default_stream(), cudf::get_current_device_resource_ref()); - auto const expected = int64s_col{{0, 0, 0}, {false, false, false}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } -TEST_F(HostUDFExampleTest, 
SegmentedReductionEmptyInput) +TEST_F(HostUDFTest, GroupbyAllInput) { - auto const vals = int32s_col{}; - // Cannot be empty due to a bug in the libcudf: https://github.com/rapidsai/cudf/issues/17433. - auto const offsets = int32s_col{0}.release(); - auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>()); - auto const result = cudf::segmented_reduce( - vals, - cudf::device_span(offsets->view().begin(), offsets->size()), - *agg, - cudf::data_type{cudf::type_id::INT64}, - cudf::null_policy::INCLUDE, - std::nullopt, // init value - cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); - auto const expected = int64s_col{}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); -} - -TEST_F(HostUDFExampleTest, GroupbySimpleInput) -{ - auto const keys = int32s_col{0, 1, 2, 0, 1, 2, 0, 1, 2, 0}; - auto const vals = doubles_col{ - {0.0, 0.0 /*null*/, 2.0, 3.0, 0.0 /*null*/, 5.0, 0.0 /*null*/, 0.0 /*null*/, 8.0, 9.0}, - {true, false, true, true, false, true, false, false, true, true}}; - auto agg = cudf::make_host_udf_aggregation( - std::make_unique>()); + auto const keys = int32s_col{0, 1, 2}; + auto const vals = int32s_col{0, 1, 2}; + auto agg = cudf::make_host_udf_aggregation( + std::make_unique>()); std::vector requests; requests.emplace_back(); @@ -563,15 +223,6 @@ TEST_F(HostUDFExampleTest, GroupbySimpleInput) cudf::groupby::groupby gb_obj( cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); - auto const grp_result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); - auto const& result = grp_result.second[0].results[0]; - - // Output type of groupby is double. 
- // Values grouped by keys: [ {0, 3, null, 9}, {null, null, null}, {2, 5, 8} ] - // Group sum_sqr: [ 90, null, 93 ] - // Group max: [ 9, null, 8 ] - // Group sum: [ 12, null, 15 ] - // Output: [ 1 * 90 - 9 * 12, null, 3 * 93 - 8 * 15 ] - auto const expected = doubles_col{{-18, 0, 159}, {true, false, true}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + [[maybe_unused]] auto const grp_result = + gb_obj.aggregate(requests, cudf::test::get_default_stream()); } From 22df3314eb08b40ccf998ff23fb2356d35d4de81 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 16:04:05 -0800 Subject: [PATCH 45/59] Implement random tests Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 132 ++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 9 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index e7551f17b71..53327a432b5 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -23,20 +23,21 @@ #include #include +#include +#include + namespace { /** * @brief A host-based UDF implementation used for unit tests. 
*/ template -struct host_udf_unit_test : cudf::host_udf_base { +struct host_udf_test : cudf::host_udf_base { static_assert(std::is_same_v || std::is_same_v || std::is_same_v); input_data_attributes input_attrs; - host_udf_unit_test(input_data_attributes input_attrs_ = {}) : input_attrs(std::move(input_attrs_)) - { - } + host_udf_test(input_data_attributes input_attrs_ = {}) : input_attrs(std::move(input_attrs_)) {} [[nodiscard]] input_data_attributes get_required_data() const override { return input_attrs; } @@ -169,21 +170,64 @@ struct host_udf_unit_test : cudf::host_udf_base { [[nodiscard]] std::size_t do_hash() const override { return 0; } [[nodiscard]] std::unique_ptr clone() const override { - return std::make_unique(); + return std::make_unique(); } }; +cudf::host_udf_base::input_data_attributes get_subset( + cudf::host_udf_base::input_data_attributes const& attrs) +{ + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution size_distr(1, attrs.size() - 1); + auto const subset_size = size_distr(gen); + + auto const elements = + std::vector(attrs.begin(), attrs.end()); + std::uniform_int_distribution idx_distr(0, attrs.size() - 1); + cudf::host_udf_base::input_data_attributes output; + while (output.size() < subset_size) { + output.insert(elements[idx_distr(gen)]); + } + + printf("subset_size: %d\n", (int)subset_size); + printf("original size: %d\n", (int)attrs.size()); + return output; +} + +std::unique_ptr get_random_agg() +{ + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution distr(1, 4); + auto const agg_idx = distr(gen); + switch (agg_idx) { + case 1: return cudf::make_min_aggregation(); + case 2: return cudf::make_max_aggregation(); + case 3: return cudf::make_sum_aggregation(); + case 4: return cudf::make_product_aggregation(); + default:; + } + CUDF_UNREACHABLE("This should not be reached."); + return nullptr; +} + } // namespace using int32s_col = cudf::test::fixed_width_column_wrapper; +// 
Number of randomly testing on the input data attributes. +// For each test, a subset of data attributes will be randomly generated from all the possible input +// data attributes. That subset will be tested for correctness. +constexpr int NUM_RANDOM_TESTS = 10; + struct HostUDFTest : cudf::test::BaseFixture {}; TEST_F(HostUDFTest, ReductionAllInput) { auto const vals = int32s_col{1, 2, 3}; auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>()); + std::make_unique>()); [[maybe_unused]] auto const reduced = cudf::reduce(vals, *agg, cudf::data_type{cudf::type_id::INT64}, @@ -191,13 +235,30 @@ TEST_F(HostUDFTest, ReductionAllInput) cudf::get_current_device_resource_ref()); } +TEST_F(HostUDFTest, ReductionSomeInput) +{ + auto const vals = int32s_col{1, 2, 3}; + for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { + auto input_attrs = get_subset(cudf::host_udf_base::input_data_attributes{ + cudf::host_udf_base::reduction_data_attribute::INPUT_VALUES, + cudf::host_udf_base::reduction_data_attribute::OUTPUT_DTYPE, + cudf::host_udf_base::reduction_data_attribute::INIT_VALUE}); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>(std::move(input_attrs))); + [[maybe_unused]] auto const reduced = cudf::reduce(vals, + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + } +} + TEST_F(HostUDFTest, SegmentedReductionAllInput) { auto const vals = int32s_col{1, 2, 3}; auto const offsets = int32s_col{0, 3, 5, 10}.release(); auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>()); - + std::make_unique>()); [[maybe_unused]] auto const result = cudf::segmented_reduce( vals, cudf::device_span(offsets->view().begin(), offsets->size()), @@ -209,12 +270,38 @@ TEST_F(HostUDFTest, SegmentedReductionAllInput) cudf::get_current_device_resource_ref()); } +TEST_F(HostUDFTest, SegmentedReductionSomeInput) +{ + auto const vals = int32s_col{1, 2, 3}; + auto const offsets = 
int32s_col{0, 3, 5, 10}.release(); + for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { + auto input_attrs = get_subset(cudf::host_udf_base::input_data_attributes{ + cudf::host_udf_base::segmented_reduction_data_attribute::INPUT_VALUES, + cudf::host_udf_base::segmented_reduction_data_attribute::OUTPUT_DTYPE, + cudf::host_udf_base::segmented_reduction_data_attribute::INIT_VALUE, + cudf::host_udf_base::segmented_reduction_data_attribute::NULL_POLICY, + cudf::host_udf_base::segmented_reduction_data_attribute::OFFSETS}); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique>( + std::move(input_attrs))); + [[maybe_unused]] auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + } +} + TEST_F(HostUDFTest, GroupbyAllInput) { auto const keys = int32s_col{0, 1, 2}; auto const vals = int32s_col{0, 1, 2}; auto agg = cudf::make_host_udf_aggregation( - std::make_unique>()); + std::make_unique>()); std::vector requests; requests.emplace_back(); @@ -226,3 +313,30 @@ TEST_F(HostUDFTest, GroupbyAllInput) [[maybe_unused]] auto const grp_result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); } + +TEST_F(HostUDFTest, GroupbySomeInput) +{ + auto const keys = int32s_col{0, 1, 2}; + auto const vals = int32s_col{0, 1, 2}; + for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { + auto input_attrs = get_subset(cudf::host_udf_base::input_data_attributes{ + cudf::host_udf_base::groupby_data_attribute::INPUT_VALUES, + cudf::host_udf_base::groupby_data_attribute::GROUPED_VALUES, + cudf::host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES, + cudf::host_udf_base::groupby_data_attribute::GROUP_OFFSETS, + cudf::host_udf_base::groupby_data_attribute::GROUP_LABELS}); + input_attrs.insert(get_random_agg()); + auto agg = 
cudf::make_host_udf_aggregation( + std::make_unique>(std::move(input_attrs))); + + std::vector requests; + requests.emplace_back(); + requests[0].values = vals; + requests[0].aggregations.push_back(std::move(agg)); + cudf::groupby::groupby gb_obj( + cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); + + [[maybe_unused]] auto const grp_result = + gb_obj.aggregate(requests, cudf::test::get_default_stream()); + } +} From 82379ca20bffefcae260ad393bf499344b6124d0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 16:05:17 -0800 Subject: [PATCH 46/59] Fix compile issue Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cu index 53327a432b5..993a0cde0d5 100644 --- a/cpp/tests/groupby/host_udf_tests.cu +++ b/cpp/tests/groupby/host_udf_tests.cu @@ -57,7 +57,7 @@ struct host_udf_test : cudf::host_udf_base { reduction_data_attribute::INIT_VALUE}; } EXPECT_EQ(input.size(), check_attrs.size()); - for (auto const attr : check_attrs) { + for (auto const& attr : check_attrs) { EXPECT_TRUE(input.count(attr) > 0); EXPECT_TRUE(std::holds_alternative(attr.value)); switch (auto const attr_val = std::get(attr.value)) { @@ -84,7 +84,7 @@ struct host_udf_test : cudf::host_udf_base { segmented_reduction_data_attribute::OFFSETS}; } EXPECT_EQ(input.size(), check_attrs.size()); - for (auto const attr : check_attrs) { + for (auto const& attr : check_attrs) { EXPECT_TRUE(input.count(attr) > 0); EXPECT_TRUE(std::holds_alternative(attr.value)); switch (auto const attr_val = std::get(attr.value)) { @@ -118,7 +118,7 @@ struct host_udf_test : cudf::host_udf_base { groupby_data_attribute::GROUP_LABELS}; } EXPECT_EQ(input.size(), check_attrs.size()); - for (auto const attr : check_attrs) { + for (auto const& attr : check_attrs) { EXPECT_TRUE(input.count(attr) > 0); 
EXPECT_TRUE(std::holds_alternative(attr.value) || std::holds_alternative>(attr.value)); @@ -144,7 +144,7 @@ struct host_udf_test : cudf::host_udf_base { default:; } } else { // std::holds_alternative>(attr.value) - EXPECT_TRUE(std::holds_alternative>(input.at(attr))); + EXPECT_TRUE(std::holds_alternative(input.at(attr))); } } } From ecfb8792d223d6536d110a840f64a931bf521713 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 16:09:29 -0800 Subject: [PATCH 47/59] Rename test file Signed-off-by: Nghia Truong --- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/groupby/{host_udf_tests.cu => host_udf_tests.cpp} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/tests/groupby/{host_udf_tests.cu => host_udf_tests.cpp} (100%) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5e26f17e446..9ab3c88f4ce 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -133,7 +133,7 @@ ConfigureTest( groupby/groups_tests.cpp groupby/histogram_tests.cpp groupby/host_udf_example_tests.cu - groupby/host_udf_tests.cu + groupby/host_udf_tests.cpp groupby/keys_tests.cpp groupby/lists_tests.cpp groupby/m2_tests.cpp diff --git a/cpp/tests/groupby/host_udf_tests.cu b/cpp/tests/groupby/host_udf_tests.cpp similarity index 100% rename from cpp/tests/groupby/host_udf_tests.cu rename to cpp/tests/groupby/host_udf_tests.cpp From ef4392ecf8e4e2adf62f2c051ab8679c628b27c5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 20:04:11 -0800 Subject: [PATCH 48/59] Rewrite tests, adding more check Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_tests.cpp | 56 +++++++++++++++++----------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/cpp/tests/groupby/host_udf_tests.cpp b/cpp/tests/groupby/host_udf_tests.cpp index 993a0cde0d5..024d66b10f0 100644 --- a/cpp/tests/groupby/host_udf_tests.cpp +++ b/cpp/tests/groupby/host_udf_tests.cpp @@ -30,14 +30,18 @@ namespace { /** * @brief A host-based UDF implementation used for 
unit tests. */ -template +template struct host_udf_test : cudf::host_udf_base { static_assert(std::is_same_v || std::is_same_v || std::is_same_v); - input_data_attributes input_attrs; - host_udf_test(input_data_attributes input_attrs_ = {}) : input_attrs(std::move(input_attrs_)) {} + bool* const test_run; // to check if the test is accidentally skipped + input_data_attributes const input_attrs; + host_udf_test(bool* test_run_, input_data_attributes input_attrs_ = {}) + : test_run{test_run_}, input_attrs(std::move(input_attrs_)) + { + } [[nodiscard]] input_data_attributes get_required_data() const override { return input_attrs; } @@ -47,7 +51,7 @@ struct host_udf_test : cudf::host_udf_base { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const override { - SCOPED_TRACE("Original line of failure: " + std::to_string(test_location)); + SCOPED_TRACE("Original line of failure: " + std::to_string(test_location_line)); input_data_attributes check_attrs = input_attrs; if constexpr (std::is_same_v) { @@ -60,7 +64,7 @@ struct host_udf_test : cudf::host_udf_base { for (auto const& attr : check_attrs) { EXPECT_TRUE(input.count(attr) > 0); EXPECT_TRUE(std::holds_alternative(attr.value)); - switch (auto const attr_val = std::get(attr.value)) { + switch (std::get(attr.value)) { case reduction_data_attribute::INPUT_VALUES: EXPECT_TRUE(std::holds_alternative(input.at(attr))); break; @@ -87,7 +91,7 @@ struct host_udf_test : cudf::host_udf_base { for (auto const& attr : check_attrs) { EXPECT_TRUE(input.count(attr) > 0); EXPECT_TRUE(std::holds_alternative(attr.value)); - switch (auto const attr_val = std::get(attr.value)) { + switch (std::get(attr.value)) { case segmented_reduction_data_attribute::INPUT_VALUES: EXPECT_TRUE(std::holds_alternative(input.at(attr))); break; @@ -123,7 +127,7 @@ struct host_udf_test : cudf::host_udf_base { EXPECT_TRUE(std::holds_alternative(attr.value) || std::holds_alternative>(attr.value)); if (std::holds_alternative(attr.value)) { - 
switch (auto const attr_val = std::get(attr.value)) { + switch (std::get(attr.value)) { case groupby_data_attribute::INPUT_VALUES: EXPECT_TRUE(std::holds_alternative(input.at(attr))); break; @@ -149,6 +153,7 @@ struct host_udf_test : cudf::host_udf_base { } } + *test_run = true; // test is run successfully return get_empty_output(std::nullopt, stream, mr); } @@ -170,7 +175,7 @@ struct host_udf_test : cudf::host_udf_base { [[nodiscard]] std::size_t do_hash() const override { return 0; } [[nodiscard]] std::unique_ptr clone() const override { - return std::make_unique(); + return std::make_unique(test_run, input_attrs); } }; @@ -190,8 +195,6 @@ cudf::host_udf_base::input_data_attributes get_subset( output.insert(elements[idx_distr(gen)]); } - printf("subset_size: %d\n", (int)subset_size); - printf("original size: %d\n", (int)attrs.size()); return output; } @@ -199,9 +202,8 @@ std::unique_ptr get_random_agg() { std::random_device rd; std::mt19937 gen(rd()); - std::uniform_int_distribution distr(1, 4); - auto const agg_idx = distr(gen); - switch (agg_idx) { + std::uniform_int_distribution distr(1, 4); + switch (distr(gen)) { case 1: return cudf::make_min_aggregation(); case 2: return cudf::make_max_aggregation(); case 3: return cudf::make_sum_aggregation(); @@ -225,40 +227,46 @@ struct HostUDFTest : cudf::test::BaseFixture {}; TEST_F(HostUDFTest, ReductionAllInput) { + bool test_run = false; auto const vals = int32s_col{1, 2, 3}; auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>()); + std::make_unique>(&test_run)); [[maybe_unused]] auto const reduced = cudf::reduce(vals, *agg, cudf::data_type{cudf::type_id::INT64}, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + EXPECT_TRUE(test_run); } TEST_F(HostUDFTest, ReductionSomeInput) { auto const vals = int32s_col{1, 2, 3}; for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { + bool test_run = false; auto input_attrs = get_subset(cudf::host_udf_base::input_data_attributes{ 
cudf::host_udf_base::reduction_data_attribute::INPUT_VALUES, cudf::host_udf_base::reduction_data_attribute::OUTPUT_DTYPE, cudf::host_udf_base::reduction_data_attribute::INIT_VALUE}); auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>(std::move(input_attrs))); + std::make_unique>(&test_run, + std::move(input_attrs))); [[maybe_unused]] auto const reduced = cudf::reduce(vals, *agg, cudf::data_type{cudf::type_id::INT64}, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + EXPECT_TRUE(test_run); } } TEST_F(HostUDFTest, SegmentedReductionAllInput) { + bool test_run = false; auto const vals = int32s_col{1, 2, 3}; auto const offsets = int32s_col{0, 3, 5, 10}.release(); auto const agg = cudf::make_host_udf_aggregation( - std::make_unique>()); + std::make_unique>(&test_run)); [[maybe_unused]] auto const result = cudf::segmented_reduce( vals, cudf::device_span(offsets->view().begin(), offsets->size()), @@ -268,6 +276,7 @@ TEST_F(HostUDFTest, SegmentedReductionAllInput) std::nullopt, // init value cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + EXPECT_TRUE(test_run); } TEST_F(HostUDFTest, SegmentedReductionSomeInput) @@ -275,6 +284,7 @@ TEST_F(HostUDFTest, SegmentedReductionSomeInput) auto const vals = int32s_col{1, 2, 3}; auto const offsets = int32s_col{0, 3, 5, 10}.release(); for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { + bool test_run = false; auto input_attrs = get_subset(cudf::host_udf_base::input_data_attributes{ cudf::host_udf_base::segmented_reduction_data_attribute::INPUT_VALUES, cudf::host_udf_base::segmented_reduction_data_attribute::OUTPUT_DTYPE, @@ -283,7 +293,7 @@ TEST_F(HostUDFTest, SegmentedReductionSomeInput) cudf::host_udf_base::segmented_reduction_data_attribute::OFFSETS}); auto const agg = cudf::make_host_udf_aggregation( std::make_unique>( - std::move(input_attrs))); + &test_run, std::move(input_attrs))); [[maybe_unused]] auto const result = cudf::segmented_reduce( vals, 
cudf::device_span(offsets->view().begin(), offsets->size()), @@ -293,15 +303,17 @@ TEST_F(HostUDFTest, SegmentedReductionSomeInput) std::nullopt, // init value cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + EXPECT_TRUE(test_run); } } TEST_F(HostUDFTest, GroupbyAllInput) { + bool test_run = false; auto const keys = int32s_col{0, 1, 2}; auto const vals = int32s_col{0, 1, 2}; auto agg = cudf::make_host_udf_aggregation( - std::make_unique>()); + std::make_unique>(&test_run)); std::vector requests; requests.emplace_back(); @@ -309,9 +321,9 @@ TEST_F(HostUDFTest, GroupbyAllInput) requests[0].aggregations.push_back(std::move(agg)); cudf::groupby::groupby gb_obj( cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); - [[maybe_unused]] auto const grp_result = gb_obj.aggregate(requests, cudf::test::get_default_stream()); + EXPECT_TRUE(test_run); } TEST_F(HostUDFTest, GroupbySomeInput) @@ -319,6 +331,7 @@ TEST_F(HostUDFTest, GroupbySomeInput) auto const keys = int32s_col{0, 1, 2}; auto const vals = int32s_col{0, 1, 2}; for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { + bool test_run = false; auto input_attrs = get_subset(cudf::host_udf_base::input_data_attributes{ cudf::host_udf_base::groupby_data_attribute::INPUT_VALUES, cudf::host_udf_base::groupby_data_attribute::GROUPED_VALUES, @@ -327,7 +340,8 @@ TEST_F(HostUDFTest, GroupbySomeInput) cudf::host_udf_base::groupby_data_attribute::GROUP_LABELS}); input_attrs.insert(get_random_agg()); auto agg = cudf::make_host_udf_aggregation( - std::make_unique>(std::move(input_attrs))); + std::make_unique>(&test_run, + std::move(input_attrs))); std::vector requests; requests.emplace_back(); @@ -335,8 +349,8 @@ TEST_F(HostUDFTest, GroupbySomeInput) requests[0].aggregations.push_back(std::move(agg)); cudf::groupby::groupby gb_obj( cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); - [[maybe_unused]] auto const grp_result = gb_obj.aggregate(requests, 
cudf::test::get_default_stream()); + EXPECT_TRUE(test_run); } } From 93ac14c404773cd67f3f6e652ea62da62367d277 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Nov 2024 21:54:37 -0800 Subject: [PATCH 49/59] Add more Java classes Signed-off-by: Nghia Truong --- .../src/main/java/ai/rapids/cudf/Aggregation.java | 15 +++++++++++---- .../java/ai/rapids/cudf/GroupByAggregation.java | 9 +++++++++ .../java/ai/rapids/cudf/ReductionAggregation.java | 15 ++++++++++++--- .../cudf/SegmentedReductionAggregation.java | 9 +++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index e69f1d5449b..18d1e9b432c 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -388,12 +388,10 @@ public boolean equals(Object other) { static final class HostUDFAggregation extends Aggregation { private final long udfNativeHandle; - private final long udfNativeHashCode; - private HostUDFAggregation(long udfNativeHandle, long udfNativeHashCode) { + private HostUDFAggregation(long udfNativeHandle) { super(Kind.HOST_UDF); this.udfNativeHandle = udfNativeHandle; - this.udfNativeHashCode = udfNativeHashCode; } @Override @@ -403,7 +401,7 @@ long createNativeInstance() { @Override public int hashCode() { - return 31 * kind.hashCode() + Long.hashCode(udfNativeHashCode); + return 31 * kind.hashCode(); } @Override @@ -870,6 +868,15 @@ static MergeSetsAggregation mergeSets(NullEquality nullEquality, NaNEquality nan return new MergeSetsAggregation(nullEquality, nanEquality); } + /** + * Host UDF aggregation, to execute a host-side user-defined function (UDF). 
+ * @param udfNativeHandle Pointer to the native host UDF instance + * @return A new HostUDFAggregation instance + */ + static HostUDFAggregation hostUDF(long udfNativeHandle) { + return new HostUDFAggregation(udfNativeHandle); + } + static final class LeadAggregation extends LeadLagAggregation { private LeadAggregation(int offset, ColumnVector defaultOutput) { super(Kind.LEAD, offset, defaultOutput); diff --git a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java index 0fae33927b6..0c945a5ed2f 100644 --- a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java @@ -277,6 +277,15 @@ public static GroupByAggregation mergeSets() { return new GroupByAggregation(Aggregation.mergeSets()); } + /** + * Execute an aggregation using a host-side user-defined function (UDF). + * @param udfNativeHandle Pointer to the native host UDF instance + * @return A new GroupByAggregation instance + */ + public static GroupByAggregation hostUDF(long udfNativeHandle) { + return new GroupByAggregation(Aggregation.hostUDF(udfNativeHandle)); + } + /** * Merge the partial sets produced by multiple CollectSetAggregations. * diff --git a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java index ba8ae379bae..063879ec34f 100644 --- a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java @@ -161,14 +161,14 @@ public static ReductionAggregation median() { /** * Aggregate to compute the specified quantiles. Uses linear interpolation by default. */ - public static ReductionAggregation quantile(double ... quantiles) { + public static ReductionAggregation quantile(double... quantiles) { return new ReductionAggregation(Aggregation.quantile(quantiles)); } /** * Aggregate to compute various quantiles. 
*/ - public static ReductionAggregation quantile(QuantileMethod method, double ... quantiles) { + public static ReductionAggregation quantile(QuantileMethod method, double... quantiles) { return new ReductionAggregation(Aggregation.quantile(method, quantiles)); } @@ -256,7 +256,7 @@ public static ReductionAggregation collectSet() { * @param nanEquality Flag to specify whether NaN values in floating point column should be considered equal. */ public static ReductionAggregation collectSet(NullPolicy nullPolicy, - NullEquality nullEquality, NaNEquality nanEquality) { + NullEquality nullEquality, NaNEquality nanEquality) { return new ReductionAggregation(Aggregation.collectSet(nullPolicy, nullEquality, nanEquality)); } @@ -286,6 +286,15 @@ public static ReductionAggregation mergeSets(NullEquality nullEquality, NaNEqual return new ReductionAggregation(Aggregation.mergeSets(nullEquality, nanEquality)); } + /** + * Execute a reduction using a host-side user-defined function (UDF). + * @param udfNativeHandle Pointer to the native host UDF instance + * @return A new ReductionAggregation instance + */ + public static ReductionAggregation hostUDF(long udfNativeHandle) { + return new ReductionAggregation(Aggregation.hostUDF(udfNativeHandle)); + } + /** * Create HistogramAggregation, computing the frequencies for each unique row. * diff --git a/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java b/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java index 7ed150a2fec..099b5535bc5 100644 --- a/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java @@ -101,4 +101,13 @@ public static SegmentedReductionAggregation any() { public static SegmentedReductionAggregation all() { return new SegmentedReductionAggregation(Aggregation.all()); } + + /** + * Execute a reduction using a host-side user-defined function (UDF). 
+ * @param udfNativeHandle Pointer to the native host UDF instance + * @return A new SegmentedReductionAggregation instance + */ + public static SegmentedReductionAggregation hostUDF(long udfNativeHandle) { + return new SegmentedReductionAggregation(Aggregation.hostUDF(udfNativeHandle)); + } } From 47163737d805cf0f36b383a5005ab9c80355cf29 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 27 Nov 2024 16:32:59 -0800 Subject: [PATCH 50/59] Rewrite `host_udf_base` Signed-off-by: Nghia Truong --- cpp/include/cudf/aggregation.hpp | 134 +++++++++++++------------------ 1 file changed, 57 insertions(+), 77 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 571be214180..7bce858c168 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -614,10 +615,10 @@ std::unique_ptr make_udf_aggregation(udf_type type, data_type output_type); /** - * @brief The base class for host-based UDF implementation. + * @brief The interface for host-based UDF implementation. * - * An actual implementation of host-based UDF needs to be derived from this base class, defining - * its own operations as well as all the required input data to the aggregation. + * An implementation of host-based UDF needs to be derived from this base class, defining + * its own version of the required functions. */ struct host_udf_base { host_udf_base() = default; @@ -659,7 +660,7 @@ struct host_udf_base { }; /** - * @brief The possible data that may be needed in the derived class for its operations. + * @brief Describe possible data that may be needed in the derived class for its operations. * * Such data can be either intermediate data such as sorted values or group labels etc, or the * results of other aggregations. 
@@ -677,20 +678,14 @@ struct host_udf_base { segmented_reduction_data_attribute, groupby_data_attribute, std::unique_ptr>; - value_type value; ///< The actual data attribute, wrapped in this struct + value_type value; ///< The actual data attribute, wrapped by this struct ///< as a wrapper is needed to define hash and equal_to functors. data_attribute() = default; ///< Default constructor data_attribute(data_attribute&&) = default; ///< Move constructor /** - * @brief Copy constructor. - * @param other The other data attribute to copy from. - */ - data_attribute(data_attribute const& other) : value{copy_value(other.value)} {} - - /** - * @brief Construct a new data attribute from aggregation attributes. + * @brief Construct a new data attribute from an aggregation attribute. * @param value_ An aggregation attribute */ template const& val) { return value_type{val->clone()}; }}, + other.value)} { - if (std::holds_alternative(value)) { - return std::get(value); - } - if (std::holds_alternative(value)) { - return std::get(value); - } - if (std::holds_alternative(value)) { - return std::get(value); - } - return std::get>(value)->clone(); } /** @@ -750,23 +739,15 @@ struct host_udf_base { */ std::size_t operator()(data_attribute const& attr) const { - auto const& value = attr.value; - auto const hash_value = [&] { - if (std::holds_alternative(value)) { - return std::hash{}(static_cast(std::get(value))); - } - if (std::holds_alternative(value)) { - return std::hash{}( - static_cast(std::get(value))); - } - if (std::holds_alternative(value)) { - return std::hash{}(static_cast(std::get(value))); - } - return std::get>(value)->do_hash(); - }(); - return value.index() ^ hash_value; + auto const& value = attr.value; + auto const hash_value = + std::visit(cudf::detail::visitor_overload{ + [](auto const& val) { return std::hash{}(static_cast(val)); }, + [](std::unique_ptr const& val) { return val->do_hash(); }}, + value); + return std::hash{}(value.index()) ^ hash_value; } - }; 
+ }; // struct hash /** * @brief Equality comparison functor for `data_attribute`. @@ -783,26 +764,25 @@ struct host_udf_base { auto const& lhs_val = lhs.value; auto const& rhs_val = rhs.value; if (lhs_val.index() != rhs_val.index()) { return false; } - if (std::holds_alternative(lhs_val)) { - return std::get(lhs_val) == - std::get(rhs_val); - } - if (std::holds_alternative(lhs_val)) { - return std::get(lhs_val) == - std::get(rhs_val); - } - if (std::holds_alternative(lhs_val)) { - return std::get(lhs_val) == - std::get(rhs_val); - } - return std::get>(lhs.value)->is_equal( - *std::get>(rhs.value)); + return std::visit(cudf::detail::visitor_overload{ + [](auto const& lhs_val, auto const& rhs_val) { + if constexpr (std::is_same_v) { + return lhs_val == rhs_val; + } + return false; + }, + [](std::unique_ptr const& lhs_val, + std::unique_ptr const& rhs_val) { + return lhs_val->is_equal(*rhs_val); + }}, + lhs_val, + rhs_val); } - }; - }; + }; // struct equal_to + }; // struct data_attribute /** - * @brief Set of attributes for the data that is needed for computing the aggregation. + * @brief Set of attributes for the input data that is needed for computing the aggregation. */ using input_data_attributes = std::unordered_set; @@ -839,6 +819,21 @@ struct host_udf_base { */ using output_type = std::variant, std::unique_ptr>; + /** + * @brief Get the output when the input values column is empty. + * + * This is called in libcudf when the input values column is empty. In such situations libcudf + * tries to generate the output directly without unnecessarily evaluating the intermediate data. 
+ * + * @param output_dtype The expected output data type for reduction (if specified) + * @param stream The CUDA stream to use for any kernel launches + * @param mr Device memory resource to use for any allocations + * @return The output result of the aggregation when input values is empty + */ + [[nodiscard]] virtual output_type get_empty_output(std::optional output_dtype, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const = 0; + /** * @brief Perform the main computation for the host-based UDF. * @@ -852,19 +847,10 @@ struct host_udf_base { rmm::device_async_resource_ref mr) const = 0; /** - * @brief Get the output when the input values is empty. - * - * This may be called in the situations that libcudf tries to avoid unnecessarily evaluating the - * intermediate data when the input values is empty. - * - * @param output_dtype The expected output data type for reduction (if specified) - * @param stream The CUDA stream to use for any kernel launches - * @param mr Device memory resource to use for any allocations - * @return The output result of the aggregation when input values is empty + * @brief Computes hash value of the derived class's instance. + * @return The hash value of the instance */ - [[nodiscard]] virtual output_type get_empty_output(std::optional output_dtype, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const = 0; + [[nodiscard]] virtual std::size_t do_hash() const = 0; /** * @brief Compares two instances of the derived class for equality. @@ -873,12 +859,6 @@ struct host_udf_base { */ [[nodiscard]] virtual bool is_equal(host_udf_base const& other) const = 0; - /** - * @brief Computes hash value of the derived class's instance. - * @return The hash value of the instance - */ - [[nodiscard]] virtual std::size_t do_hash() const = 0; - /** * @brief Clones the instance. 
* From baa79913d898dfe4eec0526d5317fcd7a01ab1a0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 27 Nov 2024 20:47:25 -0800 Subject: [PATCH 51/59] Rewrite tests Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_example_tests.cu | 101 ++++++++++---------- cpp/tests/groupby/host_udf_tests.cpp | 59 +++++++----- 2 files changed, 82 insertions(+), 78 deletions(-) diff --git a/cpp/tests/groupby/host_udf_example_tests.cu b/cpp/tests/groupby/host_udf_example_tests.cu index 385dc8d92f0..00e390aa893 100644 --- a/cpp/tests/groupby/host_udf_example_tests.cu +++ b/cpp/tests/groupby/host_udf_example_tests.cu @@ -62,6 +62,8 @@ struct test_udf_simple_type : cudf::host_udf_base { // Empty set, which means we need everything. return {}; } else { + // We need grouped values, group offsets, group labels, and also results from groups' + // MAX and SUM aggregations. return {groupby_data_attribute::GROUPED_VALUES, groupby_data_attribute::GROUP_OFFSETS, groupby_data_attribute::GROUP_LABELS, @@ -70,6 +72,25 @@ struct test_udf_simple_type : cudf::host_udf_base { } } + [[nodiscard]] output_type get_empty_output( + [[maybe_unused]] std::optional output_dtype, + [[maybe_unused]] rmm::cuda_stream_view stream, + [[maybe_unused]] rmm::device_async_resource_ref mr) const override + { + if constexpr (std::is_same_v) { + CUDF_EXPECTS(output_dtype.has_value(), + "Data type for the reduction result must be specified."); + return cudf::make_default_constructed_scalar(output_dtype.value(), stream, mr); + } else if constexpr (std::is_same_v) { + CUDF_EXPECTS(output_dtype.has_value(), + "Data type for the reduction result must be specified."); + return cudf::make_empty_column(output_dtype.value()); + } else { + return cudf::make_empty_column( + cudf::data_type{cudf::type_to_id()}); + } + } + [[nodiscard]] output_type operator()(host_udf_input const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const override @@ -95,49 +116,31 @@ struct test_udf_simple_type : 
cudf::host_udf_base { } } - [[nodiscard]] output_type get_empty_output( - [[maybe_unused]] std::optional output_dtype, - [[maybe_unused]] rmm::cuda_stream_view stream, - [[maybe_unused]] rmm::device_async_resource_ref mr) const override + [[nodiscard]] std::size_t do_hash() const override { - if constexpr (std::is_same_v) { - CUDF_EXPECTS(output_dtype.has_value(), - "Data type for the reduction result must be specified."); - return cudf::make_default_constructed_scalar(output_dtype.value(), stream, mr); - } else if constexpr (std::is_same_v) { - CUDF_EXPECTS(output_dtype.has_value(), - "Data type for the reduction result must be specified."); - return cudf::make_empty_column(output_dtype.value()); - } else { - return cudf::make_empty_column( - cudf::data_type{cudf::type_to_id()}); - } + // Just return the same hash for all instances of this class. + return std::size_t{12345}; } [[nodiscard]] bool is_equal(host_udf_base const& other) const override { - // Just check if the other object is also instance of the same derived class. + // Just check if the other object is also instance of this class. return dynamic_cast(&other) != nullptr; } - [[nodiscard]] std::size_t do_hash() const override - { - return std::hash{}({"test_udf_simple_type"}); - } - [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(); } - // For faster compile times, we only support a few input/output types. + // For quick compilation, we only instantiate a few input/output types. template static constexpr bool is_valid_input_t() { - return std::is_same_v || std::is_same_v; + return std::is_same_v; } - // For faster compile times, we only support a few input/output types. + // For quick compilation, we only instantiate a few input/output types. template static constexpr bool is_valid_output_t() { @@ -154,7 +157,7 @@ struct test_udf_simple_type : cudf::host_udf_base { CUDF_ENABLE_IF(!is_valid_input_t() || !is_valid_output_t())> output_type operator()(Args...) 
const { - CUDF_FAIL("Unsupported input type."); + CUDF_FAIL("Unsupported input/output type."); } template (input.at(reduction_data_attribute::INPUT_VALUES)); auto const output_dtype = std::get(input.at(reduction_data_attribute::OUTPUT_DTYPE)); + if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } + auto const input_init_value = std::get>>( input.at(reduction_data_attribute::INIT_VALUE)); - - if (values.size() == 0) { return parent->get_empty_output(output_dtype, stream, mr); } - auto const init_value = [&]() -> InputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { auto const numeric_init_scalar = @@ -220,7 +222,7 @@ struct test_udf_simple_type : cudf::host_udf_base { CUDF_ENABLE_IF(!is_valid_input_t() || !is_valid_output_t())> output_type operator()(Args...) const { - CUDF_FAIL("Unsupported input type."); + CUDF_FAIL("Unsupported input/output type."); } template (offsets.size()) - 1; if (values.size() == 0) { - if (num_segments <= 0) { - return parent->get_empty_output(output_dtype, stream, mr); - } else { - return cudf::make_numeric_column( - output_dtype, num_segments, cudf::mask_state::ALL_NULL, stream, mr); - } + if (num_segments <= 0) { return parent->get_empty_output(output_dtype, stream, mr); } + return cudf::make_numeric_column( + output_dtype, num_segments, cudf::mask_state::ALL_NULL, stream, mr); } auto const input_init_value = std::get>>( input.at(segmented_reduction_data_attribute::INIT_VALUE)); - auto const init_value = [&]() -> InputType { if (input_init_value.has_value() && input_init_value.value().get().is_valid(stream)) { auto const numeric_init_scalar = @@ -264,7 +262,6 @@ struct test_udf_simple_type : cudf::host_udf_base { auto const null_handling = std::get(input.at(segmented_reduction_data_attribute::NULL_POLICY)); - auto const values_dv_ptr = cudf::column_device_view::create(values, stream); auto output = cudf::make_numeric_column( output_dtype, num_segments, 
cudf::mask_state::UNALLOCATED, stream); @@ -323,7 +320,7 @@ struct test_udf_simple_type : cudf::host_udf_base { template ())> output_type operator()(Args...) const { - CUDF_FAIL("Unsupported input type."); + CUDF_FAIL("Unsupported input/output type."); } template ())> @@ -443,11 +440,11 @@ TEST_F(HostUDFExampleTest, ReductionEmptyInput) TEST_F(HostUDFExampleTest, SegmentedReductionSimpleInput) { - auto const vals = doubles_col{ - {0.0, 0.0 /*null*/, 2.0, 3.0, 0.0 /*null*/, 5.0, 0.0 /*null*/, 0.0 /*null*/, 8.0, 9.0}, - {true, false, true, true, false, true, false, false, true, true}}; - auto const offsets = int32s_col{0, 3, 5, 10}.release(); - auto const agg = cudf::make_host_udf_aggregation( + double constexpr null = 0.0; + auto const vals = doubles_col{{0.0, null, 2.0, 3.0, null, 5.0, null, null, 8.0, 9.0}, + {true, false, true, true, false, true, false, false, true, true}}; + auto const offsets = int32s_col{0, 3, 5, 10}.release(); + auto const agg = cudf::make_host_udf_aggregation( std::make_unique>()); // Test without init value. @@ -509,7 +506,7 @@ TEST_F(HostUDFExampleTest, SegmentedReductionSimpleInput) TEST_F(HostUDFExampleTest, SegmentedReductionEmptySegments) { - auto const vals = int32s_col{}; + auto const vals = doubles_col{}; auto const offsets = int32s_col{0, 0, 0, 0}.release(); auto const agg = cudf::make_host_udf_aggregation( std::make_unique>()); @@ -528,7 +525,7 @@ TEST_F(HostUDFExampleTest, SegmentedReductionEmptySegments) TEST_F(HostUDFExampleTest, SegmentedReductionEmptyInput) { - auto const vals = int32s_col{}; + auto const vals = doubles_col{}; // Cannot be empty due to a bug in the libcudf: https://github.com/rapidsai/cudf/issues/17433. 
auto const offsets = int32s_col{0}.release(); auto const agg = cudf::make_host_udf_aggregation( @@ -548,11 +545,11 @@ TEST_F(HostUDFExampleTest, SegmentedReductionEmptyInput) TEST_F(HostUDFExampleTest, GroupbySimpleInput) { - auto const keys = int32s_col{0, 1, 2, 0, 1, 2, 0, 1, 2, 0}; - auto const vals = doubles_col{ - {0.0, 0.0 /*null*/, 2.0, 3.0, 0.0 /*null*/, 5.0, 0.0 /*null*/, 0.0 /*null*/, 8.0, 9.0}, - {true, false, true, true, false, true, false, false, true, true}}; - auto agg = cudf::make_host_udf_aggregation( + double constexpr null = 0.0; + auto const keys = int32s_col{0, 1, 2, 0, 1, 2, 0, 1, 2, 0}; + auto const vals = doubles_col{{0.0, null, 2.0, 3.0, null, 5.0, null, null, 8.0, 9.0}, + {true, false, true, true, false, true, false, false, true, true}}; + auto agg = cudf::make_host_udf_aggregation( std::make_unique>()); std::vector requests; @@ -571,6 +568,6 @@ TEST_F(HostUDFExampleTest, GroupbySimpleInput) // Group max: [ 9, null, 8 ] // Group sum: [ 12, null, 15 ] // Output: [ 1 * 90 - 9 * 12, null, 3 * 93 - 8 * 15 ] - auto const expected = doubles_col{{-18, 0, 159}, {true, false, true}}; + auto const expected = doubles_col{{-18.0, null, 159.0}, {true, false, true}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } diff --git a/cpp/tests/groupby/host_udf_tests.cpp b/cpp/tests/groupby/host_udf_tests.cpp index 024d66b10f0..231fc3ec462 100644 --- a/cpp/tests/groupby/host_udf_tests.cpp +++ b/cpp/tests/groupby/host_udf_tests.cpp @@ -171,14 +171,17 @@ struct host_udf_test : cudf::host_udf_base { } } - [[nodiscard]] bool is_equal(host_udf_base const& other) const override { return true; } [[nodiscard]] std::size_t do_hash() const override { return 0; } + [[nodiscard]] bool is_equal(host_udf_base const& other) const override { return true; } [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(test_run, input_attrs); } }; +/** + * @brief Get a random subset of input data attributes. 
+ */ cudf::host_udf_base::input_data_attributes get_subset( cudf::host_udf_base::input_data_attributes const& attrs) { @@ -186,7 +189,6 @@ cudf::host_udf_base::input_data_attributes get_subset( std::mt19937 gen(rd()); std::uniform_int_distribution size_distr(1, attrs.size() - 1); auto const subset_size = size_distr(gen); - auto const elements = std::vector(attrs.begin(), attrs.end()); std::uniform_int_distribution idx_distr(0, attrs.size() - 1); @@ -194,10 +196,12 @@ cudf::host_udf_base::input_data_attributes get_subset( while (output.size() < subset_size) { output.insert(elements[idx_distr(gen)]); } - return output; } +/** + * @brief Generate a random aggregation object from {min, max, sum, product}. + */ std::unique_ptr get_random_agg() { std::random_device rd; @@ -208,9 +212,8 @@ std::unique_ptr get_random_agg() case 2: return cudf::make_max_aggregation(); case 3: return cudf::make_sum_aggregation(); case 4: return cudf::make_product_aggregation(); - default:; + default: CUDF_UNREACHABLE("This should not be reached."); } - CUDF_UNREACHABLE("This should not be reached."); return nullptr; } @@ -220,7 +223,8 @@ using int32s_col = cudf::test::fixed_width_column_wrapper; // Number of randomly testing on the input data attributes. // For each test, a subset of data attributes will be randomly generated from all the possible input -// data attributes. That subset will be tested for correctness. +// data attributes. The input data corresponding to that subset passed from libcudf will be tested +// for correctness. 
constexpr int NUM_RANDOM_TESTS = 10; struct HostUDFTest : cudf::test::BaseFixture {}; @@ -241,13 +245,14 @@ TEST_F(HostUDFTest, ReductionAllInput) TEST_F(HostUDFTest, ReductionSomeInput) { - auto const vals = int32s_col{1, 2, 3}; + auto const vals = int32s_col{1, 2, 3}; + auto const all_attrs = cudf::host_udf_base::input_data_attributes{ + cudf::host_udf_base::reduction_data_attribute::INPUT_VALUES, + cudf::host_udf_base::reduction_data_attribute::OUTPUT_DTYPE, + cudf::host_udf_base::reduction_data_attribute::INIT_VALUE}; for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { bool test_run = false; - auto input_attrs = get_subset(cudf::host_udf_base::input_data_attributes{ - cudf::host_udf_base::reduction_data_attribute::INPUT_VALUES, - cudf::host_udf_base::reduction_data_attribute::OUTPUT_DTYPE, - cudf::host_udf_base::reduction_data_attribute::INIT_VALUE}); + auto input_attrs = get_subset(all_attrs); auto const agg = cudf::make_host_udf_aggregation( std::make_unique>(&test_run, std::move(input_attrs))); @@ -281,16 +286,17 @@ TEST_F(HostUDFTest, SegmentedReductionAllInput) TEST_F(HostUDFTest, SegmentedReductionSomeInput) { - auto const vals = int32s_col{1, 2, 3}; + auto const vals = int32s_col{1, 2, 3}; + auto const all_attrs = cudf::host_udf_base::input_data_attributes{ + cudf::host_udf_base::segmented_reduction_data_attribute::INPUT_VALUES, + cudf::host_udf_base::segmented_reduction_data_attribute::OUTPUT_DTYPE, + cudf::host_udf_base::segmented_reduction_data_attribute::INIT_VALUE, + cudf::host_udf_base::segmented_reduction_data_attribute::NULL_POLICY, + cudf::host_udf_base::segmented_reduction_data_attribute::OFFSETS}; auto const offsets = int32s_col{0, 3, 5, 10}.release(); for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { bool test_run = false; - auto input_attrs = get_subset(cudf::host_udf_base::input_data_attributes{ - cudf::host_udf_base::segmented_reduction_data_attribute::INPUT_VALUES, - cudf::host_udf_base::segmented_reduction_data_attribute::OUTPUT_DTYPE, - 
cudf::host_udf_base::segmented_reduction_data_attribute::INIT_VALUE, - cudf::host_udf_base::segmented_reduction_data_attribute::NULL_POLICY, - cudf::host_udf_base::segmented_reduction_data_attribute::OFFSETS}); + auto input_attrs = get_subset(all_attrs); auto const agg = cudf::make_host_udf_aggregation( std::make_unique>( &test_run, std::move(input_attrs))); @@ -328,16 +334,17 @@ TEST_F(HostUDFTest, GroupbyAllInput) TEST_F(HostUDFTest, GroupbySomeInput) { - auto const keys = int32s_col{0, 1, 2}; - auto const vals = int32s_col{0, 1, 2}; + auto const keys = int32s_col{0, 1, 2}; + auto const vals = int32s_col{0, 1, 2}; + auto const all_attrs = cudf::host_udf_base::input_data_attributes{ + cudf::host_udf_base::groupby_data_attribute::INPUT_VALUES, + cudf::host_udf_base::groupby_data_attribute::GROUPED_VALUES, + cudf::host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES, + cudf::host_udf_base::groupby_data_attribute::GROUP_OFFSETS, + cudf::host_udf_base::groupby_data_attribute::GROUP_LABELS}; for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { bool test_run = false; - auto input_attrs = get_subset(cudf::host_udf_base::input_data_attributes{ - cudf::host_udf_base::groupby_data_attribute::INPUT_VALUES, - cudf::host_udf_base::groupby_data_attribute::GROUPED_VALUES, - cudf::host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES, - cudf::host_udf_base::groupby_data_attribute::GROUP_OFFSETS, - cudf::host_udf_base::groupby_data_attribute::GROUP_LABELS}); + auto input_attrs = get_subset(all_attrs); input_attrs.insert(get_random_agg()); auto agg = cudf::make_host_udf_aggregation( std::make_unique>(&test_run, From 8405167f4ebce0a275178f801d104ca51f4cbe92 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 27 Nov 2024 20:57:27 -0800 Subject: [PATCH 52/59] Rewrite switch statements Signed-off-by: Nghia Truong --- cpp/src/groupby/sort/aggregate.cpp | 17 ++++++----------- cpp/src/reductions/reductions.cpp | 11 ++++------- cpp/src/reductions/segmented/reductions.cpp | 18 
+++++++----------- 3 files changed, 17 insertions(+), 29 deletions(-) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 4e2166ddc1e..d19a81ca1bf 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -814,27 +814,22 @@ void aggregate_result_functor::operator()(aggregation con "Invalid input data attribute for HOST_UDF groupby aggregation."); if (std::holds_alternative(attr.value)) { switch (std::get(attr.value)) { - case host_udf_base::groupby_data_attribute::INPUT_VALUES: { + case host_udf_base::groupby_data_attribute::INPUT_VALUES: udf_input.emplace(attr, values); break; - } - case host_udf_base::groupby_data_attribute::GROUPED_VALUES: { + case host_udf_base::groupby_data_attribute::GROUPED_VALUES: udf_input.emplace(attr, get_grouped_values()); break; - } - case host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES: { + case host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES: udf_input.emplace(attr, get_sorted_values()); break; - } - case host_udf_base::groupby_data_attribute::GROUP_OFFSETS: { + case host_udf_base::groupby_data_attribute::GROUP_OFFSETS: udf_input.emplace(attr, helper.group_offsets(stream)); break; - } - case host_udf_base::groupby_data_attribute::GROUP_LABELS: { + case host_udf_base::groupby_data_attribute::GROUP_LABELS: udf_input.emplace(attr, helper.group_labels(stream)); break; - } - default:; + default: CUDF_UNREACHABLE("Invalid input data attribute for HOST_UDF groupby aggregation."); } } else { // data is result from another aggregation auto other_agg = std::get>(attr.value)->clone(); diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 4b76096b5f6..e7f5ca19d2c 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -159,19 +159,16 @@ struct reduce_dispatch_functor { CUDF_EXPECTS(std::holds_alternative(attr.value), "Invalid input data attribute for HOST_UDF reduction."); switch 
(std::get(attr.value)) { - case host_udf_base::reduction_data_attribute::INPUT_VALUES: { + case host_udf_base::reduction_data_attribute::INPUT_VALUES: udf_input.emplace(attr, col); break; - } - case host_udf_base::reduction_data_attribute::OUTPUT_DTYPE: { + case host_udf_base::reduction_data_attribute::OUTPUT_DTYPE: udf_input.emplace(attr, output_dtype); break; - } - case host_udf_base::reduction_data_attribute::INIT_VALUE: { + case host_udf_base::reduction_data_attribute::INIT_VALUE: udf_input.emplace(attr, init); break; - } - default:; + default: CUDF_UNREACHABLE("Invalid input data attribute for HOST_UDF reduction."); } } auto output = (*udf_ptr)(udf_input, stream, mr); diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 4226be6710d..af1f9c85fda 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -113,27 +113,23 @@ struct segmented_reduce_dispatch_functor { std::holds_alternative(attr.value), "Invalid input data attribute for HOST_UDF segmented reduction."); switch (std::get(attr.value)) { - case host_udf_base::segmented_reduction_data_attribute::INPUT_VALUES: { + case host_udf_base::segmented_reduction_data_attribute::INPUT_VALUES: udf_input.emplace(attr, col); break; - } - case host_udf_base::segmented_reduction_data_attribute::OUTPUT_DTYPE: { + case host_udf_base::segmented_reduction_data_attribute::OUTPUT_DTYPE: udf_input.emplace(attr, output_dtype); break; - } - case host_udf_base::segmented_reduction_data_attribute::INIT_VALUE: { + case host_udf_base::segmented_reduction_data_attribute::INIT_VALUE: udf_input.emplace(attr, init); break; - } - case host_udf_base::segmented_reduction_data_attribute::NULL_POLICY: { + case host_udf_base::segmented_reduction_data_attribute::NULL_POLICY: udf_input.emplace(attr, null_handling); break; - } - case host_udf_base::segmented_reduction_data_attribute::OFFSETS: { + case 
host_udf_base::segmented_reduction_data_attribute::OFFSETS: udf_input.emplace(attr, offsets); break; - } - default:; + default: + CUDF_UNREACHABLE("Invalid input data attribute for HOST_UDF segmented reduction."); } } auto output = (*udf_ptr)(udf_input, stream, mr); From bbdc699a8922ce11592850f3ee3778e7862c2e14 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 2 Dec 2024 10:46:00 -0800 Subject: [PATCH 53/59] Fix out of sync enums Signed-off-by: Nghia Truong --- java/src/main/native/src/AggregationJni.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index c46019bba68..e39b91c8f15 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -163,10 +163,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createTDigestAgg(JNIEnv* std::unique_ptr ret; // These numbers come from Aggregation.java and must stay in sync switch (kind) { - case 31: // TDIGEST + case 32: // TDIGEST ret = cudf::make_tdigest_aggregation(delta); break; - case 32: // MERGE_TDIGEST + case 33: // MERGE_TDIGEST ret = cudf::make_merge_tdigest_aggregation(delta); break; default: throw std::logic_error("Unsupported TDigest Aggregation Operation"); From 3f4d450cd39a8d27b2b1e0e1b77a8dda78432c3b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 4 Dec 2024 19:55:28 -0800 Subject: [PATCH 54/59] Rewrite example Signed-off-by: Nghia Truong --- cpp/tests/groupby/host_udf_example_tests.cu | 6 ++---- cpp/tests/groupby/host_udf_tests.cpp | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/tests/groupby/host_udf_example_tests.cu b/cpp/tests/groupby/host_udf_example_tests.cu index 00e390aa893..729f1b77756 100644 --- a/cpp/tests/groupby/host_udf_example_tests.cu +++ b/cpp/tests/groupby/host_udf_example_tests.cu @@ -16,7 +16,6 @@ #include #include -#include #include #include @@ -525,9 +524,8 @@ 
TEST_F(HostUDFExampleTest, SegmentedReductionEmptySegments) TEST_F(HostUDFExampleTest, SegmentedReductionEmptyInput) { - auto const vals = doubles_col{}; - // Cannot be empty due to a bug in the libcudf: https://github.com/rapidsai/cudf/issues/17433. - auto const offsets = int32s_col{0}.release(); + auto const vals = doubles_col{}; + auto const offsets = int32s_col{}.release(); auto const agg = cudf::make_host_udf_aggregation( std::make_unique>()); auto const result = cudf::segmented_reduce( diff --git a/cpp/tests/groupby/host_udf_tests.cpp b/cpp/tests/groupby/host_udf_tests.cpp index 231fc3ec462..f3c2ce42931 100644 --- a/cpp/tests/groupby/host_udf_tests.cpp +++ b/cpp/tests/groupby/host_udf_tests.cpp @@ -16,7 +16,6 @@ #include #include -#include #include #include From 069600b4747cf65d8d4c8a9c536948b5de6e80fb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 17 Dec 2024 20:06:01 -0800 Subject: [PATCH 55/59] Instantiate `HostUDFAggregation` from `HostUDFWrapper` Signed-off-by: Nghia Truong --- .../main/java/ai/rapids/cudf/Aggregation.java | 32 ++++++++++++------- java/src/main/native/src/AggregationJni.cpp | 16 ---------- 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index 18d1e9b432c..57f293452b6 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -386,17 +386,33 @@ public boolean equals(Object other) { } } + /** + * A wrapper class for native host UDF aggregations. + *

+ * This class is used to store the native handle of a host UDF aggregation and is used as + * a proxy object to compute hash code and compare two host UDF aggregations. + * A new host UDF aggregation implementation must extend this class and override the + * {@code hashCode} and {@code equals} methods for such purposes. + */ + public static abstract class HostUDFWrapper { + public final long udfNativeHandle; + + HostUDFWrapper(long udfNativeHandle) { + this.udfNativeHandle = udfNativeHandle; + } + } + static final class HostUDFAggregation extends Aggregation { - private final long udfNativeHandle; + HostUDFWrapper wrapper; - private HostUDFAggregation(long udfNativeHandle) { + private HostUDFAggregation(HostUDFWrapper wrapper) { super(Kind.HOST_UDF); - this.udfNativeHandle = udfNativeHandle; + this.wrapper = wrapper; } @Override long createNativeInstance() { - return Aggregation.createHostUDFAgg(udfNativeHandle); + return Aggregation.createHostUDFAgg(wrapper.udfNativeHandle); } @Override @@ -409,8 +425,7 @@ public boolean equals(Object other) { if (this == other) { return true; } else if (other instanceof HostUDFAggregation) { - HostUDFAggregation o = (HostUDFAggregation) other; - return Aggregation.areHostUDFsEqual(udfNativeHandle, o.udfNativeHandle); + return wrapper.equals(((HostUDFAggregation) other).wrapper); } return false; } @@ -1035,9 +1050,4 @@ static MergeHistogramAggregation mergeHistogram() { * Create a HOST_UDF aggregation. */ private static native long createHostUDFAgg(long udfNativeHandle); - - /** - * Compare two host UDFs to see if they are equal. 
- */ - private static native boolean areHostUDFsEqual(long lhsNativeHandle, long rhsNativeHandle); } diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index e39b91c8f15..86b44b7e628 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -313,20 +313,4 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createHostUDFAgg(JNIEnv* CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_areHostUDFsEqual(JNIEnv* env, - jclass class_object, - jlong lhs_native_handle, - jlong rhs_native_handle) -{ - JNI_NULL_CHECK(env, lhs_native_handle, "lhs_native_handle is null", 0); - JNI_NULL_CHECK(env, rhs_native_handle, "rhs_native_handle is null", 0); - try { - cudf::jni::auto_set_device(env); - auto const lhs_udf_ptr = reinterpret_cast(lhs_native_handle); - auto const rhs_udf_ptr = reinterpret_cast(rhs_native_handle); - return lhs_udf_ptr->is_equal(*rhs_udf_ptr); - } - CATCH_STD(env, 0); -} - } // extern "C" From a63000dff86304b077b400b1bfdde97661436dcb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 17 Dec 2024 20:06:18 -0800 Subject: [PATCH 56/59] Fix Java --- java/src/main/java/ai/rapids/cudf/Aggregation.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index 57f293452b6..82188ea7faf 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -24,7 +24,7 @@ * Represents an aggregation operation. Please note that not all aggregations work, or even make * sense in all types of aggregation operations. 
*/ -abstract class Aggregation { +public abstract class Aggregation { static { NativeDepsLoader.loadNativeDeps(); } @@ -397,7 +397,7 @@ public boolean equals(Object other) { public static abstract class HostUDFWrapper { public final long udfNativeHandle; - HostUDFWrapper(long udfNativeHandle) { + public HostUDFWrapper(long udfNativeHandle) { this.udfNativeHandle = udfNativeHandle; } } @@ -417,7 +417,7 @@ long createNativeInstance() { @Override public int hashCode() { - return 31 * kind.hashCode(); + return 31 * kind.hashCode() ^ wrapper.hashCode(); } @Override @@ -885,11 +885,11 @@ static MergeSetsAggregation mergeSets(NullEquality nullEquality, NaNEquality nan /** * Host UDF aggregation, to execute a host-side user-defined function (UDF). - * @param udfNativeHandle Pointer to the native host UDF instance + * @param TODO * @return A new HostUDFAggregation instance */ - static HostUDFAggregation hostUDF(long udfNativeHandle) { - return new HostUDFAggregation(udfNativeHandle); + static HostUDFAggregation hostUDF(HostUDFWrapper wrapper) { + return new HostUDFAggregation(wrapper); } static final class LeadAggregation extends LeadLagAggregation { From 05084a4019e32d8b9a323e7e77f7f5b7f231ffee Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 17 Dec 2024 20:06:26 -0800 Subject: [PATCH 57/59] Apply new wrapper --- java/src/main/java/ai/rapids/cudf/GroupByAggregation.java | 6 +++--- java/src/main/java/ai/rapids/cudf/ReductionAggregation.java | 6 +++--- .../java/ai/rapids/cudf/SegmentedReductionAggregation.java | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java index 0c945a5ed2f..948bb36229b 100644 --- a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java @@ -279,11 +279,11 @@ public static GroupByAggregation mergeSets() { /** * Execute an aggregation using a 
host-side user-defined function (UDF). - * @param udfNativeHandle Pointer to the native host UDF instance + * @param TODO * @return A new GroupByAggregation instance */ - public static GroupByAggregation hostUDF(long udfNativeHandle) { - return new GroupByAggregation(Aggregation.hostUDF(udfNativeHandle)); + public static GroupByAggregation hostUDF(Aggregation.HostUDFWrapper wrapper) { + return new GroupByAggregation(Aggregation.hostUDF(wrapper)); } /** diff --git a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java index 063879ec34f..291ba0254de 100644 --- a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java @@ -288,11 +288,11 @@ public static ReductionAggregation mergeSets(NullEquality nullEquality, NaNEqual /** * Execute a reduction using a host-side user-defined function (UDF). - * @param udfNativeHandle Pointer to the native host UDF instance + * @param TODO * @return A new ReductionAggregation instance */ - public static ReductionAggregation hostUDF(long udfNativeHandle) { - return new ReductionAggregation(Aggregation.hostUDF(udfNativeHandle)); + public static ReductionAggregation hostUDF(Aggregation.HostUDFWrapper wrapper) { + return new ReductionAggregation(Aggregation.hostUDF(wrapper)); } /** diff --git a/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java b/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java index 099b5535bc5..852962a7b06 100644 --- a/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java @@ -104,10 +104,10 @@ public static SegmentedReductionAggregation all() { /** * Execute a reduction using a host-side user-defined function (UDF). 
- * @param udfNativeHandle Pointer to the native host UDF instance + * @param TODO * @return A new SegmentedReductionAggregation instance */ - public static SegmentedReductionAggregation hostUDF(long udfNativeHandle) { - return new SegmentedReductionAggregation(Aggregation.hostUDF(udfNativeHandle)); + public static SegmentedReductionAggregation hostUDF(Aggregation.HostUDFWrapper wrapper) { + return new SegmentedReductionAggregation(Aggregation.hostUDF(wrapper)); } } From 28af2de4869cedc023a789e63994f46679057243 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 17 Dec 2024 20:40:00 -0800 Subject: [PATCH 58/59] Move `HostUDFWrapper` --- .../main/java/ai/rapids/cudf/Aggregation.java | 18 +--------- .../java/ai/rapids/cudf/HostUDFWrapper.java | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+), 17 deletions(-) create mode 100644 java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index 82188ea7faf..d3692ed2d5a 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -24,7 +24,7 @@ * Represents an aggregation operation. Please note that not all aggregations work, or even make * sense in all types of aggregation operations. */ -public abstract class Aggregation { +abstract class Aggregation { static { NativeDepsLoader.loadNativeDeps(); } @@ -386,22 +386,6 @@ public boolean equals(Object other) { } } - /** - * A wrapper class for native host UDF aggregations. - *

- * This class is used to store the native handle of a host UDF aggregation and is used as - * a proxy object to compute hash code and compare two host UDF aggregations. - * A new host UDF aggregation implementation must extend this class and override the - * {@code hashCode} and {@code equals} methods for such purposes. - */ - public static abstract class HostUDFWrapper { - public final long udfNativeHandle; - - public HostUDFWrapper(long udfNativeHandle) { - this.udfNativeHandle = udfNativeHandle; - } - } - static final class HostUDFAggregation extends Aggregation { HostUDFWrapper wrapper; diff --git a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java new file mode 100644 index 00000000000..0b6ecf2e140 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ai.rapids.cudf; + +/** + * A wrapper around native host UDF aggregations. + *

+ * This class is used to store the native handle of a host UDF aggregation and is used as + * a proxy object to compute hash code and compare two host UDF aggregations for equality. + *

+ * A new host UDF aggregation implementation must extend this class and override the + * {@code hashCode} and {@code equals} methods for such purposes. + */ +public abstract class HostUDFWrapper { + public final long udfNativeHandle; + + public HostUDFWrapper(long udfNativeHandle) { + this.udfNativeHandle = udfNativeHandle; + } +} From d75d3dabc02e2097d57a961eca9d7f1927d4001f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 17 Dec 2024 20:40:07 -0800 Subject: [PATCH 59/59] Fix compile error --- java/src/main/java/ai/rapids/cudf/GroupByAggregation.java | 2 +- java/src/main/java/ai/rapids/cudf/ReductionAggregation.java | 2 +- .../main/java/ai/rapids/cudf/SegmentedReductionAggregation.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java index 948bb36229b..f8b07336f62 100644 --- a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java @@ -282,7 +282,7 @@ public static GroupByAggregation mergeSets() { * @param TODO * @return A new GroupByAggregation instance */ - public static GroupByAggregation hostUDF(Aggregation.HostUDFWrapper wrapper) { + public static GroupByAggregation hostUDF(HostUDFWrapper wrapper) { return new GroupByAggregation(Aggregation.hostUDF(wrapper)); } diff --git a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java index 291ba0254de..c49516697b6 100644 --- a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java @@ -291,7 +291,7 @@ public static ReductionAggregation mergeSets(NullEquality nullEquality, NaNEqual * @param TODO * @return A new ReductionAggregation instance */ - public static ReductionAggregation hostUDF(Aggregation.HostUDFWrapper wrapper) { + public static ReductionAggregation 
hostUDF(HostUDFWrapper wrapper) { return new ReductionAggregation(Aggregation.hostUDF(wrapper)); } diff --git a/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java b/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java index 852962a7b06..c8c40c74e99 100644 --- a/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java @@ -107,7 +107,7 @@ public static SegmentedReductionAggregation all() { * @param TODO * @return A new SegmentedReductionAggregation instance */ - public static SegmentedReductionAggregation hostUDF(Aggregation.HostUDFWrapper wrapper) { + public static SegmentedReductionAggregation hostUDF(HostUDFWrapper wrapper) { return new SegmentedReductionAggregation(Aggregation.hostUDF(wrapper)); } }