From aadadead809b387a5c952922c22bf208d13cc3f4 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 13 Oct 2023 11:34:27 -0400 Subject: [PATCH] Expose stream parameter in public strings combine APIs --- cpp/include/cudf/strings/combine.hpp | 88 ++++++++++-------- cpp/include/cudf/strings/repeat_strings.hpp | 8 +- cpp/src/strings/combine/concatenate.cu | 14 +-- cpp/src/strings/combine/join.cu | 3 +- cpp/src/strings/combine/join_list_elements.cu | 13 +-- cpp/src/strings/repeat_strings.cu | 11 ++- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/strings/combine_test.cpp | 93 +++++++++++++++++++ 8 files changed, 168 insertions(+), 63 deletions(-) create mode 100644 cpp/tests/streams/strings/combine_test.cpp diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 71f65ac9080..568e8ac50ec 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -66,18 +66,20 @@ enum class output_if_empty_list { * * @throw cudf::logic_error if separator is not valid. * - * @param strings Strings for this operation. + * @param input Strings for this operation * @param separator String that should inserted between each string. * Default is an empty string. - * @param narep String that should represent any null strings found. + * @param narep String to replace any null strings found. * Default of invalid-scalar will ignore any null entries. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column containing one string. 
*/ std::unique_ptr join_strings( - strings_column_view const& strings, + strings_column_view const& input, string_scalar const& separator = string_scalar(""), string_scalar const& narep = string_scalar("", false), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -127,18 +129,17 @@ std::unique_ptr join_strings( * @throw cudf::logic_error if the number of rows from @p separators and @p strings_columns * do not match * - * @param strings_columns List of strings columns to concatenate. + * @param strings_columns List of strings columns to concatenate * @param separators Strings column that provides the separator for a given row - * @param separator_narep String that should be used in place of a null separator for a given - * row. Default of invalid-scalar means no row separator value replacements. - * Default is an invalid string. - * @param col_narep String that should be used in place of any null strings - * found in any column. Default of invalid-scalar means no null column value replacements. - * Default is an invalid string. + * @param separator_narep String to replace a null separator for a given row. + * Default of invalid-scalar means no row separator value replacements. + * @param col_narep String that should be used in place of any null strings found in any column. + * Default of invalid-scalar means no null column value replacements. * @param separate_nulls If YES, then the separator is included for null rows * if `col_narep` is valid. - * @param mr Resource for allocating device memory. - * @return New column with concatenated results. 
+ * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Resource for allocating device memory + * @return New column with concatenated results */ std::unique_ptr concatenate( table_view const& strings_columns, @@ -146,6 +147,7 @@ std::unique_ptr concatenate( string_scalar const& separator_narep = string_scalar("", false), string_scalar const& col_narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -184,21 +186,23 @@ std::unique_ptr concatenate( * @throw cudf::logic_error if separator is not valid. * @throw cudf::logic_error if only one column is specified * - * @param strings_columns List of string columns to concatenate. + * @param strings_columns List of string columns to concatenate * @param separator String that should inserted between each string from each row. * Default is an empty string. - * @param narep String that should be used in place of any null strings - * found in any column. Default of invalid-scalar means any null entry in any column will + * @param narep String to replace any null strings found in any column. + * Default of invalid-scalar means any null entry in any column will * produces a null result for that row. - * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column with concatenated results. 
+ * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with concatenated results */ std::unique_ptr concatenate( table_view const& strings_columns, string_scalar const& separator = string_scalar(""), string_scalar const& narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -243,19 +247,20 @@ std::unique_ptr concatenate( * @throw cudf::logic_error if the number of rows from `separators` and `lists_strings_column` do * not match * - * @param lists_strings_column Column containing lists of strings to concatenate. - * @param separators Strings column that provides separators for concatenation. - * @param separator_narep String that should be used to replace null separator, default is an - * invalid-scalar denoting that rows containing null separator will result in null string in - * the corresponding output rows. - * @param string_narep String that should be used to replace null strings in any non-null list row, - * default is an invalid-scalar denoting that list rows containing null strings will result - * in null string in the corresponding output rows. - * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. - * @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will + * @param lists_strings_column Column containing lists of strings to concatenate + * @param separators Strings column that provides separators for concatenation + * @param separator_narep String that should be used to replace a null separator. 
+ * Default is an invalid-scalar denoting that rows containing null separator will result in + * a null string in the corresponding output rows. + * @param string_narep String to replace null strings in any non-null list row. + * Default is an invalid-scalar denoting that list rows containing null strings will result + * in a null string in the corresponding output rows. + * @param separate_nulls If YES, then the separator is included for null rows if `string_narep` is valid + * @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will * result in an empty string. Otherwise, it will result in a null. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with concatenated results. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with concatenated results */ std::unique_ptr join_list_elements( lists_column_view const& lists_strings_column, @@ -264,6 +269,7 @@ std::unique_ptr join_list_elements( string_scalar const& string_narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -303,17 +309,18 @@ std::unique_ptr join_list_elements( * @throw cudf::logic_error if input column is not lists of strings column. * @throw cudf::logic_error if separator is not valid. * - * @param lists_strings_column Column containing lists of strings to concatenate. - * @param separator String that should inserted between strings of each list row, default is an - * empty string.
- * @param narep String that should be used to replace null strings in any non-null list row, default - * is an invalid-scalar denoting that list rows containing null strings will result in null - * string in the corresponding output rows. - * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. - * @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will result + * @param lists_strings_column Column containing lists of strings to concatenate + * @param separator String to insert between strings of each list row. + * Default is an empty string. + * @param narep String to replace null strings in any non-null list row. + * Default is an invalid-scalar denoting that list rows containing null strings will result + * in a null string in the corresponding output rows. + * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid + * @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will result * in an empty string. Otherwise, it will result in a null. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with concatenated results. 
+ * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with concatenated results */ std::unique_ptr join_list_elements( lists_column_view const& lists_strings_column, @@ -321,6 +328,7 @@ std::unique_ptr join_list_elements( string_scalar const& narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index 2b6575f80d0..7dc9c33f579 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -52,12 +52,14 @@ namespace strings { * * @param input The scalar containing the string to repeat * @param repeat_times The number of times the input string is repeated + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned string scalar * @return New string scalar in which the input string is repeated */ std::unique_ptr repeat_string( string_scalar const& input, size_type repeat_times, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -81,12 +83,14 @@ std::unique_ptr repeat_string( * * @param input The column containing strings to repeat * @param repeat_times The number of times each input string is repeated + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned strings column * @return New column containing the repeated 
strings */ std::unique_ptr repeat_strings( strings_column_view const& input, size_type repeat_times, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -115,13 +119,15 @@ std::unique_ptr repeat_strings( * * @param input The column containing strings to repeat * @param repeat_times The column containing numbers of times that the corresponding input strings - * are repeated + * for each row are repeated + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned strings column * @return New column containing the repeated strings. */ std::unique_ptr repeat_strings( strings_column_view const& input, column_view const& repeat_times, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/src/strings/combine/concatenate.cu b/cpp/src/strings/combine/concatenate.cu index ba8acd23467..0a11b6dc460 100644 --- a/cpp/src/strings/combine/concatenate.cu +++ b/cpp/src/strings/combine/concatenate.cu @@ -267,11 +267,11 @@ std::unique_ptr concatenate(table_view const& strings_columns, string_scalar const& separator, string_scalar const& narep, separator_on_nulls separate_nulls, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate( - strings_columns, separator, narep, separate_nulls, cudf::get_default_stream(), mr); + return detail::concatenate(strings_columns, separator, narep, separate_nulls, stream, mr); } std::unique_ptr concatenate(table_view const& strings_columns, @@ -279,16 +279,12 @@ std::unique_ptr concatenate(table_view const& strings_columns, string_scalar const& separator_narep, string_scalar const& col_narep, separator_on_nulls separate_nulls, + rmm::cuda_stream_view stream, 
rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate(strings_columns, - separators, - separator_narep, - col_narep, - separate_nulls, - cudf::get_default_stream(), - mr); + return detail::concatenate( + strings_columns, separators, separator_narep, col_narep, separate_nulls, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/combine/join.cu b/cpp/src/strings/combine/join.cu index faf1be6a26f..9ab527feaf8 100644 --- a/cpp/src/strings/combine/join.cu +++ b/cpp/src/strings/combine/join.cu @@ -180,10 +180,11 @@ std::unique_ptr join_strings(strings_column_view const& input, std::unique_ptr join_strings(strings_column_view const& strings, string_scalar const& separator, string_scalar const& narep, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::join_strings(strings, separator, narep, cudf::get_default_stream(), mr); + return detail::join_strings(strings, separator, narep, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/combine/join_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu index eee59e37478..372b49fb0ee 100644 --- a/cpp/src/strings/combine/join_list_elements.cu +++ b/cpp/src/strings/combine/join_list_elements.cu @@ -301,16 +301,12 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string string_scalar const& narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::join_list_elements(lists_strings_column, - separator, - narep, - separate_nulls, - empty_list_policy, - cudf::get_default_stream(), - mr); + return detail::join_list_elements( + lists_strings_column, separator, narep, separate_nulls, empty_list_policy, stream, mr); } std::unique_ptr join_list_elements(lists_column_view const& lists_strings_column, @@ -319,6 +315,7 @@ std::unique_ptr 
join_list_elements(lists_column_view const& lists_string string_scalar const& string_narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); @@ -328,7 +325,7 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string string_narep, separate_nulls, empty_list_policy, - cudf::get_default_stream(), + stream, mr); } diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index 396e1e6a2ac..847a64f5602 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -67,7 +67,7 @@ std::unique_ptr repeat_string(string_scalar const& input, return in_ptr[idx % str_size]; }); - return std::make_unique(std::move(buff)); + return std::make_unique(std::move(buff), true, stream, mr); } namespace { @@ -260,26 +260,29 @@ std::unique_ptr repeat_strings(strings_column_view const& input, std::unique_ptr repeat_string(string_scalar const& input, size_type repeat_times, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_string(input, repeat_times, cudf::get_default_stream(), mr); + return detail::repeat_string(input, repeat_times, stream, mr); } std::unique_ptr repeat_strings(strings_column_view const& input, size_type repeat_times, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr); + return detail::repeat_strings(input, repeat_times, stream, mr); } std::unique_ptr repeat_strings(strings_column_view const& input, column_view const& repeat_times, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr); + return detail::repeat_strings(input, repeat_times, stream, mr); } } // namespace strings diff --git 
a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f36fcbc9246..6e538ce31ce 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -635,6 +635,7 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp + streams/strings/combine_test.cpp streams/strings/find_test.cpp streams/strings/replace_test.cpp streams/strings/split_test.cpp diff --git a/cpp/tests/streams/strings/combine_test.cpp b/cpp/tests/streams/strings/combine_test.cpp new file mode 100644 index 00000000000..9562634957a --- /dev/null +++ b/cpp/tests/streams/strings/combine_test.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include + +class StringsCombineTest : public cudf::test::BaseFixture {}; + +TEST_F(StringsCombineTest, Concatenate) +{ + auto input = cudf::test::strings_column_wrapper({"Héllo", "thesé", "tést"}); + auto view = cudf::table_view({input, input}); + + auto separators = cudf::test::strings_column_wrapper({"_", ".", " "}); + auto separators_view = cudf::strings_column_view(separators); + auto sep_on_null = cudf::strings::separator_on_nulls::YES; + + auto const separator = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + auto const narep = cudf::string_scalar("n/a", true, cudf::test::get_default_stream()); + cudf::strings::concatenate(view, separator, narep, sep_on_null, cudf::test::get_default_stream()); + cudf::strings::concatenate( + view, separators_view, narep, narep, sep_on_null, cudf::test::get_default_stream()); +} + +TEST_F(StringsCombineTest, Join) +{ + auto input = cudf::test::strings_column_wrapper({"Héllo", "thesé", "tést"}); + auto view = cudf::strings_column_view(input); + + auto const separator = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + auto const narep = cudf::string_scalar("n/a", true, cudf::test::get_default_stream()); + cudf::strings::join_strings(view, separator, narep, cudf::test::get_default_stream()); +} + +TEST_F(StringsCombineTest, JoinLists) +{ + using STR_LISTS = cudf::test::lists_column_wrapper; + auto const input = STR_LISTS{ + STR_LISTS{"a", "bb", "ccc"}, STR_LISTS{"ddd", "efgh", "ijk"}, STR_LISTS{"zzz", "xxxxx"}}; + auto view = cudf::lists_column_view(input); + + auto separators = cudf::test::strings_column_wrapper({"_", ".", " "}); + auto separators_view = cudf::strings_column_view(separators); + auto sep_on_null = cudf::strings::separator_on_nulls::YES; + auto if_empty = cudf::strings::output_if_empty_list::EMPTY_STRING; + + auto const separator = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + auto const 
narep = cudf::string_scalar("n/a", true, cudf::test::get_default_stream()); + cudf::strings::join_list_elements( + view, separator, narep, sep_on_null, if_empty, cudf::test::get_default_stream()); + cudf::strings::join_list_elements( + view, separators_view, narep, narep, sep_on_null, if_empty, cudf::test::get_default_stream()); +} + +TEST_F(StringsCombineTest, Repeat) +{ + auto input = cudf::test::strings_column_wrapper({"Héllo", "thesé", "tést"}); + auto view = cudf::strings_column_view(input); + cudf::strings::repeat_strings(view, 0, cudf::test::get_default_stream()); + cudf::strings::repeat_strings(view, 1, cudf::test::get_default_stream()); + cudf::strings::repeat_strings(view, 10, cudf::test::get_default_stream()); + + auto counts = cudf::test::fixed_width_column_wrapper({9, 8, 7}); + cudf::strings::repeat_strings(view, counts, cudf::test::get_default_stream()); + cudf::strings::repeat_strings(view, counts, cudf::test::get_default_stream()); + + auto const str = cudf::string_scalar("X", true, cudf::test::get_default_stream()); + cudf::strings::repeat_string(str, 0, cudf::test::get_default_stream()); + cudf::strings::repeat_string(str, 1, cudf::test::get_default_stream()); + cudf::strings::repeat_string(str, 10, cudf::test::get_default_stream()); + + auto const invalid = cudf::string_scalar("", false, cudf::test::get_default_stream()); + cudf::strings::repeat_string(invalid, 10, cudf::test::get_default_stream()); +}