From 5cec219b39507297c325fa7eff24b84bb993a553 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 5 Oct 2023 13:06:51 -0400 Subject: [PATCH] Expose stream parameter in public strings convert APIs --- .../cudf/strings/convert/convert_booleans.hpp | 32 ++-- .../cudf/strings/convert/convert_datetime.hpp | 34 ++-- .../strings/convert/convert_durations.hpp | 26 ++-- .../strings/convert/convert_fixed_point.hpp | 30 ++-- .../cudf/strings/convert/convert_floats.hpp | 30 ++-- .../cudf/strings/convert/convert_integers.hpp | 72 +++++---- .../cudf/strings/convert/convert_ipv4.hpp | 30 ++-- .../cudf/strings/convert/convert_lists.hpp | 14 +- .../cudf/strings/convert/convert_urls.hpp | 22 +-- cpp/src/strings/convert/convert_booleans.cu | 8 +- cpp/src/strings/convert/convert_datetime.cu | 9 +- cpp/src/strings/convert/convert_durations.cu | 8 +- .../strings/convert/convert_fixed_point.cu | 11 +- cpp/src/strings/convert/convert_floats.cu | 14 +- cpp/src/strings/convert/convert_hex.cu | 9 +- cpp/src/strings/convert/convert_integers.cu | 12 +- cpp/src/strings/convert/convert_ipv4.cu | 9 +- cpp/src/strings/convert/convert_lists.cu | 3 +- cpp/src/strings/convert/convert_urls.cu | 6 +- cpp/tests/CMakeLists.txt | 4 +- cpp/tests/streams/strings/convert_test.cpp | 146 ++++++++++++++++++ cpp/tests/strings/booleans_tests.cpp | 33 +++- cpp/tests/strings/format_lists_tests.cpp | 9 +- 23 files changed, 410 insertions(+), 161 deletions(-) create mode 100644 cpp/tests/streams/strings/convert_test.cpp diff --git a/cpp/include/cudf/strings/convert/convert_booleans.hpp b/cpp/include/cudf/strings/convert/convert_booleans.hpp index ab63503f166..9e9f25e800a 100644 --- a/cpp/include/cudf/strings/convert/convert_booleans.hpp +++ b/cpp/include/cudf/strings/convert/convert_booleans.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,14 +35,16 @@ namespace strings { * * Any null entries will result in corresponding null entries in the output column. * - * @param strings Strings instance for this operation. - * @param true_string String to expect for true. Non-matching strings are false. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New BOOL8 column converted from strings. + * @param input Strings instance for this operation + * @param true_string String to expect for true. Non-matching strings are false + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New BOOL8 column converted from strings */ std::unique_ptr to_booleans( - strings_column_view const& strings, - string_scalar const& true_string = string_scalar("true"), + strings_column_view const& input, + string_scalar const& true_string, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -53,16 +55,18 @@ std::unique_ptr to_booleans( * * @throw cudf::logic_error if the input column is not BOOL8 type. * - * @param booleans Boolean column to convert. - * @param true_string String to use for true in the output column. - * @param false_string String to use for false in the output column. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
+ * @param booleans Boolean column to convert + * @param true_string String to use for true in the output column + * @param false_string String to use for false in the output column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr from_booleans( column_view const& booleans, - string_scalar const& true_string = string_scalar("true"), - string_scalar const& false_string = string_scalar("false"), + string_scalar const& true_string, + string_scalar const& false_string, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_datetime.hpp b/cpp/include/cudf/strings/convert/convert_datetime.hpp index fa729d26734..81cce14b53b 100644 --- a/cpp/include/cudf/strings/convert/convert_datetime.hpp +++ b/cpp/include/cudf/strings/convert/convert_datetime.hpp @@ -77,16 +77,18 @@ namespace strings { * * @throw cudf::logic_error if timestamp_type is not a timestamp type. * - * @param strings Strings instance for this operation. - * @param timestamp_type The timestamp type used for creating the output column. - * @param format String specifying the timestamp format in strings. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New datetime column. 
+ * @param input Strings instance for this operation + * @param timestamp_type The timestamp type used for creating the output column + * @param format String specifying the timestamp format in strings + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New datetime column */ std::unique_ptr to_timestamps( - strings_column_view const& strings, + strings_column_view const& input, data_type timestamp_type, std::string_view format, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -124,14 +126,16 @@ std::unique_ptr to_timestamps( * This will return a column of type BOOL8 where a `true` row indicates the corresponding * input string can be parsed correctly with the given format. * - * @param strings Strings instance for this operation. - * @param format String specifying the timestamp format in strings. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New BOOL8 column. + * @param input Strings instance for this operation + * @param format String specifying the timestamp format in strings + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New BOOL8 column */ std::unique_ptr is_timestamp( - strings_column_view const& strings, + strings_column_view const& input, std::string_view format, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -231,19 +235,21 @@ std::unique_ptr is_timestamp( * @throw cudf::logic_error if the `format` string is empty * @throw cudf::logic_error if `names.size()` is an invalid size. Must be 0 or 40 strings. 
* - * @param timestamps Timestamp values to convert. + * @param timestamps Timestamp values to convert * @param format The string specifying output format. * Default format is "%Y-%m-%dT%H:%M:%SZ". * @param names The string names to use for weekdays ("%a", "%A") and months ("%b", "%B") * Default is an empty `strings_column_view`. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with formatted timestamps. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with formatted timestamps */ std::unique_ptr from_timestamps( column_view const& timestamps, std::string_view format = "%Y-%m-%dT%H:%M:%SZ", strings_column_view const& names = strings_column_view(column_view{ data_type{type_id::STRING}, 0, nullptr, nullptr, 0}), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_durations.hpp b/cpp/include/cudf/strings/convert/convert_durations.hpp index e915ec26279..a1f4e4ead1d 100644 --- a/cpp/include/cudf/strings/convert/convert_durations.hpp +++ b/cpp/include/cudf/strings/convert/convert_durations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,16 +65,18 @@ namespace strings { * * @throw cudf::logic_error if duration_type is not a duration type. * - * @param strings Strings instance for this operation. - * @param duration_type The duration type used for creating the output column. - * @param format String specifying the duration format in strings. 
- * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New duration column. + * @param input Strings instance for this operation + * @param duration_type The duration type used for creating the output column + * @param format String specifying the duration format in strings + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New duration column */ std::unique_ptr to_durations( - strings_column_view const& strings, + strings_column_view const& input, data_type duration_type, std::string_view format, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -115,15 +117,17 @@ std::unique_ptr to_durations( * * @throw cudf::logic_error if `durations` column parameter is not a duration type. * - * @param durations Duration values to convert. + * @param durations Duration values to convert * @param format The string specifying output format. - * Default format is ""%d days %H:%M:%S". - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with formatted durations. + * Default format is "%D days %H:%M:%S". 
+ * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with formatted durations */ std::unique_ptr from_durations( column_view const& durations, std::string_view format = "%D days %H:%M:%S", + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp index 3852dc8e81a..8f37715967a 100644 --- a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp +++ b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,14 +53,16 @@ namespace strings { * * @throw cudf::logic_error if `output_type` is not a fixed-point decimal type. * - * @param input Strings instance for this operation. - * @param output_type Type of fixed-point column to return including the scale value. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of `output_type`. 
+ * @param input Strings instance for this operation + * @param output_type Type of fixed-point column to return including the scale value + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of `output_type` */ std::unique_ptr to_fixed_point( strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -83,12 +85,14 @@ std::unique_ptr to_fixed_point( * * @throw cudf::logic_error if the `input` column is not a fixed-point decimal type. * - * @param input Fixed-point column to convert. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. + * @param input Fixed-point column to convert + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr from_fixed_point( column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -111,14 +115,16 @@ std::unique_ptr from_fixed_point( * * @throw cudf::logic_error if the `decimal_type` is not a fixed-point decimal type. * - * @param input Strings instance for this operation. - * @param decimal_type Fixed-point type (with scale) used only for checking overflow. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param decimal_type Fixed-point type (with scale) used only for checking overflow + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_fixed_point( strings_column_view const& input, data_type decimal_type = data_type{type_id::DECIMAL64}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp index 38a84fc1548..a35cb68ef4e 100644 --- a/cpp/include/cudf/strings/convert/convert_floats.hpp +++ b/cpp/include/cudf/strings/convert/convert_floats.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,14 +39,16 @@ namespace strings { * * @throw cudf::logic_error if output_type is not float type. * - * @param strings Strings instance for this operation. - * @param output_type Type of float numeric column to return. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column with floats converted from strings. 
+ * @param strings Strings instance for this operation + * @param output_type Type of float numeric column to return + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with floats converted from strings */ std::unique_ptr to_floats( strings_column_view const& strings, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -62,12 +64,14 @@ std::unique_ptr to_floats( * * @throw cudf::logic_error if floats column is not float type. * - * @param floats Numeric column to convert. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with floats as strings. + * @param floats Numeric column to convert + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with floats as strings */ std::unique_ptr from_floats( column_view const& floats, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -86,12 +90,14 @@ std::unique_ptr from_floats( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_float( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 44213b84139..bbe3a04cef5 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -46,14 +46,16 @@ namespace strings { * * @throw cudf::logic_error if output_type is not integral type. * - * @param strings Strings instance for this operation. - * @param output_type Type of integer numeric column to return. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column with integers converted from strings. + * @param input Strings instance for this operation + * @param output_type Type of integer numeric column to return + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with integers converted from strings */ std::unique_ptr to_integers( - strings_column_view const& strings, + strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -67,12 +69,14 @@ std::unique_ptr to_integers( * * @throw cudf::logic_error if integers column is not integral type. 
* - * @param integers Numeric column to convert. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with integers as strings. + * @param integers Numeric column to convert + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with integers as strings */ std::unique_ptr from_integers( column_view const& integers, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -94,12 +98,14 @@ std::unique_ptr from_integers( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. + * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_integer( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -124,14 +130,16 @@ std::unique_ptr is_integer( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param int_type Integer type used for checking underflow and overflow. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param int_type Integer type used for checking underflow and overflow + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_integer( - strings_column_view const& strings, + strings_column_view const& input, data_type int_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -152,14 +160,16 @@ std::unique_ptr is_integer( * * @throw cudf::logic_error if output_type is not integral type. * - * @param strings Strings instance for this operation. - * @param output_type Type of integer numeric column to return. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column with integers converted from strings. + * @param input Strings instance for this operation + * @param output_type Type of integer numeric column to return + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with integers converted from strings */ std::unique_ptr hex_to_integers( - strings_column_view const& strings, + strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -179,12 +189,14 @@ std::unique_ptr hex_to_integers( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_hex( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -210,12 +222,14 @@ std::unique_ptr is_hex( * * @throw cudf::logic_error if the input column is not integral type. * - * @param input Integer column to convert to hex. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with hexadecimal characters. + * @param input Integer column to convert to hex + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with hexadecimal characters */ std::unique_ptr integers_to_hex( column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_ipv4.hpp b/cpp/include/cudf/strings/convert/convert_ipv4.hpp index 22272af74fc..25ad7b86748 100644 --- a/cpp/include/cudf/strings/convert/convert_ipv4.hpp +++ b/cpp/include/cudf/strings/convert/convert_ipv4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -48,12 +48,14 @@ namespace strings { * * Any null entries will result in corresponding null entries in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT64 column converted from strings. + * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New INT64 column converted from strings */ std::unique_ptr ipv4_to_integers( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -71,12 +73,14 @@ std::unique_ptr ipv4_to_integers( * * @throw cudf::logic_error if the input column is not INT64 type. * - * @param integers Integer (INT64) column to convert. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. + * @param integers Integer (INT64) column to convert + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr integers_to_ipv4( column_view const& integers, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -96,12 +100,14 @@ std::unique_ptr integers_to_ipv4( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_ipv4( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_lists.hpp b/cpp/include/cudf/strings/convert/convert_lists.hpp index 7ab1bf47b0a..dedf4e95138 100644 --- a/cpp/include/cudf/strings/convert/convert_lists.hpp +++ b/cpp/include/cudf/strings/convert/convert_lists.hpp @@ -50,17 +50,19 @@ namespace strings { * * @throw cudf::logic_error if the input column is not a LIST type with a STRING child. * - * @param input Lists column to format. - * @param na_rep Replacement string for null elements. - * @param separators Strings to use for enclosing list components and separating elements. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
+ * @param input Lists column to format + * @param na_rep Replacement string for null elements + * @param separators Strings to use for enclosing list components and separating elements + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr format_list_column( lists_column_view const& input, - string_scalar const& na_rep = string_scalar("NULL"), + string_scalar const& na_rep = string_scalar(""), strings_column_view const& separators = strings_column_view(column_view{ data_type{type_id::STRING}, 0, nullptr, nullptr, 0}), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_urls.hpp b/cpp/include/cudf/strings/convert/convert_urls.hpp index 7f29a0d2149..902835081af 100644 --- a/cpp/include/cudf/strings/convert/convert_urls.hpp +++ b/cpp/include/cudf/strings/convert/convert_urls.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,12 +39,14 @@ namespace strings { * * Any null entries will result in corresponding null entries in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
+ * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr url_encode( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -60,12 +62,14 @@ std::unique_ptr url_encode( * * Any null entries will result in corresponding null entries in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. + * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr url_decode( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/src/strings/convert/convert_booleans.cu b/cpp/src/strings/convert/convert_booleans.cu index 0d04fc74b0c..8ed5b68f10b 100644 --- a/cpp/src/strings/convert/convert_booleans.cu +++ b/cpp/src/strings/convert/convert_booleans.cu @@ -80,12 +80,13 @@ std::unique_ptr to_booleans(strings_column_view const& strings, } // namespace detail // external API -std::unique_ptr to_booleans(strings_column_view const& strings, +std::unique_ptr to_booleans(strings_column_view const& input, string_scalar const& true_string, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return 
detail::to_booleans(strings, true_string, cudf::get_default_stream(), mr); + return detail::to_booleans(input, true_string, stream, mr); } namespace detail { @@ -156,10 +157,11 @@ std::unique_ptr from_booleans(column_view const& booleans, std::unique_ptr from_booleans(column_view const& booleans, string_scalar const& true_string, string_scalar const& false_string, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_booleans(booleans, true_string, false_string, cudf::get_default_stream(), mr); + return detail::from_booleans(booleans, true_string, false_string, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 8a953d778ed..d2609441d72 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -710,18 +710,20 @@ std::unique_ptr is_timestamp(strings_column_view const& input, std::unique_ptr to_timestamps(strings_column_view const& input, data_type timestamp_type, std::string_view format, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_timestamps(input, timestamp_type, format, cudf::get_default_stream(), mr); + return detail::to_timestamps(input, timestamp_type, format, stream, mr); } std::unique_ptr is_timestamp(strings_column_view const& input, std::string_view format, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_timestamp(input, format, cudf::get_default_stream(), mr); + return detail::is_timestamp(input, format, stream, mr); } namespace detail { @@ -1168,10 +1170,11 @@ std::unique_ptr from_timestamps(column_view const& timestamps, std::unique_ptr from_timestamps(column_view const& timestamps, std::string_view format, strings_column_view const& names, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { 
CUDF_FUNC_RANGE(); - return detail::from_timestamps(timestamps, format, names, cudf::get_default_stream(), mr); + return detail::from_timestamps(timestamps, format, names, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index 6ab70825a6b..be2292495a4 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -721,19 +721,21 @@ std::unique_ptr to_durations(strings_column_view const& strings, std::unique_ptr from_durations(column_view const& durations, std::string_view format, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_durations(durations, format, cudf::get_default_stream(), mr); + return detail::from_durations(durations, format, stream, mr); } -std::unique_ptr to_durations(strings_column_view const& strings, +std::unique_ptr to_durations(strings_column_view const& input, data_type duration_type, std::string_view format, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_durations(strings, duration_type, format, cudf::get_default_stream(), mr); + return detail::to_durations(input, duration_type, format, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 51aab9faeba..2c59f6dcd29 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -184,12 +184,13 @@ std::unique_ptr to_fixed_point(strings_column_view const& input, } // namespace detail // external API -std::unique_ptr to_fixed_point(strings_column_view const& strings, +std::unique_ptr to_fixed_point(strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_fixed_point(strings, 
output_type, cudf::get_default_stream(), mr); + return detail::to_fixed_point(input, output_type, stream, mr); } namespace detail { @@ -277,10 +278,11 @@ std::unique_ptr from_fixed_point(column_view const& input, // external API std::unique_ptr from_fixed_point(column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_fixed_point(input, cudf::get_default_stream(), mr); + return detail::from_fixed_point(input, stream, mr); } namespace detail { @@ -341,10 +343,11 @@ std::unique_ptr is_fixed_point(strings_column_view const& input, std::unique_ptr is_fixed_point(strings_column_view const& input, data_type decimal_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_fixed_point(input, decimal_type, cudf::get_default_stream(), mr); + return detail::is_fixed_point(input, decimal_type, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 32167589ab4..627eaa4c9d6 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -120,10 +120,11 @@ std::unique_ptr to_floats(strings_column_view const& strings, std::unique_ptr to_floats(strings_column_view const& strings, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_floats(strings, output_type, cudf::get_default_stream(), mr); + return detail::to_floats(strings, output_type, stream, mr); } namespace detail { @@ -436,10 +437,12 @@ std::unique_ptr from_floats(column_view const& floats, } // namespace detail // external API -std::unique_ptr from_floats(column_view const& floats, rmm::mr::device_memory_resource* mr) +std::unique_ptr from_floats(column_view const& floats, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return 
detail::from_floats(floats, cudf::get_default_stream(), mr); + return detail::from_floats(floats, stream, mr); } namespace detail { @@ -473,11 +476,12 @@ std::unique_ptr is_float(strings_column_view const& strings, } // namespace detail // external API -std::unique_ptr is_float(strings_column_view const& strings, +std::unique_ptr is_float(strings_column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_float(strings, cudf::get_default_stream(), mr); + return detail::is_float(input, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu index bed682aba71..e0ccb6907b5 100644 --- a/cpp/src/strings/convert/convert_hex.cu +++ b/cpp/src/strings/convert/convert_hex.cu @@ -280,24 +280,27 @@ std::unique_ptr integers_to_hex(column_view const& input, // external API std::unique_ptr hex_to_integers(strings_column_view const& strings, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::hex_to_integers(strings, output_type, cudf::get_default_stream(), mr); + return detail::hex_to_integers(strings, output_type, stream, mr); } std::unique_ptr is_hex(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_hex(strings, cudf::get_default_stream(), mr); + return detail::is_hex(strings, stream, mr); } std::unique_ptr integers_to_hex(column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::integers_to_hex(input, cudf::get_default_stream(), mr); + return detail::integers_to_hex(input, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index 5597d2831c0..84d7a1b80aa 100644 --- 
a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -203,18 +203,20 @@ std::unique_ptr is_integer(strings_column_view const& strings, // external APIs std::unique_ptr is_integer(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_integer(strings, cudf::get_default_stream(), mr); + return detail::is_integer(strings, stream, mr); } std::unique_ptr is_integer(strings_column_view const& strings, data_type int_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_integer(strings, int_type, cudf::get_default_stream(), mr); + return detail::is_integer(strings, int_type, stream, mr); } namespace detail { @@ -304,10 +306,11 @@ std::unique_ptr to_integers(strings_column_view const& strings, // external API std::unique_ptr to_integers(strings_column_view const& strings, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_integers(strings, output_type, cudf::get_default_stream(), mr); + return detail::to_integers(strings, output_type, stream, mr); } namespace detail { @@ -407,10 +410,11 @@ std::unique_ptr from_integers(column_view const& integers, // external API std::unique_ptr from_integers(column_view const& integers, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_integers(integers, cudf::get_default_stream(), mr); + return detail::from_integers(integers, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_ipv4.cu b/cpp/src/strings/convert/convert_ipv4.cu index adb72cb0263..bdc138f787f 100644 --- a/cpp/src/strings/convert/convert_ipv4.cu +++ b/cpp/src/strings/convert/convert_ipv4.cu @@ -103,10 +103,11 @@ std::unique_ptr ipv4_to_integers(strings_column_view const& strings, // external API 
std::unique_ptr ipv4_to_integers(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ipv4_to_integers(strings, cudf::get_default_stream(), mr); + return detail::ipv4_to_integers(strings, stream, mr); } namespace detail { @@ -223,17 +224,19 @@ std::unique_ptr is_ipv4(strings_column_view const& strings, // external API std::unique_ptr integers_to_ipv4(column_view const& integers, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::integers_to_ipv4(integers, cudf::get_default_stream(), mr); + return detail::integers_to_ipv4(integers, stream, mr); } std::unique_ptr is_ipv4(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_ipv4(strings, cudf::get_default_stream(), mr); + return detail::is_ipv4(strings, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_lists.cu b/cpp/src/strings/convert/convert_lists.cu index 3aef37914fd..f9f2b91eb12 100644 --- a/cpp/src/strings/convert/convert_lists.cu +++ b/cpp/src/strings/convert/convert_lists.cu @@ -233,10 +233,11 @@ std::unique_ptr format_list_column(lists_column_view const& input, std::unique_ptr format_list_column(lists_column_view const& input, string_scalar const& na_rep, strings_column_view const& separators, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::format_list_column(input, na_rep, separators, cudf::get_default_stream(), mr); + return detail::format_list_column(input, na_rep, separators, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu index 9efa148cfd2..95be8534df9 100644 --- a/cpp/src/strings/convert/convert_urls.cu +++ b/cpp/src/strings/convert/convert_urls.cu @@ -149,10 +149,11 @@ std::unique_ptr 
url_encode(strings_column_view const& input, // external API std::unique_ptr url_encode(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::url_encode(strings, cudf::get_default_stream(), mr); + return detail::url_encode(strings, stream, mr); } namespace detail { @@ -429,10 +430,11 @@ std::unique_ptr url_decode(strings_column_view const& strings, // external API std::unique_ptr url_decode(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::url_decode(strings, cudf::get_default_stream(), mr); + return detail::url_decode(strings, stream, mr); } } // namespace strings diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ac13c121530..171b45b8e25 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -633,8 +633,8 @@ ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing) ConfigureTest( - STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE - testing + STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp + streams/strings/convert_test.cpp STREAM_MODE testing ) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/strings/convert_test.cpp b/cpp/tests/streams/strings/convert_test.cpp new file mode 100644 index 00000000000..80c6e0d0fde --- /dev/null +++ b/cpp/tests/streams/strings/convert_test.cpp @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +class StringsConvertTest : public cudf::test::BaseFixture {}; + +TEST_F(StringsConvertTest, Booleans) +{ + auto input = cudf::test::strings_column_wrapper({"true", "false", "True", ""}); + auto view = cudf::strings_column_view(input); + + auto true_scalar = cudf::string_scalar("true", true, cudf::test::get_default_stream()); + auto false_scalar = cudf::string_scalar("false", true, cudf::test::get_default_stream()); + + auto bools = cudf::strings::to_booleans(view, true_scalar, cudf::test::get_default_stream()); + cudf::strings::from_booleans( + bools->view(), true_scalar, false_scalar, cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, Timestamps) +{ + auto input = cudf::test::strings_column_wrapper({"2019-03-20T12:34:56Z", "2020-02-29T00:00:00Z"}); + auto view = cudf::strings_column_view(input); + + std::string format = "%Y-%m-%dT%H:%M:%SZ"; + auto dtype = cudf::data_type{cudf::type_id::TIMESTAMP_SECONDS}; + + cudf::strings::is_timestamp(view, format, cudf::test::get_default_stream()); + auto timestamps = + cudf::strings::to_timestamps(view, dtype, format, cudf::test::get_default_stream()); + + auto empty = cudf::test::strings_column_wrapper(); + cudf::strings::from_timestamps( + timestamps->view(), format, cudf::strings_column_view(empty), 
cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, Durations) +{ + auto input = cudf::test::strings_column_wrapper({"17975 days 12:34:56", "18321 days 00:00:00"}); + auto view = cudf::strings_column_view(input); + + std::string format = "%D days %H:%M:%S"; + auto dtype = cudf::data_type{cudf::type_id::DURATION_SECONDS}; + + auto durations = + cudf::strings::to_durations(view, dtype, format, cudf::test::get_default_stream()); + cudf::strings::from_durations(durations->view(), format, cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, FixedPoint) +{ + auto input = cudf::test::strings_column_wrapper({"1.234E3", "-876", "543.2"}); + auto view = cudf::strings_column_view(input); + + auto dtype = cudf::data_type{cudf::type_id::DECIMAL64, numeric::scale_type{-3}}; + + auto values = cudf::strings::to_fixed_point(view, dtype, cudf::test::get_default_stream()); + cudf::strings::from_fixed_point(values->view(), cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, Floats) +{ + auto input = cudf::test::strings_column_wrapper({"1.234E3", "-876", "543.2"}); + auto view = cudf::strings_column_view(input); + + auto dtype = cudf::data_type{cudf::type_id::FLOAT32}; + + auto values = cudf::strings::to_floats(view, dtype, cudf::test::get_default_stream()); + cudf::strings::from_floats(values->view(), cudf::test::get_default_stream()); + cudf::strings::is_float(view, cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, Integers) +{ + auto input = cudf::test::strings_column_wrapper({"1234", "-876", "5432"}); + auto view = cudf::strings_column_view(input); + + auto dtype = cudf::data_type{cudf::type_id::INT32}; + + auto values = cudf::strings::to_integers(view, dtype, cudf::test::get_default_stream()); + cudf::strings::from_integers(values->view(), cudf::test::get_default_stream()); + cudf::strings::is_integer(view, cudf::test::get_default_stream()); + cudf::strings::is_hex(view, cudf::test::get_default_stream()); + 
cudf::strings::hex_to_integers(view, dtype, cudf::test::get_default_stream()); + cudf::strings::integers_to_hex(values->view(), cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, IPv4) +{ + auto input = cudf::test::strings_column_wrapper({"192.168.0.1", "10.0.0.1"}); + auto view = cudf::strings_column_view(input); + + auto values = cudf::strings::ipv4_to_integers(view, cudf::test::get_default_stream()); + cudf::strings::integers_to_ipv4(values->view(), cudf::test::get_default_stream()); + cudf::strings::is_ipv4(view, cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, URLs) +{ + auto input = cudf::test::strings_column_wrapper({"www.nvidia.com/rapids?p=é", "/_file-7.txt"}); + auto view = cudf::strings_column_view(input); + + auto values = cudf::strings::url_encode(view, cudf::test::get_default_stream()); + cudf::strings::url_decode(values->view(), cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, DISABLED_ListsFormat) // depends on PR 14248 +{ + using STR_LISTS = cudf::test::lists_column_wrapper; + auto const input = + STR_LISTS{{STR_LISTS{"a", "bb", "ccc"}, STR_LISTS{}, STR_LISTS{"ddd", "ee", "f"}}, + {STR_LISTS{"gg", "hhh"}, STR_LISTS{"i", "", "", "jj"}}}; + auto view = cudf::lists_column_view(input); + auto null_scalar = cudf::string_scalar("NULL", true, cudf::test::get_default_stream()); + auto separators = cudf::strings_column_view(cudf::test::strings_column_wrapper()); + cudf::strings::format_list_column( + view, null_scalar, separators, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/strings/booleans_tests.cpp b/cpp/tests/strings/booleans_tests.cpp index 0c7fc992065..469ca77a4c5 100644 --- a/cpp/tests/strings/booleans_tests.cpp +++ b/cpp/tests/strings/booleans_tests.cpp @@ -36,7 +36,8 @@ TEST_F(StringsConvertTest, ToBooleans) thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); auto strings_view = cudf::strings_column_view(strings); - auto results = 
cudf::strings::to_booleans(strings_view); + auto true_scalar = cudf::string_scalar("true"); + auto results = cudf::strings::to_booleans(strings_view, true_scalar); std::vector h_expected{false, false, false, true, false, false}; cudf::test::fixed_width_column_wrapper expected( @@ -60,26 +61,46 @@ TEST_F(StringsConvertTest, FromBooleans) h_column.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - auto results = cudf::strings::from_booleans(column); + auto true_scalar = cudf::string_scalar("true"); + auto false_scalar = cudf::string_scalar("false"); + auto results = cudf::strings::from_booleans(column, true_scalar, false_scalar); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings); } TEST_F(StringsConvertTest, ZeroSizeStringsColumnBoolean) { auto const zero_size_column = cudf::make_empty_column(cudf::type_id::BOOL8)->view(); - auto results = cudf::strings::from_booleans(zero_size_column); + auto true_scalar = cudf::string_scalar("true"); + auto false_scalar = cudf::string_scalar("false"); + auto results = cudf::strings::from_booleans(zero_size_column, true_scalar, false_scalar); cudf::test::expect_column_empty(results->view()); } TEST_F(StringsConvertTest, ZeroSizeBooleansColumn) { auto const zero_size_strings_column = cudf::make_empty_column(cudf::type_id::STRING)->view(); - auto results = cudf::strings::to_booleans(zero_size_strings_column); + auto true_scalar = cudf::string_scalar("true"); + auto results = cudf::strings::to_booleans(zero_size_strings_column, true_scalar); EXPECT_EQ(0, results->size()); } TEST_F(StringsConvertTest, BooleanError) { - auto column = cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 100); - EXPECT_THROW(cudf::strings::from_booleans(column->view()), cudf::logic_error); + auto int_column = cudf::test::fixed_width_column_wrapper({1, 2, 3}); + auto true_scalar = cudf::string_scalar("true"); + auto false_scalar = cudf::string_scalar("false"); + 
EXPECT_THROW(cudf::strings::from_booleans(int_column, true_scalar, false_scalar), + cudf::logic_error); + + auto bool_column = cudf::test::fixed_width_column_wrapper({1, 0, 1}); + auto null_scalar = cudf::string_scalar("", false); + EXPECT_THROW(cudf::strings::from_booleans(bool_column, null_scalar, false_scalar), + cudf::logic_error); + EXPECT_THROW(cudf::strings::from_booleans(bool_column, true_scalar, null_scalar), + cudf::logic_error); + auto empty_scalar = cudf::string_scalar("", true); + EXPECT_THROW(cudf::strings::from_booleans(int_column, empty_scalar, false_scalar), + cudf::logic_error); + EXPECT_THROW(cudf::strings::from_booleans(int_column, true_scalar, empty_scalar), + cudf::logic_error); } diff --git a/cpp/tests/strings/format_lists_tests.cpp b/cpp/tests/strings/format_lists_tests.cpp index 95dc9725afc..6196b8ed6ad 100644 --- a/cpp/tests/strings/format_lists_tests.cpp +++ b/cpp/tests/strings/format_lists_tests.cpp @@ -60,8 +60,9 @@ TEST_F(StringsFormatListsTest, WithNulls) cudf::test::iterators::null_at(1)}; auto const view = cudf::lists_column_view(input); - auto results = cudf::strings::format_list_column(view); - auto expected = cudf::test::strings_column_wrapper( + auto null_scalar = cudf::string_scalar("NULL"); + auto results = cudf::strings::format_list_column(view, null_scalar); + auto expected = cudf::test::strings_column_wrapper( {"[a,NULL,ccc]", "NULL", "[NULL,bb,ddd]", "[zzz,xxxxx]", "[v,,NULL,w]"}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } @@ -132,11 +133,13 @@ TEST_F(StringsFormatListsTest, SlicedLists) "[ééé,12345abcdef]", "[www,12345]"}); + auto null_scalar = cudf::string_scalar("NULL"); + // set of slice intervals: covers slicing the front, back, and middle std::vector> index_pairs({{0, 11}, {0, 4}, {3, 8}, {5, 11}}); for (auto indexes : index_pairs) { auto sliced = cudf::lists_column_view(cudf::slice(input, {indexes.first, indexes.second})[0]); - auto results = cudf::strings::format_list_column(sliced); + auto results = 
cudf::strings::format_list_column(sliced, null_scalar); auto expected = cudf::test::strings_column_wrapper(h_expected.begin() + indexes.first, h_expected.begin() + indexes.second); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);