diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index dc12564c656..bd9c936626a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -582,6 +582,7 @@ add_library( src/strings/replace/replace.cu src/strings/replace/replace_re.cu src/strings/reverse.cu + src/strings/scan/scan_inclusive.cu src/strings/search/findall.cu src/strings/search/find.cu src/strings/search/find_multiple.cu @@ -598,6 +599,7 @@ add_library( src/strings/utilities.cu src/strings/wrap.cu src/structs/copying/concatenate.cu + src/structs/scan/scan_inclusive.cu src/structs/structs_column_factories.cu src/structs/structs_column_view.cpp src/structs/utilities.cpp diff --git a/cpp/include/cudf/strings/detail/scan.hpp b/cpp/include/cudf/strings/detail/scan.hpp new file mode 100644 index 00000000000..611e32e28cd --- /dev/null +++ b/cpp/include/cudf/strings/detail/scan.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +namespace cudf { +namespace strings { +namespace detail { +/** + * @brief Scan function for strings + * + * Called by cudf::scan() with only min and max aggregates. + * + * @tparam Op Either DeviceMin or DeviceMax operations + * + * @param input Input strings column + * @param mask Mask for scan + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column + */ +template +std::unique_ptr scan_inclusive(column_view const& input, + bitmask_type const* mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/include/cudf/structs/detail/scan.hpp b/cpp/include/cudf/structs/detail/scan.hpp new file mode 100644 index 00000000000..531e0a6c65f --- /dev/null +++ b/cpp/include/cudf/structs/detail/scan.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +namespace cudf { +namespace structs { +namespace detail { +/** + * @brief Scan function for struct column type + * + * Called by cudf::scan() with only min and max aggregates. + * + * @tparam Op Either DeviceMin or DeviceMax operations + * + * @param input Input column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New struct column + */ +template +std::unique_ptr scan_inclusive(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace structs +} // namespace cudf diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index e74fce62caf..91aa1cac487 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include @@ -25,9 +24,10 @@ #include #include #include +#include +#include #include -#include #include #include @@ -68,43 +68,6 @@ std::pair mask_scan(column_view const& input_view namespace { -/** - * @brief Min/Max inclusive scan operator - * - * This operator will accept index values, check them and then - * run the `Op` operation on the individual element objects. - * The returned result is the appropriate index value. - * - * This was specifically created to workaround a thrust issue - * https://github.com/NVIDIA/thrust/issues/1479 - * where invalid values are passed to the operator. - */ -template -struct min_max_scan_operator { - column_device_view const col; ///< strings column device view - Element const null_replacement{}; ///< value used when element is null - bool const has_nulls; ///< true if col has null elements - - min_max_scan_operator(column_device_view const& col, bool has_nulls = true) - : col{col}, null_replacement{Op::template identity()}, has_nulls{has_nulls} - { - // verify validity bitmask is non-null, otherwise, is_null_nocheck() will crash - if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask"); - } - - __device__ inline size_type operator()(size_type lhs, size_type rhs) const - { - // thrust::inclusive_scan may pass us garbage values so we need to protect ourselves; - // in these cases the return value does not matter since the result is not used - if (lhs < 0 || rhs < 0 || lhs >= col.size() || rhs >= col.size()) return 0; - Element d_lhs = - has_nulls && col.is_null_nocheck(lhs) ? null_replacement : col.element(lhs); - Element d_rhs = - has_nulls && col.is_null_nocheck(rhs) ? null_replacement : col.element(rhs); - return Op{}(d_lhs, d_rhs) == d_lhs ? lhs : rhs; - } -}; - template struct scan_functor { static std::unique_ptr invoke(column_view const& input_view, @@ -127,11 +90,6 @@ struct scan_functor { } }; -struct null_iterator { - bitmask_type const* mask; - __device__ bool operator()(size_type idx) const { return !bit_is_set(mask, idx); } -}; - template struct scan_functor { static std::unique_ptr invoke(column_view const& input_view, @@ -139,38 +97,7 @@ struct scan_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto d_input = column_device_view::create(input_view, stream); - - // build indices of the scan operation results - rmm::device_uvector result_map(input_view.size(), stream); - thrust::inclusive_scan( - rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(input_view.size()), - result_map.begin(), - min_max_scan_operator{*d_input, input_view.has_nulls()}); - - if (input_view.has_nulls()) { - // fill the null rows with out-of-bounds values so gather records them as null; - // this prevents un-sanitized null entries in the output - auto null_itr = detail::make_counting_transform_iterator(0, null_iterator{mask}); - auto oob_val = thrust::constant_iterator(input_view.size()); - thrust::scatter_if(rmm::exec_policy(stream), - oob_val, - oob_val + input_view.size(), - thrust::counting_iterator(0), - null_itr, - result_map.data()); - } - - // call gather using the indices to build the output column - auto result_table = cudf::detail::gather(cudf::table_view({input_view}), - result_map, - out_of_bounds_policy::NULLIFY, - negative_index_policy::NOT_ALLOWED, - stream, - mr); - return std::move(result_table->release().front()); + return cudf::strings::detail::scan_inclusive(input_view, mask, stream, mr); } }; @@ -181,38 +108,7 @@ struct scan_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // Create a gather map containing indices of the prefix min/max elements. - auto gather_map = rmm::device_uvector(input.size(), stream); - auto const binop_generator = - cudf::reduction::detail::comparison_binop_generator::create(input, stream); - thrust::inclusive_scan(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(input.size()), - gather_map.begin(), - binop_generator.binop()); - - // Gather the children columns of the input column. Must use `get_sliced_child` to properly - // handle input in case it is a sliced view. - auto const input_children = [&] { - auto const it = cudf::detail::make_counting_transform_iterator( - 0, [structs_view = structs_column_view{input}, &stream](auto const child_idx) { - return structs_view.get_sliced_child(child_idx, stream); - }); - return std::vector(it, it + input.num_children()); - }(); - - // Gather the children elements of the prefix min/max struct elements for the output. - auto scanned_children = cudf::detail::gather(table_view{input_children}, - gather_map, - out_of_bounds_policy::DONT_CHECK, - negative_index_policy::NOT_ALLOWED, - stream, - mr) - ->release(); - - // Don't need to set a null mask because that will be handled at the caller. - return make_structs_column( - input.size(), std::move(scanned_children), 0, rmm::device_buffer{0, stream, mr}, stream, mr); + return cudf::structs::detail::scan_inclusive(input, stream, mr); } }; diff --git a/cpp/src/strings/scan/scan_inclusive.cu b/cpp/src/strings/scan/scan_inclusive.cu new file mode 100644 index 00000000000..0cf492fa295 --- /dev/null +++ b/cpp/src/strings/scan/scan_inclusive.cu @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace cudf { +namespace strings { +namespace detail { +namespace { + +/** + * @brief Min/Max inclusive scan operator + * + * This operator will accept index values, check them and then + * run the `Op` operation on the individual element objects. + * The returned result is the appropriate index value. + * + * This was specifically created to workaround a thrust issue + * https://github.com/NVIDIA/thrust/issues/1479 + * where invalid values are passed to the operator. + */ +template +struct min_max_scan_operator { + column_device_view const col; ///< strings column device view + Element const null_replacement{}; ///< value used when element is null + bool const has_nulls; ///< true if col has null elements + + min_max_scan_operator(column_device_view const& col, bool has_nulls = true) + : col{col}, null_replacement{Op::template identity()}, has_nulls{has_nulls} + { + // verify validity bitmask is non-null, otherwise, is_null_nocheck() will crash + if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask"); + } + + __device__ inline size_type operator()(size_type lhs, size_type rhs) const + { + // thrust::inclusive_scan may pass us garbage values so we need to protect ourselves; + // in these cases the return value does not matter since the result is not used + if (lhs < 0 || rhs < 0 || lhs >= col.size() || rhs >= col.size()) return 0; + Element d_lhs = + has_nulls && col.is_null_nocheck(lhs) ? null_replacement : col.element(lhs); + Element d_rhs = + has_nulls && col.is_null_nocheck(rhs) ? null_replacement : col.element(rhs); + return Op{}(d_lhs, d_rhs) == d_lhs ? lhs : rhs; + } +}; + +struct null_iterator { + bitmask_type const* mask; + __device__ bool operator()(size_type idx) const { return !bit_is_set(mask, idx); } +}; + +} // namespace + +template +std::unique_ptr scan_inclusive(column_view const& input, + bitmask_type const* mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto d_input = column_device_view::create(input, stream); + + // build indices of the scan operation results + rmm::device_uvector result_map(input.size(), stream); + thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + result_map.begin(), + min_max_scan_operator{*d_input, input.has_nulls()}); + + if (input.has_nulls()) { + // fill the null rows with out-of-bounds values so gather records them as null; + // this prevents un-sanitized null entries in the output + auto null_itr = cudf::detail::make_counting_transform_iterator(0, null_iterator{mask}); + auto oob_val = thrust::constant_iterator(input.size()); + thrust::scatter_if(rmm::exec_policy(stream), + oob_val, + oob_val + input.size(), + thrust::counting_iterator(0), + null_itr, + result_map.data()); + } + + // call gather using the indices to build the output column + auto result_table = cudf::detail::gather(cudf::table_view({input}), + result_map, + cudf::out_of_bounds_policy::NULLIFY, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + return std::move(result_table->release().front()); +} + +template std::unique_ptr scan_inclusive(column_view const& input, + bitmask_type const* mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +template std::unique_ptr scan_inclusive(column_view const& input, + bitmask_type const* mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/src/structs/scan/scan_inclusive.cu b/cpp/src/structs/scan/scan_inclusive.cu new file mode 100644 index 00000000000..823e4472960 --- /dev/null +++ b/cpp/src/structs/scan/scan_inclusive.cu @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +namespace cudf { +namespace structs { +namespace detail { +namespace { + +} // namespace + +template +std::unique_ptr scan_inclusive(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Create a gather map containing indices of the prefix min/max elements. + auto gather_map = rmm::device_uvector(input.size(), stream); + auto const binop_generator = + cudf::reduction::detail::comparison_binop_generator::create(input, stream); + thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + gather_map.begin(), + binop_generator.binop()); + + // Gather the children columns of the input column. Must use `get_sliced_child` to properly + // handle input in case it is a sliced view. + auto const input_children = [&] { + auto const it = cudf::detail::make_counting_transform_iterator( + 0, [structs_view = structs_column_view{input}, &stream](auto const child_idx) { + return structs_view.get_sliced_child(child_idx, stream); + }); + return std::vector(it, it + input.num_children()); + }(); + + // Gather the children elements of the prefix min/max struct elements for the output. + auto scanned_children = cudf::detail::gather(table_view{input_children}, + gather_map, + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release(); + + // Don't need to set a null mask because that will be handled at the caller. + return make_structs_column( + input.size(), std::move(scanned_children), 0, rmm::device_buffer{0, stream, mr}, stream, mr); +} + +template std::unique_ptr scan_inclusive(column_view const& input_view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +template std::unique_ptr scan_inclusive(column_view const& input_view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace structs +} // namespace cudf