Skip to content

Commit

Permalink
updates from review comments
Browse files Browse the repository at this point in the history
Signed-off-by: Mike Wilson <[email protected]>
  • Loading branch information
hyperbolic2346 committed Oct 17, 2023
1 parent f33919a commit 463317e
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 16 deletions.
16 changes: 8 additions & 8 deletions src/main/cpp/src/parse_uri.cu
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,11 @@ __global__ void parse_uri_to_protocol(column_device_view const in_strings,

} // namespace

std::unique_ptr<column> parse_uri_to_protocol(strings_column_view const& strings,
std::unique_ptr<column> parse_uri_to_protocol(strings_column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
size_type strings_count = strings.size();
size_type strings_count = input.size();
if (strings_count == 0) return make_empty_column(type_id::STRING);

constexpr size_type num_warps_per_threadblock = 4;
Expand All @@ -300,17 +300,17 @@ std::unique_ptr<column> parse_uri_to_protocol(strings_column_view const& strings
std::min(65536, cudf::util::div_rounding_up_unsafe(strings_count, num_warps_per_threadblock));

auto offset_count = strings_count + 1;
auto const d_strings = column_device_view::create(strings.parent(), stream);
auto const d_strings = column_device_view::create(input.parent(), stream);

// build offsets column
auto offsets_column = make_numeric_column(
data_type{type_to_id<size_type>()}, offset_count, mask_state::UNALLOCATED, stream, mr);

// copy null mask
rmm::device_buffer null_mask =
strings.parent().nullable()
? cudf::detail::copy_bitmask(strings.parent(), stream, mr)
: cudf::detail::create_null_mask(strings.size(), mask_state::ALL_VALID, stream, mr);
input.parent().nullable()
? cudf::detail::copy_bitmask(input.parent(), stream, mr)
: cudf::detail::create_null_mask(input.size(), mask_state::ALL_VALID, stream, mr);

// count number of bytes in each string after parsing and store it in offsets_column
auto offsets_view = offsets_column->view();
Expand Down Expand Up @@ -357,12 +357,12 @@ std::unique_ptr<column> parse_uri_to_protocol(strings_column_view const& strings

// external API

std::unique_ptr<column> parse_uri_to_protocol(strings_column_view const& strings,
std::unique_ptr<column> parse_uri_to_protocol(strings_column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::parse_uri_to_protocol(strings, stream, mr);
return detail::parse_uri_to_protocol(input, stream, mr);
}

} // namespace spark_rapids_jni
12 changes: 4 additions & 8 deletions src/main/cpp/src/parse_uri.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,15 @@
namespace spark_rapids_jni {

/**
* @brief Convert a string column into an integer column.
* @brief Parse the protocol from each URI in the input string column and return the protocols as a new string column.
*
* @param dtype Type of column to return.
* @param string_col Incoming string column to convert to integers.
* @param ansi_mode If true, strict conversion and throws on error.
* If false, null invalid entries.
* @param strip if true leading and trailing white space is ignored.
* @param input Input string column of URIs to parse
* @param stream Stream on which to operate.
* @param mr Memory resource for returned column
* @return std::unique_ptr<column> Integer column that was created from string_col.
* @return std::unique_ptr<column> String column of protocols parsed.
*/
std::unique_ptr<cudf::column> parse_uri_to_protocol(
cudf::strings_column_view const& string_col,
cudf::strings_column_view const& input,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down

0 comments on commit 463317e

Please sign in to comment.