Skip to content

Commit

Permalink
Fix write_json to handle empty string column (#16995)
Browse files Browse the repository at this point in the history
Add an empty-strings-column check to write_json.
Bypass make_strings_children for an empty column, because it throws a CUDA error when the grid size is zero.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: #16995
  • Loading branch information
karthikeyann authored Oct 5, 2024
1 parent 33b8dfa commit fcff2b6
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ struct escape_strings_fn {
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
if (column_v.is_empty()) { // empty begets empty
return make_empty_column(type_id::STRING);
}
auto [offsets_column, chars] =
cudf::strings::detail::make_strings_children(*this, column_v.size(), stream, mr);

Expand Down
37 changes: 37 additions & 0 deletions cpp/tests/io/json/json_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,43 @@ TEST_F(JsonWriterTest, EmptyInput)
EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size()));
}

// Writes a one-row table whose columns are an empty string, a list with an
// empty strings child, and an empty list row, then checks the JSON produced
// in both array form and JSON Lines form.
TEST_F(JsonWriterTest, EmptyLeaf)
{
  auto const stream = cudf::test::get_default_stream();

  // col1: a single row holding the empty string.
  cudf::test::strings_column_wrapper col1{""};

  // col2: a single list row (offsets {0, 0}) over an empty strings child column.
  cudf::test::fixed_width_column_wrapper<cudf::size_type> list_offsets{0, 0};
  auto empty_strings_child = cudf::test::strings_column_wrapper{}.release();
  auto col2                = make_lists_column(
    1, list_offsets.release(), std::move(empty_strings_child), 0, rmm::device_buffer{}, stream);

  // col3: built by the lists wrapper's one-empty-row factory.
  auto col3 = cudf::test::lists_column_wrapper<int>::make_one_empty_row_column();

  cudf::table_view tbl_view{{col1, *col2, col3}};
  cudf::io::table_metadata mt{{{"col1"}, {"col2"}, {"col3"}}};

  std::vector<char> out_buffer;
  // Snapshot of whatever the writer has put into the sink buffer so far.
  auto const written = [&out_buffer]() {
    return std::string(out_buffer.data(), out_buffer.size());
  };

  auto sink        = cudf::io::sink_info(&out_buffer);
  auto out_options = cudf::io::json_writer_options_builder(sink, tbl_view)
                       .include_nulls(true)
                       .metadata(mt)
                       .lines(false)
                       .na_rep("null")
                       .build();

  // Pass 1: regular JSON (array of records).
  {
    cudf::io::write_json(out_options, stream);
    std::string const expected = R"([{"col1":"","col2":[],"col3":[]}])";
    EXPECT_EQ(expected, written());
  }

  // Pass 2: JSON Lines, reusing the same sink after clearing it.
  {
    out_buffer.clear();
    out_options.enable_lines(true);
    cudf::io::write_json(out_options, stream);
    std::string const expected_lines = R"({"col1":"","col2":[],"col3":[]})"
                                       "\n";
    EXPECT_EQ(expected_lines, written());
  }
}

TEST_F(JsonWriterTest, ErrorCases)
{
cudf::test::strings_column_wrapper col1{"a", "b", "c"};
Expand Down

0 comments on commit fcff2b6

Please sign in to comment.