diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index fde1857cb7f..2534140e326 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -376,6 +376,7 @@ class json_reader_options { /** * @brief Set whether to parse mixed types as a string column. * Also enables forcing to read a struct as string column using schema. + * If enabled, mixed types are parsed as a string column regardless of schema. * * @param val Boolean value to enable/disable parsing mixed types as a string column */ diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 8d6890045be..54454da785e 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -987,15 +987,15 @@ std::pair, std::vector> device_json_co data_type target_type{}; - if (schema.has_value()) { + if (json_col.forced_as_string_column) { + target_type = data_type{type_id::STRING}; + } else if (schema.has_value()) { #ifdef NJP_DEBUG_PRINT std::cout << "-> explicit type: " << (schema.has_value() ? 
std::to_string(static_cast(schema->type.id())) : "n/a"); #endif target_type = schema.value().type; - } else if (json_col.forced_as_string_column) { - target_type = data_type{type_id::STRING}; } // Infer column type, if we don't have an explicit type for it else { diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index c26e5ca3edb..1127f9ad641 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -2776,4 +2776,29 @@ TEST_F(JsonReaderTest, JSONMixedTypeChildren) } } +TEST_F(JsonReaderTest, MixedTypesWithSchema) +{ + std::string data = "{\"data\": {\"A\": 0, \"B\": 1}}\n{\"data\": [1,0]}\n"; + + std::map data_types; + data_types.insert( + std::pair{"data", cudf::io::schema_element{cudf::data_type{cudf::type_id::LIST}}}); + + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .dtypes(data_types) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .mixed_types_as_string(true) + .keep_quotes(true) + .lines(true); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + + EXPECT_EQ(result.tbl->num_columns(), 1); + EXPECT_EQ(result.tbl->num_rows(), 2); + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRING); + // expected output without whitespace + cudf::test::strings_column_wrapper expected({R"({"A": 0, "B": 1})", "[1,0]"}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result.tbl->get_column(0)); +} + CUDF_TEST_PROGRAM_MAIN()