Skip to content

Commit

Permalink
gtest_util's RunEndEncodeTableColumns should update run-end-encoded columns' schema types
Browse files Browse the repository at this point in the history
  • Loading branch information
lesterfan committed Feb 13, 2025
1 parent 25d8bed commit 5bcde57
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
9 changes: 8 additions & 1 deletion cpp/src/arrow/testing/gtest_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -478,17 +478,24 @@ Result<std::shared_ptr<Table>> RunEndEncodeTableColumns(
const int num_columns = table.num_columns();
std::vector<std::shared_ptr<ChunkedArray>> encoded_columns;
encoded_columns.reserve(num_columns);
std::vector<std::shared_ptr<Field>> encoded_fields;
encoded_fields.reserve(num_columns);
for (int i = 0; i < num_columns; i++) {
auto field = table.schema()->field(i);
if (std::find(column_indices.begin(), column_indices.end(), i) !=
column_indices.end()) {
ARROW_ASSIGN_OR_RAISE(auto run_end_encoded, compute::RunEndEncode(table.column(i)));
DCHECK_EQ(run_end_encoded.kind(), Datum::CHUNKED_ARRAY);
encoded_columns.push_back(run_end_encoded.chunked_array());
auto encoded_type = arrow::run_end_encoded(arrow::int32(), field->type());
encoded_fields.push_back(field->WithType(encoded_type));
} else {
encoded_columns.push_back(table.column(i));
encoded_fields.push_back(field);
}
}
return Table::Make(table.schema(), std::move(encoded_columns));
auto updated_schema = arrow::schema(encoded_fields);
return Table::Make(updated_schema, std::move(encoded_columns));
}

Result<std::optional<std::string>> PrintArrayDiff(const ChunkedArray& expected,
Expand Down
14 changes: 14 additions & 0 deletions cpp/src/arrow/testing/gtest_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "arrow/array/builder_decimal.h"
#include "arrow/datum.h"
#include "arrow/record_batch.h"
#include "arrow/table.h"
#include "arrow/tensor.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/math.h"
Expand Down Expand Up @@ -281,4 +282,17 @@ TEST(AssertTestWithinUlp, Basics) {
EXPECT_FATAL_FAILURE(AssertWithinUlp(123.456f, 123.456085f, 10), "not within 10 ulps");
}

// Checks that RunEndEncodeTableColumns reports a run-end-encoded type in the
// schema of the returned table for the column index it was asked to encode.
TEST(RunEndEncodeGtestUtilTest, SchemaTypeIsModified) {
  // Single utf8 column named "col" holding four distinct values.
  const auto input_schema = arrow::schema({arrow::field("col", arrow::utf8())});
  std::shared_ptr<Table> input_table = arrow::TableFromJSON(input_schema, {R"([
{"col": "a"},
{"col": "b"},
{"col": "c"},
{"col": "d"}
])"});

  // Encode column 0 only.
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Table> encoded_table,
                       RunEndEncodeTableColumns(*input_table, {0}));

  // The schema field must carry run_end_encoded(int32, utf8), not plain utf8.
  const auto expected_type = arrow::run_end_encoded(arrow::int32(), arrow::utf8());
  const auto actual_type = encoded_table->schema()->field(0)->type();
  ASSERT_TRUE(actual_type->Equals(expected_type));
}
} // namespace arrow

0 comments on commit 5bcde57

Please sign in to comment.