From 25583b44ca9cac7bef4f1211498f59f1c35b05d5 Mon Sep 17 00:00:00 2001 From: Giorgi Lomia Date: Fri, 1 Oct 2021 22:03:58 +0000 Subject: [PATCH] Did more work on arrowArray visitor WIP. --- libsupport/include/katana/ArrowVisitor.h | 8 + tools/graph-stats/graph-memory-stats.cpp | 178 ++++++++++------------- 2 files changed, 88 insertions(+), 98 deletions(-) diff --git a/libsupport/include/katana/ArrowVisitor.h b/libsupport/include/katana/ArrowVisitor.h index 7fbfdeb006..2497085210 100644 --- a/libsupport/include/katana/ArrowVisitor.h +++ b/libsupport/include/katana/ArrowVisitor.h @@ -307,6 +307,14 @@ using AcceptAllArrowTypes = std::tuple< arrow::LargeStringType, arrow::StructType, arrow::ListType, arrow::LargeListType, arrow::NullType>; +using AcceptAllFlatTypes = std::tuple< + arrow::Int8Type, arrow::UInt8Type, arrow::Int16Type, arrow::UInt16Type, + arrow::Int32Type, arrow::UInt32Type, arrow::Int64Type, arrow::UInt64Type, + arrow::FloatType, arrow::DoubleType, arrow::FloatType, arrow::DoubleType, + arrow::BooleanType, arrow::Date32Type, arrow::Date64Type, arrow::Time32Type, + arrow::Time64Type, arrow::TimestampType, arrow::StringType, + arrow::LargeStringType, arrow::NullType>; + template using tuple_cat_t = decltype(std::tuple_cat(std::declval()...)); diff --git a/tools/graph-stats/graph-memory-stats.cpp b/tools/graph-stats/graph-memory-stats.cpp index 270c2ab06f..c05a34a18f 100644 --- a/tools/graph-stats/graph-memory-stats.cpp +++ b/tools/graph-stats/graph-memory-stats.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include "katana/ArrowVisitor.h" @@ -43,137 +42,122 @@ using map_string_element = std::unordered_map; using memory_map = std::unordered_map< std::string, std::variant>; +inline std::shared_ptr +GetArrowType(const arrow::Scalar& scalar) { + return scalar.type; +} + +inline std::shared_ptr +GetArrowType(const arrow::Array& array) { + return array.type(); +} + +inline std::shared_ptr +GetArrowType(const arrow::ArrayBuilder* builder) { + return builder->type(); +} + struct Visitor : public katana::ArrowVisitor { using ResultType = katana::Result; - using AcceptTypes = std::tuple< - arrow::Int8Type, arrow::UInt8Type, arrow::Int16Type, arrow::UInt16Type, - arrow::Int32Type, arrow::UInt32Type, arrow::Int64Type, arrow::UInt64Type, - arrow::FloatType, arrow::DoubleType, arrow::FloatType, arrow::DoubleType, - arrow::BooleanType, arrow::Date32Type, arrow::Date64Type, - arrow::Time32Type, arrow::Time64Type, arrow::TimestampType, - arrow::StringType, arrow::LargeStringType, arrow::StructType, - arrow::NullType>; - - template + using AcceptTypes = std::tuple; + + template + arrow::enable_if_null Call(const ArrayType& scalars) { + std::cout << scalars.total_values_length() << "\n"; + return 0; + } + + template std::enable_if_t< arrow::is_number_type::value || arrow::is_boolean_type::value || arrow::is_temporal_type::value, ResultType> - Call(const ScalarType& scalar) { - return scalar.value; + Call(const ArrayType& scalars) { + // ResultType width = 0; + std::cout << scalars.total_values_length() << "\n"; + return 0; } - template + template arrow::enable_if_string_like Call( - const ScalarType& scalar) { - const ScalarType* typed_scalar = static_cast(scalar.get()); - auto res = (arrow::util::string_view)(*typed_scalar->value); - // TODO (giorgi): make this KATANA_CHECKED - // if (!res.ok()) { - // return KATANA_ERROR( - // katana::ErrorCode::ArrowError, "arrow builder failed append: {}", - // res); - // } - return res; + const ArrayType& scalars) { + std::cout << scalars.total_values_length() << "\n"; + + return 0; } - ResultType AcceptFailed(const arrow::Scalar& scalar) { + template + ResultType AcceptFailed(Param&& param) { return KATANA_ERROR( - katana::ErrorCode::ArrowError, "no matching type {}", - scalar.type->name()); + "Instant functions do not accept {}", GetArrowType(param)->ToString()); } }; -// struct ToArrayVisitor : public katana::ArrowVisitor { -// // Internal data and constructor -// const std::shared_ptr scalars; -// ToArrayVisitor(const std::shared_ptr input) : scalars(input) {} - -// using ResultType = katana::Result>; - +// struct Visitor : public katana::ArrowVisitor { +// const std::shared_ptr& scalar; +// Visitor(const std::shared_ptr& input) : scalar(input) {} +// using ResultType = katana::Result; // using AcceptTypes = std::tuple; -// template -// arrow::enable_if_null Call(BuilderType* builder) { -// return KATANA_CHECKED(builder->Finish()); +// template +// arrow::enable_if_null Call( +// const WidthType& width_tracker) { +// width_tracker = 0; +// return width_tracker; // } -// template +// template // std::enable_if_t< // arrow::is_number_type::value || // arrow::is_boolean_type::value || // arrow::is_temporal_type::value, // ResultType> -// Call(BuilderType* builder) { +// Call(const WidthType& width_tracker) { // using ScalarType = typename arrow::TypeTraits::ScalarType; - -// KATANA_CHECKED(builder->Reserve(scalars->length())); -// for (auto j = 0; j < scalars->length(); j++) { -// auto scalar = *scalars->GetScalar(j); -// if (scalar != nullptr && scalar->is_valid) { -// const ScalarType* typed_scalar = static_cast(scalar.get()); -// builder->UnsafeAppend(typed_scalar->value); -// } else { -// builder->UnsafeAppendNull(); -// } +// if (scalar != nullptr && scalar->is_valid) { +// const ScalarType* typed_scalar = static_cast(scalar.get()); +// return typed_scalar->value; +// } else { +// return KATANA_ERROR( +// katana::ErrorCode::ArrowError, "arrow visitor failed to read: NULL"); // } -// return KATANA_CHECKED(builder->Finish()); // } -// template +// template // arrow::enable_if_string_like Call( -// BuilderType* builder) { +// const WidthType& width_tracker) { // using ScalarType = typename arrow::TypeTraits::ScalarType; -// // same as above, but with string_view and Append instead of UnsafeAppend -// for (auto j = 0; j < scalars->length(); j++) { -// auto scalar = *scalars->GetScalar(j); -// if (scalar != nullptr && scalar->is_valid) { -// // ->value->ToString() works, scalar->ToString() yields "..." -// const ScalarType* typed_scalar = static_cast(scalar.get()); -// if (auto res = builder->Append( -// (arrow::util::string_view)(*typed_scalar->value)); -// !res.ok()) { -// return KATANA_ERROR( -// katana::ErrorCode::ArrowError, "arrow builder failed append: {}", -// res); -// } -// } else { -// if (auto res = builder->AppendNull(); !res.ok()) { -// return KATANA_ERROR( -// katana::ErrorCode::ArrowError, -// "arrow builder failed append null: {}", res); -// } -// } +// if (scalar != nullptr && scalar->is_valid) { +// // ->value->ToString() works, scalar->ToString() yields "..." +// const ScalarType* typed_scalar = static_cast(scalar.get()); +// auto res = (arrow::util::string_view)(*typed_scalar->value); +// return res; +// } else { +// return KATANA_ERROR( +// katana::ErrorCode::ArrowError, "arrow visitor failed to read: NULL"); // } -// return KATANA_CHECKED(builder->Finish()); // } -// template +// template // std::enable_if_t< // arrow::is_list_type::value || // arrow::is_struct_type::value, // ResultType> -// Call(BuilderType* builder) { +// Call(const WidthType& width_tracker) { // using ScalarType = typename arrow::TypeTraits::ScalarType; // // use a visitor to traverse more complex types -// katana::AppendScalarToBuilder visitor(builder); -// for (auto j = 0; j < scalars->length(); j++) { -// auto scalar = *scalars->GetScalar(j); -// if (scalar != nullptr && scalar->is_valid) { -// const ScalarType* typed_scalar = static_cast(scalar.get()); -// KATANA_CHECKED(visitor.Call(*typed_scalar)); -// } else { -// KATANA_CHECKED(builder->AppendNull()); -// } +// Visitor visitor(scalar); +// if (scalar != nullptr && scalar->is_valid) { +// const ScalarType* typed_scalar = static_cast(scalar.get()); +// KATANA_CHECKED(visitor.Call(*typed_scalar)); // } -// return KATANA_CHECKED(builder->Finish()); // } -// ResultType AcceptFailed(const arrow::ArrayBuilder* builder) { +// ResultType AcceptFailed(const arrow::Scalar& scalar) { // return KATANA_ERROR( // katana::ErrorCode::ArrowError, "no matching type {}", -// builder->type()->name()); +// scalar.type->name()); // } // }; @@ -202,19 +186,17 @@ PrintStringMapping(const std::unordered_map& u) { std::cout << "\n"; } -katana::Result> +void RunVisit(const std::shared_ptr scalars) { - Visitor v; int64_t total = 0; - for (auto j = 0; j < scalars->length(); j++) { - auto s = *scalars->GetScalar(j); - auto res = katana::VisitArrow(v, *s); - KATANA_LOG_VASSERT(res, "unexpected errror {}", res.error()); - total += res.value(); - } + Visitor v; + arrow::Array* arr = scalars.get(); + auto res = katana::VisitArrow(v, *arr); + KATANA_LOG_VASSERT(res, "unexpected errror {}", res.error()); + total += res.value(); - KATANA_LOG_VASSERT( - total == scalars->length(), "{} != {}", total, scalars->length()); + // KATANA_LOG_VASSERT( + // total == scalars->length(), "{} != {}", total, scalars->length()); } void @@ -258,7 +240,7 @@ GatherMemoryAllocation( alloc_size = 0; prop_size = 0; auto bit_width = arrow::bit_width(dtype->id()); - auto visited_arr = RunVisit(prop_field); + RunVisit(prop_field); for (auto j = 0; j < prop_field->length(); j++) { if (prop_field->IsValid(j)) { auto scal_ptr = *prop_field->GetScalar(j);