diff --git a/hybridse/include/codec/fe_row_codec.h b/hybridse/include/codec/fe_row_codec.h index 0e0b153f5a5..fd876a07db4 100644 --- a/hybridse/include/codec/fe_row_codec.h +++ b/hybridse/include/codec/fe_row_codec.h @@ -20,11 +20,10 @@ #include #include #include -#include #include + +#include "absl/status/statusor.h" #include "base/raw_buffer.h" -#include "butil/iobuf.h" -#include "gflags/gflags.h" #include "proto/fe_type.pb.h" namespace hybridse { @@ -180,26 +179,38 @@ class RowView { }; struct ColInfo { - ::hybridse::type::Type type; + // type() is still used in some legacy UDF contexts; + // use it with caution for non-base types, where it returns kNull + ::hybridse::type::Type type() const { + if (!schema.has_base_type()) { + return type::kNull; + } + return schema.base_type(); + } + uint32_t idx; uint32_t offset; std::string name; + type::ColumnSchema schema; ColInfo() {} - ColInfo(const std::string& name, ::hybridse::type::Type type, uint32_t idx, - uint32_t offset) - : type(type), idx(idx), offset(offset), name(name) {} + ColInfo(const std::string& name, ::hybridse::type::Type type, uint32_t idx, uint32_t offset) + : idx(idx), offset(offset), name(name) { + schema.set_base_type(type); + } + + ColInfo(const std::string& name, const type::ColumnSchema& sc, uint32_t idx, uint32_t offset) + : idx(idx), offset(offset), name(name), schema(sc) {} }; struct StringColInfo : public ColInfo { uint32_t str_next_offset; uint32_t str_start_offset; - StringColInfo() {} - StringColInfo(const std::string& name, ::hybridse::type::Type type, + StringColInfo(const std::string& name, ::hybridse::type::ColumnSchema sc, uint32_t idx, uint32_t offset, uint32_t str_next_offset, uint32_t str_start_offset) - : ColInfo(name, type, idx, offset), + : ColInfo(name, sc, idx, offset), str_next_offset(str_next_offset), str_start_offset(str_start_offset) {} }; @@ -209,7 +220,7 @@ class SliceFormat { explicit SliceFormat(const hybridse::codec::Schema* schema); virtual ~SliceFormat() {} - bool GetStringColumnInfo(size_t idx, 
StringColInfo* res) const; + absl::StatusOr GetStringColumnInfo(size_t idx) const; const ColInfo* GetColumnInfo(size_t idx) const; @@ -224,7 +235,7 @@ class SliceFormat { class RowFormat { public: virtual ~RowFormat() {} - virtual bool GetStringColumnInfo(size_t schema_idx, size_t idx, StringColInfo* res) const = 0; + virtual absl::StatusOr GetStringColumnInfo(size_t schema_idx, size_t idx) const = 0; virtual const ColInfo* GetColumnInfo(size_t schema_idx, size_t idx) const = 0; virtual size_t GetSliceId(size_t schema_idx) const = 0; }; @@ -245,8 +256,8 @@ class MultiSlicesRowFormat : public RowFormat { slice_formats_.clear(); } - bool GetStringColumnInfo(size_t schema_idx, size_t idx, StringColInfo* res) const override { - return slice_formats_[schema_idx].GetStringColumnInfo(idx, res); + absl::StatusOr GetStringColumnInfo(size_t schema_idx, size_t idx) const override { + return slice_formats_[schema_idx].GetStringColumnInfo(idx); } const ColInfo* GetColumnInfo(size_t schema_idx, size_t idx) const override { @@ -287,8 +298,8 @@ class SingleSliceRowFormat : public RowFormat { } } - bool GetStringColumnInfo(size_t schema_idx, size_t idx, StringColInfo* res) const override { - return slice_format_->GetStringColumnInfo(offsets_[schema_idx] + idx, res); + absl::StatusOr GetStringColumnInfo(size_t schema_idx, size_t idx) const override { + return slice_format_->GetStringColumnInfo(offsets_[schema_idx] + idx); } const ColInfo* GetColumnInfo(size_t schema_idx, size_t idx) const override { diff --git a/hybridse/src/benchmark/udf_bm_case.cc b/hybridse/src/benchmark/udf_bm_case.cc index 1dc6966d291..a914ed99a5b 100644 --- a/hybridse/src/benchmark/udf_bm_case.cc +++ b/hybridse/src/benchmark/udf_bm_case.cc @@ -183,8 +183,10 @@ void SumArrayListCol(benchmark::State* state, MODE mode, int64_t data_size, schemas_context.GetRowFormat(schema_idx)->GetColumnInfo(col_idx); codegen::MemoryWindowDecodeIRBuilder builder(&schemas_context, nullptr); - node::TypeNode type; - 
codegen::SchemaType2DataType(info->type, &type); + node::NodeManager nm; + auto rs = codegen::ColumnSchema2Type(info->schema, &nm); + ASSERT_TRUE(rs.ok()); + auto* type = rs.value(); uint32_t col_size; - ASSERT_TRUE(codegen::GetLlvmColumnSize(&type, &col_size)); + ASSERT_TRUE(codegen::GetLlvmColumnSize(type, &col_size)); @@ -193,7 +195,7 @@ void SumArrayListCol(benchmark::State* state, MODE mode, int64_t data_size, ASSERT_EQ(0, ::hybridse::codec::v1::GetCol( reinterpret_cast(&list_table_ref), 0, info->idx, - info->offset, info->type, buf)); + info->offset, info->type(), buf)); { switch (mode) { diff --git a/hybridse/src/case/sql_case.cc b/hybridse/src/case/sql_case.cc index be0633dc703..8af73741caa 100644 --- a/hybridse/src/case/sql_case.cc +++ b/hybridse/src/case/sql_case.cc @@ -295,6 +295,7 @@ bool SqlCase::ExtractSchema(const std::vector& columns, } column->set_type(type); column->set_is_not_null(false); + column->mutable_schema()->set_base_type(column->type()); } } catch (const std::exception& ex) { LOG(WARNING) << "Fail to ExtractSchema: " << ex.what(); diff --git a/hybridse/src/codec/fe_row_codec.cc b/hybridse/src/codec/fe_row_codec.cc index 9e61402879d..2acb6e88041 100644 --- a/hybridse/src/codec/fe_row_codec.cc +++ b/hybridse/src/codec/fe_row_codec.cc @@ -928,10 +928,18 @@ SliceFormat::SliceFormat(const hybridse::codec::Schema* schema) for (int32_t i = 0; i < schema_->size(); i++) { const ::hybridse::type::ColumnDef& column = schema_->Get(i); if (column.type() == ::hybridse::type::kVarchar) { + // backwards compatibility check + type::ColumnSchema col_schema; + if (column.has_schema()) { + col_schema = column.schema(); + } else { + col_schema.set_base_type(column.type()); + } + if (FLAGS_enable_spark_unsaferow_format) { - infos_.emplace_back(column.name(), column.type(), i, offset); + infos_.emplace_back(column.name(), col_schema, i, offset); } else { - infos_.emplace_back(column.name(), column.type(), i, string_field_cnt); + infos_.emplace_back(column.name(), col_schema, i, string_field_cnt); } 
infos_dict_[column.name()] = i; @@ -943,13 +951,17 @@ SliceFormat::SliceFormat(const hybridse::codec::Schema* schema) offset += 8; } } else { - auto TYPE_SIZE_MAP = codec::GetTypeSizeMap(); + auto& TYPE_SIZE_MAP = codec::GetTypeSizeMap(); auto it = TYPE_SIZE_MAP.find(column.type()); if (it == TYPE_SIZE_MAP.end()) { LOG(WARNING) << "fail to find column type " << ::hybridse::type::Type_Name(column.type()); } else { - infos_.emplace_back(column.name(), column.type(), i, offset); + if (column.has_schema()) { + infos_.emplace_back(column.name(), column.schema(), i, offset); + } else { + infos_.emplace_back(column.name(), column.type(), i, offset); + } infos_dict_[column.name()] = i; offset += it->second; } @@ -969,17 +981,12 @@ const ColInfo* SliceFormat::GetColumnInfo(size_t idx) const { return idx < infos_.size() ? &infos_[idx] : nullptr; } -bool SliceFormat::GetStringColumnInfo(size_t idx, StringColInfo* res) const { - if (nullptr == res) { - LOG(WARNING) << "input args have null"; - return false; - } +absl::StatusOr SliceFormat::GetStringColumnInfo(size_t idx) const { if (idx >= infos_.size()) { - return false; + return absl::NotFoundError("schemas empty"); } // TODO(wangtaize) support null check auto& base_col_info = infos_[idx]; - auto ty = base_col_info.type; uint32_t col_idx = base_col_info.idx; uint32_t offset = base_col_info.offset; uint32_t next_offset = -1; @@ -990,17 +997,15 @@ bool SliceFormat::GetStringColumnInfo(size_t idx, StringColInfo* res) const { if (FLAGS_enable_spark_unsaferow_format) { // No need to get next offset for UnsafeRowOpt and ignore the warning } else { - LOG(WARNING) << "fail to get string field next offset"; - return false; + return absl::NotFoundError("fail to get string field next offset"); } } DLOG(INFO) << "get string with offset " << offset << " next offset " << next_offset << " str_field_start_offset " << str_field_start_offset_ << " for col " << base_col_info.name; - *res = StringColInfo(base_col_info.name, ty, col_idx, offset, 
next_offset, - str_field_start_offset_); - return true; + return StringColInfo(base_col_info.name, base_col_info.schema, col_idx, offset, next_offset, + str_field_start_offset_); } } // namespace codec diff --git a/hybridse/src/codec/fe_row_codec_test.cc b/hybridse/src/codec/fe_row_codec_test.cc index 2da3c7e1199..565bab55c77 100644 --- a/hybridse/src/codec/fe_row_codec_test.cc +++ b/hybridse/src/codec/fe_row_codec_test.cc @@ -426,18 +426,17 @@ TEST_F(CodecTest, SliceFormatTest) { if (i % 3 == 0) { const codec::ColInfo* info = decoder.GetColumnInfo(i); ASSERT_TRUE(info != nullptr); - ASSERT_EQ(::hybridse::type::kVarchar, info->type); + ASSERT_EQ(::hybridse::type::kVarchar, info->type()); - codec::StringColInfo str_info; - ASSERT_TRUE(decoder.GetStringColumnInfo(i, &str_info)); + ASSERT_TRUE(decoder.GetStringColumnInfo(i).ok()); } else if (i % 3 == 1) { const codec::ColInfo* info = decoder.GetColumnInfo(i); ASSERT_TRUE(info != nullptr); - ASSERT_EQ(::hybridse::type::kInt64, info->type); + ASSERT_EQ(::hybridse::type::kInt64, info->type()); } else if (i % 3 == 2) { const codec::ColInfo* info = decoder.GetColumnInfo(i); ASSERT_TRUE(info != nullptr); - ASSERT_EQ(::hybridse::type::kDouble, info->type); + ASSERT_EQ(::hybridse::type::kDouble, info->type()); } } } @@ -487,40 +486,41 @@ TEST_F(CodecTest, SliceFormatOffsetTest) { SliceFormat decoder(&table.columns()); { const codec::ColInfo* info = decoder.GetColumnInfo(0); - ASSERT_EQ(::hybridse::type::kInt32, info->type); + ASSERT_EQ(::hybridse::type::kInt32, info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(7u, info->offset); } { const codec::ColInfo* info = decoder.GetColumnInfo(1); - ASSERT_EQ(::hybridse::type::kInt16, info->type); + ASSERT_EQ(::hybridse::type::kInt16, info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(7u + 4u, info->offset); } { const codec::ColInfo* info = decoder.GetColumnInfo(2); - ASSERT_EQ(::hybridse::type::kFloat, info->type); + ASSERT_EQ(::hybridse::type::kFloat, 
info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(7u + 4u + 2u, info->offset); } { const codec::ColInfo* info = decoder.GetColumnInfo(3); - ASSERT_EQ(::hybridse::type::kDouble, info->type); + ASSERT_EQ(::hybridse::type::kDouble, info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(7u + 4u + 2u + 4u, info->offset); } { const codec::ColInfo* info = decoder.GetColumnInfo(4); - ASSERT_EQ(::hybridse::type::kInt64, info->type); + ASSERT_EQ(::hybridse::type::kInt64, info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(7u + 4u + 2u + 4u + 8u, info->offset); } { const codec::ColInfo* info = decoder.GetColumnInfo(5); - ASSERT_EQ(::hybridse::type::kVarchar, info->type); + ASSERT_EQ(::hybridse::type::kVarchar, info->type()); - codec::StringColInfo str_info; - decoder.GetStringColumnInfo(5, &str_info); + auto rs = decoder.GetStringColumnInfo(5); + ASSERT_TRUE(rs.ok()); + auto& str_info = rs.value(); LOG(INFO) << "offset: " << str_info.offset << " next_offset: " << str_info.str_next_offset << " str_start_offset " << str_info.str_start_offset; @@ -530,10 +530,11 @@ TEST_F(CodecTest, SliceFormatOffsetTest) { } { const codec::ColInfo* info = decoder.GetColumnInfo(6); - ASSERT_EQ(::hybridse::type::kVarchar, info->type); + ASSERT_EQ(::hybridse::type::kVarchar, info->type()); - codec::StringColInfo str_info; - decoder.GetStringColumnInfo(6, &str_info); + auto rs = decoder.GetStringColumnInfo(6); + ASSERT_TRUE(rs.ok()); + auto& str_info = rs.value(); LOG(INFO) << "offset: " << str_info.offset << " next_offset: " << str_info.str_next_offset << " str_start_offset " << str_info.str_start_offset; @@ -596,40 +597,41 @@ TEST_F(CodecTest, SliceFormatOffsetLongHeaderTest) { SliceFormat decoder(&table.columns()); { const codec::ColInfo* info = decoder.GetColumnInfo(0); - ASSERT_EQ(::hybridse::type::kInt32, info->type); + ASSERT_EQ(::hybridse::type::kInt32, info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(8u, info->offset); } { const 
codec::ColInfo* info = decoder.GetColumnInfo(1); - ASSERT_EQ(::hybridse::type::kInt16, info->type); + ASSERT_EQ(::hybridse::type::kInt16, info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(8u + 4u, info->offset); } { const codec::ColInfo* info = decoder.GetColumnInfo(2); - ASSERT_EQ(::hybridse::type::kFloat, info->type); + ASSERT_EQ(::hybridse::type::kFloat, info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(8u + 4u + 2u, info->offset); } { const codec::ColInfo* info = decoder.GetColumnInfo(3); - ASSERT_EQ(::hybridse::type::kDouble, info->type); + ASSERT_EQ(::hybridse::type::kDouble, info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(8u + 4u + 2u + 4u, info->offset); } { const codec::ColInfo* info = decoder.GetColumnInfo(4); - ASSERT_EQ(::hybridse::type::kInt64, info->type); + ASSERT_EQ(::hybridse::type::kInt64, info->type()); LOG(INFO) << "offset: " << info->offset; ASSERT_EQ(8u + 4u + 2u + 4u + 8u, info->offset); } { const codec::ColInfo* info = decoder.GetColumnInfo(5); - ASSERT_EQ(::hybridse::type::kVarchar, info->type); + ASSERT_EQ(::hybridse::type::kVarchar, info->type()); - codec::StringColInfo str_info; - decoder.GetStringColumnInfo(5, &str_info); + auto str_info_wp = decoder.GetStringColumnInfo(5); + ASSERT_TRUE(str_info_wp.ok()); + auto& str_info = str_info_wp.value(); LOG(INFO) << "offset: " << str_info.offset << " next_offset: " << str_info.str_next_offset << " str_start_offset " << str_info.str_start_offset; @@ -639,10 +641,11 @@ TEST_F(CodecTest, SliceFormatOffsetLongHeaderTest) { } { const codec::ColInfo* info = decoder.GetColumnInfo(6); - ASSERT_EQ(::hybridse::type::kVarchar, info->type); + ASSERT_EQ(::hybridse::type::kVarchar, info->type()); - codec::StringColInfo str_info; - decoder.GetStringColumnInfo(6, &str_info); + auto str_info_wp = decoder.GetStringColumnInfo(6); + ASSERT_TRUE(str_info_wp.ok()); + auto& str_info = str_info_wp.value(); LOG(INFO) << "offset: " << str_info.offset << " next_offset: " 
<< str_info.str_next_offset << " str_start_offset " << str_info.str_start_offset; @@ -691,18 +694,18 @@ TEST_F(CodecTest, SparkUnsaferowRowFormatTest) { if (i % 3 == 0) { const codec::ColInfo* info = decoder.GetColumnInfo(i); ASSERT_TRUE(info != nullptr); - ASSERT_EQ(::hybridse::type::kVarchar, info->type); + ASSERT_EQ(::hybridse::type::kVarchar, info->type()); - codec::StringColInfo str_info; - ASSERT_TRUE(decoder.GetStringColumnInfo(i, &str_info)); + auto rs = decoder.GetStringColumnInfo(i); + ASSERT_TRUE(rs.ok()); } else if (i % 3 == 1) { const codec::ColInfo* info = decoder.GetColumnInfo(i); ASSERT_TRUE(info != nullptr); - ASSERT_EQ(::hybridse::type::kInt64, info->type); + ASSERT_EQ(::hybridse::type::kInt64, info->type()); } else if (i % 3 == 2) { const codec::ColInfo* info = decoder.GetColumnInfo(i); ASSERT_TRUE(info != nullptr); - ASSERT_EQ(::hybridse::type::kDouble, info->type); + ASSERT_EQ(::hybridse::type::kDouble, info->type()); } } } diff --git a/hybridse/src/codegen/aggregate_ir_builder.cc b/hybridse/src/codegen/aggregate_ir_builder.cc index 22de3d3d742..f69d24bfbfc 100644 --- a/hybridse/src/codegen/aggregate_ir_builder.cc +++ b/hybridse/src/codegen/aggregate_ir_builder.cc @@ -15,30 +15,39 @@ */ #include "codegen/aggregate_ir_builder.h" -#include #include #include #include #include +#include "absl/container/flat_hash_set.h" #include "codegen/buf_ir_builder.h" #include "codegen/expr_ir_builder.h" #include "codegen/ir_base_builder.h" #include "codegen/variable_ir_builder.h" #include "glog/logging.h" +#include "node/node_manager.h" + namespace hybridse { namespace codegen { -AggregateIRBuilder::AggregateIRBuilder(const vm::SchemasContext* sc, - ::llvm::Module* module, - const node::FrameNode* frame_node, - uint32_t id) - : schema_context_(sc), module_(module), frame_node_(frame_node), id_(id) { - available_agg_func_set_ = {"sum", "avg", "count", "min", "max"}; +static auto CreateAggFuncMap() { + absl::flat_hash_set res = {"sum", "avg", "count", "min", 
"max"}; + return res; +} + +static auto& GetAggFuncMap() { + static const absl::flat_hash_set& res = *new auto(CreateAggFuncMap()); + return res; } -bool AggregateIRBuilder::IsAggFuncName(const std::string& fname) { - return available_agg_func_set_.find(fname) != available_agg_func_set_.end(); +AggregateIRBuilder::AggregateIRBuilder(const vm::SchemasContext* sc, ::llvm::Module* module, + const node::FrameNode* frame_node, uint32_t id) + : schema_context_(sc), module_(module), frame_node_(frame_node), id_(id) {} + +bool AggregateIRBuilder::IsAggFuncName(absl::string_view fname) const { + auto& map = GetAggFuncMap(); + return map.find(fname) != map.end(); } bool AggregateIRBuilder::CollectAggColumn(const hybridse::node::ExprNode* expr, @@ -89,21 +98,23 @@ bool AggregateIRBuilder::CollectAggColumn(const hybridse::node::ExprNode* expr, DLOG(ERROR) << status; return false; } - const codec::ColInfo& col_info = - *schema_context_->GetRowFormat() - ->GetColumnInfo(schema_idx, col_idx); - auto col_type = col_info.type; + const codec::ColInfo& col_info = *schema_context_->GetRowFormat()->GetColumnInfo(schema_idx, col_idx); uint32_t offset = col_info.offset; // resolve llvm agg type - node::DataType node_type; - if (!SchemaType2DataType(col_type, &node_type)) { - LOG(ERROR) << "unrecognized data type " - << hybridse::type::Type_Name(col_type); + node::NodeManager nm; + auto s = ColumnSchema2Type(col_info.schema, &nm); + if (!s.ok()) { + // legacy udf resolve context, this only happens for base types + LOG(ERROR) << s.status(); + return false; + } + auto* type = s.value(); + if (!type->IsBaseType()) { + LOG(INFO) << "skip CollectAggColumn for non-base types"; return false; } - if (GetOutputLlvmType(module_->getContext(), agg_func_name, - node_type) == nullptr) { + if (GetOutputLlvmType(module_->getContext(), agg_func_name, type->base()) == nullptr) { return false; } if (agg_func_name == "count") { @@ -111,11 +122,11 @@ bool AggregateIRBuilder::CollectAggColumn(const 
hybridse::node::ExprNode* expr, } else if (agg_func_name == "avg") { *res_agg_type = ::hybridse::type::kDouble; } else { - *res_agg_type = col_type; + *res_agg_type = col_info.schema.base_type(); } std::string col_key = absl::StrCat(rel_name, ".", col_name); - auto res = agg_col_infos_.try_emplace(col_key, col, node_type, schema_idx, col_idx, offset); + auto res = agg_col_infos_.try_emplace(col_key, col, type->base(), schema_idx, col_idx, offset); res.first->second.AddAgg(agg_func_name, output_idx); return true; } diff --git a/hybridse/src/codegen/aggregate_ir_builder.h b/hybridse/src/codegen/aggregate_ir_builder.h index f0af7163cdd..70c69c7438f 100644 --- a/hybridse/src/codegen/aggregate_ir_builder.h +++ b/hybridse/src/codegen/aggregate_ir_builder.h @@ -89,8 +89,6 @@ class AggregateIRBuilder { bool CollectAggColumn(const node::ExprNode* expr, size_t output_idx, ::hybridse::type::Type* col_type); - bool IsAggFuncName(const std::string& fname); - static llvm::Type* GetOutputLlvmType( ::llvm::LLVMContext& llvm_ctx, // NOLINT const std::string& fname, const node::DataType& node_type); @@ -105,13 +103,14 @@ class AggregateIRBuilder { bool empty() const { return agg_col_infos_.empty(); } private: + bool IsAggFuncName(absl::string_view fname) const; + // schema context of input node const vm::SchemasContext* schema_context_; ::llvm::Module* module_; const node::FrameNode* frame_node_; uint32_t id_; - std::set available_agg_func_set_; std::unordered_map agg_col_infos_; }; diff --git a/hybridse/src/codegen/buf_ir_builder.cc b/hybridse/src/codegen/buf_ir_builder.cc index 3b7c0887ca6..b0aadccab97 100644 --- a/hybridse/src/codegen/buf_ir_builder.cc +++ b/hybridse/src/codegen/buf_ir_builder.cc @@ -15,15 +15,18 @@ */ #include "codegen/buf_ir_builder.h" + #include #include #include + #include "codec/fe_row_codec.h" #include "codegen/date_ir_builder.h" #include "codegen/ir_base_builder.h" #include "codegen/string_ir_builder.h" #include "codegen/timestamp_ir_builder.h" #include 
"glog/logging.h" +#include "node/node_manager.h" DECLARE_bool(enable_spark_unsaferow_format); @@ -46,7 +49,6 @@ bool BufNativeIRBuilder::BuildGetField(size_t col_idx, ::llvm::Value* slice_ptr, return false; } - node::TypeNode data_type; const codec::ColInfo* col_info = format_->GetColumnInfo(schema_idx_, col_idx); if (col_info == nullptr) { LOG(WARNING) << "fail to resolve field info at " << col_idx; @@ -56,14 +58,17 @@ bool BufNativeIRBuilder::BuildGetField(size_t col_idx, ::llvm::Value* slice_ptr, // Get the corrected column index from RowFormat auto row_format_corrected_col_idx = col_info->idx; - if (!SchemaType2DataType(col_info->type, &data_type)) { - LOG(WARNING) << "unrecognized data type " + hybridse::type::Type_Name(col_info->type); + node::NodeManager tmp_nm; + auto s = ColumnSchema2Type(col_info->schema, &tmp_nm); + if (!s.ok()) { + // On failure only status() is valid; value() must not be read from a non-OK StatusOr. + LOG(WARNING) << s.status(); return false; } uint32_t offset = col_info->offset; ::llvm::IRBuilder<> builder(block_); - switch (data_type.base_) { + switch (s.value()->base()) { case ::hybridse::node::kBool: { llvm::Type* bool_ty = builder.getInt1Ty(); return BuildGetPrimaryField("hybridse_storage_get_bool_field", row_ptr, row_format_corrected_col_idx, @@ -125,17 +130,19 @@ bool BufNativeIRBuilder::BuildGetField(size_t col_idx, ::llvm::Value* slice_ptr, } case ::hybridse::node::kVarchar: { - codec::StringColInfo str_info; - if (!format_->GetStringColumnInfo(schema_idx_, col_idx, &str_info)) { - LOG(WARNING) << "fail to get string filed offset and next offset " << col_info->name; + auto s = format_->GetStringColumnInfo(schema_idx_, col_idx); + if (!s.ok()) { + LOG(WARNING) << "fail to get string filed offset and next offset " << s.status(); + return false; } + auto& str_info = s.value(); DLOG(INFO) << "get string with offset " << offset << " next offset " << str_info.str_next_offset << " for col " << col_idx; return BuildGetStringField(str_info.idx, offset, str_info.str_next_offset, str_info.str_start_offset, row_ptr, row_size, 
output); } default: { - LOG(WARNING) << "fail to get col for type: " << data_type.GetName(); + LOG(WARNING) << "fail to get col for type: " << s.value()->DebugString(); return false; } } diff --git a/hybridse/src/codegen/fn_let_ir_builder.cc b/hybridse/src/codegen/fn_let_ir_builder.cc index 6d8e86e3933..8b309118e9a 100644 --- a/hybridse/src/codegen/fn_let_ir_builder.cc +++ b/hybridse/src/codegen/fn_let_ir_builder.cc @@ -235,8 +235,9 @@ Status RowFnLetIRBuilder::BuildProject( kCodegenError, "Fail to get output type at ", index, ", expect ", expr->GetOutputType()->GetName()); - ::hybridse::type::Type ctype; - CHECK_TRUE(DataType2SchemaType(*data_type, &ctype), kCodegenError); + ::hybridse::type::ColumnSchema schema; + auto s = Type2ColumnSchema(data_type, &schema); + CHECK_TRUE(s.ok(), kCodegenError, s.ToString()); outputs->insert(std::make_pair(index, expr_out_val)); return Status::OK(); diff --git a/hybridse/src/codegen/ir_base_builder.cc b/hybridse/src/codegen/ir_base_builder.cc index 81fadbfdd3d..15a35924664 100644 --- a/hybridse/src/codegen/ir_base_builder.cc +++ b/hybridse/src/codegen/ir_base_builder.cc @@ -15,6 +15,7 @@ */ #include "codegen/ir_base_builder.h" +#include #include #include @@ -27,6 +28,7 @@ #include "codegen/timestamp_ir_builder.h" #include "glog/logging.h" #include "node/node_manager.h" +#include "proto/fe_type.pb.h" namespace hybridse { namespace codegen { @@ -777,6 +779,104 @@ bool DataType2SchemaType(const ::hybridse::node::TypeNode& type, } return true; } + +static auto CreateBaseDataType2SchemaTypeMap() { + // type mapping for SQL base types only + absl::flat_hash_map map = {{node::DataType::kBool, type::kBool}, + {node::DataType::kInt16, type::kInt16}, + {node::DataType::kInt32, type::kInt32}, + {node::DataType::kInt64, type::kInt64}, + {node::DataType::kFloat, type::kFloat}, + {node::DataType::kDouble, type::kDouble}, + {node::DataType::kVarchar, type::kVarchar}, + {node::DataType::kDate, type::kDate}, + {node::DataType::kTimestamp, 
type::kTimestamp}, + + // historic reason, null is bool during encoding + {node::DataType::kNull, type::kBool}, + {node::DataType::kVoid, type::kBool}}; + return map; +} + +static const auto& GetBaseDataType2SchemaTypeMap() { + static const absl::flat_hash_map& map = *new auto(CreateBaseDataType2SchemaTypeMap()); + return map; +} +static auto CreateSchemaType2BaseDataTypeMap() { + // type mapping for SQL base types only + absl::flat_hash_map map = { + {type::kBool, node::DataType::kBool}, {type::kInt16, node::DataType::kInt16}, + {type::kInt32, node::DataType::kInt32}, {type::kInt64, node::DataType::kInt64}, + {type::kFloat, node::DataType::kFloat}, {type::kDouble, node::DataType::kDouble}, + {type::kVarchar, node::DataType::kVarchar}, {type::kDate, node::DataType::kDate}, + {type::kTimestamp, node::DataType::kTimestamp}}; + return map; +} + +static const auto& GetSchemaType2BaseTypeMap() { + static const absl::flat_hash_map& map = *new auto(CreateSchemaType2BaseDataTypeMap()); + return map; +} + +absl::Status Type2ColumnSchema(const node::TypeNode* type, type::ColumnSchema* mut_schema) { + if (type->IsMap()) { + assert(type->GetGenericSize() == 2); + auto* mut_map_type = mut_schema->mutable_map_type(); + auto* mut_map_key_type = mut_map_type->mutable_key_type(); + auto* mut_map_value_type = mut_map_type->mutable_value_type(); + auto s = Type2ColumnSchema(type->GetGenericType(0), mut_map_key_type); + s.Update(Type2ColumnSchema(type->GetGenericType(1), mut_map_value_type)); + return s; + } else if (type->IsArray()) { + assert(type->GetGenericSize() == 1); + auto* mut_array_type = mut_schema->mutable_array_type(); + auto* mut_array_ele_type = mut_array_type->mutable_ele_type(); + return Type2ColumnSchema(type->GetGenericType(0), mut_array_ele_type); + } + + // simple type + auto& map = GetBaseDataType2SchemaTypeMap(); + auto it = map.find(type->base()); + if (it == map.end()) { + return absl::UnimplementedError(absl::StrCat("unable to convert from ", 
type->DebugString())); + } + mut_schema->set_base_type(it->second); + return absl::OkStatus(); +} + +absl::StatusOr ColumnSchema2Type(const type::ColumnSchema& schema, node::NodeManager* tmp_nm) { + if (schema.has_map_type()) { + auto& map_type = schema.map_type(); + auto s1 = ColumnSchema2Type(map_type.key_type(), tmp_nm); + if (!s1.ok()) { + return s1.status(); + } + auto s2 = ColumnSchema2Type(map_type.value_type(), tmp_nm); + if (!s2.ok()) { + return s2.status(); + } + + return tmp_nm->MakeNode(s1.value(), s2.value()); + } else if (schema.has_array_type()) { + auto& arr_type = schema.array_type(); + auto s = ColumnSchema2Type(arr_type.ele_type(), tmp_nm); + if (!s.ok()) { + return s.status(); + } + return tmp_nm->MakeNode(node::kArray, s.value()); + } else if (schema.has_base_type()) { + auto& map = GetSchemaType2BaseTypeMap(); + auto it = map.find(schema.base_type()); + if (it == map.end()) { + return absl::UnimplementedError(absl::StrCat("column schema to type node: ", schema.DebugString())); + } + + return tmp_nm->MakeNode(it->second); + } + + return absl::UnimplementedError(absl::StrCat("unknown type: ", schema.DebugString())); +} + bool SchemaType2DataType(const ::hybridse::type::Type type, ::hybridse::node::TypeNode* output) { if (nullptr == output) { diff --git a/hybridse/src/codegen/ir_base_builder.h b/hybridse/src/codegen/ir_base_builder.h index db2075289cf..825e8f8fcd2 100644 --- a/hybridse/src/codegen/ir_base_builder.h +++ b/hybridse/src/codegen/ir_base_builder.h @@ -19,6 +19,8 @@ #include #include + +#include "absl/base/attributes.h" #include "llvm/IR/IRBuilder.h" #include "node/sql_node.h" #include "node/type_node.h" @@ -48,12 +50,18 @@ bool IsStringType(::llvm::Type* type); bool GetFullType(node::NodeManager* nm, ::llvm::Type* type, const ::hybridse::node::TypeNode** type_node); -bool SchemaType2DataType(const ::hybridse::type::Type type, - ::hybridse::node::DataType* output); -bool SchemaType2DataType(const ::hybridse::type::Type type, - 
::hybridse::node::TypeNode* output); -bool DataType2SchemaType(const ::hybridse::node::TypeNode& type, - ::hybridse::type::Type* output); +[[deprecated("can't handle comple data type, use ColumnSchema2Type instead")]] +bool SchemaType2DataType(const ::hybridse::type::Type type, ::hybridse::node::DataType* output); + +[[deprecated("can't handle comple data type, use ColumnSchema2Type instead")]] +bool SchemaType2DataType(const ::hybridse::type::Type type, ::hybridse::node::TypeNode* output); + +[[deprecated("can not handle complex type, use Type2ColumnSchema instead")]] +bool DataType2SchemaType(const ::hybridse::node::TypeNode& type, ::hybridse::type::Type* output); + +absl::Status Type2ColumnSchema(const node::TypeNode* type, type::ColumnSchema* mut_schema) ABSL_ATTRIBUTE_NONNULL(); + +absl::StatusOr ColumnSchema2Type(const type::ColumnSchema& schema, node::NodeManager* tmp_nm); bool GetConstFeString(const std::string& val, ::llvm::BasicBlock* block, ::llvm::Value** output); diff --git a/hybridse/src/codegen/window_ir_builder.cc b/hybridse/src/codegen/window_ir_builder.cc index 672e7acc6f5..c03a5b0b184 100644 --- a/hybridse/src/codegen/window_ir_builder.cc +++ b/hybridse/src/codegen/window_ir_builder.cc @@ -15,12 +15,13 @@ */ #include "codegen/window_ir_builder.h" + #include -#include -#include + #include "codec/fe_row_codec.h" #include "codegen/ir_base_builder.h" #include "glog/logging.h" +#include "node/node_manager.h" namespace hybridse { namespace codegen { @@ -147,7 +148,6 @@ bool MemoryWindowDecodeIRBuilder::BuildGetCol(size_t schema_idx, size_t col_idx, LOG(WARNING) << "input args have null"; return false; } - ::hybridse::node::TypeNode data_type; auto row_format = schemas_context_->GetRowFormat(); if (row_format == nullptr) { LOG(WARNING) << "fail to get row format at " << schema_idx; @@ -162,13 +162,15 @@ bool MemoryWindowDecodeIRBuilder::BuildGetCol(size_t schema_idx, size_t col_idx, auto row_format_corrected_col_idx = col_info->idx; - if 
(!SchemaType2DataType(col_info->type, &data_type)) { - LOG(WARNING) << "unrecognized data type " + - hybridse::type::Type_Name(col_info->type); + node::NodeManager tmp_nm; + auto rs = ColumnSchema2Type(col_info->schema, &tmp_nm); + if (!rs.ok()) { + LOG(WARNING) << rs.status(); return false; } ::llvm::IRBuilder<> builder(block_); - switch (data_type.base_) { + auto* data_type = rs.value(); + switch (data_type->base_) { case ::hybridse::node::kBool: case ::hybridse::node::kInt16: case ::hybridse::node::kInt32: @@ -177,29 +179,34 @@ bool MemoryWindowDecodeIRBuilder::BuildGetCol(size_t schema_idx, size_t col_idx, case ::hybridse::node::kDouble: case ::hybridse::node::kTimestamp: case ::hybridse::node::kDate: { - return BuildGetPrimaryCol("hybridse_storage_get_col", window_ptr, - schema_idx, row_format_corrected_col_idx, col_info->offset, - &data_type, output); + if (!col_info->schema.has_base_type()) { + LOG(WARNING) << "input type is not base type: " << col_info->schema.DebugString(); + return false; + } + return BuildGetPrimaryCol("hybridse_storage_get_col", window_ptr, schema_idx, row_format_corrected_col_idx, + col_info->offset, data_type, col_info->schema.base_type(), output); } case ::hybridse::node::kVarchar: { - codec::StringColInfo str_col_info; - if (!schemas_context_->GetRowFormat() - ->GetStringColumnInfo(schema_idx, col_idx, &str_col_info)) { - LOG(WARNING) - << "fail to get string filed offset and next offset" - << " at " << col_idx; + auto s = schemas_context_->GetRowFormat() + ->GetStringColumnInfo(schema_idx, col_idx); + if (!s.ok()) { + LOG(WARNING) << "fail to get string filed offset and next offset" + << " at " << col_idx << ": " << s.status(); + // Bail out: value() must not be read from a non-OK StatusOr. + return false; } + auto& str_col_info = s.value(); DLOG(INFO) << "get string with offset " << str_col_info.offset << " next offset " << str_col_info.str_next_offset << " for col at " << str_col_info.name; - return BuildGetStringCol( - schema_idx, str_col_info.idx, str_col_info.offset, - str_col_info.str_next_offset, 
str_col_info.str_start_offset, - &data_type, window_ptr, output); + return BuildGetStringCol(schema_idx, str_col_info.idx, str_col_info.offset, str_col_info.str_next_offset, + str_col_info.str_start_offset, data_type, window_ptr, output); + } + case ::hybridse::node::kMap: { + // WIP } default: { - LOG(WARNING) << "Fail get col, invalid data type " - << data_type.GetName(); + LOG(WARNING) << "Fail get col, invalid data type " << data_type->DebugString(); return false; } } @@ -207,7 +214,7 @@ bool MemoryWindowDecodeIRBuilder::BuildGetCol(size_t schema_idx, size_t col_idx, bool MemoryWindowDecodeIRBuilder::BuildGetPrimaryCol( const std::string& fn_name, ::llvm::Value* row_ptr, size_t schema_idx, - size_t col_idx, uint32_t offset, hybridse::node::TypeNode* type, + size_t col_idx, uint32_t offset, hybridse::node::TypeNode* type, type::Type base_type, ::llvm::Value** output) { if (row_ptr == NULL || output == NULL) { LOG(WARNING) << "input args have null ptr"; @@ -248,15 +255,8 @@ bool MemoryWindowDecodeIRBuilder::BuildGetPrimaryCol( ::llvm::Value* val_schema_idx = builder.getInt32(schema_idx); ::llvm::Value* val_col_idx = builder.getInt32(col_idx); ::llvm::Value* val_offset = builder.getInt32(offset); - ::hybridse::type::Type schema_type; - if (!DataType2SchemaType(*type, &schema_type)) { - LOG(WARNING) << "fail to convert data type to schema type: " - << type->GetName(); - return false; - } + ::llvm::Value* val_type_id = builder.getInt32(static_cast(base_type)); - ::llvm::Value* val_type_id = - builder.getInt32(static_cast(schema_type)); ::llvm::FunctionCallee callee = block_->getModule()->getOrInsertFunction( fn_name, i32_ty, i8_ptr_ty, i32_ty, i32_ty, i32_ty, i32_ty, i8_ptr_ty); builder.CreateCall(callee, {row_ptr, val_schema_idx, val_col_idx, @@ -316,14 +316,9 @@ bool MemoryWindowDecodeIRBuilder::BuildGetStringCol( ::llvm::Value* val_col_idx = builder.getInt32(col_idx); ::llvm::Value* str_offset = builder.getInt32(offset); ::llvm::Value* next_str_offset = 
builder.getInt32(next_str_field_offset); - ::hybridse::type::Type schema_type; - if (!DataType2SchemaType(*type, &schema_type)) { - LOG(WARNING) << "fail to convert data type to schema type: " - << type->GetName(); - return false; - } - ::llvm::Value* val_type_id = - builder.getInt32(static_cast(schema_type)); + + ::llvm::Value* val_type_id = builder.getInt32(static_cast(type::kVarchar)); + builder.CreateCall( callee, {window_ptr, val_schema_idx, val_col_idx, str_offset, next_str_offset, diff --git a/hybridse/src/codegen/window_ir_builder.h b/hybridse/src/codegen/window_ir_builder.h index 9d130c43936..d8e76369e24 100644 --- a/hybridse/src/codegen/window_ir_builder.h +++ b/hybridse/src/codegen/window_ir_builder.h @@ -17,15 +17,10 @@ #ifndef HYBRIDSE_SRC_CODEGEN_WINDOW_IR_BUILDER_H_ #define HYBRIDSE_SRC_CODEGEN_WINDOW_IR_BUILDER_H_ -#include #include -#include -#include -#include "codec/fe_row_codec.h" -#include "codegen/ir_base_builder.h" -#include "llvm/IR/IRBuilder.h" + +#include "llvm/IR/Value.h" #include "proto/fe_type.pb.h" -#include "vm/catalog.h" #include "vm/schemas_context.h" namespace hybridse { @@ -65,11 +60,13 @@ class MemoryWindowDecodeIRBuilder : public WindowDecodeIRBuilder { ::llvm::Value* window_ptr, ::llvm::Value** output); private: + // get value from a base type column except string bool BuildGetPrimaryCol(const std::string& fn_name, ::llvm::Value* row_ptr, size_t schema_idx, size_t col_idx, uint32_t offset, - hybridse::node::TypeNode* type, + hybridse::node::TypeNode* type, type::Type base_type, ::llvm::Value** output); + // get value from a string column bool BuildGetStringCol(size_t schema_idx, size_t col_idx, uint32_t offset, uint32_t next_str_field_offset, uint32_t str_start_offset, diff --git a/hybridse/src/plan/planner.cc b/hybridse/src/plan/planner.cc index e05e639efb1..c60f8fce155 100644 --- a/hybridse/src/plan/planner.cc +++ b/hybridse/src/plan/planner.cc @@ -1152,6 +1152,8 @@ base::Status Planner::TransformTableDef(const std::string 
&table_name, const Nod node::DataTypeName(column_def->GetColumnType()), " is not supported") } } + + column->mutable_schema()->set_base_type(column->type()); break; } diff --git a/hybridse/src/proto/fe_type.proto b/hybridse/src/proto/fe_type.proto index 4ecb5ec75e6..501a0a76b85 100644 --- a/hybridse/src/proto/fe_type.proto +++ b/hybridse/src/proto/fe_type.proto @@ -40,12 +40,49 @@ enum TTLType { kTTLNone = 5; } +message ArrayType { + optional ColumnSchema ele_type = 1; +} + +message MapType { + optional ColumnSchema key_type = 1; + optional ColumnSchema value_type = 2; +} + +message ColumnSchema { + oneof type { + Type base_type = 1; + ArrayType array_type = 2; + MapType map_type = 3; + } + // reserve 4 - 100 for further complex types + + optional bool is_not_null = 101 [ default = false ]; // field attribute 'NOT NULL' +} + message ColumnDef { optional string name = 1; - optional Type type = 2; - optional uint32 offset = 3; + + // legacy column type specification + + // Deprecated: use ColumnSchema::type. + // Currently, both `type` and `schema` are set for base types; + // complex types, like array & map, only `schema` is set, be cautious accessing the type field. + optional Type type = 2 [deprecated = true]; + optional uint32 offset = 3 [deprecated = true]; // I do not actually see usage of this field + + // 'NOT NULL' is the only one special attribute from CREATE TABLE statement, that is valid for + // both column and field. So for example, 'col1 INT NOT NULL' and 'col2 ARRAY' are + // both valid. + // + // 'CONSTANT' seems not relevant to table definition, but from function parameter. 
+ // + // For simplification and unclear requirements, we do not touch the two fields optional bool is_not_null = 4; optional bool is_constant = 5 [default = false]; + + // new column type specification since v0.9.0 + optional ColumnSchema schema = 6; } message IndexDef { diff --git a/hybridse/src/udf/literal_traits.h b/hybridse/src/udf/literal_traits.h index 2c79c8a365d..4baba818357 100644 --- a/hybridse/src/udf/literal_traits.h +++ b/hybridse/src/udf/literal_traits.h @@ -587,6 +587,7 @@ codec::Schema MakeLiteralSchema() { ::hybridse::type::ColumnDef* col = schema.Add(); col->set_name("col_" + std::to_string(i)); col->set_type(static_cast<::hybridse::type::Type>(types[i])); + col->mutable_schema()->set_base_type(col->type()); } return schema; } diff --git a/hybridse/src/vm/physical_plan_context.cc b/hybridse/src/vm/physical_plan_context.cc index a9771218dbc..66eb9cf311f 100644 --- a/hybridse/src/vm/physical_plan_context.cc +++ b/hybridse/src/vm/physical_plan_context.cc @@ -93,7 +93,6 @@ Status PhysicalPlanContext::InitFnDef(const ColumnProjects& projects, const Sche column_def.set_name(projects.GetName(i)); column_def.set_is_not_null(false); - type::Type column_type; auto resolved_expr = expr_list->GetChild(i); // TODO(xxx): legacy udf type infer @@ -113,10 +112,13 @@ Status PhysicalPlanContext::InitFnDef(const ColumnProjects& projects, const Sche CHECK_TRUE(resolved_expr->GetOutputType() != nullptr, kPlanError, "Fail to resolve expression: ", resolved_expr->GetExprString()); - CHECK_TRUE(codegen::DataType2SchemaType(*resolved_expr->GetOutputType(), &column_type), kPlanError, - "Invalid expression: ", resolved_expr->GetExprString(), " with illegal type ", - resolved_expr->GetOutputType()->GetName()); - column_def.set_type(column_type); + auto* mut_col_schema = column_def.mutable_schema(); + auto as = codegen::Type2ColumnSchema(resolved_expr->GetOutputType(), mut_col_schema); + if (mut_col_schema->has_base_type()) { + // backwards compatibility to types field + 
column_def.set_type(mut_col_schema->base_type()); + } + CHECK_TRUE(as.ok(), kPlanError, as.ToString(), " for expression ", resolved_expr->GetExprString()); auto frame = has_agg ? projects.GetFrame(i) : nullptr; output_fn->AddOutputColumn(column_def, frame); diff --git a/hybridse/src/vm/simple_catalog.cc b/hybridse/src/vm/simple_catalog.cc index 76093858ace..7e97f92027a 100644 --- a/hybridse/src/vm/simple_catalog.cc +++ b/hybridse/src/vm/simple_catalog.cc @@ -70,8 +70,14 @@ SimpleCatalogTableHandler::SimpleCatalogTableHandler( // init types var for (int32_t i = 0; i < table_def.columns_size(); i++) { const type::ColumnDef &column = table_def.columns(i); - codec::ColInfo col_info(column.name(), column.type(), i, 0); - types_dict_.insert(std::make_pair(column.name(), col_info)); + if (column.has_schema()) { + // new schema field + types_dict_.emplace(column.name(), ColInfo(column.name(), column.schema(), i, 0)); + } else { + // old type field + codec::ColInfo col_info(column.name(), column.type(), i, 0); + types_dict_.emplace(column.name(), col_info); + } } // init index hint @@ -165,12 +171,15 @@ bool SimpleCatalogTableHandler::DecodeKeysAndTs(const IndexSt &index, uint32_t size, std::string &key, int64_t *time_ptr) { for (const auto &col : index.keys) { + // expect keys and ts as base types, so calling 'type()' is generally safe + assert(col.schema.has_base_type()); + if (!key.empty()) { key.append("|"); } if (row_view_.IsNULL(buf, col.idx)) { key.append(codec::NONETOKEN); - } else if (col.type == ::hybridse::type::kVarchar) { + } else if (col.type() == ::hybridse::type::kVarchar) { const char *val = NULL; uint32_t length = 0; row_view_.GetValue(buf, col.idx, &val, &length); @@ -181,7 +190,7 @@ bool SimpleCatalogTableHandler::DecodeKeysAndTs(const IndexSt &index, } } else { int64_t value = 0; - row_view_.GetInteger(buf, col.idx, col.type, &value); + row_view_.GetInteger(buf, col.idx, col.type(), &value); key.append(std::to_string(value)); } } diff --git 
a/src/catalog/sdk_catalog.cc b/src/catalog/sdk_catalog.cc index 94bc7dad8cd..40fea86a8d1 100644 --- a/src/catalog/sdk_catalog.cc +++ b/src/catalog/sdk_catalog.cc @@ -61,7 +61,7 @@ bool SDKTableHandler::Init() { for (int32_t i = 0; i < cur_schema->size(); i++) { const ::hybridse::type::ColumnDef& column = cur_schema->Get(i); ::hybridse::vm::ColInfo col_info; - col_info.type = column.type(); + col_info.schema.set_base_type(column.type()); col_info.idx = i; col_info.name = column.name(); types_.emplace(column.name(), col_info); diff --git a/src/catalog/tablet_catalog.cc b/src/catalog/tablet_catalog.cc index 233077f32fb..570bb8d87e4 100644 --- a/src/catalog/tablet_catalog.cc +++ b/src/catalog/tablet_catalog.cc @@ -71,7 +71,7 @@ bool TabletTableHandler::Init(const ClientManager& client_manager) { for (int32_t i = 0; i < schema_.size(); i++) { const ::hybridse::type::ColumnDef& column = schema_.Get(i); ::hybridse::vm::ColInfo col_info; - col_info.type = column.type(); + col_info.schema.set_base_type(column.type()); col_info.idx = i; col_info.name = column.name(); types_.insert(std::make_pair(column.name(), col_info));