From 512d9864fb583958b8306cb312a7a76b721f2154 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Wed, 16 Apr 2025 11:17:51 +0200 Subject: [PATCH 1/6] refactor(cubesql): Remove unused rule for member pushdown from AllMembers All matching happening with CubeScanMembers on top of actual members node, and this rule does not expect that member_name_to_expr part of analysis can extract members from CubeScanMembers(AllMembers), so there's no reason to keep this rule --- .../src/compile/rewrite/rules/members.rs | 157 +----------------- 1 file changed, 5 insertions(+), 152 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 1196334ec329a..10869a0b426d1 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -19,16 +19,15 @@ use crate::{ transforming_chain_rewrite, transforming_rewrite, transforming_rewrite_with_root, udaf_expr, udf_expr, virtual_field_expr, AggregateFunctionExprDistinct, AggregateFunctionExprFun, AliasExprAlias, AllMembersAlias, AllMembersCube, - BinaryExprOp, CastExprDataType, ChangeUserCube, ColumnExprColumn, CubeScanAliasToCube, + BinaryExprOp, CastExprDataType, ColumnExprColumn, CubeScanAliasToCube, CubeScanCanPushdownJoin, CubeScanLimit, CubeScanOffset, CubeScanUngrouped, DimensionName, JoinLeftOn, JoinRightOn, LikeExprEscapeChar, LikeExprLikeType, LikeExprNegated, LikeType, LimitFetch, LimitSkip, ListType, LiteralExprValue, LiteralMemberRelation, LiteralMemberValue, LogicalPlanLanguage, MeasureName, MemberErrorAliasToCube, MemberErrorError, MemberErrorPriority, MemberPushdownReplacerAliasToCube, MemberReplacerAliasToCube, ProjectionAlias, - SegmentName, TableScanFetch, TableScanProjection, TableScanSourceTableName, - TableScanTableName, TimeDimensionDateRange, TimeDimensionGranularity, - TimeDimensionName, VirtualFieldCube, VirtualFieldName, + TableScanFetch, TableScanProjection, TableScanSourceTableName, TableScanTableName, + TimeDimensionDateRange, TimeDimensionGranularity, TimeDimensionName, }, }, config::ConfigObj, @@ -538,8 +537,7 @@ impl MemberRules { let find_matching_old_member_with_count = |name: &str, column_expr: String, default_count: bool| { - vec![ - transforming_rewrite( + vec![transforming_rewrite( &format!( "member-pushdown-replacer-column-find-matching-old-member-{}", name @@ -562,31 +560,7 @@ impl MemberRules { "?filtered_member_pushdown_replacer_alias_to_cube", default_count, ), - ), - transforming_rewrite( - &format!( - "member-pushdown-replacer-column-find-matching-old-member-{}-select-member-from-all-members", - name - ), - member_pushdown_replacer( - column_expr.clone(), - all_members("?cube", "?all_members_alias"), - "?member_pushdown_replacer_alias_to_cube", - ), - member_pushdown_replacer( - column_expr.clone(), - "?terminal_member", - "?member_pushdown_replacer_alias_to_cube", - ), - self.select_from_all_member_by_column( - "?cube", - "?member_pushdown_replacer_alias_to_cube", - "?column", - "?terminal_member", - default_count - ), - ), - ] + )] }; let find_matching_old_member = |name: &str, column_expr: String| { @@ -2050,127 +2024,6 @@ impl MemberRules { } } - fn select_from_all_member_by_column( - &self, - cube_var: &'static str, - member_pushdown_replacer_alias_to_cube_var: &'static str, - column_var: &'static str, - member_var: &'static str, - default_count: bool, - ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { - let cube_var = var!(cube_var); - let member_pushdown_replacer_alias_to_cube_var = - var!(member_pushdown_replacer_alias_to_cube_var); - let column_var = var!(column_var); - let member_var = var!(member_var); - let meta = self.meta_context.clone(); - move |egraph, subst| { - for alias_to_cube in var_iter!( - egraph[subst[member_pushdown_replacer_alias_to_cube_var]], - MemberPushdownReplacerAliasToCube - ) - .cloned() - { - // alias_to_cube at this point is already filtered to a single cube - let cube_alias = alias_to_cube.first().unwrap().0 .1.to_string(); - for cube in var_iter!(egraph[subst[cube_var]], AllMembersCube).cloned() { - let column_iter = if default_count { - vec![Column::from_name(Self::default_count_measure_name())] - } else { - var_iter!(egraph[subst[column_var]], ColumnExprColumn) - .cloned() - .collect() - }; - for column in column_iter { - if let Some(cube) = meta.find_cube_with_name(&cube) { - let alias_expr = |egraph| { - Self::add_alias_column( - egraph, - column.name.to_string(), - Some(cube_alias.clone()), - ) - }; - - if let Some(dimension) = cube.lookup_dimension(&column.name) { - let dimension_name = - egraph.add(LogicalPlanLanguage::DimensionName(DimensionName( - dimension.name.to_string(), - ))); - - let alias = alias_expr(egraph); - subst.insert( - member_var, - egraph.add(LogicalPlanLanguage::Dimension([ - dimension_name, - alias, - ])), - ); - return true; - } - - if let Some(measure) = cube.lookup_measure(&column.name) { - let measure_name = egraph.add(LogicalPlanLanguage::MeasureName( - MeasureName(measure.name.to_string()), - )); - let alias = alias_expr(egraph); - subst.insert( - member_var, - egraph.add(LogicalPlanLanguage::Measure([measure_name, alias])), - ); - return true; - } - - if let Some(segment) = cube.lookup_segment(&column.name) { - let segment_name = egraph.add(LogicalPlanLanguage::SegmentName( - SegmentName(segment.name.to_string()), - )); - let alias = alias_expr(egraph); - subst.insert( - member_var, - egraph.add(LogicalPlanLanguage::Segment([segment_name, alias])), - ); - return true; - } - - let member_name = column.name.to_string(); - - if member_name.eq_ignore_ascii_case(&"__user") { - let cube = egraph.add(LogicalPlanLanguage::ChangeUserCube( - ChangeUserCube(cube.name.to_string()), - )); - let alias = alias_expr(egraph); - subst.insert( - member_var, - egraph.add(LogicalPlanLanguage::ChangeUser([cube, alias])), - ); - return true; - } - - if member_name.eq_ignore_ascii_case(&"__cubeJoinField") { - let field_name = egraph.add(LogicalPlanLanguage::VirtualFieldName( - VirtualFieldName(column.name.to_string()), - )); - let cube = egraph.add(LogicalPlanLanguage::VirtualFieldCube( - VirtualFieldCube(cube.name.to_string()), - )); - let alias = alias_expr(egraph); - subst.insert( - member_var, - egraph.add(LogicalPlanLanguage::VirtualField([ - field_name, cube, alias, - ])), - ); - - return true; - } - } - } - } - } - false - } - } - fn pushdown_measure( &self, member_pushdown_replacer_alias_to_cube_var: &'static str, From 5d22f89e13130d4154cd7734a6bd09a3bfc4ad14 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Wed, 16 Apr 2025 11:34:56 +0200 Subject: [PATCH 2/6] refactor(cubesql): Remove unnecessary intermediate Vec --- .../src/compile/rewrite/rules/members.rs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 10869a0b426d1..ee262eddd9ff5 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -537,7 +537,7 @@ impl MemberRules { let find_matching_old_member_with_count = |name: &str, column_expr: String, default_count: bool| { - vec![transforming_rewrite( + transforming_rewrite( &format!( "member-pushdown-replacer-column-find-matching-old-member-{}", name @@ -560,7 +560,7 @@ impl MemberRules { "?filtered_member_pushdown_replacer_alias_to_cube", default_count, ), - )] + ) }; let find_matching_old_member = |name: &str, column_expr: String| { @@ -606,27 +606,27 @@ impl MemberRules { member_replacer_fn, )); } - rules.extend(find_matching_old_member("column", column_expr("?column"))); - rules.extend(find_matching_old_member( + rules.push(find_matching_old_member("column", column_expr("?column"))); + rules.push(find_matching_old_member( "alias", alias_expr(column_expr("?column"), "?alias"), )); - rules.extend(find_matching_old_member( + rules.push(find_matching_old_member( "agg-fun", agg_fun_expr("?fun_name", vec![column_expr("?column")], "?distinct"), )); - rules.extend(find_matching_old_member( + rules.push(find_matching_old_member( "agg-fun-alias", alias_expr( agg_fun_expr("?fun_name", vec![column_expr("?column")], "?distinct"), "?alias", ), )); - rules.extend(find_matching_old_member( + rules.push(find_matching_old_member( "udaf-fun", udaf_expr(MEASURE_UDAF_NAME, vec![column_expr("?column")]), )); - rules.extend(find_matching_old_member_with_count( + rules.push(find_matching_old_member_with_count( "agg-fun-default-count", agg_fun_expr( "Count", @@ -635,7 +635,7 @@ impl MemberRules { ), true, )); - rules.extend(find_matching_old_member_with_count( + rules.push(find_matching_old_member_with_count( "agg-fun-default-count-alias", alias_expr( agg_fun_expr( @@ -647,7 +647,7 @@ impl MemberRules { ), true, )); - rules.extend(find_matching_old_member( + rules.push(find_matching_old_member( "agg-fun-with-cast", // TODO need to check data_type if we can remove the cast agg_fun_expr( @@ -656,14 +656,14 @@ impl MemberRules { "?distinct", ), )); - rules.extend(find_matching_old_member( + rules.push(find_matching_old_member( "date-trunc", self.fun_expr( "DateTrunc", vec![literal_expr("?granularity"), column_expr("?column")], ), )); - rules.extend(find_matching_old_member( + rules.push(find_matching_old_member( "date-trunc-with-alias", // TODO need to check data_type if we can remove the cast alias_expr( From 86ba105afc1f7fc78f0ae15d1c55f48f11d88fb6 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Wed, 16 Apr 2025 12:35:51 +0200 Subject: [PATCH 3/6] refactor(cubesql): Rename find_matching_old_member --- rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index ee262eddd9ff5..6c1d3b394382e 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -552,7 +552,7 @@ impl MemberRules { "?terminal_member", "?filtered_member_pushdown_replacer_alias_to_cube", ), - self.find_matching_old_member( + self.transform_find_matching_old_member( "?member_pushdown_replacer_alias_to_cube", "?column", "?old_members", @@ -1944,7 +1944,7 @@ impl MemberRules { ) } - fn find_matching_old_member( + fn transform_find_matching_old_member( &self, member_pushdown_replacer_alias_to_cube_var: &'static str, column_var: &'static str, From da483b11b6eb7fb0367ea76afe557b59a42b2cef Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Wed, 16 Apr 2025 12:50:06 +0200 Subject: [PATCH 4/6] refactor(cubesql): Add explicit type in var! macro --- rust/cubesql/cubesql/src/compile/rewrite/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/mod.rs b/rust/cubesql/cubesql/src/compile/rewrite/mod.rs index 38b05d7dfc34c..421317fca239c 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/mod.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/mod.rs @@ -560,7 +560,7 @@ macro_rules! var_list_iter { #[macro_export] macro_rules! var { ($var_str:expr) => { - $var_str.parse().unwrap() + $var_str.parse::<::egg::Var>().unwrap() }; } From a7f2ef63a8aec83206e4f0d66489007121f26233 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Wed, 16 Apr 2025 13:40:10 +0200 Subject: [PATCH 5/6] refactor(cubesql): Move column-count decision in single find_matching_old_member argument --- .../src/compile/rewrite/rules/members.rs | 54 +++++++++++-------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 6c1d3b394382e..373ff9f684948 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -468,6 +468,11 @@ impl RewriteRules for MemberRules { } } +enum ColumnToSearch { + Var(&'static str), + DefaultCount, +} + impl MemberRules { pub fn new( meta_context: Arc, @@ -535,8 +540,8 @@ impl MemberRules { ) }; - let find_matching_old_member_with_count = - |name: &str, column_expr: String, default_count: bool| { + let find_matching_old_member = + |name: &str, column_expr: String, column_to_search: ColumnToSearch| { transforming_rewrite( &format!( "member-pushdown-replacer-column-find-matching-old-member-{}", @@ -554,19 +559,14 @@ impl MemberRules { ), self.transform_find_matching_old_member( "?member_pushdown_replacer_alias_to_cube", - "?column", + column_to_search, "?old_members", "?terminal_member", "?filtered_member_pushdown_replacer_alias_to_cube", - default_count, ), ) }; - let find_matching_old_member = |name: &str, column_expr: String| { - find_matching_old_member_with_count(name, column_expr, false) - }; - if self.config_obj.push_down_pull_up_split() { rules.extend(replacer_flat_push_down_node_substitute_rules( "member-pushdown-replacer-aggregate-group", @@ -606,14 +606,20 @@ impl MemberRules { member_replacer_fn, )); } - rules.push(find_matching_old_member("column", column_expr("?column"))); + rules.push(find_matching_old_member( + "column", + column_expr("?column"), + ColumnToSearch::Var("?column"), + )); rules.push(find_matching_old_member( "alias", alias_expr(column_expr("?column"), "?alias"), + ColumnToSearch::Var("?column"), )); rules.push(find_matching_old_member( "agg-fun", agg_fun_expr("?fun_name", vec![column_expr("?column")], "?distinct"), + ColumnToSearch::Var("?column"), )); rules.push(find_matching_old_member( "agg-fun-alias", @@ -621,21 +627,23 @@ impl MemberRules { agg_fun_expr("?fun_name", vec![column_expr("?column")], "?distinct"), "?alias", ), + ColumnToSearch::Var("?column"), )); rules.push(find_matching_old_member( "udaf-fun", udaf_expr(MEASURE_UDAF_NAME, vec![column_expr("?column")]), + ColumnToSearch::Var("?column"), )); - rules.push(find_matching_old_member_with_count( + rules.push(find_matching_old_member( "agg-fun-default-count", agg_fun_expr( "Count", vec![literal_expr("?any")], "AggregateFunctionExprDistinct:false", ), - true, + ColumnToSearch::DefaultCount, )); - rules.push(find_matching_old_member_with_count( + rules.push(find_matching_old_member( "agg-fun-default-count-alias", alias_expr( agg_fun_expr( @@ -645,7 +653,7 @@ impl MemberRules { ), "?alias", ), - true, + ColumnToSearch::DefaultCount, )); rules.push(find_matching_old_member( "agg-fun-with-cast", @@ -655,6 +663,7 @@ impl MemberRules { vec![cast_expr(column_expr("?column"), "?data_type")], "?distinct", ), + ColumnToSearch::Var("?column"), )); rules.push(find_matching_old_member( "date-trunc", @@ -662,6 +671,7 @@ impl MemberRules { "DateTrunc", vec![literal_expr("?granularity"), column_expr("?column")], ), + ColumnToSearch::Var("?column"), )); rules.push(find_matching_old_member( "date-trunc-with-alias", @@ -673,6 +683,7 @@ impl MemberRules { ), "?original_alias", ), + ColumnToSearch::Var("?column"), )); Self::measure_rewrites( &mut rules, @@ -1947,15 +1958,17 @@ impl MemberRules { fn transform_find_matching_old_member( &self, member_pushdown_replacer_alias_to_cube_var: &'static str, - column_var: &'static str, + column_to_search: ColumnToSearch, old_members_var: &'static str, terminal_member: &'static str, filtered_member_pushdown_replacer_alias_to_cube_var: &'static str, - default_count: bool, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let member_pushdown_replacer_alias_to_cube_var = var!(member_pushdown_replacer_alias_to_cube_var); - let column_var = var!(column_var); + let column_var = match column_to_search { + ColumnToSearch::Var(column_var) => Some(var!(column_var)), + ColumnToSearch::DefaultCount => None, + }; let old_members_var = var!(old_members_var); let terminal_member = var!(terminal_member); let filtered_member_pushdown_replacer_alias_to_cube_var = @@ -1969,12 +1982,11 @@ impl MemberRules { .cloned() .collect(); for alias_to_cube in alias_to_cubes { - let column_iter = if default_count { - vec![Column::from_name(Self::default_count_measure_name())] - } else { - var_iter!(egraph[subst[column_var]], ColumnExprColumn) + let column_iter = match column_var { + Some(column_var) => var_iter!(egraph[subst[column_var]], ColumnExprColumn) .cloned() - .collect() + .collect(), + None => vec![Column::from_name(Self::default_count_measure_name())], }; for alias_column in column_iter { let alias_name = expr_column_name(&Expr::Column(alias_column), &None); From 1312aba01fadab32f9d4bc7f4315b0970baa333f Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Wed, 16 Apr 2025 13:42:24 +0200 Subject: [PATCH 6/6] feat(cubesql): Support trivial casts in member pushdown --- rust/cubesql/cubesql/src/compile/mod.rs | 2 +- .../src/compile/rewrite/rules/members.rs | 102 +++++++++++++++++- ...est_cube_join__join_with_trivial_cast.snap | 10 ++ .../src/compile/test/test_cube_join.rs | 74 +++++++++++++ 4 files changed, 184 insertions(+), 4 deletions(-) create mode 100644 rust/cubesql/cubesql/src/compile/test/snapshots/cubesql__compile__test__test_cube_join__join_with_trivial_cast.snap diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index b9dbf91c0f2b9..abc482f810b60 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -13429,7 +13429,7 @@ ORDER BY "source"."str0" ASC V1LoadResultAnnotation::new(json!([]), json!([]), json!([]), json!([])) } - fn simple_load_response(data: Vec) -> V1LoadResponse { + pub(crate) fn simple_load_response(data: Vec) -> V1LoadResponse { V1LoadResponse::new(vec![V1LoadResult::new(empty_annotation(), data)]) } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 373ff9f684948..fd2536903da42 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -31,6 +31,7 @@ use crate::{ }, }, config::ConfigObj, + sql::ColumnType, transport::{MetaContext, V1CubeMetaDimensionExt, V1CubeMetaExt, V1CubeMetaMeasureExt}, var, var_iter, var_list_iter, CubeError, }; @@ -527,6 +528,23 @@ impl MemberRules { relation, ), ), + // Cast without alias will not generate stable name in schema, so there's no rule like that for now + // TODO implement it anyway, to be able to remove Projection on top of CubeScan completely + transforming_rewrite( + &format!("member-pushdown-replacer-column-{}-cast-alias", name), + member_pushdown_replacer( + alias_expr(cast_expr(column_expr("?column"), "?cast_type"), "?alias"), + member_fn("?old_alias"), + "?member_pushdown_replacer_alias_to_cube", + ), + member_fn("?output_column"), + self.transform_alias( + "?member_pushdown_replacer_alias_to_cube", + "?alias", + "?output_column", + relation, + ), + ), ] } @@ -541,7 +559,10 @@ impl MemberRules { }; let find_matching_old_member = - |name: &str, column_expr: String, column_to_search: ColumnToSearch| { + |name: &str, + column_expr: String, + column_to_search: ColumnToSearch, + cast_type_var: Option<&'static str>| { transforming_rewrite( &format!( "member-pushdown-replacer-column-find-matching-old-member-{}", @@ -560,6 +581,7 @@ impl MemberRules { self.transform_find_matching_old_member( "?member_pushdown_replacer_alias_to_cube", column_to_search, + cast_type_var, "?old_members", "?terminal_member", "?filtered_member_pushdown_replacer_alias_to_cube", @@ -610,16 +632,31 @@ impl MemberRules { "column", column_expr("?column"), ColumnToSearch::Var("?column"), + None, + )); + rules.push(find_matching_old_member( + "column-cast", + cast_expr(column_expr("?column"), "?cast_type"), + ColumnToSearch::Var("?column"), + Some("?cast_type"), )); rules.push(find_matching_old_member( "alias", alias_expr(column_expr("?column"), "?alias"), ColumnToSearch::Var("?column"), + None, + )); + rules.push(find_matching_old_member( + "alias-cast", + alias_expr(cast_expr(column_expr("?column"), "?cast_type"), "?alias"), + ColumnToSearch::Var("?column"), + Some("?cast_type"), )); rules.push(find_matching_old_member( "agg-fun", agg_fun_expr("?fun_name", vec![column_expr("?column")], "?distinct"), ColumnToSearch::Var("?column"), + None, )); rules.push(find_matching_old_member( "agg-fun-alias", @@ -628,11 +665,13 @@ impl MemberRules { "?alias", ), ColumnToSearch::Var("?column"), + None, )); rules.push(find_matching_old_member( "udaf-fun", udaf_expr(MEASURE_UDAF_NAME, vec![column_expr("?column")]), ColumnToSearch::Var("?column"), + None, )); rules.push(find_matching_old_member( "agg-fun-default-count", @@ -642,6 +681,7 @@ impl MemberRules { "AggregateFunctionExprDistinct:false", ), ColumnToSearch::DefaultCount, + None, )); rules.push(find_matching_old_member( "agg-fun-default-count-alias", @@ -654,6 +694,7 @@ impl MemberRules { "?alias", ), ColumnToSearch::DefaultCount, + None, )); rules.push(find_matching_old_member( "agg-fun-with-cast", @@ -664,6 +705,7 @@ impl MemberRules { "?distinct", ), ColumnToSearch::Var("?column"), + None, )); rules.push(find_matching_old_member( "date-trunc", @@ -672,6 +714,7 @@ impl MemberRules { vec![literal_expr("?granularity"), column_expr("?column")], ), ColumnToSearch::Var("?column"), + None, )); rules.push(find_matching_old_member( "date-trunc-with-alias", @@ -684,6 +727,7 @@ impl MemberRules { "?original_alias", ), ColumnToSearch::Var("?column"), + None, )); Self::measure_rewrites( &mut rules, @@ -1955,10 +1999,47 @@ impl MemberRules { ) } + fn can_remove_cast( + meta: &MetaContext, + member: &Member, + cast_types: Option<&Vec>, + ) -> bool { + let cube = member.cube(); + match cast_types { + // No cast, nothing to check + None => true, + // Need to check that cast is trivial + Some(cast_types) => { + // For now, allow trivial casts only for cube members, not literals + let Some(cube) = &cube else { + return false; + }; + let Some(name) = member.name() else { + return false; + }; + let Some(cube) = meta.find_cube_with_name(cube) else { + return false; + }; + // For now, allow trivial casts only for dimensions + let Some(dimension) = cube.lookup_dimension_by_member_name(name) else { + return false; + }; + + cast_types + .iter() + .any(|dt| match (dimension.get_sql_type(), dt) { + (ColumnType::String, DataType::Utf8) => true, + _ => false, + }) + } + } + } + fn transform_find_matching_old_member( &self, member_pushdown_replacer_alias_to_cube_var: &'static str, column_to_search: ColumnToSearch, + cast_type_var: Option<&'static str>, old_members_var: &'static str, terminal_member: &'static str, filtered_member_pushdown_replacer_alias_to_cube_var: &'static str, @@ -1969,11 +2050,13 @@ impl MemberRules { ColumnToSearch::Var(column_var) => Some(var!(column_var)), ColumnToSearch::DefaultCount => None, }; + let cast_type_var = cast_type_var.map(|cast_type_var| var!(cast_type_var)); let old_members_var = var!(old_members_var); let terminal_member = var!(terminal_member); let filtered_member_pushdown_replacer_alias_to_cube_var = var!(filtered_member_pushdown_replacer_alias_to_cube_var); let flat_list = self.config_obj.push_down_pull_up_split(); + let meta = self.meta_context.clone(); move |egraph, subst| { let alias_to_cubes: Vec<_> = var_iter!( egraph[subst[member_pushdown_replacer_alias_to_cube_var]], @@ -1981,6 +2064,13 @@ impl MemberRules { ) .cloned() .collect(); + + let cast_types = cast_type_var.map(|cast_type_var| { + var_iter!(egraph[subst[cast_type_var]], CastExprDataType) + .cloned() + .collect::>() + }); + for alias_to_cube in alias_to_cubes { let column_iter = match column_var { Some(column_var) => var_iter!(egraph[subst[column_var]], ColumnExprColumn) @@ -1996,7 +2086,9 @@ impl MemberRules { .data .find_member_by_alias(&alias_name) { - let cube_to_filter = if let Some(cube) = member.1.cube() { + let member = &member.1; + + let cube_to_filter = if let Some(cube) = member.cube() { Some(cube) } else { alias_to_cube @@ -2014,8 +2106,12 @@ impl MemberRules { alias_to_cube.clone() }; + if !Self::can_remove_cast(&meta, member, cast_types.as_ref()) { + continue; + } + // TODO remove unwrap - let old_member = member.1.clone().add_to_egraph(egraph, flat_list).unwrap(); + let old_member = member.clone().add_to_egraph(egraph, flat_list).unwrap(); subst.insert(terminal_member, old_member); let filtered_member_pushdown_replacer_alias_to_cube = diff --git a/rust/cubesql/cubesql/src/compile/test/snapshots/cubesql__compile__test__test_cube_join__join_with_trivial_cast.snap b/rust/cubesql/cubesql/src/compile/test/snapshots/cubesql__compile__test__test_cube_join__join_with_trivial_cast.snap new file mode 100644 index 0000000000000..1fda8d63eed49 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/test/snapshots/cubesql__compile__test__test_cube_join__join_with_trivial_cast.snap @@ -0,0 +1,10 @@ +--- +source: cubesql/src/compile/test/test_cube_join.rs +expression: context.execute_query(query).await.unwrap() +--- ++-------+--------------+ +| notes | content_cast | ++-------+--------------+ +| foo | bar | +| baz | quux | ++-------+--------------+ diff --git a/rust/cubesql/cubesql/src/compile/test/test_cube_join.rs b/rust/cubesql/cubesql/src/compile/test/test_cube_join.rs index 4a8b7463aafe1..281283f515ba2 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_cube_join.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_cube_join.rs @@ -1,9 +1,11 @@ use cubeclient::models::{ V1LoadRequestQuery, V1LoadRequestQueryFilterItem, V1LoadRequestQueryTimeDimension, }; +use datafusion::physical_plan::displayable; use pretty_assertions::assert_eq; use serde_json::json; +use crate::compile::test::TestContext; use crate::compile::{ rewrite::rewriter::Rewriter, test::{ @@ -566,3 +568,75 @@ async fn test_join_cubes_with_aggr_error() { . Please check logs for additional information.".to_string() ) } + +/// CAST(dimension AS TEXT) should be pushed into CubeScan as regular dimension, +/// so join could see both CubeScans +#[tokio::test] +async fn test_join_with_trivial_cast() { + init_testing_logger(); + + let context = TestContext::new(DatabaseProtocol::PostgreSQL).await; + + // language=PostgreSQL + let query = r#" +SELECT + KibanaSampleDataEcommerce.notes, + t0.content_cast +FROM + KibanaSampleDataEcommerce + INNER JOIN ( + SELECT + __cubeJoinField, + CAST(content AS TEXT) AS content_cast + FROM + Logs + ) t0 ON ( + KibanaSampleDataEcommerce.__cubeJoinField = t0.__cubeJoinField + ) +; + "#; + + let expected_cube_scan = V1LoadRequestQuery { + measures: Some(vec![]), + segments: Some(vec![]), + dimensions: Some(vec![ + "KibanaSampleDataEcommerce.notes".to_string(), + "Logs.content".to_string(), + ]), + order: Some(vec![]), + ungrouped: Some(true), + ..Default::default() + }; + + context + .add_cube_load_mock( + expected_cube_scan.clone(), + crate::compile::tests::simple_load_response(vec![ + json!({ + "KibanaSampleDataEcommerce.notes": "foo", + "Logs.content": "bar", + }), + json!({ + "Logs.content": "quux", + "KibanaSampleDataEcommerce.notes": "baz", + }), + ]), + ) + .await; + + let query_plan = context.convert_sql_to_cube_query(&query).await.unwrap(); + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + assert_eq!( + query_plan.as_logical_plan().find_cube_scan().request, + expected_cube_scan + ); + + // Expect that query is executable, and properly assigns alias for cast + insta::assert_snapshot!(context.execute_query(query).await.unwrap()); +}