From 362f448c361f52d77413247a1d0baf38318a48e6 Mon Sep 17 00:00:00 2001 From: Manan Gupta <35839558+GuptaManan100@users.noreply.github.com> Date: Wed, 7 Aug 2024 17:45:00 +0530 Subject: [PATCH 1/2] Fix: Offset planning in hash joins (#16540) Signed-off-by: Manan Gupta --- .../vtgate/vitess_tester/join/join.test | 79 +++++++++ .../vtgate/vitess_tester/join/vschema.json | 46 +++++ .../vtgate/planbuilder/operators/hash_join.go | 23 +-- .../planbuilder/operators/offset_planning.go | 9 +- .../planbuilder/testdata/aggr_cases.json | 95 +++++----- .../planbuilder/testdata/from_cases.json | 162 +++++++++++++++--- .../planbuilder/testdata/vschemas/schema.json | 12 ++ 7 files changed, 339 insertions(+), 87 deletions(-) create mode 100644 go/test/endtoend/vtgate/vitess_tester/join/join.test create mode 100644 go/test/endtoend/vtgate/vitess_tester/join/vschema.json diff --git a/go/test/endtoend/vtgate/vitess_tester/join/join.test b/go/test/endtoend/vtgate/vitess_tester/join/join.test new file mode 100644 index 00000000000..72d79a1206e --- /dev/null +++ b/go/test/endtoend/vtgate/vitess_tester/join/join.test @@ -0,0 +1,79 @@ +CREATE TABLE `t1` +( + `id` int unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(191) NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +CREATE TABLE `t2` +( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `t1_id` int unsigned NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +CREATE TABLE `t3` +( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(191) NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +CREATE TABLE `t4` +( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `col` int unsigned NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +insert into t1 (id, name) +values (1, 'A'), + (2, 'B'), + (3, 'C'), + (4, 'D'); + +insert into t2 (id, t1_id) +values (1, 1), + (2, 2), + (3, 3); + +insert into t3 (id, name) +values (1, 'A'), + (2, 'B'), + (3, 'B'), + (4, 'B'), + (5, 'B'); + +insert into t4 (id, col) +values (1, 1), + (2, 2), + (3, 3); + +-- wait_authoritative t1 +-- wait_authoritative t2 +-- wait_authoritative t3 +select 42 +from t1 + join t2 on t1.id = t2.t1_id + join t3 on t1.id = t3.id +where t1.name + or t2.id + or t3.name; + +# Complex query that requires hash join underneath a memory sort and ordered aggregate +select 1 +from t1 + join t2 on t1.id = t2.t1_id + join t4 on t4.col = t2.id + left join (select t4.col, count(*) as count from t4 group by t4.col) t3 on t3.col = t2.id +where t1.id IN (1, 2) +group by t2.id, t4.col; + diff --git a/go/test/endtoend/vtgate/vitess_tester/join/vschema.json b/go/test/endtoend/vtgate/vitess_tester/join/vschema.json new file mode 100644 index 00000000000..1105b951e61 --- /dev/null +++ b/go/test/endtoend/vtgate/vitess_tester/join/vschema.json @@ -0,0 +1,46 @@ +{ + "keyspaces": { + "joinks": { + "sharded": true, + "vindexes": { + "hash": { + "type": "hash" + } + }, + "tables": { + "t1": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ] + }, + "t2": { + "column_vindexes": [ + { + "column": "t1_id", + "name": "hash" + } + ] + }, + "t3": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ] + }, + "t4": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ] + } + } + } + } +} \ No newline at end of file diff --git a/go/vt/vtgate/planbuilder/operators/hash_join.go b/go/vt/vtgate/planbuilder/operators/hash_join.go index f997ed5205d..135fda276b5 100644 --- a/go/vt/vtgate/planbuilder/operators/hash_join.go +++ b/go/vt/vtgate/planbuilder/operators/hash_join.go @@ -300,20 +300,9 @@ func (hj *HashJoin) addColumn(ctx *plancontext.PlanningContext, in sqlparser.Exp inOffset = op.AddColumn(ctx, false, false, aeWrap(expr)) } - // we turn the + // we have to turn the incoming offset to an outgoing offset of the columns this operator is exposing internalOffset := offsetter(inOffset) - - // ok, we have an offset from the input operator. Let's check if we already have it - // in our list of incoming columns - - for idx, offset := range hj.ColumnOffsets { - if internalOffset == offset { - return idx - } - } - hj.ColumnOffsets = append(hj.ColumnOffsets, internalOffset) - return len(hj.ColumnOffsets) - 1 } @@ -408,17 +397,7 @@ func (hj *HashJoin) addSingleSidedColumn( // we have to turn the incoming offset to an outgoing offset of the columns this operator is exposing internalOffset := offsetter(inOffset) - - // ok, we have an offset from the input operator. Let's check if we already have it - // in our list of incoming columns - for idx, offset := range hj.ColumnOffsets { - if internalOffset == offset { - return idx - } - } - hj.ColumnOffsets = append(hj.ColumnOffsets, internalOffset) - return len(hj.ColumnOffsets) - 1 } diff --git a/go/vt/vtgate/planbuilder/operators/offset_planning.go b/go/vt/vtgate/planbuilder/operators/offset_planning.go index 638d3d80907..712cc8ee5ad 100644 --- a/go/vt/vtgate/planbuilder/operators/offset_planning.go +++ b/go/vt/vtgate/planbuilder/operators/offset_planning.go @@ -38,7 +38,6 @@ func planOffsets(ctx *plancontext.PlanningContext, root Operator) Operator { panic(vterrors.VT13001(fmt.Sprintf("should not see %T here", in))) case offsettable: newOp := op.planOffsets(ctx) - if newOp == nil { newOp = op } @@ -47,7 +46,13 @@ func planOffsets(ctx *plancontext.PlanningContext, root Operator) Operator { fmt.Println("Planned offsets for:") fmt.Println(ToTree(newOp)) } - return newOp, nil + + if newOp == op { + return newOp, nil + } else { + // We got a new operator from plan offsets. We should return that something has changed. + return newOp, Rewrote("planning offsets introduced a new operator") + } } return in, NoRewrite } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index f1555686230..4296c72a6e6 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -6507,59 +6507,74 @@ "OrderBy": "(4|6) ASC, (5|7) ASC", "Inputs": [ { - "OperatorType": "Join", - "Variant": "HashLeftJoin", - "Collation": "binary", - "ComparisonType": "INT16", - "JoinColumnIndexes": "-1,1,-2,2,-3,3", - "Predicate": "`user`.col = ue.col", - "TableName": "`user`_user_extra", + "OperatorType": "Projection", + "Expressions": [ + "count(*) as count(*)", + "count(*) as count(*)", + "`user`.col as col", + "ue.col as col", + "`user`.foo as foo", + "ue.bar as bar", + "weight_string(`user`.foo) as weight_string(`user`.foo)", + "weight_string(ue.bar) as weight_string(ue.bar)" + ], "Inputs": [ { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select count(*), `user`.col, `user`.foo from `user` where 1 != 1 group by `user`.col, `user`.foo", - "Query": "select count(*), `user`.col, `user`.foo from `user` group by `user`.col, `user`.foo", - "Table": "`user`" - }, - { - "OperatorType": "Aggregate", - "Variant": "Ordered", - "Aggregates": "count_star(0)", - "GroupBy": "1, (2|3)", + "OperatorType": "Join", + "Variant": "HashLeftJoin", + "Collation": "binary", + "ComparisonType": "INT16", + "JoinColumnIndexes": "-1,1,-2,2,-3,3,-3,3", + "Predicate": "`user`.col = ue.col", + "TableName": "`user`_user_extra", "Inputs": [ { - "OperatorType": "SimpleProjection", - "Columns": [ + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(*), `user`.col, `user`.foo from `user` where 1 != 1 group by `user`.col, `user`.foo", + "Query": "select count(*), `user`.col, `user`.foo from `user` group by `user`.col, `user`.foo", + "Table": "`user`" + }, + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count_star(0)", + "GroupBy": "1, (2|3)", + "Inputs": [ + { + "OperatorType": "SimpleProjection", + "Columns": [ 2, 0, 1, 3 ], - "Inputs": [ - { - "OperatorType": "Sort", - "Variant": "Memory", - "OrderBy": "0 ASC, (1|3) ASC", "Inputs": [ { - "OperatorType": "Limit", - "Count": "10", + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "0 ASC, (1|3) ASC", "Inputs": [ { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra where 1 != 1) as ue where 1 != 1", - "Query": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra) as ue limit :__upper_limit", - "Table": "user_extra" + "OperatorType": "Limit", + "Count": "10", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra where 1 != 1) as ue where 1 != 1", + "Query": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra) as ue limit :__upper_limit", + "Table": "user_extra" + } + ] } ] } diff --git a/go/vt/vtgate/planbuilder/testdata/from_cases.json b/go/vt/vtgate/planbuilder/testdata/from_cases.json index 204322489b0..18f9f376810 100644 --- a/go/vt/vtgate/planbuilder/testdata/from_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/from_cases.json @@ -744,6 +744,111 @@ ] } }, + { + "comment": "Complex query that has hash left join underneath a memory sort and ordered aggregation", + "query": "select 1 from user join user_extra on user.id = user_extra.user_id join music on music.intcol = user_extra.col left join (select user_metadata.col, count(*) as count from user_metadata group by user_metadata.col) um on um.col = user_extra.col where user.id IN (103) group by user_extra.col, music.intcol", + "plan": { + "QueryType": "SELECT", + "Original": "select 1 from user join user_extra on user.id = user_extra.user_id join music on music.intcol = user_extra.col left join (select user_metadata.col, count(*) as count from user_metadata group by user_metadata.col) um on um.col = user_extra.col where user.id IN (103) group by user_extra.col, music.intcol", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "any_value(0) AS 1", + "GroupBy": "1, 4", + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "1 ASC, 4 ASC", + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "HashLeftJoin", + "Collation": "binary", + "ComparisonType": "INT16", + "JoinColumnIndexes": "-1,-2,1,-2,-4,-1", + "Predicate": "user_extra.col = um.col", + "TableName": "music_`user`, user_extra_user_metadata", + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "L:0,R:0,R:0,L:1", + "JoinVars": { + "music_intcol": 1 + }, + "TableName": "music_`user`, user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select 1, music.intcol from music where 1 != 1 group by music.intcol", + "Query": "select 1, music.intcol from music group by music.intcol", + "Table": "music" + }, + { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select user_extra.col, user_extra.col from `user`, user_extra where 1 != 1 group by user_extra.col, user_extra.col", + "Query": "select user_extra.col, user_extra.col from `user`, user_extra where `user`.id in (103) and user_extra.col = :music_intcol and `user`.id = user_extra.user_id group by user_extra.col, user_extra.col", + "Table": "`user`, user_extra", + "Values": [ + "103" + ], + "Vindex": "user_index" + } + ] + }, + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "0", + "Inputs": [ + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "sum_count_star(1) AS count", + "GroupBy": "0", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select user_metadata.col, count(*) as `count` from user_metadata where 1 != 1 group by user_metadata.col", + "OrderBy": "0 ASC", + "Query": "select user_metadata.col, count(*) as `count` from user_metadata group by user_metadata.col order by user_metadata.col asc", + "Table": "user_metadata" + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "TablesUsed": [ + "user.music", + "user.user", + "user.user_extra", + "user.user_metadata" + ] + } + }, { "comment": "Straight-join (ignores the straight_join hint)", "query": "select m1.col from unsharded as m1 straight_join unsharded as m2", @@ -4300,28 +4405,22 @@ "ResultColumns": 2, "Inputs": [ { - "OperatorType": "Join", - "Variant": "HashLeftJoin", - "Collation": "binary", - "ComparisonType": "INT16", - "JoinColumnIndexes": "-1,2", - "Predicate": "u.col = ue.col", - "TableName": "`user`_user_extra", + "OperatorType": "Projection", + "Expressions": [ + "id as id", + "user_id as user_id", + "weight_string(id) as weight_string(id)", + "weight_string(user_id) as weight_string(user_id)" + ], "Inputs": [ { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select u.id, u.col from (select id, col from `user` where 1 != 1) as u where 1 != 1", - "Query": "select distinct u.id, u.col from (select id, col from `user`) as u", - "Table": "`user`" - }, - { - "OperatorType": "Limit", - "Count": "10", + "OperatorType": "Join", + "Variant": "HashLeftJoin", + "Collation": "binary", + "ComparisonType": "INT16", + "JoinColumnIndexes": "-1,2,-1,2", + "Predicate": "u.col = ue.col", + "TableName": "`user`_user_extra", "Inputs": [ { "OperatorType": "Route", @@ -4330,9 +4429,26 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select ue.col, ue.user_id from (select col, user_id from user_extra where 1 != 1) as ue where 1 != 1", - "Query": "select ue.col, ue.user_id from (select col, user_id from user_extra) as ue limit :__upper_limit", - "Table": "user_extra" + "FieldQuery": "select u.id, u.col from (select id, col from `user` where 1 != 1) as u where 1 != 1", + "Query": "select distinct u.id, u.col from (select id, col from `user`) as u", + "Table": "`user`" + }, + { + "OperatorType": "Limit", + "Count": "10", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select ue.col, ue.user_id from (select col, user_id from user_extra where 1 != 1) as ue where 1 != 1", + "Query": "select ue.col, ue.user_id from (select col, user_id from user_extra) as ue limit :__upper_limit", + "Table": "user_extra" + } + ] } ] } diff --git a/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json b/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json index 7aaa2648388..d28a9f97482 100644 --- a/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json +++ b/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json @@ -252,6 +252,12 @@ "column": "non_planable", "name": "non_planable_user_map" } + ], + "columns": [ + { + "name": "col", + "type": "INT16" + } ] }, "user_extra": { @@ -282,6 +288,12 @@ "column": "id", "name": "music_user_map" } + ], + "columns": [ + { + "name": "intcol", + "type": "INT16" + } ] }, "authoritative": { From 9f3db0d377e6af61aaaee8d1e262f9e1d8d8b584 Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Thu, 8 Aug 2024 10:47:53 +0530 Subject: [PATCH 2/2] empty commit to kick CI Signed-off-by: Manan Gupta