From 7bba3a0d7e0c036bc8ab2cd973f72e75e0f9ac59 Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Thu, 8 Aug 2024 15:18:50 +0530 Subject: [PATCH] [release-20.0] Fix: Offset planning in hash joins (#16540) (#16552) Signed-off-by: Manan Gupta Co-authored-by: Manan Gupta <35839558+GuptaManan100@users.noreply.github.com> --- .../vtgate/vitess_tester/join/join.test | 79 +++++++++++++ .../vtgate/vitess_tester/join/vschema.json | 46 ++++++++ .../planbuilder/operators/apply_join.go | 2 +- .../planbuilder/operators/dml_with_input.go | 2 +- .../vtgate/planbuilder/operators/hash_join.go | 23 +--- .../planbuilder/operators/offset_planning.go | 9 +- .../planbuilder/testdata/aggr_cases.json | 91 ++++++++------- .../planbuilder/testdata/from_cases.json | 105 ++++++++++++++++++ .../planbuilder/testdata/vschemas/schema.json | 12 ++ 9 files changed, 305 insertions(+), 64 deletions(-) create mode 100644 go/test/endtoend/vtgate/vitess_tester/join/join.test create mode 100644 go/test/endtoend/vtgate/vitess_tester/join/vschema.json diff --git a/go/test/endtoend/vtgate/vitess_tester/join/join.test b/go/test/endtoend/vtgate/vitess_tester/join/join.test new file mode 100644 index 00000000000..72d79a1206e --- /dev/null +++ b/go/test/endtoend/vtgate/vitess_tester/join/join.test @@ -0,0 +1,79 @@ +CREATE TABLE `t1` +( + `id` int unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(191) NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +CREATE TABLE `t2` +( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `t1_id` int unsigned NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +CREATE TABLE `t3` +( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(191) NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +CREATE TABLE `t4` +( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `col` int unsigned NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +insert into t1 (id, name) +values (1, 'A'), + (2, 'B'), + (3, 'C'), + (4, 'D'); + +insert into t2 (id, t1_id) +values (1, 1), + (2, 2), + (3, 3); + +insert into t3 (id, name) +values (1, 'A'), + (2, 'B'), + (3, 'B'), + (4, 'B'), + (5, 'B'); + +insert into t4 (id, col) +values (1, 1), + (2, 2), + (3, 3); + +-- wait_authoritative t1 +-- wait_authoritative t2 +-- wait_authoritative t3 +select 42 +from t1 + join t2 on t1.id = t2.t1_id + join t3 on t1.id = t3.id +where t1.name + or t2.id + or t3.name; + +# Complex query that requires hash join underneath a memory sort and ordered aggregate +select 1 +from t1 + join t2 on t1.id = t2.t1_id + join t4 on t4.col = t2.id + left join (select t4.col, count(*) as count from t4 group by t4.col) t3 on t3.col = t2.id +where t1.id IN (1, 2) +group by t2.id, t4.col; + diff --git a/go/test/endtoend/vtgate/vitess_tester/join/vschema.json b/go/test/endtoend/vtgate/vitess_tester/join/vschema.json new file mode 100644 index 00000000000..1105b951e61 --- /dev/null +++ b/go/test/endtoend/vtgate/vitess_tester/join/vschema.json @@ -0,0 +1,46 @@ +{ + "keyspaces": { + "joinks": { + "sharded": true, + "vindexes": { + "hash": { + "type": "hash" + } + }, + "tables": { + "t1": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ] + }, + "t2": { + "column_vindexes": [ + { + "column": "t1_id", + "name": "hash" + } + ] + }, + "t3": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ] + }, + "t4": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ] + } + } + } + } +} \ No newline at end of file diff --git a/go/vt/vtgate/planbuilder/operators/apply_join.go b/go/vt/vtgate/planbuilder/operators/apply_join.go index 03d1736e6ef..30e2c5dd3c3 100644 --- a/go/vt/vtgate/planbuilder/operators/apply_join.go +++ b/go/vt/vtgate/planbuilder/operators/apply_join.go @@ -298,7 +298,7 @@ func (aj *ApplyJoin) AddWSColumn(ctx *plancontext.PlanningContext, offset int, u func (aj *ApplyJoin) planOffsets(ctx *plancontext.PlanningContext) Operator { if len(aj.Columns) > 0 { // we've already done offset planning - return aj + return nil } for _, col := range aj.JoinColumns.columns { // Read the type description for applyJoinColumn to understand the following code diff --git a/go/vt/vtgate/planbuilder/operators/dml_with_input.go b/go/vt/vtgate/planbuilder/operators/dml_with_input.go index 09859b90bac..3843e2f3fa8 100644 --- a/go/vt/vtgate/planbuilder/operators/dml_with_input.go +++ b/go/vt/vtgate/planbuilder/operators/dml_with_input.go @@ -114,7 +114,7 @@ func (d *DMLWithInput) planOffsets(ctx *plancontext.PlanningContext) Operator { } } d.BvList = bvList - return d + return nil } var _ Operator = (*DMLWithInput)(nil) diff --git a/go/vt/vtgate/planbuilder/operators/hash_join.go b/go/vt/vtgate/planbuilder/operators/hash_join.go index 1928f4dda9e..23d0d061e21 100644 --- a/go/vt/vtgate/planbuilder/operators/hash_join.go +++ b/go/vt/vtgate/planbuilder/operators/hash_join.go @@ -326,20 +326,9 @@ func (hj *HashJoin) addColumn(ctx *plancontext.PlanningContext, in sqlparser.Exp inOffset = op.AddColumn(ctx, false, false, aeWrap(expr)) } - // we turn the + // we have to turn the incoming offset to an outgoing offset of the columns this operator is exposing internalOffset := offsetter(inOffset) - - // ok, we have an offset from the input operator. Let's check if we already have it - // in our list of incoming columns - - for idx, offset := range hj.ColumnOffsets { - if internalOffset == offset { - return idx - } - } - hj.ColumnOffsets = append(hj.ColumnOffsets, internalOffset) - return len(hj.ColumnOffsets) - 1 } @@ -434,17 +423,7 @@ func (hj *HashJoin) addSingleSidedColumn( // we have to turn the incoming offset to an outgoing offset of the columns this operator is exposing internalOffset := offsetter(inOffset) - - // ok, we have an offset from the input operator. Let's check if we already have it - // in our list of incoming columns - for idx, offset := range hj.ColumnOffsets { - if internalOffset == offset { - return idx - } - } - hj.ColumnOffsets = append(hj.ColumnOffsets, internalOffset) - return len(hj.ColumnOffsets) - 1 } diff --git a/go/vt/vtgate/planbuilder/operators/offset_planning.go b/go/vt/vtgate/planbuilder/operators/offset_planning.go index eb92cdf0920..3b52d84834f 100644 --- a/go/vt/vtgate/planbuilder/operators/offset_planning.go +++ b/go/vt/vtgate/planbuilder/operators/offset_planning.go @@ -39,7 +39,6 @@ func planOffsets(ctx *plancontext.PlanningContext, root Operator) Operator { panic(vterrors.VT13001(fmt.Sprintf("should not see %T here", in))) case offsettable: newOp := op.planOffsets(ctx) - if newOp == nil { newOp = op } @@ -48,7 +47,13 @@ func planOffsets(ctx *plancontext.PlanningContext, root Operator) Operator { fmt.Println("Planned offsets for:") fmt.Println(ToTree(newOp)) } - return newOp, nil + + if newOp == op { + return newOp, nil + } else { + // We got a new operator from plan offsets. We should return that something has changed. + return newOp, Rewrote("planning offsets introduced a new operator") + } } return in, NoRewrite } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index 6800a636752..9b46b0f4b8a 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -6593,54 +6593,69 @@ "OrderBy": "(4|6) ASC, (5|7) ASC", "Inputs": [ { - "OperatorType": "Join", - "Variant": "HashLeftJoin", - "Collation": "binary", - "ComparisonType": "INT16", - "JoinColumnIndexes": "-1,1,-2,2,-3,3", - "Predicate": "`user`.col = ue.col", - "TableName": "`user`_user_extra", + "OperatorType": "Projection", + "Expressions": [ + "count(*) as count(*)", + "count(*) as count(*)", + "`user`.col as col", + "ue.col as col", + "`user`.foo as foo", + "ue.bar as bar", + "weight_string(`user`.foo) as weight_string(`user`.foo)", + "weight_string(ue.bar) as weight_string(ue.bar)" + ], "Inputs": [ { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select count(*), `user`.col, `user`.foo from `user` where 1 != 1 group by `user`.col, `user`.foo", - "Query": "select count(*), `user`.col, `user`.foo from `user` group by `user`.col, `user`.foo", - "Table": "`user`" - }, - { - "OperatorType": "Aggregate", - "Variant": "Ordered", - "Aggregates": "count_star(0)", - "GroupBy": "1, (2|3)", + "OperatorType": "Join", + "Variant": "HashLeftJoin", + "Collation": "binary", + "ComparisonType": "INT16", + "JoinColumnIndexes": "-1,1,-2,2,-3,3,-3,3", + "Predicate": "`user`.col = ue.col", + "TableName": "`user`_user_extra", "Inputs": [ { - "OperatorType": "SimpleProjection", - "Columns": "2,0,1,3", + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(*), `user`.col, `user`.foo from `user` where 1 != 1 group by `user`.col, `user`.foo", + "Query": "select count(*), `user`.col, `user`.foo from `user` group by `user`.col, `user`.foo", + "Table": "`user`" + }, + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count_star(0)", + "GroupBy": "1, (2|3)", "Inputs": [ { - "OperatorType": "Sort", - "Variant": "Memory", - "OrderBy": "0 ASC, (1|3) ASC", + "OperatorType": "SimpleProjection", + "Columns": "2,0,1,3", "Inputs": [ { - "OperatorType": "Limit", - "Count": "10", + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "0 ASC, (1|3) ASC", "Inputs": [ { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra where 1 != 1) as ue where 1 != 1", - "Query": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra) as ue limit 10", - "Table": "user_extra" + "OperatorType": "Limit", + "Count": "10", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra where 1 != 1) as ue where 1 != 1", + "Query": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra) as ue limit 10", + "Table": "user_extra" + } + ] } ] } diff --git a/go/vt/vtgate/planbuilder/testdata/from_cases.json b/go/vt/vtgate/planbuilder/testdata/from_cases.json index 222956bd430..3bc9eae96c1 100644 --- a/go/vt/vtgate/planbuilder/testdata/from_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/from_cases.json @@ -720,6 +720,111 @@ ] } }, + { + "comment": "Complex query that has hash left join underneath a memory sort and ordered aggregation", + "query": "select 1 from user join user_extra on user.id = user_extra.user_id join music on music.intcol = user_extra.col left join (select user_metadata.col, count(*) as count from user_metadata group by user_metadata.col) um on um.col = user_extra.col where user.id IN (103) group by user_extra.col, music.intcol", + "plan": { + "QueryType": "SELECT", + "Original": "select 1 from user join user_extra on user.id = user_extra.user_id join music on music.intcol = user_extra.col left join (select user_metadata.col, count(*) as count from user_metadata group by user_metadata.col) um on um.col = user_extra.col where user.id IN (103) group by user_extra.col, music.intcol", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "any_value(0) AS 1", + "GroupBy": "1, 4", + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "1 ASC, 4 ASC", + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "HashLeftJoin", + "Collation": "binary", + "ComparisonType": "INT16", + "JoinColumnIndexes": "-1,-2,1,-2,-4,-1", + "Predicate": "user_extra.col = um.col", + "TableName": "music_`user`, user_extra_user_metadata", + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "L:0,R:0,R:0,L:1", + "JoinVars": { + "music_intcol": 1 + }, + "TableName": "music_`user`, user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select 1, music.intcol from music where 1 != 1 group by music.intcol", + "Query": "select 1, music.intcol from music group by music.intcol", + "Table": "music" + }, + { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select user_extra.col, user_extra.col from `user`, user_extra where 1 != 1 group by user_extra.col", + "Query": "select user_extra.col, user_extra.col from `user`, user_extra where `user`.id in (103) and user_extra.col = :music_intcol and `user`.id = user_extra.user_id group by user_extra.col", + "Table": "`user`, user_extra", + "Values": [ + "103" + ], + "Vindex": "user_index" + } + ] + }, + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "0", + "Inputs": [ + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "sum_count_star(1) AS count", + "GroupBy": "0", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select user_metadata.col, count(*) as `count` from user_metadata where 1 != 1 group by user_metadata.col", + "OrderBy": "0 ASC", + "Query": "select user_metadata.col, count(*) as `count` from user_metadata group by user_metadata.col order by user_metadata.col asc", + "Table": "user_metadata" + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "TablesUsed": [ + "user.music", + "user.user", + "user.user_extra", + "user.user_metadata" + ] + } + }, { "comment": "Straight-join (ignores the straight_join hint)", "query": "select m1.col from unsharded as m1 straight_join unsharded as m2", diff --git a/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json b/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json index a8fe91e5d49..4f6217fb9fb 100644 --- a/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json +++ b/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json @@ -252,6 +252,12 @@ "column": "non_planable", "name": "non_planable_user_map" } + ], + "columns": [ + { + "name": "col", + "type": "INT16" + } ] }, "user_extra": { @@ -282,6 +288,12 @@ "column": "id", "name": "music_user_map" } + ], + "columns": [ + { + "name": "intcol", + "type": "INT16" + } ] }, "authoritative": {