Skip to content

Commit

Permalink
[release-18.0] use aggregation engine over distinct engine when overl…
Browse files Browse the repository at this point in the history
…apping order by (#14359) (#14361)

Signed-off-by: Harshit Gangal <[email protected]>
Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com>
  • Loading branch information
vitess-bot[bot] authored Oct 25, 2023
1 parent 40bea23 commit 152b01f
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 22 deletions.
48 changes: 46 additions & 2 deletions go/vt/vtgate/planbuilder/operators/queryprojection.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,13 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy

func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) {
if qp.Distinct && !qp.HasAggr {
// grouping and distinct both lead to unique results, so we don't need
qp.groupByExprs = nil
if qp.useGroupingOverDistinct(ctx) {
// if order by exists with overlap with select expressions, we can use the aggregation with ordering over distinct.
qp.Distinct = false
} else {
// grouping and distinct both lead to unique results, so we don't need both; keep distinct and drop the grouping
qp.groupByExprs = nil
}
}

if qp.HasAggr && len(qp.groupByExprs) == 0 {
Expand Down Expand Up @@ -851,6 +856,45 @@ func (qp *QueryProjection) GetColumnCount() int {
return len(qp.SelectExprs) - qp.AddedColumn
}

// orderByOverlapWithSelectExpr reports whether at least one ORDER BY
// expression can be located among the select expressions, i.e. whether
// FindSelectExprIndexForExpr yields a non-nil index for its simplified form.
// It returns false when there are no ORDER BY expressions at all.
func (qp *QueryProjection) orderByOverlapWithSelectExpr(ctx *plancontext.PlanningContext) bool {
	for i := range qp.OrderExprs {
		// Only the index matters here; the second result is not needed.
		if pos, _ := qp.FindSelectExprIndexForExpr(ctx, qp.OrderExprs[i].SimplifiedExpr); pos != nil {
			return true
		}
	}
	return false
}

// useGroupingOverDistinct decides whether the DISTINCT can be expressed as a
// grouping on every select expression instead.
//
// It returns false, leaving qp.groupByExprs untouched, when no ORDER BY
// expression overlaps the select expressions or when any select expression
// cannot be obtained as an aliased expression. Otherwise it appends one
// GroupBy per select expression that is not already grouped on and returns
// true.
func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) bool {
	if !qp.orderByOverlapWithSelectExpr(ctx) {
		return false
	}

	// Collect the new groupings separately so that an early bail-out below
	// does not leave qp.groupByExprs partially modified.
	var pending []GroupBy
	for pos, sel := range qp.SelectExprs {
		aliased, err := sel.GetAliasedExpr()
		if err != nil {
			// not an aliased expression, so grouping cannot replace distinct.
			return false
		}

		simplified := qp.GetSimplifiedExpr(aliased.Expr)

		// Skip columns that the pre-existing groupings already cover.
		sameColumn := func(existing GroupBy) bool {
			return ctx.SemTable.EqualsExprWithDeps(existing.SimplifiedExpr, simplified)
		}
		if slices.IndexFunc(qp.groupByExprs, sameColumn) >= 0 {
			continue
		}

		// Take a per-iteration copy so each GroupBy points at its own index.
		innerIdx := pos
		gb := NewGroupBy(aliased.Expr, simplified, aliased)
		gb.InnerIndex = &innerIdx
		pending = append(pending, gb)
	}

	qp.groupByExprs = append(qp.groupByExprs, pending...)
	return true
}

func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error {
return sqlparser.Walk(func(node sqlparser.SQLNode) (bool, error) {
if _, isAggregate := node.(sqlparser.AggrFunc); isAggregate {
Expand Down
33 changes: 13 additions & 20 deletions go/vt/vtgate/planbuilder/testdata/oltp_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -106,28 +106,21 @@
"QueryType": "SELECT",
"Original": "SELECT DISTINCT c FROM sbtest30 WHERE id BETWEEN 1 AND 10 ORDER BY c",
"Instructions": {
"OperatorType": "Sort",
"Variant": "Memory",
"OrderBy": "0 ASC COLLATE latin1_swedish_ci",
"OperatorType": "Aggregate",
"Variant": "Ordered",
"GroupBy": "0 COLLATE latin1_swedish_ci",
"Inputs": [
{
"OperatorType": "Distinct",
"Collations": [
"0: latin1_swedish_ci"
],
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "main",
"Sharded": true
},
"FieldQuery": "select c from sbtest30 where 1 != 1",
"Query": "select distinct c from sbtest30 where id between 1 and 10",
"Table": "sbtest30"
}
]
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "main",
"Sharded": true
},
"FieldQuery": "select c from sbtest30 where 1 != 1 group by c",
"OrderBy": "0 ASC COLLATE latin1_swedish_ci",
"Query": "select c from sbtest30 where id between 1 and 10 group by c order by c asc",
"Table": "sbtest30"
}
]
},
Expand Down
100 changes: 100 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/postprocess_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -2081,5 +2081,105 @@
"user.user"
]
}
},
{
"comment": "distinct with order by using aggregation engine",
"query": "select distinct col from user where id between :vtg1 and :vtg2 order by col asc",
"plan": {
"QueryType": "SELECT",
"Original": "select distinct col from user where id between :vtg1 and :vtg2 order by col asc",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
"GroupBy": "0",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select col from `user` where 1 != 1 group by col",
"OrderBy": "0 ASC",
"Query": "select col from `user` where id between :vtg1 and :vtg2 group by col order by col asc",
"Table": "`user`"
}
]
},
"TablesUsed": [
"user.user"
]
}
},
{
"comment": "distinct with order by having additional non-order by columns in the selection using aggregation engine",
"query": "select distinct foo, col from user where id between :vtg1 and :vtg2 order by col asc",
"plan": {
"QueryType": "SELECT",
"Original": "select distinct foo, col from user where id between :vtg1 and :vtg2 order by col asc",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
"GroupBy": "1, (0|2)",
"ResultColumns": 2,
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select foo, col, weight_string(foo) from `user` where 1 != 1 group by col, foo, weight_string(foo)",
"OrderBy": "1 ASC, (0|2) ASC",
"Query": "select foo, col, weight_string(foo) from `user` where id between :vtg1 and :vtg2 group by col, foo, weight_string(foo) order by col asc, foo asc",
"Table": "`user`"
}
]
},
"TablesUsed": [
"user.user"
]
}
},
{
"comment": "distinct with order by having no overlap with the selection columns - using distinct engine",
"query": "select distinct foo from user where id between :vtg1 and :vtg2 order by col asc",
"plan": {
"QueryType": "SELECT",
"Original": "select distinct foo from user where id between :vtg1 and :vtg2 order by col asc",
"Instructions": {
"OperatorType": "Sort",
"Variant": "Memory",
"OrderBy": "1 ASC",
"ResultColumns": 1,
"Inputs": [
{
"OperatorType": "Distinct",
"Collations": [
"(0:2)",
"1"
],
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select foo, col, weight_string(foo) from `user` where 1 != 1",
"Query": "select distinct foo, col, weight_string(foo) from `user` where id between :vtg1 and :vtg2",
"Table": "`user`"
}
]
}
]
},
"TablesUsed": [
"user.user"
]
}
}
]

0 comments on commit 152b01f

Please sign in to comment.