From 987ec9a345520bd3a3064bffd894a92877e8551f Mon Sep 17 00:00:00 2001 From: tuannh982 Date: Tue, 22 Aug 2023 17:43:56 +0700 Subject: [PATCH 1/2] separate between AST and execution Signed-off-by: tuannh982 --- cascades/cost/cost.go | 7 ++ cascades/logicalplan/ast2plan.go | 74 ++++++++++++ cascades/logicalplan/ast2plan_test.go | 81 +++++++++++++ cascades/logicalplan/plan.go | 156 ++++++++++++++++++++++++ cascades/memo/explore.go | 20 ++++ cascades/memo/explore_test.go | 58 +++++++++ cascades/memo/group.go | 50 ++++++++ cascades/memo/implementation_rule.go | 7 ++ cascades/memo/memo.go | 68 +++++++++++ cascades/memo/memo_test.go | 166 ++++++++++++++++++++++++++ cascades/memo/transformation_rule.go | 11 ++ cascades/optimize.go | 119 ++++++++++++++++++ cascades/physicalplan/plan.go | 13 ++ cascades/utils/id_gen.go | 5 + cascades/utils/set.go | 22 ++++ 15 files changed, 857 insertions(+) create mode 100644 cascades/cost/cost.go create mode 100644 cascades/logicalplan/ast2plan.go create mode 100644 cascades/logicalplan/ast2plan_test.go create mode 100644 cascades/logicalplan/plan.go create mode 100644 cascades/memo/explore.go create mode 100644 cascades/memo/explore_test.go create mode 100644 cascades/memo/group.go create mode 100644 cascades/memo/implementation_rule.go create mode 100644 cascades/memo/memo.go create mode 100644 cascades/memo/memo_test.go create mode 100644 cascades/memo/transformation_rule.go create mode 100644 cascades/optimize.go create mode 100644 cascades/physicalplan/plan.go create mode 100644 cascades/utils/id_gen.go create mode 100644 cascades/utils/set.go diff --git a/cascades/cost/cost.go b/cascades/cost/cost.go new file mode 100644 index 00000000..93bc6d7f --- /dev/null +++ b/cascades/cost/cost.go @@ -0,0 +1,7 @@ +package cost + +type Cost interface{} + +type CostModel interface { + IsBetter(currentCost Cost, newCost Cost) bool +} diff --git a/cascades/logicalplan/ast2plan.go b/cascades/logicalplan/ast2plan.go new file mode 100644 index 00000000..50222de0 --- /dev/null +++ b/cascades/logicalplan/ast2plan.go @@ -0,0 +1,74 @@ +package logicalplan + +import "github.com/thanos-io/promql-engine/parser" + +func NewLogicalPlan(expr *parser.Expr) LogicalPlan { + switch node := (*expr).(type) { + case *parser.StepInvariantExpr: + return &StepInvariantExpr{Expr: NewLogicalPlan(&node.Expr)} + case *parser.VectorSelector: + return &VectorSelector{ + Name: node.Name, + OriginalOffset: node.OriginalOffset, + Offset: node.Offset, + Timestamp: node.Timestamp, + StartOrEnd: node.StartOrEnd, + LabelMatchers: node.LabelMatchers, + } + case *parser.MatrixSelector: + return &MatrixSelector{ + VectorSelector: NewLogicalPlan(&node.VectorSelector), + Range: node.Range, + } + case *parser.AggregateExpr: + return &AggregateExpr{ + Op: node.Op, + Expr: NewLogicalPlan(&node.Expr), + Param: NewLogicalPlan(&node.Param), + Grouping: node.Grouping, + Without: node.Without, + } + case *parser.Call: + var args []LogicalPlan + for i := range node.Args { + args = append(args, NewLogicalPlan(&node.Args[i])) + } + return &Call{ + Func: node.Func, + Args: args, + } + case *parser.BinaryExpr: + return &BinaryExpr{ + Op: node.Op, + LHS: NewLogicalPlan(&node.LHS), + RHS: NewLogicalPlan(&node.RHS), + VectorMatching: node.VectorMatching, + ReturnBool: node.ReturnBool, + } + case *parser.UnaryExpr: + return &UnaryExpr{ + Op: node.Op, + Expr: NewLogicalPlan(&node.Expr), + } + case *parser.ParenExpr: + return &ParenExpr{ + Expr: NewLogicalPlan(&node.Expr), + } + case *parser.SubqueryExpr: + return &SubqueryExpr{ + Expr: NewLogicalPlan(&node.Expr), + Range: node.Range, + OriginalOffset: node.OriginalOffset, + Offset: node.Offset, + Timestamp: node.Timestamp, + StartOrEnd: node.StartOrEnd, + Step: node.Step, + } + // literal types + case *parser.NumberLiteral: + return &NumberLiteral{Val: node.Val} + case *parser.StringLiteral: + return &StringLiteral{Val: node.Val} + } + return nil // should never reach here +} diff --git a/cascades/logicalplan/ast2plan_test.go b/cascades/logicalplan/ast2plan_test.go new file mode 100644 index 00000000..7130f69e --- /dev/null +++ b/cascades/logicalplan/ast2plan_test.go @@ -0,0 +1,81 @@ +package logicalplan + +import ( + "github.com/stretchr/testify/require" + "github.com/thanos-io/promql-engine/parser" + "math" + "testing" +) + +var ast2planTestCases = []struct { + input parser.Expr // The AST input. + expected LogicalPlan // The expected logical plan. +}{ + { + input: &parser.NumberLiteral{Val: 1}, + expected: &NumberLiteral{Val: 1}, + }, + { + input: &parser.NumberLiteral{Val: math.Inf(1)}, + expected: &NumberLiteral{Val: math.Inf(1)}, + }, + { + input: &parser.NumberLiteral{Val: math.Inf(-1)}, + expected: &NumberLiteral{Val: math.Inf(-1)}, + }, + { + input: &parser.BinaryExpr{ + Op: parser.ADD, + LHS: &parser.NumberLiteral{Val: 1}, + RHS: &parser.NumberLiteral{Val: 1}, + }, + expected: &BinaryExpr{ + Op: parser.ADD, + LHS: &NumberLiteral{Val: 1}, + RHS: &NumberLiteral{Val: 1}, + }, + }, + { + input: &parser.BinaryExpr{ + Op: parser.ADD, + LHS: &parser.NumberLiteral{Val: 1}, + RHS: &parser.BinaryExpr{ + Op: parser.DIV, + LHS: &parser.NumberLiteral{Val: 2}, + RHS: &parser.ParenExpr{ + Expr: &parser.BinaryExpr{ + Op: parser.MUL, + LHS: &parser.NumberLiteral{Val: 3}, + RHS: &parser.NumberLiteral{Val: 1}, + }, + }, + }, + }, + expected: &BinaryExpr{ + Op: parser.ADD, + LHS: &NumberLiteral{Val: 1}, + RHS: &BinaryExpr{ + Op: parser.DIV, + LHS: &NumberLiteral{Val: 2}, + RHS: &ParenExpr{ + Expr: &BinaryExpr{ + Op: parser.MUL, + LHS: &NumberLiteral{Val: 3}, + RHS: &NumberLiteral{Val: 1}, + }, + }, + }, + }, + }, + // TODO add tests +} + +func TestAST2Plan(t *testing.T) { + for _, test := range ast2planTestCases { + t.Run(test.input.String(), func(t *testing.T) { + plan := NewLogicalPlan(&test.input) + require.True(t, plan != nil, "could not convert AST to logical plan") + require.Equal(t, test.expected, plan, "error on input '%s'", test.input.String()) + }) + } +} diff --git a/cascades/logicalplan/plan.go b/cascades/logicalplan/plan.go new file mode 100644 index 00000000..cb46c706 --- /dev/null +++ b/cascades/logicalplan/plan.go @@ -0,0 +1,156 @@ +package logicalplan + +import ( + "github.com/prometheus/prometheus/model/labels" + "github.com/thanos-io/promql-engine/parser" + "time" +) + +type LogicalPlan interface { + Children() LogicalPlans +} + +type LogicalPlans []LogicalPlan + +// StepInvariantExpr represents a query which evaluates to the same result +// irrespective of the evaluation time given the raw samples from TSDB remain unchanged. +// Currently this is only used for engine optimisations and the parser does not produce this. +type StepInvariantExpr struct { + Expr LogicalPlan +} + +func (l *StepInvariantExpr) Children() LogicalPlans { + return []LogicalPlan{l.Expr} +} + +// VectorSelector represents a Vector selection. +type VectorSelector struct { + Name string + // OriginalOffset is the actual offset that was set in the query. + // This never changes. + OriginalOffset time.Duration + // Offset is the offset used during the query execution + // which is calculated using the original offset, at modifier time, + // eval time, and subquery offsets in the AST tree. + Offset time.Duration + Timestamp *int64 + StartOrEnd parser.ItemType // Set when @ is used with start() or end() + LabelMatchers []*labels.Matcher +} + +func (l *VectorSelector) Children() LogicalPlans { + return []LogicalPlan{} +} + +// MatrixSelector represents a Matrix selection. +type MatrixSelector struct { + // It is safe to assume that this is an VectorSelector + // if the parser hasn't returned an error. + VectorSelector LogicalPlan + Range time.Duration +} + +func (l *MatrixSelector) Children() LogicalPlans { + return []LogicalPlan{l.VectorSelector} +} + +// AggregateExpr represents an aggregation operation on a Vector. +type AggregateExpr struct { + Op parser.ItemType // The used aggregation operation. + Expr LogicalPlan // The Vector expression over which is aggregated. + Param LogicalPlan // Parameter used by some aggregators. + Grouping []string // The labels by which to group the Vector. + Without bool // Whether to drop the given labels rather than keep them. +} + +func (l *AggregateExpr) Children() LogicalPlans { + return []LogicalPlan{l.Expr, l.Param} +} + +// Call represents a function call. +type Call struct { + Func *parser.Function // The function that was called. + Args LogicalPlans // Arguments used in the call. +} + +func (l *Call) Children() LogicalPlans { + return l.Args +} + +// BinaryExpr represents a binary expression between two child expressions. +type BinaryExpr struct { + Op parser.ItemType // The operation of the expression. + LHS, RHS LogicalPlan // The operands on the respective sides of the operator. + + // The matching behavior for the operation if both operands are Vectors. + // If they are not this field is nil. + VectorMatching *parser.VectorMatching + + // If a comparison operator, return 0/1 rather than filtering. + ReturnBool bool +} + +func (l *BinaryExpr) Children() LogicalPlans { + return []LogicalPlan{l.LHS, l.RHS} +} + +// UnaryExpr represents a unary operation on another expression. +// Currently unary operations are only supported for Scalars. +type UnaryExpr struct { + Op parser.ItemType + Expr LogicalPlan +} + +func (l *UnaryExpr) Children() LogicalPlans { + return []LogicalPlan{l.Expr} +} + +// ParenExpr wraps an expression so it cannot be disassembled as a consequence +// of operator precedence. +type ParenExpr struct { + Expr LogicalPlan +} + +func (l *ParenExpr) Children() LogicalPlans { + return []LogicalPlan{l.Expr} +} + +// SubqueryExpr represents a subquery. +type SubqueryExpr struct { + Expr LogicalPlan + Range time.Duration + // OriginalOffset is the actual offset that was set in the query. + // This never changes. + OriginalOffset time.Duration + // Offset is the offset used during the query execution + // which is calculated using the original offset, at modifier time, + // eval time, and subquery offsets in the AST tree. + Offset time.Duration + Timestamp *int64 + StartOrEnd parser.ItemType // Set when @ is used with start() or end() + Step time.Duration +} + +func (l *SubqueryExpr) Children() LogicalPlans { + return []LogicalPlan{l.Expr} +} + +// literal types + +// NumberLiteral represents a number. +type NumberLiteral struct { + Val float64 +} + +func (l *NumberLiteral) Children() LogicalPlans { + return []LogicalPlan{} +} + +// StringLiteral represents a string. +type StringLiteral struct { + Val string +} + +func (l *StringLiteral) Children() LogicalPlans { + return []LogicalPlan{} +} diff --git a/cascades/memo/explore.go b/cascades/memo/explore.go new file mode 100644 index 00000000..1f4aab0a --- /dev/null +++ b/cascades/memo/explore.go @@ -0,0 +1,20 @@ +package memo + +const ( + InitialExplorationRound ExplorationRound = 0 +) + +type ExplorationRound int // The exploration round. +type ExplorationMark int // The exploration mark, if i-th bit of the mark is set, then i-th round is explored. + +func (e *ExplorationMark) IsExplored(round ExplorationRound) bool { + return (*e & (1 << round)) != 0 +} + +func (e *ExplorationMark) SetExplore(round ExplorationRound, explored bool) { + if explored { + *e |= 1 << round + } else { + *e &= ^(1 << round) + } +} diff --git a/cascades/memo/explore_test.go b/cascades/memo/explore_test.go new file mode 100644 index 00000000..fec9d855 --- /dev/null +++ b/cascades/memo/explore_test.go @@ -0,0 +1,58 @@ +package memo + +import ( + "fmt" + "github.com/stretchr/testify/require" + "testing" +) + +var explorationMarkTestCases = []struct { + input ExplorationMark // The initial exploration mark. + method string + round ExplorationRound + expected ExplorationMark // The expected exploration mark. +}{ + { + input: ExplorationMark(0), + method: "set", + round: ExplorationRound(0), + expected: ExplorationMark(0b1), + }, + { + input: ExplorationMark(0), + method: "set", + round: ExplorationRound(1), + expected: ExplorationMark(0b10), + }, + { + input: ExplorationMark(0), + method: "set", + round: ExplorationRound(2), + expected: ExplorationMark(0b100), + }, + { + input: ExplorationMark(0b1110), + method: "unset", + round: ExplorationRound(2), + expected: ExplorationMark(0b1010), + }, + // TODO add tests +} + +func TestExplorationMark(t *testing.T) { + for _, test := range explorationMarkTestCases { + description := fmt.Sprintf("exploration mark %b after %s on round %d should be %b", test.input, test.method, test.round, test.expected) + t.Run(description, func(t *testing.T) { + mark := test.input + switch test.method { + case "set": + mark.SetExplore(test.round, true) + case "unset": + mark.SetExplore(test.round, false) + default: + require.Fail(t, "unrecognized method %s", test.method) + } + require.Equal(t, test.expected, mark, "error on input '%s'", test.input) + }) + } +} diff --git a/cascades/memo/group.go b/cascades/memo/group.go new file mode 100644 index 00000000..a53a2b30 --- /dev/null +++ b/cascades/memo/group.go @@ -0,0 +1,50 @@ +package memo + +import ( + "github.com/thanos-io/promql-engine/cascades/cost" + "github.com/thanos-io/promql-engine/cascades/logicalplan" + "github.com/thanos-io/promql-engine/cascades/physicalplan" + "github.com/thanos-io/promql-engine/cascades/utils" + "sync/atomic" +) + +// ID + +type ID uint32 + +type idGenerator struct { + counter uint64 +} + +func NewIDGenerator() utils.Generator[ID] { + return &idGenerator{counter: 0} +} + +func (g *idGenerator) Generate() ID { + return ID(atomic.AddUint64(&g.counter, 1)) +} + +// Group + +type Group struct { + ID ID + // logical + Equivalents map[ID]*GroupExpr // The equivalent expressions. + ExplorationMark + // physical + Implementation *GroupImplementation +} + +type GroupImplementation struct { + SelectedExpr *GroupExpr + Cost cost.Cost + Implementation physicalplan.Implementation +} + +type GroupExpr struct { + ID ID + Expr logicalplan.LogicalPlan // The logical plan bind to the expression. + Children []*Group // The children group of the expression, noted that it must be in the same order with LogicalPlan.Children(). + AppliedTransformations utils.Set[TransformationRule] + ExplorationMark +} diff --git a/cascades/memo/implementation_rule.go b/cascades/memo/implementation_rule.go new file mode 100644 index 00000000..e74409e3 --- /dev/null +++ b/cascades/memo/implementation_rule.go @@ -0,0 +1,7 @@ +package memo + +import "github.com/thanos-io/promql-engine/cascades/physicalplan" + +type ImplementationRule interface { + ListImplementations(expr *GroupExpr) []physicalplan.Implementation // List all implementation for the expression +} diff --git a/cascades/memo/memo.go b/cascades/memo/memo.go new file mode 100644 index 00000000..b1ba97df --- /dev/null +++ b/cascades/memo/memo.go @@ -0,0 +1,68 @@ +package memo + +import ( + "github.com/thanos-io/promql-engine/cascades/logicalplan" + "github.com/thanos-io/promql-engine/cascades/utils" +) + +type Memo interface { + GetOrCreateGroupExpr(node logicalplan.LogicalPlan) *GroupExpr + GetOrCreateGroup(node logicalplan.LogicalPlan) *Group +} + +type memo struct { + Groups map[ID]*Group // The ID-Group mapping, used to store all the groups. + Parents map[ID]*Group // The GroupExpr-Group mapping, mapped from expr ID to group, used to find the group containing the equivalent logical plans. + GroupExprs map[logicalplan.LogicalPlan]*GroupExpr // The LogicalPlan-GroupExpr mapping. + groupIDGenerator utils.Generator[ID] // The ID generator for groups + groupExprIDGenerator utils.Generator[ID] // The ID generator for group exprs +} + +func NewMemo() Memo { + return &memo{ + Groups: make(map[ID]*Group), + Parents: make(map[ID]*Group), + GroupExprs: make(map[logicalplan.LogicalPlan]*GroupExpr), + groupIDGenerator: NewIDGenerator(), + groupExprIDGenerator: NewIDGenerator(), + } +} + +func (m *memo) GetOrCreateGroupExpr(node logicalplan.LogicalPlan) *GroupExpr { + children := node.Children() + var childGroups []*Group + for _, child := range children { + childGroups = append(childGroups, m.GetOrCreateGroup(child)) + } + entry, ok := m.GroupExprs[node] + if ok { + return entry + } else { + id := m.groupExprIDGenerator.Generate() + expr := &GroupExpr{ + ID: id, + Expr: node, + Children: childGroups, + } + m.GroupExprs[node] = expr + return expr + } +} + +func (m *memo) GetOrCreateGroup(node logicalplan.LogicalPlan) *Group { + groupExpr := m.GetOrCreateGroupExpr(node) + entry, ok := m.Parents[groupExpr.ID] + if ok { + entry.Equivalents[groupExpr.ID] = groupExpr + return entry + } else { + id := m.groupIDGenerator.Generate() + group := &Group{ + ID: id, + Equivalents: map[ID]*GroupExpr{groupExpr.ID: groupExpr}, + } + m.Groups[group.ID] = group + m.Parents[groupExpr.ID] = group + return group + } +} diff --git a/cascades/memo/memo_test.go b/cascades/memo/memo_test.go new file mode 100644 index 00000000..c58a06da --- /dev/null +++ b/cascades/memo/memo_test.go @@ -0,0 +1,166 @@ +package memo + +import ( + "github.com/stretchr/testify/assert" + "github.com/thanos-io/promql-engine/cascades/logicalplan" + "github.com/thanos-io/promql-engine/parser" + "golang.org/x/exp/maps" + "reflect" + "testing" +) + +var memoInitRootTestCases = []struct { + input logicalplan.LogicalPlan // The initial logical plan. + expected *Group // The expected group. +}{ + { + input: &logicalplan.BinaryExpr{ + Op: parser.ADD, + LHS: &logicalplan.NumberLiteral{Val: 1}, + RHS: &logicalplan.BinaryExpr{ + Op: parser.DIV, + LHS: &logicalplan.NumberLiteral{Val: 2}, + RHS: &logicalplan.ParenExpr{ + Expr: &logicalplan.BinaryExpr{ + Op: parser.MUL, + LHS: &logicalplan.NumberLiteral{Val: 3}, + RHS: &logicalplan.NumberLiteral{Val: 1}, + }, + }, + }, + }, + expected: &Group{ + Equivalents: map[ID]*GroupExpr{ + 0: { + Expr: &logicalplan.BinaryExpr{ /* #1 */ + Op: parser.ADD, + LHS: &logicalplan.NumberLiteral{Val: 1}, /* #2 */ + RHS: &logicalplan.BinaryExpr{ /* #3 */ + Op: parser.DIV, + LHS: &logicalplan.NumberLiteral{Val: 2}, /* #4 */ + RHS: &logicalplan.ParenExpr{ /* #5 */ + Expr: &logicalplan.BinaryExpr{ /* #6 */ + Op: parser.MUL, + LHS: &logicalplan.NumberLiteral{Val: 3}, // #7 */ + RHS: &logicalplan.NumberLiteral{Val: 1}, // #8 */ + }, + }, + }, + }, + Children: []*Group{ + { + Equivalents: map[ID]*GroupExpr{ + 0: {Expr: &logicalplan.NumberLiteral{Val: 1}}, /* #2 */ + }, + }, + { + Equivalents: map[ID]*GroupExpr{ + 0: { + Expr: &logicalplan.BinaryExpr{ /* #3 */ + Op: parser.DIV, + LHS: &logicalplan.NumberLiteral{Val: 2}, /* #4 */ + RHS: &logicalplan.ParenExpr{ /* #5 */ + Expr: &logicalplan.BinaryExpr{ /* #6 */ + Op: parser.MUL, + LHS: &logicalplan.NumberLiteral{Val: 3}, /* #7 */ + RHS: &logicalplan.NumberLiteral{Val: 1}, /* #8 */ + }, + }, + }, + Children: []*Group{ + { + Equivalents: map[ID]*GroupExpr{ + 0: {Expr: &logicalplan.NumberLiteral{Val: 2}}, /* #4 */ + }, + }, + { + Equivalents: map[ID]*GroupExpr{ + 0: { + Expr: &logicalplan.ParenExpr{ /* #5 */ + Expr: &logicalplan.BinaryExpr{ /* #6 */ + Op: parser.MUL, + LHS: &logicalplan.NumberLiteral{Val: 3}, /* #7 */ + RHS: &logicalplan.NumberLiteral{Val: 1}, /* #8 */ + }, + }, + Children: []*Group{ + { + Equivalents: map[ID]*GroupExpr{ + 0: { + Expr: &logicalplan.BinaryExpr{ /* #6 */ + Op: parser.MUL, + LHS: &logicalplan.NumberLiteral{Val: 3}, /* #7 */ + RHS: &logicalplan.NumberLiteral{Val: 1}, /* #8 */ + }, + Children: []*Group{ + { + Equivalents: map[ID]*GroupExpr{ + 0: {Expr: &logicalplan.NumberLiteral{Val: 3}}, /* #7 */ + }, + }, + { + Equivalents: map[ID]*GroupExpr{ + 0: {Expr: &logicalplan.NumberLiteral{Val: 1}}, /* #8 */ + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + // TODO add tests +} + +// function to check if two root group are logically equals +func checkGroupEquals(left *Group, right *Group) bool { + if left == right { + return true + } + // since we're testing root group, all group only contains exactly one group expr + if len(left.Equivalents) != len(right.Equivalents) || len(left.Equivalents) != 1 || len(right.Equivalents) != 1 { + return false + } + leftExpr := maps.Values(left.Equivalents)[0] + rightExpr := maps.Values(right.Equivalents)[0] + return checkGroupExprEquals(leftExpr, rightExpr) +} + +// function to check if two group exprs are logically equals +func checkGroupExprEquals(left *GroupExpr, right *GroupExpr) bool { + if left == right { + return true + } + if !reflect.DeepEqual(left.Expr, right.Expr) { + return false + } + if len(left.Children) != len(right.Children) { + return false + } + length := len(left.Children) + for i := 0; i < length; i++ { + if !checkGroupEquals(left.Children[i], right.Children[i]) { + return false + } + } + return true +} + +func TestMemoInitRootGroup(t *testing.T) { + for _, test := range memoInitRootTestCases { + m := NewMemo() + root := m.GetOrCreateGroup(test.input) + assert.True(t, checkGroupEquals(root, test.expected), "error on input '%s'", test.input) + } +} diff --git a/cascades/memo/transformation_rule.go b/cascades/memo/transformation_rule.go new file mode 100644 index 00000000..f2d51190 --- /dev/null +++ b/cascades/memo/transformation_rule.go @@ -0,0 +1,11 @@ +package memo + +import ( + "github.com/thanos-io/promql-engine/cascades/utils" +) + +type TransformationRule interface { + utils.Hashable + Match(expr *GroupExpr) bool // Check if the transformation can be applied to the expression + Transform(expr *GroupExpr) *GroupExpr // Transform the expression +} diff --git a/cascades/optimize.go b/cascades/optimize.go new file mode 100644 index 00000000..d210ae0f --- /dev/null +++ b/cascades/optimize.go @@ -0,0 +1,119 @@ +package cascades + +import ( + "github.com/thanos-io/promql-engine/cascades/cost" + "github.com/thanos-io/promql-engine/cascades/logicalplan" + "github.com/thanos-io/promql-engine/cascades/memo" + "github.com/thanos-io/promql-engine/cascades/physicalplan" + "github.com/thanos-io/promql-engine/parser" + "golang.org/x/exp/maps" +) + +type Optimize struct { + expr parser.Expr + memo memo.Memo + // root + root logicalplan.LogicalPlan + rootGroup *memo.Group +} + +func New(expr parser.Expr) *Optimize { + return &Optimize{ + expr: expr, + memo: memo.NewMemo(), + } +} + +func (o *Optimize) SetRoot(root logicalplan.LogicalPlan) { + o.root = root + o.rootGroup = o.memo.GetOrCreateGroup(root) +} + +func (o *Optimize) exploreGroup(rules []memo.TransformationRule, group *memo.Group, round memo.ExplorationRound) { + for { + if group.IsExplored(round) { + break + } + group.SetExplore(round, true) + for _, equivalentExpr := range maps.Values(group.Equivalents) { + // if the equivalent expression is not yet explored, then we will explore it + if !equivalentExpr.IsExplored(round) { + equivalentExpr.SetExplore(round, true) + for _, child := range equivalentExpr.Children { + o.exploreGroup(rules, child, round) + if equivalentExpr.IsExplored(round) && child.IsExplored(round) { + equivalentExpr.SetExplore(round, true) + } else { + equivalentExpr.SetExplore(round, false) + } + } + } + // fire rules for more equivalent expressions + for _, rule := range rules { + if !equivalentExpr.AppliedTransformations.Contains(rule) { + if rule.Match(equivalentExpr) { + transformedExpr := rule.Transform(equivalentExpr) + group.Equivalents[transformedExpr.ID] = transformedExpr + equivalentExpr.AppliedTransformations.Add(rule) + // reset group exploration state + transformedExpr.SetExplore(round, false) + group.SetExplore(round, false) + } + } + } + if group.IsExplored(round) && equivalentExpr.IsExplored(round) { + group.SetExplore(round, true) + } else { + group.SetExplore(round, false) + } + } + } +} + +func (o *Optimize) Explore(rules []memo.TransformationRule, round memo.ExplorationRound) { + o.exploreGroup(rules, o.rootGroup, round) +} + +func (o *Optimize) findBestImpl(costModel cost.CostModel, rules []memo.ImplementationRule, group *memo.Group) *memo.GroupImplementation { + if group.Implementation != nil { + return group.Implementation + } else { + var groupImpl *memo.GroupImplementation + for _, expr := range group.Equivalents { + // fire rules to find implementations for each equiv expr, returning un-calculated implementations + var possibleImpls []physicalplan.Implementation + for _, rule := range rules { + possibleImpls = append(possibleImpls, rule.ListImplementations(expr)...) + } + // get the implementation of child groups + var childImpls []physicalplan.Implementation + for _, child := range expr.Children { + childImpl := o.findBestImpl(costModel, rules, child) + child.Implementation = childImpl + childImpls = append(childImpls, childImpl.Implementation) + } + // calculate the implementation, and update the best cost for group + for _, impl := range possibleImpls { + calculatedCost := impl.CalculateCost(childImpls) + if groupImpl != nil { + if costModel.IsBetter(groupImpl, calculatedCost) { + groupImpl.SelectedExpr = expr + groupImpl.Implementation = impl + groupImpl.Cost = calculatedCost + } + } else { + groupImpl = &memo.GroupImplementation{ + SelectedExpr: expr, + Cost: calculatedCost, + Implementation: impl, + } + } + } + } + return groupImpl + } +} + +func (o *Optimize) FindBestImplementation(costModel cost.CostModel, rules []memo.ImplementationRule) { + o.rootGroup.Implementation = o.findBestImpl(costModel, rules, o.rootGroup) +} diff --git a/cascades/physicalplan/plan.go b/cascades/physicalplan/plan.go new file mode 100644 index 00000000..3896bad1 --- /dev/null +++ b/cascades/physicalplan/plan.go @@ -0,0 +1,13 @@ +package physicalplan + +import ( + "github.com/thanos-io/promql-engine/cascades/cost" + "github.com/thanos-io/promql-engine/execution/model" +) + +type Implementation interface { + CalculateCost(children []Implementation) cost.Cost // Calculate cost based on provided child implementations, also update the actual implementation, cost, and children list + Operator() model.VectorOperator // Return the saved physical operator set from the last CalculateCost + Cost() cost.Cost // Return the saved cost from the last CalculateCost call. + Children() []Implementation // Return the saved child implementations from the last CalculateCost call. +} diff --git a/cascades/utils/id_gen.go b/cascades/utils/id_gen.go new file mode 100644 index 00000000..a68ad916 --- /dev/null +++ b/cascades/utils/id_gen.go @@ -0,0 +1,5 @@ +package utils + +type Generator[T any] interface { + Generate() T +} diff --git a/cascades/utils/set.go b/cascades/utils/set.go new file mode 100644 index 00000000..0888e8ea --- /dev/null +++ b/cascades/utils/set.go @@ -0,0 +1,22 @@ +package utils + +import "golang.org/x/exp/maps" + +type Hashable interface { + HashCode() uint64 +} + +type Set[T Hashable] map[uint64]T + +func (s *Set[T]) Values() []T { + return maps.Values(*s) +} + +func (s *Set[T]) Add(entry T) { + (*s)[entry.HashCode()] = entry +} + +func (s *Set[T]) Contains(entry T) bool { + _, ok := (*s)[entry.HashCode()] + return ok +} From d8d7365e73d14bf4ea5fe9bee4561b122f486b61 Mon Sep 17 00:00:00 2001 From: tuannh982 Date: Wed, 23 Aug 2023 00:56:46 +0700 Subject: [PATCH 2/2] update pkg, add proposal doc Signed-off-by: tuannh982 --- cascades/memo/implementation_rule.go | 7 - cascades/memo/transformation_rule.go | 11 - cascades/physicalplan/plan.go | 13 - planner/README.md | 184 ++++++++++++++ {cascades => planner}/cost/cost.go | 5 +- {cascades => planner}/logicalplan/ast2plan.go | 0 .../logicalplan/ast2plan_test.go | 0 {cascades => planner}/logicalplan/plan.go | 0 {cascades => planner}/memo/explore.go | 0 {cascades => planner}/memo/explore_test.go | 0 {cascades => planner}/memo/group.go | 10 +- planner/memo/implementation_rule.go | 7 + {cascades => planner}/memo/memo.go | 11 +- {cascades => planner}/memo/memo_test.go | 2 +- planner/memo/transformation_rule.go | 11 + planner/physicalplan/plan.go | 13 + cascades/optimize.go => planner/planner.go | 47 ++-- planner/planner_test.go | 237 ++++++++++++++++++ {cascades => planner}/utils/id_gen.go | 0 {cascades => planner}/utils/set.go | 0 20 files changed, 492 insertions(+), 66 deletions(-) delete mode 100644 cascades/memo/implementation_rule.go delete mode 100644 cascades/memo/transformation_rule.go delete mode 100644 cascades/physicalplan/plan.go create mode 100644 planner/README.md rename {cascades => planner}/cost/cost.go (59%) rename {cascades => planner}/logicalplan/ast2plan.go (100%) rename {cascades => planner}/logicalplan/ast2plan_test.go (100%) rename {cascades => planner}/logicalplan/plan.go (100%) rename {cascades => planner}/memo/explore.go (100%) rename {cascades => planner}/memo/explore_test.go (100%) rename {cascades => planner}/memo/group.go (77%) create mode 100644 planner/memo/implementation_rule.go rename {cascades => planner}/memo/memo.go (87%) rename {cascades => planner}/memo/memo_test.go (98%) create mode 100644 planner/memo/transformation_rule.go create mode 100644 planner/physicalplan/plan.go rename cascades/optimize.go => planner/planner.go (66%) create mode 100644 planner/planner_test.go rename {cascades => planner}/utils/id_gen.go (100%) rename {cascades => planner}/utils/set.go (100%) diff --git a/cascades/memo/implementation_rule.go b/cascades/memo/implementation_rule.go deleted file mode 100644 index e74409e3..00000000 --- a/cascades/memo/implementation_rule.go +++ /dev/null @@ -1,7 +0,0 @@ -package memo - -import "github.com/thanos-io/promql-engine/cascades/physicalplan" - -type ImplementationRule interface { - ListImplementations(expr *GroupExpr) []physicalplan.Implementation // List all implementation for the expression -} diff --git a/cascades/memo/transformation_rule.go b/cascades/memo/transformation_rule.go deleted file mode 100644 index f2d51190..00000000 --- a/cascades/memo/transformation_rule.go +++ /dev/null @@ -1,11 +0,0 @@ -package memo - -import ( - "github.com/thanos-io/promql-engine/cascades/utils" -) - -type TransformationRule interface { - utils.Hashable - Match(expr *GroupExpr) bool // Check if the transformation can be applied to the expression - Transform(expr *GroupExpr) *GroupExpr // Transform the expression -} diff --git a/cascades/physicalplan/plan.go b/cascades/physicalplan/plan.go deleted file mode 100644 index 3896bad1..00000000 --- a/cascades/physicalplan/plan.go +++ /dev/null @@ -1,13 +0,0 @@ -package physicalplan - -import ( - "github.com/thanos-io/promql-engine/cascades/cost" - "github.com/thanos-io/promql-engine/execution/model" -) - -type Implementation interface { - CalculateCost(children []Implementation) cost.Cost // Calculate cost based on provided child implementations, also update the actual implementation, cost, and children list - Operator() model.VectorOperator // Return the saved physical operator set from the last CalculateCost - Cost() cost.Cost // Return the saved cost from the last CalculateCost call. - Children() []Implementation // Return the saved child implementations from the last CalculateCost call. -} diff --git a/planner/README.md b/planner/README.md new file mode 100644 index 00000000..f42ab18b --- /dev/null +++ b/planner/README.md @@ -0,0 +1,184 @@ +Proposal: separate between AST & execution, refactor query planner +=== + +## Background + +Quotes from https://github.com/thanos-io/promql-engine/issues/5 + +``` +We currently translate the AST directly to a physical plan. Having an in-between logical plan will allow us to run optimizers before the query is executed. + +The logical plan would have a one to one mapping with the AST and will contain the parameters of each AST node. +Query optimizers can then transform the logical plan based on predefined heuristics. One example would be optimizing series selects in binary operations so that we do as few network calls as possible. + +Finally, we would build the physical plan from the optimized logical plan instead doing it from the AST directly. +``` + +Here's our current query lifecycle + +```mermaid +flowchart TD +Query["query string"] +AST["parser.Expr"] +Plan1["logicalplan.Plan"] +Plan2["logicalplan.Plan"] +Operator["model.VectorOperator"] + +Query -->|parsring| AST +AST -->|logicalplan.New| Plan1 +Plan1 -->|optimize| Plan2 +Plan2 -->|execution.New| Operator +``` + +The `logicalplan.Plan` is just a wrapper of `parser.Expr`, and the conversion from `logicalplan.Plan` to `model.VectorOperator` is actually direct conversion from `parser.Expr` to `model.VectorOperator`. + +Another point, is our optimizers are heuristic optimizers, hence it's could not optimize for some complex queries, and could not use the data statistic to perform the optimization + +## Proposal + +We will implement the 2-stages planner according to Volcano planner + +```mermaid +flowchart TD +Query["query string"] +AST["parser.Expr"] +LogicalPlan1["LogicalPlan"] +Operator["model.VectorOperator"] + +subgraph plan["Planner"] + subgraph explore["Exploration Phase"] + LogicalPlan2["LogicalPlan"] + end + subgraph implement["Implementation Phase"] + PhysicalPlan + end +end + +Query -->|parsring| AST +AST -->|ast2plan| LogicalPlan1 +LogicalPlan1 --> LogicalPlan2 +LogicalPlan2 --> PhysicalPlan +PhysicalPlan --> Operator +``` + +### Exploration phase + +The exploration phase, used to explore all possible transformation of the original logical plan + +```mermaid +flowchart TD +LogicalPlan1["LogicalPlan"] +LogicalPlan2["LogicalPlan"] + +LogicalPlan1 -->|fire transformation rules| LogicalPlan2 +``` + +**Define**: +- `Group`: The `Equivalent Group`, or the `Equivalent Set`, is a group of multiple equivalent logical plans +- `GroupExpr`: representing a logical plan node (basically it's just wrap around the logical plan, with some additional information) + +```go +type Group struct { + // logical + Equivalents map[ID]*GroupExpr // The equivalent expressions. + ExplorationMark +} + +type GroupExpr struct { + Expr logicalplan.LogicalPlan // The logical plan bind to the expression. + Children []*Group // The children group of the expression, noted that it must be in the same order with LogicalPlan.Children(). + AppliedTransformations utils.Set[TransformationRule] + ExplorationMark +} +``` + +Here is the interface of transformation rule + +```go +type TransformationRule interface { + Match(expr *GroupExpr) bool // Check if the transformation can be applied to the expression + Transform(expr *GroupExpr) *GroupExpr // Transform the expression +} +``` + +```go +for _, rule := range rules { + if rule.Match(equivalentExpr) { + if !equivalentExpr.AppliedTransformations.Contains(rule) { + transformedExpr := rule.Transform(o.memo, equivalentExpr) + // add new equivalent expr to group + group.Equivalents[transformedExpr.ID] = transformedExpr + equivalentExpr.AppliedTransformations.Add(rule) + // reset group exploration state + transformedExpr.SetExplore(round, false) + group.SetExplore(round, false) + } + } +} +``` + +### Implementation phase + +After exploration phase, we have the expanded logical plan (including the original plan and the transformed plans) + +Then we will find the implementation which has the lowest implementation cost + + +```mermaid +flowchart TD +LogicalPlan2["LogicalPlan"] +PhysicalPlan +Operator["model.VectorOperator"] + +LogicalPlan2 -->|find best implementation| PhysicalPlan +PhysicalPlan -->|get the actual implementation| Operator +``` + +The physical plan represent the actual implementation of a logical plan (the `Children` property in `PhysicalPlan` is used for cost calculation) + +```go +type PhysicalPlan interface { + SetChildren(children []PhysicalPlan) // set child implementations, also update the operator and cost. + Children() []PhysicalPlan // Return the saved child implementations from the last CalculateCost call. + Operator() model.VectorOperator // Return the saved physical operator set from the last CalculateCost. + Cost() cost.Cost // Return the saved cost from the last CalculateCost call. +} +``` + +For each logical plan, we will have several implementations + +```go +type ImplementationRule interface { + ListImplementations(expr *GroupExpr) []physicalplan.PhysicalPlan // List all implementation for the expression +} +``` + +And we will find the best implementation, via simple dynamic programming + +```go +var possibleImpls []physicalplan.PhysicalPlan +for _, rule := range rules { + possibleImpls = append(possibleImpls, rule.ListImplementations(expr)...) +} +``` + +```go +var currentBest *memo.GroupImplementation +for _, impl := range possibleImpls { + impl.SetChildren(childImpls) + calculatedCost := impl.Cost() + if groupImpl != nil { + if costModel.IsBetter(currentBest.Cost, calculatedCost) { + currentBest.SelectedExpr = expr + currentBest.Implementation = impl + currentBest.Cost = calculatedCost + } + } else { + currentBest = &memo.GroupImplementation{ + SelectedExpr: expr, + Cost: calculatedCost, + Implementation: impl, + } + } +} +``` \ No newline at end of file diff --git a/cascades/cost/cost.go b/planner/cost/cost.go similarity index 59% rename from cascades/cost/cost.go rename to planner/cost/cost.go index 93bc6d7f..add8757c 100644 --- a/cascades/cost/cost.go +++ b/planner/cost/cost.go @@ -1,6 +1,9 @@ package cost -type Cost interface{} +type Cost struct { + CpuCost float64 + MemoryCost float64 +} type CostModel interface { IsBetter(currentCost Cost, newCost Cost) bool diff --git a/cascades/logicalplan/ast2plan.go b/planner/logicalplan/ast2plan.go similarity index 100% rename from cascades/logicalplan/ast2plan.go rename to planner/logicalplan/ast2plan.go diff --git a/cascades/logicalplan/ast2plan_test.go b/planner/logicalplan/ast2plan_test.go similarity index 100% rename from cascades/logicalplan/ast2plan_test.go rename to planner/logicalplan/ast2plan_test.go diff --git a/cascades/logicalplan/plan.go b/planner/logicalplan/plan.go similarity index 100% rename from cascades/logicalplan/plan.go rename to planner/logicalplan/plan.go diff --git a/cascades/memo/explore.go b/planner/memo/explore.go similarity index 100% rename from cascades/memo/explore.go rename to planner/memo/explore.go diff --git a/cascades/memo/explore_test.go b/planner/memo/explore_test.go similarity index 100% rename from cascades/memo/explore_test.go rename to planner/memo/explore_test.go diff --git a/cascades/memo/group.go b/planner/memo/group.go similarity index 77% rename from cascades/memo/group.go rename to planner/memo/group.go index a53a2b30..d0167aed 100644 --- a/cascades/memo/group.go +++ b/planner/memo/group.go @@ -1,10 +1,10 @@ package memo import ( - "github.com/thanos-io/promql-engine/cascades/cost" - "github.com/thanos-io/promql-engine/cascades/logicalplan" - "github.com/thanos-io/promql-engine/cascades/physicalplan" - "github.com/thanos-io/promql-engine/cascades/utils" + "github.com/thanos-io/promql-engine/planner/cost" + "github.com/thanos-io/promql-engine/planner/logicalplan" + "github.com/thanos-io/promql-engine/planner/physicalplan" + "github.com/thanos-io/promql-engine/planner/utils" "sync/atomic" ) @@ -38,7 +38,7 @@ type Group struct { type GroupImplementation struct { SelectedExpr *GroupExpr Cost cost.Cost - Implementation physicalplan.Implementation + Implementation physicalplan.PhysicalPlan } type GroupExpr struct { diff --git a/planner/memo/implementation_rule.go b/planner/memo/implementation_rule.go new file mode 100644 index 00000000..402a840f --- /dev/null +++ b/planner/memo/implementation_rule.go @@ -0,0 +1,7 @@ +package memo + +import "github.com/thanos-io/promql-engine/planner/physicalplan" + +type ImplementationRule interface { + ListImplementations(expr *GroupExpr) []physicalplan.PhysicalPlan // List all implementation for the expression +} diff --git a/cascades/memo/memo.go b/planner/memo/memo.go similarity index 87% rename from cascades/memo/memo.go rename to planner/memo/memo.go index b1ba97df..20c9cb32 100644 --- a/cascades/memo/memo.go +++ b/planner/memo/memo.go @@ -1,8 +1,8 @@ package memo import ( - "github.com/thanos-io/promql-engine/cascades/logicalplan" - "github.com/thanos-io/promql-engine/cascades/utils" + "github.com/thanos-io/promql-engine/planner/logicalplan" + "github.com/thanos-io/promql-engine/planner/utils" ) type Memo interface { @@ -40,9 +40,10 @@ func (m *memo) GetOrCreateGroupExpr(node logicalplan.LogicalPlan) *GroupExpr { } else { id := m.groupExprIDGenerator.Generate() expr := &GroupExpr{ - ID: id, - Expr: node, - Children: childGroups, + ID: id, + Expr: node, + Children: childGroups, + AppliedTransformations: make(utils.Set[TransformationRule]), } m.GroupExprs[node] = expr return expr diff --git a/cascades/memo/memo_test.go b/planner/memo/memo_test.go similarity index 98% rename from cascades/memo/memo_test.go rename to planner/memo/memo_test.go index c58a06da..e2d9ff68 100644 --- a/cascades/memo/memo_test.go +++ b/planner/memo/memo_test.go @@ -2,8 +2,8 @@ package memo import ( "github.com/stretchr/testify/assert" - "github.com/thanos-io/promql-engine/cascades/logicalplan" "github.com/thanos-io/promql-engine/parser" + "github.com/thanos-io/promql-engine/planner/logicalplan" "golang.org/x/exp/maps" "reflect" "testing" diff --git a/planner/memo/transformation_rule.go b/planner/memo/transformation_rule.go new file mode 100644 index 00000000..456d35ab --- /dev/null +++ b/planner/memo/transformation_rule.go @@ -0,0 +1,11 @@ +package memo + +import ( + "github.com/thanos-io/promql-engine/planner/utils" +) + +type TransformationRule interface { + utils.Hashable + Match(expr *GroupExpr) bool // Check if the transformation can be applied to the expression + Transform(memo Memo, expr *GroupExpr) *GroupExpr // Transform the expression +} diff --git a/planner/physicalplan/plan.go b/planner/physicalplan/plan.go new file mode 100644 index 00000000..310af0fa --- /dev/null +++ b/planner/physicalplan/plan.go @@ -0,0 +1,13 @@ +package physicalplan + +import ( + "github.com/thanos-io/promql-engine/execution/model" + "github.com/thanos-io/promql-engine/planner/cost" +) + +type PhysicalPlan interface { + SetChildren(children []PhysicalPlan) // set child implementations, also update the operator and cost. + Children() []PhysicalPlan // Return the saved child implementations + Operator() model.VectorOperator // Return the saved physical operator + Cost() cost.Cost // Return the saved cost +} diff --git a/cascades/optimize.go b/planner/planner.go similarity index 66% rename from cascades/optimize.go rename to planner/planner.go index d210ae0f..a10b6fd4 100644 --- a/cascades/optimize.go +++ b/planner/planner.go @@ -1,15 +1,15 @@ -package cascades +package planner import ( - "github.com/thanos-io/promql-engine/cascades/cost" - "github.com/thanos-io/promql-engine/cascades/logicalplan" - "github.com/thanos-io/promql-engine/cascades/memo" - "github.com/thanos-io/promql-engine/cascades/physicalplan" "github.com/thanos-io/promql-engine/parser" + "github.com/thanos-io/promql-engine/planner/cost" + "github.com/thanos-io/promql-engine/planner/logicalplan" + "github.com/thanos-io/promql-engine/planner/memo" + "github.com/thanos-io/promql-engine/planner/physicalplan" "golang.org/x/exp/maps" ) -type Optimize struct { +type Planner struct { expr parser.Expr memo memo.Memo // root @@ -17,19 +17,19 @@ type Optimize struct { rootGroup *memo.Group } -func New(expr parser.Expr) *Optimize { - return &Optimize{ - expr: expr, +func New() *Planner { + return &Planner{ memo: memo.NewMemo(), } } -func (o *Optimize) SetRoot(root logicalplan.LogicalPlan) { - o.root = root - o.rootGroup = o.memo.GetOrCreateGroup(root) +func (o *Planner) MakeRoot(expr parser.Expr) { + o.expr = expr + o.root = logicalplan.NewLogicalPlan(&expr) + o.rootGroup = o.memo.GetOrCreateGroup(o.root) } -func (o *Optimize) exploreGroup(rules []memo.TransformationRule, group *memo.Group, round memo.ExplorationRound) { +func (o *Planner) exploreGroup(rules []memo.TransformationRule, group *memo.Group, round memo.ExplorationRound) { for { if group.IsExplored(round) { break @@ -50,9 +50,9 @@ func (o *Optimize) exploreGroup(rules []memo.TransformationRule, group *memo.Gro } // fire rules for more equivalent expressions for _, rule := range rules { - if !equivalentExpr.AppliedTransformations.Contains(rule) { - if rule.Match(equivalentExpr) { - transformedExpr := rule.Transform(equivalentExpr) + if rule.Match(equivalentExpr) { + if !equivalentExpr.AppliedTransformations.Contains(rule) { + transformedExpr := rule.Transform(o.memo, equivalentExpr) group.Equivalents[transformedExpr.ID] = transformedExpr equivalentExpr.AppliedTransformations.Add(rule) // reset group exploration state @@ -70,23 +70,23 @@ func (o *Optimize) exploreGroup(rules []memo.TransformationRule, group *memo.Gro } } -func (o *Optimize) Explore(rules []memo.TransformationRule, round memo.ExplorationRound) { +func (o *Planner) Explore(rules []memo.TransformationRule, round memo.ExplorationRound) { o.exploreGroup(rules, o.rootGroup, round) } -func (o *Optimize) findBestImpl(costModel cost.CostModel, rules []memo.ImplementationRule, group *memo.Group) *memo.GroupImplementation { +func (o *Planner) findBestImpl(costModel cost.CostModel, rules []memo.ImplementationRule, group *memo.Group) *memo.GroupImplementation { if group.Implementation != nil { return group.Implementation } else { var groupImpl *memo.GroupImplementation for _, expr := range group.Equivalents { // fire rules to find implementations for each equiv expr, returning un-calculated implementations - var possibleImpls []physicalplan.Implementation + var possibleImpls []physicalplan.PhysicalPlan for _, rule := range rules { possibleImpls = append(possibleImpls, rule.ListImplementations(expr)...) } // get the implementation of child groups - var childImpls []physicalplan.Implementation + var childImpls []physicalplan.PhysicalPlan for _, child := range expr.Children { childImpl := o.findBestImpl(costModel, rules, child) child.Implementation = childImpl @@ -94,9 +94,10 @@ func (o *Optimize) findBestImpl(costModel cost.CostModel, rules []memo.Implement } // calculate the implementation, and update the best cost for group for _, impl := range possibleImpls { - calculatedCost := impl.CalculateCost(childImpls) + impl.SetChildren(childImpls) + calculatedCost := impl.Cost() if groupImpl != nil { - if costModel.IsBetter(groupImpl, calculatedCost) { + if costModel.IsBetter(groupImpl.Cost, calculatedCost) { groupImpl.SelectedExpr = expr groupImpl.Implementation = impl groupImpl.Cost = calculatedCost @@ -114,6 +115,6 @@ func (o *Optimize) findBestImpl(costModel cost.CostModel, rules []memo.Implement } } -func (o *Optimize) FindBestImplementation(costModel cost.CostModel, rules []memo.ImplementationRule) { +func (o *Planner) FindBestImplementation(costModel cost.CostModel, rules []memo.ImplementationRule) { o.rootGroup.Implementation = o.findBestImpl(costModel, rules, o.rootGroup) } diff --git a/planner/planner_test.go b/planner/planner_test.go new file mode 100644 index 00000000..5dd33dd3 --- /dev/null +++ b/planner/planner_test.go @@ -0,0 +1,237 @@ +package planner + +import ( + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/require" + model2 "github.com/thanos-io/promql-engine/execution/model" + "github.com/thanos-io/promql-engine/parser" + "github.com/thanos-io/promql-engine/planner/cost" + "github.com/thanos-io/promql-engine/planner/logicalplan" + "github.com/thanos-io/promql-engine/planner/memo" + "github.com/thanos-io/promql-engine/planner/physicalplan" + "testing" +) + +// transformation rules + +/* +a simple transformation rule to inject injection_foo to label matchers of VectorSelector +The transformation result might be invalid, but it's only mean to test the planner, so it's okay +*/ +type dummyMatcherInjection struct{} + +func (s *dummyMatcherInjection) HashCode() uint64 { + return 1 +} + +func (s *dummyMatcherInjection) Match(expr *memo.GroupExpr) bool { + _, ok := (expr.Expr).(*logicalplan.VectorSelector) + return ok +} + +func (s *dummyMatcherInjection) Transform(m memo.Memo, expr *memo.GroupExpr) *memo.GroupExpr { + node, _ := (expr.Expr).(*logicalplan.VectorSelector) + var newLabelMatchers []*labels.Matcher + existingVals := make(map[string]bool) + for _, matcher := range node.LabelMatchers { + newLabelMatchers = append(newLabelMatchers, matcher) + existingVals[matcher.Value] = true + } + toBeInjected := "injection_foo" + if _, ok := existingVals[toBeInjected]; !ok { + newLabelMatchers = append(newLabelMatchers, parser.MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "injection_foo")) + newNode := &logicalplan.VectorSelector{ + Name: node.Name, + OriginalOffset: node.OriginalOffset, + Offset: node.Offset, + Timestamp: node.Timestamp, + StartOrEnd: node.StartOrEnd, + LabelMatchers: newLabelMatchers, + } + newExpr := m.GetOrCreateGroupExpr(newNode) + return newExpr + } else { + return expr // return the original + } +} + +// implementation rules + +var implementationRules = []memo.ImplementationRule{ // implementation rule must cover all possible logical plan + &mockVectorSelectorImplRule{}, + &mockBinaryExprImplRule{}, +} + +type mockVectorSelectorImplRule struct{} + +func (m *mockVectorSelectorImplRule) ListImplementations(expr *memo.GroupExpr) []physicalplan.PhysicalPlan { + if e, ok := (expr.Expr).(*logicalplan.VectorSelector); ok { + return []physicalplan.PhysicalPlan{&mockVectorSelectorImpl{ + plan: e, + parent: expr, + }} + } else { + return []physicalplan.PhysicalPlan{} + } +} + +type mockBinaryExprImplRule struct{} + +func (m *mockBinaryExprImplRule) ListImplementations(expr *memo.GroupExpr) []physicalplan.PhysicalPlan { + if e, ok := (expr.Expr).(*logicalplan.BinaryExpr); ok { + return []physicalplan.PhysicalPlan{&mockBinaryExprImpl{ + plan: e, + parent: expr, + }} + } else { + return []physicalplan.PhysicalPlan{} + } +} + +// the implementations + +type mockVectorSelectorImpl struct { + plan *logicalplan.VectorSelector + parent *memo.GroupExpr + children []physicalplan.PhysicalPlan + cost cost.Cost +} + +func (m *mockVectorSelectorImpl) ParentExpr() *memo.GroupExpr { + return m.parent +} + +func (m *mockVectorSelectorImpl) SetChildren(children []physicalplan.PhysicalPlan) { + m.children = children + matcherValues := make(map[string]bool) + for _, matcher := range m.plan.LabelMatchers { + matcherValues[matcher.Value] = true + } + // we will bias the cost if there's a label with value "injection_foo" (because our transformation rule above) + if _, ok := matcherValues["injection_foo"]; ok { + m.cost = cost.Cost{ + CpuCost: 1, + MemoryCost: 2, + } + } else { + m.cost = cost.Cost{ + CpuCost: 4, + MemoryCost: 3, + } + } +} + +func (m *mockVectorSelectorImpl) Children() []physicalplan.PhysicalPlan { + return m.children +} + +func (m *mockVectorSelectorImpl) Operator() model2.VectorOperator { + // FIXME this is just a test code to demo, not the real implementation, so I will return nil instead + return nil +} + +func (m *mockVectorSelectorImpl) Cost() cost.Cost { + return m.cost +} + +type mockBinaryExprImpl struct { + plan *logicalplan.BinaryExpr + parent *memo.GroupExpr + children []physicalplan.PhysicalPlan + cost cost.Cost +} + +func (m *mockBinaryExprImpl) SetChildren(children []physicalplan.PhysicalPlan) { + m.children = children + // this node always have 2 children + leftChild := children[0] + rightChild := children[1] + // for simplicity, just combine their cost together (might be it's not true in real scenario, but it's just a demo) + m.cost = cost.Cost{ + CpuCost: leftChild.Cost().CpuCost + rightChild.Cost().CpuCost, + MemoryCost: leftChild.Cost().MemoryCost + rightChild.Cost().MemoryCost, + } +} + +func (m *mockBinaryExprImpl) Children() []physicalplan.PhysicalPlan { + return m.children +} + +func (m *mockBinaryExprImpl) Operator() model2.VectorOperator { + // FIXME this is just a test code to demo, not the real implementation, so I will return nil instead + return nil +} + +func (m *mockBinaryExprImpl) Cost() cost.Cost { + return m.cost +} + +// cost model + +type mockCostModel struct{} + +func (m *mockCostModel) IsBetter(currentCost cost.Cost, newCost cost.Cost) bool { + if currentCost.CpuCost == newCost.CpuCost { + return currentCost.MemoryCost > newCost.MemoryCost + } else { + return currentCost.CpuCost > newCost.CpuCost + } +} + +func TestPlanner0(t *testing.T) { // demo the new planner + input := "foo / on(test,blub) group_left(bar) bar" // copied from parse_test.go + /* + &parser.BinaryExpr{ + Op: parser.DIV, + LHS: &parser.VectorSelector{ + Name: "foo", + LabelMatchers: []*labels.Matcher{ + parser.MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"), + }, + PosRange: parser.PositionRange{ + Start: 0, + End: 3, + }, + }, + RHS: &parser.VectorSelector{ + Name: "bar", + LabelMatchers: []*labels.Matcher{ + parser.MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "bar"), + }, + PosRange: parser.PositionRange{ + Start: 43, + End: 46, + }, + }, + VectorMatching: &parser.VectorMatching{ + Card: parser.CardManyToOne, + MatchingLabels: []string{"test", "blub"}, + Include: []string{"blub"}, + }, + } + */ + expr, err := parser.ParseExpr(input) + require.NoError(t, err) + planner := New() + planner.MakeRoot(expr) + planner.Explore([]memo.TransformationRule{&dummyMatcherInjection{}}, 0) // now we have a dummy + planner.FindBestImplementation(&mockCostModel{}, implementationRules) + root := planner.rootGroup + /* + In the parsed expression, we have 2 VectorSelector expression, + hence the logical plan also includes 2 VectorSelector nodes. + + Since we have a transformation rule `dummyMatcherInjection` to inject the "injection_foo" value into the matchers, + and we have the implementation rules to bias the cost if "injection_foo" is presented + ({cpu_cost=1, mem_cost=2} if "injection_foo" is presented, {cpu_cost=4, mem_cost=3} otherwise). + + And the binary implementation rule will just sum up all of its child implementation cost. + + So the final cost is {cpu_cost=2, mem_cost=4} + */ + require.Equal(t, root.Implementation.Cost, cost.Cost{ + CpuCost: 2, + MemoryCost: 4, + }) +} diff --git a/cascades/utils/id_gen.go b/planner/utils/id_gen.go similarity index 100% rename from cascades/utils/id_gen.go rename to planner/utils/id_gen.go diff --git a/cascades/utils/set.go b/planner/utils/set.go similarity index 100% rename from cascades/utils/set.go rename to planner/utils/set.go