From 84abd143abfa4a223ea113761b29e4ae02b7a68b Mon Sep 17 00:00:00 2001 From: Matthew Nibecker Date: Tue, 3 Oct 2023 15:03:01 -0700 Subject: [PATCH] Support dynamic paths for put and cut op --- compiler/kernel/expr.go | 36 +++++- compiler/kernel/groupby.go | 9 +- compiler/kernel/op.go | 35 +----- compiler/semantic/expr.go | 95 +++++++------- compiler/semantic/op.go | 73 ++++++++--- compiler/semantic/sql.go | 10 +- compiler/ztests/dynamic-field-cut.yaml | 41 ++++++ compiler/ztests/dynamic-field-put.yaml | 41 ++++++ compiler/ztests/where-on-func.yaml | 2 +- docs/language/operators/rename.md | 2 +- docs/tutorials/schools.md | 2 +- runtime/expr/cutter.go | 95 ++++++++------ runtime/expr/eval.go | 14 ++- runtime/expr/lval.go | 80 ++++++++++++ runtime/expr/putter.go | 139 ++++++++++++--------- runtime/expr/ztests/cut-dup-fields.yaml | 8 +- runtime/expr/ztests/cut-not-adjacent.yaml | 8 +- runtime/expr/ztests/rename-error-move.yaml | 2 +- runtime/op/groupby/groupby.go | 13 +- runtime/op/join/join.go | 3 +- 20 files changed, 485 insertions(+), 223 deletions(-) create mode 100644 compiler/ztests/dynamic-field-cut.yaml create mode 100644 compiler/ztests/dynamic-field-put.yaml create mode 100644 runtime/expr/lval.go diff --git a/compiler/kernel/expr.go b/compiler/kernel/expr.go index 80d5dae6d4..6c689dfea9 100644 --- a/compiler/kernel/expr.go +++ b/compiler/kernel/expr.go @@ -263,15 +263,41 @@ func (b *Builder) compileDotExpr(dot *dag.Dot) (expr.Evaluator, error) { return expr.NewDotExpr(b.zctx(), record, dot.RHS), nil } -func compileLval(e dag.Expr) (field.Path, error) { - if this, ok := e.(*dag.This); ok { - return field.Path(this.Path), nil +func (b *Builder) compileLval(e dag.Expr) (*expr.Lval, error) { + switch e := e.(type) { + case *dag.BinaryExpr: + if e.Op != "[" { + return nil, fmt.Errorf("internal error: invalid lval %#v", e) + } + lhs, err := b.compileLval(e.LHS) + if err != nil { + return nil, err + } + rhs, err := b.compileExpr(e.RHS) + if err != nil { + return nil, err + } + lhs.Elems = append(lhs.Elems, expr.NewExprLvalElem(b.zctx(), rhs)) + return lhs, nil + case *dag.Dot: + lhs, err := b.compileLval(e.LHS) + if err != nil { + return nil, err + } + lhs.Elems = append(lhs.Elems, &expr.StaticLvalElem{Name: e.RHS}) + return lhs, nil + case *dag.This: + var elems []expr.LvalElem + for _, elem := range e.Path { + elems = append(elems, &expr.StaticLvalElem{Name: elem}) + } + return expr.NewLval(elems), nil } - return nil, errors.New("invalid expression on lhs of assignment") + return nil, fmt.Errorf("internal error: invalid lval %#v", e) } func (b *Builder) compileAssignment(node *dag.Assignment) (expr.Assignment, error) { - lhs, err := compileLval(node.LHS) + lhs, err := b.compileLval(node.LHS) if err != nil { return expr.Assignment{}, err } diff --git a/compiler/kernel/groupby.go b/compiler/kernel/groupby.go index d0ab780abb..1887b9b1a6 100644 --- a/compiler/kernel/groupby.go +++ b/compiler/kernel/groupby.go @@ -2,7 +2,6 @@ package kernel import ( "errors" - "fmt" "github.com/brimdata/zed/compiler/ast/dag" "github.com/brimdata/zed/order" @@ -44,12 +43,12 @@ func (b *Builder) compileAggAssignment(assignment dag.Assignment) (field.Path, * if !ok { return nil, nil, errors.New("aggregator is not an aggregation expression") } - lhs, err := compileLval(assignment.LHS) - if err != nil { - return nil, nil, fmt.Errorf("lhs of aggregation: %w", err) + this, ok := assignment.LHS.(*dag.This) + if !ok { + return nil, nil, errors.New("internal error: aggregator assignment LHS must be a static path") } m, err := b.compileAgg(aggAST) - return lhs, m, err + return this.Path, m, err } func (b *Builder) compileAgg(agg *dag.Agg) (*expr.Aggregator, error) { diff --git a/compiler/kernel/op.go b/compiler/kernel/op.go index e51e960209..af9db0b3d2 100644 --- a/compiler/kernel/op.go +++ b/compiler/kernel/op.go @@ -174,36 +174,13 @@ func (b *Builder) compileLeaf(o dag.Op, parent zbuf.Puller) (zbuf.Puller, error) if err != nil { return nil, err } - putter, err := expr.NewPutter(b.octx.Zctx, clauses) - if err != nil { - return nil, err - } + putter := expr.NewPutter(b.octx.Zctx, clauses) return op.NewApplier(b.octx, parent, putter), nil case *dag.Rename: var srcs, dsts field.List - for _, fa := range v.Args { - dst, err := compileLval(fa.LHS) - if err != nil { - return nil, err - } - // We call CompileLval on the RHS because renames are - // restricted to dotted field name expressions. - src, err := compileLval(fa.RHS) - if err != nil { - return nil, err - } - if len(dst) != len(src) { - return nil, fmt.Errorf("cannot rename %s to %s", src, dst) - } - // Check that the prefixes match and, if not, report first place - // that they don't. - for i := 0; i <= len(src)-2; i++ { - if src[i] != dst[i] { - return nil, fmt.Errorf("cannot rename %s to %s (differ in %s vs %s)", src, dst, src[i], dst[i]) - } - } - dsts = append(dsts, dst) - srcs = append(srcs, src) + for _, a := range v.Args { + dsts = append(dsts, a.LHS.(*dag.This).Path) + srcs = append(srcs, a.RHS.(*dag.This).Path) } renamer := expr.NewRenamer(b.octx.Zctx, srcs, dsts) return op.NewApplier(b.octx, parent, renamer), nil @@ -388,9 +365,9 @@ func (b *Builder) compileAssignments(assignments []dag.Assignment) ([]expr.Assig return keys, nil } -func splitAssignments(assignments []expr.Assignment) (field.List, []expr.Evaluator) { +func splitAssignments(assignments []expr.Assignment) ([]*expr.Lval, []expr.Evaluator) { n := len(assignments) - lhs := make(field.List, 0, n) + lhs := make([]*expr.Lval, 0, n) rhs := make([]expr.Evaluator, 0, n) for _, a := range assignments { lhs = append(lhs, a.LHS) diff --git a/compiler/semantic/expr.go b/compiler/semantic/expr.go index 68c21341eb..1e5245859b 100644 --- a/compiler/semantic/expr.go +++ b/compiler/semantic/expr.go @@ -507,10 +507,10 @@ func (a *analyzer) semExprs(in []ast.Expr) ([]dag.Expr, error) { return exprs, nil } -func (a *analyzer) semAssignments(assignments []ast.Assignment, summarize bool) ([]dag.Assignment, error) { +func (a *analyzer) semAssignments(assignments []ast.Assignment) ([]dag.Assignment, error) { out := make([]dag.Assignment, 0, len(assignments)) for _, e := range assignments { - a, err := a.semAssignment(e, summarize) + a, err := a.semAssignment(e) if err != nil { return nil, err } @@ -519,64 +519,69 @@ func (a *analyzer) semAssignments(assignments []ast.Assignment, summarize bool) return out, nil } -func (a *analyzer) semAssignment(assign ast.Assignment, summarize bool) (dag.Assignment, error) { +func (a *analyzer) semAssignment(assign ast.Assignment) (dag.Assignment, error) { rhs, err := a.semExpr(assign.RHS) if err != nil { - return dag.Assignment{}, fmt.Errorf("rhs of assignment expression: %w", err) - } - if _, ok := rhs.(*dag.Agg); ok { - summarize = true + return dag.Assignment{}, fmt.Errorf("right-hand side of assignment: %w", err) } var lhs dag.Expr - if assign.LHS != nil { - lhs, err = a.semExpr(assign.LHS) + if assign.LHS == nil { + path, err := deriveLHSPath(rhs) if err != nil { - return dag.Assignment{}, fmt.Errorf("lhs of assigment expression: %w", err) + return dag.Assignment{}, err } - } else if call, ok := assign.RHS.(*ast.Call); ok { - path := []string{call.Name} - switch call.Name { + lhs = &dag.This{Kind: "This", Path: path} + } else if lhs, err = a.semExpr(assign.LHS); err != nil { + return dag.Assignment{}, fmt.Errorf("left-hand side of assignment: %w", err) + } + if !isLval(lhs) { + return dag.Assignment{}, errors.New("illegal left-hand side of assignment") + } + if this, ok := lhs.(*dag.This); ok && len(this.Path) == 0 { + return dag.Assignment{}, errors.New("cannot assign to 'this'") + } + return dag.Assignment{Kind: "Assignment", LHS: lhs, RHS: rhs}, nil +} + +func isLval(e dag.Expr) bool { + switch e := e.(type) { + case *dag.BinaryExpr: + return e.Op == "[" && isLval(e.LHS) + case *dag.Dot: + return isLval(e.LHS) + case *dag.This: + return true + } + return false +} + +func deriveLHSPath(rhs dag.Expr) ([]string, error) { + var path []string + switch rhs := rhs.(type) { + case *dag.Call: + path = []string{rhs.Name} + switch rhs.Name { case "every": // If LHS is nil and the call is every() make the LHS field ts since // field ts assumed with every. path = []string{"ts"} case "quiet": - if len(call.Args) > 0 { - if p, ok := rhs.(*dag.Call).Args[0].(*dag.This); ok { - path = p.Path + if len(rhs.Args) > 0 { + if this, ok := rhs.Args[0].(*dag.This); ok { + path = this.Path } } } - lhs = &dag.This{Kind: "This", Path: path} - } else if agg, ok := assign.RHS.(*ast.Agg); ok { - lhs = &dag.This{Kind: "This", Path: []string{agg.Name}} - } else if v, ok := rhs.(*dag.Var); ok { - lhs = &dag.This{Kind: "This", Path: []string{v.Name}} - } else { - lhs, err = a.semExpr(assign.RHS) - if err != nil { - return dag.Assignment{}, errors.New("assignment name could not be inferred from rhs expression") - } - } - if summarize { - // Summarize always outputs its results as new records of "this" - // so if we have an "as" that overrides "this", we just - // convert it back to a local this. - if dot, ok := lhs.(*dag.Dot); ok { - if v, ok := dot.LHS.(*dag.Var); ok && v.Name == "this" { - lhs = &dag.This{Kind: "This", Path: []string{dot.RHS}} - } - } - } - // Make sure we have a valid lval for lhs. - this, ok := lhs.(*dag.This) - if !ok { - return dag.Assignment{}, errors.New("illegal left-hand side of assignment") - } - if len(this.Path) == 0 { - return dag.Assignment{}, errors.New("cannot assign to 'this'") + case *dag.Agg: + path = []string{rhs.Name} + case *dag.Var: + path = []string{rhs.Name} + case *dag.This: + path = rhs.Path + default: + return nil, errors.New("cannot infer field from expression") } - return dag.Assignment{Kind: "Assignment", LHS: lhs, RHS: rhs}, nil + return path, nil } func (a *analyzer) semFields(exprs []ast.Expr) ([]dag.Expr, error) { diff --git a/compiler/semantic/op.go b/compiler/semantic/op.go index be1f5b658e..fdc55bb206 100644 --- a/compiler/semantic/op.go +++ b/compiler/semantic/op.go @@ -15,6 +15,7 @@ import ( "github.com/brimdata/zed/pkg/field" "github.com/brimdata/zed/pkg/plural" "github.com/brimdata/zed/pkg/reglob" + "github.com/brimdata/zed/runtime/expr" "github.com/brimdata/zed/runtime/expr/function" "github.com/brimdata/zed/zson" "github.com/segmentio/ksuid" @@ -410,19 +411,25 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { case *ast.From: return a.semFrom(o, seq) case *ast.Summarize: - keys, err := a.semAssignments(o.Keys, true) + keys, err := a.semAssignments(o.Keys) if err != nil { return nil, err } + if assignmentHasDynamicLHS(keys) { + return nil, errors.New("summarize: keys must be static field references") + } if len(keys) == 0 && len(o.Aggs) == 1 { if seq := a.singletonAgg(o.Aggs[0], seq); seq != nil { return seq, nil } } - aggs, err := a.semAssignments(o.Aggs, true) + aggs, err := a.semAssignments(o.Aggs) if err != nil { return nil, err } + if assignmentHasDynamicLHS(aggs) { + return nil, errors.New("summarize: aggs must be static field references") + } // Note: InputSortDir is copied in here but it's not meaningful // coming from a parser AST, only from a worker using the kernel DSL, // which is another reason why we need separate parser and kernel ASTs. @@ -497,10 +504,20 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { case *ast.Shape: return append(seq, &dag.Shape{Kind: "Shape"}), nil case *ast.Cut: - assignments, err := a.semAssignments(o.Args, false) + assignments, err := a.semAssignments(o.Args) if err != nil { return nil, err } + // Collect static paths so we can check on what is available. + var fields field.List + for _, a := range assignments { + if this, ok := a.LHS.(*dag.This); ok { + fields = append(fields, this.Path) + } + } + if _, err = zed.NewRecordBuilder(a.zctx, fields); err != nil { + return nil, fmt.Errorf("cut: %w", err) + } return append(seq, &dag.Cut{ Kind: "Cut", Args: assignments, @@ -596,10 +613,20 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Limit: o.Limit, }), nil case *ast.Put: - assignments, err := a.semAssignments(o.Args, false) + assignments, err := a.semAssignments(o.Args) if err != nil { return nil, err } + // We can do collision checking on static paths, so check what we can. + var fields field.List + for _, a := range assignments { + if this, ok := a.LHS.(*dag.This); ok { + fields = append(fields, this.Path) + } + } + if err := expr.CheckPutFields(fields); err != nil { + return nil, fmt.Errorf("put: %w", err) + } return append(seq, &dag.Put{ Kind: "Put", Args: assignments, @@ -615,20 +642,20 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { for _, fa := range o.Args { dst, err := a.semField(fa.LHS) if err != nil { - return nil, errors.New("'rename' requires explicit field references") + return nil, errors.New("rename: requires explicit field references") } src, err := a.semField(fa.RHS) if err != nil { - return nil, errors.New("'rename' requires explicit field references") + return nil, errors.New("rename: requires explicit field references") } if len(dst.Path) != len(src.Path) { - return nil, fmt.Errorf("cannot rename %s to %s", src, dst) + return nil, fmt.Errorf("rename: cannot rename %s to %s", src, dst) } // Check that the prefixes match and, if not, report first place // that they don't. for i := 0; i <= len(src.Path)-2; i++ { if src.Path[i] != dst.Path[i] { - return nil, fmt.Errorf("cannot rename %s to %s (differ in %s vs %s)", src, dst, src.Path[i], dst.Path[i]) + return nil, fmt.Errorf("rename: cannot rename %s to %s (differ in %s vs %s)", src, dst, src.Path[i], dst.Path[i]) } } assignments = append(assignments, dag.Assignment{Kind: "Assignment", LHS: dst, RHS: src}) @@ -652,7 +679,7 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { if err != nil { return nil, err } - assignments, err := a.semAssignments(o.Args, false) + assignments, err := a.semAssignments(o.Args) if err != nil { return nil, err } @@ -763,14 +790,14 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Aggs: []dag.Assignment{ { Kind: "Assignment", - LHS: &dag.This{Kind: "This", Path: field.Path{"sample"}}, + LHS: pathOf("sample"), RHS: &dag.Agg{Kind: "Agg", Name: "any", Expr: e}, }, }, Keys: []dag.Assignment{ { Kind: "Assignment", - LHS: &dag.This{Kind: "This", Path: field.Path{"shape"}}, + LHS: pathOf("shape"), RHS: &dag.Call{Kind: "Call", Name: "typeof", Args: []dag.Expr{e}}, }, }, @@ -812,7 +839,7 @@ func (a *analyzer) singletonAgg(agg ast.Assignment, seq dag.Seq) dag.Seq { if agg.LHS != nil { return nil } - out, err := a.semAssignment(agg, true) + out, err := a.semAssignment(agg) if err != nil { return nil } @@ -942,14 +969,17 @@ func (a *analyzer) semOpAssignment(p *ast.OpAssignment) (dag.Op, error) { // Parition assignments into agg vs. puts. // It's okay to pass false here for the summarize bool because // semAssignment will check if the RHS is a dag.Agg and override. - assignment, err := a.semAssignment(assign, false) + a, err := a.semAssignment(assign) if err != nil { return nil, err } - if _, ok := assignment.RHS.(*dag.Agg); ok { - aggs = append(aggs, assignment) + if _, ok := a.RHS.(*dag.Agg); ok { + if _, ok := a.LHS.(*dag.This); !ok { + return nil, errors.New("summarize: illegal use of dynamic assignment in aggregation") + } + aggs = append(aggs, a) } else { - puts = append(puts, assignment) + puts = append(puts, a) } } if len(puts) > 0 && len(aggs) > 0 { @@ -967,6 +997,15 @@ func (a *analyzer) semOpAssignment(p *ast.OpAssignment) (dag.Op, error) { }, nil } +func assignmentHasDynamicLHS(assignments []dag.Assignment) bool { + for _, a := range assignments { + if _, ok := a.LHS.(*dag.This); !ok { + return true + } + } + return false +} + func (a *analyzer) semOpExpr(e ast.Expr, seq dag.Seq) (dag.Seq, error) { if call, ok := e.(*ast.Call); ok { if seq, err := a.semCallOp(call, seq); seq != nil || err != nil { @@ -1031,7 +1070,7 @@ func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) { Aggs: []dag.Assignment{ { Kind: "Assignment", - LHS: &dag.This{Kind: "This", Path: field.Path{call.Name}}, + LHS: pathOf(call.Name), RHS: agg, }, }, diff --git a/compiler/semantic/sql.go b/compiler/semantic/sql.go index 6c6f1d9f8b..3d75bae0c3 100644 --- a/compiler/semantic/sql.go +++ b/compiler/semantic/sql.go @@ -291,7 +291,7 @@ func (a *analyzer) convertSQLJoin(leftPath []dag.Op, sqlJoin ast.SQLJoin) ([]dag } alias := dag.Assignment{ Kind: "Assignment", - LHS: &dag.This{Kind: "This", Path: field.Path{aliasID}}, + LHS: pathOf(aliasID), RHS: &dag.This{Kind: "This", Path: field.Path{aliasID}}, } join := &dag.Join{ @@ -433,7 +433,7 @@ func (a *analyzer) newSQLSelection(assignments []ast.Assignment) (sqlSelection, if err != nil { return nil, err } - assignment, err := a.semAssignment(assign, false) + assignment, err := a.semAssignment(assign) if err != nil { return nil, err } @@ -515,12 +515,12 @@ func (a *analyzer) isAgg(e ast.Expr) (*dag.Agg, error) { } func (a *analyzer) deriveAs(as ast.Assignment) (field.Path, error) { - sa, err := a.semAssignment(as, false) + sa, err := a.semAssignment(as) if err != nil { return nil, fmt.Errorf("AS clause of SELECT: %w", err) } - if f, ok := sa.LHS.(*dag.This); ok { - return f.Path, nil + if this, ok := sa.LHS.(*dag.This); ok { + return this.Path, nil } return nil, fmt.Errorf("AS clause not a field: %w", err) } diff --git a/compiler/ztests/dynamic-field-cut.yaml b/compiler/ztests/dynamic-field-cut.yaml new file mode 100644 index 0000000000..0559cf9686 --- /dev/null +++ b/compiler/ztests/dynamic-field-cut.yaml @@ -0,0 +1,41 @@ +script: | + echo '{a:"hi",b:"hello"}' | zq -z 'cut this[a][b] := "world"' - + echo "// ===" + echo '{a:{b:"hello"}}' | zq -z 'cut this[a.b]:="world"' - + echo "// ===" + echo '{a:"hello"}' | zq -z 'cut this[this["a"]] := "world"' - + echo "// ===" + echo '{a:{},b:"hello"}' | zq -z 'cut a[b] := "world"' - + echo "// ===" + echo '{a:"foo"}' | zq -z 'cut this[a]["bar"] := "baz"' - + echo "// ===" + # runtime error cases + echo '{a:"hello",b:"hello"}' | zq -z 'cut this[a] := "world1", this[b] := "world2"' - + echo "// ===" + echo '{a:"foo",b:"bar"}' | zq -z 'cut this[a][b] := "world", this[a] := "world"' - + echo "// ===" + # will display nothing because put ignores missing error type + echo {} | zq -z 'cut this[doesnotexist] := "world"' - + # semantic error cases + ! echo {} | zq -z 'op foo(): ( yield "error" ) cut this[foo] := "hello world"' - + +outputs: + - name: stdout + data: | + {hi:{hello:"world"}} + // === + {hello:"world"} + // === + {hello:"world"} + // === + {a:{hello:"world"}} + // === + {foo:{bar:"baz"}} + // === + error({message:"cut: duplicate field: \"hello\"",on:{a:"hello",b:"hello"}}) + // === + error({message:"cut: duplicate field: \"foo\"",on:{a:"foo",b:"bar"}}) + // === + - name: stderr + data: | + left-hand side of assignment: symbol "foo" is not bound to an expression diff --git a/compiler/ztests/dynamic-field-put.yaml b/compiler/ztests/dynamic-field-put.yaml new file mode 100644 index 0000000000..1d64215a58 --- /dev/null +++ b/compiler/ztests/dynamic-field-put.yaml @@ -0,0 +1,41 @@ +script: | + echo '{a:"hi",b:"hello"}' | zq -z 'this[a][b] := "world" | drop a, b' - + echo "// ===" + echo '{a:{b:"hello"}}' | zq -z 'this[a.b]:="world" | drop a' - + echo "// ===" + echo '{a:"hello"}' | zq -z 'this[this["a"]] := "world" | drop a' - + echo "// ===" + echo '{a:{},b:"hello"}' | zq -z 'a[b] := "world" | drop b' - + echo "// ===" + echo '{a:"foo"}' | zq -z 'this[a]["bar"] := "baz" | cut foo' - + echo "// ===" + # runtime error cases + echo '{a:"hello",b:"hello"}' | zq -z 'this[a] := "world1", this[b] := "world2"' - + echo "// ===" + echo '{a:"foo",b:"bar"}' | zq -z 'this[a][b] := "world", this[a] := "world"' - + echo "// ===" + # will display nothing because put ignores missing error type + echo {} | zq -z 'this[doesnotexist] := "world"' - + # semantic error cases + ! echo {} | zq -z 'op foo(): ( yield "error" ) put this[foo] := "hello world"' - + +outputs: + - name: stdout + data: | + {hi:{hello:"world"}} + // === + {hello:"world"} + // === + {hello:"world"} + // === + {a:{hello:"world"}} + // === + {foo:{bar:"baz"}} + // === + error({message:"put: multiple assignments to hello",on:{a:"hello",b:"hello"}}) + // === + error({message:"put: conflicting nested assignments to foo and foo.bar",on:{a:"foo",b:"bar"}}) + // === + - name: stderr + data: | + left-hand side of assignment: symbol "foo" is not bound to an expression diff --git a/compiler/ztests/where-on-func.yaml b/compiler/ztests/where-on-func.yaml index 3e6ca5dd70..21ae7245d6 100644 --- a/compiler/ztests/where-on-func.yaml +++ b/compiler/ztests/where-on-func.yaml @@ -1,3 +1,3 @@ zed: cut hex := hex(this) where this % 2 == 0 -errorRE: "rhs of assignment expression: 'where' clause on non-aggregation function: hex" +errorRE: "'where' clause on non-aggregation function: hex" diff --git a/docs/language/operators/rename.md b/docs/language/operators/rename.md index f224f84ab5..70be76e194 100644 --- a/docs/language/operators/rename.md +++ b/docs/language/operators/rename.md @@ -49,7 +49,7 @@ echo '{a:1,r:{b:2,c:3}}' | zq -z 'rename w:=r.b' - ``` => ```mdtest-output -cannot rename r.b to w +rename: cannot rename r.b to w ``` _Record literals can be used instead of rename for mutation_ ```mdtest-command diff --git a/docs/tutorials/schools.md b/docs/tutorials/schools.md index 1369a1cfb0..790e3edde8 100644 --- a/docs/tutorials/schools.md +++ b/docs/tutorials/schools.md @@ -843,7 +843,7 @@ zq -Z 'rename toplevel:=outer.inner' nested.zson ``` produces this compile-time error message and the query is not run: ```mdtest-output -cannot rename outer.inner to toplevel +rename: cannot rename outer.inner to toplevel ``` This goal could instead be achieved by combining [`put`](#44-put) and [`drop`](#42-drop), e.g., diff --git a/runtime/expr/cutter.go b/runtime/expr/cutter.go index 1a8a10b6cd..489545ed1d 100644 --- a/runtime/expr/cutter.go +++ b/runtime/expr/cutter.go @@ -1,7 +1,7 @@ package expr import ( - "errors" + "fmt" "github.com/brimdata/zed" "github.com/brimdata/zed/pkg/field" @@ -9,14 +9,15 @@ import ( type Cutter struct { zctx *zed.Context - builder *zed.RecordBuilder fieldRefs field.List fieldExprs []Evaluator - typeCache []zed.Type + lvals []*Lval outTypes *zed.TypeVectorTable recordTypes map[int]*zed.TypeRecord + typeCache []zed.Type - droppers []*Dropper + builders map[string]*zed.RecordBuilder + droppers map[string]*Dropper dropperCache []*Dropper dirty bool quiet bool @@ -26,31 +27,18 @@ type Cutter struct { // the Cutter copies fields that are not in fieldnames. If complement // is false, the Cutter copies any fields in fieldnames, where targets // specifies the copied field names. -func NewCutter(zctx *zed.Context, fieldRefs field.List, fieldExprs []Evaluator) (*Cutter, error) { - for _, f := range fieldRefs { - if f.IsEmpty() { - return nil, errors.New("cut: 'this' not allowed (use record literal)") - } - } - var b *zed.RecordBuilder - if len(fieldRefs) == 0 || !fieldRefs[0].IsEmpty() { - // A root field will cause NewFieldBuilder to panic. - var err error - b, err = zed.NewRecordBuilder(zctx, fieldRefs) - if err != nil { - return nil, err - } - } +func NewCutter(zctx *zed.Context, fieldRefs []*Lval, fieldExprs []Evaluator) (*Cutter, error) { n := len(fieldRefs) return &Cutter{ zctx: zctx, - builder: b, - fieldRefs: fieldRefs, + builders: make(map[string]*zed.RecordBuilder), + fieldRefs: make(field.List, n), fieldExprs: fieldExprs, - typeCache: make([]zed.Type, len(fieldRefs)), + lvals: fieldRefs, outTypes: zed.NewTypeVectorTable(), recordTypes: make(map[int]*zed.TypeRecord), - droppers: make([]*Dropper, n), + typeCache: make([]zed.Type, len(fieldRefs)), + droppers: make(map[string]*Dropper), dropperCache: make([]*Dropper, n), }, nil } @@ -67,30 +55,42 @@ func (c *Cutter) FoundCut() bool { // receiver's configuration. If the resulting record would be empty, Apply // returns zed.Missing. func (c *Cutter) Eval(ectx Context, in *zed.Value) *zed.Value { + rb, paths, verr := c.lookupBuilder(ectx, in) + if verr != nil { + if !verr.IsMissing() && !verr.IsQuiet() { + verr = ectx.CopyValue(*c.zctx.WrapError(fmt.Sprintf("cut: %s", zed.DecodeError(verr.Bytes())), in)) + } + return verr + } types := c.typeCache - b := c.builder - b.Reset() droppers := c.dropperCache[:0] for k, e := range c.fieldExprs { val := e.Eval(ectx, in) if val.IsQuiet() { // ignore this field - if c.droppers[k] == nil { - c.droppers[k] = NewDropper(c.zctx, c.fieldRefs[k:k+1]) + // This no worky because the path may be dynamic. + pathID := paths[k].String() + if c.droppers[pathID] == nil { + c.droppers[pathID] = NewDropper(c.zctx, field.List{paths[k]}) } - droppers = append(droppers, c.droppers[k]) - b.Append(val.Bytes()) + droppers = append(droppers, c.droppers[pathID]) + rb.Append(val.Bytes()) types[k] = zed.TypeNull continue } - b.Append(val.Bytes()) + rb.Append(val.Bytes()) types[k] = val.Type } - bytes, err := b.Encode() + // check paths + bytes, err := rb.Encode() if err != nil { panic(err) } - rec := ectx.NewValue(c.lookupTypeRecord(types), bytes) + typ, err := c.lookupTypeRecord(types, rb) + if err != nil { + return ectx.CopyValue(*c.zctx.NewErrorf("cut: %s", err)) + } + rec := ectx.NewValue(typ, bytes) for _, d := range droppers { rec = d.Eval(ectx, rec) } @@ -100,12 +100,37 @@ func (c *Cutter) Eval(ectx Context, in *zed.Value) *zed.Value { return rec } -func (c *Cutter) lookupTypeRecord(types []zed.Type) *zed.TypeRecord { +func (c *Cutter) lookupBuilder(ectx Context, in *zed.Value) (*zed.RecordBuilder, field.List, *zed.Value) { + paths := c.fieldRefs[:0] + for _, p := range c.lvals { + path, verr := p.Eval(ectx, in) + if verr != nil { + // XXX What to do with quiet? + return nil, nil, verr + } + if path.IsEmpty() { + return nil, nil, ectx.CopyValue(*c.zctx.NewErrorf("'this' not allowed (use record literal)")) + } + paths = append(paths, path) + } + builder, ok := c.builders[paths.String()] + if !ok { + var err error + if builder, err = zed.NewRecordBuilder(c.zctx, paths); err != nil { + return nil, nil, ectx.CopyValue(*c.zctx.NewError(err)) + } + c.builders[paths.String()] = builder + } + builder.Reset() + return builder, paths, nil +} + +func (c *Cutter) lookupTypeRecord(types []zed.Type, builder *zed.RecordBuilder) (*zed.TypeRecord, error) { id := c.outTypes.Lookup(types) typ, ok := c.recordTypes[id] if !ok { - typ = c.builder.Type(types) + typ = builder.Type(types) c.recordTypes[id] = typ } - return typ + return typ, nil } diff --git a/runtime/expr/eval.go b/runtime/expr/eval.go index b62be5c94d..d635a2c94d 100644 --- a/runtime/expr/eval.go +++ b/runtime/expr/eval.go @@ -807,19 +807,23 @@ func (c *Call) Eval(ectx Context, this *zed.Value) *zed.Value { } type Assignment struct { - LHS field.Path + LHS *Lval RHS Evaluator } -func NewAssignments(zctx *zed.Context, dsts field.List, srcs field.List) (field.List, []Evaluator) { +func NewAssignments(zctx *zed.Context, dsts field.List, srcs field.List) ([]*Lval, []Evaluator) { if len(srcs) != len(dsts) { panic("NewAssignments: argument mismatch") } var resolvers []Evaluator - var fields field.List + var lvals []*Lval for k, dst := range dsts { - fields = append(fields, dst) + elems := make([]LvalElem, 0, len(dst)) + for _, d := range dst { + elems = append(elems, &StaticLvalElem{Name: d}) + } + lvals = append(lvals, NewLval(elems)) resolvers = append(resolvers, NewDottedExpr(zctx, srcs[k])) } - return fields, resolvers + return lvals, resolvers } diff --git a/runtime/expr/lval.go b/runtime/expr/lval.go new file mode 100644 index 0000000000..477539bb57 --- /dev/null +++ b/runtime/expr/lval.go @@ -0,0 +1,80 @@ +package expr + +import ( + "github.com/brimdata/zed" + "github.com/brimdata/zed/pkg/field" +) + +type LvalElem interface { + Eval(ectx Context, this *zed.Value) (string, *zed.Value) +} + +type Lval struct { + Elems []LvalElem + cache field.Path +} + +func NewLval(evals []LvalElem) *Lval { + return &Lval{Elems: evals} +} + +// Eval returns the path of the lval. If there's an error the returned *zed.Value +// will not be nill. +func (l *Lval) Eval(ectx Context, this *zed.Value) (field.Path, *zed.Value) { + l.cache = l.cache[:0] + for _, e := range l.Elems { + name, val := e.Eval(ectx, this) + if val != nil { + return nil, val + } + l.cache = append(l.cache, name) + } + return l.cache, nil +} + +// Path returns the receiver's path. Path returns false when the receiver +// contains a dynamic element. +func (l *Lval) Path() (field.Path, bool) { + var path field.Path + for _, e := range l.Elems { + s, ok := e.(*StaticLvalElem) + if !ok { + return nil, false + } + path = append(path, s.Name) + } + return path, true +} + +type StaticLvalElem struct { + Name string +} + +func (l *StaticLvalElem) Eval(_ Context, _ *zed.Value) (string, *zed.Value) { + return l.Name, nil +} + +type ExprLvalElem struct { + caster Evaluator + eval Evaluator +} + +func NewExprLvalElem(zctx *zed.Context, e Evaluator) *ExprLvalElem { + return &ExprLvalElem{ + eval: e, + caster: LookupPrimitiveCaster(zctx, zed.TypeString), + } +} + +func (l *ExprLvalElem) Eval(ectx Context, this *zed.Value) (string, *zed.Value) { + val := l.eval.Eval(ectx, this) + if val.IsError() { + return "", val + } + if !val.IsString() { + if val = l.caster.Eval(ectx, val); val.IsError() { + return "", val + } + } + return val.AsString(), nil +} diff --git a/runtime/expr/putter.go b/runtime/expr/putter.go index cc0e21dfa5..54fd4747bc 100644 --- a/runtime/expr/putter.go +++ b/runtime/expr/putter.go @@ -19,12 +19,12 @@ type Putter struct { zctx *zed.Context builder zcode.Builder clauses []Assignment - // valClauses is a slice to avoid re-allocating for every value - valClauses []Assignment + rules map[int]map[string]putRule + warned map[string]struct{} // vals is a slice to avoid re-allocating for every value - vals []zed.Value - rules map[int]putRule - warned map[string]struct{} + vals []zed.Value + // paths is a slice to avoid re-allocating for every path + paths []field.Path } // A putRule describes how a given record type is modified by describing @@ -39,43 +39,32 @@ type putRule struct { step putStep } -func NewPutter(zctx *zed.Context, clauses []Assignment) (*Putter, error) { - for i, p := range clauses { - if p.LHS.IsEmpty() { - return nil, fmt.Errorf("put: LHS cannot be 'this' (use 'yield' operator)") - } - for j, c := range clauses { - if i == j { - continue - } - if p.LHS.Equal(c.LHS) { - return nil, fmt.Errorf("put: multiple assignments to %s", p.LHS) - } - if c.LHS.HasStrictPrefix(p.LHS) { - return nil, fmt.Errorf("put: conflicting nested assignments to %s and %s", p.LHS, c.LHS) - } - } - } +func NewPutter(zctx *zed.Context, clauses []Assignment) *Putter { return &Putter{ zctx: zctx, clauses: clauses, vals: make([]zed.Value, len(clauses)), - rules: make(map[int]putRule), + rules: make(map[int]map[string]putRule), warned: make(map[string]struct{}), - }, nil + } } -func (p *Putter) eval(ectx Context, this *zed.Value) ([]zed.Value, []Assignment) { - p.valClauses = p.valClauses[:0] +func (p *Putter) eval(ectx Context, this *zed.Value) ([]zed.Value, []field.Path, *zed.Value) { p.vals = p.vals[:0] + p.paths = p.paths[:0] for _, cl := range p.clauses { val := *cl.RHS.Eval(ectx, this) - if !val.IsQuiet() { - p.vals = append(p.vals, val) - p.valClauses = append(p.valClauses, cl) + if val.IsQuiet() { + continue + } + p.vals = append(p.vals, val) + path, verr := cl.LHS.Eval(ectx, this) + if verr != nil { + return nil, nil, verr } + p.paths = append(p.paths, path) } - return p.vals, p.valClauses + return p.vals, p.paths, nil } // A putStep is a recursive data structure encoding a series of steps to be @@ -175,20 +164,20 @@ func (ig *getter) nth(n int) (zcode.Bytes, error) { return nil, fmt.Errorf("getter.nth: array index %d out of bounds", n) } -func findOverwriteClause(path field.Path, clauses []Assignment) (int, field.Path, bool) { - for i, cand := range clauses { - if path.Equal(cand.LHS) || cand.LHS.HasStrictPrefix(path) { - return i, cand.LHS, true +func findOverwriteClause(path field.Path, paths []field.Path) (int, field.Path, bool) { + for i, lpath := range paths { + if path.Equal(lpath) || lpath.HasStrictPrefix(path) { + return i, lpath, true } } return -1, nil, false } -func (p *Putter) deriveSteps(inType *zed.TypeRecord, vals []zed.Value, clauses []Assignment) (putStep, zed.Type) { - return p.deriveRecordSteps(field.Path{}, inType.Fields, vals, clauses) +func (p *Putter) deriveSteps(inType *zed.TypeRecord, vals []zed.Value, paths []field.Path) (putStep, zed.Type) { + return p.deriveRecordSteps(field.Path{}, inType.Fields, vals, paths) } -func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, vals []zed.Value, clauses []Assignment) (putStep, *zed.TypeRecord) { +func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, vals []zed.Value, paths []field.Path) (putStep, *zed.TypeRecord) { s := putStep{op: putRecord} var fields []zed.Field @@ -197,7 +186,7 @@ func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, // assignments. for i, f := range inFields { path := append(parentPath, f.Name) - matchIndex, matchPath, found := findOverwriteClause(path, clauses) + matchIndex, matchPath, found := findOverwriteClause(path, paths) switch { // input not overwritten by assignment: copy input value. case !found: @@ -217,13 +206,13 @@ func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, fields = append(fields, zed.NewField(f.Name, vals[matchIndex].Type)) // input record field overwritten by nested assignment: recurse. case len(path) < len(matchPath) && zed.IsRecordType(f.Type): - nestedStep, typ := p.deriveRecordSteps(path, zed.TypeRecordOf(f.Type).Fields, vals, clauses) + nestedStep, typ := p.deriveRecordSteps(path, zed.TypeRecordOf(f.Type).Fields, vals, paths) nestedStep.index = i s.append(nestedStep) fields = append(fields, zed.NewField(f.Name, typ)) // input non-record field overwritten by nested assignment(s): recurse. case len(path) < len(matchPath) && !zed.IsRecordType(f.Type): - nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, clauses) + nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, paths) nestedStep.index = i s.append(nestedStep) fields = append(fields, zed.NewField(f.Name, typ)) @@ -232,30 +221,30 @@ func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, } } - appendClause := func(cl Assignment) bool { - if !cl.LHS.HasPrefix(parentPath) { + appendClause := func(lpath field.Path) bool { + if !lpath.HasPrefix(parentPath) { return false } - return !hasField(cl.LHS[len(parentPath)], fields) + return !hasField(lpath[len(parentPath)], fields) } // Then, look at put assignments to see if there are any new fields to append. - for i, cl := range clauses { - if appendClause(cl) { + for i, lpath := range paths { + if appendClause(lpath) { switch { // Append value at this level - case len(cl.LHS) == len(parentPath)+1: + case len(lpath) == len(parentPath)+1: s.append(putStep{ op: putFromClause, container: zed.IsContainerType(vals[i].Type), index: i, }) - fields = append(fields, zed.NewField(cl.LHS[len(parentPath)], vals[i].Type)) + fields = append(fields, zed.NewField(lpath[len(parentPath)], vals[i].Type)) // Appended and nest. For example, this would happen with "put b.c=1" applied to a record {"a": 1}. - case len(cl.LHS) > len(parentPath)+1: - path := append(parentPath, cl.LHS[len(parentPath)]) - nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, clauses) + case len(lpath) > len(parentPath)+1: + path := append(parentPath, lpath[len(parentPath)]) + nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, paths) nestedStep.index = -1 - fields = append(fields, zed.NewField(cl.LHS[len(parentPath)], typ)) + fields = append(fields, zed.NewField(lpath[len(parentPath)], typ)) s.append(nestedStep) } } @@ -273,19 +262,45 @@ func hasField(name string, fields []zed.Field) bool { }) } -func (p *Putter) lookupRule(inType *zed.TypeRecord, vals []zed.Value, clauses []Assignment) putRule { - rule, ok := p.rules[inType.ID()] +func (p *Putter) lookupRule(inType *zed.TypeRecord, vals []zed.Value, fields field.List) (putRule, error) { + m, ok := p.rules[inType.ID()] + if !ok { + m = make(map[string]putRule) + p.rules[inType.ID()] = m + } + rule, ok := m[fields.String()] if ok && sameTypes(rule.clauseTypes, vals) { - return rule + return rule, nil } - step, typ := p.deriveSteps(inType, vals, clauses) + // first check fields + if err := CheckPutFields(fields); err != nil { + return putRule{}, err + } + step, typ := p.deriveSteps(inType, vals, fields) var clauseTypes []zed.Type for _, val := range vals { clauseTypes = append(clauseTypes, val.Type) } rule = putRule{typ, clauseTypes, step} - p.rules[inType.ID()] = rule - return rule + p.rules[inType.ID()][fields.String()] = rule + return rule, nil +} + +func CheckPutFields(fields field.List) error { + for i, f := range fields { + if f.IsEmpty() { + return fmt.Errorf("left-hand side cannot be 'this' (use 'yield' operator)") + } + for _, c := range fields[i+1:] { + if f.Equal(c) { + return fmt.Errorf("multiple assignments to %s", f) + } + if c.HasStrictPrefix(f) { + return fmt.Errorf("conflicting nested assignments to %s and %s", f, c) + } + } + } + return nil } func sameTypes(types []zed.Type, vals []zed.Value) bool { @@ -303,11 +318,17 @@ func (p *Putter) Eval(ectx Context, this *zed.Value) *zed.Value { } return ectx.CopyValue(*p.zctx.WrapError("put: not a record", this)) } - vals, clauses := p.eval(ectx, this) + vals, paths, verr := p.eval(ectx, this) + if verr != nil { + return verr + } if len(vals) == 0 { return this } - rule := p.lookupRule(recType, vals, clauses) + rule, err := p.lookupRule(recType, vals, paths) + if err != nil { + return ectx.CopyValue(*p.zctx.WrapError(err.Error(), this)) + } bytes := rule.step.build(this.Bytes(), &p.builder, vals) return ectx.NewValue(rule.typ, bytes) } diff --git a/runtime/expr/ztests/cut-dup-fields.yaml b/runtime/expr/ztests/cut-dup-fields.yaml index c476641af2..4ffd78207c 100644 --- a/runtime/expr/ztests/cut-dup-fields.yaml +++ b/runtime/expr/ztests/cut-dup-fields.yaml @@ -12,7 +12,7 @@ inputs: outputs: - name: stderr data: | - duplicate field: "rec" - duplicate field: "rec.sub1" - duplicate field: "rec.sub.sub" - duplicate field: "rec.sub" + cut: duplicate field: "rec" + cut: duplicate field: "rec.sub1" + cut: duplicate field: "rec.sub.sub" + cut: duplicate field: "rec.sub" diff --git a/runtime/expr/ztests/cut-not-adjacent.yaml b/runtime/expr/ztests/cut-not-adjacent.yaml index ac4bcd713b..de13172053 100644 --- a/runtime/expr/ztests/cut-not-adjacent.yaml +++ b/runtime/expr/ztests/cut-not-adjacent.yaml @@ -12,7 +12,7 @@ inputs: outputs: - name: stderr data: | - fields in record rec must be adjacent - fields in record rec1 must be adjacent - fields in record rec1 must be adjacent - fields in record t.rec must be adjacent + cut: fields in record rec must be adjacent + cut: fields in record rec1 must be adjacent + cut: fields in record rec1 must be adjacent + cut: fields in record t.rec must be adjacent diff --git a/runtime/expr/ztests/rename-error-move.yaml b/runtime/expr/ztests/rename-error-move.yaml index 136fa0cc39..9fc181586e 100644 --- a/runtime/expr/ztests/rename-error-move.yaml +++ b/runtime/expr/ztests/rename-error-move.yaml @@ -3,4 +3,4 @@ zed: rename dst:=id.resp_h input: | {id:{orig_h:10.164.94.120,orig_p:39681(port=uint16),resp_h:10.47.3.155,resp_p:3389(port)}} -errorRE: "cannot rename id.resp_h to dst" +errorRE: "rename: cannot rename id.resp_h to dst" diff --git a/runtime/op/groupby/groupby.go b/runtime/op/groupby/groupby.go index 1b3a5d8895..633376dd66 100644 --- a/runtime/op/groupby/groupby.go +++ b/runtime/op/groupby/groupby.go @@ -3,6 +3,7 @@ package groupby import ( "context" "encoding/binary" + "errors" "sync" "github.com/brimdata/zed" @@ -112,7 +113,11 @@ func NewAggregator(ctx context.Context, zctx *zed.Context, keyRefs, keyExprs, ag func New(octx *op.Context, parent zbuf.Puller, keys []expr.Assignment, aggNames field.List, aggs []*expr.Aggregator, limit int, inputSortDir order.Direction, partialsIn, partialsOut bool) (*Op, error) { names := make(field.List, 0, len(keys)+len(aggNames)) for _, e := range keys { - names = append(names, e.LHS) + p, ok := e.LHS.Path() + if !ok { + return nil, errors.New("invalid lval in groupby key") + } + names = append(names, p) } names = append(names, aggNames...) builder, err := zed.NewRecordBuilder(octx.Zctx, names) @@ -125,9 +130,9 @@ func New(octx *op.Context, parent zbuf.Puller, keys []expr.Assignment, aggNames } keyRefs := make([]expr.Evaluator, 0, len(keys)) keyExprs := make([]expr.Evaluator, 0, len(keys)) - for _, e := range keys { - keyRefs = append(keyRefs, expr.NewDottedExpr(octx.Zctx, e.LHS)) - keyExprs = append(keyExprs, e.RHS) + for i := range keys { + keyRefs = append(keyRefs, expr.NewDottedExpr(octx.Zctx, names[i])) + keyExprs = append(keyExprs, keys[i].RHS) } agg, err := NewAggregator(octx.Context, octx.Zctx, keyRefs, keyExprs, valRefs, aggs, builder, limit, inputSortDir, partialsIn, partialsOut) if err != nil { diff --git a/runtime/op/join/join.go b/runtime/op/join/join.go index d4238a93b4..f1bbb4d4fc 100644 --- a/runtime/op/join/join.go +++ b/runtime/op/join/join.go @@ -7,7 +7,6 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/order" - "github.com/brimdata/zed/pkg/field" "github.com/brimdata/zed/runtime/expr" "github.com/brimdata/zed/runtime/op" "github.com/brimdata/zed/runtime/op/sort" @@ -35,7 +34,7 @@ type Op struct { } func New(octx *op.Context, anti, inner bool, left, right zbuf.Puller, leftKey, rightKey expr.Evaluator, - leftDir, rightDir order.Direction, lhs field.List, + leftDir, rightDir order.Direction, lhs []*expr.Lval, rhs []expr.Evaluator) (*Op, error) { cutter, err := expr.NewCutter(octx.Zctx, lhs, rhs) if err != nil {