From eadfee570def77d32e0adce7396873dd7a0f1088 Mon Sep 17 00:00:00 2001 From: Matthew Nibecker Date: Thu, 12 Oct 2023 15:33:57 -0700 Subject: [PATCH] Add map function The map function applies a function to every element in an array or set value. Also: rename the map aggregator to collect_map to avoid collision with this new function. Closes #4610 --- CHANGELOG.md | 2 +- compiler/ast/dag/expr.go | 6 ++ compiler/ast/dag/unpack.go | 1 + compiler/kernel/expr.go | 14 ++++ compiler/semantic/expr.go | 21 ++++++ docs/language/aggregates/README.md | 2 +- .../aggregates/{map.md => collect_map.md} | 12 ++-- docs/language/functions/README.md | 1 + docs/language/functions/map.md | 42 +++++++++++ runtime/expr/agg/agg.go | 4 +- runtime/expr/agg/map.go | 14 ++-- ...{map-union.yaml => collect-map-union.yaml} | 2 +- .../agg/ztests/{map.yaml => collect-map.yaml} | 2 +- runtime/expr/map.go | 71 +++++++++++++++++++ runtime/expr/ztests/map.yaml | 17 +++++ 15 files changed, 192 insertions(+), 19 deletions(-) rename docs/language/aggregates/{map.md => collect_map.md} (63%) create mode 100644 docs/language/functions/map.md rename runtime/expr/agg/ztests/{map-union.yaml => collect-map-union.yaml} (93%) rename runtime/expr/agg/ztests/{map.yaml => collect-map.yaml} (89%) create mode 100644 runtime/expr/map.go create mode 100644 runtime/expr/ztests/map.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c290cb255..830a46ef22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -103,7 +103,7 @@ * Add [`regexp()`](docs/language/functions/regexp.md) function for regular expression searches and capture groups (#4145, #4158) * Add [`coalesce()`](docs/language/functions/coalesce.md) function for locating non-null/non-error values (#4172) * Add `line` format for sourcing newline-delimited input as strings (#4175) -* Add [`map()` aggregation function](docs/language/aggregates/map.md) for constructing [maps](docs/formats/zed.md#24-map) #4173 +* Add [`collect_map()` aggregation 
function](docs/language/aggregates/collect_map.md) for constructing [maps](docs/formats/zed.md#24-map) #4173 ## v1.2.0 * Compress index values (#3974) diff --git a/compiler/ast/dag/expr.go b/compiler/ast/dag/expr.go index a735d4133a..5c0df48cd8 100644 --- a/compiler/ast/dag/expr.go +++ b/compiler/ast/dag/expr.go @@ -62,6 +62,11 @@ type ( Kind string `json:"kind" unpack:""` Value string `json:"value"` } + MapCall struct { + Kind string `json:"kind" unpack:""` + Expr Expr `json:"expr"` + Inner Expr `json:"inner"` + } MapExpr struct { Kind string `json:"kind" unpack:""` Entries []Entry `json:"entries"` @@ -121,6 +126,7 @@ func (*Conditional) ExprDAG() {} func (*Dot) ExprDAG() {} func (*Func) ExprDAG() {} func (*Literal) ExprDAG() {} +func (*MapCall) ExprDAG() {} func (*MapExpr) ExprDAG() {} func (*OverExpr) ExprDAG() {} func (*RecordExpr) ExprDAG() {} diff --git a/compiler/ast/dag/unpack.go b/compiler/ast/dag/unpack.go index 0a03034870..cf781d61c2 100644 --- a/compiler/ast/dag/unpack.go +++ b/compiler/ast/dag/unpack.go @@ -31,6 +31,7 @@ var unpacker = unpack.New( Lister{}, Literal{}, Load{}, + MapCall{}, MapExpr{}, Merge{}, Over{}, diff --git a/compiler/kernel/expr.go b/compiler/kernel/expr.go index 3ae2ee0bc8..399911b3b7 100644 --- a/compiler/kernel/expr.go +++ b/compiler/kernel/expr.go @@ -83,6 +83,8 @@ func (b *Builder) compileExpr(e dag.Expr) (expr.Evaluator, error) { return b.compileArrayExpr(e) case *dag.SetExpr: return b.compileSetExpr(e) + case *dag.MapCall: + return b.compileMapCall(e) case *dag.MapExpr: return b.compileMapExpr(e) case *dag.Agg: @@ -309,6 +311,18 @@ func (b *Builder) compileCall(call dag.Call) (expr.Evaluator, error) { return expr.NewCall(b.zctx(), fn, exprs), nil } +func (b *Builder) compileMapCall(a *dag.MapCall) (expr.Evaluator, error) { + e, err := b.compileExpr(a.Expr) + if err != nil { + return nil, err + } + inner, err := b.compileExpr(a.Inner) + if err != nil { + return nil, err + } + return expr.NewMapCall(b.zctx(), e, inner), nil +} 
+ func (b *Builder) compileShaper(node dag.Call, tf expr.ShaperTransform) (expr.Evaluator, error) { args := node.Args field, err := b.compileExpr(args[0]) diff --git a/compiler/semantic/expr.go b/compiler/semantic/expr.go index bf705ce23e..d16794dd71 100644 --- a/compiler/semantic/expr.go +++ b/compiler/semantic/expr.go @@ -506,6 +506,27 @@ func (a *analyzer) semCall(call *ast.Call) (dag.Expr, error) { if nargs == 1 { exprs = append([]dag.Expr{&dag.This{Kind: "This"}}, exprs...) } + case name == "map": + if err := function.CheckArgCount(nargs, 2, 2); err != nil { + return nil, fmt.Errorf("%s(): %w", name, err) + } + id, ok := call.Args[1].(*ast.ID) + if !ok { + return nil, fmt.Errorf("%s(): second argument must be the identifier of a function", name) + } + inner, err := a.semCall(&ast.Call{ + Kind: "Call", + Name: id.Name, + Args: []ast.Expr{&ast.ID{Kind: "ID", Name: "this"}}, + }) + if err != nil { + return nil, err + } + return &dag.MapCall{ + Kind: "MapCall", + Expr: exprs[0], + Inner: inner, + }, nil default: if _, _, err = function.New(a.zctx, name, nargs); err != nil { return nil, fmt.Errorf("%s(): %w", name, err) diff --git a/docs/language/aggregates/README.md b/docs/language/aggregates/README.md index b9afa59b67..ec47ddd0ac 100644 --- a/docs/language/aggregates/README.md +++ b/docs/language/aggregates/README.md @@ -10,10 +10,10 @@ value for a sequence of input values. 
- [any](any.md) - select an arbitrary value from its input - [avg](avg.md) - average value - [collect](collect.md) - aggregate values into array +- [collect_map](collect_map.md) - aggregate map values into a single map - [count](count.md) - count input values - [dcount](dcount.md) - count distinct input values - [fuse](fuse.md) - compute a fused type of input values -- [map](map.md) - aggregate map values into a single map - [max](max.md) - maximum value of input values - [min](min.md) - minimum value of input values - [or](or.md) - logical OR of input values diff --git a/docs/language/aggregates/map.md b/docs/language/aggregates/collect_map.md similarity index 63% rename from docs/language/aggregates/map.md rename to docs/language/aggregates/collect_map.md index b10d6bf9a1..9bae505f5c 100644 --- a/docs/language/aggregates/map.md +++ b/docs/language/aggregates/collect_map.md @@ -1,16 +1,16 @@ ### Aggregate Function -  **map** — aggregate map values into a single map +  **collect_map** — aggregate map values into a single map ### Synopsis ``` -map(|{any:any}|) -> |{any:any}| +collect_map(|{any:any}|) -> |{any:any}| ``` ### Description -The _map_ aggregate function combines map inputs into a single map output. -If _map_ receives multiple values for the same key, the last value received is +The _collect_map_ aggregate function combines map inputs into a single map output. +If _collect_map_ receives multiple values for the same key, the last value received is retained. If the input keys or values vary in type, the return type will be a map of union of those types. @@ -18,7 +18,7 @@ of union of those types. 
Combine a sequence of records into a map: ```mdtest-command -echo '{stock:"APPL",price:145.03} {stock:"GOOG",price:87.07}' | zq -z 'map(|{stock:price}|)' - +echo '{stock:"APPL",price:145.03} {stock:"GOOG",price:87.07}' | zq -z 'collect_map(|{stock:price}|)' - ``` => ```mdtest-output @@ -27,7 +27,7 @@ echo '{stock:"APPL",price:145.03} {stock:"GOOG",price:87.07}' | zq -z 'map(|{sto Continuous collection over a simple sequence: ```mdtest-command -echo '|{"APPL":145.03}| |{"GOOG":87.07}| |{"APPL":150.13}|' | zq -z 'yield map(this)' - +echo '|{"APPL":145.03}| |{"GOOG":87.07}| |{"APPL":150.13}|' | zq -z 'yield collect_map(this)' - ``` => ```mdtest-output diff --git a/docs/language/functions/README.md b/docs/language/functions/README.md index 72da790cc4..94b787f65f 100644 --- a/docs/language/functions/README.md +++ b/docs/language/functions/README.md @@ -40,6 +40,7 @@ Zed's [primitive types](../../formats/zed.md#1-primitive-types), e.g., * [levenshtein](levenshtein.md) Levenshtein distance * [log](log.md) - natural logarithm * [lower](lower.md) - convert a string to lower case +* [map](map.md) - apply a function to each element of an array or set * [missing](missing.md) - test for the "missing" error * [nameof](nameof.md) - the name of a named type * [nest_dotted](nest_dotted.md) - transform fields in a record with dotted names to nested records diff --git a/docs/language/functions/map.md b/docs/language/functions/map.md new file mode 100644 index 0000000000..766ec015a5 --- /dev/null +++ b/docs/language/functions/map.md @@ -0,0 +1,42 @@ +### Function + +  **map** — apply a function to each element of an array or set + +### Synopsis + +``` +map(v: array|set, f: function) -> array|set +``` + +### Description + +The _map_ function applies function f to every element in array/set v and +returns an array/set of the results. Function f must be a function that takes +only one argument. A user-defined function can be used for f. 
+ +### Examples + +Upper case each element of an array: + +```mdtest-command +echo '["foo","bar","baz"]' | zq -z 'yield map(this, upper)' - +``` +=> +```mdtest-output +["FOO","BAR","BAZ"] +``` + +Using a user-defined function to convert an epoch float to a time: + +```mdtest-input udf.zed +func floatToTime(x): ( cast(x*1000000000,