From 963c76e1e6bb0e72bdccce7acd0f88644d8943fb Mon Sep 17 00:00:00 2001
From: Noah Treuhaft
Date: Wed, 11 Dec 2024 11:26:18 -0500
Subject: [PATCH] Implement vector grep() function (#5523)

---
 runtime/sam/expr/function/grep.go          |  3 +
 runtime/sam/expr/function/ztests/grep.yaml | 17 ------
 runtime/vam/expr/function/function.go      |  3 +
 runtime/vam/expr/function/grep.go          | 64 ++++++++++++++++++++++
 runtime/ztests/expr/function/grep.yaml     | 28 ++++++++++
 5 files changed, 98 insertions(+), 17 deletions(-)
 delete mode 100644 runtime/sam/expr/function/ztests/grep.yaml
 create mode 100644 runtime/vam/expr/function/grep.go
 create mode 100644 runtime/ztests/expr/function/grep.yaml

diff --git a/runtime/sam/expr/function/grep.go b/runtime/sam/expr/function/grep.go
index c95f03d646..48157f2669 100644
--- a/runtime/sam/expr/function/grep.go
+++ b/runtime/sam/expr/function/grep.go
@@ -17,6 +17,9 @@ func (g *Grep) Call(_ super.Allocator, vals []super.Value) super.Value {
 	if super.TypeUnder(patternVal.Type()) != super.TypeString {
 		return g.zctx.WrapError("grep(): pattern argument must be a string", patternVal)
 	}
+	if patternVal.IsNull() {
+		return super.NullBool
+	}
 	if p := patternVal.AsString(); g.grep == nil || g.pattern != p {
 		g.pattern = p
 		term := norm.NFC.Bytes(patternVal.Bytes())
diff --git a/runtime/sam/expr/function/ztests/grep.yaml b/runtime/sam/expr/function/ztests/grep.yaml
deleted file mode 100644
index 79714d4392..0000000000
--- a/runtime/sam/expr/function/ztests/grep.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-# This test tests grep as a function call which only happens when the pattern
-# arg is not a glob, regular expression, or resolvable to a string at compile
-# time.
-
-script: |
-  echo '{pattern:"hello",input:[{a:{hello:"world"}},{hi:"world"}]}' |
-    super -z -c 'over input with p=pattern => ( grep(p) )'
-  echo "// ==="
-  echo '{a:{foo:"bar"}} {b:{baz:"foo"}}' |
-    super -z -c 'func g(s, e): ( grep(s, e) ) where g("baz", this)'
-
-outputs:
-  - name: stdout
-    data: |
-      {a:{hello:"world"}}
-      // ===
-      {b:{baz:"foo"}}
diff --git a/runtime/vam/expr/function/function.go b/runtime/vam/expr/function/function.go
index 9a4e82eccd..b489b08ea8 100644
--- a/runtime/vam/expr/function/function.go
+++ b/runtime/vam/expr/function/function.go
@@ -30,6 +30,9 @@ func New(zctx *super.Context, name string, narg int) (expr.Function, field.Path,
 		f = &Bucket{zctx: zctx, name: name}
 	case "fields":
 		f = NewFields(zctx)
+	case "grep":
+		argmax = 2
+		f = &Grep{zctx: zctx}
 	case "hex":
 		f = &Hex{zctx}
 	case "join":
diff --git a/runtime/vam/expr/function/grep.go b/runtime/vam/expr/function/grep.go
new file mode 100644
index 0000000000..1b4e647a7f
--- /dev/null
+++ b/runtime/vam/expr/function/grep.go
@@ -0,0 +1,64 @@
+package function
+
+import (
+	"github.com/brimdata/super"
+	"github.com/brimdata/super/runtime/vam/expr"
+	"github.com/brimdata/super/vector"
+	"golang.org/x/text/unicode/norm"
+)
+
+// Grep implements the vector grep() function for calls whose pattern is
+// not resolved at compile time.
+type Grep struct {
+	zctx *super.Context
+	// grep is the evaluator built for the most recent constant pattern.
+	grep expr.Evaluator
+	// pattern is the constant pattern, before NFC normalization, that
+	// grep was built from.
+	pattern string
+}
+
+// Call searches the input in args[1] for the string pattern in args[0]
+// and returns the per-slot results.  It returns a wrapped error if the
+// pattern is not of type string.
+func (g *Grep) Call(args ...vector.Any) vector.Any {
+	patternVec, inputVec := args[0], args[1]
+	if patternVec.Type().ID() != super.IDString {
+		return vector.NewWrappedError(g.zctx, "grep(): pattern argument must be a string", patternVec)
+	}
+	if inputVec.Len() == 0 {
+		return vector.NewBoolEmpty(0, nil)
+	}
+	if c, ok := vector.Under(patternVec).(*vector.Const); ok {
+		pattern, _ := c.AsString()
+		// Key the cache on the raw pattern.  Comparing the raw pattern
+		// with a stored normalized one would never match for a pattern
+		// that is not already in NFC form, rebuilding the evaluator on
+		// every call.
+		if g.grep == nil || g.pattern != pattern {
+			g.grep = expr.NewSearchString(norm.NFC.String(pattern), &expr.This{})
+			g.pattern = pattern
+		}
+		return g.grep.Eval(inputVec)
+	}
+	// The pattern is not constant, so build an evaluator for each slot
+	// and run it against a one-slot view of the input.
+	var index [1]uint32
+	nulls := vector.Or(vector.NullsOf(patternVec), vector.NullsOf(inputVec))
+	out := vector.NewBoolEmpty(patternVec.Len(), nulls)
+	for i := range patternVec.Len() {
+		if nulls.Value(i) {
+			// A null pattern or input leaves this slot null in out.
+			continue
+		}
+		pattern, _ := vector.StringValue(patternVec, i)
+		pattern = norm.NFC.String(pattern)
+		search := expr.NewSearchString(pattern, &expr.This{})
+		index[0] = i
+		view := vector.NewView(inputVec, index[:])
+		if match, _ := vector.BoolValue(search.Eval(view), 0); match {
+			out.Set(i)
+		}
+	}
+	return out
+}
diff --git a/runtime/ztests/expr/function/grep.yaml b/runtime/ztests/expr/function/grep.yaml
new file mode 100644
index 0000000000..cb2f928e1b
--- /dev/null
+++ b/runtime/ztests/expr/function/grep.yaml
@@ -0,0 +1,28 @@
+# This test tests grep as a function call which only happens when the pattern
+# arg is not a glob, regular expression, or resolvable to a string at compile
+# time.
+
+zed: |
+  [grep(pattern),grep(pattern,input)]
+
+vector: true
+
+input: |
+  {pattern:"a",input:"a"}
+  {pattern:"z",input:"a"}
+  {pattern:"b",input:{a:{b:1}}}
+  {pattern:"z",input:{a:{b:1}}}
+  {pattern:"c",input:{a:{b:"c"}}}
+  {pattern:"z",input:{a:{b:"c"}}}
+  {pattern:1,input:""}
+  {pattern:null(string),input:"a"}
+
+output: |
+  [true,true]
+  [true,false]
+  [true,true]
+  [true,false]
+  [true,true]
+  [true,false]
+  [error({message:"grep(): pattern argument must be a string",on:1}),error({message:"grep(): pattern argument must be a string",on:1})]
+  [null(bool),null(bool)]