Skip to content

Commit

Permalink
Implement vector grep() function (#5523)
Browse files Browse the repository at this point in the history
  • Loading branch information
nwt authored Dec 11, 2024
1 parent d53e041 commit 963c76e
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 17 deletions.
3 changes: 3 additions & 0 deletions runtime/sam/expr/function/grep.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ func (g *Grep) Call(_ super.Allocator, vals []super.Value) super.Value {
if super.TypeUnder(patternVal.Type()) != super.TypeString {
return g.zctx.WrapError("grep(): pattern argument must be a string", patternVal)
}
if patternVal.IsNull() {
return super.NullBool
}
if p := patternVal.AsString(); g.grep == nil || g.pattern != p {
g.pattern = p
term := norm.NFC.Bytes(patternVal.Bytes())
Expand Down
17 changes: 0 additions & 17 deletions runtime/sam/expr/function/ztests/grep.yaml

This file was deleted.

3 changes: 3 additions & 0 deletions runtime/vam/expr/function/function.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ func New(zctx *super.Context, name string, narg int) (expr.Function, field.Path,
f = &Bucket{zctx: zctx, name: name}
case "fields":
f = NewFields(zctx)
case "grep":
argmax = 2
f = &Grep{zctx: zctx}
case "hex":
f = &Hex{zctx}
case "join":
Expand Down
50 changes: 50 additions & 0 deletions runtime/vam/expr/function/grep.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package function

import (
"github.com/brimdata/super"
"github.com/brimdata/super/runtime/vam/expr"
"github.com/brimdata/super/vector"
"golang.org/x/text/unicode/norm"
)

// Grep implements the grep() function for vector evaluation.
type Grep struct {
// zctx is used to build the wrapped error returned for a non-string pattern.
zctx *super.Context
// grep is the search evaluator cached by Call for a constant pattern.
grep expr.Evaluator
// pattern records the pattern associated with the cached grep evaluator.
pattern string
}

// Call implements the vector grep() function.  args[0] is the pattern vector
// and args[1] is the input vector to search.
//
// It returns a wrapped error vector if the pattern's type is not string, and
// an empty Bool vector if the input vector has no slots.  When the pattern is
// a constant, a single search evaluator is built, cached on g for later
// calls, and evaluated against the whole input.  Otherwise each slot is
// searched with its own pattern; slots where the pattern or the input is
// null are skipped and covered by the nulls vector passed to the result.
func (g *Grep) Call(args ...vector.Any) vector.Any {
	patternVec, inputVec := args[0], args[1]
	if patternVec.Type().ID() != super.IDString {
		return vector.NewWrappedError(g.zctx, "grep(): pattern argument must be a string", patternVec)
	}
	if inputVec.Len() == 0 {
		return vector.NewBoolEmpty(0, nil)
	}
	if c, ok := vector.Under(patternVec).(*vector.Const); ok {
		// NOTE(review): the second result of AsString is discarded, so a
		// constant that does not yield a string is searched as whatever
		// AsString returns for it — confirm this is intended for nulls.
		pattern, _ := c.AsString()
		if g.grep == nil || g.pattern != pattern {
			// Key the cache on the raw pattern, since that is what the
			// comparison above sees on the next call.  Storing the
			// normalized form instead would make the comparison fail on
			// every call for any pattern that NFC normalization changes,
			// rebuilding the evaluator each time.
			g.grep = expr.NewSearchString(norm.NFC.String(pattern), &expr.This{})
			g.pattern = pattern
		}
		return g.grep.Eval(inputVec)
	}
	// index is a reusable one-slot index so each iteration can view a single
	// input slot without allocating a new slice.
	var index [1]uint32
	nulls := vector.Or(vector.NullsOf(patternVec), vector.NullsOf(inputVec))
	out := vector.NewBoolEmpty(patternVec.Len(), nulls)
	for i := range patternVec.Len() {
		if nulls.Value(i) {
			continue
		}
		pattern, _ := vector.StringValue(patternVec, i)
		pattern = norm.NFC.String(pattern)
		search := expr.NewSearchString(pattern, &expr.This{})
		index[0] = i
		view := vector.NewView(inputVec, index[:])
		if match, _ := vector.BoolValue(search.Eval(view), 0); match {
			out.Set(i)
		}
	}
	return out
}
28 changes: 28 additions & 0 deletions runtime/ztests/expr/function/grep.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# This test exercises grep as a function call, which only happens when the
# pattern arg is not a glob, a regular expression, or resolvable to a string
# at compile time.

zed: |
[grep(pattern),grep(pattern,input)]
vector: true

input: |
{pattern:"a",input:"a"}
{pattern:"z",input:"a"}
{pattern:"b",input:{a:{b:1}}}
{pattern:"z",input:{a:{b:1}}}
{pattern:"c",input:{a:{b:"c"}}}
{pattern:"z",input:{a:{b:"c"}}}
{pattern:1,input:""}
{pattern:null(string),input:"a"}
output: |
[true,true]
[true,false]
[true,true]
[true,false]
[true,true]
[true,false]
[error({message:"grep(): pattern argument must be a string",on:1}),error({message:"grep(): pattern argument must be a string",on:1})]
[null(bool),null(bool)]

0 comments on commit 963c76e

Please sign in to comment.