Skip to content

Commit

Permalink
vam: Add remaining string functions
Browse files Browse the repository at this point in the history
  • Loading branch information
mattnibs committed Aug 28, 2024
1 parent 35312ad commit 2bbec80
Show file tree
Hide file tree
Showing 11 changed files with 303 additions and 9 deletions.
14 changes: 8 additions & 6 deletions runtime/sam/expr/function/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ type RuneLen struct {
}

func (r *RuneLen) Call(ectx expr.Context, args []zed.Value) zed.Value {
val := args[0]
val := args[0].Under(ectx.Arena())
if !val.IsString() {
return r.zctx.WrapError(ectx.Arena(), "rune_len: string arg required", val)
}
Expand Down Expand Up @@ -77,7 +77,7 @@ type ToUpper struct {
}

func (t *ToUpper) Call(ectx expr.Context, args []zed.Value) zed.Value {
val := args[0]
val := args[0].Under(ectx.Arena())
if !val.IsString() {
return t.zctx.WrapError(ectx.Arena(), "upper: string arg required", val)
}
Expand All @@ -94,7 +94,7 @@ type Trim struct {

// https://github.com/brimdata/zed/blob/main/docs/language/functions.md#trim
func (t *Trim) Call(ectx expr.Context, args []zed.Value) zed.Value {
val := args[0]
val := args[0].Under(ectx.Arena())
if !val.IsString() {
return t.zctx.WrapError(ectx.Arena(), "trim: string arg required", val)
}
Expand All @@ -119,13 +119,13 @@ func newSplit(zctx *zed.Context) *Split {
}

func (s *Split) Call(ectx expr.Context, args []zed.Value) zed.Value {
sVal := args[0]
sepVal := args[1]
args = underAll(ectx.Arena(), args)
for i := range args {
if !args[i].IsString() {
return s.zctx.WrapError(ectx.Arena(), "split: string arg required", args[i])
}
}
sVal, sepVal := args[0], args[1]
if sVal.IsNull() || sepVal.IsNull() {
return ectx.Arena().New(s.typ, nil)
}
Expand All @@ -146,10 +146,11 @@ type Join struct {
}

func (j *Join) Call(ectx expr.Context, args []zed.Value) zed.Value {
args = underAll(ectx.Arena(), args)
splitsVal := args[0]
typ, ok := zed.TypeUnder(splitsVal.Type()).(*zed.TypeArray)
if !ok || typ.Type.ID() != zed.IDString {
return j.zctx.WrapError(ectx.Arena(), "join: array of string args required", splitsVal)
return j.zctx.WrapError(ectx.Arena(), "join: array of string arg required", splitsVal)
}
var separator string
if len(args) == 2 {
Expand Down Expand Up @@ -177,6 +178,7 @@ type Levenshtein struct {
}

func (l *Levenshtein) Call(ectx expr.Context, args []zed.Value) zed.Value {
args = underAll(ectx.Arena(), args)
a, b := args[0], args[1]
if !a.IsString() {
return l.zctx.WrapError(ectx.Arena(), "levenshtein: string args required", a)
Expand Down
15 changes: 15 additions & 0 deletions runtime/vam/expr/function/function.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,26 @@ func New(zctx *zed.Context, name string, narg int) (expr.Function, field.Path, e
var path field.Path
var f expr.Function
switch name {
case "join":
argmax = 2
f = &Join{zctx: zctx}
case "levenshtein":
argmin, argmax = 2, 2
f = &Levenshtein{zctx}
case "lower":
f = &ToLower{zctx}
case "replace":
argmin, argmax = 3, 3
f = &Replace{zctx}
case "rune_len":
f = &RuneLen{zctx}
case "trim":
f = &Trim{zctx}
case "split":
argmin, argmax = 2, 2
f = &Split{zctx}
case "upper":
f = &ToUpper{zctx}
default:
return nil, nil, function.ErrNoSuchFunction
}
Expand Down
168 changes: 167 additions & 1 deletion runtime/vam/expr/function/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,78 @@ package function

import (
"strings"
"unicode/utf8"

"github.com/agnivade/levenshtein"
"github.com/brimdata/zed"
"github.com/brimdata/zed/vector"
)

// Join implements the vectorized join function.
// See https://github.com/brimdata/zed/blob/main/docs/language/functions.md#join
type Join struct {
	zctx *zed.Context
	// builder is scratch space that is reset and refilled for every row.
	builder strings.Builder
}

// Call concatenates the string elements of each array in args[0] into a
// single string per row, placing the optional separator from args[1]
// between consecutive elements. All arguments are first passed through
// underAll.
//
// A wrapped error vector is returned when args[0] is not an array whose
// element type is string, or when a second argument is supplied and its
// type is not string. Rows whose array is reported null by
// vector.ContainerOffset are flagged null in the result. The null flag
// returned by vector.StringValue (for both separator and elements) is
// ignored, so whatever string it reports is used as is.
func (j *Join) Call(args ...vector.Any) vector.Any {
	args = underAll(args)
	list := args[0]
	if arr, isArray := list.Type().(*zed.TypeArray); !isArray || arr.Type.ID() != zed.IDString {
		return vector.NewWrappedError(j.zctx, "join: array of string arg required", list)
	}
	var sepVec vector.Any
	if len(args) == 2 {
		sepVec = args[1]
		if sepVec.Type() != zed.TypeString {
			return vector.NewWrappedError(j.zctx, "join: separator must be string", sepVec)
		}
	}
	n := list.Len()
	out := vector.NewStringEmpty(0, vector.NewBoolEmpty(n, nil))
	elems := vector.Inner(list)
	for row := uint32(0); row < n; row++ {
		// With no separator argument the elements are joined back to back.
		var separator string
		if sepVec != nil {
			separator, _ = vector.StringValue(sepVec, row)
		}
		start, end, isNull := vector.ContainerOffset(list, row)
		if isNull {
			out.Nulls.Set(row)
		}
		j.builder.Reset()
		for k := start; k < end; k++ {
			// The separator goes between elements, never before the first.
			if k > start {
				j.builder.WriteString(separator)
			}
			elem, _ := vector.StringValue(elems, k)
			j.builder.WriteString(elem)
		}
		// A value is appended for every row, null or not, so that output
		// slots stay aligned with input rows.
		out.Append(j.builder.String())
	}
	return out
}

// Levenshtein implements the vectorized levenshtein function.
// See https://github.com/brimdata/zed/blob/main/docs/language/functions.md#levenshtein
type Levenshtein struct {
	zctx *zed.Context
}

// Call computes, row by row, the edit distance between the strings in
// args[0] and args[1] and returns the distances as an int64 vector.
// All arguments are first passed through underAll; the first argument
// whose type is not string causes a wrapped error vector on that argument
// to be returned instead.
//
// The result vector is created without a nulls vector, and the null flag
// reported by vector.StringValue is ignored for both operands.
func (l *Levenshtein) Call(args ...vector.Any) vector.Any {
	args = underAll(args)
	for i := range args {
		if args[i].Type() != zed.TypeString {
			return vector.NewWrappedError(l.zctx, "levenshtein: string args required", args[i])
		}
	}
	left, right := args[0], args[1]
	n := left.Len()
	out := vector.NewIntEmpty(zed.TypeInt64, n, nil)
	for row := uint32(0); row < n; row++ {
		ls, _ := vector.StringValue(left, row)
		rs, _ := vector.StringValue(right, row)
		dist := levenshtein.ComputeDistance(ls, rs)
		out.Append(int64(dist))
	}
	return out
}

// https://github.com/brimdata/zed/blob/main/docs/language/functions.md#replace
type Replace struct {
zctx *zed.Context
Expand All @@ -22,7 +89,7 @@ func (r *Replace) Call(args ...vector.Any) vector.Any {
var errcnt uint32
sVal := args[0]
tags := make([]uint32, sVal.Len())
out := vector.NewStringEmpty(sVal.Len(), vector.NewBoolEmpty(sVal.Len(), nil))
out := vector.NewStringEmpty(0, vector.NewBoolEmpty(sVal.Len(), nil))
for i := uint32(0); i < sVal.Len(); i++ {
s, snull := vector.StringValue(sVal, i)
old, oldnull := vector.StringValue(args[1], i)
Expand All @@ -41,6 +108,63 @@ func (r *Replace) Call(args ...vector.Any) vector.Any {
return vector.NewVariant(tags, []vector.Any{out, errval})
}

// RuneLen implements the vectorized rune_len function.
// See https://github.com/brimdata/zed/blob/main/docs/language/functions.md#rune_len
type RuneLen struct {
	zctx *zed.Context
}

// Call returns an int64 vector holding the number of UTF-8 runes in each
// string of args[0] (after underAll). If the argument's type is not
// string, a wrapped error vector on the argument is returned instead.
//
// Rows that vector.StringValue reports as null are flagged null in the
// result; a count is still appended for them so that output slots stay
// aligned with input rows.
func (r *RuneLen) Call(args ...vector.Any) vector.Any {
	args = underAll(args)
	val := args[0]
	if val.Type() != zed.TypeString {
		return vector.NewWrappedError(r.zctx, "rune_len: string arg required", val)
	}
	n := val.Len()
	out := vector.NewIntEmpty(zed.TypeInt64, n, vector.NewBoolEmpty(n, nil))
	for row := uint32(0); row < n; row++ {
		s, isNull := vector.StringValue(val, row)
		if isNull {
			out.Nulls.Set(row)
		}
		count := utf8.RuneCountInString(s)
		out.Append(int64(count))
	}
	return out
}

// Split implements the vectorized split function.
// See https://github.com/brimdata/zed/blob/main/docs/language/functions.md#split
type Split struct {
	zctx *zed.Context
}

// Call splits each string in args[0] around the separator in args[1] and
// returns an array-of-string vector with one array per input row. All
// arguments are first passed through underAll; the first argument whose
// type is not string causes a wrapped error vector on that argument to be
// returned instead.
//
// A row is flagged null in the result, and contributes no elements, when
// either its string or its separator is reported null by
// vector.StringValue.
func (s *Split) Call(args ...vector.Any) vector.Any {
	args = underAll(args)
	for _, arg := range args {
		if arg.Type() != zed.TypeString {
			return vector.NewWrappedError(s.zctx, "split: string arg required", arg)
		}
	}
	strs, seps := args[0], args[1]
	n := strs.Len()
	values := vector.NewStringEmpty(0, nil)
	nulls := vector.NewBoolEmpty(n, nil)
	var (
		offsets []uint32
		next    uint32 // index in values where the current row's elements begin
	)
	for row := uint32(0); row < n; row++ {
		// Every row, null or not, records its starting offset.
		offsets = append(offsets, next)
		str, strNull := vector.StringValue(strs, row)
		sep, sepNull := vector.StringValue(seps, row)
		if strNull || sepNull {
			nulls.Set(row)
			continue
		}
		parts := strings.Split(str, sep)
		for i := range parts {
			values.Append(parts[i])
		}
		next += uint32(len(parts))
	}
	// The trailing offset marks the end of the last row.
	offsets = append(offsets, next)
	return vector.NewArray(s.zctx.LookupTypeArray(zed.TypeString), offsets, values, nulls)
}

// https://github.com/brimdata/zed/blob/main/docs/language/functions.md#lower
type ToLower struct {
zctx *zed.Context
Expand All @@ -61,3 +185,45 @@ func (t *ToLower) Call(args ...vector.Any) vector.Any {
}
return out
}

// ToUpper implements the vectorized upper function.
// See https://github.com/brimdata/zed/blob/main/docs/language/functions.md#upper
type ToUpper struct {
	zctx *zed.Context
}

// Call returns a string vector holding strings.ToUpper of each string in
// args[0] after it has been passed through vector.Under. If the resulting
// type is not string, a wrapped error vector on the argument is returned
// instead.
//
// Rows that vector.StringValue reports as null are flagged null in the
// result; a value is still appended for them so that output slots stay
// aligned with input rows.
func (t *ToUpper) Call(args ...vector.Any) vector.Any {
	in := vector.Under(args[0])
	if in.Type() != zed.TypeString {
		return vector.NewWrappedError(t.zctx, "upper: string arg required", in)
	}
	n := in.Len()
	out := vector.NewStringEmpty(n, vector.NewBoolEmpty(n, nil))
	for row := uint32(0); row < n; row++ {
		str, isNull := vector.StringValue(in, row)
		if isNull {
			out.Nulls.Set(row)
		}
		upper := strings.ToUpper(str)
		out.Append(upper)
	}
	return out
}

// Trim implements the vectorized trim function.
// See https://github.com/brimdata/zed/blob/main/docs/language/functions.md#trim
type Trim struct {
	zctx *zed.Context
}

// Call returns a string vector holding strings.TrimSpace of each string in
// args[0] after it has been passed through vector.Under. If the resulting
// type is not string, a wrapped error vector on the argument is returned
// instead.
//
// Rows that vector.StringValue reports as null are flagged null in the
// result; a value is still appended for them so that output slots stay
// aligned with input rows.
func (t *Trim) Call(args ...vector.Any) vector.Any {
	in := vector.Under(args[0])
	if in.Type() != zed.TypeString {
		return vector.NewWrappedError(t.zctx, "trim: string arg required", in)
	}
	n := in.Len()
	out := vector.NewStringEmpty(n, vector.NewBoolEmpty(n, nil))
	for row := uint32(0); row < n; row++ {
		str, isNull := vector.StringValue(in, row)
		if isNull {
			out.Nulls.Set(row)
		}
		trimmed := strings.TrimSpace(str)
		out.Append(trimmed)
	}
	return out
}
15 changes: 15 additions & 0 deletions runtime/ztests/expr/function/join.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
zed: join(s, sep)

vector: true

input: |
{s:["a","b","c"],sep:", "}
{s:"join",sep:","}
{s:["a"],sep:["b"]}
{s:["a","b",null(string),"c"],sep:""((int64,string))}
output: |
"a, b, c"
error({message:"join: array of string arg required",on:"join"})
error({message:"join: separator must be string",on:["b"]})
"abc"
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
zed: yield levenshtein(a, b)

vector: true

input: |
{a: "kitten", b: "sitting"}
{a: "kitten", b: ""}
{a: "kitten", b: ""((int64,string))}
{a: "kitten", b: null(string)}
{a: "kitten", b: null}
{a: 1, b: "kitten"}
Expand Down
2 changes: 1 addition & 1 deletion runtime/ztests/expr/function/lower.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
zed: "lower(this)"
zed: lower(this)

vector: true

Expand Down
13 changes: 13 additions & 0 deletions runtime/ztests/expr/function/rune_len.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
zed: rune_len(this)

vector: true

input: |
"hello"
"😎"((int64,string))
1.
output: |
5
1
error({message:"rune_len: string arg required",on:1.})
19 changes: 19 additions & 0 deletions runtime/ztests/expr/function/split.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
zed: split(s, sep)

vector: true

input: |
{s:"a,b,c",sep:","((int64,string))}
{s:"abc",sep:""}
{s:null(string),sep:","}
{s:"a,b,c",sep:null(string)}
{s:1.,sep:""}
{sep:""}
output: |
["a","b","c"]
["a","b","c"]
null([string])
null([string])
error({message:"split: string arg required",on:1.})
error({message:"split: string arg required",on:error("missing")})
13 changes: 13 additions & 0 deletions runtime/ztests/expr/function/trim.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
zed: trim(this)

vector: true

input: |
" cushion "
"foo "((string,int64))
1.
output: |
"cushion"
"foo"
error({message:"trim: string arg required",on:1.})
19 changes: 19 additions & 0 deletions runtime/ztests/expr/function/upper.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
zed: upper(this)

vector: true

input: |
"fOo"
127.0.0.1
null(string)
null(int64)
"BaR"((string,int64))
1((string,int64))
output: |
"FOO"
error({message:"upper: string arg required",on:127.0.0.1})
null(string)
error({message:"upper: string arg required",on:null(int64)})
"BAR"
error({message:"upper: string arg required",on:1})
Loading

0 comments on commit 2bbec80

Please sign in to comment.