Skip to content

Commit

Permalink
vam: regexp_replace() (#5626)
Browse files Browse the repository at this point in the history
  • Loading branch information
mattnibs authored Feb 5, 2025
1 parent 0675913 commit 8e86ac0
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 17 deletions.
19 changes: 10 additions & 9 deletions runtime/sam/expr/function/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,26 +56,27 @@ type RegexpReplace struct {
}

func (r *RegexpReplace) Call(_ super.Allocator, args []super.Value) super.Value {
sVal := args[0]
reVal := args[1]
newVal := args[2]
sVal := args[0].Under()
reVal := args[1].Under()
newVal := args[2].Under()
for i := range args {
if !args[i].IsString() {
return r.zctx.WrapError("regexp_replace: string arg required", args[i])
}
}
if sVal.IsNull() {
return super.Null
}
if reVal.IsNull() || newVal.IsNull() {
return r.zctx.NewErrorf("regexp_replace: 2nd and 3rd args cannot be null")
if sVal.IsNull() || reVal.IsNull() || newVal.IsNull() {
return super.NullString
}
if re := super.DecodeString(reVal.Bytes()); r.restr != re {
r.restr = re
r.re, r.err = regexp.Compile(re)
}
if r.err != nil {
return r.zctx.NewErrorf("regexp_replace: %s", r.err)
msg := "regexp_replace: invalid regular expression"
if syntaxErr, ok := r.err.(*syntax.Error); ok {
msg += ": " + syntaxErr.Code.String()
}
return r.zctx.WrapError(msg, args[1])
}
return super.NewString(string(r.re.ReplaceAll(sVal.Bytes(), newVal.Bytes())))
}
3 changes: 3 additions & 0 deletions runtime/vam/expr/function/function.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ func New(zctx *super.Context, name string, narg int) (expr.Function, field.Path,
case "regexp":
argmin, argmax = 2, 2
f = &Regexp{zctx: zctx}
case "regexp_replace":
argmin, argmax = 3, 3
f = &RegexpReplace{zctx: zctx}
case "replace":
argmin, argmax = 3, 3
f = &Replace{zctx}
Expand Down
71 changes: 66 additions & 5 deletions runtime/vam/expr/function/regexp.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package function

import (
"fmt"
"regexp"
"regexp/syntax"

Expand Down Expand Up @@ -39,11 +40,7 @@ func (r *Regexp) Call(args ...vector.Any) vector.Any {
r.re, r.err = regexp.Compile(r.restr)
}
if r.err != nil {
msg := "regexp: invalid regular expression"
if syntaxErr, ok := r.err.(*syntax.Error); ok {
msg += ": " + syntaxErr.Code.String()
}
errMsg.Append(msg)
errMsg.Append(regexpErrMsg("regexp", r.err))
errs = append(errs, i)
continue
}
Expand All @@ -68,3 +65,67 @@ func (r *Regexp) Call(args ...vector.Any) vector.Any {
}
return out
}

// RegexpReplace is the vector implementation of the regexp_replace function.
// It remembers the most recently compiled pattern so that consecutive rows
// using the same pattern string do not trigger a recompile.
//
// https://github.com/brimdata/super/blob/main/docs/language/functions.md#regexp_replace
type RegexpReplace struct {
// zctx is used to build the error values returned by Call.
zctx *super.Context
// re is the compiled form of restr; Call reuses it while the pattern is unchanged.
re *regexp.Regexp
// restr is the pattern string most recently passed to regexp.Compile.
restr string
// err is the error result of the most recent regexp.Compile call.
err error
}

// Call implements regexp_replace over vectors. For each row it replaces every
// match of the pattern in args[1] within the string in args[0] using the
// replacement template in args[2].
//
// All three arguments must be strings once underAll has been applied;
// otherwise the whole call returns a "string arg required" error wrapping the
// first offending argument. Rows in which any argument is null get an empty
// placeholder that is covered by the combined nulls vector. Rows whose pattern
// fails to compile are returned as errors wrapping the pattern value.
func (r *RegexpReplace) Call(args ...vector.Any) vector.Any {
	args = underAll(args)
	for i := range args {
		if args[i].Type().ID() != super.IDString {
			return vector.NewWrappedError(r.zctx, "regexp_replace: string arg required", args[i])
		}
	}
	sVec := args[0]
	reVec := args[1]
	replaceVec := args[2]
	// errMsg and errs collect, in row order, the message and row index of
	// every row whose pattern failed to compile.
	errMsg := vector.NewStringEmpty(0, nil)
	var errs []uint32
	// A row is null in the output if any of its three inputs is null.
	nulls := vector.Or(vector.Or(vector.NullsOf(sVec), vector.NullsOf(reVec)), vector.NullsOf(replaceVec))
	out := vector.NewStringEmpty(0, nulls)
	for i := range sVec.Len() {
		s, null := vector.StringValue(sVec, i)
		if null {
			out.Append("")
			continue
		}
		re, null := vector.StringValue(reVec, i)
		if null {
			out.Append("")
			continue
		}
		replace, null := vector.StringValue(replaceVec, i)
		if null {
			out.Append("")
			continue
		}
		// Compile when the pattern changes or when nothing has been compiled
		// yet. The second condition matters for an empty pattern on the very
		// first row: "" equals the zero value of r.restr, so without it r.re
		// would still be nil and ReplaceAllString below would panic.
		if r.restr != re || (r.re == nil && r.err == nil) {
			r.restr = re
			r.re, r.err = regexp.Compile(re)
		}
		if r.err != nil {
			errMsg.Append(regexpErrMsg("regexp_replace", r.err))
			errs = append(errs, i)
			continue
		}
		out.Append(r.re.ReplaceAllString(s, replace))
	}
	if len(errs) > 0 {
		// Error rows were not appended to out, so drop their slots from the
		// nulls vector before interleaving out with the error values.
		out.Nulls = vector.NewInverseView(out.Nulls, errs).(*vector.Bool)
		return vector.Combine(out, errs, vector.NewVecWrappedError(r.zctx, errMsg, vector.NewView(args[1], errs)))
	}
	return out
}

// regexpErrMsg builds the message reported when a regular expression fails to
// compile. fn is the name of the calling function and becomes the message
// prefix. When err is a *syntax.Error its error code is appended; any other
// error yields only the generic message.
func regexpErrMsg(fn string, err error) string {
	syntaxErr, ok := err.(*syntax.Error)
	if !ok {
		return fmt.Sprintf("%s: invalid regular expression", fn)
	}
	return fmt.Sprintf("%s: invalid regular expression: %s", fn, syntaxErr.Code.String())
}
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
zed: "yield regexp_replace(in, re, new)"

vector: true

input: |
{in:"-ab-axxb-",re:"ax*b",new:"T"}
{in:"-ab-axxb-",re:"a(x*)b",new:"$1"}
{in:"-ab-axxb-",re:"a(?P<X>x*)b",new:"$X"}
{in:"Foo bar",re:"Foo",new:"foo"}
{in:"", re:"a(x*)b",new:""}
{in:"foo",re:null(string),new:null(string)}
{in:null(string),re:"foo",new:"bar"}
// error cases
{in:"seafood fool",re:"foo(.?",new:"food"}
{in:4,re:5,new:["foo"]}
{in:"foo",re:null(string),new:null(string)}
output: |
"-T-T-"
"--xx-"
"--xx-"
"foo bar"
""
error("regexp_replace: error parsing regexp: missing closing ): `foo(.?`")
null(string)
null(string)
error({message:"regexp_replace: invalid regular expression: missing closing )",on:"foo(.?"})
error({message:"regexp_replace: string arg required",on:4})
error("regexp_replace: 2nd and 3rd args cannot be null")

0 comments on commit 8e86ac0

Please sign in to comment.