Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vam: Add slice expressions for Arrays and Sets #5533

Merged
merged 1 commit into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions compiler/kernel/vexpr.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ func (b *Builder) compileVamExpr(e dag.Expr) (vamexpr.Evaluator, error) {
return b.compileVamRegexpSearch(e)
case *dag.RecordExpr:
return b.compileVamRecordExpr(e)
case *dag.SliceExpr:
return b.compileVamSliceExpr(e)
//case *dag.SetExpr:
// return b.compileVamSetExpr(e)
//case *dag.MapCall:
Expand All @@ -73,12 +75,14 @@ func (b *Builder) compileVamExpr(e dag.Expr) (vamexpr.Evaluator, error) {
}
}

func (b *Builder) compileVamBinary(e *dag.BinaryExpr) (vamexpr.Evaluator, error) {
//XXX TBD
//if slice, ok := e.RHS.(*dag.BinaryExpr); ok && slice.Op == ":" {
// return b.compileVamSlice(e.LHS, slice)
//}
// compileVamExprWithEmpty compiles e with compileVamExpr but tolerates an
// absent expression: a nil e produces a nil Evaluator and a nil error.
func (b *Builder) compileVamExprWithEmpty(e dag.Expr) (vamexpr.Evaluator, error) {
	if e != nil {
		return b.compileVamExpr(e)
	}
	return nil, nil
}

func (b *Builder) compileVamBinary(e *dag.BinaryExpr) (vamexpr.Evaluator, error) {
//XXX TBD
//if e.Op == "in" {
// Do a faster comparison if the LHS is a compile-time constant expression.
Expand Down Expand Up @@ -277,6 +281,22 @@ func (b *Builder) compileVamSearch(search *dag.Search) (vamexpr.Evaluator, error
return vamexpr.NewSearch(search.Text, val, e), nil
}

// compileVamSliceExpr compiles a slice expression into a vector evaluator.
// The sliced expression is compiled first, followed by the From and To
// bounds in that order; either bound may be nil, in which case a nil
// Evaluator is handed to vamexpr.NewSliceExpr for it.  The first compile
// error encountered is returned.
func (b *Builder) compileVamSliceExpr(slice *dag.SliceExpr) (vamexpr.Evaluator, error) {
	container, err := b.compileVamExpr(slice.Expr)
	if err != nil {
		return nil, err
	}
	var bounds [2]vamexpr.Evaluator
	for i, bound := range []dag.Expr{slice.From, slice.To} {
		if bounds[i], err = b.compileVamExprWithEmpty(bound); err != nil {
			return nil, err
		}
	}
	return vamexpr.NewSliceExpr(b.zctx(), container, bounds[0], bounds[1]), nil
}

func (b *Builder) compileVamArrayExpr(e *dag.ArrayExpr) (vamexpr.Evaluator, error) {
elems, err := b.compileVamListElems(e.Elems)
if err != nil {
Expand Down
9 changes: 3 additions & 6 deletions runtime/sam/expr/slice.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ func (s *Slice) Eval(ectx Context, this super.Value) super.Value {
}
to = length
}
if from > to || to > length || from < 0 {
return s.zctx.NewErrorf("slice out of bounds")
}
bytes := elem.Bytes()
switch super.TypeUnder(elem.Type()).(type) {
case *super.TypeOfBytes:
Expand Down Expand Up @@ -98,12 +101,6 @@ func sliceIndex(ectx Context, this super.Value, slot Evaluator, length int) (int
if index < 0 {
index += length
}
if index < 0 {
return 0, nil
}
if index > length {
return length, nil
}
return index, nil
}

Expand Down
18 changes: 9 additions & 9 deletions runtime/sam/expr/ztests/slice.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ output: |
{a1:null(bytes),a2:null(bytes),a3:null(bytes),a4:null(bytes),a5:null(bytes),a6:null(bytes),a7:null(bytes),a8:null}
{a1:null(string),a2:null(string),a3:null(string),a4:null(string),a5:null(string),a6:null(string),a7:null(string),a8:null}
{a1:null([int32]),a2:null([int32]),a3:null([int32]),a4:null([int32]),a5:null([int32]),a6:null([int32]),a7:null([int32]),a8:null}
{a1:0x,a2:0x,a3:0x,a4:0x,a5:0x,a6:0x,a7:0x,a8:null}
{a1:"",a2:"",a3:"",a4:"",a5:"",a6:"",a7:"",a8:null}
{a1:[]([int32]),a2:[]([int32]),a3:[]([int32]),a4:[]([int32]),a5:[]([int32]),a6:[]([int32]),a7:[]([int32]),a8:null}
{a1:0x1122,a2:0x112233,a3:0x00,a4:0x001122,a5:0x,a6:0x33,a7:0x22,a8:error("slice index is not a number")}
{a1:"12",a2:"123",a3:"0",a4:"012",a5:"",a6:"3",a7:"2",a8:error("slice index is not a number")}
{a1:"ⁱ⁲",a2:"ⁱ⁲3",a3:"0",a4:"0ⁱ⁲",a5:"",a6:"3",a7:"⁲",a8:error("slice index is not a number")}
{a1:"ⁱ⁲",a2:"ⁱ⁲⁳",a3:"⁰",a4:"⁰ⁱ⁲",a5:"",a6:"⁳",a7:"⁲",a8:error("slice index is not a number")}
{a1:[11(int32),12(int32)],a2:[11(int32),12(int32),13(int32)],a3:[10(int32)],a4:[10(int32),11(int32),12(int32)],a5:[]([int32]),a6:[13(int32)],a7:[12(int32)],a8:[10(int32),11(int32)]}
{a1:|[11(int32),12(int32)]|,a2:|[11(int32),12(int32),13(int32)]|,a3:|[10(int32)]|,a4:|[10(int32),11(int32),12(int32)]|,a5:|[]|(|[int32]|),a6:|[13(int32)]|,a7:|[12(int32)]|,a8:|[10(int32),11(int32)]|}
{a1:error("slice out of bounds"),a2:error("slice out of bounds"),a3:error("slice out of bounds"),a4:error("slice out of bounds"),a5:error("slice out of bounds"),a6:error("slice out of bounds"),a7:error("slice out of bounds"),a8:null}
{a1:error("slice out of bounds"),a2:error("slice out of bounds"),a3:error("slice out of bounds"),a4:error("slice out of bounds"),a5:error("slice out of bounds"),a6:error("slice out of bounds"),a7:error("slice out of bounds"),a8:null}
{a1:error("slice out of bounds"),a2:error("slice out of bounds"),a3:error("slice out of bounds"),a4:error("slice out of bounds"),a5:error("slice out of bounds"),a6:error("slice out of bounds"),a7:error("slice out of bounds"),a8:null}
{a1:0x1122,a2:0x112233,a3:0x00,a4:0x001122,a5:error("slice out of bounds"),a6:0x33,a7:0x22,a8:error("slice index is not a number")}
{a1:"12",a2:"123",a3:"0",a4:"012",a5:error("slice out of bounds"),a6:"3",a7:"2",a8:error("slice index is not a number")}
{a1:"ⁱ⁲",a2:"ⁱ⁲3",a3:"0",a4:"0ⁱ⁲",a5:error("slice out of bounds"),a6:"3",a7:"⁲",a8:error("slice index is not a number")}
{a1:"ⁱ⁲",a2:"ⁱ⁲⁳",a3:"⁰",a4:"⁰ⁱ⁲",a5:error("slice out of bounds"),a6:"⁳",a7:"⁲",a8:error("slice index is not a number")}
{a1:[11(int32),12(int32)],a2:[11(int32),12(int32),13(int32)],a3:[10(int32)],a4:[10(int32),11(int32),12(int32)],a5:error("slice out of bounds"),a6:[13(int32)],a7:[12(int32)],a8:[10(int32),11(int32)]}
{a1:|[11(int32),12(int32)]|,a2:|[11(int32),12(int32),13(int32)]|,a3:|[10(int32)]|,a4:|[10(int32),11(int32),12(int32)]|,a5:error("slice out of bounds"),a6:|[13(int32)]|,a7:|[12(int32)]|,a8:|[10(int32),11(int32)]|}
156 changes: 156 additions & 0 deletions runtime/vam/expr/slice.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package expr

import (
"github.com/brimdata/super"
"github.com/brimdata/super/vector"
)

// sliceExpr is the vector evaluator for a slice expression
// (container[from:to]).
type sliceExpr struct {
	// zctx is the type context passed along when building error vectors.
	zctx *super.Context
	// containerEval yields the value being sliced.
	containerEval Evaluator
	// fromEval yields the lower bound; nil when the bound is omitted.
	fromEval Evaluator
	// toEval yields the upper bound; nil when the bound is omitted.
	toEval Evaluator
}

// NewSliceExpr returns an Evaluator that slices the values produced by
// container using the bounds produced by from and to.  A nil from or to
// means that bound was omitted from the slice expression.
func NewSliceExpr(zctx *super.Context, container, from, to Evaluator) Evaluator {
	s := new(sliceExpr)
	s.zctx = zctx
	s.containerEval = container
	s.fromEval = from
	s.toEval = to
	return s
}

// Eval evaluates the container and whichever bounds are present against vec
// (container first, then from, then to) and hands the resulting vectors to
// vector.Apply together with s.eval.
func (s *sliceExpr) Eval(vec vector.Any) vector.Any {
	args := make([]vector.Any, 1, 3)
	args[0] = s.containerEval.Eval(vec)
	for _, bound := range []Evaluator{s.fromEval, s.toEval} {
		if bound != nil {
			args = append(args, bound.Eval(vec))
		}
	}
	return vector.Apply(true, s.eval, args...)
}

// eval is the callback given to vector.Apply by Eval.  vecs[0] is the
// container vector; it is followed by the from vector when s.fromEval is
// non-nil and then by the to vector when s.toEval is non-nil.
//
// A bound whose type does not satisfy super.IsSigned yields a wrapped error
// around that bound.  Array and set containers are sliced, error containers
// are passed through unchanged, and any other container kind yields a
// wrapped error.  Slicing bytes and strings is not implemented here and
// panics.
func (s *sliceExpr) eval(vecs ...vector.Any) vector.Any {
	container := vecs[0]
	var from, to vector.Any
	vecs = vecs[1:]
	if s.fromEval != nil {
		from = vecs[0]
		if !super.IsSigned(from.Type().ID()) {
			return vector.NewWrappedError(s.zctx, "slice: from value is not an integer", from)
		}
		vecs = vecs[1:]
	}
	if s.toEval != nil {
		to = vecs[0]
		if !super.IsSigned(to.Type().ID()) {
			// Wrap the offending to value.  Wrapping from here would report
			// the wrong value and would pass nil whenever the lower bound
			// is omitted.
			return vector.NewWrappedError(s.zctx, "slice: to value is not an integer", to)
		}
	}
	switch vector.KindOf(container) {
	case vector.KindArray, vector.KindSet:
		return s.evalArrayOrSlice(container, from, to)
	case vector.KindBytes, vector.KindString:
		panic("slices on bytes and strings unsupported")
	case vector.KindError:
		return container
	default:
		return vector.NewWrappedError(s.zctx, "sliced value is not array, set, bytes, or string", container)
	}
}

// evalArrayOrSlice slices each array or set value of vec using the bounds in
// fromVec and toVec, either of which may be nil when that bound is omitted.
// vec may be a *vector.View over an array or set vector.
//
// For each row a negative bound is offset by the row's length (see
// sliceIndex).  Null rows stay null.  A row whose resolved bounds satisfy
// start > end, end > size, or start < 0 becomes a "slice out of bounds"
// error.  The non-error rows are gathered into a new array or set vector
// whose values are a view onto the original values, and the error rows are
// merged back in with vector.Combine at their original slots.
func (s *sliceExpr) evalArrayOrSlice(vec, fromVec, toVec vector.Any) vector.Any {
	from, constFrom := sliceIsConstIndex(fromVec)
	to, constTo := sliceIsConstIndex(toVec)
	// Record the logical row count before unwrapping a view: afterwards vec
	// is the underlying vector, whose length need not match the view's.
	n := vec.Len()
	var index []uint32
	if view, ok := vec.(*vector.View); ok {
		vec, index = view.Any, view.Index
	}
	offsets, inner, nullsIn := arrayOrSetContents(vec)
	newOffsets := []uint32{0}
	var errs []uint32
	var innerIndex []uint32
	var nullsOut *vector.Bool
	for i := range n {
		// i is the logical row; idx is the matching slot in the unwrapped
		// container.
		idx := i
		if index != nil {
			idx = index[i]
		}
		if nullsIn.Value(idx) {
			if nullsOut == nil {
				nullsOut = vector.NewBoolEmpty(n, nil)
			}
			// Error rows are left out of out, so the null bit belongs at
			// this row's position in out rather than at input row i.
			nullsOut.Set(uint32(len(newOffsets) - 1))
			newOffsets = append(newOffsets, newOffsets[len(newOffsets)-1])
			continue
		}
		off := offsets[idx]
		size := int64(offsets[idx+1] - off)
		start, end := int64(0), size
		if fromVec != nil {
			if !constFrom {
				// The bound vectors are aligned with the logical rows, so
				// they are read at i, not at the unwrapped slot idx.
				// NOTE(review): the null flag from IntValue is discarded;
				// confirm how a null bound should behave.
				from, _ = vector.IntValue(fromVec, i)
			}
			start = sliceIndex(from, size)
		}
		if toVec != nil {
			if !constTo {
				to, _ = vector.IntValue(toVec, i)
			}
			end = sliceIndex(to, size)
		}
		if start > end || end > size || start < 0 {
			errs = append(errs, i)
			continue
		}
		newOffsets = append(newOffsets, newOffsets[len(newOffsets)-1]+uint32(end-start))
		for k := start; k < end; k++ {
			innerIndex = append(innerIndex, off+uint32(k))
		}
	}
	var out vector.Any
	inner = vector.NewView(inner, innerIndex)
	if vector.KindOf(vec) == vector.KindArray {
		out = vector.NewArray(vec.Type().(*super.TypeArray), newOffsets, inner, nullsOut)
	} else {
		out = vector.NewSet(vec.Type().(*super.TypeSet), newOffsets, inner, nullsOut)
	}
	if nullsOut != nil {
		nullsOut.SetLen(out.Len())
	}
	if len(errs) > 0 {
		errOut := vector.NewStringError(s.zctx, "slice out of bounds", uint32(len(errs)))
		return vector.Combine(out, errs, errOut)
	}
	return out
}

// sliceIsConstIndex reports whether a slice bound is the same for every row.
// A nil vec (bound omitted) yields (0, true).  A *vector.Const whose Nulls
// field is nil yields its integer value and true.  Anything else yields
// (0, false).
func sliceIsConstIndex(vec vector.Any) (int64, bool) {
	if vec == nil {
		return 0, true
	}
	c, isConst := vec.(*vector.Const)
	if !isConst || c.Nulls != nil {
		return 0, false
	}
	return c.Value().Int(), true
}

// sliceIndex resolves a slice bound against a container of the given size.
// Non-negative bounds are returned as is; a negative bound is offset by
// size so that it counts back from the end.  No range checking is done
// here, so the result may still be negative or exceed size.
func sliceIndex(idx, size int64) int64 {
	if idx >= 0 {
		return idx
	}
	return size + idx
}

// arrayOrSetContents returns the Offsets, Values, and Nulls fields of vec,
// which must be a *vector.Array or a *vector.Set.  It panics with vec for
// any other vector type.
func arrayOrSetContents(vec vector.Any) ([]uint32, vector.Any, *vector.Bool) {
	if arr, ok := vec.(*vector.Array); ok {
		return arr.Offsets, arr.Values, arr.Nulls
	}
	if set, ok := vec.(*vector.Set); ok {
		return set.Offsets, set.Values, set.Nulls
	}
	panic(vec)
}
20 changes: 20 additions & 0 deletions runtime/ztests/expr/slice-array.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
zed: "yield c[start:end]"

vector: true

input: |
{start:1,end:-1,c:null([int64])}
{start:1,end:-1,c:[1,2,3,4]}
{start:-3,end:3,c:[5,7,8,9]}
{start:-5,end:3,c:[5,7,8,9]}
{start:0,end:5,c:[5,7,8,9]}
{start:4,end:3,c:[5,7,8,9]}

output: |
null([int64])
[2,3]
[7,8]
error("slice out of bounds")
error("slice out of bounds")
error("slice out of bounds")

19 changes: 19 additions & 0 deletions runtime/ztests/expr/slice-set.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
zed: "yield c[start:end]"

vector: true

input: |
{start:1,end:-1,c:null(|[int64]|)}
{start:1,end:-1,c:|[1,2,3,4]|}
{start:-3,end:3,c:|[5,7,8,9]|}
{start:-5,end:3,c:|[5,7,8,9]|}
{start:0,end:5,c:|[5,7,8,9]|}
{start:4,end:3,c:|[5,7,8,9]|}

output: |
null(|[int64]|)
|[2,3]|
|[7,8]|
error("slice out of bounds")
error("slice out of bounds")
error("slice out of bounds")
6 changes: 6 additions & 0 deletions vector/kind.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ const (
KindIP = 6
KindType = 7
KindError = 8
KindArray = 9
KindSet = 10
)

const (
Expand All @@ -32,6 +34,8 @@ const (

func KindOf(v Any) Kind {
switch v := v.(type) {
case *Array:
return KindArray
case *Int:
return KindInt
case *Uint:
Expand All @@ -48,6 +52,8 @@ func KindOf(v Any) Kind {
return KindIP
case *TypeValue:
return KindType
case *Set:
return KindSet
case *Dict:
return KindOf(v.Any)
case *View:
Expand Down