From f814699a726d9e7c0288ddf745dc2daa34247224 Mon Sep 17 00:00:00 2001 From: Matthew Nibecker Date: Mon, 16 Dec 2024 11:00:44 -0800 Subject: [PATCH] vam: Add slice expressions for Arrays and Sets (#5533) --- compiler/kernel/vexpr.go | 30 +++++- runtime/sam/expr/slice.go | 9 +- runtime/sam/expr/ztests/slice.yaml | 18 ++-- runtime/vam/expr/slice.go | 156 +++++++++++++++++++++++++++ runtime/ztests/expr/slice-array.yaml | 20 ++++ runtime/ztests/expr/slice-set.yaml | 19 ++++ vector/kind.go | 6 ++ 7 files changed, 238 insertions(+), 20 deletions(-) create mode 100644 runtime/vam/expr/slice.go create mode 100644 runtime/ztests/expr/slice-array.yaml create mode 100644 runtime/ztests/expr/slice-set.yaml diff --git a/compiler/kernel/vexpr.go b/compiler/kernel/vexpr.go index cd42566cfa..4b0637e26d 100644 --- a/compiler/kernel/vexpr.go +++ b/compiler/kernel/vexpr.go @@ -54,6 +54,8 @@ func (b *Builder) compileVamExpr(e dag.Expr) (vamexpr.Evaluator, error) { return b.compileVamRegexpSearch(e) case *dag.RecordExpr: return b.compileVamRecordExpr(e) + case *dag.SliceExpr: + return b.compileVamSliceExpr(e) //case *dag.SetExpr: // return b.compileVamSetExpr(e) //case *dag.MapCall: @@ -73,12 +75,14 @@ func (b *Builder) compileVamExpr(e dag.Expr) (vamexpr.Evaluator, error) { } } -func (b *Builder) compileVamBinary(e *dag.BinaryExpr) (vamexpr.Evaluator, error) { - //XXX TBD - //if slice, ok := e.RHS.(*dag.BinaryExpr); ok && slice.Op == ":" { - // return b.compileVamSlice(e.LHS, slice) - //} +func (b *Builder) compileVamExprWithEmpty(e dag.Expr) (vamexpr.Evaluator, error) { + if e == nil { + return nil, nil + } + return b.compileVamExpr(e) +} +func (b *Builder) compileVamBinary(e *dag.BinaryExpr) (vamexpr.Evaluator, error) { //XXX TBD //if e.Op == "in" { // Do a faster comparison if the LHS is a compile-time constant expression. @@ -277,6 +281,22 @@ func (b *Builder) compileVamSearch(search *dag.Search) (vamexpr.Evaluator, error return vamexpr.NewSearch(search.Text, val, e), nil } +func (b *Builder) compileVamSliceExpr(slice *dag.SliceExpr) (vamexpr.Evaluator, error) { + e, err := b.compileVamExpr(slice.Expr) + if err != nil { + return nil, err + } + from, err := b.compileVamExprWithEmpty(slice.From) + if err != nil { + return nil, err + } + to, err := b.compileVamExprWithEmpty(slice.To) + if err != nil { + return nil, err + } + return vamexpr.NewSliceExpr(b.zctx(), e, from, to), nil +} + func (b *Builder) compileVamArrayExpr(e *dag.ArrayExpr) (vamexpr.Evaluator, error) { elems, err := b.compileVamListElems(e.Elems) if err != nil { diff --git a/runtime/sam/expr/slice.go b/runtime/sam/expr/slice.go index 562bff084c..c8e028011b 100644 --- a/runtime/sam/expr/slice.go +++ b/runtime/sam/expr/slice.go @@ -62,6 +62,9 @@ func (s *Slice) Eval(ectx Context, this super.Value) super.Value { } to = length } + if from > to || to > length || from < 0 { + return s.zctx.NewErrorf("slice out of bounds") + } bytes := elem.Bytes() switch super.TypeUnder(elem.Type()).(type) { case *super.TypeOfBytes: @@ -98,12 +101,6 @@ func sliceIndex(ectx Context, this super.Value, slot Evaluator, length int) (int if index < 0 { index += length } - if index < 0 { - return 0, nil - } - if index > length { - return length, nil - } return index, nil } diff --git a/runtime/sam/expr/ztests/slice.yaml b/runtime/sam/expr/ztests/slice.yaml index a67e6725b3..007b09ead1 100644 --- a/runtime/sam/expr/ztests/slice.yaml +++ b/runtime/sam/expr/ztests/slice.yaml @@ -20,12 +20,12 @@ output: | {a1:null(bytes),a2:null(bytes),a3:null(bytes),a4:null(bytes),a5:null(bytes),a6:null(bytes),a7:null(bytes),a8:null} {a1:null(string),a2:null(string),a3:null(string),a4:null(string),a5:null(string),a6:null(string),a7:null(string),a8:null} {a1:null([int32]),a2:null([int32]),a3:null([int32]),a4:null([int32]),a5:null([int32]),a6:null([int32]),a7:null([int32]),a8:null} - {a1:0x,a2:0x,a3:0x,a4:0x,a5:0x,a6:0x,a7:0x,a8:null} - {a1:"",a2:"",a3:"",a4:"",a5:"",a6:"",a7:"",a8:null} - {a1:[]([int32]),a2:[]([int32]),a3:[]([int32]),a4:[]([int32]),a5:[]([int32]),a6:[]([int32]),a7:[]([int32]),a8:null} - {a1:0x1122,a2:0x112233,a3:0x00,a4:0x001122,a5:0x,a6:0x33,a7:0x22,a8:error("slice index is not a number")} - {a1:"12",a2:"123",a3:"0",a4:"012",a5:"",a6:"3",a7:"2",a8:error("slice index is not a number")} - {a1:"ⁱ⁲",a2:"ⁱ⁲3",a3:"0",a4:"0ⁱ⁲",a5:"",a6:"3",a7:"⁲",a8:error("slice index is not a number")} - {a1:"ⁱ⁲",a2:"ⁱ⁲⁳",a3:"⁰",a4:"⁰ⁱ⁲",a5:"",a6:"⁳",a7:"⁲",a8:error("slice index is not a number")} - {a1:[11(int32),12(int32)],a2:[11(int32),12(int32),13(int32)],a3:[10(int32)],a4:[10(int32),11(int32),12(int32)],a5:[]([int32]),a6:[13(int32)],a7:[12(int32)],a8:[10(int32),11(int32)]} - {a1:|[11(int32),12(int32)]|,a2:|[11(int32),12(int32),13(int32)]|,a3:|[10(int32)]|,a4:|[10(int32),11(int32),12(int32)]|,a5:|[]|(|[int32]|),a6:|[13(int32)]|,a7:|[12(int32)]|,a8:|[10(int32),11(int32)]|} + {a1:error("slice out of bounds"),a2:error("slice out of bounds"),a3:error("slice out of bounds"),a4:error("slice out of bounds"),a5:error("slice out of bounds"),a6:error("slice out of bounds"),a7:error("slice out of bounds"),a8:null} + {a1:error("slice out of bounds"),a2:error("slice out of bounds"),a3:error("slice out of bounds"),a4:error("slice out of bounds"),a5:error("slice out of bounds"),a6:error("slice out of bounds"),a7:error("slice out of bounds"),a8:null} + {a1:error("slice out of bounds"),a2:error("slice out of bounds"),a3:error("slice out of bounds"),a4:error("slice out of bounds"),a5:error("slice out of bounds"),a6:error("slice out of bounds"),a7:error("slice out of bounds"),a8:null} + {a1:0x1122,a2:0x112233,a3:0x00,a4:0x001122,a5:error("slice out of bounds"),a6:0x33,a7:0x22,a8:error("slice index is not a number")} + {a1:"12",a2:"123",a3:"0",a4:"012",a5:error("slice out of bounds"),a6:"3",a7:"2",a8:error("slice index is not a number")} + {a1:"ⁱ⁲",a2:"ⁱ⁲3",a3:"0",a4:"0ⁱ⁲",a5:error("slice out of bounds"),a6:"3",a7:"⁲",a8:error("slice index is not a number")} + {a1:"ⁱ⁲",a2:"ⁱ⁲⁳",a3:"⁰",a4:"⁰ⁱ⁲",a5:error("slice out of bounds"),a6:"⁳",a7:"⁲",a8:error("slice index is not a number")} + {a1:[11(int32),12(int32)],a2:[11(int32),12(int32),13(int32)],a3:[10(int32)],a4:[10(int32),11(int32),12(int32)],a5:error("slice out of bounds"),a6:[13(int32)],a7:[12(int32)],a8:[10(int32),11(int32)]} + {a1:|[11(int32),12(int32)]|,a2:|[11(int32),12(int32),13(int32)]|,a3:|[10(int32)]|,a4:|[10(int32),11(int32),12(int32)]|,a5:error("slice out of bounds"),a6:|[13(int32)]|,a7:|[12(int32)]|,a8:|[10(int32),11(int32)]|} diff --git a/runtime/vam/expr/slice.go b/runtime/vam/expr/slice.go new file mode 100644 index 0000000000..af0ea1991f --- /dev/null +++ b/runtime/vam/expr/slice.go @@ -0,0 +1,156 @@ +package expr + +import ( + "github.com/brimdata/super" + "github.com/brimdata/super/vector" +) + +type sliceExpr struct { + zctx *super.Context + containerEval, fromEval, toEval Evaluator +} + +func NewSliceExpr(zctx *super.Context, container, from, to Evaluator) Evaluator { + return &sliceExpr{ + zctx: zctx, + containerEval: container, + fromEval: from, + toEval: to, + } +} + +func (s *sliceExpr) Eval(vec vector.Any) vector.Any { + vecs := []vector.Any{s.containerEval.Eval(vec)} + if s.fromEval != nil { + vecs = append(vecs, s.fromEval.Eval(vec)) + } + if s.toEval != nil { + vecs = append(vecs, s.toEval.Eval(vec)) + } + return vector.Apply(true, s.eval, vecs...) +} + +func (s *sliceExpr) eval(vecs ...vector.Any) vector.Any { + container := vecs[0] + var from, to vector.Any + vecs = vecs[1:] + if s.fromEval != nil { + from = vecs[0] + if !super.IsSigned(from.Type().ID()) { + return vector.NewWrappedError(s.zctx, "slice: from value is not an integer", from) + } + vecs = vecs[1:] + } + if s.toEval != nil { + to = vecs[0] + if !super.IsSigned(to.Type().ID()) { + return vector.NewWrappedError(s.zctx, "slice: to value is not an integer", from) + } + } + switch vector.KindOf(container) { + case vector.KindArray, vector.KindSet: + return s.evalArrayOrSlice(container, from, to) + case vector.KindBytes, vector.KindString: + panic("slices on bytes and strings unsupported") + case vector.KindError: + return container + default: + return vector.NewWrappedError(s.zctx, "sliced value is not array, set, bytes, or string", container) + } +} + +func (s *sliceExpr) evalArrayOrSlice(vec, fromVec, toVec vector.Any) vector.Any { + from, constFrom := sliceIsConstIndex(fromVec) + to, constTo := sliceIsConstIndex(toVec) + slowPath := !constFrom || !constTo + var index []uint32 + if view, ok := vec.(*vector.View); ok { + vec, index = view.Any, view.Index + } + offsets, inner, nullsIn := arrayOrSetContents(vec) + newOffsets := []uint32{0} + var errs []uint32 + var innerIndex []uint32 + var nullsOut *vector.Bool + for i := range vec.Len() { + idx := i + if index != nil { + idx = index[i] + } + if nullsIn.Value(idx) { + newOffsets = append(newOffsets, newOffsets[len(newOffsets)-1]) + if nullsOut == nil { + nullsOut = vector.NewBoolEmpty(vec.Len(), nil) + } + nullsOut.Set(i) + continue + } + off := offsets[idx] + size := int64(offsets[idx+1] - off) + start, end := int64(0), size + if fromVec != nil { + if slowPath { + from, _ = vector.IntValue(fromVec, idx) + } + start = sliceIndex(from, size) + } + if toVec != nil { + if slowPath { + to, _ = vector.IntValue(toVec, idx) + } + end = sliceIndex(to, size) + } + if start > end || end > size || start < 0 { + errs = append(errs, i) + continue + } + newOffsets = append(newOffsets, newOffsets[len(newOffsets)-1]+uint32(end-start)) + for k := start; k < end; k++ { + innerIndex = append(innerIndex, off+uint32(k)) + } + + } + var out vector.Any + inner = vector.NewView(inner, innerIndex) + if vector.KindOf(vec) == vector.KindArray { + out = vector.NewArray(vec.Type().(*super.TypeArray), newOffsets, inner, nullsOut) + } else { + out = vector.NewSet(vec.Type().(*super.TypeSet), newOffsets, inner, nullsOut) + } + if nullsOut != nil { + nullsOut.SetLen(out.Len()) + } + if len(errs) > 0 { + errOut := vector.NewStringError(s.zctx, "slice out of bounds", uint32(len(errs))) + return vector.Combine(out, errs, errOut) + } + return out +} + +func sliceIsConstIndex(vec vector.Any) (int64, bool) { + if vec == nil { + return 0, true + } + if c, ok := vec.(*vector.Const); ok && c.Nulls == nil { + return c.Value().Int(), true + } + return 0, false +} + +func sliceIndex(idx, size int64) int64 { + if idx < 0 { + idx += int64(size) + } + return idx +} + +func arrayOrSetContents(vec vector.Any) ([]uint32, vector.Any, *vector.Bool) { + switch vec := vec.(type) { + case *vector.Array: + return vec.Offsets, vec.Values, vec.Nulls + case *vector.Set: + return vec.Offsets, vec.Values, vec.Nulls + default: + panic(vec) + } +} diff --git a/runtime/ztests/expr/slice-array.yaml b/runtime/ztests/expr/slice-array.yaml new file mode 100644 index 0000000000..051bb14012 --- /dev/null +++ b/runtime/ztests/expr/slice-array.yaml @@ -0,0 +1,20 @@ +zed: "yield c[start:end]" + +vector: true + +input: | + {start:1,end:-1,c:null([int64])} + {start:1,end:-1,c:[1,2,3,4]} + {start:-3,end:3,c:[5,7,8,9]} + {start:-5,end:3,c:[5,7,8,9]} + {start:0,end:5,c:[5,7,8,9]} + {start:4,end:3,c:[5,7,8,9]} + +output: | + null([int64]) + [2,3] + [7,8] + error("slice out of bounds") + error("slice out of bounds") + error("slice out of bounds") + diff --git a/runtime/ztests/expr/slice-set.yaml b/runtime/ztests/expr/slice-set.yaml new file mode 100644 index 0000000000..00eceb7643 --- /dev/null +++ b/runtime/ztests/expr/slice-set.yaml @@ -0,0 +1,19 @@ +zed: "yield c[start:end]" + +vector: true + +input: | + {start:1,end:-1,c:null(|[int64]|)} + {start:1,end:-1,c:|[1,2,3,4]|} + {start:-3,end:3,c:|[5,7,8,9]|} + {start:-5,end:3,c:|[5,7,8,9]|} + {start:0,end:5,c:|[5,7,8,9]|} + {start:4,end:3,c:|[5,7,8,9]|} + +output: | + null(|[int64]|) + |[2,3]| + |[7,8]| + error("slice out of bounds") + error("slice out of bounds") + error("slice out of bounds") diff --git a/vector/kind.go b/vector/kind.go index eae859fe4b..8a505068af 100644 --- a/vector/kind.go +++ b/vector/kind.go @@ -19,6 +19,8 @@ const ( KindIP = 6 KindType = 7 KindError = 8 + KindArray = 9 + KindSet = 10 ) const ( @@ -32,6 +34,8 @@ const ( func KindOf(v Any) Kind { switch v := v.(type) { + case *Array: + return KindArray case *Int: return KindInt case *Uint: @@ -48,6 +52,8 @@ func KindOf(v Any) Kind { return KindIP case *TypeValue: return KindType + case *Set: + return KindSet case *Dict: return KindOf(v.Any) case *View: