From 44dedc9ce1cd266a2ecefa6a92eb5e54e88fc06d Mon Sep 17 00:00:00 2001
From: Matthew Nibecker
Date: Fri, 13 Dec 2024 13:29:53 -0800
Subject: [PATCH] vam: Add slice expressions for Arrays and Sets

---
 compiler/kernel/vexpr.go             |  34 +++++-
 runtime/sam/expr/slice.go            |   9 +-
 runtime/vam/expr/slice.go            | 178 +++++++++++++++++++++++++++
 runtime/ztests/expr/slice-array.yaml |  20 ++++
 runtime/ztests/expr/slice-set.yaml   |  19 ++++
 vector/kind.go                       |   6 ++
 6 files changed, 255 insertions(+), 11 deletions(-)
 create mode 100644 runtime/vam/expr/slice.go
 create mode 100644 runtime/ztests/expr/slice-array.yaml
 create mode 100644 runtime/ztests/expr/slice-set.yaml

diff --git a/compiler/kernel/vexpr.go b/compiler/kernel/vexpr.go
index cd42566cfa..4b0637e26d 100644
--- a/compiler/kernel/vexpr.go
+++ b/compiler/kernel/vexpr.go
@@ -54,6 +54,8 @@ func (b *Builder) compileVamExpr(e dag.Expr) (vamexpr.Evaluator, error) {
 		return b.compileVamRegexpSearch(e)
 	case *dag.RecordExpr:
 		return b.compileVamRecordExpr(e)
+	case *dag.SliceExpr:
+		return b.compileVamSliceExpr(e)
 	//case *dag.SetExpr:
 	//	return b.compileVamSetExpr(e)
 	//case *dag.MapCall:
@@ -73,12 +75,16 @@ func (b *Builder) compileVamExpr(e dag.Expr) (vamexpr.Evaluator, error) {
 	}
 }
 
-func (b *Builder) compileVamBinary(e *dag.BinaryExpr) (vamexpr.Evaluator, error) {
-	//XXX TBD
-	//if slice, ok := e.RHS.(*dag.BinaryExpr); ok && slice.Op == ":" {
-	//	return b.compileVamSlice(e.LHS, slice)
-	//}
+// compileVamExprWithEmpty compiles e like compileVamExpr but returns a nil
+// Evaluator and a nil error when e is nil.
+func (b *Builder) compileVamExprWithEmpty(e dag.Expr) (vamexpr.Evaluator, error) {
+	if e == nil {
+		return nil, nil
+	}
+	return b.compileVamExpr(e)
+}
 
+func (b *Builder) compileVamBinary(e *dag.BinaryExpr) (vamexpr.Evaluator, error) {
 	//XXX TBD
 	//if e.Op == "in" {
 	// Do a faster comparison if the LHS is a compile-time constant expression.
@@ -277,6 +283,24 @@ func (b *Builder) compileVamSearch(search *dag.Search) (vamexpr.Evaluator, error
 	return vamexpr.NewSearch(search.Text, val, e), nil
 }
 
+// compileVamSliceExpr compiles a slice expression. A nil From or To bound
+// is passed to vamexpr.NewSliceExpr as a nil Evaluator.
+func (b *Builder) compileVamSliceExpr(slice *dag.SliceExpr) (vamexpr.Evaluator, error) {
+	e, err := b.compileVamExpr(slice.Expr)
+	if err != nil {
+		return nil, err
+	}
+	from, err := b.compileVamExprWithEmpty(slice.From)
+	if err != nil {
+		return nil, err
+	}
+	to, err := b.compileVamExprWithEmpty(slice.To)
+	if err != nil {
+		return nil, err
+	}
+	return vamexpr.NewSliceExpr(b.zctx(), e, from, to), nil
+}
+
 func (b *Builder) compileVamArrayExpr(e *dag.ArrayExpr) (vamexpr.Evaluator, error) {
 	elems, err := b.compileVamListElems(e.Elems)
 	if err != nil {
diff --git a/runtime/sam/expr/slice.go b/runtime/sam/expr/slice.go
index 562bff084c..c8e028011b 100644
--- a/runtime/sam/expr/slice.go
+++ b/runtime/sam/expr/slice.go
@@ -62,6 +62,9 @@ func (s *Slice) Eval(ectx Context, this super.Value) super.Value {
 		}
 		to = length
 	}
+	if from > to || to > length || from < 0 {
+		return s.zctx.NewErrorf("slice out of bounds")
+	}
 	bytes := elem.Bytes()
 	switch super.TypeUnder(elem.Type()).(type) {
 	case *super.TypeOfBytes:
@@ -98,12 +101,6 @@ func sliceIndex(ectx Context, this super.Value, slot Evaluator, length int) (int
 	if index < 0 {
 		index += length
 	}
-	if index < 0 {
-		return 0, nil
-	}
-	if index > length {
-		return length, nil
-	}
 	return index, nil
 }
 
diff --git a/runtime/vam/expr/slice.go b/runtime/vam/expr/slice.go
new file mode 100644
index 0000000000..90ec90bae6
--- /dev/null
+++ b/runtime/vam/expr/slice.go
@@ -0,0 +1,178 @@
+package expr
+
+import (
+	"github.com/brimdata/super"
+	"github.com/brimdata/super/vector"
+)
+
+// sliceExpr evaluates container[from:to] over vectors. A nil fromEval or
+// toEval means that bound was not supplied.
+type sliceExpr struct {
+	zctx                            *super.Context
+	containerEval, fromEval, toEval Evaluator
+}
+
+// NewSliceExpr returns an Evaluator that slices the value produced by
+// container using the optional bounds from and to. Pass nil for a bound
+// that is not present in the expression.
+func NewSliceExpr(zctx *super.Context, container, from, to Evaluator) Evaluator {
+	return &sliceExpr{
+		zctx:          zctx,
+		containerEval: container,
+		fromEval:      from,
+		toEval:        to,
+	}
+}
+
+// Eval evaluates the container and any present bounds against vec and
+// applies the slice to the results.
+func (s *sliceExpr) Eval(vec vector.Any) vector.Any {
+	vecs := []vector.Any{s.containerEval.Eval(vec)}
+	if s.fromEval != nil {
+		vecs = append(vecs, s.fromEval.Eval(vec))
+	}
+	if s.toEval != nil {
+		vecs = append(vecs, s.toEval.Eval(vec))
+	}
+	return vector.Apply(true, s.eval, vecs...)
+}
+
+// eval receives the container followed by the from and to vectors, in that
+// order, with absent bounds left out. It rejects bounds that are not signed
+// integers and containers that are not arrays or sets.
+func (s *sliceExpr) eval(vecs ...vector.Any) vector.Any {
+	container := vecs[0]
+	var from, to vector.Any
+	vecs = vecs[1:]
+	if s.fromEval != nil {
+		from = vecs[0]
+		if !super.IsSigned(from.Type().ID()) {
+			return vector.NewWrappedError(s.zctx, "slice: from value is not an integer", from)
+		}
+		vecs = vecs[1:]
+	}
+	if s.toEval != nil {
+		to = vecs[0]
+		if !super.IsSigned(to.Type().ID()) {
+			// Wrap the offending to vector; from may be nil here.
+			return vector.NewWrappedError(s.zctx, "slice: to value is not an integer", to)
+		}
+	}
+	switch vector.KindOf(container) {
+	case vector.KindSet, vector.KindArray:
+		return s.evalArrayOrSlice(container, from, to)
+	default:
+		return vector.NewWrappedError(s.zctx, "sliced value is not array, set, bytes, or string", container)
+	}
+}
+
+// evalArrayOrSlice slices each array or set in vec. Null containers stay
+// null; rows whose resolved bounds fall outside the container become
+// "slice out of bounds" errors and are combined with the sliced rows.
+func (s *sliceExpr) evalArrayOrSlice(vec, fromVec, toVec vector.Any) vector.Any {
+	from, constFrom := sliceIsConstIndex(fromVec)
+	to, constTo := sliceIsConstIndex(toVec)
+	slowPath := !constFrom || !constTo
+	// Take the row count before unwrapping a view: the view and its
+	// underlying vector can differ in length.
+	n := vec.Len()
+	var index []uint32
+	if view, ok := vec.(*vector.View); ok {
+		vec, index = view.Any, view.Index
+	}
+	offsets, inner, nullsIn := arrayOrSetContents(vec)
+	newOffsets := []uint32{0}
+	var errs []uint32
+	var innerIndex []uint32
+	var nullsOut *vector.Bool
+	for i := range n {
+		// i is the row in the input; idx is the matching slot in the
+		// unwrapped container.
+		idx := i
+		if index != nil {
+			idx = index[i]
+		}
+		if nullsIn.Value(idx) {
+			newOffsets = append(newOffsets, newOffsets[len(newOffsets)-1])
+			if nullsOut == nil {
+				nullsOut = vector.NewBoolEmpty(n, nil)
+			}
+			// Error rows are left out of out, so the null bit belongs at
+			// the row just appended rather than at i.
+			nullsOut.Set(uint32(len(newOffsets) - 2))
+			continue
+		}
+		start := offsets[idx]
+		origSize := int64(offsets[idx+1] - start)
+		relFrom, relTo := int64(0), origSize
+		if fromVec != nil {
+			if slowPath {
+				// The bound vectors are indexed by input row, not by the
+				// container's underlying slot.
+				from, _ = vector.IntValue(fromVec, i)
+			}
+			relFrom = sliceIndex(from, origSize)
+		}
+		if toVec != nil {
+			if slowPath {
+				to, _ = vector.IntValue(toVec, i)
+			}
+			relTo = sliceIndex(to, origSize)
+		}
+		if relFrom > relTo || relTo > origSize || relFrom < 0 {
+			errs = append(errs, i)
+			continue
+		}
+		newOffsets = append(newOffsets, newOffsets[len(newOffsets)-1]+uint32(relTo-relFrom))
+		for k := relFrom; k < relTo; k++ {
+			innerIndex = append(innerIndex, start+uint32(k))
+		}
+	}
+	var out vector.Any
+	inner = vector.NewView(inner, innerIndex)
+	if vector.KindOf(vec) == vector.KindArray {
+		out = vector.NewArray(vec.Type().(*super.TypeArray), newOffsets, inner, nullsOut)
+	} else {
+		out = vector.NewSet(vec.Type().(*super.TypeSet), newOffsets, inner, nullsOut)
+	}
+	if nullsOut != nil {
+		nullsOut.SetLen(out.Len())
+	}
+	if len(errs) > 0 {
+		errOut := vector.NewStringError(s.zctx, "slice out of bounds", uint32(len(errs)))
+		return vector.Combine(out, errs, errOut)
+	}
+	return out
+}
+
+// sliceIsConstIndex returns the bound's value and true when vec is a
+// null-free constant, and 0 and true when the bound is absent (vec is nil).
+// Otherwise it returns false and the bound must be read per row.
+func sliceIsConstIndex(vec vector.Any) (int64, bool) {
+	if vec == nil {
+		return 0, true
+	}
+	if c, ok := vec.(*vector.Const); ok && c.Nulls == nil {
+		return c.Value().Int(), true
+	}
+	return 0, false
+}
+
+// sliceIndex resolves a negative idx relative to the end of a container
+// of the given size. The result is not range checked.
+func sliceIndex(idx, size int64) int64 {
+	if idx < 0 {
+		idx += size
+	}
+	return idx
+}
+
+// arrayOrSetContents returns the offsets, values, and nulls of vec, which
+// must be a *vector.Array or *vector.Set.
+func arrayOrSetContents(vec vector.Any) ([]uint32, vector.Any, *vector.Bool) {
+	if array, ok := vec.(*vector.Array); ok {
+		return array.Offsets, array.Values, array.Nulls
+	}
+	set := vec.(*vector.Set)
+	return set.Offsets, set.Values, set.Nulls
+}
diff --git a/runtime/ztests/expr/slice-array.yaml b/runtime/ztests/expr/slice-array.yaml
new file mode 100644
index 0000000000..051bb14012
--- /dev/null
+++ b/runtime/ztests/expr/slice-array.yaml
@@ -0,0 +1,20 @@
+zed: "yield c[start:end]"
+
+vector: true
+
+input: |
+  {start:1,end:-1,c:null([int64])}
+  {start:1,end:-1,c:[1,2,3,4]}
+  {start:-3,end:3,c:[5,7,8,9]}
+  {start:-5,end:3,c:[5,7,8,9]}
+  {start:0,end:5,c:[5,7,8,9]}
+  {start:4,end:3,c:[5,7,8,9]}
+
+output: |
+  null([int64])
+  [2,3]
+  [7,8]
+  error("slice out of bounds")
+  error("slice out of bounds")
+  error("slice out of bounds")
+
diff --git a/runtime/ztests/expr/slice-set.yaml b/runtime/ztests/expr/slice-set.yaml
new file mode 100644
index 0000000000..00eceb7643
--- /dev/null
+++ b/runtime/ztests/expr/slice-set.yaml
@@ -0,0 +1,19 @@
+zed: "yield c[start:end]"
+
+vector: true
+
+input: |
+  {start:1,end:-1,c:null(|[int64]|)}
+  {start:1,end:-1,c:|[1,2,3,4]|}
+  {start:-3,end:3,c:|[5,7,8,9]|}
+  {start:-5,end:3,c:|[5,7,8,9]|}
+  {start:0,end:5,c:|[5,7,8,9]|}
+  {start:4,end:3,c:|[5,7,8,9]|}
+
+output: |
+  null(|[int64]|)
+  |[2,3]|
+  |[7,8]|
+  error("slice out of bounds")
+  error("slice out of bounds")
+  error("slice out of bounds")
diff --git a/vector/kind.go b/vector/kind.go
index eae859fe4b..8a505068af 100644
--- a/vector/kind.go
+++ b/vector/kind.go
@@ -19,6 +19,8 @@ const (
 	KindIP    = 6
 	KindType  = 7
 	KindError = 8
+	KindArray = 9
+	KindSet   = 10
 )
 
 const (
@@ -32,6 +34,8 @@ const (
 
 func KindOf(v Any) Kind {
 	switch v := v.(type) {
+	case *Array:
+		return KindArray
 	case *Int:
 		return KindInt
 	case *Uint:
@@ -48,6 +52,8 @@ func KindOf(v Any) Kind {
 		return KindIP
 	case *TypeValue:
 		return KindType
+	case *Set:
+		return KindSet
 	case *Dict:
 		return KindOf(v.Any)
 	case *View: