diff --git a/compiler/kernel/filter.go b/compiler/kernel/filter.go index 211b65fb4a..eb22e521fa 100644 --- a/compiler/kernel/filter.go +++ b/compiler/kernel/filter.go @@ -36,9 +36,8 @@ func (f *DeleteFilter) AsEvaluator() (expr.Evaluator, error) { return nil, nil } // For a DeleteFilter Evaluator the pushdown gets wrapped in a unary ! - // expression so we get all values that don't match. We also add a missing - // call so if the expression results in an error("missing") the value is - // kept. + // expression so we get all values that don't match. We also add an error + // and null check because we want to keep these values around. return f.builder.compileExpr(&dag.BinaryExpr{ Kind: "BinaryExpr", Op: "or", @@ -47,10 +46,11 @@ func (f *DeleteFilter) AsEvaluator() (expr.Evaluator, error) { Op: "!", Operand: f.pushdown, }, - RHS: &dag.Call{ - Kind: "Call", - Name: "missing", - Args: []dag.Expr{f.pushdown}, + RHS: &dag.BinaryExpr{ + Kind: "BinaryExpr", + Op: "or", + LHS: &dag.IsNullExpr{Kind: "IsNullExpr", Expr: f.pushdown}, + RHS: &dag.Call{Kind: "Call", Name: "is_error", Args: []dag.Expr{f.pushdown}}, }, }) } diff --git a/lake/pool.go b/lake/pool.go index 70aa31de08..0d707db851 100644 --- a/lake/pool.go +++ b/lake/pool.go @@ -179,8 +179,7 @@ func filter(zctx *super.Context, ectx expr.Context, this super.Value, e expr.Eva if e == nil { return true } - val, ok := expr.EvalBool(zctx, ectx, this, e) - return ok && val.Bool() + return expr.EvalBool(zctx, ectx, this, e).Ptr().AsBool() } type BranchTip struct { diff --git a/runtime/sam/expr/agg.go b/runtime/sam/expr/agg.go index 29bedb3e7c..30b943c7ff 100644 --- a/runtime/sam/expr/agg.go +++ b/runtime/sam/expr/agg.go @@ -34,7 +34,7 @@ func (a *Aggregator) NewFunction() agg.Function { func (a *Aggregator) Apply(zctx *super.Context, ectx Context, f agg.Function, this super.Value) { if a.where != nil { - if val, ok := EvalBool(zctx, ectx, this, a.where); !ok || !val.Bool() { + if val := EvalBool(zctx, ectx, this, a.where); 
!val.AsBool() { // XXX Issue #3401: do something with "where" errors. return } diff --git a/runtime/sam/expr/eval.go b/runtime/sam/expr/eval.go index b5077347f0..06142194d2 100644 --- a/runtime/sam/expr/eval.go +++ b/runtime/sam/expr/eval.go @@ -34,14 +34,11 @@ func NewLogicalNot(zctx *super.Context, e Evaluator) *Not { } func (n *Not) Eval(ectx Context, this super.Value) super.Value { - val, ok := EvalBool(n.zctx, ectx, this, n.expr) - if !ok { + val := EvalBool(n.zctx, ectx, this, n.expr) + if val.IsError() || val.IsNull() { return val } - if val.Bool() { - return super.False - } - return super.True + return super.NewBool(!val.Bool()) } type And struct { @@ -65,53 +62,61 @@ func NewLogicalOr(zctx *super.Context, lhs, rhs Evaluator) *Or { } // EvalBool evaluates e with this and if the result is a Zed bool, returns the -// result and true. Otherwise, a Zed error (inclusive of missing) and false -// are returned. -func EvalBool(zctx *super.Context, ectx Context, this super.Value, e Evaluator) (super.Value, bool) { +// result. 
+func EvalBool(zctx *super.Context, ectx Context, this super.Value, e Evaluator) super.Value { val := e.Eval(ectx, this) - if super.TypeUnder(val.Type()) == super.TypeBool { - return val, true - } - if val.IsError() { - return val, false + if super.TypeUnder(val.Type()) == super.TypeBool || val.IsError() { + return val } - return zctx.WrapError("not type bool", val), false + return zctx.WrapError("not type bool", val) } func (a *And) Eval(ectx Context, this super.Value) super.Value { - lhs, ok := EvalBool(a.zctx, ectx, this, a.lhs) - if !ok { - return lhs - } - if !lhs.Bool() { + lhs := EvalBool(a.zctx, ectx, this, a.lhs) + rhs := EvalBool(a.zctx, ectx, this, a.rhs) + if isfalse(lhs) || isfalse(rhs) { + // anything AND FALSE = FALSE return super.False } - rhs, ok := EvalBool(a.zctx, ectx, this, a.rhs) - if !ok { + // ERROR AND NULL = ERROR + // ERROR AND TRUE = ERROR + if lhs.IsError() { + return lhs + } + if rhs.IsError() { return rhs } - if !rhs.Bool() { - return super.False + if lhs.IsNull() || rhs.IsNull() { + // NULL AND TRUE = NULL + return super.NullBool } return super.True } +func isfalse(val super.Value) bool { + return val.Type().ID() == super.IDBool && !val.IsNull() && !val.Bool() +} + func (o *Or) Eval(ectx Context, this super.Value) super.Value { - lhs, ok := EvalBool(o.zctx, ectx, this, o.lhs) - if ok && lhs.Bool() { + lhs := EvalBool(o.zctx, ectx, this, o.lhs) + rhs := EvalBool(o.zctx, ectx, this, o.rhs) + if lhs.AsBool() || rhs.AsBool() { + // anything OR TRUE = TRUE return super.True } - if lhs.IsError() && !lhs.IsMissing() { + if lhs.IsNull() || rhs.IsNull() { + // NULL OR FALSE = NULL + // NULL OR ERROR = NULL + return super.NullBool + } + // ERROR OR FALSE = ERROR + if lhs.IsError() { return lhs } - rhs, ok := EvalBool(o.zctx, ectx, this, o.rhs) - if ok { - if rhs.Bool() { - return super.True - } - return super.False + if rhs.IsError() { + return rhs } - return rhs + return super.False } type In struct { diff --git a/runtime/sam/expr/filter.go 
b/runtime/sam/expr/filter.go index 20aeb71d6d..a9a9ede499 100644 --- a/runtime/sam/expr/filter.go +++ b/runtime/sam/expr/filter.go @@ -254,8 +254,8 @@ func NewFilterApplier(zctx *super.Context, e Evaluator) Evaluator { } func (f *filterApplier) Eval(ectx Context, this super.Value) super.Value { - val, ok := EvalBool(f.zctx, ectx, this, f.expr) - if ok { + val := EvalBool(f.zctx, ectx, this, f.expr) + if val.Type().ID() == super.IDBool { if val.Bool() { return this } diff --git a/runtime/sam/expr/ztests/logical.yaml b/runtime/sam/expr/ztests/logical.yaml deleted file mode 100644 index bddc73d6be..0000000000 --- a/runtime/sam/expr/ztests/logical.yaml +++ /dev/null @@ -1,102 +0,0 @@ -script: | - echo === TRUE AND === - super -z -c "yield t AND t" in.jsup - super -z -c "yield t AND f" in.jsup - super -z -c "yield t AND n" in.jsup - super -z -c "yield missing(t AND m)" in.jsup - echo === FALSE AND === - super -z -c "yield f AND t" in.jsup - super -z -c "yield f AND f" in.jsup - super -z -c "yield f AND n" in.jsup - super -z -c "yield f AND m" in.jsup - echo === NULL AND === - super -z -c "yield n AND t" in.jsup - super -z -c "yield n AND f" in.jsup - super -z -c "yield n AND n" in.jsup - super -z -c "yield n AND m" in.jsup - echo === MISSING AND === - super -z -c "yield missing(m AND t)" in.jsup - super -z -c "yield missing(m AND f)" in.jsup - super -z -c "yield missing(m AND n)" in.jsup - super -z -c "yield missing(m AND m)" in.jsup - echo === TRUE OR === - super -z -c "yield t OR t" in.jsup - super -z -c "yield t OR f" in.jsup - super -z -c "yield t OR n" in.jsup - super -z -c "yield t OR m" in.jsup - echo === FALSE OR === - super -z -c "yield f OR t" in.jsup - super -z -c "yield f OR f" in.jsup - super -z -c "yield f OR n" in.jsup - super -z -c "yield missing(f OR m)" in.jsup - echo === NULL OR === - super -z -c "yield n OR t" in.jsup - super -z -c "yield n OR f" in.jsup - super -z -c "yield n OR n" in.jsup - super -z -c "yield missing(n OR m)" in.jsup - echo === 
MISSING OR === - super -z -c "yield m OR t" in.jsup - super -z -c "yield m OR f" in.jsup - super -z -c "yield m OR n" in.jsup - super -z -c "yield missing(m OR m)" in.jsup - echo === NOT - super -z -c "yield !t" in.jsup - super -z -c "yield !f" in.jsup - super -z -c "yield missing(!m)" in.jsup - super -z -c "yield !n" in.jsup - super -z -c "yield !!f" in.jsup - -inputs: - - name: in.jsup - data: | - {t:true,f:false,m:error("missing"),n:null(bool)} - -outputs: - - name: stdout - data: | - === TRUE AND === - true - false - false - true - === FALSE AND === - false - false - false - false - === NULL AND === - false - false - false - false - === MISSING AND === - true - true - true - true - === TRUE OR === - true - true - true - true - === FALSE OR === - true - false - false - true - === NULL OR === - true - false - false - true - === MISSING OR === - true - false - false - true - === NOT - false - true - true - true - false diff --git a/runtime/vam/expr/logic.go b/runtime/vam/expr/logic.go index ad1d2e6fd5..b268a41b65 100644 --- a/runtime/vam/expr/logic.go +++ b/runtime/vam/expr/logic.go @@ -17,16 +17,24 @@ func NewLogicalNot(zctx *super.Context, e Evaluator) *Not { } func (n *Not) Eval(val vector.Any) vector.Any { - val, ok := EvalBool(n.zctx, val, n.expr) - if !ok { - return val - } - b := val.(*vector.Bool) - bits := make([]uint64, len(b.Bits)) - for k := range bits { - bits[k] = ^b.Bits[k] + return evalBool(n.zctx, n.eval, n.expr.Eval(val)) +} + +func (n *Not) eval(vecs ...vector.Any) vector.Any { + switch vec := vecs[0].(type) { + case *vector.Bool: + bits := make([]uint64, len(vec.Bits)) + for k := range bits { + bits[k] = ^vec.Bits[k] + } + return vec.CopyWithBits(bits) + case *vector.Const: + return vector.NewConst(super.NewBool(!vec.Value().Bool()), vec.Len(), vec.Nulls) + case *vector.Error: + return vec + default: + panic(vec) } - return b.CopyWithBits(bits) } type And struct { @@ -50,72 +58,136 @@ func NewLogicalOr(zctx *super.Context, lhs, rhs Evaluator) 
*Or { } func (a *And) Eval(val vector.Any) vector.Any { - //XXX change this logic to handle dynamic instead of simple ok decision, - // if there are any valid bools then we need to and them together - lhs, ok := EvalBool(a.zctx, val, a.lhs) - if !ok { - //XXX mix errors - return lhs + return evalBool(a.zctx, a.eval, a.lhs.Eval(val), a.rhs.Eval(val)) +} + +func (a *And) eval(vecs ...vector.Any) vector.Any { + if vecs[0].Len() == 0 { + return vecs[0] + } + lhs, rhs := vector.Under(vecs[0]), vector.Under(vecs[1]) + if _, ok := lhs.(*vector.Error); ok { + return a.andError(lhs, rhs) } - rhs, ok := EvalBool(a.zctx, val, a.rhs) - if !ok { - //XXX mix errors - return rhs + if _, ok := rhs.(*vector.Error); ok { + return a.andError(rhs, lhs) } - blhs := lhs.(*vector.Bool) - brhs := rhs.(*vector.Bool) - if len(blhs.Bits) != len(brhs.Bits) { - panic("length mistmatch") + blhs, brhs := toBool(lhs), toBool(rhs) + out := vector.And(blhs, brhs) + if blhs.Nulls == nil && brhs.Nulls == nil { + return out } - bits := make([]uint64, len(blhs.Bits)) - for k := range bits { - bits[k] = blhs.Bits[k] & brhs.Bits[k] + // any and false = false + // null and true = null + notfalse := vector.And(vector.Or(blhs, blhs.Nulls), vector.Or(brhs, brhs.Nulls)) + out.Nulls = vector.And(notfalse, vector.Or(blhs.Nulls, brhs.Nulls)) + return out +} + +func (a *And) andError(err vector.Any, vec vector.Any) vector.Any { + if _, ok := vec.(*vector.Error); ok { + return err + } + b := toBool(vec) + // anything and FALSE = FALSE + isError := vector.Or(b, b.Nulls) + var index []uint32 + for i := range err.Len() { + if isError.Value(i) { + index = append(index, i) + } } - //XXX intersect nulls - return blhs.CopyWithBits(bits) + if len(index) > 0 { + base := vector.InverseView(vec, index) + return vector.Combine(base, index, vector.NewView(err, index)) + } + return vec } func (o *Or) Eval(val vector.Any) vector.Any { - lhs, ok := EvalBool(o.zctx, val, o.lhs) - if !ok { - return lhs + return evalBool(o.zctx, 
o.eval, o.lhs.Eval(val), o.rhs.Eval(val)) +} + +func (o *Or) eval(vecs ...vector.Any) vector.Any { + if vecs[0].Len() == 0 { + return vecs[0] } - rhs, ok := EvalBool(o.zctx, val, o.rhs) - if !ok { - return rhs + lhs, rhs := vector.Under(vecs[0]), vector.Under(vecs[1]) + if _, ok := lhs.(*vector.Error); ok { + return o.orError(lhs, rhs) } - blhs := lhs.(*vector.Bool) - brhs := rhs.(*vector.Bool) - bits := make([]uint64, len(blhs.Bits)) - if len(blhs.Bits) != len(brhs.Bits) { - panic("length mistmatch") + if _, ok := rhs.(*vector.Error); ok { + return o.orError(rhs, lhs) } - for k := range bits { - bits[k] = blhs.Bits[k] | brhs.Bits[k] + blhs, brhs := toBool(lhs), toBool(rhs) + out := vector.Or(blhs, brhs) + if blhs.Nulls == nil && brhs.Nulls == nil { + return out } - //XXX intersect nulls - return blhs.CopyWithBits(bits) + nulls := vector.Or(blhs.Nulls, brhs.Nulls) + out.Nulls = vector.And(vector.Not(out), nulls) + return out } -// EvalBool evaluates e using val to computs a boolean result. For elements +func (o *Or) orError(err, vec vector.Any) vector.Any { + if _, ok := vec.(*vector.Error); ok { + return err + } + b := toBool(vec) + // not error if true or null + notError := vector.Or(b, b.Nulls) + var index []uint32 + for i := range b.Len() { + if !notError.Value(i) { + index = append(index, i) + } + } + if len(index) > 0 { + base := vector.InverseView(vec, index) + return vector.Combine(base, index, vector.NewView(err, index)) + } + return vec +} + +// evalBool evaluates e using val to compute a boolean result. For elements // of the result that are not boolean, an error is calculated for each non-bool // slot and they are returned as an error. If all of the value slots are errors, // then the return value is nil. 
-func EvalBool(zctx *super.Context, val vector.Any, e Evaluator) (vector.Any, bool) { - //XXX Eval could return a dynamic vector of errors and bools and we should - // handle this correctly so the logic above is really the fast path - // and a slower path will handle picking apart the dynamic vector. - // maybe we could have a generic way to traverse dynamics for - // appliers doing their thing along the slow path - if val, ok := vector.Under(e.Eval(val)).(*vector.Bool); ok { - return val, true - } - //XXX need to implement a sparse dynamic (vector.Collection?) - // and check for that here. - // for now, if the vector is not uniformly boolean, we return error. - // XXX example is a field ref a union of structs where the type of - // the referenced field changes... there can be an arbitrary number - // of underlying types though any given slot has only one type - // obviously at any given time. - return vector.NewStringError(zctx, "not type bool", val.Len()), false +func evalBool(zctx *super.Context, fn func(...vector.Any) vector.Any, vecs ...vector.Any) vector.Any { + return vector.Apply(false, func(vecs ...vector.Any) vector.Any { + for i, vec := range vecs { + if vec := vector.Under(vec); vec.Type() == super.TypeBool || vector.KindOf(vec) == vector.KindError { + vecs[i] = vec + } else { + vecs[i] = vector.NewWrappedError(zctx, "not type bool", vec) + } + } + return fn(vecs...) + }, vecs...) 
+} + +func toBool(vec vector.Any) *vector.Bool { + switch vec := vec.(type) { + case *vector.Const: + val := vec.Value() + if val.Bool() { + out := trueBool(vec.Len()) + out.Nulls = vec.Nulls + return out + } else { + return vector.NewBoolEmpty(0, vec.Nulls) + } + case *vector.Bool: + return vec + default: + panic(vec) + } +} + +func trueBool(n uint32) *vector.Bool { + vec := vector.NewBoolEmpty(n, nil) + for i := range vec.Bits { + vec.Bits[i] = ^uint64(0) + } + return vec } diff --git a/runtime/ztests/expr/logical-and.yaml b/runtime/ztests/expr/logical-and.yaml new file mode 100644 index 0000000000..e13bde0b75 --- /dev/null +++ b/runtime/ztests/expr/logical-and.yaml @@ -0,0 +1,71 @@ +zed: | + yield case when typeof(this) == <string> + then this + else a AND b + end + +vector: true + +input: | + "=== TRUE ===" + {a:true,b:true} + {a:true,b:false} + {a:true,b:null(bool)} + {a:true} + {a:true,b:"foo"} + "=== FALSE ===" + {a:false,b:true} + {a:false,b:false} + {a:false,b:null(bool)} + {a:false} + {a:false,b:"foo"} + "=== NULL ===" + {a:null(bool),b:true} + {a:null(bool),b:false} + {a:null(bool),b:null(bool)} + {a:null(bool)} + {a:null(bool),b:"foo"} + "=== MISSING ===" + {b:true} + {b:false} + {b:null(bool)} + {} + {b:"foo"} + "=== ERROR ===" + {a:"foo",b:true} + {a:"foo",b:false} + {a:"foo",b:null(bool)} + {a:"foo"} + {a:"foo",b:"foo"} + +output: | + "=== TRUE ===" + true + false + null(bool) + error("missing") + error({message:"not type bool",on:"foo"}) + "=== FALSE ===" + false + false + false + false + false + "=== NULL ===" + null(bool) + false + null(bool) + error("missing") + error({message:"not type bool",on:"foo"}) + "=== MISSING ===" + error("missing") + false + error("missing") + error("missing") + error("missing") + "=== ERROR ===" + error({message:"not type bool",on:"foo"}) + false + error({message:"not type bool",on:"foo"}) + error({message:"not type bool",on:"foo"}) + error({message:"not type bool",on:"foo"}) diff --git a/runtime/ztests/expr/logical-not.yaml 
b/runtime/ztests/expr/logical-not.yaml new file mode 100644 index 0000000000..d32ccfa269 --- /dev/null +++ b/runtime/ztests/expr/logical-not.yaml @@ -0,0 +1,17 @@ +zed: yield not this + +vector: true + +input: | + true + false + null(bool) + error("missing") + error("foo") + +output: | + false + true + null(bool) + error("missing") + error("foo") diff --git a/runtime/ztests/expr/logical-or.yaml b/runtime/ztests/expr/logical-or.yaml new file mode 100644 index 0000000000..6c4ff328b4 --- /dev/null +++ b/runtime/ztests/expr/logical-or.yaml @@ -0,0 +1,71 @@ +zed: | + yield case when typeof(this) == <string> + then this + else a OR b + end + +vector: true + +input: | + "=== TRUE ===" + {a:true,b:true} + {a:true,b:false} + {a:true,b:null(bool)} + {a:true} + {a:true,b:"foo"} + "=== FALSE ===" + {a:false,b:true} + {a:false,b:false} + {a:false,b:null(bool)} + {a:false} + {a:false,b:"foo"} + "=== NULL ===" + {a:null(bool),b:true} + {a:null(bool),b:false} + {a:null(bool),b:null(bool)} + {a:null(bool)} + {a:null(bool),b:"foo"} + "=== MISSING ===" + {b:true} + {b:false} + {b:null(bool)} + {} + {b:"foo"} + "=== ERROR ===" + {a:"foo",b:true} + {a:"foo",b:false} + {a:"foo",b:null(bool)} + {a:"foo"} + {a:"foo",b:"foo"} + +output: | + "=== TRUE ===" + true + true + true + true + true + "=== FALSE ===" + true + false + null(bool) + error("missing") + error({message:"not type bool",on:"foo"}) + "=== NULL ===" + true + null(bool) + null(bool) + null(bool) + null(bool) + "=== MISSING ===" + true + error("missing") + null(bool) + error("missing") + error("missing") + "=== ERROR ===" + true + error({message:"not type bool",on:"foo"}) + null(bool) + error({message:"not type bool",on:"foo"}) + error({message:"not type bool",on:"foo"}) diff --git a/vector/bool.go b/vector/bool.go index 038d76d570..566393ff08 100644 --- a/vector/bool.go +++ b/vector/bool.go @@ -2,6 +2,7 @@ package vector import ( "math/bits" + "slices" "strings" "github.com/brimdata/super" @@ -52,7 +53,7 @@ func (b *Bool) 
CopyWithBits(bits []uint64) *Bool { } func (b *Bool) Serialize(builder *zcode.Builder, slot uint32) { - if b.Nulls.Value(slot) { + if b != nil && b.Nulls.Value(slot) { builder.Append(nil) } else { builder.Append(super.EncodeBool(b.Value(slot))) @@ -94,6 +95,17 @@ func (b *Bool) String() string { return s.String() } +func Not(a *Bool) *Bool { + if a == nil { + panic("not: nil bool") + } + bits := slices.Clone(a.Bits) + for i := range bits { + bits[i] = ^a.Bits[i] + } + return a.CopyWithBits(bits) +} + func Or(a, b *Bool) *Bool { if b == nil { return a @@ -111,6 +123,23 @@ func Or(a, b *Bool) *Bool { return out } +func And(a, b *Bool) *Bool { + if b == nil { + return nil + } + if a == nil { + return nil + } + if a.Len() != b.Len() { + panic("and'ing two different length bool vectors") + } + out := NewBoolEmpty(a.Len(), nil) + for i := range len(a.Bits) { + out.Bits[i] = a.Bits[i] & b.Bits[i] + } + return out +} + // BoolValue returns the value of slot in vec if the value is a Boolean. It // returns false otherwise. 
func BoolValue(vec Any, slot uint32) bool { @@ -136,6 +165,11 @@ func NullsOf(v Any) *Bool { return v.Nulls case *Bytes: return v.Nulls + case *Bool: + if v != nil { + return v.Nulls + } + return nil case *Const: if v.Value().IsNull() { out := NewBoolEmpty(v.Len(), nil) diff --git a/vector/kind.go b/vector/kind.go index 03bcf53884..4c8ddb4ce7 100644 --- a/vector/kind.go +++ b/vector/kind.go @@ -17,6 +17,7 @@ const ( KindString = 4 KindBytes = 5 KindType = 6 + KindError = 7 ) const ( @@ -40,6 +41,8 @@ func KindOf(v Any) Kind { return KindBytes case *String: return KindString + case *Error: + return KindError case *TypeValue: return KindType case *Dict: diff --git a/vector/view.go b/vector/view.go index 624a9f072a..60c8ec5f3d 100644 --- a/vector/view.go +++ b/vector/view.go @@ -54,6 +54,18 @@ func NewView(val Any, index []uint32) Any { return &View{val, index} } +func InverseView(vec Any, index []uint32) Any { + var reverse []uint32 + for i := range vec.Len() { + if len(index) > 0 && index[0] == i { + index = index[1:] + continue + } + reverse = append(reverse, i) + } + return NewView(vec, reverse) +} + func NewBoolView(vec *Bool, index []uint32) *Bool { if vec == nil { return nil