Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use demand in vector.read #4850

Merged
merged 27 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
86292ca
Move demand into a separate package so that we can import it in vngio…
jamii Nov 1, 2023
22b7fa9
Remove unused field
jamii Nov 1, 2023
bef6161
Store names in RecordReader
jamii Nov 1, 2023
3df8f5b
Use demand in vng reader
jamii Nov 1, 2023
5efd34b
Clean up demand
jamii Nov 1, 2023
53b5dab
Use demand to change type, to avoid materializing pointless null fields
jamii Nov 3, 2023
e860314
Fuzz vng vs zng queries
jamii Nov 3, 2023
a622c41
Rebase
jamii Nov 3, 2023
888a798
Use random queries
jamii Nov 3, 2023
3e05ba7
Hook up demand
jamii Nov 3, 2023
0778ab8
Name fields so that they can be referenced in the query
jamii Nov 3, 2023
8c6041e
Add some examples to corpus
jamii Nov 3, 2023
bd91b08
Conditionally cast reader so the test works even without ZED_USE_VECTOR
jamii Nov 3, 2023
c7c4b10
Move fuzz helper code into a separate package
jamii Nov 3, 2023
c63cbb6
Copy style fixes from jamii-fuzz-vng
jamii Nov 5, 2023
3f25575
Make demand representation private
jamii Nov 5, 2023
93f3b0d
Use fuzz package in vng_test
jamii Nov 6, 2023
104ccf5
Update compiler/optimizer/demand/demand.go
jamii Nov 7, 2023
5e9ad57
Update compiler/optimizer/demand/demand.go
jamii Nov 7, 2023
8b70169
Update compiler/optimizer/demand/demand.go
jamii Nov 7, 2023
8a6a75e
Update zio/vngio/reader.go
jamii Nov 7, 2023
e54589e
Update vector/materializer.go
jamii Nov 7, 2023
1040fad
Style fixes
jamii Nov 7, 2023
cc0218c
Update vector/read.go
jamii Nov 7, 2023
1552baa
Style fixes
jamii Nov 7, 2023
2c177b1
Thread demand through anyio.NewReader
jamii Nov 7, 2023
2585cda
Style fixes
jamii Nov 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Copy style fixes from jamii-fuzz-vng
  • Loading branch information
jamii committed Nov 6, 2023
commit c63cbb6babf77f9b87c8b4ce4a2b1bea2511d8c6
93 changes: 35 additions & 58 deletions fuzz/fuzz.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"github.com/golang/mock/gomock"
"io"
"math"
"net/netip"
"testing"
Expand All @@ -19,57 +21,40 @@ import (
"github.com/brimdata/zed/pkg/nano"
"github.com/brimdata/zed/pkg/storage/mock"
"github.com/brimdata/zed/runtime"
"github.com/brimdata/zed/zbuf"
"github.com/brimdata/zed/zcode"
"github.com/brimdata/zed/zio"
"github.com/brimdata/zed/zio/vngio"
"github.com/brimdata/zed/zio/zngio"
"github.com/brimdata/zed/zson"
"github.com/stretchr/testify/require"
"github.com/x448/float16"
)

func WriteZng(t *testing.T, valuesIn []zed.Value, file *MockFile) {
writer := zngio.NewWriter(file)
for i := range valuesIn {
err := writer.Write(&valuesIn[i])
if err != nil {
t.Fatalf("%v", err)
}
}
err := writer.Close()
if err != nil {
t.Fatalf("%v", err)
}
func WriteZng(t *testing.T, valuesIn []zed.Value, buf *bytes.Buffer) {
writer := zngio.NewWriter(zio.NopCloser(buf))
require.NoError(t, zio.Copy(writer, zbuf.NewArray(valuesIn)))
require.NoError(t, writer.Close())
}

func WriteVng(t *testing.T, valuesIn []zed.Value, file *MockFile) {
writer, err := vngio.NewWriter(file)
if err != nil {
t.Fatalf("%v", err)
}
for i := range valuesIn {
err := writer.Write(&valuesIn[i])
if err != nil {
t.Fatalf("%v", err)
}
}
err = writer.Close()
if err != nil {
t.Fatalf("%v", err)
}
func WriteVng(t *testing.T, valuesIn []zed.Value, buf *bytes.Buffer, opts vngio.WriterOpts) {
writer, err := vngio.NewWriterWithOpts(zio.NopCloser(buf), opts)
require.NoError(t, err)
require.NoError(t, zio.Copy(writer, zbuf.NewArray(valuesIn)))
require.NoError(t, writer.Close())
}

func RunQueryZng(t *testing.T, file *MockFile, querySource string) []zed.Value {
func RunQueryZng(t *testing.T, buf *bytes.Buffer, querySource string) []zed.Value {
zctx := zed.NewContext()
readers := []zio.Reader{zngio.NewReader(zctx, bytes.NewReader(file.Bytes()))}
readers := []zio.Reader{zngio.NewReader(zctx, buf)}
defer zio.CloseReaders(readers)
return RunQuery(t, zctx, readers, querySource, func(_ demand.Demand) {})
}

func RunQueryVng(t *testing.T, file *MockFile, querySource string) []zed.Value {
func RunQueryVng(t *testing.T, buf *bytes.Buffer, querySource string) []zed.Value {
zctx := zed.NewContext()
reader, err := vngio.NewReader(zctx, bytes.NewReader(file.Bytes()))
if err != nil {
t.Fatalf("%v", err)
}
reader, err := vngio.NewReader(zctx, bytes.NewReader(buf.Bytes()))
require.NoError(t, err)
readers := []zio.Reader{reader}
defer zio.CloseReaders(readers)
return RunQuery(t, zctx, readers, querySource, func(demandIn demand.Demand) {
Expand Down Expand Up @@ -111,12 +96,10 @@ func RunQuery(t *testing.T, zctx *zed.Context, readers []zio.Reader, querySource
}

// Run query
valuesOut := make([]zed.Value, 0)
var valuesOut []zed.Value
for {
batch, err := query.Pull(false)
if err != nil {
t.Fatalf("%v", err)
}
require.NoError(t, err)
if batch == nil {
break
}
Expand Down Expand Up @@ -153,21 +136,14 @@ func CompareValues(t *testing.T, valuesExpected []zed.Value, valuesActual []zed.
}
}

type MockFile struct {
bytes.Buffer
}

func (f *MockFile) Close() error { return nil }

func GenValues(b *bytes.Reader, context *zed.Context, types []zed.Type) []zed.Value {
values := make([]zed.Value, 0)
var values []zed.Value
var builder zcode.Builder
for GenByte(b) != 0 {
typ := types[int(GenByte(b))%len(types)]
builder.Reset()
GenValue(b, context, typ, &builder)
it := builder.Bytes().Iter()
values = append(values, *zed.NewValue(typ, it.Next()).Copy())
values = append(values, *zed.NewValue(typ, builder.Bytes().Body()))
}
return values
}
Expand Down Expand Up @@ -199,7 +175,7 @@ func GenValue(b *bytes.Reader, context *zed.Context, typ zed.Type, builder *zcod
case zed.TypeTime:
builder.Append(zed.EncodeTime(nano.Ts(int64(binary.LittleEndian.Uint64(GenBytes(b, 8))))))
case zed.TypeFloat16:
panic("Unreachable")
builder.Append(zed.EncodeFloat16(float32(float16.Frombits(binary.LittleEndian.Uint16(GenBytes(b, 4))))))
case zed.TypeFloat32:
builder.Append(zed.EncodeFloat32(math.Float32frombits(binary.LittleEndian.Uint32(GenBytes(b, 4)))))
case zed.TypeFloat64:
Expand All @@ -211,9 +187,9 @@ func GenValue(b *bytes.Reader, context *zed.Context, typ zed.Type, builder *zcod
case zed.TypeString:
builder.Append(zed.EncodeString(string(GenBytes(b, int(GenByte(b))))))
case zed.TypeIP:
builder.Append(zed.EncodeIP(netip.AddrFrom16(*(*[16]byte)(GenBytes(b, 16)))))
builder.Append(zed.EncodeIP(netip.AddrFrom16([16]byte(GenBytes(b, 16)))))
case zed.TypeNet:
ip := netip.AddrFrom16(*(*[16]byte)(GenBytes(b, 16)))
ip := netip.AddrFrom16([16]byte(GenBytes(b, 16)))
numBits := int(GenByte(b)) % ip.BitLen()
net, err := ip.Prefix(numBits)
if err != nil {
Expand Down Expand Up @@ -268,7 +244,7 @@ func GenValue(b *bytes.Reader, context *zed.Context, typ zed.Type, builder *zcod
}

func GenTypes(b *bytes.Reader, context *zed.Context, depth int) []zed.Type {
types := make([]zed.Type, 0)
var types []zed.Type
for len(types) == 0 || GenByte(b) != 0 {
types = append(types, GenType(b, context, depth))
}
Expand Down Expand Up @@ -299,9 +275,7 @@ func GenType(b *bytes.Reader, context *zed.Context, depth int) zed.Type {
case 9:
return zed.TypeTime
case 10:
// TODO Find a way to convert u16 to float16.
//return zed.TypeFloat16
return zed.TypeNull
return zed.TypeFloat16
case 11:
return zed.TypeFloat32
case 12:
Expand All @@ -322,7 +296,7 @@ func GenType(b *bytes.Reader, context *zed.Context, depth int) zed.Type {
panic("Unreachable")
}
} else {
depth := depth - 1
depth--
switch GenByte(b) % 5 {
case 0:
fieldTypes := GenTypes(b, context, depth)
Expand Down Expand Up @@ -353,7 +327,7 @@ func GenType(b *bytes.Reader, context *zed.Context, depth int) zed.Type {
// TODO: There are some weird corners around unions that contain null or duplicate types, e.g.:
// vng_test.go:107: comparing: in[0]=null((null,null)) vs out[0]=null((null,null))
// vng_test.go:112: values have different zng bytes: [1 0] vs [2 2 0]
unionTypes := make([]zed.Type, 0)
var unionTypes []zed.Type
for _, typ := range types {
skip := false
if typ == zed.TypeNull {
Expand All @@ -380,7 +354,10 @@ func GenType(b *bytes.Reader, context *zed.Context, depth int) zed.Type {

func GenByte(b *bytes.Reader) byte {
// If we're out of bytes, return 0.
byte, _ := b.ReadByte()
byte, err := b.ReadByte()
if err != nil && !errors.Is(err, io.EOF) {
panic(err)
}
return byte
}

Expand All @@ -393,7 +370,7 @@ func GenBytes(b *bytes.Reader, n int) []byte {
}

func GenAscii(b *bytes.Reader) string {
bytes := make([]byte, 0)
var bytes []byte
for {
byte := GenByte(b)
if byte == 0 {
Expand Down
4 changes: 2 additions & 2 deletions vector/read.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ func read(context *zed.Context, reader vngvector.Reader, demandOut demand.Demand
return readPrimitive(context, reader.Typ, func() ([]byte, error) { return reader.ReadBytes() })

case *vngvector.RecordReader:
fields := make([]vector, 0)
var fields []vector
for i, fieldReader := range reader.Values {
demandValueOut := demand.GetKey(demandOut, reader.Names[i])
if !demand.IsNone(demandValueOut) {
Expand Down Expand Up @@ -559,7 +559,7 @@ func typeAfterDemand(context *zed.Context, reader vngvector.Reader, demandOut de

case *vngvector.RecordReader:
typ := typ.(*zed.TypeRecord)
fields := make([]zed.Field, 0)
var fields []zed.Field
for i, fieldReader := range reader.Values {
demandValueOut := demand.GetKey(demandOut, reader.Names[i])
if !demand.IsNone(demandValueOut) {
Expand Down
16 changes: 10 additions & 6 deletions vector/vector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"github.com/brimdata/zed"
"github.com/brimdata/zed/fuzz"
"github.com/brimdata/zed/zio/vngio"
)

func FuzzQuery(f *testing.F) {
Expand All @@ -26,13 +27,16 @@ func FuzzQuery(f *testing.F) {
// t.Logf("value: in[%v]=%v", i, zson.String(&values[i]))
//}

var zngFile fuzz.MockFile
fuzz.WriteZng(t, values, &zngFile)
resultZng := fuzz.RunQueryZng(t, &zngFile, querySource)
var zngBuf bytes.Buffer
fuzz.WriteZng(t, values, &zngBuf)
resultZng := fuzz.RunQueryZng(t, &zngBuf, querySource)

var vngFile fuzz.MockFile
fuzz.WriteVng(t, values, &vngFile)
resultVng := fuzz.RunQueryVng(t, &vngFile, querySource)
var vngBuf bytes.Buffer
fuzz.WriteVng(t, values, &vngBuf, vngio.WriterOpts{
SkewThresh: vngio.DefaultSkewThresh,
ColumnThresh: vngio.DefaultColumnThresh,
})
resultVng := fuzz.RunQueryVng(t, &vngBuf, querySource)

fuzz.CompareValues(t, resultZng, resultVng)
})
Expand Down