Skip to content

Commit

Permalink
Low-hanging fruit in vector.Read (#4881)
Browse files Browse the repository at this point in the history
* Read directly from segmap rather than using `ReadBytes`. 
* Include a count in each segment (this is a breaking change to vng). Use this to pre-size slices.
* Reuse a single buffer across all internal read calls.
* Avoid unnecessary copies for string/bytes vectors.
* Correctly handle dict vectors, now that they're enabled.
* Add a benchmark tracking vng read performance and comparing it to zng and raw varints.

On wrccdc_mono_10m.vng this cuts vector.Read time from 270ms -> 140ms for id.resp_p and 470ms -> 180ms for uid.
  • Loading branch information
jamii authored Nov 15, 2023
1 parent c17a623 commit 48342ce
Show file tree
Hide file tree
Showing 13 changed files with 499 additions and 404 deletions.
6 changes: 1 addition & 5 deletions runtime/op/meta/vec_sequence.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,7 @@ func newVecSequenceScanner(ctx context.Context, zctx *zed.Context, pool *lake.Po
if err != nil {
return nil, ksuid.KSUID{}, err
}
vngReader, err := vng.NewReader(object)
if err != nil {
return nil, ksuid.KSUID{}, err
}
vectorReader := vector.NewReader(vngReader, demandOut)
vectorReader := vector.NewReader(object, demandOut)
scanner, err := zbuf.NewScanner(ctx, vectorReader, nil)
if err != nil {
return nil, ksuid.KSUID{}, err
Expand Down
23 changes: 23 additions & 0 deletions vector/materializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,15 @@ func (v *constants) newMaterializer() materializer {
}
}

// newMaterializer returns a stateful materializer for a dict vector.
// Each invocation looks up the next tag in v.tags, appends the bytes of
// the corresponding v.dict entry to the builder, and then advances to
// the following tag.
func (v *dict) newMaterializer() materializer {
	pos := 0
	return func(b *zcode.Builder) {
		val := v.dict[v.tags[pos]].Value.Bytes()
		b.Append(val)
		// Advance only after the append so the position tracks values emitted.
		pos++
	}
}

func (v *maps) newMaterializer() materializer {
var index int
keyMaterializer := v.keys.newMaterializer()
Expand Down Expand Up @@ -268,6 +277,20 @@ func (v *records) newMaterializer() materializer {
}
}

// newMaterializer returns a stateful materializer for a sets vector.
// Each invocation reads the next entry of v.lengths, opens a container
// on the builder, runs the element materializer that many times, closes
// the container, and then advances to the following length.
func (v *sets) newMaterializer() materializer {
	// A single element materializer is shared across all calls, so its
	// internal position carries over from one set to the next.
	materializeElem := v.elems.newMaterializer()
	pos := 0
	return func(b *zcode.Builder) {
		// Read the length before opening the container, as the original does.
		remaining := int(v.lengths[pos])
		b.BeginContainer()
		for ; remaining > 0; remaining-- {
			materializeElem(b)
		}
		b.EndContainer()
		pos++
	}
}

func (v *unions) newMaterializer() materializer {
var index int
payloadMaterializers := make([]materializer, len(v.payloads))
Expand Down
Loading

0 comments on commit 48342ce

Please sign in to comment.