-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pcap index: Compress offsets that exceed threshold
Introduce ranger.Envelope.Merge, which merges two Envelopes into a single Envelope. This fixes a bug where indexing a large pcap causes the system to panic with an out-of-memory (OOM) error. When constructing the time index for a pcap, compress the array of offset points into an Envelope when the size of the array reaches a certain threshold. Subsequent compressions are merged into the section's Envelope, keeping the memory footprint low. The downside of this approach is that, for the indexes of large pcap files, the difference between adjacent X values starts out very wide and then narrows as one iterates through the Bins. This will result in larger pcap scans (i.e., slower searches) for hits at the beginning of the file and smaller scans (i.e., faster searches) towards the end. Consensus was that the difference in search times probably won't be noticeable enough to warrant introducing a fancier algorithm. Filed #1095 to revisit. Closes #1039
- Loading branch information
Showing
3 changed files
with
167 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,81 @@ | ||
package ranger_test | ||
package ranger | ||
|
||
import ( | ||
"math" | ||
"testing" | ||
|
||
"github.com/brimsec/zq/pkg/ranger" | ||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func find(pts []ranger.Point, nbin int, r ranger.Range) ranger.Domain { | ||
e := ranger.NewEnvelope(pts, nbin) | ||
func find(pts []Point, nbin int, r Range) Domain { | ||
e := NewEnvelope(pts, nbin) | ||
return e.FindSmallestDomain(r) | ||
} | ||
|
||
// func TestEnvelope(t *testing.T) { | ||
// t.Parallel() | ||
// pts := []Point{ | ||
// {1, 0x100}, | ||
// {2, 0x120}, | ||
// {3, 0x110}, | ||
// {4, 0x130}, | ||
// {5, 0x150}, | ||
// {6, 0x150}, | ||
// } | ||
// d := find(pts, 2, Range{0x100, 0x120}) | ||
// assert.Exactly(t, Domain{1, 5}, d) | ||
// } | ||
|
||
func TestEnvelope(t *testing.T) { | ||
t.Parallel() | ||
pts := []ranger.Point{ | ||
pts := []Point{ | ||
{1, 0x100}, | ||
{2, 0x120}, | ||
{3, 0x110}, | ||
{4, 0x130}, | ||
{5, 0x150}, | ||
{6, 0x150}, | ||
} | ||
d := find(pts, 0, ranger.Range{0x151, 0x151}) | ||
assert.Exactly(t, ranger.Domain{}, d) | ||
d = find(pts, 0, ranger.Range{0, 0x90}) | ||
assert.Exactly(t, ranger.Domain{}, d) | ||
d = find(pts, 0, ranger.Range{0x90, 0x111}) | ||
assert.Exactly(t, ranger.Domain{1, 4}, d) | ||
d = find(pts, 0, ranger.Range{0x115, 0x135}) | ||
assert.Exactly(t, ranger.Domain{2, 5}, d) | ||
d = find(pts, 0, ranger.Range{0x150, 0x150}) | ||
assert.Exactly(t, ranger.Domain{5, math.MaxUint64}, d) | ||
d = find(pts, 0, ranger.Range{0x151, 0x151}) | ||
assert.Exactly(t, ranger.Domain{}, d) | ||
d = find(pts, 3, ranger.Range{0x100, 0x109}) | ||
assert.Exactly(t, ranger.Domain{1, 3}, d) | ||
d = find(pts, 3, ranger.Range{0x100, 0x120}) | ||
assert.Exactly(t, ranger.Domain{1, 5}, d) | ||
d = find(pts, 3, ranger.Range{0x100, 0x130}) | ||
assert.Exactly(t, ranger.Domain{1, 5}, d) | ||
d := find(pts, 0, Range{0x151, 0x151}) | ||
assert.Exactly(t, Domain{}, d) | ||
d = find(pts, 0, Range{0, 0x90}) | ||
assert.Exactly(t, Domain{}, d) | ||
d = find(pts, 0, Range{0x90, 0x111}) | ||
assert.Exactly(t, Domain{1, 4}, d) | ||
d = find(pts, 0, Range{0x115, 0x135}) | ||
assert.Exactly(t, Domain{2, 5}, d) | ||
d = find(pts, 0, Range{0x150, 0x150}) | ||
assert.Exactly(t, Domain{5, math.MaxUint64}, d) | ||
d = find(pts, 0, Range{0x151, 0x151}) | ||
assert.Exactly(t, Domain{}, d) | ||
d = find(pts, 3, Range{0x100, 0x109}) | ||
assert.Exactly(t, Domain{1, 3}, d) | ||
d = find(pts, 3, Range{0x100, 0x120}) | ||
assert.Exactly(t, Domain{1, 5}, d) | ||
d = find(pts, 3, Range{0x100, 0x130}) | ||
assert.Exactly(t, Domain{1, 5}, d) | ||
pts[5].Y = 0x149 | ||
d = find(pts, 3, ranger.Range{0x100, 0x149}) | ||
assert.Exactly(t, ranger.Domain{1, math.MaxUint64}, d) | ||
d = find(pts, 3, Range{0x100, 0x149}) | ||
assert.Exactly(t, Domain{1, math.MaxUint64}, d) | ||
} | ||
|
||
func TestUnion(t *testing.T) { | ||
env1 := Envelope{ | ||
{1, Range{0x100, 0x120}}, | ||
{3, Range{0x110, 0x130}}, | ||
{5, Range{0x150, 0x150}}, | ||
} | ||
env2 := Envelope{ | ||
{7, Range{0x110, 0x160}}, | ||
{9, Range{0x170, 0x180}}, | ||
{11, Range{0x190, 0x190}}, | ||
{13, Range{0x180, 0x230}}, | ||
} | ||
assert.Exactly(t, env1.Merge(env2), Envelope{ | ||
{1, Range{0x100, 0x130}}, | ||
{5, Range{0x110, 0x160}}, | ||
{9, Range{0x170, 0x190}}, | ||
{13, Range{0x180, 0x230}}, | ||
}) | ||
assert.Exactly(t, Envelope{}.Merge(env1), env1) | ||
} |