Skip to content

Commit

Permalink
added observation points to track performance/resource usage of filter
Browse files Browse the repository at this point in the history
- ability to tell when collisions happen
- get number of items in the filter
- get memory usage of filter
  • Loading branch information
pkaeding committed Nov 21, 2017
1 parent 31792eb commit bbc6efc
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 35 deletions.
43 changes: 39 additions & 4 deletions go/oppobloom/oppobloom.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ import (
)

// Filter is a fixed-size set of byte slices in which every id hashes to
// exactly one slot of array. A positive membership answer is definitive; a
// negative answer may be a false negative because a later id can overwrite an
// earlier one that hashed to the same slot.
type Filter struct {
	// bytesUsed is the total length of the ids currently stored. It is updated
	// with 64-bit atomic operations, so it is deliberately the first field:
	// only the first word of an allocated struct is guaranteed to be 64-bit
	// aligned on 32-bit platforms (ARM, 386, 32-bit MIPS).
	bytesUsed uint64
	// array holds one slot per hash bucket.
	array []*[]byte
	// sizeMask is len(array)-1; the length is a power of two, so masking a
	// hash with it yields a valid slot index.
	sizeMask uint32
	// numEntries counts ids that were stored into a previously empty slot.
	// It is updated atomically.
	numEntries uint32
}

var ErrSizeTooLarge = errors.New("oppobloom: size given too large to round to a power of 2")
Expand All @@ -36,16 +38,49 @@ func NewFilter(size int) (*Filter, error) {
size = int(math.Pow(2, math.Ceil(math.Log2(float64(size)))))
slice := make([]*[]byte, size)
sizeMask := uint32(size - 1)
return &Filter{slice, sizeMask}, nil
return &Filter{array: slice, sizeMask: sizeMask}, nil
}

// Contains adds id to the set and reports whether it was already present.
// A true result is definitive; a false result may be a false negative.
func (f *Filter) Contains(id []byte) bool {
	// The collision flag is only of interest to callers of ContainsCollision.
	present, _ := f.ContainsCollision(id)
	return present
}

// ContainsCollision behaves like Contains, but also reports whether id
// collided with a different entry stored under the same key.
//
// contains is true when the value previously stored at id's slot equals id.
// collision is true when that previous value was non-empty and differs from
// id. If both are false the previous value was empty, so the negative answer
// is not a false negative. It may also be interesting to track how often
// collisions happen; that tracking is left to the caller.
//
// NOTE(review): bytes.Equal treats nil and empty slices as equal, so an empty
// id is reported as contained whenever the previous value is empty -- confirm
// whether empty ids are expected by callers.
func (f *Filter) ContainsCollision(id []byte) (contains bool, collision bool) {
	h := md5UintHash{md5.New()}
	h.Write(id)
	index := int32(h.Sum32() & f.sizeMask)
	oldID := getAndSet(f.array, index, id)

	contains = bytes.Equal(oldID, id)
	collision = len(oldID) != 0 && !contains
	if !contains && !collision {
		atomic.AddUint32(&f.numEntries, 1)
	}

	// The stored length may shrink as well as grow. Converting a negative
	// int64 to uint64 wraps to its two's-complement value, so a single
	// unsigned add performs the subtraction.
	delta := int64(len(id)) - int64(len(oldID))
	atomic.AddUint64(&f.bytesUsed, uint64(delta))

	return contains, collision
}

// NumEntries reports how many entries have been added to the set. The count
// goes up only when a new entry is added without colliding with an existing
// entry.
func (f *Filter) NumEntries() uint32 {
	count := atomic.LoadUint32(&f.numEntries)
	return count
}

// BytesUsed reports the total size, in bytes, of the data held by the Filter.
func (f *Filter) BytesUsed() uint64 {
	total := atomic.LoadUint64(&f.bytesUsed)
	return total
}

func (f *Filter) Size() int {
Expand Down
104 changes: 73 additions & 31 deletions go/oppobloom/oppobloom_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,57 +9,73 @@ import (

// TestTheBasics walks a two-slot filter through inserts, repeated lookups and
// collisions, checking the contains/collision results together with the
// NumEntries and BytesUsed counters after each step.
func TestTheBasics(t *testing.T) {
	f, err := NewFilter(2)
	if err != nil {
		// Every assertion below dereferences f, so stop here.
		t.Fatalf("NewFilter(2) returned unexpected error: %v", err)
	}
	twentyNineID := []byte{27, 28, 29}
	thirtyID := []byte{27, 28, 30}
	thirtyThreeID := []byte{27, 28, 33}
	shorterID := []byte{27, 28}

	shouldNotContainShouldNotCollide(t, "nothing should be contained at all", f, twentyNineID)
	numEntriesShouldBe(t, f, 1)
	bytesUsedShouldBe(t, f, 3)
	shouldContain(t, "now it should", f, twentyNineID)
	numEntriesShouldBe(t, f, 1)
	bytesUsedShouldBe(t, f, 3)
	shouldNotContainShouldNotCollide(t, "false unless the hash collides", f, thirtyID)
	numEntriesShouldBe(t, f, 2)
	bytesUsedShouldBe(t, f, 6)
	shouldContain(t, "original should still return true", f, twentyNineID)
	shouldContain(t, "new array should still return true", f, thirtyID)
	numEntriesShouldBe(t, f, 2)

	// Handling collisions. {27, 28, 33} and {27, 28, 30} hash to the same
	// index using the current hash function inside Filter.
	shouldNotContainShouldCollide(t, "colliding array returns false", f, thirtyThreeID)
	numEntriesShouldBe(t, f, 2)
	bytesUsedShouldBe(t, f, 6)
	shouldContain(t,
		"colliding array returns true in second call", f, thirtyThreeID)
	shouldNotContainShouldCollide(t, "original colliding array returns false", f, thirtyID)
	bytesUsedShouldBe(t, f, 6)
	shouldContain(t, "original colliding array returns true", f, thirtyID)
	shouldNotContainShouldCollide(t, "colliding array returns false", f, thirtyThreeID)
	numEntriesShouldBe(t, f, 2)
	bytesUsedShouldBe(t, f, 6)

	// A shorter id replacing a longer one must shrink BytesUsed.
	shouldNotContainShouldCollide(t, "colliding shorter array returns false", f, shorterID)
	numEntriesShouldBe(t, f, 2)
	bytesUsedShouldBe(t, f, 5)
}

func TestSizeRounding(t *testing.T) {
f, _ := NewFilter(3);
if f.Size() != 4 {
t.Errorf("3 should round to 4, rounded to: ", f.Size())
f, _ := NewFilter(3)
if f.Size() != 4 {
t.Errorf("3 should round to 4, rounded to: %d", f.Size())
}
f, _ = NewFilter(4);
f, _ = NewFilter(4)
if f.Size() != 4 {
t.Errorf("4 should round to 4", f.Size())
t.Errorf("4 should round to 4, was: %d", f.Size())
}
f, _ = NewFilter(129)
if f.Size() != 256 {
t.Errorf("129 should round to 256", f.Size())
t.Errorf("129 should round to 256, was: %d", f.Size())
}
}

func TestTooLargeSize(t *testing.T) {
size := (1<<30) + 1;
f, err := NewFilter(size)
if (err != ErrSizeTooLarge) {
size := (1 << 30) + 1
f, err := NewFilter(size)
if err != ErrSizeTooLarge {
t.Errorf("did not error out on a too-large filter size")
}
if (f != nil) {
if f != nil {
t.Errorf("did not return nil on a too-large filter size")
}
}

func TestTooSmallSize(t *testing.T) {
f, err := NewFilter(0)
if (err != ErrSizeTooSmall) {
f, err := NewFilter(0)
if err != ErrSizeTooSmall {
t.Errorf("did not error out on a too small filter size")
}
if (f != nil) {
if f != nil {
t.Errorf("did not return nil on a too small filter size")
}
}
Expand All @@ -70,8 +86,34 @@ func shouldContain(t *testing.T, msg string, f *Filter, id []byte) {
}
}

func shouldNotContain(t *testing.T, msg string, f *Filter, id []byte) {
if f.Contains(id) {
// shouldNotContainShouldCollide adds id to f and fails the test unless the
// filter reports it as absent and as colliding with another entry. Only the
// first failed expectation is reported.
func shouldNotContainShouldCollide(t *testing.T, msg string, f *Filter, id []byte) {
	found, collided := f.ContainsCollision(id)
	switch {
	case found:
		t.Errorf("should not contain, %s: %v", msg, id)
	case !collided:
		t.Errorf("should collide, %s: %v", msg, id)
	}
}

// shouldNotContainShouldNotCollide adds id to f and fails the test unless the
// filter reports it as absent and as not colliding with another entry. Only
// the first failed expectation is reported.
func shouldNotContainShouldNotCollide(t *testing.T, msg string, f *Filter, id []byte) {
	found, collided := f.ContainsCollision(id)
	switch {
	case found:
		t.Errorf("should not contain, %s: %v", msg, id)
	case collided:
		t.Errorf("should not collide, %s: %v", msg, id)
	}
}

// numEntriesShouldBe fails the test when f.NumEntries() differs from expected.
func numEntriesShouldBe(t *testing.T, f *Filter, expected uint32) {
	if got := f.NumEntries(); got != expected {
		t.Errorf("expected NumEntries to be: %d, but was: %d", expected, got)
	}
}

// bytesUsedShouldBe fails the test when f.BytesUsed() differs from expected.
func bytesUsedShouldBe(t *testing.T, f *Filter, expected uint64) {
	if got := f.BytesUsed(); got != expected {
		t.Errorf("expected BytesUsed to be: %d, but was: %d", expected, got)
	}
}

0 comments on commit bbc6efc

Please sign in to comment.