Skip to content

Commit

Permalink
Merge branch 'main' into btree-insert-find
Browse files Browse the repository at this point in the history
  • Loading branch information
friendlymatthew authored Jun 28, 2024
2 parents 1f1d18c + 64f1b73 commit 0063103
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 35 deletions.
1 change: 0 additions & 1 deletion pkg/bptree/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ func (n *BPTreeNode) NumPointers() int {
}

func (n *BPTreeNode) Size() int64 {

size := 4 // number of keys
for _, k := range n.Keys {
o := encoding.SizeVarint(k.DataPointer.Offset)
Expand Down
4 changes: 3 additions & 1 deletion pkg/btree/btree.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ type BTree struct {
MetaPage metapage.MetaPage
PageFile pagefile.ReadWriteSeekPager

VectorDim uint64

Width uint16
}

Expand All @@ -36,7 +38,7 @@ func (t *BTree) readNode(offset uint64) (*BTreeNode, error) {
return nil, err
}

node := &BTreeNode{Width: t.Width}
node := &BTreeNode{Width: t.Width, VectorDim: t.VectorDim}
buf := make([]byte, t.PageFile.PageSize())

if _, err := t.PageFile.Read(buf); err != nil {
Expand Down
98 changes: 70 additions & 28 deletions pkg/btree/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,51 @@ package btree

import (
"encoding/binary"
"fmt"
"github.com/kevmo314/appendable/pkg/encoding"
"github.com/kevmo314/appendable/pkg/hnsw"
"github.com/kevmo314/appendable/pkg/pointer"
"io"
"math"
)

type BTreeNode struct {
Keys []pointer.ReferencedId
Ids []pointer.ReferencedId
Vectors []hnsw.Point

Offsets []uint64

// Width should be 0 for varint
Width uint16
Offsets []uint64
Width uint16
VectorDim uint64
}

func (n *BTreeNode) Size() int64 {
size := 4

for _, k := range n.Keys {
for _, k := range n.Ids {
size += encoding.SizeVarint(k.DataPointer.Offset)
size += encoding.SizeVarint(uint64(k.Id))
size += encoding.SizeVarint(uint64(k.DataPointer.Length))
size += encoding.SizeVarint(uint64(k.Value))
}

for _, n := range n.Offsets {
size += encoding.SizeVarint(n)
}

size += encoding.SizeVarint(n.VectorDim)
size += len(n.Vectors) * (4 * int(n.VectorDim))

return int64(size)
}

// Leaf reports whether the node has no child offsets.
//
// A nil Offsets slice and an empty one are treated the same: len of a nil
// slice is 0, so no separate nil comparison is required.
func (n *BTreeNode) Leaf() bool {
	return len(n.Offsets) == 0
}

func (n *BTreeNode) MarshalBinary() ([]byte, error) {
size := int32(len(n.Keys))
size := int32(len(n.Ids))

if size == 0 {
panic("writing empty node")
panic("writing empty node, no ids found!")
}

buf := make([]byte, n.Size())
Expand All @@ -49,20 +58,30 @@ func (n *BTreeNode) MarshalBinary() ([]byte, error) {
}

ct := 4
for _, k := range n.Keys {
for _, k := range n.Ids {
on := binary.PutUvarint(buf[ct:], k.DataPointer.Offset)
vn := binary.PutUvarint(buf[ct+on:], uint64(k.Id))
ct += on + vn

ln := binary.PutUvarint(buf[ct+on:], uint64(k.DataPointer.Length))
vn := binary.PutUvarint(buf[ct+on+ln:], uint64(k.Value))
ct += on + ln + vn
}

for _, o := range n.Offsets {
on := binary.PutUvarint(buf[ct:], o)
for _, n := range n.Offsets {
on := binary.PutUvarint(buf[ct:], n)
ct += on
}

vdn := binary.PutUvarint(buf[ct:], n.VectorDim)
ct += vdn

for _, v := range n.Vectors {
for _, elem := range v {
binary.LittleEndian.PutUint32(buf[ct:], math.Float32bits(elem))
ct += 4
}
}

if ct != int(n.Size()) {
panic("size mismatch")
panic(fmt.Sprintf("size mismatch. ct: %v, size: %v", ct, n.Size()))
}

return buf, nil
Expand All @@ -73,33 +92,56 @@ func (n *BTreeNode) UnmarshalBinary(buf []byte) error {
leaf := size < 0

if leaf {
n.Offsets = make([]uint64, (-size)+1)
n.Keys = make([]pointer.ReferencedId, -size)
n.Ids = make([]pointer.ReferencedId, -size)
n.Vectors = make([]hnsw.Point, -size)
n.Offsets = make([]uint64, 0)
} else {
n.Keys = make([]pointer.ReferencedId, size)
n.Ids = make([]pointer.ReferencedId, size)
n.Vectors = make([]hnsw.Point, size)
n.Offsets = make([]uint64, size+1)
}

if size == 0 {
panic("empty node")
}

m := 4
for i := range n.Keys {
for i := range n.Ids {
o, on := binary.Uvarint(buf[m:])
v, vn := binary.Uvarint(buf[m+on:])
l, ln := binary.Uvarint(buf[m+on:])

n.Ids[i].DataPointer.Offset = o
n.Ids[i].DataPointer.Length = uint32(l)

m += on + ln

n.Keys[i].Id = hnsw.Id(v)
n.Keys[i].DataPointer.Offset = o
v, vn := binary.Uvarint(buf[m:])
n.Ids[i].Value = hnsw.Id(v)

m += on + vn
m += vn
}

for i := range n.Offsets {
o, on := binary.Uvarint(buf[m:])
n.Offsets[i] = o
m += on
if !leaf {
for i := range n.Offsets {
o, on := binary.Uvarint(buf[m:])
n.Offsets[i] = o
m += on
}
}

vecdim, vdn := binary.Uvarint(buf[m:])
n.VectorDim = vecdim
m += vdn

for i := range n.Vectors {
vector := make(hnsw.Point, vecdim)

for vi := range vector {
vector[vi] = float32(binary.LittleEndian.Uint32(buf[m:]))
m += 4
}

n.Vectors[i] = vector
}

return nil
Expand Down
88 changes: 88 additions & 0 deletions pkg/btree/node_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package btree

import (
"bytes"
"github.com/kevmo314/appendable/pkg/hnsw"
"github.com/kevmo314/appendable/pkg/pointer"
"reflect"
"testing"
)

// TestBTreeNode_Size checks the byte size that Size reports for a small node
// with three ids, three 2-dimensional vectors and no child offsets.
func TestBTreeNode_Size(t *testing.T) {
	t.Run("node size", func(t *testing.T) {
		// Expected breakdown, following the per-field tallies of the
		// original test: 4 bytes for the key count, 3 bytes per id,
		// 1 byte for the vector dimension, and 3 vectors * 2 elements *
		// 4 bytes each.
		const want = 4 + 3*3 + 1 + 3*2*4 // 38

		node := &BTreeNode{
			Ids:       []pointer.ReferencedId{{Value: 1}, {Value: 2}, {Value: 3}},
			Vectors:   []hnsw.Point{{1, 1}, {2, 2}, {3, 3}},
			Offsets:   make([]uint64, 0),
			VectorDim: 2,
		}

		if got := node.Size(); got != want {
			t.Fatalf("wrong size: %d", got)
		}
	})
}

// TestBTreeNode_MarshalBinary writes a node with WriteTo, decodes the bytes
// with UnmarshalBinary, and checks that the decoded node has the expected
// leaf/intermediate kind and is deeply equal to the node that was written.
func TestBTreeNode_MarshalBinary(t *testing.T) {
	cases := []struct {
		name     string
		offsets  []uint64
		wantLeaf bool
	}{
		{name: "leaf node", offsets: make([]uint64, 0), wantLeaf: true},
		{name: "intermediate node", offsets: []uint64{0, 4096, 8192, 6969}, wantLeaf: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			original := &BTreeNode{
				Ids: []pointer.ReferencedId{
					{Value: 1},
					{Value: 2},
					{Value: 3},
				},
				Vectors:   []hnsw.Point{{0, 0}, {0, 0}, {0, 0}},
				Offsets:   tc.offsets,
				VectorDim: 2,
			}

			var encoded bytes.Buffer
			if _, err := original.WriteTo(&encoded); err != nil {
				t.Fatal(err)
			}

			decoded := &BTreeNode{}
			if err := decoded.UnmarshalBinary(encoded.Bytes()); err != nil {
				t.Fatal(err)
			}

			// The node kind must survive the round trip.
			switch {
			case tc.wantLeaf && !decoded.Leaf():
				t.Fatalf("expected leaf node, but got %v offsets", len(decoded.Offsets))
			case !tc.wantLeaf && decoded.Leaf():
				t.Fatal("expected intermediate node")
			}

			if !reflect.DeepEqual(original, decoded) {
				t.Fatalf("encoded\n%#v\ndecoded\n%#v", original, decoded)
			}
		})
	}
}
10 changes: 5 additions & 5 deletions pkg/pointer/referenced_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ type ReferencedValue struct {

type ReferencedId struct {
DataPointer MemoryPointer
Id hnsw.Id
Value hnsw.Id
}

// String formats the referenced value as "ReferencedValue@<pointer>{<value>}",
// rendering both the data pointer and the value with the %s verb.
func (rv ReferencedValue) String() string {
	ptr, val := rv.DataPointer, rv.Value
	return fmt.Sprintf("ReferencedValue@%s{%s}", ptr, val)
}

func (ri ReferencedId) String() string {
return fmt.Sprintf("ReferencedValue@%d{%d}", ri.Id, ri.Id)
func (rv ReferencedId) String() string {
return fmt.Sprintf("ReferencedId@%s{%d}", rv.DataPointer, rv.Value)
}

func CompareReferencedValues(a, b ReferencedValue) int {
Expand All @@ -48,9 +48,9 @@ func CompareReferencedValues(a, b ReferencedValue) int {
}

func CompareReferencedIds(a, b ReferencedId) int {
if a.Id > b.Id {
if a.Value > b.Value {
return 1
} else if a.Id < b.Id {
} else if a.Value < b.Value {
return -1
}

Expand Down

0 comments on commit 0063103

Please sign in to comment.