From 545bb1f899482c4781207a97e6da5d04467270a2 Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Thu, 22 Feb 2024 17:17:03 -0500 Subject: [PATCH] Bug: iterator not iterating sequentially (#126) * bug * removing index file * step 1 -- convert all reference values to bigendian * step 2 -- convert all others to little endian * remove mock.index from tracing --- pkg/appendable/appendable.go | 12 ++-- pkg/btree/bptree.go | 48 ++++++++++++++++ pkg/btree/bptree_test.go | 102 +++++++++++++++++++++++++++++++++- pkg/btree/multi.go | 14 ++--- pkg/btree/node.go | 32 +++++------ pkg/btree/pagefile.go | 4 +- pkg/handlers/csv.go | 2 +- pkg/handlers/csv_test.go | 4 +- pkg/handlers/equality_test.go | 2 +- pkg/handlers/jsonl.go | 8 +-- pkg/handlers/jsonl_test.go | 4 +- pkg/mocks/main.go | 2 +- 12 files changed, 189 insertions(+), 45 deletions(-) diff --git a/pkg/appendable/appendable.go b/pkg/appendable/appendable.go index a1d2fe1b..9c1ab4ac 100644 --- a/pkg/appendable/appendable.go +++ b/pkg/appendable/appendable.go @@ -96,7 +96,7 @@ func (m *FileMeta) MarshalBinary() ([]byte, error) { buf := make([]byte, 10) buf[0] = byte(m.Version) buf[1] = byte(m.Format) - binary.BigEndian.PutUint64(buf[2:], m.ReadOffset) + binary.LittleEndian.PutUint64(buf[2:], m.ReadOffset) return buf, nil } @@ -117,7 +117,7 @@ func (m *FileMeta) UnmarshalBinary(buf []byte) error { return fmt.Errorf("unrecognized file format: %v", buf[1]) } - m.ReadOffset = binary.BigEndian.Uint64(buf[2:]) + m.ReadOffset = binary.LittleEndian.Uint64(buf[2:]) return nil } @@ -128,8 +128,8 @@ type IndexMeta struct { func (m *IndexMeta) MarshalBinary() ([]byte, error) { buf := make([]byte, 2+len(m.FieldName)+2) - binary.BigEndian.PutUint16(buf[0:], uint16(m.FieldType)) - binary.BigEndian.PutUint16(buf[2:], uint16(len(m.FieldName))) + binary.LittleEndian.PutUint16(buf[0:], uint16(m.FieldType)) + binary.LittleEndian.PutUint16(buf[2:], uint16(len(m.FieldName))) copy(buf[4:], m.FieldName) return buf, nil } @@ -138,8 +138,8 @@ func (m *IndexMeta) UnmarshalBinary(buf []byte) error { if len(buf) < 4 { return fmt.Errorf("invalid metadata size: %d", len(buf)) } - m.FieldType = FieldType(binary.BigEndian.Uint16(buf[0:])) - nameLength := binary.BigEndian.Uint16(buf[2:]) + m.FieldType = FieldType(binary.LittleEndian.Uint16(buf[0:])) + nameLength := binary.LittleEndian.Uint16(buf[2:]) if len(buf) < 4+int(nameLength) { return fmt.Errorf("invalid metadata size: %d", len(buf)) } diff --git a/pkg/btree/bptree.go b/pkg/btree/bptree.go index c2f80085..4bb99687 100644 --- a/pkg/btree/bptree.go +++ b/pkg/btree/bptree.go @@ -162,6 +162,54 @@ func (t *BPTree) readNode(ptr MemoryPointer) (*BPTreeNode, error) { return node, nil } +func (t *BPTree) first() (ReferencedValue, error) { + rootNode, _, err := t.root() + + if err != nil { + return ReferencedValue{}, err + } + + currNode, err := t.readNode(rootNode.Pointer(0)) + if err != nil { + return ReferencedValue{}, err + } + + for !currNode.leaf() { + childPointer := currNode.Pointer(0) + currNode, err = t.readNode(childPointer) + + if err != nil { + return ReferencedValue{}, err + } + } + + return currNode.Keys[0], nil +} + +func (t *BPTree) last() (ReferencedValue, error) { + rootNode, _, err := t.root() + + if err != nil { + return ReferencedValue{}, err + } + + currNode, err := t.readNode(rootNode.Pointer(rootNode.NumPointers() - 1)) + if err != nil { + return ReferencedValue{}, err + } + + for !currNode.leaf() { + childPointer := currNode.Pointer(currNode.NumPointers() - 1) + currNode, err = t.readNode(childPointer) + + if err != nil { + return ReferencedValue{}, err + } + } + + return currNode.Keys[0], nil +} + // traverse returns the path from root to leaf in reverse order (leaf first) // the last element is always the node passed in func (t *BPTree) traverse(key ReferencedValue, node *BPTreeNode, ptr MemoryPointer) ([]TraversalRecord, error) { diff --git a/pkg/btree/bptree_test.go b/pkg/btree/bptree_test.go index f9cdedfe..cc478462 100644 --- a/pkg/btree/bptree_test.go +++ b/pkg/btree/bptree_test.go @@ -3,6 +3,7 @@ package btree import ( "bytes" "encoding/binary" + "fmt" "io" "math" "math/rand" @@ -27,7 +28,7 @@ func (m *testMetaPage) Root() (MemoryPointer, error) { func (m *testMetaPage) write() error { buf := make([]byte, 8) - binary.BigEndian.PutUint64(buf, m.root.Offset) + binary.LittleEndian.PutUint64(buf, m.root.Offset) if _, err := m.pf.Seek(4096, io.SeekStart); err != nil { return err } @@ -222,7 +223,7 @@ func TestBPTree_SequentialInsertionTest(t *testing.T) { tree := NewBPTree(p, newTestMetaPage(t, p)) for i := 0; i < 256; i++ { buf := make([]byte, 8) - binary.LittleEndian.PutUint64(buf, uint64(i)) + binary.BigEndian.PutUint64(buf, uint64(i)) if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i), Length: uint32(len(buf))}); err != nil { t.Fatal(err) } @@ -230,7 +231,7 @@ func TestBPTree_SequentialInsertionTest(t *testing.T) { for i := 0; i < 256; i++ { buf := make([]byte, 8) - binary.LittleEndian.PutUint64(buf, uint64(i)) + binary.BigEndian.PutUint64(buf, uint64(i)) k, v, err := tree.Find(ReferencedValue{Value: buf}) if err != nil { t.Fatal(err) @@ -470,3 +471,98 @@ func TestBPTree_Iteration_SinglePage(t *testing.T) { } }) } + +func TestBPTree_Iteration_FirstLast(t *testing.T) { + b := buftest.NewSeekableBuffer() + p, err := NewPageFile(b) + if err != nil { + t.Fatal(err) + } + tree := NewBPTree(p, newTestMetaPage(t, p)) + start := 10.0 + increments := []float64{0.01, 0.05, 0.3} + currentIncrementIndex := 0 + + for i := start; i < 256; i += increments[currentIncrementIndex] { + buf := make([]byte, 8) + binary.BigEndian.PutUint64(buf, math.Float64bits(i)) + + if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i * 100), Length: uint32(len(buf))}); err != nil { + t.Fatal(err) + } + + if int(i*100)%10 == 0 && currentIncrementIndex < len(increments)-1 { + currentIncrementIndex++ + } + } + + t.Run("find first and iter", func(t *testing.T) { + first, err := tree.first() + if err != nil { + t.Fatal(err) + } + firstBuf := make([]byte, 8) + binary.BigEndian.PutUint64(firstBuf, math.Float64bits(10)) + + if !bytes.Equal(first.Value, firstBuf) { + t.Fatal("expected 10 as first reference value") + } + + iter, err := tree.Iter(first) + if err != nil { + t.Fatal(err) + } + + i := 0 + var p *float64 = nil + for ; iter.Next(); i++ { + k := iter.Key() + var c float64 + reader := bytes.NewReader(k.Value) + err := binary.Read(reader, binary.BigEndian, &c) + if err != nil { + fmt.Println("binary.Read failed:", err) + return + } + + if p != nil && *p > c { + t.Errorf("expected a non-decreasing traversal but got prev: %v, curr: %v", *p, c) + return + } + p = &c + } + + }) + + t.Run("find last and iter", func(t *testing.T) { + last, err := tree.last() + if err != nil { + t.Fatal(err) + } + + iter, err := tree.Iter(last) + if err != nil { + t.Fatal(err) + } + + i := 0 + var r *float64 = nil + for ; iter.Prev(); i++ { + k := iter.Key() + var l float64 + reader := bytes.NewReader(k.Value) + err := binary.Read(reader, binary.BigEndian, &l) + if err != nil { + fmt.Println("binary.Read failed:", err) + return + } + + if r != nil && !(l <= *r) { + t.Errorf("expected a non-increasing traversal but got curr: %v, prev: %v", l, *r) + return + } + r = &l + } + + }) +} diff --git a/pkg/btree/multi.go b/pkg/btree/multi.go index 3cb1e286..62ce0b69 100644 --- a/pkg/btree/multi.go +++ b/pkg/btree/multi.go @@ -28,14 +28,14 @@ func (m *LinkedMetaPage) Root() (MemoryPointer, error) { return MemoryPointer{}, err } var mp MemoryPointer - return mp, binary.Read(m.rws, binary.BigEndian, &mp) + return mp, binary.Read(m.rws, binary.LittleEndian, &mp) } func (m *LinkedMetaPage) SetRoot(mp MemoryPointer) error { if _, err := m.rws.Seek(int64(m.offset), io.SeekStart); err != nil { return err } - return binary.Write(m.rws, binary.BigEndian, mp) + return binary.Write(m.rws, binary.LittleEndian, mp) } // BPTree returns a B+ tree that uses this meta page as the root @@ -62,7 +62,7 @@ func (m *LinkedMetaPage) Metadata() ([]byte, error) { return nil, err } // the first four bytes represents the length - length := binary.BigEndian.Uint32(buf[:4]) + length := binary.LittleEndian.Uint32(buf[:4]) return buf[4 : 4+length], nil } @@ -82,7 +82,7 @@ func (m *LinkedMetaPage) SetMetadata(data []byte) error { return err } buf := append(make([]byte, 4), data...) - binary.BigEndian.PutUint32(buf, uint32(len(data))) + binary.LittleEndian.PutUint32(buf, uint32(len(data))) if _, err := m.rws.Write(buf); err != nil { return err } @@ -102,7 +102,7 @@ func (m *LinkedMetaPage) Next() (*LinkedMetaPage, error) { return nil, err } var next MemoryPointer - if err := binary.Read(m.rws, binary.BigEndian, &next); err != nil { + if err := binary.Read(m.rws, binary.LittleEndian, &next); err != nil { return nil, err } return &LinkedMetaPage{rws: m.rws, offset: next.Offset}, nil @@ -128,7 +128,7 @@ func (m *LinkedMetaPage) AddNext() (*LinkedMetaPage, error) { if _, err := m.rws.Seek(int64(m.offset)+12, io.SeekStart); err != nil { return nil, err } - if err := binary.Write(m.rws, binary.BigEndian, next.offset); err != nil { + if err := binary.Write(m.rws, binary.LittleEndian, next.offset); err != nil { return nil, err } return next, nil @@ -158,7 +158,7 @@ func (m *LinkedMetaPage) Exists() (bool, error) { func (m *LinkedMetaPage) Reset() error { // write a full page of zeros emptyPage := make([]byte, m.rws.PageSize()) - binary.BigEndian.PutUint64(emptyPage[12:20], ^uint64(0)) + binary.LittleEndian.PutUint64(emptyPage[12:20], ^uint64(0)) if _, err := m.rws.Seek(int64(m.offset), io.SeekStart); err != nil { return err } diff --git a/pkg/btree/node.go b/pkg/btree/node.go index 25b827ef..d0fb3dac 100644 --- a/pkg/btree/node.go +++ b/pkg/btree/node.go @@ -101,9 +101,9 @@ func (n *BPTreeNode) MarshalBinary() ([]byte, error) { buf := make([]byte, n.Size()) // set the first bit to 1 if it's a leaf if n.leaf() { - binary.BigEndian.PutUint32(buf[:4], uint32(-size)) + binary.LittleEndian.PutUint32(buf[:4], uint32(-size)) } else { - binary.BigEndian.PutUint32(buf[:4], uint32(size)) + binary.LittleEndian.PutUint32(buf[:4], uint32(size)) } if size == 0 { panic("writing empty node") @@ -111,12 +111,12 @@ func (n *BPTreeNode) MarshalBinary() ([]byte, error) { ct := 4 for _, k := range n.Keys { if k.DataPointer.Length > 0 { - binary.BigEndian.PutUint32(buf[ct:ct+4], ^uint32(0)) - binary.BigEndian.PutUint64(buf[ct+4:ct+12], k.DataPointer.Offset) - binary.BigEndian.PutUint32(buf[ct+12:ct+16], k.DataPointer.Length) + binary.LittleEndian.PutUint32(buf[ct:ct+4], ^uint32(0)) + binary.LittleEndian.PutUint64(buf[ct+4:ct+12], k.DataPointer.Offset) + binary.LittleEndian.PutUint32(buf[ct+12:ct+16], k.DataPointer.Length) ct += 4 + 12 } else { - binary.BigEndian.PutUint32(buf[ct:ct+4], uint32(len(k.Value))) + binary.LittleEndian.PutUint32(buf[ct:ct+4], uint32(len(k.Value))) m := copy(buf[ct+4:ct+4+len(k.Value)], k.Value) if m != len(k.Value) { return nil, fmt.Errorf("failed to copy key: %w", io.ErrShortWrite) @@ -125,12 +125,12 @@ func (n *BPTreeNode) MarshalBinary() ([]byte, error) { } } for _, p := range n.leafPointers { - binary.BigEndian.PutUint64(buf[ct:ct+8], p.Offset) - binary.BigEndian.PutUint32(buf[ct+8:ct+12], p.Length) + binary.LittleEndian.PutUint64(buf[ct:ct+8], p.Offset) + binary.LittleEndian.PutUint32(buf[ct+8:ct+12], p.Length) ct += 12 } for _, p := range n.internalPointers { - binary.BigEndian.PutUint64(buf[ct:ct+8], p) + binary.LittleEndian.PutUint64(buf[ct:ct+8], p) ct += 8 } if ct != int(n.Size()) { @@ -149,7 +149,7 @@ func (n *BPTreeNode) WriteTo(w io.Writer) (int64, error) { } func (n *BPTreeNode) UnmarshalBinary(buf []byte) error { - size := int32(binary.BigEndian.Uint32(buf[:4])) + size := int32(binary.LittleEndian.Uint32(buf[:4])) leaf := size < 0 if leaf { n.leafPointers = make([]MemoryPointer, -size) @@ -164,11 +164,11 @@ func (n *BPTreeNode) UnmarshalBinary(buf []byte) error { m := 4 for i := range n.Keys { - l := binary.BigEndian.Uint32(buf[m : m+4]) + l := binary.LittleEndian.Uint32(buf[m : m+4]) if l == ^uint32(0) { // read the key out of the memory pointer stored at this position - n.Keys[i].DataPointer.Offset = binary.BigEndian.Uint64(buf[m+4 : m+12]) - n.Keys[i].DataPointer.Length = binary.BigEndian.Uint32(buf[m+12 : m+16]) + n.Keys[i].DataPointer.Offset = binary.LittleEndian.Uint64(buf[m+4 : m+12]) + n.Keys[i].DataPointer.Length = binary.LittleEndian.Uint32(buf[m+12 : m+16]) dp := n.Keys[i].DataPointer n.Keys[i].Value = n.DataParser.Parse(n.Data[dp.Offset : dp.Offset+uint64(dp.Length)]) // resolving the data-file m += 4 + 12 @@ -178,12 +178,12 @@ func (n *BPTreeNode) UnmarshalBinary(buf []byte) error { } } for i := range n.leafPointers { - n.leafPointers[i].Offset = binary.BigEndian.Uint64(buf[m : m+8]) - n.leafPointers[i].Length = binary.BigEndian.Uint32(buf[m+8 : m+12]) + n.leafPointers[i].Offset = binary.LittleEndian.Uint64(buf[m : m+8]) + n.leafPointers[i].Length = binary.LittleEndian.Uint32(buf[m+8 : m+12]) m += 12 } for i := range n.internalPointers { - n.internalPointers[i] = binary.BigEndian.Uint64(buf[m : m+8]) + n.internalPointers[i] = binary.LittleEndian.Uint64(buf[m : m+8]) m += 8 } return nil diff --git a/pkg/btree/pagefile.go b/pkg/btree/pagefile.go index fcd5384a..60455874 100644 --- a/pkg/btree/pagefile.go +++ b/pkg/btree/pagefile.go @@ -54,7 +54,7 @@ func NewPageFile(rws io.ReadWriteSeeker) (*PageFile, error) { } } else { for i := 0; i < len(pf.freePageIndexes); i++ { - offset := int64(binary.BigEndian.Uint64(buf[i*8 : (i+1)*8])) + offset := int64(binary.LittleEndian.Uint64(buf[i*8 : (i+1)*8])) if offset != 0 { pf.freePageIndexes[pf.freePageHead] = offset pf.freePageHead = (pf.freePageHead + 1) % len(pf.freePageIndexes) @@ -89,7 +89,7 @@ func (pf *PageFile) writeFreePageIndices() error { tail := (pf.freePageHead - pf.freePageCount + len(pf.freePageIndexes)) % len(pf.freePageIndexes) for i := 0; i < pf.freePageCount; i++ { offset := pf.freePageIndexes[tail+i] - binary.BigEndian.PutUint64(buf[i*8:(i+1)*8], uint64(offset)) + binary.LittleEndian.PutUint64(buf[i*8:(i+1)*8], uint64(offset)) } if _, err := pf.ReadWriteSeeker.Seek(0, io.SeekStart); err != nil { return err diff --git a/pkg/handlers/csv.go b/pkg/handlers/csv.go index d30aebd5..cab06fc3 100644 --- a/pkg/handlers/csv.go +++ b/pkg/handlers/csv.go @@ -131,7 +131,7 @@ func (c CSVHandler) Parse(value []byte) []byte { switch fieldType { case appendable.FieldTypeFloat64: buf := make([]byte, 8) - binary.LittleEndian.PutUint64(buf, math.Float64bits(parsed.(float64))) + binary.BigEndian.PutUint64(buf, math.Float64bits(parsed.(float64))) return buf case appendable.FieldTypeBoolean: if parsed.(bool) { diff --git a/pkg/handlers/csv_test.go b/pkg/handlers/csv_test.go index 96618828..b8bb5fa6 100644 --- a/pkg/handlers/csv_test.go +++ b/pkg/handlers/csv_test.go @@ -198,7 +198,7 @@ func TestCSV(t *testing.T) { } v2 := make([]byte, 8) - binary.LittleEndian.PutUint64(v2, math.Float64bits(123)) + binary.BigEndian.PutUint64(v2, math.Float64bits(123)) rv2, mp2, err := collected[1].BPTree(r2, CSVHandler{}).Find(btree.ReferencedValue{Value: v2}) if err != nil { t.Fatal(err) @@ -297,7 +297,7 @@ func TestCSV(t *testing.T) { } v2 := make([]byte, 8) - binary.LittleEndian.PutUint64(v2, math.Float64bits(1234)) + binary.BigEndian.PutUint64(v2, math.Float64bits(1234)) iter, err := collected[0].BPTree(r2, JSONLHandler{}).Iter(btree.ReferencedValue{Value: v2}) if err != nil { diff --git a/pkg/handlers/equality_test.go b/pkg/handlers/equality_test.go index 7ae2f5b9..3b3bcd2e 100644 --- a/pkg/handlers/equality_test.go +++ b/pkg/handlers/equality_test.go @@ -215,7 +215,7 @@ func compareMetaPages(i1, i2 []*btree.LinkedMetaPage, jr, cr []byte) (bool, stri for _, val := range h2 { v2 := make([]byte, 8) - binary.LittleEndian.PutUint64(v2, math.Float64bits(val)) + binary.BigEndian.PutUint64(v2, math.Float64bits(val)) rv1, mp1, err := collected1.BPTree(jr, JSONLHandler{}).Find(btree.ReferencedValue{Value: v2}) if err != nil { diff --git a/pkg/handlers/jsonl.go b/pkg/handlers/jsonl.go index 3b1de7a3..3f870b75 100644 --- a/pkg/handlers/jsonl.go +++ b/pkg/handlers/jsonl.go @@ -107,9 +107,9 @@ func (j JSONLHandler) Parse(value []byte) []byte { slog.Error("failed to parse float", "err", err) return nil } - binary.LittleEndian.PutUint64(buf, math.Float64bits(f)) + binary.BigEndian.PutUint64(buf, math.Float64bits(f)) case float64: - binary.LittleEndian.PutUint64(buf, math.Float64bits(token)) + binary.BigEndian.PutUint64(buf, math.Float64bits(token)) } return buf case bool: @@ -172,9 +172,9 @@ func (j JSONLHandler) handleJSONLObject(f *appendable.IndexFile, r []byte, dec * if err != nil { return fmt.Errorf("failed to parse float: %w", err) } - binary.LittleEndian.PutUint64(buf, math.Float64bits(f)) + binary.BigEndian.PutUint64(buf, math.Float64bits(f)) case float64: - binary.LittleEndian.PutUint64(buf, math.Float64bits(value)) + binary.BigEndian.PutUint64(buf, math.Float64bits(value)) } if err := page.BPTree(r, j).Insert(btree.ReferencedValue{ DataPointer: mp, Value: buf}, data); err != nil { diff --git a/pkg/handlers/jsonl_test.go b/pkg/handlers/jsonl_test.go index 8df19ef6..d6fb8dd0 100644 --- a/pkg/handlers/jsonl_test.go +++ b/pkg/handlers/jsonl_test.go @@ -266,7 +266,7 @@ func TestJSONL(t *testing.T) { } v2 := make([]byte, 8) - binary.LittleEndian.PutUint64(v2, math.Float64bits(123)) + binary.BigEndian.PutUint64(v2, math.Float64bits(123)) rv2, mp2, err := collected[1].BPTree(r2, JSONLHandler{}).Find(btree.ReferencedValue{Value: v2}) if err != nil { t.Fatal(err) @@ -670,7 +670,7 @@ func TestJSONL(t *testing.T) { } v2 := make([]byte, 8) - binary.LittleEndian.PutUint64(v2, math.Float64bits(1234)) + binary.BigEndian.PutUint64(v2, math.Float64bits(1234)) iter, err := collected[0].BPTree(r2, JSONLHandler{}).Iter(btree.ReferencedValue{Value: v2}) if err != nil { diff --git a/pkg/mocks/main.go b/pkg/mocks/main.go index f1cbf73c..676f0b80 100644 --- a/pkg/mocks/main.go +++ b/pkg/mocks/main.go @@ -26,7 +26,7 @@ func (m *testMetaPage) Root() (btree.MemoryPointer, error) { func (m *testMetaPage) write() error { buf := make([]byte, 8) - binary.BigEndian.PutUint64(buf, m.root.Offset) + binary.LittleEndian.PutUint64(buf, m.root.Offset) if _, err := m.pf.Seek(4096, io.SeekStart); err != nil { return err }