-
Notifications
You must be signed in to change notification settings - Fork 0
/
arc.go
509 lines (398 loc) · 13.4 KB
/
arc.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
// Copyright Chrono Technologies LLC
// SPDX-License-Identifier: MIT
// Package arc implements a key-value database based on a Radix tree data
// structure and deduplication-enabled blob storage. The Radix tree provides
// space-efficient key management through prefix compression, while the blob
// storage handles values with automatic deduplication.
package arc
import (
"errors"
"sync"
)
var (
// ErrCorrupted is returned when a database corruption is detected.
ErrCorrupted = errors.New("database corruption detected")
// ErrDuplicateKey is returned when an insertion is attempted using a
// key that already exists in the database.
ErrDuplicateKey = errors.New("cannot insert duplicate key")
// ErrInvalidChecksum is returned when the node checksum is invalid.
ErrInvalidChecksum = errors.New("invalid checksum detected")
// ErrKeyNotFound is returned when the key does not exist in the index.
ErrKeyNotFound = errors.New("key not found")
// ErrKeyTooLarge is returned when the key size exceeds the 64KB limit.
ErrKeyTooLarge = errors.New("key is too large")
// ErrNilKey is returned when an insertion is attempted using a nil key.
ErrNilKey = errors.New("key cannot be nil")
// ErrNodeCorrupted is returned when an index node corruption is detected.
ErrNodeCorrupted = errors.New("index node corruption detected")
// ErrValueTooLarge is returned when the value size exceeds the 4GB limit.
ErrValueTooLarge = errors.New("value is too large")
)
const (
maxUint16 = (1 << 16) - 1 // maxUint16 is the maximum value of uint16.
maxUint32 = (1 << 32) - 1 // maxUint32 is the maximum value of uint32.
maxKeyBytes = maxUint16 // maxKeyBytes is the maximum key size.
maxValueBytes = maxUint32 // maxValueBytes is the maximum value size.
inlineValueThreshold = blobIDLen
)
// Arc represents the API interface of a space-efficient key-value database that
// combines a Radix tree for key indexing and a space-optimized blob store.
type Arc struct {
root *node // Pointer to the root node.
numNodes int // Number of nodes in the tree.
numRecords int // Number of records in the tree.
mu sync.RWMutex // RWLock for concurrency management.
// Stores deduplicated values that are larger than 32 bytes.
blobs blobStore
}
// New returns an empty Arc database handler.
func New() *Arc {
return &Arc{blobs: blobStore{}}
}
// Len returns the number of records.
func (a *Arc) Len() int {
a.mu.RLock()
defer a.mu.RUnlock()
return a.numRecords
}
// Add inserts a new key-value pair in the database. It returns ErrDuplicateKey
// if the key already exists.
func (a *Arc) Add(key []byte, value []byte) error {
a.mu.Lock()
defer a.mu.Unlock()
return a.insert(key, value, false)
}
// Put inserts or updates a key-value pair in the database.
func (a *Arc) Put(key []byte, value []byte) error {
a.mu.Lock()
defer a.mu.Unlock()
return a.insert(key, value, true)
}
// insert adds a key-value pair to the database. If the key already exists and
// overwrite is true, the existing value is updated. If overwrite is false and
// the key exists, ErrDuplicateKey is returned. It returns nil on success.
func (a *Arc) insert(key []byte, value []byte, overwrite bool) error {
if key == nil {
return ErrNilKey
}
if len(key) > maxKeyBytes {
return ErrKeyTooLarge
}
if len(value) > maxValueBytes {
return ErrValueTooLarge
}
// Empty tree, set the new record node as the root node.
if a.empty() {
a.root = newRecordNode(a.blobs, key, value)
a.numNodes = 1
a.numRecords = 1
return nil
}
// Create a common root node for keys with no shared prefix.
if len(a.root.key) > 0 && longestCommonPrefix(a.root.key, key) == nil {
oldRoot := a.root
a.root = &node{key: nil}
a.root.addChild(oldRoot)
a.root.addChild(newRecordNode(a.blobs, key, value))
a.numNodes += 2
a.numRecords++
return nil
}
var parent *node
var current = a.root
for {
prefix := longestCommonPrefix(current.key, key)
prefixLen := len(prefix)
// Found exact match. Put() will overwrite the existing value.
// Do not update counters because this is an in-place update.
if prefixLen == len(current.key) && prefixLen == len(key) {
if !overwrite {
return ErrDuplicateKey
}
if !current.isRecord {
a.numRecords++
}
current.setValue(a.blobs, value)
return nil
}
// The longest common prefix matches the entire key, but is shorter
// than current's key. The new key becomes the parent of current.
//
// For example, suppose the key is "app" and current.key is "apple".
// The longest common prefix is "app". Therefore "apple" is updated to
// "le", and then becomes a child of the "app" node, forming the path:
// ["app"(new node) -> "le"(current)].
if prefixLen == len(key) && prefixLen < len(current.key) {
if current == a.root {
current.setKey(current.key[len(key):])
a.root = newRecordNode(a.blobs, key, value)
a.root.addChild(current)
} else {
if err := parent.removeChild(current); err != nil {
return err
}
current.setKey(current.key[len(key):])
n := newRecordNode(a.blobs, key, value)
n.addChild(current)
parent.addChild(n)
}
a.numNodes++
a.numRecords++
return nil
}
// Partial match with key exhaustion: Insert via node splitting.
if prefixLen > 0 && prefixLen < len(current.key) {
a.splitNode(parent, current, newRecordNode(a.blobs, key, value), prefix)
return nil
}
// Search for a child whose key is compatible with the remaining
// portion of the key. Start by removing the prefix from the key.
key = key[prefixLen:]
nextNode := current.findCompatibleChild(key)
// No existing path matches the remaining key portion. The new record
// will be inserted as a leaf node. At this point, current's key must
// fully match the key prefix because:
//
// 1. "No common prefix" cases are handled earlier in the function
// 2. Partial prefix match would have triggered splitNode()
//
// TODO(toru): These conditions can likely be further simplified.
if nextNode == nil {
if current == a.root {
if a.root.key == nil || prefixLen == len(a.root.key) {
a.root.addChild(newRecordNode(a.blobs, key, value))
}
} else {
current.addChild(newRecordNode(a.blobs, key, value))
}
a.numNodes++
a.numRecords++
return nil
}
// Reaching this point means that a compatible child was found.
// Update relevant iterators and continue traversing the tree until
// we reach a leaf node or no further nodes are available.
parent = current
current = nextNode
}
}
// Get retrieves the value that matches the given key. Returns ErrKeyNotFound
// if the key does not exist.
func (a *Arc) Get(key []byte) ([]byte, error) {
if key == nil {
return nil, ErrNilKey
}
a.mu.RLock()
defer a.mu.RUnlock()
node, _, err := a.findNodeAndParent(key)
if err != nil {
return nil, err
}
if !node.isRecord {
return nil, ErrKeyNotFound
}
return node.value(a.blobs), nil
}
// Delete removes a record that matches the given key.
func (a *Arc) Delete(key []byte) error {
if key == nil {
return ErrNilKey
}
if a.empty() {
return ErrKeyNotFound
}
if len(key) > maxKeyBytes {
return ErrKeyTooLarge
}
a.mu.Lock()
defer a.mu.Unlock()
delNode, parent, err := a.findNodeAndParent(key)
if err != nil {
return err
}
if !delNode.isRecord {
return ErrKeyNotFound
}
// Root node deletion is handled separately to improve code readability.
if delNode == a.root {
a.deleteRootNode()
return nil
}
// If the deletion node is not a root node, its parent must be non-nil.
if parent == nil {
return ErrCorrupted
}
// The deletion node only has one child. Therefore the child will take
// place of the deletion node, after inheriting deletion node's key.
if delNode.numChildren == 1 {
if err := parent.removeChild(delNode); err != nil {
return err
}
child := delNode.firstChild
child.prependKey(delNode.key)
parent.addChild(child)
a.numNodes--
a.numRecords--
return nil
}
// In most cases, deleting a leaf node is simply a matter of removing it
// from its parent. However, if the parent is a non-record node and the
// deletion leaves it with only a single child, we must merge the nodes.
if delNode.isLeaf() {
if err := parent.removeChild(delNode); err != nil {
return err
}
a.numNodes--
a.numRecords--
// The deletion had left the non-record parent with one child. This
// means that the parent node is now redundant. Therefore merge the
// parent and the only-child nodes.
if !parent.isRecord && parent.numChildren == 1 {
child := parent.firstChild
child.prependKey(parent.key)
// Save the parent's sibling before overwriting it.
sibling := parent.nextSibling
// We do not have access to the grandparent, therefore shallow copy
// the child node's information to the parent node. This effectively
// replaces parent with child within the index tree structure.
parent.shallowCopyFrom(child)
parent.nextSibling = sibling
// Decrement for removing the parent node.
a.numNodes--
}
return nil
}
// Reaching this point means we are deleting a non-root internal node
// that has more than one edges. Convert the node to a non-record type.
delNode.isRecord = false
delNode.deleteValue(a.blobs)
a.numRecords--
return nil
}
// deleteRootNode removes the root node from the tree, while ensuring that
// the tree structure remains valid and consistent.
func (a *Arc) deleteRootNode() {
if a.root.isLeaf() {
a.clear()
return
}
if a.root.numChildren == 1 {
// The root node only has one child, which will become the new root.
child := a.root.firstChild
child.prependKey(a.root.key)
a.root = child
// Decrement for the original root node removal.
a.numNodes--
} else {
// The root node has multiple children, thus it must continue to exist
// for the tree to sustain its structure. Convert it to a non-record
// node by removing its value and flagging it as a non-record node.
a.root.isRecord = false
a.root.deleteValue(a.blobs)
}
a.numRecords--
}
// clear wipes the in-memory tree, and resets metadata. This function is
// intended for development and testing purposes only.
func (a *Arc) clear() {
a.root = nil
a.numNodes = 0
a.numRecords = 0
a.blobs = blobStore{}
}
// empty returns true if the database is empty.
func (a *Arc) empty() bool {
return a.root == nil && a.numRecords == 0
}
// splitNode splits a node based on a common prefix by creating an intermediate
// parent node. For the root node, it simply creates a new parent. For non-root
// nodes, it updates the parent-child relationships before modifying the node
// keys to maintain tree consistency. The current and newNode becomes children
// of the intermediate parent, with their keys updated to contain only their
// suffixes after the common prefix.
func (a *Arc) splitNode(parent *node, current *node, newNode *node, commonPrefix []byte) {
newParent := &node{key: commonPrefix}
// Splitting the root node only requires setting the new branch as root.
if current == a.root {
current.setKey(current.key[len(commonPrefix):])
newNode.setKey(newNode.key[len(commonPrefix):])
newParent.addChild(current)
newParent.addChild(newNode)
a.root = newParent
a.numNodes += 2
a.numRecords++
return
}
// Splitting the non-root node. Update the parent-child relationship
// before manipulating the node keys of current and newNode.
parent.removeChild(current)
parent.addChild(newParent)
// Reset current's nextSibling in prep for becoming a child of newParent.
current.nextSibling = nil
// Remove the common prefix from current and newNode.
current.setKey(current.key[len(commonPrefix):])
newNode.setKey(newNode.key[len(commonPrefix):])
newParent.addChild(current)
newParent.addChild(newNode)
a.numNodes += 2
a.numRecords++
}
// findNodeAndParent returns the node that matches the given key and its parent.
// The parent is nil if the discovered node is a root node.
func (a *Arc) findNodeAndParent(key []byte) (current *node, parent *node, err error) {
if key == nil {
return nil, nil, ErrNilKey
}
if a.empty() {
return nil, nil, ErrKeyNotFound
}
current = a.root
for {
prefix := longestCommonPrefix(current.key, key)
prefixLen := len(prefix)
// Lack of a common prefix means that the key does not exist in the
// tree, unless the current node is a root node.
if prefix == nil && current != a.root {
return nil, nil, ErrKeyNotFound
}
// Common prefix must be at least the length of the current key.
// If not, the search key cannot exist in a Radix tree.
if prefixLen != len(current.key) {
return nil, nil, ErrKeyNotFound
}
// The prefix matches the current node's key.
if prefixLen == len(key) {
return current, parent, nil
}
if !current.hasChildren() {
return nil, nil, ErrKeyNotFound
}
// Update the key for the next iteration, and then continue traversing.
key = key[len(prefix):]
parent = current
current = current.findCompatibleChild(key)
// The key does not exist if a compatible child is not found.
if current == nil {
return nil, nil, ErrKeyNotFound
}
}
}
// longestCommonPrefix compares the two given byte slices, and returns the
// longest common prefix. Memory-safety is ensured by establishing an index
// boundary based on the length of the shorter parameter.
func longestCommonPrefix(a, b []byte) []byte {
minLen := len(a)
if len(b) < minLen {
minLen = len(b)
}
var i int
for i = 0; i < minLen; i++ {
if a[i] != b[i] {
break
}
}
if i == 0 {
return nil
}
return a[:i]
}