// Copyright 2019-2025 UnoDB contributors
#ifndef UNODB_DETAIL_OLC_ART_HPP
#define UNODB_DETAIL_OLC_ART_HPP
// Should be the first include
#include "global.hpp" // IWYU pragma: keep
// IWYU pragma: no_include <__ostream/basic_ostream.h>
#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <optional>
#include <stack>
#include <tuple>
#include <type_traits>
#include "art_common.hpp"
#include "art_internal.hpp"
#include "art_internal_impl.hpp"
#include "assert.hpp"
#include "node_type.hpp"
#include "optimistic_lock.hpp"
#include "portability_arch.hpp"
#include "qsbr.hpp"
#include "qsbr_ptr.hpp"
namespace unodb {
template <typename Key>
class olc_db;
namespace detail {
// The OLC header contains an [optimistic_lock].
struct [[nodiscard]] olc_node_header {
// Return a reference to the [optimistic_lock].
[[nodiscard]] constexpr optimistic_lock& lock() const noexcept {
return m_lock;
}
#ifndef NDEBUG
// This is passed as a debug callback to QSBR deallocation to be
// checked at the physical deallocation time. This checks that the
// node being deallocated has no open RCS (read_critical_section).
static void check_on_dealloc(const void* ptr) noexcept {
static_cast<const olc_node_header*>(ptr)->m_lock.check_on_dealloc();
}
#endif
private:
mutable optimistic_lock m_lock; // The lock.
};
static_assert(std::is_standard_layout_v<olc_node_header>);
template <typename Key>
class olc_inode;
template <typename Key>
class olc_inode_4;
template <typename Key>
class olc_inode_16;
template <typename Key>
class olc_inode_48;
template <typename Key>
class olc_inode_256;
template <typename Key>
using olc_inode_defs =
basic_inode_def<olc_inode<Key>, olc_inode_4<Key>, olc_inode_16<Key>,
olc_inode_48<Key>, olc_inode_256<Key>>;
using olc_node_ptr = basic_node_ptr<olc_node_header>;
template <typename, class>
class db_inode_qsbr_deleter; // IWYU pragma: keep
template <class, class, template <class> class>
class db_leaf_qsbr_deleter; // IWYU pragma: keep
struct olc_impl_helpers;
template <typename Key>
using olc_art_policy = basic_art_policy<
Key, unodb::olc_db, unodb::in_critical_section, unodb::optimistic_lock,
unodb::optimistic_lock::read_critical_section, olc_node_ptr, olc_inode_defs,
db_inode_qsbr_deleter, db_leaf_qsbr_deleter>;
template <typename Key>
using olc_db_leaf_unique_ptr = typename olc_art_policy<Key>::db_leaf_unique_ptr;
template <typename Key>
using olc_inode_base = basic_inode_impl<olc_art_policy<Key>>;
template <typename Key>
class olc_inode : public olc_inode_base<Key> {};
template <typename Key>
using olc_leaf_type = typename olc_art_policy<Key>::leaf_type;
//
//
//
template <class AtomicArray>
using non_atomic_array =
std::array<typename AtomicArray::value_type::value_type,
std::tuple_size<AtomicArray>::value>;
template <class T>
inline non_atomic_array<T> copy_atomic_to_nonatomic(T& atomic_array) noexcept {
non_atomic_array<T> result;
for (typename decltype(result)::size_type i = 0; i < result.size(); ++i) {
result[i] = atomic_array[i].load(std::memory_order_relaxed);
}
return result;
}
template <typename Key>
using olc_leaf_unique_ptr =
basic_db_leaf_unique_ptr<Key, olc_node_header, olc_db>;
} // namespace detail
using qsbr_value_view = qsbr_ptr_span<const std::byte>;
/// A thread-safe Adaptive Radix Tree that is synchronized using
/// optimistic lock coupling. At any time, at most two
/// directly-related tree nodes can be write-locked by the insert
/// algorithm and three by the delete algorithm. The lock used is
/// optimistic lock (see optimistic_lock.hpp), where only writers lock
/// and readers access nodes optimistically with node version
/// checks. For deleted node reclamation, Quiescent State-Based
/// Reclamation is used.
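///
/// A minimal usage sketch, not taken from this header; it assumes
/// that unodb::value_view can be constructed from a contiguous
/// range of std::byte and that the calling thread satisfies the
/// QSBR registration requirements described in qsbr.hpp:
///
///   unodb::olc_db<std::uint64_t> db;
///   const std::array<std::byte, 2> raw{std::byte{0x01}, std::byte{0x02}};
///   if (db.insert(42, unodb::value_view{raw})) {
///     const auto result = db.get(42);  // std::optional<qsbr_value_view>
///     if (decltype(db)::key_found(result)) {
///       // *result remains readable under QSBR epoch protection.
///     }
///   }
///   const bool removed = db.remove(42);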
template <typename Key>
class olc_db final {
// disable all other key types until unit tests prove that they work
static_assert(std::is_same_v<Key, std::uint64_t>);
public:
using key_type = Key;
using value_view = unodb::qsbr_value_view;
using get_result = std::optional<value_view>;
using inode_base = detail::olc_inode_base<Key>;
using leaf_type = detail::olc_leaf_type<Key>;
private:
using art_key_type = detail::basic_art_key<Key>;
/// Query for a value associated with an encoded key.
[[nodiscard, gnu::pure]] get_result get_internal(
art_key_type search_key) const noexcept;
/// Insert a value under an encoded key iff there is no entry for
/// that key.
///
/// Note: Cannot be called during stack unwinding with
/// std::uncaught_exceptions() > 0
///
/// @return true iff the key value pair was inserted.
[[nodiscard]] bool insert_internal(art_key_type insert_key,
unodb::value_view v);
/// Remove the entry associated with the encoded key.
///
/// @return true if the delete was successful (i.e. the key was
/// found in the tree and the associated index entry was removed).
[[nodiscard]] bool remove_internal(art_key_type remove_key);
public:
// Creation and destruction
olc_db() noexcept = default;
~olc_db() noexcept;
/// Query for a value associated with a key.
///
/// @param search_key If Key is a simple primitive type, then it is
/// converted into a binary comparable key. If Key is key_value,
/// then it is assumed to already be a binary comparable key, e.g.,
/// as produced by unodb::key_encoder.
[[nodiscard, gnu::pure]] get_result get(Key search_key) const noexcept {
const auto k = art_key_type{search_key};
return get_internal(k);
}
/// Return true iff the tree is empty (no root leaf).
[[nodiscard]] auto empty() const noexcept { return root == nullptr; }
/// Insert a value under a binary comparable key iff there is no
/// entry for that key.
///
/// Note: Cannot be called during stack unwinding with
/// std::uncaught_exceptions() > 0
///
/// @param insert_key If Key is a simple primitive type, then it is
/// converted into a binary comparable key. If Key is key_value,
/// then it is assumed to already be a binary comparable key, e.g.,
/// as produced by unodb::key_encoder.
///
/// @return true iff the key value pair was inserted.
[[nodiscard]] bool insert(Key insert_key, unodb::value_view v) {
// TODO(thompsonbry) There should be a lambda variant of this to
// handle conflicts and support upsert or delete-upsert
// semantics. This would call the caller's lambda once the method
// was positioned on the leaf. The caller could then update the
// value or perhaps delete the entry under the key.
const auto k = art_key_type{insert_key};
return insert_internal(k, v);
}
/// Remove the entry associated with the key.
///
/// @param search_key If Key is a simple primitive type, then it is
/// converted into a binary comparable key. If Key is key_value,
/// then it is assumed to already be a binary comparable key, e.g.,
/// as produced by unodb::key_encoder.
///
/// @return true if the delete was successful (i.e. the key was
/// found in the tree and the associated index entry was removed).
[[nodiscard]] bool remove(Key search_key) {
const auto k = art_key_type{search_key};
return remove_internal(k);
}
/// Removes all entries in the index.
///
/// Note: Only legal in a single-threaded context, as is the destructor.
void clear() noexcept;
//
// iterator (the iterator is an internal API, the public API is scan()).
//
/// The OLC scan() logic tracks the version tag (obtained from a
/// read_critical_section) for each node in the stack. This
/// information is required because the iter_result tuples already
/// contain physical information read from nodes which may have been
/// invalidated by subsequent mutations. The scan is built on
/// iterator methods for seek(), next(), prior(), etc. These
/// methods must restart (rebuilding the stack and redoing the work)
/// if they encounter a version tag for an element on the stack
/// which is no longer valid. Restart works by performing a seek()
/// to the key for the leaf at the top of the stack. Restarts
/// can be full (from the root) or partial (from the first element
/// in the stack which was not invalidated by the structural
/// modification).
///
/// During scan(), the iterator seek()s to some key and then invokes
/// the caller's lambda, passing a reference to a visitor object.
/// That visitor allows the caller to access the key and/or value
/// associated with the leaf. If the leaf is concurrently deleted
/// from the structure, the visitor relies on epoch protection to
/// return the data from the now invalidated leaf. This is still
/// the state that the caller would have observed without the
/// concurrent structural modification. When next() is called, it
/// will discover that the leaf at the top of the stack is no longer
/// valid (it is marked as obsolete) and it will have to restart by
/// seek()ing to the key for that leaf and then invoking next() if
/// the key still exists; otherwise the iterator is already
/// positioned on the successor of that leaf.
///
/// Note: The OLC thread safety mechanisms permit concurrent
/// non-atomic (multi-word) mutations to be applied to nodes. This
/// means that a thread can read junk in the middle of some
/// reorganization of a node (e.g., the keys or children are being
/// reordered to maintain an invariant for I16). To protect against
/// reading such junk, the thread reads the version tag before and
/// after it accesses data in the node and restarts if the version
/// information has changed. The thread must not act on information
/// that it had read until it verifies that the version tag remained
/// unchanged across the read operation.
///
/// Note: The iterator is backed by a std::stack. This means that
/// the iterator methods accessing the stack cannot be declared
/// noexcept.
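///
/// In outline, the read side of that protocol looks like the
/// following hedged sketch (the spellings try_read_lock() and
/// must_restart() are illustrative here; only check() appears by
/// name in this header, see optimistic_lock.hpp for the actual
/// interface):
///
///   auto rcs = node_header.lock().try_read_lock();  // open an RCS
///   if (rcs.must_restart()) return restart();  // write-locked or obsolete
///   const auto snapshot = read_fields_from(node);  // may observe torn data
///   if (!rcs.check()) return restart();  // version changed: discard snapshot
///   // Only now may the snapshot be acted upon.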
class iterator {
friend class olc_db<Key>;
template <class>
friend class visitor;
/// The [node_ptr] is never [nullptr] and points to the internal
/// node or leaf for that step in the path from the root to some
/// leaf. For the bottom of the stack, [node_ptr] is the root.
/// For the top of the stack, [node_ptr] is the current leaf. In
/// the degenerate case where the tree is a single root leaf, then
/// the stack contains just that leaf.
///
/// The [key] is the [std::byte] along which the path descends
/// from that [node_ptr]. The [key] has no meaning for a leaf.
/// The key byte may be used to reconstruct the full key (along
/// with any prefix bytes in the nodes along the path). The key
/// byte is tracked to avoid having to search the keys of some
/// node types (N48) when the [child_index] does not directly
/// imply the key byte.
///
/// The [child_index] is the [std::uint8_t] index position in the
/// parent at which the [child_ptr] was found. The [child_index]
/// has no meaning for a leaf. In the special case of N48, the
/// [child_index] is the index into the [child_indexes[]]. For
/// all other internal node types, the [child_index] is a direct
/// index into the [children[]]. When finding the successor (or
/// predecessor) the [child_index] needs to be interpreted
/// according to the node type. For N4 and N16, you just look at
/// the next slot in the children[] to find the successor. For
/// N256, you look at the next non-null slot in the children[].
/// N48 is the oddest of the node types. For N48, you have to
/// look at the child_indexes[], find the next mapped key value
/// greater than the current one, and then look at its entry in
/// the children[].
///
/// The [tag] is the [version_tag_type] from the
/// read_critical_section and contains the version information
/// that must be valid to use the [key_byte] and [child_index]
/// data read from the [node]. The version tag is cached when those
/// data are read from the node, along with the [key_byte] and
/// [child_index] values.
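///
/// A hedged sketch of the N48 successor step described above (the
/// member names child_indexes[], children[] and the empty-slot
/// sentinel are illustrative, not the exact N48 members):
///
///   // Advance from the current key byte kb to the next mapped key byte.
///   for (unsigned k = static_cast<unsigned>(kb) + 1U; k < 256U; ++k) {
///     const auto ci = child_indexes[k];  // 0..47, or the empty sentinel
///     if (ci != empty_child) return children[ci];  // the successor child
///   }
///   // No successor within this node: pop and continue in the parent.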
struct stack_entry : public inode_base::iter_result {
/// The [prefix_len] is the number of bytes in the key prefix
/// for [node]. When the node is pushed onto the stack, we also
/// push [prefix_bytes] plus the [key_byte] onto a key_buffer.
/// We track how many bytes were pushed here (not including the
/// key_byte) so we can pop off the correct number of bytes
/// later.
detail::key_prefix_size prefix_len;
/// The version tag invariant for the node.
///
/// Note: This is just the data for the version tag and not the
/// read_critical_section (RCS). Moving the RCS onto the stack
/// creates problems in the while(...) loops that use parent and
/// node lock chaining since the RCS in the loop is invalid as
/// soon as it is moved onto the stack. Hence, this is just the
/// data and the while loops continue to use the normal OLC
/// pattern for lock chaining.
version_tag_type version;
};
protected:
/// Construct an empty iterator (one that is logically not
/// positioned on anything and which will report !valid()).
explicit iterator(olc_db& tree) : db_(tree) {}
// iterator is not flyweight. disallow copy and move.
iterator(const iterator&) = delete;
iterator(iterator&&) = delete;
iterator& operator=(const iterator&) = delete;
// iterator& operator=(iterator&&) = delete; // test_only_iterator()
public:
using key_type = Key;
// EXPOSED TO THE TESTS
/// Position the iterator on the first entry in the index.
iterator& first();
/// Advance the iterator to next entry in the index.
iterator& next();
/// Position the iterator on the last entry in the index, which
/// can be used to initiate a reverse traversal.
iterator& last();
/// Position the iterator on the previous entry in the index.
iterator& prior();
/// Position the iterator on, before, or after the caller's key.
/// If the iterator cannot be positioned, it will be invalidated.
/// For example, if [fwd:=true] and the [search_key] is GT any key
/// in the index then the iterator will be invalidated since there
/// is no index entry greater than the search key. Likewise, if
/// [fwd:=false] and the [search_key] is LT any key in the index,
/// then the iterator will be invalidated since there is no index
/// entry LT the search key.
///
/// @param search_key The internal key used to position the
/// iterator.
///
/// @param match Will be set to true iff the search key is an
/// exact match in the index data. Otherwise, the match is not
/// exact and the iterator is positioned either before or after
/// the search_key.
///
/// @param fwd When true, the iterator will be positioned on the
/// first entry which orders GTE the search_key and will be !valid()
/// if there is no such entry. Otherwise, the iterator will be
/// positioned on the last entry which orders LTE the search_key and
/// will be !valid() if there is no such entry.
iterator& seek(art_key_type search_key, bool& match, bool fwd = true);
/// Return the key_view associated with the current position of
/// the iterator.
///
/// Precondition: The iterator MUST be valid().
[[nodiscard]] key_view get_key();
/// Return the value_view associated with the current position of
/// the iterator.
///
/// Precondition: The iterator MUST be valid().
[[nodiscard, gnu::pure]] const qsbr_value_view get_val() const;
/// Debugging
// LCOV_EXCL_START
[[gnu::cold]] UNODB_DETAIL_NOINLINE void dump(std::ostream& os) const {
if (empty()) {
os << "iter::stack:: empty\n";
return;
}
// Create a new stack and copy everything there. Using the new
// stack, print out the stack in top-bottom order. This avoids
// modifications to the existing stack for the iterator.
auto tmp = stack_;
auto level = tmp.size() - 1;
while (!tmp.empty()) {
const auto& e = tmp.top();
const auto& np = e.node;
os << "iter::stack:: level = " << level << ", key_byte=0x" << std::hex
<< std::setfill('0') << std::setw(2)
<< static_cast<uint64_t>(e.key_byte) << std::dec
<< ", child_index=0x" << std::hex << std::setfill('0')
<< std::setw(2) << static_cast<std::uint64_t>(e.child_index)
<< std::dec << ", prefix_len=" << e.prefix_len << std::dec
<< ", version=";
optimistic_lock::version_type(e.version).dump(os); // version tag.
os << ", ";
art_policy::dump_node(os, np, false /*recursive*/); // node or leaf.
if (np.type() != node_type::LEAF) os << '\n';
tmp.pop();
level--;
}
}
// LCOV_EXCL_STOP
/// Debugging
// LCOV_EXCL_START
[[gnu::cold]] UNODB_DETAIL_NOINLINE void dump() const { dump(std::cerr); }
// LCOV_EXCL_STOP
/// Return true unless the stack is empty (exposed to tests)
[[nodiscard]] bool valid() const { return !stack_.empty(); }
protected:
/// Compare the given key (e.g., the to_key) to the current key in
/// the internal buffer.
///
/// @return -1, 0, or 1 if this key is LT, EQ, or GT the other
/// key.
[[nodiscard, gnu::pure]] int cmp(const art_key_type& akey) const;
//
// stack access methods.
//
/// Return true iff the stack is empty.
[[nodiscard]] bool empty() const { return stack_.empty(); }
/// Push an entry onto the stack.
bool try_push(detail::olc_node_ptr node, std::byte key_byte,
std::uint8_t child_index,
const optimistic_lock::read_critical_section& rcs) {
// For variable length keys we need to know the number of bytes
// associated with the node's key_prefix. In addition there is
// one byte for the descent to the child node along the
// child_index. That information needs to be stored on the
// stack so we can pop off the right number of bytes even for
// OLC where the node might be concurrently modified.
UNODB_DETAIL_ASSERT(node.type() != node_type::LEAF);
auto* inode{node.ptr<inode_type*>()};
auto prefix{inode->get_key_prefix().get_snapshot()};
// Check the RCS to make sure the snapshot of the key prefix is valid.
if (UNODB_DETAIL_UNLIKELY(!rcs.check())) {
return false; // LCOV_EXCL_LINE
}
stack_.push({{node, key_byte, child_index}, prefix.size(), rcs.get()});
keybuf_.push(prefix.get_key_view());
keybuf_.push(key_byte);
return true;
}
/// Push a leaf onto the stack.
bool try_push_leaf(detail::olc_node_ptr aleaf,
const optimistic_lock::read_critical_section& rcs) {
// The [key], [child_index] and [prefix_len] are ignored for a
// leaf.
//
// TODO(thompsonbry) variable length keys - we will need to
// handle a final variable length key prefix on the leaf here.
stack_.push({{aleaf,
static_cast<std::byte>(0xFFU), // key_byte
static_cast<std::uint8_t>(0xFFU)}, // child_index
0, // prefix_len
rcs.get()});
return true;
}
/// Push an entry onto the stack.
bool try_push(const typename inode_base::iter_result& e,
const optimistic_lock::read_critical_section& rcs) {
const auto node_type = e.node.type();
if (UNODB_DETAIL_UNLIKELY(node_type == node_type::LEAF)) {
return try_push_leaf(e.node, rcs);
}
return try_push(e.node, e.key_byte, e.child_index, rcs);
}
/// Pop an entry from the stack and the corresponding bytes from
/// the key_buffer.
void pop() {
// Note: We DO NOT need to check the RCS here. The prefix_len
// on the stack is known to be valid at the time that the entry
// was pushed onto the stack, and the stack and the keybuf are in
// sync with one another. So we can just do a simple POP for
// each of them.
const auto prefix_len = top().prefix_len;
stack_.pop();
keybuf_.pop(prefix_len);
}
/// Return the entry (if any) on the top of the stack.
[[nodiscard]] stack_entry& top() { return stack_.top(); }
/// Return the node on the top of the stack, or nullptr if the
/// stack is empty (similar to top(), but handles an empty stack).
[[nodiscard]] detail::olc_node_ptr current_node() {
return stack_.empty() ? detail::olc_node_ptr(nullptr) : stack_.top().node;
}
private:
/// Invalidate the iterator (pops everything off of the stack).
///
/// post-condition: The iterator is !valid().
iterator& invalidate() {
while (!stack_.empty()) stack_.pop(); // clear the stack
return *this;
}
//
// Core logic invoked from retry loops.
//
[[nodiscard]] bool try_first();
[[nodiscard]] bool try_last();
[[nodiscard]] bool try_next();
[[nodiscard]] bool try_prior();
/// Push the given node onto the stack and traverse from the
/// caller's node to the left-most leaf under that node, pushing
/// nodes onto the stack as they are visited.
[[nodiscard]] bool try_left_most_traversal(
detail::olc_node_ptr node,
optimistic_lock::read_critical_section& parent_critical_section);
/// Descend from the current state of the stack to the right most
/// child leaf, updating the state of the iterator during the
/// descent.
[[nodiscard]] bool try_right_most_traversal(
detail::olc_node_ptr node,
optimistic_lock::read_critical_section& parent_critical_section);
/// Core logic invoked from retry loop.
[[nodiscard]] bool try_seek(art_key_type search_key, bool& match, bool fwd);
/// The outer db instance.
olc_db& db_;
/// A stack reflecting the parent path from the root of the tree
/// to the current leaf. An empty stack corresponds to a
/// logically empty iterator and can be detected using !valid().
/// The iterator for an empty tree is an empty stack.
std::stack<stack_entry> stack_{};
/// A buffer into which visited encoded (binary comparable) keys
/// are materialized during the iterator traversal. Bytes are
/// pushed onto this buffer when we push something onto the
/// iterator stack and popped off of this buffer when we pop
/// something off of the iterator stack.
detail::key_buffer keybuf_{};
}; // class iterator
//
// end of the iterator API, which is an internal API.
//
public:
///
/// public scan API
///
// Note: The scan() interface is public. The iterator and the
// methods to obtain an iterator are protected (except for tests).
// This encapsulation makes it easier to define methods which
// operate on external keys (scan()) and those which operate on
// internal keys (seek() and the iterator). It also makes life
// easier for mutex_db since scan() can take the lock.
/// Scan the tree, applying the caller's lambda to each visited
/// leaf.
///
/// @param fn A function f(unodb::visitor<unodb::olc_db::iterator>&)
/// returning [bool:halt]. The traversal will halt if the function
/// returns [true].
///
/// @param fwd When [true] perform a forward scan, otherwise perform
/// a reverse scan.
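///
/// A minimal usage sketch: count every entry in the index by never
/// halting the traversal.
///
///   std::size_t n = 0;
///   db.scan([&n](auto&) {
///     ++n;
///     return false;  // false: keep scanning
///   });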
template <typename FN>
void scan(FN fn, bool fwd = true) noexcept {
if (fwd) {
iterator it(*this);
it.first();
visitor<olc_db<Key>::iterator> v{it};
while (it.valid()) {
if (UNODB_DETAIL_UNLIKELY(fn(v))) break;
it.next();
}
} else {
iterator it(*this);
it.last();
visitor<olc_db<Key>::iterator> v{it};
while (it.valid()) {
if (UNODB_DETAIL_UNLIKELY(fn(v))) break;
it.prior();
}
}
}
/// Scan in the indicated direction, applying the caller's lambda to
/// each visited leaf.
///
/// @param from_key is an inclusive lower bound for the starting
/// point of the scan.
///
/// @param fn A function f(unodb::visitor<unodb::olc_db::iterator>&)
/// returning [bool:halt]. The traversal will halt if the function
/// returns [true].
///
/// @param fwd When [true] perform a forward scan, otherwise perform
/// a reverse scan.
template <typename FN>
void scan_from(Key from_key, FN fn, bool fwd = true) noexcept {
const auto from_key_ = art_key_type{from_key}; // convert to internal key
bool match{};
if (fwd) {
iterator it(*this);
it.seek(from_key_, match, true /*fwd*/);
visitor<olc_db<Key>::iterator> v{it};
while (it.valid()) {
if (UNODB_DETAIL_UNLIKELY(fn(v))) break;
it.next();
}
} else {
iterator it(*this);
it.seek(from_key_, match, false /*fwd*/);
visitor<olc_db<Key>::iterator> v{it};
while (it.valid()) {
if (UNODB_DETAIL_UNLIKELY(fn(v))) break;
it.prior();
}
}
}
/// Scan a half-open key range, applying the caller's lambda to each
/// visited leaf. The scan will proceed in lexicographic order iff
/// from_key is less than to_key and in reverse lexicographic order
/// iff to_key is less than from_key. When from_key < to_key, the
/// scan will visit all index entries in the half-open range
/// [from_key,to_key) in forward order. Otherwise the scan will
/// visit all index entries in the half-open range (from_key,to_key]
/// in reverse order.
///
/// @param from_key is an inclusive bound for the starting point of
/// the scan.
///
/// @param to_key is an exclusive bound for the ending point of the
/// scan.
///
/// @param fn A function f(unodb::visitor<unodb::olc_db::iterator>&)
/// returning [bool:halt]. The traversal will halt if the function
/// returns [true].
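///
/// A hedged usage sketch of a bounded forward scan (the visitor
/// accessor names get_key() and get_value() are assumed here; see
/// the visitor class for its exact interface):
///
///   db.scan_range(std::uint64_t{100}, std::uint64_t{200}, [](auto& v) {
///     const auto k = v.get_key();      // binary-comparable key view
///     const auto val = v.get_value();  // QSBR-protected value view
///     return false;  // visit the whole [100, 200) range
///   });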
template <typename FN>
void scan_range(Key from_key, Key to_key, FN fn) noexcept {
// TODO(thompsonbry) : variable length keys. Explore a cheaper way
// to handle the exclusive bound case when developing variable
// length key support based on the maintained key buffer.
constexpr bool debug = false; // set true to debug scan.
const auto from_key_ = art_key_type{from_key}; // convert to internal key
const auto to_key_ = art_key_type{to_key}; // convert to internal key
const auto ret = from_key_.cmp(to_key_); // compare the internal keys.
const bool fwd{ret < 0}; // from key is less than to key
if (ret == 0) return; // NOP
bool match{};
if (fwd) {
iterator it(*this);
it.seek(from_key_, match, true /*fwd*/);
if constexpr (debug) {
std::cerr << "scan_range:: fwd"
<< ", from_key=" << from_key_ << ", to_key=" << to_key_
<< "\n";
it.dump(std::cerr);
}
visitor<olc_db<Key>::iterator> v{it};
while (it.valid() && it.cmp(to_key_) < 0) {
if (UNODB_DETAIL_UNLIKELY(fn(v))) break;
it.next();
if constexpr (debug) {
std::cerr << "scan: next()\n";
it.dump(std::cerr);
}
}
} else { // reverse traversal.
iterator it(*this);
it.seek(from_key_, match, false /*fwd*/);
if constexpr (debug) {
std::cerr << "scan_range:: rev"
<< ", from_key=" << from_key_ << ", to_key=" << to_key_
<< "\n";
it.dump(std::cerr);
}
visitor<olc_db<Key>::iterator> v{it};
while (it.valid() && it.cmp(to_key_) > 0) {
if (UNODB_DETAIL_UNLIKELY(fn(v))) break;
it.prior();
if constexpr (debug) {
std::cerr << "scan: prior()\n";
it.dump(std::cerr);
}
}
}
}
//
// TEST ONLY METHODS
//
// Used to write the iterator tests.
auto test_only_iterator() noexcept { return iterator(*this); }
// Stats
#ifdef UNODB_DETAIL_WITH_STATS
// Return current memory use by tree nodes in bytes
[[nodiscard]] auto get_current_memory_use() const noexcept {
return current_memory_use.load(std::memory_order_relaxed);
}
template <node_type NodeType>
[[nodiscard]] auto get_node_count() const noexcept {
return node_counts[as_i<NodeType>].load(std::memory_order_relaxed);
}
[[nodiscard]] auto get_node_counts() const noexcept {
return detail::copy_atomic_to_nonatomic(node_counts);
}
template <node_type NodeType>
[[nodiscard]] auto get_growing_inode_count() const noexcept {
return growing_inode_counts[internal_as_i<NodeType>].load(
std::memory_order_relaxed);
}
[[nodiscard]] auto get_growing_inode_counts() const noexcept {
return detail::copy_atomic_to_nonatomic(growing_inode_counts);
}
template <node_type NodeType>
[[nodiscard]] auto get_shrinking_inode_count() const noexcept {
return shrinking_inode_counts[internal_as_i<NodeType>].load(
std::memory_order_relaxed);
}
[[nodiscard]] auto get_shrinking_inode_counts() const noexcept {
return detail::copy_atomic_to_nonatomic(shrinking_inode_counts);
}
[[nodiscard]] auto get_key_prefix_splits() const noexcept {
return key_prefix_splits.load(std::memory_order_relaxed);
}
#endif // UNODB_DETAIL_WITH_STATS
// Public utils
[[nodiscard]] static constexpr auto key_found(
const get_result& result) noexcept {
return static_cast<bool>(result);
}
// Debugging
[[gnu::cold]] UNODB_DETAIL_NOINLINE void dump(std::ostream& os) const;
[[gnu::cold]] UNODB_DETAIL_NOINLINE void dump() const;
olc_db(const olc_db&) noexcept = delete;
olc_db(olc_db&&) noexcept = delete;
olc_db& operator=(const olc_db&) noexcept = delete;
olc_db& operator=(olc_db&&) noexcept = delete;
private:
using art_policy = detail::olc_art_policy<Key>;
using inode_type = detail::olc_inode<Key>;
using inode_4 = detail::olc_inode_4<Key>;
using tree_depth_type = detail::tree_depth<art_key_type>;
using olc_db_leaf_unique_ptr_type = detail::olc_db_leaf_unique_ptr<Key>;
// If get_result is not present, the search was interrupted. Yes, this
// resolves to std::optional<std::optional<value_view>>, but IMHO both
// levels of std::optional are clear here
using try_get_result_type = std::optional<get_result>;
using try_update_result_type = std::optional<bool>;
[[nodiscard]] try_get_result_type try_get(art_key_type k) const noexcept;
[[nodiscard]] try_update_result_type try_insert(
art_key_type k, unodb::value_view v,
olc_db_leaf_unique_ptr_type& cached_leaf);
[[nodiscard]] try_update_result_type try_remove(art_key_type k);
void delete_root_subtree() noexcept;
#ifdef UNODB_DETAIL_WITH_STATS
void increase_memory_use(std::size_t delta) noexcept;
void decrease_memory_use(std::size_t delta) noexcept;
void increment_leaf_count(std::size_t leaf_size) noexcept {
increase_memory_use(leaf_size);
node_counts[as_i<node_type::LEAF>].fetch_add(1, std::memory_order_relaxed);
}
UNODB_DETAIL_DISABLE_MSVC_WARNING(4189)
void decrement_leaf_count(std::size_t leaf_size) noexcept {
decrease_memory_use(leaf_size);
const auto old_leaf_count UNODB_DETAIL_USED_IN_DEBUG =
node_counts[as_i<node_type::LEAF>].fetch_sub(1,
std::memory_order_relaxed);
UNODB_DETAIL_ASSERT(old_leaf_count > 0);
}
UNODB_DETAIL_RESTORE_MSVC_WARNINGS()
template <class INode>
constexpr void increment_inode_count() noexcept;
template <class INode>
constexpr void decrement_inode_count() noexcept;
template <node_type NodeType>
constexpr void account_growing_inode() noexcept;
template <node_type NodeType>
constexpr void account_shrinking_inode() noexcept;
#endif // UNODB_DETAIL_WITH_STATS
// optimistic lock guarding the [root].
alignas(
detail::hardware_destructive_interference_size) mutable optimistic_lock
root_pointer_lock;
// The root of the tree, guarded by the [root_pointer_lock].
in_critical_section<detail::olc_node_ptr> root{detail::olc_node_ptr{nullptr}};
static_assert(sizeof(root_pointer_lock) + sizeof(root) <=
detail::hardware_constructive_interference_size);
#ifdef UNODB_DETAIL_WITH_STATS
// Current logically allocated memory that is not scheduled to be reclaimed.
// The total memory currently allocated is this plus the QSBR deallocation
// backlog (qsbr::previous_interval_total_dealloc_size +
// qsbr::current_interval_total_dealloc_size).
alignas(detail::hardware_destructive_interference_size)
std::atomic<std::size_t> current_memory_use{0};
alignas(detail::hardware_destructive_interference_size)
std::atomic<std::uint64_t> key_prefix_splits{0};
template <class T>
using atomic_array = std::array<std::atomic<typename T::value_type>,
std::tuple_size<T>::value>;
alignas(detail::hardware_destructive_interference_size)
atomic_array<node_type_counter_array> node_counts{};
alignas(detail::hardware_destructive_interference_size)
atomic_array<inode_type_counter_array> growing_inode_counts{};
alignas(detail::hardware_destructive_interference_size)
atomic_array<inode_type_counter_array> shrinking_inode_counts{};
#endif // UNODB_DETAIL_WITH_STATS
friend auto detail::make_db_leaf_ptr<Key, detail::olc_node_header, olc_db>(
art_key_type, unodb::value_view, olc_db&);
template <class, class, template <class> class>
friend class detail::basic_db_leaf_deleter;
template <typename, class, template <class> class>
friend class detail::db_leaf_qsbr_deleter;
template <typename, class>
friend class detail::db_inode_qsbr_deleter;
template <typename, // Key
template <class> class, // Db
template <class> class, // CriticalSectionPolicy
class, // LockPolicy
class, // ReadCriticalSection
class, // NodePtr
template <typename> class, // INodeDefs
template <typename, class> class, // INodeReclamator
template <typename, class,
template <class> class>
class> // LeafReclamator
friend struct detail::basic_art_policy;
template <class, class, template <class> class>
friend class detail::basic_db_inode_deleter;
friend struct detail::olc_impl_helpers;
};
namespace detail {
template <typename Key, class INode>
using db_inode_qsbr_deleter_parent =
unodb::detail::basic_db_inode_deleter<Key, INode, unodb::olc_db>;
template <typename Key, class INode>
class db_inode_qsbr_deleter : public db_inode_qsbr_deleter_parent<Key, INode> {
public:
using db_inode_qsbr_deleter_parent<Key, INode>::db_inode_qsbr_deleter_parent;
UNODB_DETAIL_DISABLE_MSVC_WARNING(26434)
// cppcheck-suppress duplInheritedMember
void operator()(INode* inode_ptr) {
static_assert(std::is_trivially_destructible_v<INode>);
this_thread().on_next_epoch_deallocate(inode_ptr
#ifdef UNODB_DETAIL_WITH_STATS
,
sizeof(INode)
#endif
#ifndef NDEBUG
,
olc_node_header::check_on_dealloc
#endif
);
#ifdef UNODB_DETAIL_WITH_STATS
this->get_db().template decrement_inode_count<INode>();
#endif // UNODB_DETAIL_WITH_STATS
}
UNODB_DETAIL_RESTORE_MSVC_WARNINGS()
};
template <typename Key, class Header, template <class> class Db>
class db_leaf_qsbr_deleter {
public:
using db_type = Db<Key>;
using leaf_type = basic_leaf<Key, Header>;
static_assert(std::is_trivially_destructible_v<leaf_type>);
constexpr explicit db_leaf_qsbr_deleter(
db_type& db_ UNODB_DETAIL_LIFETIMEBOUND) noexcept
: db_instance{db_} {}
void operator()(leaf_type* to_delete) const {
#ifdef UNODB_DETAIL_WITH_STATS
const auto leaf_size = to_delete->get_size();
#endif // UNODB_DETAIL_WITH_STATS
this_thread().on_next_epoch_deallocate(to_delete
#ifdef UNODB_DETAIL_WITH_STATS
,
leaf_size
#endif // UNODB_DETAIL_WITH_STATS
#ifndef NDEBUG
,
olc_node_header::check_on_dealloc
#endif
);
#ifdef UNODB_DETAIL_WITH_STATS
db_instance.decrement_leaf_count(leaf_size);
#endif // UNODB_DETAIL_WITH_STATS
}
~db_leaf_qsbr_deleter() = default;
db_leaf_qsbr_deleter(const db_leaf_qsbr_deleter&) = default;
db_leaf_qsbr_deleter& operator=(const db_leaf_qsbr_deleter&) = delete;
db_leaf_qsbr_deleter(db_leaf_qsbr_deleter&&) = delete;
db_leaf_qsbr_deleter& operator=(db_leaf_qsbr_deleter&&) = delete;
private: