-
Notifications
You must be signed in to change notification settings - Fork 115
/
stringbag.hh
171 lines (155 loc) · 6.16 KB
/
stringbag.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/* Masstree
* Eddie Kohler, Yandong Mao, Robert Morris
* Copyright (c) 2012-2014 President and Fellows of Harvard College
* Copyright (c) 2012-2014 Massachusetts Institute of Technology
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, subject to the conditions
* listed in the Masstree LICENSE file. These conditions include: you must
* preserve this copyright notice, and you cannot mention the copyright
* holders in advertising related to the Software without their permission.
* The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
* notice is a summary of the Masstree LICENSE file; the license in that file
* is legally binding.
*/
#ifndef STRINGBAG_HH
#define STRINGBAG_HH 1
#include "compiler.hh"
#include "string_slice.hh"
/** @brief String collection used for Masstree key suffixes.
A stringbag is a compact collection of up to W strings, where W is a
parameter called the <em>bag width</em>. These strings are stored
in contiguously allocated memory.
Stringbag component strings support
string_slice<uintptr_t>::equals_sloppy() without memory errors.
The template parameter T is the offset type. This type determines the
maximum supported capacity of a stringbag. Smaller types have lower
overhead, but support smaller bags.
Stringbags favor compactness over usability. The bag width W is an
important parameter, but you can't recover W from the stringbag itself;
you'll need to store that externally. Stringbags cannot be allocated
conventionally. You must manage stringbag memory yourself:
allocate an array of characters for the stringbag, then use placement
new to construct the stringbag on that memory. Stringbag has a
trivial destructor. */
template <typename T>
class stringbag {
public:
/** @brief Type of offsets. */
typedef T offset_type;
typedef string_slice<uintptr_t> slice_type;
private:
struct info_type {
offset_type pos;
offset_type len;
info_type()
: pos(0), len(0) {
}
info_type(unsigned p, unsigned l)
: pos(p), len(l) {
}
};
public:
/** @brief Return the maximum allowed capacity of a stringbag. */
static constexpr unsigned max_size() {
return ((unsigned) (offset_type) -1) + 1;
}
/** @brief Return the base size of a stringbag. */
static constexpr size_t empty_size() {
return sizeof(offset_type) * 2;
}
/** @brief Return the overhead for a stringbag of width @a width.
This is the number of bytes allocated for overhead. */
static constexpr size_t overhead(size_t width) {
return empty_size() + width * sizeof(info_type);
}
/** @brief Return a capacity that can definitely contain a stringbag.
@param width number of strings in bag
@param len total number of bytes in bag's strings */
static constexpr size_t safe_size(int width, unsigned len) {
return overhead(width) + len + slice_type::size - 1;
}
/** @brief Construct an empty stringbag.
@param width Number of strings in the bag
@param capacity Number of bytes allocated for the bag
@pre @a capacity > overhead(width)
@pre @a capacity <= max_offset
Stringbags should not be constructed on the stack or by a direct call
to new. Allocate space for a stringbag, then call the constructor on
that space using placement new. @a capacity must be no bigger than
the allocated space. */
stringbag(int width, size_t capacity) {
size_t firstpos = overhead(width);
assert(capacity >= firstpos && capacity <= max_size());
size_ = firstpos;
capacity_ = capacity - 1;
for (int i = 0; i != width; ++i) {
info_[i] = info_type();
}
}
/** @brief Return the capacity used to construct this bag. */
size_t capacity() const {
return capacity_ + 1;
}
/** @brief Return the number of bytes used so far (including overhead). */
size_t used_capacity() const {
return size_;
}
/** @brief Return the string at position @a p.
@pre @a p >= 0 && @a p < bag width */
lcdf::Str operator[](int p) const {
info_type info = info_[p];
return lcdf::Str(s_ + info.pos, info.len);
}
/** @brief Return the string at position @a p.
@pre @a p >= 0 && @a p < bag width */
lcdf::Str get(int p) const {
info_type info = info_[p];
return lcdf::Str(s_ + info.pos, info.len);
}
/** @brief Assign the string at position @a p to @a s.
@param p position
@param s string
@param len length of string
@return true if the assignment succeeded, false if it failed
(because the stringbag is out of capacity)
@pre @a p >= 0 && @a p < bag width */
bool assign(int p, const char *s, int len) {
unsigned pos, mylen = info_[p].len;
if (mylen >= (unsigned) len)
pos = info_[p].pos;
else if (size_ + (unsigned) std::max(len, slice_type::size)
<= capacity()) {
pos = size_;
size_ += len;
} else
return false;
memcpy(s_ + pos, s, len);
info_[p] = info_type(pos, len);
return true;
}
/** @override */
bool assign(int p, lcdf::Str s) {
return assign(p, s.s, s.len);
}
/** @brief Print a representation of the stringbag to @a f. */
void print(int width, FILE *f, const char *prefix, int indent) {
fprintf(f, "%s%*s%p (%d:)%d:%d...\n", prefix, indent, "",
this, (int) overhead(width), size_, capacity());
for (int i = 0; i < width; ++i)
if (info_[i].len)
fprintf(f, "%s%*s #%x %u:%u %.*s\n", prefix, indent, "",
i, info_[i].pos, info_[i].len, std::min(info_[i].len, 40U), s_ + info_[i].pos);
}
private:
union {
struct {
offset_type size_;
offset_type capacity_;
info_type info_[1];
};
char s_[1];
};
};
#endif