-
Notifications
You must be signed in to change notification settings - Fork 4
/
lzs.c
329 lines (288 loc) · 9.38 KB
/
lzs.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
/*
* OpenConnect (SSL + DTLS) VPN client
*
* Copyright © 2008-2015 Intel Corporation.
*
* Author: David Woodhouse <[email protected]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*/
#include <config.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include "openconnect-internal.h"
#define GET_BITS(bits) \
do { \
/* Strictly speaking, this check ought to be on \
* (srclen < 1 + (bits_left < bits)). However, when bits == 9 \
* the (bits_left < bits) comparison is always true so it \
* always comes out as (srclen < 2). \
* And bits is only anything *other* than 9 when we're reading \
* reading part of a match encoding. And in that case, there \
* damn well ought to be an end marker (7 more bits) after \
* what we're reading now, so it's perfectly OK to use \
* (srclen < 2) in that case too. And a *lot* cheaper. */ \
if (srclen < 2) \
return -EINVAL; \
/* Explicit comparison with 8 to optimise it into a tautology \
* in the the bits == 9 case, because the compiler doesn't \
* know that bits_left can never be larger than 8. */ \
if (bits >= 8 || bits >= bits_left) { \
/* We need *all* the bits that are left in the current \
* byte. Take them and bump the input pointer. */ \
data = (src[0] << (bits - bits_left)) & ((1 << bits) - 1); \
src++; \
srclen--; \
bits_left += 8 - bits; \
if (bits > 8 || bits_left < 8) { \
/* We need bits from the next byte too... */ \
data |= src[0] >> bits_left; \
/* ...if we used *all* of them then (which can \
* only happen if bits > 8), then bump the \
* input pointer again so we never leave \
* bits_left == 0. */ \
if (bits > 8 && !bits_left) { \
bits_left = 8; \
src++; \
srclen--; \
} \
} \
} else { \
/* We need fewer bits than are left in the current byte */ \
data = (src[0] >> (bits_left - bits)) & ((1ULL << bits) - 1); \
bits_left -= bits; \
} \
} while (0)
int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen)
{
int outlen = 0;
int bits_left = 8; /* Bits left in the current byte at *src */
uint32_t data;
uint16_t offset, length;
while (1) {
/* Get 9 bits, which is the minimum and a common case */
GET_BITS(9);
/* 0bbbbbbbb is a literal byte. The loop gives a hint to
* the compiler that we expect to see a few of these. */
while (data < 0x100) {
if (outlen == dstlen)
return -EFBIG;
dst[outlen++] = data;
GET_BITS(9);
}
/* 110000000 is the end marker */
if (data == 0x180)
return outlen;
/* 11bbbbbbb is a 7-bit offset */
offset = data & 0x7f;
/* 10bbbbbbbbbbb is an 11-bit offset, so get the next 4 bits */
if (data < 0x180) {
GET_BITS(4);
offset <<= 4;
offset |= data;
}
/* This is a compressed sequence; now get the length */
GET_BITS(2);
if (data != 3) {
/* 00, 01, 10 ==> 2, 3, 4 */
length = data + 2;
} else {
GET_BITS(2);
if (data != 3) {
/* 1100, 1101, 1110 => 5, 6, 7 */
length = data + 5;
} else {
/* For each 1111 prefix add 15 to the length. Then add
the value of final nybble. */
length = 8;
while (1) {
GET_BITS(4);
if (data != 15) {
length += data;
break;
}
length += 15;
}
}
}
if (offset > outlen)
return -EINVAL;
if (length + outlen > dstlen)
return -EFBIG;
while (length) {
dst[outlen] = dst[outlen - offset];
outlen++;
length--;
}
}
return -EINVAL;
}
#define PUT_BITS(nr, bits) \
do { \
outbits <<= (nr); \
outbits |= (bits); \
nr_outbits += (nr); \
if ((nr) > 8) { \
nr_outbits -= 8; \
if (outpos == dstlen) \
return -EFBIG; \
dst[outpos++] = outbits >> nr_outbits; \
} \
if (nr_outbits >= 8) { \
nr_outbits -= 8; \
if (outpos == dstlen) \
return -EFBIG; \
dst[outpos++] = outbits >> nr_outbits; \
} \
} while (0)
/*
* Much of the compression algorithm used here is based very loosely on ideas
* from isdn_lzscomp.c by Andre Beck: http://micky.ibh.de/~beck/stuff/lzs4i4l/
*/
int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen)
{
int length, offset;
int inpos = 0, outpos = 0;
uint16_t longest_match_len;
uint16_t hofs, longest_match_ofs;
uint16_t hash;
uint32_t outbits = 0;
int nr_outbits = 0;
/*
* This is theoretically a hash. But RAM is cheap and just loading the
* 16-bit value and using it as a hash is *much* faster.
*/
#define HASH_BITS 16
#define HASH_TABLE_SIZE (1ULL << HASH_BITS)
#define HASH(p) (((struct oc_packed_uint16_t *)(p))->d)
/*
* There are two data structures for tracking the history. The first
* is the true hash table, an array indexed by the hash value described
* above. It yields the offset in the input buffer at which the given
* hash was most recently seen. We use INVALID_OFS (0xffff) for none
* since we know IP packets are limited to 64KiB and we can never be
* *starting* a match at the penultimate byte of the packet.
*/
#define INVALID_OFS 0xffff
uint16_t hash_table[HASH_TABLE_SIZE]; /* Buffer offset for first match */
/*
* The second data structure allows us to find the previous occurrences
* of the same hash value. It is a ring buffer containing links only for
* the latest MAX_HISTORY bytes of the input. The lookup for a given
* offset will yield the previous offset at which the same data hash
* value was found.
*/
#define MAX_HISTORY (1<<11) /* Highest offset LZS can represent is 11 bits */
uint16_t hash_chain[MAX_HISTORY];
/* Just in case anyone tries to use this in a more general-purpose
* scenario... */
if (srclen > INVALID_OFS + 1)
return -EFBIG;
/* No need to initialise hash_chain since we can only ever follow
* links to it that have already been initialised. */
memset(hash_table, 0xff, sizeof(hash_table));
while (inpos < srclen - 2) {
hash = HASH(src + inpos);
hofs = hash_table[hash];
hash_chain[inpos & (MAX_HISTORY - 1)] = hofs;
hash_table[hash] = inpos;
if (hofs == INVALID_OFS || hofs + MAX_HISTORY <= inpos) {
PUT_BITS(9, src[inpos]);
inpos++;
continue;
}
/* Since the hash is 16-bits, we *know* the first two bytes match */
longest_match_len = 2;
longest_match_ofs = hofs;
for (; hofs != INVALID_OFS && hofs + MAX_HISTORY > inpos;
hofs = hash_chain[hofs & (MAX_HISTORY - 1)]) {
/* We only get here if longest_match_len is >= 2. We need to find
a match of longest_match_len + 1 for it to be interesting. */
if (!memcmp(src + hofs + 2, src + inpos + 2, longest_match_len - 1)) {
longest_match_ofs = hofs;
do {
longest_match_len++;
/* If we cannot *have* a longer match because we're at the
* end of the input, stop looking */
if (longest_match_len + inpos == srclen)
goto got_match;
} while (src[longest_match_len + inpos] == src[longest_match_len + hofs]);
}
/* Typical compressor tuning would have a break out of the loop
here depending on the number of potential match locations we've
tried, or a value of longest_match_len that's considered "good
enough" so we stop looking for something better. We could also
do a hybrid where we count the total bytes compared, so 5
attempts to find a match better than 10 bytes is worth the same
as 10 attempts to find a match better than 5 bytes. Or
something. Anyway, we currently don't give up until we run out
of reachable history — maximal compression. */
}
got_match:
/* Output offset, as 7-bit or 11-bit as appropriate */
offset = inpos - longest_match_ofs;
length = longest_match_len;
if (offset < 0x80)
PUT_BITS(9, 0x180 | offset);
else
PUT_BITS(13, 0x1000 | offset);
/* Output length */
if (length < 5)
PUT_BITS(2, length - 2);
else if (length < 8)
PUT_BITS(4, length + 7);
else {
length += 7;
while (length >= 30) {
PUT_BITS(8, 0xff);
length -= 30;
}
if (length >= 15)
PUT_BITS(8, 0xf0 + length - 15);
else
PUT_BITS(4, length);
}
/* If we're already done, don't bother updating the hash tables. */
if (inpos + longest_match_len >= srclen - 2) {
inpos += longest_match_len;
break;
}
/* We already added the first byte to the hash tables. Add the rest. */
inpos++;
while (--longest_match_len) {
hash = HASH(src + inpos);
hash_chain[inpos & (MAX_HISTORY - 1)] = hash_table[hash];
hash_table[hash] = inpos++;
}
}
/* Special cases at the end */
if (inpos == srclen - 2) {
hash = HASH(src + inpos);
hofs = hash_table[hash];
if (hofs != INVALID_OFS && hofs + MAX_HISTORY > inpos) {
offset = inpos - hofs;
if (offset < 0x80)
PUT_BITS(9, 0x180 | offset);
else
PUT_BITS(13, 0x1000 | offset);
/* The length is 2 bytes */
PUT_BITS(2, 0);
} else {
PUT_BITS(9, src[inpos]);
PUT_BITS(9, src[inpos + 1]);
}
} else if (inpos == srclen - 1) {
PUT_BITS(9, src[inpos]);
}
/* End marker, with 7 trailing zero bits to ensure that it's flushed. */
PUT_BITS(16, 0xc000);
return outpos;
}