blob: 77743be0872ce20d96685a6d536f7b6e9a9b6e05 [file] [log] [blame]
Willy Tarreaua004ade2017-05-30 17:22:18 +02001/*
2 * Huffman decoding and encoding for HPACK (RFC7541)
3 *
4 * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
5 * Copyright (C) 2017 HAProxy Technologies
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 * OTHER DEALINGS IN THE SOFTWARE.
26 */
27
28#include <stdio.h>
Willy Tarreaua1bd1fa2019-03-29 17:26:33 +010029#include <inttypes.h>
Willy Tarreaua004ade2017-05-30 17:22:18 +020030#include <string.h>
31
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020032#include <haproxy/api.h>
Willy Tarreaube327fa2020-06-03 09:09:57 +020033#include <haproxy/hpack-huff.h>
Willy Tarreaufcce0e12022-04-01 17:16:44 +020034#include <haproxy/net_helper.h>
Willy Tarreaua004ade2017-05-30 17:22:18 +020035
36struct huff {
37 uint32_t c; /* code point */
38 int b; /* bits */
39};
40
Willy Tarreaua004ade2017-05-30 17:22:18 +020041/* huffman table as per RFC7541 appendix B */
42static const struct huff ht[257] = {
43 [ 0] = { .c = 0x00001ff8, .b = 13 },
44 [ 1] = { .c = 0x007fffd8, .b = 23 },
45 [ 2] = { .c = 0x0fffffe2, .b = 28 },
46 [ 3] = { .c = 0x0fffffe3, .b = 28 },
47 [ 4] = { .c = 0x0fffffe4, .b = 28 },
48 [ 5] = { .c = 0x0fffffe5, .b = 28 },
49 [ 6] = { .c = 0x0fffffe6, .b = 28 },
50 [ 7] = { .c = 0x0fffffe7, .b = 28 },
51 [ 8] = { .c = 0x0fffffe8, .b = 28 },
52 [ 9] = { .c = 0x00ffffea, .b = 24 },
53 [ 10] = { .c = 0x3ffffffc, .b = 30 },
54 [ 11] = { .c = 0x0fffffe9, .b = 28 },
55 [ 12] = { .c = 0x0fffffea, .b = 28 },
56 [ 13] = { .c = 0x3ffffffd, .b = 30 },
57 [ 14] = { .c = 0x0fffffeb, .b = 28 },
58 [ 15] = { .c = 0x0fffffec, .b = 28 },
59 [ 16] = { .c = 0x0fffffed, .b = 28 },
60 [ 17] = { .c = 0x0fffffee, .b = 28 },
61 [ 18] = { .c = 0x0fffffef, .b = 28 },
62 [ 19] = { .c = 0x0ffffff0, .b = 28 },
63 [ 20] = { .c = 0x0ffffff1, .b = 28 },
64 [ 21] = { .c = 0x0ffffff2, .b = 28 },
65 [ 22] = { .c = 0x3ffffffe, .b = 30 },
66 [ 23] = { .c = 0x0ffffff3, .b = 28 },
67 [ 24] = { .c = 0x0ffffff4, .b = 28 },
68 [ 25] = { .c = 0x0ffffff5, .b = 28 },
69 [ 26] = { .c = 0x0ffffff6, .b = 28 },
70 [ 27] = { .c = 0x0ffffff7, .b = 28 },
71 [ 28] = { .c = 0x0ffffff8, .b = 28 },
72 [ 29] = { .c = 0x0ffffff9, .b = 28 },
73 [ 30] = { .c = 0x0ffffffa, .b = 28 },
74 [ 31] = { .c = 0x0ffffffb, .b = 28 },
75 [ 32] = { .c = 0x00000014, .b = 6 },
76 [ 33] = { .c = 0x000003f8, .b = 10 },
77 [ 34] = { .c = 0x000003f9, .b = 10 },
78 [ 35] = { .c = 0x00000ffa, .b = 12 },
79 [ 36] = { .c = 0x00001ff9, .b = 13 },
80 [ 37] = { .c = 0x00000015, .b = 6 },
81 [ 38] = { .c = 0x000000f8, .b = 8 },
82 [ 39] = { .c = 0x000007fa, .b = 11 },
83 [ 40] = { .c = 0x000003fa, .b = 10 },
84 [ 41] = { .c = 0x000003fb, .b = 10 },
85 [ 42] = { .c = 0x000000f9, .b = 8 },
86 [ 43] = { .c = 0x000007fb, .b = 11 },
87 [ 44] = { .c = 0x000000fa, .b = 8 },
88 [ 45] = { .c = 0x00000016, .b = 6 },
89 [ 46] = { .c = 0x00000017, .b = 6 },
90 [ 47] = { .c = 0x00000018, .b = 6 },
91 [ 48] = { .c = 0x00000000, .b = 5 },
92 [ 49] = { .c = 0x00000001, .b = 5 },
93 [ 50] = { .c = 0x00000002, .b = 5 },
94 [ 51] = { .c = 0x00000019, .b = 6 },
95 [ 52] = { .c = 0x0000001a, .b = 6 },
96 [ 53] = { .c = 0x0000001b, .b = 6 },
97 [ 54] = { .c = 0x0000001c, .b = 6 },
98 [ 55] = { .c = 0x0000001d, .b = 6 },
99 [ 56] = { .c = 0x0000001e, .b = 6 },
100 [ 57] = { .c = 0x0000001f, .b = 6 },
101 [ 58] = { .c = 0x0000005c, .b = 7 },
102 [ 59] = { .c = 0x000000fb, .b = 8 },
103 [ 60] = { .c = 0x00007ffc, .b = 15 },
104 [ 61] = { .c = 0x00000020, .b = 6 },
105 [ 62] = { .c = 0x00000ffb, .b = 12 },
106 [ 63] = { .c = 0x000003fc, .b = 10 },
107 [ 64] = { .c = 0x00001ffa, .b = 13 },
108 [ 65] = { .c = 0x00000021, .b = 6 },
109 [ 66] = { .c = 0x0000005d, .b = 7 },
110 [ 67] = { .c = 0x0000005e, .b = 7 },
111 [ 68] = { .c = 0x0000005f, .b = 7 },
112 [ 69] = { .c = 0x00000060, .b = 7 },
113 [ 70] = { .c = 0x00000061, .b = 7 },
114 [ 71] = { .c = 0x00000062, .b = 7 },
115 [ 72] = { .c = 0x00000063, .b = 7 },
116 [ 73] = { .c = 0x00000064, .b = 7 },
117 [ 74] = { .c = 0x00000065, .b = 7 },
118 [ 75] = { .c = 0x00000066, .b = 7 },
119 [ 76] = { .c = 0x00000067, .b = 7 },
120 [ 77] = { .c = 0x00000068, .b = 7 },
121 [ 78] = { .c = 0x00000069, .b = 7 },
122 [ 79] = { .c = 0x0000006a, .b = 7 },
123 [ 80] = { .c = 0x0000006b, .b = 7 },
124 [ 81] = { .c = 0x0000006c, .b = 7 },
125 [ 82] = { .c = 0x0000006d, .b = 7 },
126 [ 83] = { .c = 0x0000006e, .b = 7 },
127 [ 84] = { .c = 0x0000006f, .b = 7 },
128 [ 85] = { .c = 0x00000070, .b = 7 },
129 [ 86] = { .c = 0x00000071, .b = 7 },
130 [ 87] = { .c = 0x00000072, .b = 7 },
131 [ 88] = { .c = 0x000000fc, .b = 8 },
132 [ 89] = { .c = 0x00000073, .b = 7 },
133 [ 90] = { .c = 0x000000fd, .b = 8 },
134 [ 91] = { .c = 0x00001ffb, .b = 13 },
135 [ 92] = { .c = 0x0007fff0, .b = 19 },
136 [ 93] = { .c = 0x00001ffc, .b = 13 },
137 [ 94] = { .c = 0x00003ffc, .b = 14 },
138 [ 95] = { .c = 0x00000022, .b = 6 },
139 [ 96] = { .c = 0x00007ffd, .b = 15 },
140 [ 97] = { .c = 0x00000003, .b = 5 },
141 [ 98] = { .c = 0x00000023, .b = 6 },
142 [ 99] = { .c = 0x00000004, .b = 5 },
143 [100] = { .c = 0x00000024, .b = 6 },
144 [101] = { .c = 0x00000005, .b = 5 },
145 [102] = { .c = 0x00000025, .b = 6 },
146 [103] = { .c = 0x00000026, .b = 6 },
147 [104] = { .c = 0x00000027, .b = 6 },
148 [105] = { .c = 0x00000006, .b = 5 },
149 [106] = { .c = 0x00000074, .b = 7 },
150 [107] = { .c = 0x00000075, .b = 7 },
151 [108] = { .c = 0x00000028, .b = 6 },
152 [109] = { .c = 0x00000029, .b = 6 },
153 [110] = { .c = 0x0000002a, .b = 6 },
154 [111] = { .c = 0x00000007, .b = 5 },
155 [112] = { .c = 0x0000002b, .b = 6 },
156 [113] = { .c = 0x00000076, .b = 7 },
157 [114] = { .c = 0x0000002c, .b = 6 },
158 [115] = { .c = 0x00000008, .b = 5 },
159 [116] = { .c = 0x00000009, .b = 5 },
160 [117] = { .c = 0x0000002d, .b = 6 },
161 [118] = { .c = 0x00000077, .b = 7 },
162 [119] = { .c = 0x00000078, .b = 7 },
163 [120] = { .c = 0x00000079, .b = 7 },
164 [121] = { .c = 0x0000007a, .b = 7 },
165 [122] = { .c = 0x0000007b, .b = 7 },
166 [123] = { .c = 0x00007ffe, .b = 15 },
167 [124] = { .c = 0x000007fc, .b = 11 },
168 [125] = { .c = 0x00003ffd, .b = 14 },
169 [126] = { .c = 0x00001ffd, .b = 13 },
170 [127] = { .c = 0x0ffffffc, .b = 28 },
171 [128] = { .c = 0x000fffe6, .b = 20 },
172 [129] = { .c = 0x003fffd2, .b = 22 },
173 [130] = { .c = 0x000fffe7, .b = 20 },
174 [131] = { .c = 0x000fffe8, .b = 20 },
175 [132] = { .c = 0x003fffd3, .b = 22 },
176 [133] = { .c = 0x003fffd4, .b = 22 },
177 [134] = { .c = 0x003fffd5, .b = 22 },
178 [135] = { .c = 0x007fffd9, .b = 23 },
179 [136] = { .c = 0x003fffd6, .b = 22 },
180 [137] = { .c = 0x007fffda, .b = 23 },
181 [138] = { .c = 0x007fffdb, .b = 23 },
182 [139] = { .c = 0x007fffdc, .b = 23 },
183 [140] = { .c = 0x007fffdd, .b = 23 },
184 [141] = { .c = 0x007fffde, .b = 23 },
185 [142] = { .c = 0x00ffffeb, .b = 24 },
186 [143] = { .c = 0x007fffdf, .b = 23 },
187 [144] = { .c = 0x00ffffec, .b = 24 },
188 [145] = { .c = 0x00ffffed, .b = 24 },
189 [146] = { .c = 0x003fffd7, .b = 22 },
190 [147] = { .c = 0x007fffe0, .b = 23 },
191 [148] = { .c = 0x00ffffee, .b = 24 },
192 [149] = { .c = 0x007fffe1, .b = 23 },
193 [150] = { .c = 0x007fffe2, .b = 23 },
194 [151] = { .c = 0x007fffe3, .b = 23 },
195 [152] = { .c = 0x007fffe4, .b = 23 },
196 [153] = { .c = 0x001fffdc, .b = 21 },
197 [154] = { .c = 0x003fffd8, .b = 22 },
198 [155] = { .c = 0x007fffe5, .b = 23 },
199 [156] = { .c = 0x003fffd9, .b = 22 },
200 [157] = { .c = 0x007fffe6, .b = 23 },
201 [158] = { .c = 0x007fffe7, .b = 23 },
202 [159] = { .c = 0x00ffffef, .b = 24 },
203 [160] = { .c = 0x003fffda, .b = 22 },
204 [161] = { .c = 0x001fffdd, .b = 21 },
205 [162] = { .c = 0x000fffe9, .b = 20 },
206 [163] = { .c = 0x003fffdb, .b = 22 },
207 [164] = { .c = 0x003fffdc, .b = 22 },
208 [165] = { .c = 0x007fffe8, .b = 23 },
209 [166] = { .c = 0x007fffe9, .b = 23 },
210 [167] = { .c = 0x001fffde, .b = 21 },
211 [168] = { .c = 0x007fffea, .b = 23 },
212 [169] = { .c = 0x003fffdd, .b = 22 },
213 [170] = { .c = 0x003fffde, .b = 22 },
214 [171] = { .c = 0x00fffff0, .b = 24 },
215 [172] = { .c = 0x001fffdf, .b = 21 },
216 [173] = { .c = 0x003fffdf, .b = 22 },
217 [174] = { .c = 0x007fffeb, .b = 23 },
218 [175] = { .c = 0x007fffec, .b = 23 },
219 [176] = { .c = 0x001fffe0, .b = 21 },
220 [177] = { .c = 0x001fffe1, .b = 21 },
221 [178] = { .c = 0x003fffe0, .b = 22 },
222 [179] = { .c = 0x001fffe2, .b = 21 },
223 [180] = { .c = 0x007fffed, .b = 23 },
224 [181] = { .c = 0x003fffe1, .b = 22 },
225 [182] = { .c = 0x007fffee, .b = 23 },
226 [183] = { .c = 0x007fffef, .b = 23 },
227 [184] = { .c = 0x000fffea, .b = 20 },
228 [185] = { .c = 0x003fffe2, .b = 22 },
229 [186] = { .c = 0x003fffe3, .b = 22 },
230 [187] = { .c = 0x003fffe4, .b = 22 },
231 [188] = { .c = 0x007ffff0, .b = 23 },
232 [189] = { .c = 0x003fffe5, .b = 22 },
233 [190] = { .c = 0x003fffe6, .b = 22 },
234 [191] = { .c = 0x007ffff1, .b = 23 },
235 [192] = { .c = 0x03ffffe0, .b = 26 },
236 [193] = { .c = 0x03ffffe1, .b = 26 },
237 [194] = { .c = 0x000fffeb, .b = 20 },
238 [195] = { .c = 0x0007fff1, .b = 19 },
239 [196] = { .c = 0x003fffe7, .b = 22 },
240 [197] = { .c = 0x007ffff2, .b = 23 },
241 [198] = { .c = 0x003fffe8, .b = 22 },
242 [199] = { .c = 0x01ffffec, .b = 25 },
243 [200] = { .c = 0x03ffffe2, .b = 26 },
244 [201] = { .c = 0x03ffffe3, .b = 26 },
245 [202] = { .c = 0x03ffffe4, .b = 26 },
246 [203] = { .c = 0x07ffffde, .b = 27 },
247 [204] = { .c = 0x07ffffdf, .b = 27 },
248 [205] = { .c = 0x03ffffe5, .b = 26 },
249 [206] = { .c = 0x00fffff1, .b = 24 },
250 [207] = { .c = 0x01ffffed, .b = 25 },
251 [208] = { .c = 0x0007fff2, .b = 19 },
252 [209] = { .c = 0x001fffe3, .b = 21 },
253 [210] = { .c = 0x03ffffe6, .b = 26 },
254 [211] = { .c = 0x07ffffe0, .b = 27 },
255 [212] = { .c = 0x07ffffe1, .b = 27 },
256 [213] = { .c = 0x03ffffe7, .b = 26 },
257 [214] = { .c = 0x07ffffe2, .b = 27 },
258 [215] = { .c = 0x00fffff2, .b = 24 },
259 [216] = { .c = 0x001fffe4, .b = 21 },
260 [217] = { .c = 0x001fffe5, .b = 21 },
261 [218] = { .c = 0x03ffffe8, .b = 26 },
262 [219] = { .c = 0x03ffffe9, .b = 26 },
263 [220] = { .c = 0x0ffffffd, .b = 28 },
264 [221] = { .c = 0x07ffffe3, .b = 27 },
265 [222] = { .c = 0x07ffffe4, .b = 27 },
266 [223] = { .c = 0x07ffffe5, .b = 27 },
267 [224] = { .c = 0x000fffec, .b = 20 },
268 [225] = { .c = 0x00fffff3, .b = 24 },
269 [226] = { .c = 0x000fffed, .b = 20 },
270 [227] = { .c = 0x001fffe6, .b = 21 },
271 [228] = { .c = 0x003fffe9, .b = 22 },
272 [229] = { .c = 0x001fffe7, .b = 21 },
273 [230] = { .c = 0x001fffe8, .b = 21 },
274 [231] = { .c = 0x007ffff3, .b = 23 },
275 [232] = { .c = 0x003fffea, .b = 22 },
276 [233] = { .c = 0x003fffeb, .b = 22 },
277 [234] = { .c = 0x01ffffee, .b = 25 },
278 [235] = { .c = 0x01ffffef, .b = 25 },
279 [236] = { .c = 0x00fffff4, .b = 24 },
280 [237] = { .c = 0x00fffff5, .b = 24 },
281 [238] = { .c = 0x03ffffea, .b = 26 },
282 [239] = { .c = 0x007ffff4, .b = 23 },
283 [240] = { .c = 0x03ffffeb, .b = 26 },
284 [241] = { .c = 0x07ffffe6, .b = 27 },
285 [242] = { .c = 0x03ffffec, .b = 26 },
286 [243] = { .c = 0x03ffffed, .b = 26 },
287 [244] = { .c = 0x07ffffe7, .b = 27 },
288 [245] = { .c = 0x07ffffe8, .b = 27 },
289 [246] = { .c = 0x07ffffe9, .b = 27 },
290 [247] = { .c = 0x07ffffea, .b = 27 },
291 [248] = { .c = 0x07ffffeb, .b = 27 },
292 [249] = { .c = 0x0ffffffe, .b = 28 },
293 [250] = { .c = 0x07ffffec, .b = 27 },
294 [251] = { .c = 0x07ffffed, .b = 27 },
295 [252] = { .c = 0x07ffffee, .b = 27 },
296 [253] = { .c = 0x07ffffef, .b = 27 },
297 [254] = { .c = 0x07fffff0, .b = 27 },
298 [255] = { .c = 0x03ffffee, .b = 26 },
299 [256] = { .c = 0x3fffffff, .b = 30 }, /* EOS */
300};
301
302
Willy Tarreau074ebcd2021-04-02 13:31:46 +0200303/* Reversed huffman codes, generated by dev/hpack/gen-rht.c from the table
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200304 * above, then simplified by hand by extracting the few different length
305 * values and writing code to produce them instead.
Willy Tarreaua004ade2017-05-30 17:22:18 +0200306 *
307 * The codes are aligned on the MSB since that's how they appear in the stream.
308 *
309 * Quick summary below of the way the tables work. They're based on how the
310 * prefixes are organized, starting from the MSB.
311 *
312 * These codes fit in a single octet (5 to 8 bits) :
313 * 00/5 08/5 10/5 18/5 20/5 28/5 30/5 38/5
314 * 40/5 48/5
315 *
316 * 50/6 54/6 58/6 5c/6 60/6 64/6 68/6 6c/6
317 * 70/6 74/6 78/6 7c/6 80/6 84/6 88/6 8c/6
318 * 90/6 94/6 98/6 9c/6 a0/6 a4/6 a8/6 ac/6
319 * b0/6 b4/6
320 *
321 * b8/7 ba/7 bc/7 be/7 c0/7 c2/7 c4/7 c6/7
322 * c8/7 ca/7 cc/7 ce/7 d0/7 d2/7 d4/7 d6/7
323 * d8/7 da/7 dc/7 de/7 e0/7 e2/7 e4/7 e6/7
324 * e8/7 ea/7 ec/7 ee/7 f0/7 f2/7 f4/7 f6/7
325 *
326 * f8/8 f9/8 fa/8 fb/8 fc/8 fd/8
327 *
328 * ==> a single 256-symbol table based on the full byte provides a direct
329 * access and the bit count
330 *
331 * These codes fit in two octets (10 to 15 bits, neither 9 nor 16 bits code) :
332 *
333 * fe + 2 bits:
334 * 00/2 40/2 80/2 c0/2
335 *
336 * ff + 2..7 bits :
337 * 00/2
338 * 40/3 60/3 80/3
339 * a0/4 b0/4
340 * c0/5 c8/5 d0/5 d8/5 e0/5 e8/5
341 * f0/6 f4/6
342 * f8/7 fa/7 fc/7
343 *
344 * ==> a single 256-symbol table made of b0.0 and b1.7-1 provides a direct
345 * access and the bit count after a miss on the first one above.
346 *
347 * These ones fit in three octets :
348 * ff fe + 3..5 bits :
349 * 00/3 20/3 40/3 60/4 70/4 80/4 90/4 a0/4
350 * b0/4 c0/4 d0/4
351 * e0/5 e8/5 f0/5 f8/5
352 *
353 * ff ff + 5..8 bits :
354 * 00/5 08/5 10/5 18/5 20/5 28/5 30/5 38/5
355 * 40/5
356 * 48/6 4c/6 50/6 54/6 58/6 5c/6 60/6 64/6
357 * 68/6 6c/6 70/6 74/6 78/6 7c/6 80/6 84/6
358 * 88/6 8c/6 90/6 94/6 98/6 9c/6 a0/6 a4/6
359 * a8/6 ac/6
360 * b0/7 b2/7 b4/7 b6/7 b8/7 ba/7 bc/7 be/7
361 * c0/7 c2/7 c4/7 c6/7 c8/7 ca/7 cc/7 ce/7
362 * d0/7 d2/7 d4/7 d6/7 d8/7 da/7 dc/7 de/7
363 * e0/7 e2/7 e4/7 e6/7 e8/7
364 * ea/8 eb/8 ec/8 ed/8 ee/8 ef/8 f0/8 f1/8
365 * f2/8 f3/8 f4/8 f5/8
366 *
367 * ==> a 32-symbol table has to be applied to 0xfffe
368 * ==> a 256-symbol table has to be applied to 0xffff
369 *
370 * The other ones fit in four octets with 1 to 6 bits in the last one :
371 * ff ff f6 : 00/1 80/1
372 * ff ff f7 : 00/1 80/1
373 * ff ff f8 : 00/2 40/2 80/2 c0/2
374 * ff ff f9 : 00/2 40/2 80/2 c0/2
375 * ff ff fa : 00/2 40/2 80/2 c0/2
376 * ff ff fb : 00/2 40/2 80/2
377 * ff ff fb : c0/3 e0/3
378 * ff ff fc : 00/3 20/3 40/3 60/3 80/3 a0/3 c0/3 e0/3
379 * ff ff fd : 00/3 20/3 40/3 60/3 80/3 a0/3 c0/3 e0/3
380 * ff ff fe : 00/3
381 * ff ff fe : 20/4 30/4 40/4 50/4 60/4 70/4 80/4 90/4 a0/4 b0/4 c0/4 d0/4 e0/4 f0/4
382 * ff ff ff : 00/4 10/4 20/4 30/4 40/4 50/4 60/4 70/4 80/4 90/4 a0/4 b0/4 c0/4 d0/4 e0/4
383 * ff ff ff : f0/6 f4/6 f8/6 fc/6
384 *
385 * ==> a 256-symbol table with b2.0-3,b3.7-4 gives all of them except the
386 * distinction between ffffff{f0,f4,f8,fc} which is rare enough
387 * and can be done by hand when bit count == 30.
388 *
389 *
390 * Code lengths :
391 * 5..8 : 0x00..0xfe
392 * 10..15 : 0xfe
393 * 0xff 0x00..0xfe
394 * 19..20 : 0xff 0xfe 0x00..0xdf
395 * 21 : 0xff 0xfe 0xe0..0xff
396 * 21 : 0xff 0xff 0x00..0x40
397 * 22..24 : 0xff 0xff 0x00..0xf5
398 * 24..28 : 0xff 0xff 0xf5..0xff
399 * 30 : 0xff 0xff 0xff 0xf0..0xff
400 *
401 *
402 * if b0 < 0xfe ==> 5..8 bits (74 codes)
403 * if b0 == 0xfe or 0xff : 10..15
404 * => if b0 == 0xfe || b1 < 0xfe : lookup (b0:0|b1:7..1) (21 codes)
405 *
406 * -- b0 = 0xff --
407 * if b1 == 0xfe : 19..21 bits
408 * => lookup b2:7..3 (15 codes)
409 *
410 * -- b0 = 0xff, b1 = 0xff : 147 codes --
411 * if b2 < 0xf6 : 21..24 bits (76 codes)
412 * if b2 >= 0xf6 : 25..30 bits (71 codes)
413 *
414 * Algorithm:
415 * - if > 24 and < 32, read missing bits.
416 * - if less than 24 bits, read 1 byte. If past end, insert 0xff instead.
417 * - if b0 < 0xfe lookup b0 in table0[0..255]
418 * - else if b0 == 0xfe, manual lookup
419 * - else if b0 == 0xff, lookup b1 in table1[0..255]
420 * ...
421 */
422
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200423uint8_t rht_bit31_24[256] = {
424 /* 0x00 */ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
425 /* 0x08 */ 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31,
426 /* 0x10 */ 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32,
427 /* 0x18 */ 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61,
428 /* 0x20 */ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
429 /* 0x28 */ 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65,
430 /* 0x30 */ 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69,
431 /* 0x38 */ 0x6f, 0x6f, 0x6f, 0x6f, 0x6f, 0x6f, 0x6f, 0x6f,
432 /* 0x40 */ 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73,
433 /* 0x48 */ 0x74, 0x74, 0x74, 0x74, 0x74, 0x74, 0x74, 0x74,
434 /* 0x50 */ 0x20, 0x20, 0x20, 0x20,
435 /* 0x54 */ 0x25, 0x25, 0x25, 0x25,
436 /* 0x58 */ 0x2d, 0x2d, 0x2d, 0x2d,
437 /* 0x5c */ 0x2e, 0x2e, 0x2e, 0x2e,
438 /* 0x60 */ 0x2f, 0x2f, 0x2f, 0x2f,
439 /* 0x64 */ 0x33, 0x33, 0x33, 0x33,
440 /* 0x68 */ 0x34, 0x34, 0x34, 0x34,
441 /* 0x6c */ 0x35, 0x35, 0x35, 0x35,
442 /* 0x70 */ 0x36, 0x36, 0x36, 0x36,
443 /* 0x74 */ 0x37, 0x37, 0x37, 0x37,
444 /* 0x78 */ 0x38, 0x38, 0x38, 0x38,
445 /* 0x7c */ 0x39, 0x39, 0x39, 0x39,
446 /* 0x80 */ 0x3d, 0x3d, 0x3d, 0x3d,
447 /* 0x84 */ 0x41, 0x41, 0x41, 0x41,
448 /* 0x88 */ 0x5f, 0x5f, 0x5f, 0x5f,
449 /* 0x8c */ 0x62, 0x62, 0x62, 0x62,
450 /* 0x90 */ 0x64, 0x64, 0x64, 0x64,
451 /* 0x94 */ 0x66, 0x66, 0x66, 0x66,
452 /* 0x98 */ 0x67, 0x67, 0x67, 0x67,
453 /* 0x9c */ 0x68, 0x68, 0x68, 0x68,
454 /* 0xa0 */ 0x6c, 0x6c, 0x6c, 0x6c,
455 /* 0xa4 */ 0x6d, 0x6d, 0x6d, 0x6d,
456 /* 0xa8 */ 0x6e, 0x6e, 0x6e, 0x6e,
457 /* 0xac */ 0x70, 0x70, 0x70, 0x70,
458 /* 0xb0 */ 0x72, 0x72, 0x72, 0x72,
459 /* 0xb4 */ 0x75, 0x75, 0x75, 0x75,
460 /* 0xb8 */ 0x3a, 0x3a,
461 /* 0xba */ 0x42, 0x42,
462 /* 0xbc */ 0x43, 0x43,
463 /* 0xbe */ 0x44, 0x44,
464 /* 0xc0 */ 0x45, 0x45,
465 /* 0xc2 */ 0x46, 0x46,
466 /* 0xc4 */ 0x47, 0x47,
467 /* 0xc6 */ 0x48, 0x48,
468 /* 0xc8 */ 0x49, 0x49,
469 /* 0xca */ 0x4a, 0x4a,
470 /* 0xcc */ 0x4b, 0x4b,
471 /* 0xce */ 0x4c, 0x4c,
472 /* 0xd0 */ 0x4d, 0x4d,
473 /* 0xd2 */ 0x4e, 0x4e,
474 /* 0xd4 */ 0x4f, 0x4f,
475 /* 0xd6 */ 0x50, 0x50,
476 /* 0xd8 */ 0x51, 0x51,
477 /* 0xda */ 0x52, 0x52,
478 /* 0xdc */ 0x53, 0x53,
479 /* 0xde */ 0x54, 0x54,
480 /* 0xe0 */ 0x55, 0x55,
481 /* 0xe2 */ 0x56, 0x56,
482 /* 0xe4 */ 0x57, 0x57,
483 /* 0xe6 */ 0x59, 0x59,
484 /* 0xe8 */ 0x6a, 0x6a,
485 /* 0xea */ 0x6b, 0x6b,
486 /* 0xec */ 0x71, 0x71,
487 /* 0xee */ 0x76, 0x76,
488 /* 0xf0 */ 0x77, 0x77,
489 /* 0xf2 */ 0x78, 0x78,
490 /* 0xf4 */ 0x79, 0x79,
491 /* 0xf6 */ 0x7a, 0x7a,
492 /* 0xf8 */ 0x26,
493 /* 0xf9 */ 0x2a,
494 /* 0xfa */ 0x2c,
495 /* 0xfb */ 0x3b,
496 /* 0xfc */ 0x58,
497 /* 0xfd */ 0x5a,
Willy Tarreaua004ade2017-05-30 17:22:18 +0200498};
499
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200500uint8_t rht_bit24_17[256] = {
501 /* 0x00 */ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
502 /* 0x10 */ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
503 /* 0x20 */ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
504 /* 0x30 */ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
505 /* 0x40 */ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
506 /* 0x50 */ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
507 /* 0x60 */ 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29,
508 /* 0x70 */ 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29,
509 /* 0x80 */ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
510 /* 0x90 */ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
511 /* 0xa0 */ 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27,
512 /* 0xb0 */ 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b,
513 /* 0xc0 */ 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c,
514 /* 0xd0 */ 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23,
515 /* 0xd8 */ 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e,
516 /* 0xe0 */ 0x00, 0x00, 0x00, 0x00,
517 /* 0xe4 */ 0x24, 0x24, 0x24, 0x24,
518 /* 0xe8 */ 0x40, 0x40, 0x40, 0x40,
519 /* 0xec */ 0x5b, 0x5b, 0x5b, 0x5b,
520 /* 0xf0 */ 0x5d, 0x5d, 0x5d, 0x5d,
521 /* 0xf4 */ 0x7e, 0x7e, 0x7e, 0x7e,
522 /* 0xf8 */ 0x5e, 0x5e,
523 /* 0xfa */ 0x7d, 0x7d,
524 /* 0xfc */ 0x3c,
525 /* 0xfd */ 0x60,
526 /* 0xfe */ 0x7b,
Willy Tarreaua004ade2017-05-30 17:22:18 +0200527};
528
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200529uint8_t rht_bit15_8[256] = {
530 /* 0x00 */ 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0,
531 /* 0x08 */ 0xb1, 0xb1, 0xb1, 0xb1, 0xb1, 0xb1, 0xb1, 0xb1,
532 /* 0x10 */ 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3,
533 /* 0x18 */ 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1,
534 /* 0x20 */ 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8,
535 /* 0x28 */ 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9,
536 /* 0x30 */ 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3,
537 /* 0x38 */ 0xe5, 0xe5, 0xe5, 0xe5, 0xe5, 0xe5, 0xe5, 0xe5,
538 /* 0x40 */ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6,
539 /* 0x48 */ 0x81, 0x81, 0x81, 0x81,
540 /* 0x4c */ 0x84, 0x84, 0x84, 0x84,
541 /* 0x50 */ 0x85, 0x85, 0x85, 0x85,
542 /* 0x54 */ 0x86, 0x86, 0x86, 0x86,
543 /* 0x58 */ 0x88, 0x88, 0x88, 0x88,
544 /* 0x5c */ 0x92, 0x92, 0x92, 0x92,
545 /* 0x60 */ 0x9a, 0x9a, 0x9a, 0x9a,
546 /* 0x64 */ 0x9c, 0x9c, 0x9c, 0x9c,
547 /* 0x68 */ 0xa0, 0xa0, 0xa0, 0xa0,
548 /* 0x6c */ 0xa3, 0xa3, 0xa3, 0xa3,
549 /* 0x70 */ 0xa4, 0xa4, 0xa4, 0xa4,
550 /* 0x74 */ 0xa9, 0xa9, 0xa9, 0xa9,
551 /* 0x78 */ 0xaa, 0xaa, 0xaa, 0xaa,
552 /* 0x7c */ 0xad, 0xad, 0xad, 0xad,
553 /* 0x80 */ 0xb2, 0xb2, 0xb2, 0xb2,
554 /* 0x84 */ 0xb5, 0xb5, 0xb5, 0xb5,
555 /* 0x88 */ 0xb9, 0xb9, 0xb9, 0xb9,
556 /* 0x8c */ 0xba, 0xba, 0xba, 0xba,
557 /* 0x90 */ 0xbb, 0xbb, 0xbb, 0xbb,
558 /* 0x94 */ 0xbd, 0xbd, 0xbd, 0xbd,
559 /* 0x98 */ 0xbe, 0xbe, 0xbe, 0xbe,
560 /* 0x9c */ 0xc4, 0xc4, 0xc4, 0xc4,
561 /* 0xa0 */ 0xc6, 0xc6, 0xc6, 0xc6,
562 /* 0xa4 */ 0xe4, 0xe4, 0xe4, 0xe4,
563 /* 0xa8 */ 0xe8, 0xe8, 0xe8, 0xe8,
564 /* 0xac */ 0xe9, 0xe9, 0xe9, 0xe9,
565 /* 0xb0 */ 0x01, 0x01,
566 /* 0xb2 */ 0x87, 0x87,
567 /* 0xb4 */ 0x89, 0x89,
568 /* 0xb6 */ 0x8a, 0x8a,
569 /* 0xb8 */ 0x8b, 0x8b,
570 /* 0xba */ 0x8c, 0x8c,
571 /* 0xbc */ 0x8d, 0x8d,
572 /* 0xbe */ 0x8f, 0x8f,
573 /* 0xc0 */ 0x93, 0x93,
574 /* 0xc2 */ 0x95, 0x95,
575 /* 0xc4 */ 0x96, 0x96,
576 /* 0xc6 */ 0x97, 0x97,
577 /* 0xc8 */ 0x98, 0x98,
578 /* 0xca */ 0x9b, 0x9b,
579 /* 0xcc */ 0x9d, 0x9d,
580 /* 0xce */ 0x9e, 0x9e,
581 /* 0xd0 */ 0xa5, 0xa5,
582 /* 0xd2 */ 0xa6, 0xa6,
583 /* 0xd4 */ 0xa8, 0xa8,
584 /* 0xd6 */ 0xae, 0xae,
585 /* 0xd8 */ 0xaf, 0xaf,
586 /* 0xda */ 0xb4, 0xb4,
587 /* 0xdc */ 0xb6, 0xb6,
588 /* 0xde */ 0xb7, 0xb7,
589 /* 0xe0 */ 0xbc, 0xbc,
590 /* 0xe2 */ 0xbf, 0xbf,
591 /* 0xe4 */ 0xc5, 0xc5,
592 /* 0xe6 */ 0xe7, 0xe7,
593 /* 0xe8 */ 0xef, 0xef,
594 /* 0xea */ 0x09,
595 /* 0xeb */ 0x8e,
596 /* 0xec */ 0x90,
597 /* 0xed */ 0x91,
598 /* 0xee */ 0x94,
599 /* 0xef */ 0x9f,
600 /* 0xf0 */ 0xab,
601 /* 0xf1 */ 0xce,
602 /* 0xf2 */ 0xd7,
603 /* 0xf3 */ 0xe1,
604 /* 0xf4 */ 0xec,
605 /* 0xf5 */ 0xed,
Willy Tarreaua004ade2017-05-30 17:22:18 +0200606};
607
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200608/* below two non-overlapping tables are merged in order to save on L1D:
609 * - bits 15-11 for values 0x00-0x1f
610 * - bits 11-4 for values 0x60-0xff
Willy Tarreau094ecf12023-01-26 11:12:11 +0100611 * Note that there's no data between 0x20 and 0x5f, the caller must
612 * adjust its offsets by subtracting 0x40 for values 0x60 and above.
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200613 */
Willy Tarreau094ecf12023-01-26 11:12:11 +0100614uint8_t rht_bit15_11_11_4[192] = {
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200615 /* part used for bits 15-11 (0x00-0x1f) */
616 /* 0x00 */ 0x5c, 0x5c, 0x5c, 0x5c,
617 /* 0x04 */ 0xc3, 0xc3, 0xc3, 0xc3,
618 /* 0x08 */ 0xd0, 0xd0, 0xd0, 0xd0,
619 /* 0x0c */ 0x80, 0x80,
620 /* 0x0e */ 0x82, 0x82,
621 /* 0x10 */ 0x83, 0x83,
622 /* 0x12 */ 0xa2, 0xa2,
623 /* 0x14 */ 0xb8, 0xb8,
624 /* 0x16 */ 0xc2, 0xc2,
625 /* 0x18 */ 0xe0, 0xe0,
626 /* 0x1a */ 0xe2, 0xe2,
627 /* 0x1c */ 0x99,
628 /* 0x1d */ 0xa1,
629 /* 0x1e */ 0xa7,
630 /* 0x1f */ 0xac,
Willy Tarreaua004ade2017-05-30 17:22:18 +0200631
Willy Tarreau094ecf12023-01-26 11:12:11 +0100632 /* part used for bits 11-4 for 0xf600 (0x60-0xff), starting @0x20 */
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200633 /* 0x60 */ 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7,
634 /* 0x68 */ 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf,
635 /* 0x70 */ 0xea, 0xea, 0xea, 0xea, 0xea, 0xea, 0xea, 0xea,
636 /* 0x78 */ 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb,
637 /* 0x80 */ 0xc0, 0xc0, 0xc0, 0xc0,
638 /* 0x84 */ 0xc1, 0xc1, 0xc1, 0xc1,
639 /* 0x88 */ 0xc8, 0xc8, 0xc8, 0xc8,
640 /* 0x8c */ 0xc9, 0xc9, 0xc9, 0xc9,
641 /* 0x90 */ 0xca, 0xca, 0xca, 0xca,
642 /* 0x94 */ 0xcd, 0xcd, 0xcd, 0xcd,
643 /* 0x98 */ 0xd2, 0xd2, 0xd2, 0xd2,
644 /* 0x9c */ 0xd5, 0xd5, 0xd5, 0xd5,
645 /* 0xa0 */ 0xda, 0xda, 0xda, 0xda,
646 /* 0xa4 */ 0xdb, 0xdb, 0xdb, 0xdb,
647 /* 0xa8 */ 0xee, 0xee, 0xee, 0xee,
648 /* 0xac */ 0xf0, 0xf0, 0xf0, 0xf0,
649 /* 0xb0 */ 0xf2, 0xf2, 0xf2, 0xf2,
650 /* 0xb4 */ 0xf3, 0xf3, 0xf3, 0xf3,
651 /* 0xb8 */ 0xff, 0xff, 0xff, 0xff,
652 /* 0xbc */ 0xcb, 0xcb,
653 /* 0xbe */ 0xcc, 0xcc,
654 /* 0xc0 */ 0xd3, 0xd3,
655 /* 0xc2 */ 0xd4, 0xd4,
656 /* 0xc4 */ 0xd6, 0xd6,
657 /* 0xc6 */ 0xdd, 0xdd,
658 /* 0xc8 */ 0xde, 0xde,
659 /* 0xca */ 0xdf, 0xdf,
660 /* 0xcc */ 0xf1, 0xf1,
661 /* 0xce */ 0xf4, 0xf4,
662 /* 0xd0 */ 0xf5, 0xf5,
663 /* 0xd2 */ 0xf6, 0xf6,
664 /* 0xd4 */ 0xf7, 0xf7,
665 /* 0xd6 */ 0xf8, 0xf8,
666 /* 0xd8 */ 0xfa, 0xfa,
667 /* 0xda */ 0xfb, 0xfb,
668 /* 0xdc */ 0xfc, 0xfc,
669 /* 0xde */ 0xfd, 0xfd,
670 /* 0xe0 */ 0xfe, 0xfe,
671 /* 0xe2 */ 0x02,
672 /* 0xe3 */ 0x03,
673 /* 0xe4 */ 0x04,
674 /* 0xe5 */ 0x05,
675 /* 0xe6 */ 0x06,
676 /* 0xe7 */ 0x07,
677 /* 0xe8 */ 0x08,
678 /* 0xe9 */ 0x0b,
679 /* 0xea */ 0x0c,
680 /* 0xeb */ 0x0e,
681 /* 0xec */ 0x0f,
682 /* 0xed */ 0x10,
683 /* 0xee */ 0x11,
684 /* 0xef */ 0x12,
685 /* 0xf0 */ 0x13,
686 /* 0xf1 */ 0x14,
687 /* 0xf2 */ 0x15,
688 /* 0xf3 */ 0x17,
689 /* 0xf4 */ 0x18,
690 /* 0xf5 */ 0x19,
691 /* 0xf6 */ 0x1a,
692 /* 0xf7 */ 0x1b,
693 /* 0xf8 */ 0x1c,
694 /* 0xf9 */ 0x1d,
695 /* 0xfa */ 0x1e,
696 /* 0xfb */ 0x1f,
697 /* 0xfc */ 0x7f,
698 /* 0xfd */ 0xdc,
699 /* 0xfe */ 0xf9,
700 /* 0xff */ 0x0a,
701 /* Note, for [0xff], l==30 and bits 2..3 give 00:0x0a, 01:0x0d, 10:0x16, 11:EOS */
Willy Tarreaua004ade2017-05-30 17:22:18 +0200702};
703
704/* huffman-encode string <s> into the huff_tmp buffer and returns the amount
705 * of output bytes. The caller must ensure the output is large enough (ie at
706 * least 4 times as long as s).
707 *
708 * FIXME: bits are only counted for now, no code is emitted!
709 */
710int huff_enc(const char *s, char *out)
711{
712 int bits = 0;
713
714 while (*s) {
715 bits += ht[(uint8_t)*s].b;
716 s++;
717 }
718 bits += 7;
719
720 /* FIXME: huffman code is not emitted yet. */
721 //memset(out, 'H', bits / 8);
722 return bits / 8;
723}
724
725/* pass a huffman string, it will decode it and return the new output size or
726 * -1 in case of error.
727 *
728 * The principle of the decoder is to lookup full bytes in reverse-huffman
729 * tables. Since we may need up to 30 bits and the word positions are not
730 * always multiples of 8, we build the code word by shifting the "current"
731 * 32-bit word and the "next" one of the appropriate amount of bits. Once
732 * the shift goes beyond 32, words are swapped and the "next" one is refilled
733 * with new bytes. Shift operations are cheap when done a single time like this.
734 * On 64-bit platforms it is possible to further improve this by storing both
735 * of them in a single word.
736 */
737int huff_dec(const uint8_t *huff, int hlen, char *out, int olen)
738{
739 char *out_start = out;
740 char *out_end = out + olen;
741 const uint8_t *huff_end = huff + hlen;
742 uint32_t curr = 0;
743 uint32_t next = 0;
744 uint32_t shift;
745 uint32_t code; /* The 30-bit code being looked up, MSB-aligned */
746 uint8_t sym;
747 int bleft; /* bits left */
748 int l;
749
750 code = 0;
751 shift = 64; // start with an empty buffer
752 bleft = hlen << 3;
753 while (bleft > 0 && out != out_end) {
754 while (shift >= 32) {
755 curr = next;
756
Willy Tarreaufcce0e12022-04-01 17:16:44 +0200757 /* read up to 4 bytes into next */
Willy Tarreaua004ade2017-05-30 17:22:18 +0200758 next = 0;
759
760 if (huff + 4 <= huff_end) {
Willy Tarreaufcce0e12022-04-01 17:16:44 +0200761 next = read_n32(huff);
Willy Tarreaua004ade2017-05-30 17:22:18 +0200762 huff += 4;
763 }
764 else {
765 /* note: we append 0 and not 0xff so that we can
766 * distinguish shifted bits from a really inserted
767 * EOS.
768 */
Willy Tarreau17b46872022-04-01 17:12:08 +0200769 next = (((huff + 0 < huff_end) ? (uint32_t)huff[0] : 0x00) << 24) +
770 (((huff + 1 < huff_end) ? (uint32_t)huff[1] : 0x00) << 16) +
771 (((huff + 2 < huff_end) ? (uint32_t)huff[2] : 0x00) << 8) +
772 ((huff + 3 < huff_end) ? (uint32_t)huff[3] : 0x00);
Willy Tarreaua004ade2017-05-30 17:22:18 +0200773 huff = huff_end;
774 }
775
776 shift -= 32;
777 }
778
779 /* curr:next contain 64 bit of huffman code */
780 code = curr;
781 if (shift)
782 code = (code << shift) + (next >> (32 - shift));
783
784 /* now we necessarily have 32 bits available */
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200785 if (code < 0xfe000000) {
Willy Tarreaua004ade2017-05-30 17:22:18 +0200786 /* single byte */
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200787 sym = code >> 24;
788 l = sym < 0xb8 ?
789 sym < 0x50 ? 5 : 6 :
790 sym < 0xf8 ? 7 : 8;
791 sym = rht_bit31_24[code >> 24];
Willy Tarreaua004ade2017-05-30 17:22:18 +0200792 }
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200793 else if (code < 0xfffe0000) {
Willy Tarreaua004ade2017-05-30 17:22:18 +0200794 /* two bytes, 0xfe + 2 bits or 0xff + 2..7 bits */
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200795 sym = code >> 17;
796 l = sym < 0xe0 ?
797 sym < 0xa0 ? 10 : sym < 0xd0 ? 11 : 12 :
798 sym < 0xf8 ? 13 : sym < 0xfc ? 14 : 15;
799
800 sym = rht_bit24_17[(code >> 17) & 0xff];
Willy Tarreaua004ade2017-05-30 17:22:18 +0200801 }
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200802 else if (code < 0xffff0000) { /* 3..5 bits */
Willy Tarreaua004ade2017-05-30 17:22:18 +0200803 /* 0xff + 0xfe + 3..5 bits or
804 * 0xff + 0xff + 5..8 bits for values till 0xf5
805 */
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200806 sym = (code >> 11) & 0x1f;
807 l = sym < 0x0c ? 19 : sym < 0x1c ? 20 : 21;
808 sym = rht_bit15_11_11_4[(code >> 11) & 0x1f];
Willy Tarreaua004ade2017-05-30 17:22:18 +0200809 }
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200810 else if (code < 0xfffff600) { /* 5..8 bits */
Willy Tarreaua004ade2017-05-30 17:22:18 +0200811 /* that's 0xff + 0xff */
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200812 sym = code >> 8;
813
814 l = sym < 0xb0 ?
815 sym < 0x48 ? 21 : 22 :
816 sym < 0xea ? 23 : 24;
817 sym = rht_bit15_8[(code >> 8) & 0xff];
Willy Tarreaua004ade2017-05-30 17:22:18 +0200818 }
819 else {
820 /* 0xff 0xff 0xf6..0xff */
Willy Tarreau094ecf12023-01-26 11:12:11 +0100821 sym = code >> 4; /* sym = 0x60..0xff */
Willy Tarreau9f4f6b02022-09-20 07:27:15 +0200822 l = sym < 0xbc ?
823 sym < 0x80 ? 25 : 26 :
824 sym < 0xe2 ? 27 : sym < 0xff ? 28 : 30;
825 if (sym < 0xff)
Willy Tarreau094ecf12023-01-26 11:12:11 +0100826 sym = rht_bit15_11_11_4[((code >> 4) & 0xff) - 0x40L];
Willy Tarreaua004ade2017-05-30 17:22:18 +0200827 else if ((code & 0xff) == 0xf0)
828 sym = 10;
829 else if ((code & 0xff) == 0xf4)
830 sym = 13;
831 else if ((code & 0xff) == 0xf8)
832 sym = 22;
833 else { // 0xfc : EOS
834 break;
835 }
836 }
837
838 if (!l || bleft - l < 0)
839 break;
840
841 bleft -= l;
842 shift += l;
843 *out++ = sym;
844 }
845
846 if (bleft > 0) {
847 /* some bits were not consumed after the last code, they must
Willy Tarreau4235d182017-12-03 12:00:36 +0100848 * match EOS (ie: all ones) and there must be 7 bits or less.
849 * (7541#5.2).
Willy Tarreaua004ade2017-05-30 17:22:18 +0200850 */
Willy Tarreau4235d182017-12-03 12:00:36 +0100851 if (bleft > 7)
852 return -1;
853
Willy Tarreaua004ade2017-05-30 17:22:18 +0200854 if ((code & -(1 << (32 - bleft))) != (uint32_t)-(1 << (32 - bleft)))
855 return -1;
856 }
857
858 if (out < out_end)
859 *out = 0; // end of string whenever possible
860 return out - out_start;
861}