blob: 23912da6a223d211ce75ccf8c621084922b610d1 [file] [log] [blame]
Willy Tarreauab2b7822021-04-22 14:09:44 +02001/*
2 * Copyright (C) 2013-2015 Willy Tarreau <w@1wt.eu>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
Willy Tarreau388fc252021-05-14 08:44:52 +020025#include <inttypes.h>
Willy Tarreauab2b7822021-04-22 14:09:44 +020026#include <stdio.h>
27#include <string.h>
28#include <import/slz.h>
29#include <import/slz-tables.h>
30
31/* First, RFC1951-specific declarations and extracts from the RFC.
32 *
33 * RFC1951 - deflate stream format
34
35
36 * Data elements are packed into bytes in order of
37 increasing bit number within the byte, i.e., starting
38 with the least-significant bit of the byte.
39 * Data elements other than Huffman codes are packed
40 starting with the least-significant bit of the data
41 element.
42 * Huffman codes are packed starting with the most-
43 significant bit of the code.
44
45 3.2.3. Details of block format
46
47 Each block of compressed data begins with 3 header bits
48 containing the following data:
49
50 first bit BFINAL
51 next 2 bits BTYPE
52
53 Note that the header bits do not necessarily begin on a byte
54 boundary, since a block does not necessarily occupy an integral
55 number of bytes.
56
57 BFINAL is set if and only if this is the last block of the data
58 set.
59
60 BTYPE specifies how the data are compressed, as follows:
61
62 00 - no compression
63 01 - compressed with fixed Huffman codes
64 10 - compressed with dynamic Huffman codes
65 11 - reserved (error)
66
67 3.2.4. Non-compressed blocks (BTYPE=00)
68
69 Any bits of input up to the next byte boundary are ignored.
70 The rest of the block consists of the following information:
71
72 0 1 2 3 4...
73 +---+---+---+---+================================+
74 | LEN | NLEN |... LEN bytes of literal data...|
75 +---+---+---+---+================================+
76
77 LEN is the number of data bytes in the block. NLEN is the
78 one's complement of LEN.
79
80 3.2.5. Compressed blocks (length and distance codes)
81
82 As noted above, encoded data blocks in the "deflate" format
83 consist of sequences of symbols drawn from three conceptually
84 distinct alphabets: either literal bytes, from the alphabet of
85 byte values (0..255), or <length, backward distance> pairs,
86 where the length is drawn from (3..258) and the distance is
87 drawn from (1..32,768). In fact, the literal and length
88 alphabets are merged into a single alphabet (0..285), where
89 values 0..255 represent literal bytes, the value 256 indicates
90 end-of-block, and values 257..285 represent length codes
91 (possibly in conjunction with extra bits following the symbol
92 code) as follows:
93
94Length encoding :
95 Extra Extra Extra
96 Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
97 ---- ---- ------ ---- ---- ------- ---- ---- -------
98 257 0 3 267 1 15,16 277 4 67-82
99 258 0 4 268 1 17,18 278 4 83-98
100 259 0 5 269 2 19-22 279 4 99-114
101 260 0 6 270 2 23-26 280 4 115-130
102 261 0 7 271 2 27-30 281 5 131-162
103 262 0 8 272 2 31-34 282 5 163-194
104 263 0 9 273 3 35-42 283 5 195-226
105 264 0 10 274 3 43-50 284 5 227-257
106 265 1 11,12 275 3 51-58 285 0 258
107 266 1 13,14 276 3 59-66
108
109Distance encoding :
110 Extra Extra Extra
111 Code Bits Dist Code Bits Dist Code Bits Distance
112 ---- ---- ---- ---- ---- ------ ---- ---- --------
113 0 0 1 10 4 33-48 20 9 1025-1536
114 1 0 2 11 4 49-64 21 9 1537-2048
115 2 0 3 12 5 65-96 22 10 2049-3072
116 3 0 4 13 5 97-128 23 10 3073-4096
117 4 1 5,6 14 6 129-192 24 11 4097-6144
118 5 1 7,8 15 6 193-256 25 11 6145-8192
119 6 2 9-12 16 7 257-384 26 12 8193-12288
120 7 2 13-16 17 7 385-512 27 12 12289-16384
121 8 3 17-24 18 8 513-768 28 13 16385-24576
122 9 3 25-32 19 8 769-1024 29 13 24577-32768
123
124 3.2.6. Compression with fixed Huffman codes (BTYPE=01)
125
126 The Huffman codes for the two alphabets are fixed, and are not
127 represented explicitly in the data. The Huffman code lengths
128 for the literal/length alphabet are:
129
130 Lit Value Bits Codes
131 --------- ---- -----
132 0 - 143 8 00110000 through
133 10111111
134 144 - 255 9 110010000 through
135 111111111
136 256 - 279 7 0000000 through
137 0010111
138 280 - 287 8 11000000 through
139 11000111
140
141 The code lengths are sufficient to generate the actual codes,
142 as described above; we show the codes in the table for added
143 clarity. Literal/length values 286-287 will never actually
144 occur in the compressed data, but participate in the code
145 construction.
146
147 Distance codes 0-31 are represented by (fixed-length) 5-bit
148 codes, with possible additional bits as shown in the table
149 shown in Paragraph 3.2.5, above. Note that distance codes 30-
150 31 will never actually occur in the compressed data.
151
152*/
153
154/* back references, built in a way that is optimal for 32/64 bits */
155union ref {
156 struct {
157 uint32_t pos;
158 uint32_t word;
159 } by32;
160 uint64_t by64;
161};
162
163#if defined(USE_64BIT_QUEUE) && defined(UNALIGNED_LE_OK)
164
165/* enqueue code x of <xbits> bits (LSB aligned, at most 24) and copy complete
166 * 32-bit words into output buffer. X must not contain non-zero bits above
167 * xbits.
168 */
169static inline void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits)
170{
171 uint64_t queue = strm->queue + ((uint64_t)x << strm->qbits);
172 uint32_t qbits = strm->qbits + xbits;
173
174 if (__builtin_expect(qbits >= 32, 1)) {
175 *(uint32_t *)strm->outbuf = queue;
176 queue >>= 32;
177 qbits -= 32;
178 strm->outbuf += 4;
179 }
180
181 strm->queue = queue;
182 strm->qbits = qbits;
183}
184
185#define enqueue8 enqueue24
186
187/* flush the queue and align to next byte */
188static inline void flush_bits(struct slz_stream *strm)
189{
190 if (strm->qbits > 0)
191 *strm->outbuf++ = strm->queue;
192
193 if (strm->qbits > 8)
194 *strm->outbuf++ = strm->queue >> 8;
195
196 if (strm->qbits > 16)
197 *strm->outbuf++ = strm->queue >> 16;
198
199 if (strm->qbits > 24)
200 *strm->outbuf++ = strm->queue >> 24;
201
202 strm->queue = 0;
203 strm->qbits = 0;
204}
205
206#else /* non-64 bit or aligned or big endian */
207
208/* enqueue code x of <xbits> bits (LSB aligned, at most 24) and copy complete
209 * bytes into out buf. X must not contain non-zero bits above xbits. Prefer
210 * enqueue8() when xbits is known for being 8 or less.
211 */
212static void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits)
213{
214 uint32_t queue = strm->queue + (x << strm->qbits);
215 uint32_t qbits = strm->qbits + xbits;
216
217 if (qbits >= 16) {
218#ifndef UNALIGNED_LE_OK
219 strm->outbuf[0] = queue;
220 strm->outbuf[1] = queue >> 8;
221#else
222 *(uint16_t *)strm->outbuf = queue;
223#endif
224 strm->outbuf += 2;
225 queue >>= 16;
226 qbits -= 16;
227 }
228
229 if (qbits >= 8) {
230 qbits -= 8;
231 *strm->outbuf++ = queue;
232 queue >>= 8;
233 }
234 strm->qbits = qbits;
235 strm->queue = queue;
236 return;
237}
238
239/* enqueue code x of <xbits> bits (at most 8) and copy complete bytes into
240 * out buf. X must not contain non-zero bits above xbits.
241 */
242static inline void enqueue8(struct slz_stream *strm, uint32_t x, uint32_t xbits)
243{
244 uint32_t queue = strm->queue + (x << strm->qbits);
245 uint32_t qbits = strm->qbits + xbits;
246
247 if (__builtin_expect((signed)(qbits - 8) >= 0, 1)) {
248 qbits -= 8;
249 *strm->outbuf++ = queue;
250 queue >>= 8;
251 }
252
253 strm->qbits = qbits;
254 strm->queue = queue;
255}
256
257/* align to next byte */
258static inline void flush_bits(struct slz_stream *strm)
259{
260 if (strm->qbits > 0)
261 *strm->outbuf++ = strm->queue;
262
263 if (strm->qbits > 8)
264 *strm->outbuf++ = strm->queue >> 8;
265
266 strm->queue = 0;
267 strm->qbits = 0;
268}
269#endif
270
271
272/* only valid if buffer is already aligned */
273static inline void copy_8b(struct slz_stream *strm, uint32_t x)
274{
275 *strm->outbuf++ = x;
276}
277
278/* only valid if buffer is already aligned */
279static inline void copy_16b(struct slz_stream *strm, uint32_t x)
280{
281 strm->outbuf[0] = x;
282 strm->outbuf[1] = x >> 8;
283 strm->outbuf += 2;
284}
285
286/* only valid if buffer is already aligned */
287static inline void copy_32b(struct slz_stream *strm, uint32_t x)
288{
289 strm->outbuf[0] = x;
290 strm->outbuf[1] = x >> 8;
291 strm->outbuf[2] = x >> 16;
292 strm->outbuf[3] = x >> 24;
293 strm->outbuf += 4;
294}
295
296static inline void send_huff(struct slz_stream *strm, uint32_t code)
297{
298 uint32_t bits;
299
300 code = fixed_huff[code];
301 bits = code & 15;
302 code >>= 4;
303 enqueue24(strm, code, bits);
304}
305
306static inline void send_eob(struct slz_stream *strm)
307{
308 enqueue8(strm, 0, 7); // direct encoding of 256 = EOB (cf RFC1951)
309}
310
Ilya Shipitsinb2be9a12021-04-24 13:25:42 +0500311/* copies <len> literals from <buf>. <more> indicates that there are data past
Willy Tarreauab2b7822021-04-22 14:09:44 +0200312 * buf + <len>. <len> must not be null.
313 */
314static void copy_lit(struct slz_stream *strm, const void *buf, uint32_t len, int more)
315{
316 uint32_t len2;
317
318 do {
319 len2 = len;
320 if (__builtin_expect(len2 > 65535, 0))
321 len2 = 65535;
322
323 len -= len2;
324
325 if (strm->state != SLZ_ST_EOB)
326 send_eob(strm);
327
328 strm->state = (more || len) ? SLZ_ST_EOB : SLZ_ST_DONE;
329
330 enqueue8(strm, !(more || len), 3); // BFINAL = !more ; BTYPE = 00
331 flush_bits(strm);
332 copy_16b(strm, len2); // len2
333 copy_16b(strm, ~len2); // nlen2
334 memcpy(strm->outbuf, buf, len2);
335 buf += len2;
336 strm->outbuf += len2;
337 } while (len);
338}
339
Ilya Shipitsinb2be9a12021-04-24 13:25:42 +0500340/* copies <len> literals from <buf>. <more> indicates that there are data past
Willy Tarreauab2b7822021-04-22 14:09:44 +0200341 * buf + <len>. <len> must not be null.
342 */
343static void copy_lit_huff(struct slz_stream *strm, const unsigned char *buf, uint32_t len, int more)
344{
345 uint32_t pos;
346
347 /* This ugly construct limits the mount of tests and optimizes for the
348 * most common case (more > 0).
349 */
350 if (strm->state == SLZ_ST_EOB) {
351 eob:
352 strm->state = more ? SLZ_ST_FIXED : SLZ_ST_LAST;
353 enqueue8(strm, 2 + !more, 3); // BFINAL = !more ; BTYPE = 01
354 }
355 else if (!more) {
356 send_eob(strm);
357 goto eob;
358 }
359
360 pos = 0;
361 do {
362 send_huff(strm, buf[pos++]);
363 } while (pos < len);
364}
365
366/* format:
367 * bit0..31 = word
368 * bit32..63 = last position in buffer of similar content
369 */
370
371/* This hash provides good average results on HTML contents, and is among the
372 * few which provide almost optimal results on various different pages.
373 */
374static inline uint32_t slz_hash(uint32_t a)
375{
376#if defined(__ARM_FEATURE_CRC32)
377 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(a) : "r"(0));
378 return a >> (32 - HASH_BITS);
379#else
380 return ((a << 19) + (a << 6) - a) >> (32 - HASH_BITS);
381#endif
382}
383
384/* This function compares buffers <a> and <b> and reads 32 or 64 bits at a time
385 * during the approach. It makes us of unaligned little endian memory accesses
386 * on capable architectures. <max> is the maximum number of bytes that can be
387 * read, so both <a> and <b> must have at least <max> bytes ahead. <max> may
388 * safely be null or negative if that simplifies computations in the caller.
389 */
390static inline long memmatch(const unsigned char *a, const unsigned char *b, long max)
391{
392 long len = 0;
393
394#ifdef UNALIGNED_LE_OK
395 unsigned long xor;
396
397 while (1) {
398 if ((long)(len + 2 * sizeof(long)) > max) {
399 while (len < max) {
400 if (a[len] != b[len])
401 break;
402 len++;
403 }
404 return len;
405 }
406
407 xor = *(long *)&a[len] ^ *(long *)&b[len];
408 if (xor)
409 break;
410 len += sizeof(long);
411
412 xor = *(long *)&a[len] ^ *(long *)&b[len];
413 if (xor)
414 break;
415 len += sizeof(long);
416 }
417
418#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
419 /* x86 has bsf. We know that xor is non-null here */
420 asm("bsf %1,%0\n" : "=r"(xor) : "0" (xor));
421 return len + xor / 8;
422#else
423 if (sizeof(long) > 4 && !(xor & 0xffffffff)) {
424 /* This code is optimized out on 32-bit archs, but we still
425 * need to shift in two passes to avoid a warning. It is
426 * properly optimized out as a single shift.
427 */
428 xor >>= 16; xor >>= 16;
429 if (xor & 0xffff) {
430 if (xor & 0xff)
431 return len + 4;
432 return len + 5;
433 }
434 if (xor & 0xffffff)
435 return len + 6;
436 return len + 7;
437 }
438
439 if (xor & 0xffff) {
440 if (xor & 0xff)
441 return len;
442 return len + 1;
443 }
444 if (xor & 0xffffff)
445 return len + 2;
446 return len + 3;
447#endif // x86
448
449#else // UNALIGNED_LE_OK
450 /* This is the generic version for big endian or unaligned-incompatible
451 * architectures.
452 */
453 while (len < max) {
454 if (a[len] != b[len])
455 break;
456 len++;
457 }
458 return len;
459
460#endif
461}
462
463/* sets <count> BYTES to -32769 in <refs> so that any uninitialized entry will
464 * verify (pos-last-1 >= 32768) and be ignored. <count> must be a multiple of
465 * 128 bytes and <refs> must be at least one count in length. It's supposed to
466 * be applied to 64-bit aligned data exclusively, which makes it slightly
467 * faster than the regular memset() since no alignment check is performed.
468 */
Tim Duesterhuseaf16fc2021-09-20 19:59:42 +0200469static void reset_refs(union ref *refs, long count)
Willy Tarreauab2b7822021-04-22 14:09:44 +0200470{
471 /* avoid a shift/mask by casting to void* */
472 union ref *end = (void *)refs + count;
473
474 do {
475 refs[ 0].by64 = -32769;
476 refs[ 1].by64 = -32769;
477 refs[ 2].by64 = -32769;
478 refs[ 3].by64 = -32769;
479 refs[ 4].by64 = -32769;
480 refs[ 5].by64 = -32769;
481 refs[ 6].by64 = -32769;
482 refs[ 7].by64 = -32769;
483 refs[ 8].by64 = -32769;
484 refs[ 9].by64 = -32769;
485 refs[10].by64 = -32769;
486 refs[11].by64 = -32769;
487 refs[12].by64 = -32769;
488 refs[13].by64 = -32769;
489 refs[14].by64 = -32769;
490 refs[15].by64 = -32769;
491 refs += 16;
492 } while (refs < end);
493}
494
495/* Compresses <ilen> bytes from <in> into <out> according to RFC1951. The
496 * output result may be up to 5 bytes larger than the input, to which 2 extra
497 * bytes may be added to send the last chunk due to BFINAL+EOB encoding (10
498 * bits) when <more> is not set. The caller is responsible for ensuring there
499 * is enough room in the output buffer for this. The amount of output bytes is
500 * returned, and no CRC is computed.
501 */
502long slz_rfc1951_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
503{
504 long rem = ilen;
505 unsigned long pos = 0;
506 unsigned long last;
507 uint32_t word = 0;
508 long mlen;
509 uint32_t h;
510 uint64_t ent;
511
512 uint32_t plit = 0;
513 uint32_t bit9 = 0;
514 uint32_t dist, code;
515 union ref refs[1 << HASH_BITS];
516
517 if (!strm->level) {
518 /* force to send as literals (eg to preserve CPU) */
519 strm->outbuf = out;
520 plit = pos = ilen;
521 bit9 = 52; /* force literal dump */
522 goto final_lit_dump;
523 }
524
525 reset_refs(refs, sizeof(refs));
526
527 strm->outbuf = out;
528
529#ifndef UNALIGNED_FASTER
530 word = ((unsigned char)in[pos] << 8) + ((unsigned char)in[pos + 1] << 16) + ((unsigned char)in[pos + 2] << 24);
531#endif
532 while (rem >= 4) {
533#ifndef UNALIGNED_FASTER
534 word = ((unsigned char)in[pos + 3] << 24) + (word >> 8);
535#else
536 word = *(uint32_t *)&in[pos];
537#endif
538 h = slz_hash(word);
539 asm volatile ("" ::); // prevent gcc from trying to be smart with the prefetch
540
541 if (sizeof(long) >= 8) {
542 ent = refs[h].by64;
543 last = (uint32_t)ent;
544 ent >>= 32;
545 refs[h].by64 = ((uint64_t)pos) + ((uint64_t)word << 32);
546 } else {
547 ent = refs[h].by32.word;
548 last = refs[h].by32.pos;
549 refs[h].by32.pos = pos;
550 refs[h].by32.word = word;
551 }
552
Willy Tarreaufc89c3f2021-08-28 12:10:49 +0200553#ifdef FIND_OPTIMAL_MATCH
Willy Tarreauab2b7822021-04-22 14:09:44 +0200554 /* Experimental code to see what could be saved with an ideal
555 * longest match lookup algorithm. This one is very slow but
556 * scans the whole window. In short, here are the savings :
557 * file orig fast(ratio) optimal(ratio)
558 * README 5185 3419 (65.9%) 3165 (61.0%) -7.5%
559 * index.html 76799 35662 (46.4%) 29875 (38.9%) -16.3%
560 * rfc1952.c 29383 13442 (45.7%) 11793 (40.1%) -12.3%
561 *
562 * Thus the savings to expect for large files is at best 16%.
563 *
564 * A non-colliding hash gives 33025 instead of 35662 (-7.4%),
565 * and keeping the last two entries gives 31724 (-11.0%).
566 */
567 unsigned long scan;
568 int saved = 0;
569 int bestpos = 0;
570 int bestlen = 0;
571 int firstlen = 0;
572 int max_lookup = 2; // 0 = no limit
573
574 for (scan = pos - 1; scan < pos && (unsigned long)(pos - scan - 1) < 32768; scan--) {
575 if (*(uint32_t *)(in + scan) != word)
576 continue;
577
578 len = memmatch(in + pos, in + scan, rem);
579 if (!bestlen)
580 firstlen = len;
581
582 if (len > bestlen) {
583 bestlen = len;
584 bestpos = scan;
585 }
586 if (!--max_lookup)
587 break;
588 }
589 if (bestlen) {
590 //printf("pos=%d last=%d bestpos=%d word=%08x ent=%08x len=%d\n",
591 // (int)pos, (int)last, (int)bestpos, (int)word, (int)ent, bestlen);
592 last = bestpos;
593 ent = word;
594 saved += bestlen - firstlen;
595 }
596 //fprintf(stderr, "first=%d best=%d saved_total=%d\n", firstlen, bestlen, saved);
597#endif
598
599 if ((uint32_t)ent != word) {
600 send_as_lit:
601 rem--;
602 plit++;
603 bit9 += ((unsigned char)word >= 144);
604 pos++;
605 continue;
606 }
607
608 /* We reject pos = last and pos > last+32768 */
609 if ((unsigned long)(pos - last - 1) >= 32768)
610 goto send_as_lit;
611
612 /* Note: cannot encode a length larger than 258 bytes */
613 mlen = memmatch(in + pos + 4, in + last + 4, (rem > 258 ? 258 : rem) - 4) + 4;
614
615 /* found a matching entry */
616
617 if (bit9 >= 52 && mlen < 6)
618 goto send_as_lit;
619
620 /* compute the output code, its size and the length's size in
621 * bits to know if the reference is cheaper than literals.
622 */
623 code = len_fh[mlen];
624
625 /* direct mapping of dist->huffman code */
626 dist = fh_dist_table[pos - last - 1];
627
628 /* if encoding the dist+length is more expensive than sending
629 * the equivalent as bytes, lets keep the literals.
630 */
631 if ((dist & 0x1f) + (code >> 16) + 8 >= 8 * mlen + bit9)
632 goto send_as_lit;
633
634 /* first, copy pending literals */
635 if (plit) {
636 /* Huffman encoding requires 9 bits for octets 144..255, so this
637 * is a waste of space for binary data. Switching between Huffman
638 * and no-comp then huffman consumes 52 bits (7 for EOB + 3 for
639 * block type + 7 for alignment + 32 for LEN+NLEN + 3 for next
640 * block. Only use plain literals if there are more than 52 bits
641 * to save then.
642 */
643 if (bit9 >= 52)
644 copy_lit(strm, in + pos - plit, plit, 1);
645 else
646 copy_lit_huff(strm, in + pos - plit, plit, 1);
647
648 plit = 0;
649 }
650
651 /* use mode 01 - fixed huffman */
652 if (strm->state == SLZ_ST_EOB) {
653 strm->state = SLZ_ST_FIXED;
654 enqueue8(strm, 0x02, 3); // BTYPE = 01, BFINAL = 0
655 }
656
657 /* copy the length first */
658 enqueue24(strm, code & 0xFFFF, code >> 16);
659
660 /* in fixed huffman mode, dist is fixed 5 bits */
661 enqueue24(strm, dist >> 5, dist & 0x1f);
662 bit9 = 0;
663 rem -= mlen;
664 pos += mlen;
665
666#ifndef UNALIGNED_FASTER
667#ifdef UNALIGNED_LE_OK
668 word = *(uint32_t *)&in[pos - 1];
669#else
670 word = ((unsigned char)in[pos] << 8) + ((unsigned char)in[pos + 1] << 16) + ((unsigned char)in[pos + 2] << 24);
671#endif
672#endif
673 }
674
675 if (__builtin_expect(rem, 0)) {
676 /* we're reading the 1..3 last bytes */
677 plit += rem;
678 do {
679 bit9 += ((unsigned char)in[pos++] >= 144);
680 } while (--rem);
681 }
682
683 final_lit_dump:
684 /* now copy remaining literals or mark the end */
685 if (plit) {
686 if (bit9 >= 52)
687 copy_lit(strm, in + pos - plit, plit, more);
688 else
689 copy_lit_huff(strm, in + pos - plit, plit, more);
690
691 plit = 0;
692 }
693
694 strm->ilen += ilen;
695 return strm->outbuf - out;
696}
697
698/* Initializes stream <strm> for use with raw deflate (rfc1951). The CRC is
699 * unused but set to zero. The compression level passed in <level> is set. This
700 * value can only be 0 (no compression) or 1 (compression) and other values
701 * will lead to unpredictable behaviour. The function always returns 0.
702 */
703int slz_rfc1951_init(struct slz_stream *strm, int level)
704{
705 strm->state = SLZ_ST_EOB; // no header
706 strm->level = level;
707 strm->format = SLZ_FMT_DEFLATE;
708 strm->crc32 = 0;
709 strm->ilen = 0;
710 strm->qbits = 0;
711 strm->queue = 0;
712 return 0;
713}
714
715/* Flushes any pending for stream <strm> into buffer <buf>, then sends BTYPE=1
716 * and BFINAL=1 if needed. The stream ends in SLZ_ST_DONE. It returns the number
717 * of bytes emitted. The trailer consists in flushing the possibly pending bits
718 * from the queue (up to 7 bits), then possibly EOB (7 bits), then 3 bits, EOB,
719 * a rounding to the next byte, which amounts to a total of 4 bytes max, that
720 * the caller must ensure are available before calling the function.
721 */
722int slz_rfc1951_finish(struct slz_stream *strm, unsigned char *buf)
723{
724 strm->outbuf = buf;
725
726 if (strm->state == SLZ_ST_FIXED || strm->state == SLZ_ST_LAST) {
727 strm->state = (strm->state == SLZ_ST_LAST) ? SLZ_ST_DONE : SLZ_ST_EOB;
728 send_eob(strm);
729 }
730
731 if (strm->state != SLZ_ST_DONE) {
732 /* send BTYPE=1, BFINAL=1 */
733 enqueue8(strm, 3, 3);
734 send_eob(strm);
735 strm->state = SLZ_ST_DONE;
736 }
737
738 flush_bits(strm);
739 return strm->outbuf - buf;
740}
741
742/* Now RFC1952-specific declarations and extracts from RFC.
743 * From RFC1952 about the GZIP file format :
744
745A gzip file consists of a series of "members" ...
746
7472.3. Member format
748
749 Each member has the following structure:
750
751 +---+---+---+---+---+---+---+---+---+---+
752 |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->)
753 +---+---+---+---+---+---+---+---+---+---+
754
755 (if FLG.FEXTRA set)
756
757 +---+---+=================================+
758 | XLEN |...XLEN bytes of "extra field"...| (more-->)
759 +---+---+=================================+
760
761 (if FLG.FNAME set)
762
763 +=========================================+
764 |...original file name, zero-terminated...| (more-->)
765 +=========================================+
766
767 (if FLG.FCOMMENT set)
768
769 +===================================+
770 |...file comment, zero-terminated...| (more-->)
771 +===================================+
772
773 (if FLG.FHCRC set)
774
775 +---+---+
776 | CRC16 |
777 +---+---+
778
779 +=======================+
780 |...compressed blocks...| (more-->)
781 +=======================+
782
783 0 1 2 3 4 5 6 7
784 +---+---+---+---+---+---+---+---+
785 | CRC32 | ISIZE |
786 +---+---+---+---+---+---+---+---+
787
788
7892.3.1. Member header and trailer
790
791 ID1 (IDentification 1)
792 ID2 (IDentification 2)
793 These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
794 (0x8b, \213), to identify the file as being in gzip format.
795
796 CM (Compression Method)
797 This identifies the compression method used in the file. CM
798 = 0-7 are reserved. CM = 8 denotes the "deflate"
799 compression method, which is the one customarily used by
800 gzip and which is documented elsewhere.
801
802 FLG (FLaGs)
803 This flag byte is divided into individual bits as follows:
804
805 bit 0 FTEXT
806 bit 1 FHCRC
807 bit 2 FEXTRA
808 bit 3 FNAME
809 bit 4 FCOMMENT
810 bit 5 reserved
811 bit 6 reserved
812 bit 7 reserved
813
814 Reserved FLG bits must be zero.
815
816 MTIME (Modification TIME)
817 This gives the most recent modification time of the original
818 file being compressed. The time is in Unix format, i.e.,
819 seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this
820 may cause problems for MS-DOS and other systems that use
821 local rather than Universal time.) If the compressed data
822 did not come from a file, MTIME is set to the time at which
823 compression started. MTIME = 0 means no time stamp is
824 available.
825
826 XFL (eXtra FLags)
827 These flags are available for use by specific compression
828 methods. The "deflate" method (CM = 8) sets these flags as
829 follows:
830
831 XFL = 2 - compressor used maximum compression,
832 slowest algorithm
833 XFL = 4 - compressor used fastest algorithm
834
835 OS (Operating System)
836 This identifies the type of file system on which compression
837 took place. This may be useful in determining end-of-line
838 convention for text files. The currently defined values are
839 as follows:
840
841 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
842 1 - Amiga
843 2 - VMS (or OpenVMS)
844 3 - Unix
845 4 - VM/CMS
846 5 - Atari TOS
847 6 - HPFS filesystem (OS/2, NT)
848 7 - Macintosh
849 8 - Z-System
850 9 - CP/M
851 10 - TOPS-20
852 11 - NTFS filesystem (NT)
853 12 - QDOS
854 13 - Acorn RISCOS
855 255 - unknown
856
857 ==> A file compressed using "gzip -1" on Unix-like systems can be :
858
859 1F 8B 08 00 00 00 00 00 04 03
860 <deflate-compressed stream>
861 crc32 size32
862*/
863
864static const unsigned char gzip_hdr[] = { 0x1F, 0x8B, // ID1, ID2
865 0x08, 0x00, // Deflate, flags (none)
866 0x00, 0x00, 0x00, 0x00, // mtime: none
867 0x04, 0x03 }; // fastest comp, OS=Unix
868
869static inline uint32_t crc32_char(uint32_t crc, uint8_t x)
870{
871#if defined(__ARM_FEATURE_CRC32)
872 crc = ~crc;
873 __asm__ volatile("crc32b %w0,%w0,%w1" : "+r"(crc) : "r"(x));
874 crc = ~crc;
875#else
876 crc = crc32_fast[0][(crc ^ x) & 0xff] ^ (crc >> 8);
877#endif
878 return crc;
879}
880
881static inline uint32_t crc32_uint32(uint32_t data)
882{
883#if defined(__ARM_FEATURE_CRC32)
884 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(data) : "r"(~0UL));
885 data = ~data;
886#else
887 data = crc32_fast[3][(data >> 0) & 0xff] ^
888 crc32_fast[2][(data >> 8) & 0xff] ^
889 crc32_fast[1][(data >> 16) & 0xff] ^
890 crc32_fast[0][(data >> 24) & 0xff];
891#endif
892 return data;
893}
894
895/* Modified version originally from RFC1952, working with non-inverting CRCs */
896uint32_t slz_crc32_by1(uint32_t crc, const unsigned char *buf, int len)
897{
898 int n;
899
900 for (n = 0; n < len; n++)
901 crc = crc32_char(crc, buf[n]);
902 return crc;
903}
904
905/* This version computes the crc32 of <buf> over <len> bytes, doing most of it
906 * in 32-bit chunks.
907 */
908uint32_t slz_crc32_by4(uint32_t crc, const unsigned char *buf, int len)
909{
910 const unsigned char *end = buf + len;
911
912 while (buf <= end - 16) {
913#ifdef UNALIGNED_LE_OK
914#if defined(__ARM_FEATURE_CRC32)
915 crc = ~crc;
916 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf)));
917 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 4)));
918 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 8)));
919 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 12)));
920 crc = ~crc;
921#else
922 crc ^= *(uint32_t *)buf;
923 crc = crc32_uint32(crc);
924
925 crc ^= *(uint32_t *)(buf + 4);
926 crc = crc32_uint32(crc);
927
928 crc ^= *(uint32_t *)(buf + 8);
929 crc = crc32_uint32(crc);
930
931 crc ^= *(uint32_t *)(buf + 12);
932 crc = crc32_uint32(crc);
933#endif
934#else
935 crc = crc32_fast[3][(buf[0] ^ (crc >> 0)) & 0xff] ^
936 crc32_fast[2][(buf[1] ^ (crc >> 8)) & 0xff] ^
937 crc32_fast[1][(buf[2] ^ (crc >> 16)) & 0xff] ^
938 crc32_fast[0][(buf[3] ^ (crc >> 24)) & 0xff];
939
940 crc = crc32_fast[3][(buf[4] ^ (crc >> 0)) & 0xff] ^
941 crc32_fast[2][(buf[5] ^ (crc >> 8)) & 0xff] ^
942 crc32_fast[1][(buf[6] ^ (crc >> 16)) & 0xff] ^
943 crc32_fast[0][(buf[7] ^ (crc >> 24)) & 0xff];
944
945 crc = crc32_fast[3][(buf[8] ^ (crc >> 0)) & 0xff] ^
946 crc32_fast[2][(buf[9] ^ (crc >> 8)) & 0xff] ^
947 crc32_fast[1][(buf[10] ^ (crc >> 16)) & 0xff] ^
948 crc32_fast[0][(buf[11] ^ (crc >> 24)) & 0xff];
949
950 crc = crc32_fast[3][(buf[12] ^ (crc >> 0)) & 0xff] ^
951 crc32_fast[2][(buf[13] ^ (crc >> 8)) & 0xff] ^
952 crc32_fast[1][(buf[14] ^ (crc >> 16)) & 0xff] ^
953 crc32_fast[0][(buf[15] ^ (crc >> 24)) & 0xff];
954#endif
955 buf += 16;
956 }
957
958 while (buf <= end - 4) {
959#ifdef UNALIGNED_LE_OK
960 crc ^= *(uint32_t *)buf;
961 crc = crc32_uint32(crc);
962#else
963 crc = crc32_fast[3][(buf[0] ^ (crc >> 0)) & 0xff] ^
964 crc32_fast[2][(buf[1] ^ (crc >> 8)) & 0xff] ^
965 crc32_fast[1][(buf[2] ^ (crc >> 16)) & 0xff] ^
966 crc32_fast[0][(buf[3] ^ (crc >> 24)) & 0xff];
967#endif
968 buf += 4;
969 }
970
971 while (buf < end)
Willy Tarreau027fdcb2021-05-12 08:34:36 +0200972 crc = crc32_char(crc, *buf++);
Willy Tarreauab2b7822021-04-22 14:09:44 +0200973 return crc;
974}
975
976/* uses the most suitable crc32 function to update crc on <buf, len> */
977static inline uint32_t update_crc(uint32_t crc, const void *buf, int len)
978{
979 return slz_crc32_by4(crc, buf, len);
980}
981
982/* Sends the gzip header for stream <strm> into buffer <buf>. When it's done,
983 * the stream state is updated to SLZ_ST_EOB. It returns the number of bytes
984 * emitted which is always 10. The caller is responsible for ensuring there's
985 * always enough room in the buffer.
986 */
987int slz_rfc1952_send_header(struct slz_stream *strm, unsigned char *buf)
988{
989 memcpy(buf, gzip_hdr, sizeof(gzip_hdr));
990 strm->state = SLZ_ST_EOB;
991 return sizeof(gzip_hdr);
992}
993
994/* Encodes the block according to rfc1952. This means that the CRC of the input
995 * block is computed according to the CRC32 algorithm. If the header was never
996 * sent, it may be sent first. The number of output bytes is returned.
997 */
998long slz_rfc1952_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
999{
1000 long ret = 0;
1001
1002 if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
1003 ret += slz_rfc1952_send_header(strm, out);
1004
1005 strm->crc32 = update_crc(strm->crc32, in, ilen);
1006 ret += slz_rfc1951_encode(strm, out + ret, in, ilen, more);
1007 return ret;
1008}
1009
1010/* Initializes stream <strm> for use with the gzip format (rfc1952). The
1011 * compression level passed in <level> is set. This value can only be 0 (no
1012 * compression) or 1 (compression) and other values will lead to unpredictable
1013 * behaviour. The function always returns 0.
1014 */
1015int slz_rfc1952_init(struct slz_stream *strm, int level)
1016{
1017 strm->state = SLZ_ST_INIT;
1018 strm->level = level;
1019 strm->format = SLZ_FMT_GZIP;
1020 strm->crc32 = 0;
1021 strm->ilen = 0;
1022 strm->qbits = 0;
1023 strm->queue = 0;
1024 return 0;
1025}
1026
1027/* Flushes pending bits and sends the gzip trailer for stream <strm> into
1028 * buffer <buf>. When it's done, the stream state is updated to SLZ_ST_END. It
1029 * returns the number of bytes emitted. The trailer consists in flushing the
1030 * possibly pending bits from the queue (up to 24 bits), rounding to the next
1031 * byte, then 4 bytes for the CRC and another 4 bytes for the input length.
Ilya Shipitsinb2be9a12021-04-24 13:25:42 +05001032 * That may about to 4+4+4 = 12 bytes, that the caller must ensure are
Willy Tarreauab2b7822021-04-22 14:09:44 +02001033 * available before calling the function. Note that if the initial header was
1034 * never sent, it will be sent first as well (10 extra bytes).
1035 */
1036int slz_rfc1952_finish(struct slz_stream *strm, unsigned char *buf)
1037{
1038 strm->outbuf = buf;
1039
1040 if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
1041 strm->outbuf += slz_rfc1952_send_header(strm, strm->outbuf);
1042
1043 slz_rfc1951_finish(strm, strm->outbuf);
1044 copy_32b(strm, strm->crc32);
1045 copy_32b(strm, strm->ilen);
1046 strm->state = SLZ_ST_END;
1047
1048 return strm->outbuf - buf;
1049}
1050
1051
1052/* RFC1950-specific stuff. This is for the Zlib stream format.
1053 * From RFC1950 (zlib) :
1054 *
1055
1056 2.2. Data format
1057
1058 A zlib stream has the following structure:
1059
1060 0 1
1061 +---+---+
1062 |CMF|FLG| (more-->)
1063 +---+---+
1064
1065
1066 (if FLG.FDICT set)
1067
1068 0 1 2 3
1069 +---+---+---+---+
1070 | DICTID | (more-->)
1071 +---+---+---+---+
1072
1073 +=====================+---+---+---+---+
1074 |...compressed data...| ADLER32 |
1075 +=====================+---+---+---+---+
1076
1077 Any data which may appear after ADLER32 are not part of the zlib
1078 stream.
1079
1080 CMF (Compression Method and flags)
1081 This byte is divided into a 4-bit compression method and a 4-
1082 bit information field depending on the compression method.
1083
1084 bits 0 to 3 CM Compression method
1085 bits 4 to 7 CINFO Compression info
1086
1087 CM (Compression method)
1088 This identifies the compression method used in the file. CM = 8
1089 denotes the "deflate" compression method with a window size up
1090 to 32K. This is the method used by gzip and PNG (see
1091 references [1] and [2] in Chapter 3, below, for the reference
1092 documents). CM = 15 is reserved. It might be used in a future
1093 version of this specification to indicate the presence of an
1094 extra field before the compressed data.
1095
1096 CINFO (Compression info)
1097 For CM = 8, CINFO is the base-2 logarithm of the LZ77 window
1098 size, minus eight (CINFO=7 indicates a 32K window size). Values
1099 of CINFO above 7 are not allowed in this version of the
1100 specification. CINFO is not defined in this specification for
1101 CM not equal to 8.
1102
1103 FLG (FLaGs)
1104 This flag byte is divided as follows:
1105
1106 bits 0 to 4 FCHECK (check bits for CMF and FLG)
1107 bit 5 FDICT (preset dictionary)
1108 bits 6 to 7 FLEVEL (compression level)
1109
1110 The FCHECK value must be such that CMF and FLG, when viewed as
1111 a 16-bit unsigned integer stored in MSB order (CMF*256 + FLG),
1112 is a multiple of 31.
1113
1114
1115 FDICT (Preset dictionary)
1116 If FDICT is set, a DICT dictionary identifier is present
1117 immediately after the FLG byte. The dictionary is a sequence of
1118 bytes which are initially fed to the compressor without
1119 producing any compressed output. DICT is the Adler-32 checksum
1120 of this sequence of bytes (see the definition of ADLER32
1121 below). The decompressor can use this identifier to determine
1122 which dictionary has been used by the compressor.
1123
1124 FLEVEL (Compression level)
1125 These flags are available for use by specific compression
1126 methods. The "deflate" method (CM = 8) sets these flags as
1127 follows:
1128
1129 0 - compressor used fastest algorithm
1130 1 - compressor used fast algorithm
1131 2 - compressor used default algorithm
1132 3 - compressor used maximum compression, slowest algorithm
1133
1134 The information in FLEVEL is not needed for decompression; it
1135 is there to indicate if recompression might be worthwhile.
1136
1137 compressed data
1138 For compression method 8, the compressed data is stored in the
1139 deflate compressed data format as described in the document
1140 "DEFLATE Compressed Data Format Specification" by L. Peter
1141 Deutsch. (See reference [3] in Chapter 3, below)
1142
1143 Other compressed data formats are not specified in this version
1144 of the zlib specification.
1145
1146 ADLER32 (Adler-32 checksum)
1147 This contains a checksum value of the uncompressed data
1148 (excluding any dictionary data) computed according to Adler-32
1149 algorithm. This algorithm is a 32-bit extension and improvement
1150 of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073
1151 standard. See references [4] and [5] in Chapter 3, below)
1152
1153 Adler-32 is composed of two sums accumulated per byte: s1 is
1154 the sum of all bytes, s2 is the sum of all s1 values. Both sums
1155 are done modulo 65521. s1 is initialized to 1, s2 to zero. The
1156 Adler-32 checksum is stored as s2*65536 + s1 in most-
1157 significant-byte first (network) order.
1158
1159 ==> The stream can start with only 2 bytes :
1160 - CM = 0x78 : CMINFO=7 (32kB window), CM=8 (deflate)
1161 - FLG = 0x01 : FLEVEL = 0 (fastest), FDICT=0 (no dict), FCHECK=1 so
1162 that 0x7801 is a multiple of 31 (30721 = 991 * 31).
1163
1164 ==> and it ends with only 4 bytes, the Adler-32 checksum in big-endian format.
1165
1166 */
1167
1168static const unsigned char zlib_hdr[] = { 0x78, 0x01 }; // 32k win, deflate, chk=1
1169
1170
1171/* Original version from RFC1950, verified and works OK */
1172uint32_t slz_adler32_by1(uint32_t crc, const unsigned char *buf, int len)
1173{
1174 uint32_t s1 = crc & 0xffff;
1175 uint32_t s2 = (crc >> 16) & 0xffff;
1176 int n;
1177
1178 for (n = 0; n < len; n++) {
1179 s1 = (s1 + buf[n]) % 65521;
1180 s2 = (s2 + s1) % 65521;
1181 }
1182 return (s2 << 16) + s1;
1183}
1184
1185/* Computes the adler32 sum on <buf> for <len> bytes. It avoids the expensive
1186 * modulus by retrofitting the number of bytes missed between 65521 and 65536
1187 * which is easy to count : For every sum above 65536, the modulus is offset
1188 * by (65536-65521) = 15. So for any value, we can count the accumulated extra
1189 * values by dividing the sum by 65536 and multiplying this value by
1190 * (65536-65521). That's easier with a drawing with boxes and marbles. It gives
1191 * this :
1192 * x % 65521 = (x % 65536) + (x / 65536) * (65536 - 65521)
1193 * = (x & 0xffff) + (x >> 16) * 15.
1194 */
1195uint32_t slz_adler32_block(uint32_t crc, const unsigned char *buf, long len)
1196{
1197 long s1 = crc & 0xffff;
1198 long s2 = (crc >> 16);
1199 long blk;
1200 long n;
1201
1202 do {
1203 blk = len;
1204 /* ensure we never overflow s2 (limit is about 2^((32-8)/2) */
1205 if (blk > (1U << 12))
1206 blk = 1U << 12;
1207 len -= blk;
1208
1209 for (n = 0; n < blk; n++) {
1210 s1 = (s1 + buf[n]);
1211 s2 = (s2 + s1);
1212 }
1213
1214 /* Largest value here is 2^12 * 255 = 1044480 < 2^20. We can
1215 * still overflow once, but not twice because the right hand
1216 * size is 225 max, so the total is 65761. However we also
1217 * have to take care of the values between 65521 and 65536.
1218 */
1219 s1 = (s1 & 0xffff) + 15 * (s1 >> 16);
1220 if (s1 >= 65521)
1221 s1 -= 65521;
1222
1223 /* For s2, the largest value is estimated to 2^32-1 for
1224 * simplicity, so the right hand side is about 15*65535
1225 * = 983025. We can overflow twice at most.
1226 */
1227 s2 = (s2 & 0xffff) + 15 * (s2 >> 16);
1228 s2 = (s2 & 0xffff) + 15 * (s2 >> 16);
1229 if (s2 >= 65521)
1230 s2 -= 65521;
1231
1232 buf += blk;
1233 } while (len);
1234 return (s2 << 16) + s1;
1235}
1236
1237/* Sends the zlib header for stream <strm> into buffer <buf>. When it's done,
1238 * the stream state is updated to SLZ_ST_EOB. It returns the number of bytes
1239 * emitted which is always 2. The caller is responsible for ensuring there's
1240 * always enough room in the buffer.
1241 */
1242int slz_rfc1950_send_header(struct slz_stream *strm, unsigned char *buf)
1243{
1244 memcpy(buf, zlib_hdr, sizeof(zlib_hdr));
1245 strm->state = SLZ_ST_EOB;
1246 return sizeof(zlib_hdr);
1247}
1248
1249/* Encodes the block according to rfc1950. This means that the CRC of the input
1250 * block is computed according to the ADLER32 algorithm. If the header was never
1251 * sent, it may be sent first. The number of output bytes is returned.
1252 */
1253long slz_rfc1950_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
1254{
1255 long ret = 0;
1256
1257 if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
1258 ret += slz_rfc1950_send_header(strm, out);
1259
1260 strm->crc32 = slz_adler32_block(strm->crc32, in, ilen);
1261 ret += slz_rfc1951_encode(strm, out + ret, in, ilen, more);
1262 return ret;
1263}
1264
1265/* Initializes stream <strm> for use with the zlib format (rfc1952). The
1266 * compression level passed in <level> is set. This value can only be 0 (no
1267 * compression) or 1 (compression) and other values will lead to unpredictable
1268 * behaviour. The function always returns 0.
1269 */
1270int slz_rfc1950_init(struct slz_stream *strm, int level)
1271{
1272 strm->state = SLZ_ST_INIT;
1273 strm->level = level;
1274 strm->format = SLZ_FMT_ZLIB;
1275 strm->crc32 = 1; // rfc1950/zlib starts with initial crc=1
1276 strm->ilen = 0;
1277 strm->qbits = 0;
1278 strm->queue = 0;
1279 return 0;
1280}
1281
1282/* Flushes pending bits and sends the gzip trailer for stream <strm> into
1283 * buffer <buf>. When it's done, the stream state is updated to SLZ_ST_END. It
1284 * returns the number of bytes emitted. The trailer consists in flushing the
1285 * possibly pending bits from the queue (up to 24 bits), rounding to the next
Ilya Shipitsinb2be9a12021-04-24 13:25:42 +05001286 * byte, then 4 bytes for the CRC. That may about to 4+4 = 8 bytes, that the
Willy Tarreauab2b7822021-04-22 14:09:44 +02001287 * caller must ensure are available before calling the function. Note that if
1288 * the initial header was never sent, it will be sent first as well (2 extra
1289 * bytes).
1290 */
1291int slz_rfc1950_finish(struct slz_stream *strm, unsigned char *buf)
1292{
1293 strm->outbuf = buf;
1294
1295 if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
1296 strm->outbuf += slz_rfc1952_send_header(strm, strm->outbuf);
1297
1298 slz_rfc1951_finish(strm, strm->outbuf);
1299 copy_8b(strm, (strm->crc32 >> 24) & 0xff);
1300 copy_8b(strm, (strm->crc32 >> 16) & 0xff);
1301 copy_8b(strm, (strm->crc32 >> 8) & 0xff);
1302 copy_8b(strm, (strm->crc32 >> 0) & 0xff);
1303 strm->state = SLZ_ST_END;
1304 return strm->outbuf - buf;
1305}
1306
Willy Tarreauab2b7822021-04-22 14:09:44 +02001307__attribute__((constructor))
1308static void __slz_initialize(void)
1309{
Willy Tarreau9e274282021-05-12 08:36:09 +02001310#if !defined(__ARM_FEATURE_CRC32)
Willy Tarreauab2b7822021-04-22 14:09:44 +02001311 __slz_make_crc_table();
Willy Tarreau9e274282021-05-12 08:36:09 +02001312#endif
Willy Tarreauab2b7822021-04-22 14:09:44 +02001313 __slz_prepare_dist_table();
1314}