blob: 1d6032ea2e9294858b33314d998081075fbfea11 [file] [log] [blame]
Willy Tarreauab2b7822021-04-22 14:09:44 +02001/*
2 * Copyright (C) 2013-2015 Willy Tarreau <w@1wt.eu>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
Willy Tarreau388fc252021-05-14 08:44:52 +020025#include <inttypes.h>
Willy Tarreauab2b7822021-04-22 14:09:44 +020026#include <stdio.h>
27#include <string.h>
28#include <import/slz.h>
29#include <import/slz-tables.h>
30
31/* First, RFC1951-specific declarations and extracts from the RFC.
32 *
33 * RFC1951 - deflate stream format
34
35
36 * Data elements are packed into bytes in order of
37 increasing bit number within the byte, i.e., starting
38 with the least-significant bit of the byte.
39 * Data elements other than Huffman codes are packed
40 starting with the least-significant bit of the data
41 element.
42 * Huffman codes are packed starting with the most-
43 significant bit of the code.
44
45 3.2.3. Details of block format
46
47 Each block of compressed data begins with 3 header bits
48 containing the following data:
49
50 first bit BFINAL
51 next 2 bits BTYPE
52
53 Note that the header bits do not necessarily begin on a byte
54 boundary, since a block does not necessarily occupy an integral
55 number of bytes.
56
57 BFINAL is set if and only if this is the last block of the data
58 set.
59
60 BTYPE specifies how the data are compressed, as follows:
61
62 00 - no compression
63 01 - compressed with fixed Huffman codes
64 10 - compressed with dynamic Huffman codes
65 11 - reserved (error)
66
67 3.2.4. Non-compressed blocks (BTYPE=00)
68
69 Any bits of input up to the next byte boundary are ignored.
70 The rest of the block consists of the following information:
71
72 0 1 2 3 4...
73 +---+---+---+---+================================+
74 | LEN | NLEN |... LEN bytes of literal data...|
75 +---+---+---+---+================================+
76
77 LEN is the number of data bytes in the block. NLEN is the
78 one's complement of LEN.
79
80 3.2.5. Compressed blocks (length and distance codes)
81
82 As noted above, encoded data blocks in the "deflate" format
83 consist of sequences of symbols drawn from three conceptually
84 distinct alphabets: either literal bytes, from the alphabet of
85 byte values (0..255), or <length, backward distance> pairs,
86 where the length is drawn from (3..258) and the distance is
87 drawn from (1..32,768). In fact, the literal and length
88 alphabets are merged into a single alphabet (0..285), where
89 values 0..255 represent literal bytes, the value 256 indicates
90 end-of-block, and values 257..285 represent length codes
91 (possibly in conjunction with extra bits following the symbol
92 code) as follows:
93
94Length encoding :
95 Extra Extra Extra
96 Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
97 ---- ---- ------ ---- ---- ------- ---- ---- -------
98 257 0 3 267 1 15,16 277 4 67-82
99 258 0 4 268 1 17,18 278 4 83-98
100 259 0 5 269 2 19-22 279 4 99-114
101 260 0 6 270 2 23-26 280 4 115-130
102 261 0 7 271 2 27-30 281 5 131-162
103 262 0 8 272 2 31-34 282 5 163-194
104 263 0 9 273 3 35-42 283 5 195-226
105 264 0 10 274 3 43-50 284 5 227-257
106 265 1 11,12 275 3 51-58 285 0 258
107 266 1 13,14 276 3 59-66
108
109Distance encoding :
110 Extra Extra Extra
111 Code Bits Dist Code Bits Dist Code Bits Distance
112 ---- ---- ---- ---- ---- ------ ---- ---- --------
113 0 0 1 10 4 33-48 20 9 1025-1536
114 1 0 2 11 4 49-64 21 9 1537-2048
115 2 0 3 12 5 65-96 22 10 2049-3072
116 3 0 4 13 5 97-128 23 10 3073-4096
117 4 1 5,6 14 6 129-192 24 11 4097-6144
118 5 1 7,8 15 6 193-256 25 11 6145-8192
119 6 2 9-12 16 7 257-384 26 12 8193-12288
120 7 2 13-16 17 7 385-512 27 12 12289-16384
121 8 3 17-24 18 8 513-768 28 13 16385-24576
122 9 3 25-32 19 8 769-1024 29 13 24577-32768
123
124 3.2.6. Compression with fixed Huffman codes (BTYPE=01)
125
126 The Huffman codes for the two alphabets are fixed, and are not
127 represented explicitly in the data. The Huffman code lengths
128 for the literal/length alphabet are:
129
130 Lit Value Bits Codes
131 --------- ---- -----
132 0 - 143 8 00110000 through
133 10111111
134 144 - 255 9 110010000 through
135 111111111
136 256 - 279 7 0000000 through
137 0010111
138 280 - 287 8 11000000 through
139 11000111
140
141 The code lengths are sufficient to generate the actual codes,
142 as described above; we show the codes in the table for added
143 clarity. Literal/length values 286-287 will never actually
144 occur in the compressed data, but participate in the code
145 construction.
146
147 Distance codes 0-31 are represented by (fixed-length) 5-bit
148 codes, with possible additional bits as shown in the table
149 shown in Paragraph 3.2.5, above. Note that distance codes 30-
150 31 will never actually occur in the compressed data.
151
152*/
153
154/* back references, built in a way that is optimal for 32/64 bits */
155union ref {
156 struct {
157 uint32_t pos;
158 uint32_t word;
159 } by32;
160 uint64_t by64;
161};
162
163#if defined(USE_64BIT_QUEUE) && defined(UNALIGNED_LE_OK)
164
165/* enqueue code x of <xbits> bits (LSB aligned, at most 24) and copy complete
166 * 32-bit words into output buffer. X must not contain non-zero bits above
167 * xbits.
168 */
169static inline void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits)
170{
171 uint64_t queue = strm->queue + ((uint64_t)x << strm->qbits);
172 uint32_t qbits = strm->qbits + xbits;
173
174 if (__builtin_expect(qbits >= 32, 1)) {
175 *(uint32_t *)strm->outbuf = queue;
176 queue >>= 32;
177 qbits -= 32;
178 strm->outbuf += 4;
179 }
180
181 strm->queue = queue;
182 strm->qbits = qbits;
183}
184
185#define enqueue8 enqueue24
186
187/* flush the queue and align to next byte */
188static inline void flush_bits(struct slz_stream *strm)
189{
190 if (strm->qbits > 0)
191 *strm->outbuf++ = strm->queue;
192
193 if (strm->qbits > 8)
194 *strm->outbuf++ = strm->queue >> 8;
195
196 if (strm->qbits > 16)
197 *strm->outbuf++ = strm->queue >> 16;
198
199 if (strm->qbits > 24)
200 *strm->outbuf++ = strm->queue >> 24;
201
202 strm->queue = 0;
203 strm->qbits = 0;
204}
205
206#else /* non-64 bit or aligned or big endian */
207
208/* enqueue code x of <xbits> bits (LSB aligned, at most 24) and copy complete
209 * bytes into out buf. X must not contain non-zero bits above xbits. Prefer
210 * enqueue8() when xbits is known for being 8 or less.
211 */
212static void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits)
213{
214 uint32_t queue = strm->queue + (x << strm->qbits);
215 uint32_t qbits = strm->qbits + xbits;
216
217 if (qbits >= 16) {
218#ifndef UNALIGNED_LE_OK
219 strm->outbuf[0] = queue;
220 strm->outbuf[1] = queue >> 8;
221#else
222 *(uint16_t *)strm->outbuf = queue;
223#endif
224 strm->outbuf += 2;
225 queue >>= 16;
226 qbits -= 16;
227 }
228
229 if (qbits >= 8) {
230 qbits -= 8;
231 *strm->outbuf++ = queue;
232 queue >>= 8;
233 }
234 strm->qbits = qbits;
235 strm->queue = queue;
236 return;
237}
238
239/* enqueue code x of <xbits> bits (at most 8) and copy complete bytes into
240 * out buf. X must not contain non-zero bits above xbits.
241 */
242static inline void enqueue8(struct slz_stream *strm, uint32_t x, uint32_t xbits)
243{
244 uint32_t queue = strm->queue + (x << strm->qbits);
245 uint32_t qbits = strm->qbits + xbits;
246
247 if (__builtin_expect((signed)(qbits - 8) >= 0, 1)) {
248 qbits -= 8;
249 *strm->outbuf++ = queue;
250 queue >>= 8;
251 }
252
253 strm->qbits = qbits;
254 strm->queue = queue;
255}
256
257/* align to next byte */
258static inline void flush_bits(struct slz_stream *strm)
259{
260 if (strm->qbits > 0)
261 *strm->outbuf++ = strm->queue;
262
263 if (strm->qbits > 8)
264 *strm->outbuf++ = strm->queue >> 8;
265
266 strm->queue = 0;
267 strm->qbits = 0;
268}
269#endif
270
271
272/* only valid if buffer is already aligned */
273static inline void copy_8b(struct slz_stream *strm, uint32_t x)
274{
275 *strm->outbuf++ = x;
276}
277
278/* only valid if buffer is already aligned */
279static inline void copy_16b(struct slz_stream *strm, uint32_t x)
280{
281 strm->outbuf[0] = x;
282 strm->outbuf[1] = x >> 8;
283 strm->outbuf += 2;
284}
285
286/* only valid if buffer is already aligned */
287static inline void copy_32b(struct slz_stream *strm, uint32_t x)
288{
289 strm->outbuf[0] = x;
290 strm->outbuf[1] = x >> 8;
291 strm->outbuf[2] = x >> 16;
292 strm->outbuf[3] = x >> 24;
293 strm->outbuf += 4;
294}
295
296static inline void send_huff(struct slz_stream *strm, uint32_t code)
297{
298 uint32_t bits;
299
300 code = fixed_huff[code];
301 bits = code & 15;
302 code >>= 4;
303 enqueue24(strm, code, bits);
304}
305
306static inline void send_eob(struct slz_stream *strm)
307{
308 enqueue8(strm, 0, 7); // direct encoding of 256 = EOB (cf RFC1951)
309}
310
Ilya Shipitsinb2be9a12021-04-24 13:25:42 +0500311/* copies <len> literals from <buf>. <more> indicates that there are data past
Willy Tarreauab2b7822021-04-22 14:09:44 +0200312 * buf + <len>. <len> must not be null.
313 */
314static void copy_lit(struct slz_stream *strm, const void *buf, uint32_t len, int more)
315{
316 uint32_t len2;
317
318 do {
319 len2 = len;
320 if (__builtin_expect(len2 > 65535, 0))
321 len2 = 65535;
322
323 len -= len2;
324
325 if (strm->state != SLZ_ST_EOB)
326 send_eob(strm);
327
328 strm->state = (more || len) ? SLZ_ST_EOB : SLZ_ST_DONE;
329
330 enqueue8(strm, !(more || len), 3); // BFINAL = !more ; BTYPE = 00
331 flush_bits(strm);
332 copy_16b(strm, len2); // len2
333 copy_16b(strm, ~len2); // nlen2
334 memcpy(strm->outbuf, buf, len2);
335 buf += len2;
336 strm->outbuf += len2;
337 } while (len);
338}
339
Ilya Shipitsinb2be9a12021-04-24 13:25:42 +0500340/* copies <len> literals from <buf>. <more> indicates that there are data past
Willy Tarreauab2b7822021-04-22 14:09:44 +0200341 * buf + <len>. <len> must not be null.
342 */
343static void copy_lit_huff(struct slz_stream *strm, const unsigned char *buf, uint32_t len, int more)
344{
345 uint32_t pos;
346
347 /* This ugly construct limits the mount of tests and optimizes for the
348 * most common case (more > 0).
349 */
350 if (strm->state == SLZ_ST_EOB) {
351 eob:
352 strm->state = more ? SLZ_ST_FIXED : SLZ_ST_LAST;
353 enqueue8(strm, 2 + !more, 3); // BFINAL = !more ; BTYPE = 01
354 }
355 else if (!more) {
356 send_eob(strm);
357 goto eob;
358 }
359
360 pos = 0;
361 do {
362 send_huff(strm, buf[pos++]);
363 } while (pos < len);
364}
365
366/* format:
367 * bit0..31 = word
368 * bit32..63 = last position in buffer of similar content
369 */
370
371/* This hash provides good average results on HTML contents, and is among the
372 * few which provide almost optimal results on various different pages.
373 */
374static inline uint32_t slz_hash(uint32_t a)
375{
376#if defined(__ARM_FEATURE_CRC32)
Willy Tarreaub1544222021-12-03 17:38:42 +0100377# if defined(__ARM_ARCH_ISA_A64)
378 // 64 bit mode
Willy Tarreauab2b7822021-04-22 14:09:44 +0200379 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(a) : "r"(0));
Willy Tarreaub1544222021-12-03 17:38:42 +0100380# else
381 // 32 bit mode (e.g. armv7 compiler building for armv8
382 __asm__ volatile("crc32w %0,%0,%1" : "+r"(a) : "r"(0));
383# endif
Willy Tarreauab2b7822021-04-22 14:09:44 +0200384 return a >> (32 - HASH_BITS);
385#else
386 return ((a << 19) + (a << 6) - a) >> (32 - HASH_BITS);
387#endif
388}
389
390/* This function compares buffers <a> and <b> and reads 32 or 64 bits at a time
391 * during the approach. It makes us of unaligned little endian memory accesses
392 * on capable architectures. <max> is the maximum number of bytes that can be
393 * read, so both <a> and <b> must have at least <max> bytes ahead. <max> may
394 * safely be null or negative if that simplifies computations in the caller.
395 */
396static inline long memmatch(const unsigned char *a, const unsigned char *b, long max)
397{
398 long len = 0;
399
400#ifdef UNALIGNED_LE_OK
401 unsigned long xor;
402
403 while (1) {
404 if ((long)(len + 2 * sizeof(long)) > max) {
405 while (len < max) {
406 if (a[len] != b[len])
407 break;
408 len++;
409 }
410 return len;
411 }
412
413 xor = *(long *)&a[len] ^ *(long *)&b[len];
414 if (xor)
415 break;
416 len += sizeof(long);
417
418 xor = *(long *)&a[len] ^ *(long *)&b[len];
419 if (xor)
420 break;
421 len += sizeof(long);
422 }
423
424#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
425 /* x86 has bsf. We know that xor is non-null here */
426 asm("bsf %1,%0\n" : "=r"(xor) : "0" (xor));
427 return len + xor / 8;
428#else
429 if (sizeof(long) > 4 && !(xor & 0xffffffff)) {
430 /* This code is optimized out on 32-bit archs, but we still
431 * need to shift in two passes to avoid a warning. It is
432 * properly optimized out as a single shift.
433 */
434 xor >>= 16; xor >>= 16;
435 if (xor & 0xffff) {
436 if (xor & 0xff)
437 return len + 4;
438 return len + 5;
439 }
440 if (xor & 0xffffff)
441 return len + 6;
442 return len + 7;
443 }
444
445 if (xor & 0xffff) {
446 if (xor & 0xff)
447 return len;
448 return len + 1;
449 }
450 if (xor & 0xffffff)
451 return len + 2;
452 return len + 3;
453#endif // x86
454
455#else // UNALIGNED_LE_OK
456 /* This is the generic version for big endian or unaligned-incompatible
457 * architectures.
458 */
459 while (len < max) {
460 if (a[len] != b[len])
461 break;
462 len++;
463 }
464 return len;
465
466#endif
467}
468
469/* sets <count> BYTES to -32769 in <refs> so that any uninitialized entry will
470 * verify (pos-last-1 >= 32768) and be ignored. <count> must be a multiple of
471 * 128 bytes and <refs> must be at least one count in length. It's supposed to
472 * be applied to 64-bit aligned data exclusively, which makes it slightly
473 * faster than the regular memset() since no alignment check is performed.
474 */
Tim Duesterhuseaf16fc2021-09-20 19:59:42 +0200475static void reset_refs(union ref *refs, long count)
Willy Tarreauab2b7822021-04-22 14:09:44 +0200476{
477 /* avoid a shift/mask by casting to void* */
478 union ref *end = (void *)refs + count;
479
480 do {
481 refs[ 0].by64 = -32769;
482 refs[ 1].by64 = -32769;
483 refs[ 2].by64 = -32769;
484 refs[ 3].by64 = -32769;
485 refs[ 4].by64 = -32769;
486 refs[ 5].by64 = -32769;
487 refs[ 6].by64 = -32769;
488 refs[ 7].by64 = -32769;
489 refs[ 8].by64 = -32769;
490 refs[ 9].by64 = -32769;
491 refs[10].by64 = -32769;
492 refs[11].by64 = -32769;
493 refs[12].by64 = -32769;
494 refs[13].by64 = -32769;
495 refs[14].by64 = -32769;
496 refs[15].by64 = -32769;
497 refs += 16;
498 } while (refs < end);
499}
500
501/* Compresses <ilen> bytes from <in> into <out> according to RFC1951. The
502 * output result may be up to 5 bytes larger than the input, to which 2 extra
503 * bytes may be added to send the last chunk due to BFINAL+EOB encoding (10
504 * bits) when <more> is not set. The caller is responsible for ensuring there
505 * is enough room in the output buffer for this. The amount of output bytes is
506 * returned, and no CRC is computed.
507 */
508long slz_rfc1951_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
509{
510 long rem = ilen;
511 unsigned long pos = 0;
512 unsigned long last;
513 uint32_t word = 0;
514 long mlen;
515 uint32_t h;
516 uint64_t ent;
517
518 uint32_t plit = 0;
519 uint32_t bit9 = 0;
520 uint32_t dist, code;
521 union ref refs[1 << HASH_BITS];
522
523 if (!strm->level) {
524 /* force to send as literals (eg to preserve CPU) */
525 strm->outbuf = out;
526 plit = pos = ilen;
527 bit9 = 52; /* force literal dump */
528 goto final_lit_dump;
529 }
530
531 reset_refs(refs, sizeof(refs));
532
533 strm->outbuf = out;
534
535#ifndef UNALIGNED_FASTER
536 word = ((unsigned char)in[pos] << 8) + ((unsigned char)in[pos + 1] << 16) + ((unsigned char)in[pos + 2] << 24);
537#endif
538 while (rem >= 4) {
539#ifndef UNALIGNED_FASTER
540 word = ((unsigned char)in[pos + 3] << 24) + (word >> 8);
541#else
542 word = *(uint32_t *)&in[pos];
543#endif
544 h = slz_hash(word);
545 asm volatile ("" ::); // prevent gcc from trying to be smart with the prefetch
546
547 if (sizeof(long) >= 8) {
548 ent = refs[h].by64;
549 last = (uint32_t)ent;
550 ent >>= 32;
551 refs[h].by64 = ((uint64_t)pos) + ((uint64_t)word << 32);
552 } else {
553 ent = refs[h].by32.word;
554 last = refs[h].by32.pos;
555 refs[h].by32.pos = pos;
556 refs[h].by32.word = word;
557 }
558
Willy Tarreaufc89c3f2021-08-28 12:10:49 +0200559#ifdef FIND_OPTIMAL_MATCH
Willy Tarreauab2b7822021-04-22 14:09:44 +0200560 /* Experimental code to see what could be saved with an ideal
561 * longest match lookup algorithm. This one is very slow but
562 * scans the whole window. In short, here are the savings :
563 * file orig fast(ratio) optimal(ratio)
564 * README 5185 3419 (65.9%) 3165 (61.0%) -7.5%
565 * index.html 76799 35662 (46.4%) 29875 (38.9%) -16.3%
566 * rfc1952.c 29383 13442 (45.7%) 11793 (40.1%) -12.3%
567 *
568 * Thus the savings to expect for large files is at best 16%.
569 *
570 * A non-colliding hash gives 33025 instead of 35662 (-7.4%),
571 * and keeping the last two entries gives 31724 (-11.0%).
572 */
573 unsigned long scan;
574 int saved = 0;
575 int bestpos = 0;
576 int bestlen = 0;
577 int firstlen = 0;
578 int max_lookup = 2; // 0 = no limit
579
580 for (scan = pos - 1; scan < pos && (unsigned long)(pos - scan - 1) < 32768; scan--) {
Willy Tarreaueda36f12021-12-13 09:07:05 +0100581 int len;
582
Willy Tarreauab2b7822021-04-22 14:09:44 +0200583 if (*(uint32_t *)(in + scan) != word)
584 continue;
585
586 len = memmatch(in + pos, in + scan, rem);
587 if (!bestlen)
588 firstlen = len;
589
590 if (len > bestlen) {
591 bestlen = len;
592 bestpos = scan;
593 }
594 if (!--max_lookup)
595 break;
596 }
597 if (bestlen) {
598 //printf("pos=%d last=%d bestpos=%d word=%08x ent=%08x len=%d\n",
599 // (int)pos, (int)last, (int)bestpos, (int)word, (int)ent, bestlen);
600 last = bestpos;
601 ent = word;
602 saved += bestlen - firstlen;
603 }
604 //fprintf(stderr, "first=%d best=%d saved_total=%d\n", firstlen, bestlen, saved);
605#endif
606
607 if ((uint32_t)ent != word) {
608 send_as_lit:
609 rem--;
610 plit++;
611 bit9 += ((unsigned char)word >= 144);
612 pos++;
613 continue;
614 }
615
616 /* We reject pos = last and pos > last+32768 */
617 if ((unsigned long)(pos - last - 1) >= 32768)
618 goto send_as_lit;
619
620 /* Note: cannot encode a length larger than 258 bytes */
621 mlen = memmatch(in + pos + 4, in + last + 4, (rem > 258 ? 258 : rem) - 4) + 4;
622
623 /* found a matching entry */
624
625 if (bit9 >= 52 && mlen < 6)
626 goto send_as_lit;
627
628 /* compute the output code, its size and the length's size in
629 * bits to know if the reference is cheaper than literals.
630 */
631 code = len_fh[mlen];
632
633 /* direct mapping of dist->huffman code */
634 dist = fh_dist_table[pos - last - 1];
635
636 /* if encoding the dist+length is more expensive than sending
637 * the equivalent as bytes, lets keep the literals.
638 */
639 if ((dist & 0x1f) + (code >> 16) + 8 >= 8 * mlen + bit9)
640 goto send_as_lit;
641
642 /* first, copy pending literals */
643 if (plit) {
644 /* Huffman encoding requires 9 bits for octets 144..255, so this
645 * is a waste of space for binary data. Switching between Huffman
646 * and no-comp then huffman consumes 52 bits (7 for EOB + 3 for
647 * block type + 7 for alignment + 32 for LEN+NLEN + 3 for next
648 * block. Only use plain literals if there are more than 52 bits
649 * to save then.
650 */
651 if (bit9 >= 52)
652 copy_lit(strm, in + pos - plit, plit, 1);
653 else
654 copy_lit_huff(strm, in + pos - plit, plit, 1);
655
656 plit = 0;
657 }
658
659 /* use mode 01 - fixed huffman */
660 if (strm->state == SLZ_ST_EOB) {
661 strm->state = SLZ_ST_FIXED;
662 enqueue8(strm, 0x02, 3); // BTYPE = 01, BFINAL = 0
663 }
664
665 /* copy the length first */
666 enqueue24(strm, code & 0xFFFF, code >> 16);
667
668 /* in fixed huffman mode, dist is fixed 5 bits */
669 enqueue24(strm, dist >> 5, dist & 0x1f);
670 bit9 = 0;
671 rem -= mlen;
672 pos += mlen;
673
674#ifndef UNALIGNED_FASTER
675#ifdef UNALIGNED_LE_OK
676 word = *(uint32_t *)&in[pos - 1];
677#else
678 word = ((unsigned char)in[pos] << 8) + ((unsigned char)in[pos + 1] << 16) + ((unsigned char)in[pos + 2] << 24);
679#endif
680#endif
681 }
682
683 if (__builtin_expect(rem, 0)) {
684 /* we're reading the 1..3 last bytes */
685 plit += rem;
686 do {
687 bit9 += ((unsigned char)in[pos++] >= 144);
688 } while (--rem);
689 }
690
691 final_lit_dump:
692 /* now copy remaining literals or mark the end */
693 if (plit) {
694 if (bit9 >= 52)
695 copy_lit(strm, in + pos - plit, plit, more);
696 else
697 copy_lit_huff(strm, in + pos - plit, plit, more);
698
699 plit = 0;
700 }
701
702 strm->ilen += ilen;
703 return strm->outbuf - out;
704}
705
706/* Initializes stream <strm> for use with raw deflate (rfc1951). The CRC is
707 * unused but set to zero. The compression level passed in <level> is set. This
708 * value can only be 0 (no compression) or 1 (compression) and other values
709 * will lead to unpredictable behaviour. The function always returns 0.
710 */
711int slz_rfc1951_init(struct slz_stream *strm, int level)
712{
713 strm->state = SLZ_ST_EOB; // no header
714 strm->level = level;
715 strm->format = SLZ_FMT_DEFLATE;
716 strm->crc32 = 0;
717 strm->ilen = 0;
718 strm->qbits = 0;
719 strm->queue = 0;
720 return 0;
721}
722
723/* Flushes any pending for stream <strm> into buffer <buf>, then sends BTYPE=1
724 * and BFINAL=1 if needed. The stream ends in SLZ_ST_DONE. It returns the number
725 * of bytes emitted. The trailer consists in flushing the possibly pending bits
726 * from the queue (up to 7 bits), then possibly EOB (7 bits), then 3 bits, EOB,
727 * a rounding to the next byte, which amounts to a total of 4 bytes max, that
728 * the caller must ensure are available before calling the function.
729 */
730int slz_rfc1951_finish(struct slz_stream *strm, unsigned char *buf)
731{
732 strm->outbuf = buf;
733
734 if (strm->state == SLZ_ST_FIXED || strm->state == SLZ_ST_LAST) {
735 strm->state = (strm->state == SLZ_ST_LAST) ? SLZ_ST_DONE : SLZ_ST_EOB;
736 send_eob(strm);
737 }
738
739 if (strm->state != SLZ_ST_DONE) {
740 /* send BTYPE=1, BFINAL=1 */
741 enqueue8(strm, 3, 3);
742 send_eob(strm);
743 strm->state = SLZ_ST_DONE;
744 }
745
746 flush_bits(strm);
747 return strm->outbuf - buf;
748}
749
750/* Now RFC1952-specific declarations and extracts from RFC.
751 * From RFC1952 about the GZIP file format :
752
753A gzip file consists of a series of "members" ...
754
7552.3. Member format
756
757 Each member has the following structure:
758
759 +---+---+---+---+---+---+---+---+---+---+
760 |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->)
761 +---+---+---+---+---+---+---+---+---+---+
762
763 (if FLG.FEXTRA set)
764
765 +---+---+=================================+
766 | XLEN |...XLEN bytes of "extra field"...| (more-->)
767 +---+---+=================================+
768
769 (if FLG.FNAME set)
770
771 +=========================================+
772 |...original file name, zero-terminated...| (more-->)
773 +=========================================+
774
775 (if FLG.FCOMMENT set)
776
777 +===================================+
778 |...file comment, zero-terminated...| (more-->)
779 +===================================+
780
781 (if FLG.FHCRC set)
782
783 +---+---+
784 | CRC16 |
785 +---+---+
786
787 +=======================+
788 |...compressed blocks...| (more-->)
789 +=======================+
790
791 0 1 2 3 4 5 6 7
792 +---+---+---+---+---+---+---+---+
793 | CRC32 | ISIZE |
794 +---+---+---+---+---+---+---+---+
795
796
7972.3.1. Member header and trailer
798
799 ID1 (IDentification 1)
800 ID2 (IDentification 2)
801 These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
802 (0x8b, \213), to identify the file as being in gzip format.
803
804 CM (Compression Method)
805 This identifies the compression method used in the file. CM
806 = 0-7 are reserved. CM = 8 denotes the "deflate"
807 compression method, which is the one customarily used by
808 gzip and which is documented elsewhere.
809
810 FLG (FLaGs)
811 This flag byte is divided into individual bits as follows:
812
813 bit 0 FTEXT
814 bit 1 FHCRC
815 bit 2 FEXTRA
816 bit 3 FNAME
817 bit 4 FCOMMENT
818 bit 5 reserved
819 bit 6 reserved
820 bit 7 reserved
821
822 Reserved FLG bits must be zero.
823
824 MTIME (Modification TIME)
825 This gives the most recent modification time of the original
826 file being compressed. The time is in Unix format, i.e.,
827 seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this
828 may cause problems for MS-DOS and other systems that use
829 local rather than Universal time.) If the compressed data
830 did not come from a file, MTIME is set to the time at which
831 compression started. MTIME = 0 means no time stamp is
832 available.
833
834 XFL (eXtra FLags)
835 These flags are available for use by specific compression
836 methods. The "deflate" method (CM = 8) sets these flags as
837 follows:
838
839 XFL = 2 - compressor used maximum compression,
840 slowest algorithm
841 XFL = 4 - compressor used fastest algorithm
842
843 OS (Operating System)
844 This identifies the type of file system on which compression
845 took place. This may be useful in determining end-of-line
846 convention for text files. The currently defined values are
847 as follows:
848
849 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
850 1 - Amiga
851 2 - VMS (or OpenVMS)
852 3 - Unix
853 4 - VM/CMS
854 5 - Atari TOS
855 6 - HPFS filesystem (OS/2, NT)
856 7 - Macintosh
857 8 - Z-System
858 9 - CP/M
859 10 - TOPS-20
860 11 - NTFS filesystem (NT)
861 12 - QDOS
862 13 - Acorn RISCOS
863 255 - unknown
864
865 ==> A file compressed using "gzip -1" on Unix-like systems can be :
866
867 1F 8B 08 00 00 00 00 00 04 03
868 <deflate-compressed stream>
869 crc32 size32
870*/
871
872static const unsigned char gzip_hdr[] = { 0x1F, 0x8B, // ID1, ID2
873 0x08, 0x00, // Deflate, flags (none)
874 0x00, 0x00, 0x00, 0x00, // mtime: none
875 0x04, 0x03 }; // fastest comp, OS=Unix
876
877static inline uint32_t crc32_char(uint32_t crc, uint8_t x)
878{
879#if defined(__ARM_FEATURE_CRC32)
880 crc = ~crc;
Willy Tarreaub1544222021-12-03 17:38:42 +0100881# if defined(__ARM_ARCH_ISA_A64)
882 // 64 bit mode
Willy Tarreauab2b7822021-04-22 14:09:44 +0200883 __asm__ volatile("crc32b %w0,%w0,%w1" : "+r"(crc) : "r"(x));
Willy Tarreaub1544222021-12-03 17:38:42 +0100884# else
885 // 32 bit mode (e.g. armv7 compiler building for armv8
886 __asm__ volatile("crc32b %0,%0,%1" : "+r"(crc) : "r"(x));
887# endif
Willy Tarreauab2b7822021-04-22 14:09:44 +0200888 crc = ~crc;
889#else
890 crc = crc32_fast[0][(crc ^ x) & 0xff] ^ (crc >> 8);
891#endif
892 return crc;
893}
894
895static inline uint32_t crc32_uint32(uint32_t data)
896{
897#if defined(__ARM_FEATURE_CRC32)
Willy Tarreaub1544222021-12-03 17:38:42 +0100898# if defined(__ARM_ARCH_ISA_A64)
899 // 64 bit mode
Willy Tarreauab2b7822021-04-22 14:09:44 +0200900 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(data) : "r"(~0UL));
Willy Tarreaub1544222021-12-03 17:38:42 +0100901# else
902 // 32 bit mode (e.g. armv7 compiler building for armv8
903 __asm__ volatile("crc32w %0,%0,%1" : "+r"(data) : "r"(~0UL));
904# endif
Willy Tarreauab2b7822021-04-22 14:09:44 +0200905 data = ~data;
906#else
907 data = crc32_fast[3][(data >> 0) & 0xff] ^
908 crc32_fast[2][(data >> 8) & 0xff] ^
909 crc32_fast[1][(data >> 16) & 0xff] ^
910 crc32_fast[0][(data >> 24) & 0xff];
911#endif
912 return data;
913}
914
915/* Modified version originally from RFC1952, working with non-inverting CRCs */
916uint32_t slz_crc32_by1(uint32_t crc, const unsigned char *buf, int len)
917{
918 int n;
919
920 for (n = 0; n < len; n++)
921 crc = crc32_char(crc, buf[n]);
922 return crc;
923}
924
925/* This version computes the crc32 of <buf> over <len> bytes, doing most of it
926 * in 32-bit chunks.
927 */
928uint32_t slz_crc32_by4(uint32_t crc, const unsigned char *buf, int len)
929{
930 const unsigned char *end = buf + len;
931
932 while (buf <= end - 16) {
933#ifdef UNALIGNED_LE_OK
934#if defined(__ARM_FEATURE_CRC32)
935 crc = ~crc;
Willy Tarreaub1544222021-12-03 17:38:42 +0100936# if defined(__ARM_ARCH_ISA_A64)
937 // 64 bit mode
Willy Tarreauab2b7822021-04-22 14:09:44 +0200938 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf)));
939 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 4)));
940 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 8)));
941 __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 12)));
Willy Tarreaub1544222021-12-03 17:38:42 +0100942# else
943 // 32 bit mode (e.g. armv7 compiler building for armv8
944 __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf)));
945 __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 4)));
946 __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 8)));
947 __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 12)));
948# endif
Willy Tarreauab2b7822021-04-22 14:09:44 +0200949 crc = ~crc;
950#else
951 crc ^= *(uint32_t *)buf;
952 crc = crc32_uint32(crc);
953
954 crc ^= *(uint32_t *)(buf + 4);
955 crc = crc32_uint32(crc);
956
957 crc ^= *(uint32_t *)(buf + 8);
958 crc = crc32_uint32(crc);
959
960 crc ^= *(uint32_t *)(buf + 12);
961 crc = crc32_uint32(crc);
962#endif
963#else
964 crc = crc32_fast[3][(buf[0] ^ (crc >> 0)) & 0xff] ^
965 crc32_fast[2][(buf[1] ^ (crc >> 8)) & 0xff] ^
966 crc32_fast[1][(buf[2] ^ (crc >> 16)) & 0xff] ^
967 crc32_fast[0][(buf[3] ^ (crc >> 24)) & 0xff];
968
969 crc = crc32_fast[3][(buf[4] ^ (crc >> 0)) & 0xff] ^
970 crc32_fast[2][(buf[5] ^ (crc >> 8)) & 0xff] ^
971 crc32_fast[1][(buf[6] ^ (crc >> 16)) & 0xff] ^
972 crc32_fast[0][(buf[7] ^ (crc >> 24)) & 0xff];
973
974 crc = crc32_fast[3][(buf[8] ^ (crc >> 0)) & 0xff] ^
975 crc32_fast[2][(buf[9] ^ (crc >> 8)) & 0xff] ^
976 crc32_fast[1][(buf[10] ^ (crc >> 16)) & 0xff] ^
977 crc32_fast[0][(buf[11] ^ (crc >> 24)) & 0xff];
978
979 crc = crc32_fast[3][(buf[12] ^ (crc >> 0)) & 0xff] ^
980 crc32_fast[2][(buf[13] ^ (crc >> 8)) & 0xff] ^
981 crc32_fast[1][(buf[14] ^ (crc >> 16)) & 0xff] ^
982 crc32_fast[0][(buf[15] ^ (crc >> 24)) & 0xff];
983#endif
984 buf += 16;
985 }
986
987 while (buf <= end - 4) {
988#ifdef UNALIGNED_LE_OK
989 crc ^= *(uint32_t *)buf;
990 crc = crc32_uint32(crc);
991#else
992 crc = crc32_fast[3][(buf[0] ^ (crc >> 0)) & 0xff] ^
993 crc32_fast[2][(buf[1] ^ (crc >> 8)) & 0xff] ^
994 crc32_fast[1][(buf[2] ^ (crc >> 16)) & 0xff] ^
995 crc32_fast[0][(buf[3] ^ (crc >> 24)) & 0xff];
996#endif
997 buf += 4;
998 }
999
1000 while (buf < end)
Willy Tarreau027fdcb2021-05-12 08:34:36 +02001001 crc = crc32_char(crc, *buf++);
Willy Tarreauab2b7822021-04-22 14:09:44 +02001002 return crc;
1003}
1004
1005/* uses the most suitable crc32 function to update crc on <buf, len> */
1006static inline uint32_t update_crc(uint32_t crc, const void *buf, int len)
1007{
1008 return slz_crc32_by4(crc, buf, len);
1009}
1010
1011/* Sends the gzip header for stream <strm> into buffer <buf>. When it's done,
1012 * the stream state is updated to SLZ_ST_EOB. It returns the number of bytes
1013 * emitted which is always 10. The caller is responsible for ensuring there's
1014 * always enough room in the buffer.
1015 */
1016int slz_rfc1952_send_header(struct slz_stream *strm, unsigned char *buf)
1017{
1018 memcpy(buf, gzip_hdr, sizeof(gzip_hdr));
1019 strm->state = SLZ_ST_EOB;
1020 return sizeof(gzip_hdr);
1021}
1022
1023/* Encodes the block according to rfc1952. This means that the CRC of the input
1024 * block is computed according to the CRC32 algorithm. If the header was never
1025 * sent, it may be sent first. The number of output bytes is returned.
1026 */
1027long slz_rfc1952_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
1028{
1029 long ret = 0;
1030
1031 if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
1032 ret += slz_rfc1952_send_header(strm, out);
1033
1034 strm->crc32 = update_crc(strm->crc32, in, ilen);
1035 ret += slz_rfc1951_encode(strm, out + ret, in, ilen, more);
1036 return ret;
1037}
1038
1039/* Initializes stream <strm> for use with the gzip format (rfc1952). The
1040 * compression level passed in <level> is set. This value can only be 0 (no
1041 * compression) or 1 (compression) and other values will lead to unpredictable
1042 * behaviour. The function always returns 0.
1043 */
1044int slz_rfc1952_init(struct slz_stream *strm, int level)
1045{
1046 strm->state = SLZ_ST_INIT;
1047 strm->level = level;
1048 strm->format = SLZ_FMT_GZIP;
1049 strm->crc32 = 0;
1050 strm->ilen = 0;
1051 strm->qbits = 0;
1052 strm->queue = 0;
1053 return 0;
1054}
1055
1056/* Flushes pending bits and sends the gzip trailer for stream <strm> into
1057 * buffer <buf>. When it's done, the stream state is updated to SLZ_ST_END. It
1058 * returns the number of bytes emitted. The trailer consists in flushing the
1059 * possibly pending bits from the queue (up to 24 bits), rounding to the next
1060 * byte, then 4 bytes for the CRC and another 4 bytes for the input length.
Ilya Shipitsinb2be9a12021-04-24 13:25:42 +05001061 * That may about to 4+4+4 = 12 bytes, that the caller must ensure are
Willy Tarreauab2b7822021-04-22 14:09:44 +02001062 * available before calling the function. Note that if the initial header was
1063 * never sent, it will be sent first as well (10 extra bytes).
1064 */
1065int slz_rfc1952_finish(struct slz_stream *strm, unsigned char *buf)
1066{
1067 strm->outbuf = buf;
1068
1069 if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
1070 strm->outbuf += slz_rfc1952_send_header(strm, strm->outbuf);
1071
1072 slz_rfc1951_finish(strm, strm->outbuf);
1073 copy_32b(strm, strm->crc32);
1074 copy_32b(strm, strm->ilen);
1075 strm->state = SLZ_ST_END;
1076
1077 return strm->outbuf - buf;
1078}
1079
1080
1081/* RFC1950-specific stuff. This is for the Zlib stream format.
1082 * From RFC1950 (zlib) :
1083 *
1084
1085 2.2. Data format
1086
1087 A zlib stream has the following structure:
1088
1089 0 1
1090 +---+---+
1091 |CMF|FLG| (more-->)
1092 +---+---+
1093
1094
1095 (if FLG.FDICT set)
1096
1097 0 1 2 3
1098 +---+---+---+---+
1099 | DICTID | (more-->)
1100 +---+---+---+---+
1101
1102 +=====================+---+---+---+---+
1103 |...compressed data...| ADLER32 |
1104 +=====================+---+---+---+---+
1105
1106 Any data which may appear after ADLER32 are not part of the zlib
1107 stream.
1108
1109 CMF (Compression Method and flags)
1110 This byte is divided into a 4-bit compression method and a 4-
1111 bit information field depending on the compression method.
1112
1113 bits 0 to 3 CM Compression method
1114 bits 4 to 7 CINFO Compression info
1115
1116 CM (Compression method)
1117 This identifies the compression method used in the file. CM = 8
1118 denotes the "deflate" compression method with a window size up
1119 to 32K. This is the method used by gzip and PNG (see
1120 references [1] and [2] in Chapter 3, below, for the reference
1121 documents). CM = 15 is reserved. It might be used in a future
1122 version of this specification to indicate the presence of an
1123 extra field before the compressed data.
1124
1125 CINFO (Compression info)
1126 For CM = 8, CINFO is the base-2 logarithm of the LZ77 window
1127 size, minus eight (CINFO=7 indicates a 32K window size). Values
1128 of CINFO above 7 are not allowed in this version of the
1129 specification. CINFO is not defined in this specification for
1130 CM not equal to 8.
1131
1132 FLG (FLaGs)
1133 This flag byte is divided as follows:
1134
1135 bits 0 to 4 FCHECK (check bits for CMF and FLG)
1136 bit 5 FDICT (preset dictionary)
1137 bits 6 to 7 FLEVEL (compression level)
1138
1139 The FCHECK value must be such that CMF and FLG, when viewed as
1140 a 16-bit unsigned integer stored in MSB order (CMF*256 + FLG),
1141 is a multiple of 31.
1142
1143
1144 FDICT (Preset dictionary)
1145 If FDICT is set, a DICT dictionary identifier is present
1146 immediately after the FLG byte. The dictionary is a sequence of
1147 bytes which are initially fed to the compressor without
1148 producing any compressed output. DICT is the Adler-32 checksum
1149 of this sequence of bytes (see the definition of ADLER32
1150 below). The decompressor can use this identifier to determine
1151 which dictionary has been used by the compressor.
1152
1153 FLEVEL (Compression level)
1154 These flags are available for use by specific compression
1155 methods. The "deflate" method (CM = 8) sets these flags as
1156 follows:
1157
1158 0 - compressor used fastest algorithm
1159 1 - compressor used fast algorithm
1160 2 - compressor used default algorithm
1161 3 - compressor used maximum compression, slowest algorithm
1162
1163 The information in FLEVEL is not needed for decompression; it
1164 is there to indicate if recompression might be worthwhile.
1165
1166 compressed data
1167 For compression method 8, the compressed data is stored in the
1168 deflate compressed data format as described in the document
1169 "DEFLATE Compressed Data Format Specification" by L. Peter
1170 Deutsch. (See reference [3] in Chapter 3, below)
1171
1172 Other compressed data formats are not specified in this version
1173 of the zlib specification.
1174
1175 ADLER32 (Adler-32 checksum)
1176 This contains a checksum value of the uncompressed data
1177 (excluding any dictionary data) computed according to Adler-32
1178 algorithm. This algorithm is a 32-bit extension and improvement
1179 of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073
1180 standard. See references [4] and [5] in Chapter 3, below)
1181
1182 Adler-32 is composed of two sums accumulated per byte: s1 is
1183 the sum of all bytes, s2 is the sum of all s1 values. Both sums
1184 are done modulo 65521. s1 is initialized to 1, s2 to zero. The
1185 Adler-32 checksum is stored as s2*65536 + s1 in most-
1186 significant-byte first (network) order.
1187
1188 ==> The stream can start with only 2 bytes :
1189 - CM = 0x78 : CMINFO=7 (32kB window), CM=8 (deflate)
1190 - FLG = 0x01 : FLEVEL = 0 (fastest), FDICT=0 (no dict), FCHECK=1 so
1191 that 0x7801 is a multiple of 31 (30721 = 991 * 31).
1192
1193 ==> and it ends with only 4 bytes, the Adler-32 checksum in big-endian format.
1194
1195 */
1196
1197static const unsigned char zlib_hdr[] = { 0x78, 0x01 }; // 32k win, deflate, chk=1
1198
1199
1200/* Original version from RFC1950, verified and works OK */
1201uint32_t slz_adler32_by1(uint32_t crc, const unsigned char *buf, int len)
1202{
1203 uint32_t s1 = crc & 0xffff;
1204 uint32_t s2 = (crc >> 16) & 0xffff;
1205 int n;
1206
1207 for (n = 0; n < len; n++) {
1208 s1 = (s1 + buf[n]) % 65521;
1209 s2 = (s2 + s1) % 65521;
1210 }
1211 return (s2 << 16) + s1;
1212}
1213
1214/* Computes the adler32 sum on <buf> for <len> bytes. It avoids the expensive
1215 * modulus by retrofitting the number of bytes missed between 65521 and 65536
1216 * which is easy to count : For every sum above 65536, the modulus is offset
1217 * by (65536-65521) = 15. So for any value, we can count the accumulated extra
1218 * values by dividing the sum by 65536 and multiplying this value by
1219 * (65536-65521). That's easier with a drawing with boxes and marbles. It gives
1220 * this :
1221 * x % 65521 = (x % 65536) + (x / 65536) * (65536 - 65521)
1222 * = (x & 0xffff) + (x >> 16) * 15.
1223 */
1224uint32_t slz_adler32_block(uint32_t crc, const unsigned char *buf, long len)
1225{
1226 long s1 = crc & 0xffff;
1227 long s2 = (crc >> 16);
1228 long blk;
1229 long n;
1230
1231 do {
1232 blk = len;
1233 /* ensure we never overflow s2 (limit is about 2^((32-8)/2) */
1234 if (blk > (1U << 12))
1235 blk = 1U << 12;
1236 len -= blk;
1237
1238 for (n = 0; n < blk; n++) {
1239 s1 = (s1 + buf[n]);
1240 s2 = (s2 + s1);
1241 }
1242
1243 /* Largest value here is 2^12 * 255 = 1044480 < 2^20. We can
1244 * still overflow once, but not twice because the right hand
1245 * size is 225 max, so the total is 65761. However we also
1246 * have to take care of the values between 65521 and 65536.
1247 */
1248 s1 = (s1 & 0xffff) + 15 * (s1 >> 16);
1249 if (s1 >= 65521)
1250 s1 -= 65521;
1251
1252 /* For s2, the largest value is estimated to 2^32-1 for
1253 * simplicity, so the right hand side is about 15*65535
1254 * = 983025. We can overflow twice at most.
1255 */
1256 s2 = (s2 & 0xffff) + 15 * (s2 >> 16);
1257 s2 = (s2 & 0xffff) + 15 * (s2 >> 16);
1258 if (s2 >= 65521)
1259 s2 -= 65521;
1260
1261 buf += blk;
1262 } while (len);
1263 return (s2 << 16) + s1;
1264}
1265
1266/* Sends the zlib header for stream <strm> into buffer <buf>. When it's done,
1267 * the stream state is updated to SLZ_ST_EOB. It returns the number of bytes
1268 * emitted which is always 2. The caller is responsible for ensuring there's
1269 * always enough room in the buffer.
1270 */
1271int slz_rfc1950_send_header(struct slz_stream *strm, unsigned char *buf)
1272{
1273 memcpy(buf, zlib_hdr, sizeof(zlib_hdr));
1274 strm->state = SLZ_ST_EOB;
1275 return sizeof(zlib_hdr);
1276}
1277
1278/* Encodes the block according to rfc1950. This means that the CRC of the input
1279 * block is computed according to the ADLER32 algorithm. If the header was never
1280 * sent, it may be sent first. The number of output bytes is returned.
1281 */
1282long slz_rfc1950_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
1283{
1284 long ret = 0;
1285
1286 if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
1287 ret += slz_rfc1950_send_header(strm, out);
1288
1289 strm->crc32 = slz_adler32_block(strm->crc32, in, ilen);
1290 ret += slz_rfc1951_encode(strm, out + ret, in, ilen, more);
1291 return ret;
1292}
1293
1294/* Initializes stream <strm> for use with the zlib format (rfc1952). The
1295 * compression level passed in <level> is set. This value can only be 0 (no
1296 * compression) or 1 (compression) and other values will lead to unpredictable
1297 * behaviour. The function always returns 0.
1298 */
1299int slz_rfc1950_init(struct slz_stream *strm, int level)
1300{
1301 strm->state = SLZ_ST_INIT;
1302 strm->level = level;
1303 strm->format = SLZ_FMT_ZLIB;
1304 strm->crc32 = 1; // rfc1950/zlib starts with initial crc=1
1305 strm->ilen = 0;
1306 strm->qbits = 0;
1307 strm->queue = 0;
1308 return 0;
1309}
1310
1311/* Flushes pending bits and sends the gzip trailer for stream <strm> into
1312 * buffer <buf>. When it's done, the stream state is updated to SLZ_ST_END. It
1313 * returns the number of bytes emitted. The trailer consists in flushing the
1314 * possibly pending bits from the queue (up to 24 bits), rounding to the next
Ilya Shipitsinb2be9a12021-04-24 13:25:42 +05001315 * byte, then 4 bytes for the CRC. That may about to 4+4 = 8 bytes, that the
Willy Tarreauab2b7822021-04-22 14:09:44 +02001316 * caller must ensure are available before calling the function. Note that if
1317 * the initial header was never sent, it will be sent first as well (2 extra
1318 * bytes).
1319 */
1320int slz_rfc1950_finish(struct slz_stream *strm, unsigned char *buf)
1321{
1322 strm->outbuf = buf;
1323
1324 if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
1325 strm->outbuf += slz_rfc1952_send_header(strm, strm->outbuf);
1326
1327 slz_rfc1951_finish(strm, strm->outbuf);
1328 copy_8b(strm, (strm->crc32 >> 24) & 0xff);
1329 copy_8b(strm, (strm->crc32 >> 16) & 0xff);
1330 copy_8b(strm, (strm->crc32 >> 8) & 0xff);
1331 copy_8b(strm, (strm->crc32 >> 0) & 0xff);
1332 strm->state = SLZ_ST_END;
1333 return strm->outbuf - buf;
1334}
1335
Willy Tarreauab2b7822021-04-22 14:09:44 +02001336__attribute__((constructor))
1337static void __slz_initialize(void)
1338{
Willy Tarreau9e274282021-05-12 08:36:09 +02001339#if !defined(__ARM_FEATURE_CRC32)
Willy Tarreauab2b7822021-04-22 14:09:44 +02001340 __slz_make_crc_table();
Willy Tarreau9e274282021-05-12 08:36:09 +02001341#endif
Willy Tarreauab2b7822021-04-22 14:09:44 +02001342 __slz_prepare_dist_table();
1343}