blob: 21a69a8aee24eae5e3e133cf325fab1fa19552d1 [file] [log] [blame]
Masahiro Yamada627b0942018-01-26 11:42:01 +09001/* crc32.c -- compute the CRC-32 of a data stream
Daniel Boulby958f1dd2022-10-05 11:05:22 +01002 * Copyright (C) 1995-2022 Mark Adler
Masahiro Yamada627b0942018-01-26 11:42:01 +09003 * For conditions of distribution and use, see copyright notice in zlib.h
4 *
Daniel Boulby958f1dd2022-10-05 11:05:22 +01005 * This interleaved implementation of a CRC makes use of pipelined multiple
6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
Masahiro Yamada627b0942018-01-26 11:42:01 +09008 */
9
10/* @(#) $Id$ */
11
12/*
13 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14 protection on the static variables used to control the first-use generation
Daniel Boulby958f1dd2022-10-05 11:05:22 +010015 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
Masahiro Yamada627b0942018-01-26 11:42:01 +090016 first call get_crc_table() to initialize the tables before allowing more than
17 one thread to use crc32().
18
Daniel Boulby958f1dd2022-10-05 11:05:22 +010019 MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20 produced, so that this one source file can be compiled to an executable.
Masahiro Yamada627b0942018-01-26 11:42:01 +090021 */
22
23#ifdef MAKECRCH
24# include <stdio.h>
25# ifndef DYNAMIC_CRC_TABLE
26# define DYNAMIC_CRC_TABLE
27# endif /* !DYNAMIC_CRC_TABLE */
28#endif /* MAKECRCH */
29
Daniel Boulby958f1dd2022-10-05 11:05:22 +010030#include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
Masahiro Yamada627b0942018-01-26 11:42:01 +090031
Daniel Boulby958f1dd2022-10-05 11:05:22 +010032 /*
33 A CRC of a message is computed on N braids of words in the message, where
34 each word consists of W bytes (4 or 8). If N is 3, for example, then three
35 running sparse CRCs are calculated respectively on each braid, at these
36 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37 This is done starting at a word boundary, and continues until as many blocks
38 of N * W bytes as are available have been processed. The results are combined
39 into a single CRC at the end. For this code, N must be in the range 1..6 and
40 W must be 4 or 8. The upper limit on N can be increased if desired by adding
41 more #if blocks, extending the patterns apparent in the code. In addition,
42 crc32.h would need to be regenerated, if the maximum N value is increased.
43
44 N and W are chosen empirically by benchmarking the execution time on a given
45 processor. The choices for N and W below were based on testing on Intel Kaby
46 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47 Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49 They were all tested with either gcc or clang, all using the -O3 optimization
50 level. Your mileage may vary.
51 */
52
53/* Define N */
54#ifdef Z_TESTN
55# define N Z_TESTN
56#else
57# define N 5
58#endif
59#if N < 1 || N > 6
60# error N must be in 1..6
Masahiro Yamada627b0942018-01-26 11:42:01 +090061#endif
Daniel Boulby958f1dd2022-10-05 11:05:22 +010062
63/*
64 z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66 that bytes are eight bits.
67 */
68
69/*
70 Define W and the associated z_word_t type. If W is not defined, then a
71 braided calculation is not used, and the associated tables and code are not
72 compiled.
73 */
74#ifdef Z_TESTW
75# if Z_TESTW-1 != -1
76# define W Z_TESTW
77# endif
Masahiro Yamada627b0942018-01-26 11:42:01 +090078#else
Daniel Boulby958f1dd2022-10-05 11:05:22 +010079# ifdef MAKECRCH
80# define W 8 /* required for MAKECRCH */
81# else
82# if defined(__x86_64__) || defined(__aarch64__)
83# define W 8
84# else
85# define W 4
86# endif
87# endif
88#endif
89#ifdef W
90# if W == 8 && defined(Z_U8)
91 typedef Z_U8 z_word_t;
92# elif defined(Z_U4)
93# undef W
94# define W 4
95 typedef Z_U4 z_word_t;
96# else
97# undef W
98# endif
99#endif
100
101/* If available, use the ARM processor CRC32 instruction. */
102#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103# define ARMCRC32
104#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +0900105
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100106/* Local functions. */
107local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
108local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
Masahiro Yamada627b0942018-01-26 11:42:01 +0900109
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100110#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111 local z_word_t byte_swap OF((z_word_t word));
112#endif
113
114#if defined(W) && !defined(ARMCRC32)
115 local z_crc_t crc_word OF((z_word_t data));
116 local z_word_t crc_word_big OF((z_word_t data));
117#endif
118
119#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
120/*
121 Swap the bytes in a z_word_t to convert between little and big endian. Any
122 self-respecting compiler will optimize this to a single machine byte-swap
123 instruction, if one is available. This assumes that word_t is either 32 bits
124 or 64 bits.
125 */
126local z_word_t byte_swap(word)
127 z_word_t word;
128{
129# if W == 8
130 return
131 (word & 0xff00000000000000) >> 56 |
132 (word & 0xff000000000000) >> 40 |
133 (word & 0xff0000000000) >> 24 |
134 (word & 0xff00000000) >> 8 |
135 (word & 0xff000000) << 8 |
136 (word & 0xff0000) << 24 |
137 (word & 0xff00) << 40 |
138 (word & 0xff) << 56;
139# else /* W == 4 */
140 return
141 (word & 0xff000000) >> 24 |
142 (word & 0xff0000) >> 8 |
143 (word & 0xff00) << 8 |
144 (word & 0xff) << 24;
145# endif
146}
147#endif
148
149/* CRC polynomial. */
150#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
Masahiro Yamada627b0942018-01-26 11:42:01 +0900151
152#ifdef DYNAMIC_CRC_TABLE
153
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100154local z_crc_t FAR crc_table[256];
155local z_crc_t FAR x2n_table[32];
Masahiro Yamada627b0942018-01-26 11:42:01 +0900156local void make_crc_table OF((void));
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100157#ifdef W
158 local z_word_t FAR crc_big_table[256];
159 local z_crc_t FAR crc_braid_table[W][256];
160 local z_word_t FAR crc_braid_big_table[W][256];
161 local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
162#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +0900163#ifdef MAKECRCH
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100164 local void write_table OF((FILE *, const z_crc_t FAR *, int));
165 local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
166 local void write_table64 OF((FILE *, const z_word_t FAR *, int));
Masahiro Yamada627b0942018-01-26 11:42:01 +0900167#endif /* MAKECRCH */
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100168
169/*
170 Define a once() function depending on the availability of atomics. If this is
171 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
172 multiple threads, and if atomics are not available, then get_crc_table() must
173 be called to initialize the tables and must return before any threads are
174 allowed to compute or combine CRCs.
175 */
176
177/* Definition of once functionality. */
178typedef struct once_s once_t;
179local void once OF((once_t *, void (*)(void)));
180
181/* Check for the availability of atomics. */
182#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
183 !defined(__STDC_NO_ATOMICS__)
184
185#include <stdatomic.h>
186
187/* Structure for once(), which must be initialized with ONCE_INIT. */
188struct once_s {
189 atomic_flag begun;
190 atomic_int done;
191};
192#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
193
194/*
195 Run the provided init() function exactly once, even if multiple threads
196 invoke once() at the same time. The state must be a once_t initialized with
197 ONCE_INIT.
198 */
199local void once(state, init)
200 once_t *state;
201 void (*init)(void);
202{
203 if (!atomic_load(&state->done)) {
204 if (atomic_flag_test_and_set(&state->begun))
205 while (!atomic_load(&state->done))
206 ;
207 else {
208 init();
209 atomic_store(&state->done, 1);
210 }
211 }
212}
213
214#else /* no atomics */
215
216/* Structure for once(), which must be initialized with ONCE_INIT. */
217struct once_s {
218 volatile int begun;
219 volatile int done;
220};
221#define ONCE_INIT {0, 0}
222
223/* Test and set. Alas, not atomic, but tries to minimize the period of
224 vulnerability. */
225local int test_and_set OF((int volatile *));
226local int test_and_set(flag)
227 int volatile *flag;
228{
229 int was;
230
231 was = *flag;
232 *flag = 1;
233 return was;
234}
235
236/* Run the provided init() function once. This is not thread-safe. */
237local void once(state, init)
238 once_t *state;
239 void (*init)(void);
240{
241 if (!state->done) {
242 if (test_and_set(&state->begun))
243 while (!state->done)
244 ;
245 else {
246 init();
247 state->done = 1;
248 }
249 }
250}
251
252#endif
253
254/* State for once(). */
255local once_t made = ONCE_INIT;
256
Masahiro Yamada627b0942018-01-26 11:42:01 +0900257/*
258 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
259 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
260
261 Polynomials over GF(2) are represented in binary, one bit per coefficient,
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100262 with the lowest powers in the most significant bit. Then adding polynomials
Masahiro Yamada627b0942018-01-26 11:42:01 +0900263 is just exclusive-or, and multiplying a polynomial by x is a right shift by
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100264 one. If we call the above polynomial p, and represent a byte as the
Masahiro Yamada627b0942018-01-26 11:42:01 +0900265 polynomial q, also with the lowest power in the most significant bit (so the
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100266 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
Masahiro Yamada627b0942018-01-26 11:42:01 +0900267 where a mod b means the remainder after dividing a by b.
268
269 This calculation is done using the shift-register method of multiplying and
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100270 taking the remainder. The register is initialized to zero, and for each
Masahiro Yamada627b0942018-01-26 11:42:01 +0900271 incoming bit, x^32 is added mod p to the register if the bit is a one (where
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100272 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
273 (which is shifting right by one and adding x^32 mod p if the bit shifted out
274 is a one). We start with the highest power (least significant bit) of q and
275 repeat for all eight bits of q.
276
277 The table is simply the CRC of all possible eight bit values. This is all the
278 information needed to generate CRCs on data a byte at a time for all
279 combinations of CRC register values and incoming bytes.
280 */
Masahiro Yamada627b0942018-01-26 11:42:01 +0900281
Elyes Haouasd94f9432023-02-13 10:38:45 +0100282local void make_crc_table(void)
Masahiro Yamada627b0942018-01-26 11:42:01 +0900283{
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100284 unsigned i, j, n;
285 z_crc_t p;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900286
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100287 /* initialize the CRC of bytes tables */
288 for (i = 0; i < 256; i++) {
289 p = i;
290 for (j = 0; j < 8; j++)
291 p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
292 crc_table[i] = p;
293#ifdef W
294 crc_big_table[i] = byte_swap(p);
295#endif
296 }
Masahiro Yamada627b0942018-01-26 11:42:01 +0900297
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100298 /* initialize the x^2^n mod p(x) table */
299 p = (z_crc_t)1 << 30; /* x^1 */
300 x2n_table[0] = p;
301 for (n = 1; n < 32; n++)
302 x2n_table[n] = p = multmodp(p, p);
Masahiro Yamada627b0942018-01-26 11:42:01 +0900303
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100304#ifdef W
305 /* initialize the braiding tables -- needs x2n_table[] */
306 braid(crc_braid_table, crc_braid_big_table, N, W);
307#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +0900308
309#ifdef MAKECRCH
Masahiro Yamada627b0942018-01-26 11:42:01 +0900310 {
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100311 /*
312 The crc32.h header file contains tables for both 32-bit and 64-bit
313 z_word_t's, and so requires a 64-bit type be available. In that case,
314 z_word_t must be defined to be 64-bits. This code then also generates
315 and writes out the tables for the case that z_word_t is 32 bits.
316 */
317#if !defined(W) || W != 8
318# error Need a 64-bit integer type in order to generate crc32.h.
319#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +0900320 FILE *out;
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100321 int k, n;
322 z_crc_t ltl[8][256];
323 z_word_t big[8][256];
Masahiro Yamada627b0942018-01-26 11:42:01 +0900324
325 out = fopen("crc32.h", "w");
326 if (out == NULL) return;
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100327
328 /* write out little-endian CRC table to crc32.h */
329 fprintf(out,
330 "/* crc32.h -- tables for rapid CRC calculation\n"
331 " * Generated automatically by crc32.c\n */\n"
332 "\n"
333 "local const z_crc_t FAR crc_table[] = {\n"
334 " ");
335 write_table(out, crc_table, 256);
336 fprintf(out,
337 "};\n");
338
339 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
340 fprintf(out,
341 "\n"
342 "#ifdef W\n"
343 "\n"
344 "#if W == 8\n"
345 "\n"
346 "local const z_word_t FAR crc_big_table[] = {\n"
347 " ");
348 write_table64(out, crc_big_table, 256);
349 fprintf(out,
350 "};\n");
351
352 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
353 fprintf(out,
354 "\n"
355 "#else /* W == 4 */\n"
356 "\n"
357 "local const z_word_t FAR crc_big_table[] = {\n"
358 " ");
359 write_table32hi(out, crc_big_table, 256);
360 fprintf(out,
361 "};\n"
362 "\n"
363 "#endif\n");
364
365 /* write out braid tables for each value of N */
366 for (n = 1; n <= 6; n++) {
367 fprintf(out,
368 "\n"
369 "#if N == %d\n", n);
370
371 /* compute braid tables for this N and 64-bit word_t */
372 braid(ltl, big, n, 8);
373
374 /* write out braid tables for 64-bit z_word_t to crc32.h */
375 fprintf(out,
376 "\n"
377 "#if W == 8\n"
378 "\n"
379 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
380 for (k = 0; k < 8; k++) {
381 fprintf(out, " {");
382 write_table(out, ltl[k], 256);
383 fprintf(out, "}%s", k < 7 ? ",\n" : "");
384 }
385 fprintf(out,
386 "};\n"
387 "\n"
388 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
389 for (k = 0; k < 8; k++) {
390 fprintf(out, " {");
391 write_table64(out, big[k], 256);
392 fprintf(out, "}%s", k < 7 ? ",\n" : "");
393 }
394 fprintf(out,
395 "};\n");
396
397 /* compute braid tables for this N and 32-bit word_t */
398 braid(ltl, big, n, 4);
399
400 /* write out braid tables for 32-bit z_word_t to crc32.h */
401 fprintf(out,
402 "\n"
403 "#else /* W == 4 */\n"
404 "\n"
405 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
406 for (k = 0; k < 4; k++) {
407 fprintf(out, " {");
408 write_table(out, ltl[k], 256);
409 fprintf(out, "}%s", k < 3 ? ",\n" : "");
410 }
411 fprintf(out,
412 "};\n"
413 "\n"
414 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
415 for (k = 0; k < 4; k++) {
416 fprintf(out, " {");
417 write_table32hi(out, big[k], 256);
418 fprintf(out, "}%s", k < 3 ? ",\n" : "");
419 }
420 fprintf(out,
421 "};\n"
422 "\n"
423 "#endif\n"
424 "\n"
425 "#endif\n");
Masahiro Yamada627b0942018-01-26 11:42:01 +0900426 }
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100427 fprintf(out,
428 "\n"
429 "#endif\n");
430
431 /* write out zeros operator table to crc32.h */
432 fprintf(out,
433 "\n"
434 "local const z_crc_t FAR x2n_table[] = {\n"
435 " ");
436 write_table(out, x2n_table, 32);
437 fprintf(out,
438 "};\n");
Masahiro Yamada627b0942018-01-26 11:42:01 +0900439 fclose(out);
440 }
441#endif /* MAKECRCH */
442}
443
444#ifdef MAKECRCH
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100445
446/*
447 Write the 32-bit values in table[0..k-1] to out, five per line in
448 hexadecimal separated by commas.
449 */
450local void write_table(out, table, k)
Masahiro Yamada627b0942018-01-26 11:42:01 +0900451 FILE *out;
452 const z_crc_t FAR *table;
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100453 int k;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900454{
455 int n;
456
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100457 for (n = 0; n < k; n++)
458 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
Masahiro Yamada627b0942018-01-26 11:42:01 +0900459 (unsigned long)(table[n]),
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100460 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
461}
462
463/*
464 Write the high 32-bits of each value in table[0..k-1] to out, five per line
465 in hexadecimal separated by commas.
466 */
467local void write_table32hi(out, table, k)
468FILE *out;
469const z_word_t FAR *table;
470int k;
471{
472 int n;
473
474 for (n = 0; n < k; n++)
475 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
476 (unsigned long)(table[n] >> 32),
477 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
Masahiro Yamada627b0942018-01-26 11:42:01 +0900478}
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100479
480/*
481 Write the 64-bit values in table[0..k-1] to out, three per line in
482 hexadecimal separated by commas. This assumes that if there is a 64-bit
483 type, then there is also a long long integer type, and it is at least 64
484 bits. If not, then the type cast and format string can be adjusted
485 accordingly.
486 */
487local void write_table64(out, table, k)
488 FILE *out;
489 const z_word_t FAR *table;
490 int k;
491{
492 int n;
493
494 for (n = 0; n < k; n++)
495 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
496 (unsigned long long)(table[n]),
497 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
498}
499
500/* Actually do the deed. */
Elyes Haouasd94f9432023-02-13 10:38:45 +0100501int main(void)
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100502{
503 make_crc_table();
504 return 0;
505}
506
Masahiro Yamada627b0942018-01-26 11:42:01 +0900507#endif /* MAKECRCH */
508
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100509#ifdef W
510/*
511 Generate the little and big-endian braid tables for the given n and z_word_t
512 size w. Each array must have room for w blocks of 256 elements.
513 */
514local void braid(ltl, big, n, w)
515 z_crc_t ltl[][256];
516 z_word_t big[][256];
517 int n;
518 int w;
519{
520 int k;
521 z_crc_t i, p, q;
522 for (k = 0; k < w; k++) {
523 p = x2nmodp((n * w + 3 - k) << 3, 0);
524 ltl[k][0] = 0;
525 big[w - 1 - k][0] = 0;
526 for (i = 1; i < 256; i++) {
527 ltl[k][i] = q = multmodp(i << 24, p);
528 big[w - 1 - k][i] = byte_swap(q);
529 }
530 }
531}
532#endif
533
Masahiro Yamada627b0942018-01-26 11:42:01 +0900534#else /* !DYNAMIC_CRC_TABLE */
535/* ========================================================================
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100536 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
537 * of x for combining CRC-32s, all made by make_crc_table().
Masahiro Yamada627b0942018-01-26 11:42:01 +0900538 */
539#include "crc32.h"
540#endif /* DYNAMIC_CRC_TABLE */
541
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100542/* ========================================================================
543 * Routines used for CRC calculation. Some are also required for the table
544 * generation above.
545 */
546
547/*
548 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
549 reflected. For speed, this requires that a not be zero.
550 */
551local z_crc_t multmodp(a, b)
552 z_crc_t a;
553 z_crc_t b;
554{
555 z_crc_t m, p;
556
557 m = (z_crc_t)1 << 31;
558 p = 0;
559 for (;;) {
560 if (a & m) {
561 p ^= b;
562 if ((a & (m - 1)) == 0)
563 break;
564 }
565 m >>= 1;
566 b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
567 }
568 return p;
569}
570
571/*
572 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
573 initialized.
574 */
575local z_crc_t x2nmodp(n, k)
576 z_off64_t n;
577 unsigned k;
578{
579 z_crc_t p;
580
581 p = (z_crc_t)1 << 31; /* x^0 == 1 */
582 while (n) {
583 if (n & 1)
584 p = multmodp(x2n_table[k & 31], p);
585 n >>= 1;
586 k++;
587 }
588 return p;
589}
590
Masahiro Yamada627b0942018-01-26 11:42:01 +0900591/* =========================================================================
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100592 * This function can be used by asm versions of crc32(), and to force the
593 * generation of the CRC tables in a threaded application.
Masahiro Yamada627b0942018-01-26 11:42:01 +0900594 */
595const z_crc_t FAR * ZEXPORT get_crc_table()
596{
597#ifdef DYNAMIC_CRC_TABLE
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100598 once(&made, make_crc_table);
Masahiro Yamada627b0942018-01-26 11:42:01 +0900599#endif /* DYNAMIC_CRC_TABLE */
600 return (const z_crc_t FAR *)crc_table;
601}
602
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100603/* =========================================================================
604 * Use ARM machine instructions if available. This will compute the CRC about
605 * ten times faster than the braided calculation. This code does not check for
606 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
607 * only be defined if the compilation specifies an ARM processor architecture
608 * that has the instructions. For example, compiling with -march=armv8.1-a or
609 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
610 * instructions.
611 */
612#ifdef ARMCRC32
Masahiro Yamada627b0942018-01-26 11:42:01 +0900613
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100614/*
615 Constants empirically determined to maximize speed. These values are from
616 measurements on a Cortex-A57. Your mileage may vary.
617 */
618#define Z_BATCH 3990 /* number of words in a batch */
619#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
620#define Z_BATCH_MIN 800 /* fewest words in a final batch */
621
Masahiro Yamada627b0942018-01-26 11:42:01 +0900622unsigned long ZEXPORT crc32_z(crc, buf, len)
623 unsigned long crc;
624 const unsigned char FAR *buf;
625 z_size_t len;
626{
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100627 z_crc_t val;
628 z_word_t crc1, crc2;
629 const z_word_t *word;
630 z_word_t val0, val1, val2;
631 z_size_t last, last2, i;
632 z_size_t num;
633
634 /* Return initial CRC, if requested. */
635 if (buf == Z_NULL) return 0;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900636
637#ifdef DYNAMIC_CRC_TABLE
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100638 once(&made, make_crc_table);
Masahiro Yamada627b0942018-01-26 11:42:01 +0900639#endif /* DYNAMIC_CRC_TABLE */
640
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100641 /* Pre-condition the CRC */
642 crc = (~crc) & 0xffffffff;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900643
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100644 /* Compute the CRC up to a word boundary. */
645 while (len && ((z_size_t)buf & 7) != 0) {
646 len--;
647 val = *buf++;
648 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
Masahiro Yamada627b0942018-01-26 11:42:01 +0900649 }
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100650
651 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
652 word = (z_word_t const *)buf;
653 num = len >> 3;
654 len &= 7;
655
656 /* Do three interleaved CRCs to realize the throughput of one crc32x
657 instruction per cycle. Each CRC is calculated on Z_BATCH words. The
658 three CRCs are combined into a single CRC after each set of batches. */
659 while (num >= 3 * Z_BATCH) {
660 crc1 = 0;
661 crc2 = 0;
662 for (i = 0; i < Z_BATCH; i++) {
663 val0 = word[i];
664 val1 = word[i + Z_BATCH];
665 val2 = word[i + 2 * Z_BATCH];
666 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
667 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
668 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
669 }
670 word += 3 * Z_BATCH;
671 num -= 3 * Z_BATCH;
672 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
673 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900674 }
Masahiro Yamada627b0942018-01-26 11:42:01 +0900675
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100676 /* Do one last smaller batch with the remaining words, if there are enough
677 to pay for the combination of CRCs. */
678 last = num / 3;
679 if (last >= Z_BATCH_MIN) {
680 last2 = last << 1;
681 crc1 = 0;
682 crc2 = 0;
683 for (i = 0; i < last; i++) {
684 val0 = word[i];
685 val1 = word[i + last];
686 val2 = word[i + last2];
687 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
688 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
689 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
690 }
691 word += 3 * last;
692 num -= 3 * last;
693 val = x2nmodp(last, 6);
694 crc = multmodp(val, crc) ^ crc1;
695 crc = multmodp(val, crc) ^ crc2;
696 }
697
698 /* Compute the CRC on any remaining words. */
699 for (i = 0; i < num; i++) {
700 val0 = word[i];
701 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
702 }
703 word += num;
704
705 /* Complete the CRC on any remaining bytes. */
706 buf = (const unsigned char FAR *)word;
707 while (len) {
708 len--;
709 val = *buf++;
710 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
711 }
712
713 /* Return the CRC, post-conditioned. */
714 return crc ^ 0xffffffff;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900715}
716
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100717#else
718
719#ifdef W
Masahiro Yamada627b0942018-01-26 11:42:01 +0900720
721/*
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100722 Return the CRC of the W bytes in the word_t data, taking the
723 least-significant byte of the word as the first byte of data, without any pre
724 or post conditioning. This is used to combine the CRCs of each braid.
Masahiro Yamada627b0942018-01-26 11:42:01 +0900725 */
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100726local z_crc_t crc_word(data)
727 z_word_t data;
728{
729 int k;
730 for (k = 0; k < W; k++)
731 data = (data >> 8) ^ crc_table[data & 0xff];
732 return (z_crc_t)data;
733}
Masahiro Yamada627b0942018-01-26 11:42:01 +0900734
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100735local z_word_t crc_word_big(data)
736 z_word_t data;
737{
738 int k;
739 for (k = 0; k < W; k++)
740 data = (data << 8) ^
741 crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
742 return data;
743}
744
745#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +0900746
747/* ========================================================================= */
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100748unsigned long ZEXPORT crc32_z(crc, buf, len)
Masahiro Yamada627b0942018-01-26 11:42:01 +0900749 unsigned long crc;
750 const unsigned char FAR *buf;
751 z_size_t len;
752{
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100753 /* Return initial CRC, if requested. */
754 if (buf == Z_NULL) return 0;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900755
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100756#ifdef DYNAMIC_CRC_TABLE
757 once(&made, make_crc_table);
758#endif /* DYNAMIC_CRC_TABLE */
Masahiro Yamada627b0942018-01-26 11:42:01 +0900759
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100760 /* Pre-condition the CRC */
761 crc = (~crc) & 0xffffffff;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900762
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100763#ifdef W
Masahiro Yamada627b0942018-01-26 11:42:01 +0900764
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100765 /* If provided enough bytes, do a braided CRC calculation. */
766 if (len >= N * W + W - 1) {
767 z_size_t blks;
768 z_word_t const *words;
769 unsigned endian;
770 int k;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900771
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100772 /* Compute the CRC up to a z_word_t boundary. */
773 while (len && ((z_size_t)buf & (W - 1)) != 0) {
774 len--;
775 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
776 }
Masahiro Yamada627b0942018-01-26 11:42:01 +0900777
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100778 /* Compute the CRC on as many N z_word_t blocks as are available. */
779 blks = len / (N * W);
780 len -= blks * N * W;
781 words = (z_word_t const *)buf;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900782
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100783 /* Do endian check at execution time instead of compile time, since ARM
784 processors can change the endianess at execution time. If the
785 compiler knows what the endianess will be, it can optimize out the
786 check and the unused branch. */
787 endian = 1;
788 if (*(unsigned char *)&endian) {
789 /* Little endian. */
Masahiro Yamada627b0942018-01-26 11:42:01 +0900790
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100791 z_crc_t crc0;
792 z_word_t word0;
793#if N > 1
794 z_crc_t crc1;
795 z_word_t word1;
796#if N > 2
797 z_crc_t crc2;
798 z_word_t word2;
799#if N > 3
800 z_crc_t crc3;
801 z_word_t word3;
802#if N > 4
803 z_crc_t crc4;
804 z_word_t word4;
805#if N > 5
806 z_crc_t crc5;
807 z_word_t word5;
808#endif
809#endif
810#endif
811#endif
812#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +0900813
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100814 /* Initialize the CRC for each braid. */
815 crc0 = crc;
816#if N > 1
817 crc1 = 0;
818#if N > 2
819 crc2 = 0;
820#if N > 3
821 crc3 = 0;
822#if N > 4
823 crc4 = 0;
824#if N > 5
825 crc5 = 0;
826#endif
827#endif
828#endif
829#endif
830#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +0900831
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100832 /*
833 Process the first blks-1 blocks, computing the CRCs on each braid
834 independently.
835 */
836 while (--blks) {
837 /* Load the word for each braid into registers. */
838 word0 = crc0 ^ words[0];
839#if N > 1
840 word1 = crc1 ^ words[1];
841#if N > 2
842 word2 = crc2 ^ words[2];
843#if N > 3
844 word3 = crc3 ^ words[3];
845#if N > 4
846 word4 = crc4 ^ words[4];
847#if N > 5
848 word5 = crc5 ^ words[5];
849#endif
850#endif
851#endif
852#endif
853#endif
854 words += N;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900855
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100856 /* Compute and update the CRC for each word. The loop should
857 get unrolled. */
858 crc0 = crc_braid_table[0][word0 & 0xff];
859#if N > 1
860 crc1 = crc_braid_table[0][word1 & 0xff];
861#if N > 2
862 crc2 = crc_braid_table[0][word2 & 0xff];
863#if N > 3
864 crc3 = crc_braid_table[0][word3 & 0xff];
865#if N > 4
866 crc4 = crc_braid_table[0][word4 & 0xff];
867#if N > 5
868 crc5 = crc_braid_table[0][word5 & 0xff];
869#endif
870#endif
871#endif
872#endif
873#endif
874 for (k = 1; k < W; k++) {
875 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
876#if N > 1
877 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
878#if N > 2
879 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
880#if N > 3
881 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
882#if N > 4
883 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
884#if N > 5
885 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
886#endif
887#endif
888#endif
889#endif
890#endif
891 }
892 }
Masahiro Yamada627b0942018-01-26 11:42:01 +0900893
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100894 /*
895 Process the last block, combining the CRCs of the N braids at the
896 same time.
897 */
898 crc = crc_word(crc0 ^ words[0]);
899#if N > 1
900 crc = crc_word(crc1 ^ words[1] ^ crc);
901#if N > 2
902 crc = crc_word(crc2 ^ words[2] ^ crc);
903#if N > 3
904 crc = crc_word(crc3 ^ words[3] ^ crc);
905#if N > 4
906 crc = crc_word(crc4 ^ words[4] ^ crc);
907#if N > 5
908 crc = crc_word(crc5 ^ words[5] ^ crc);
909#endif
910#endif
911#endif
912#endif
913#endif
914 words += N;
915 }
916 else {
917 /* Big endian. */
Masahiro Yamada627b0942018-01-26 11:42:01 +0900918
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100919 z_word_t crc0, word0, comb;
920#if N > 1
921 z_word_t crc1, word1;
922#if N > 2
923 z_word_t crc2, word2;
924#if N > 3
925 z_word_t crc3, word3;
926#if N > 4
927 z_word_t crc4, word4;
928#if N > 5
929 z_word_t crc5, word5;
930#endif
931#endif
932#endif
933#endif
934#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +0900935
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100936 /* Initialize the CRC for each braid. */
937 crc0 = byte_swap(crc);
938#if N > 1
939 crc1 = 0;
940#if N > 2
941 crc2 = 0;
942#if N > 3
943 crc3 = 0;
944#if N > 4
945 crc4 = 0;
946#if N > 5
947 crc5 = 0;
948#endif
949#endif
950#endif
951#endif
952#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +0900953
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100954 /*
955 Process the first blks-1 blocks, computing the CRCs on each braid
956 independently.
957 */
958 while (--blks) {
959 /* Load the word for each braid into registers. */
960 word0 = crc0 ^ words[0];
961#if N > 1
962 word1 = crc1 ^ words[1];
963#if N > 2
964 word2 = crc2 ^ words[2];
965#if N > 3
966 word3 = crc3 ^ words[3];
967#if N > 4
968 word4 = crc4 ^ words[4];
969#if N > 5
970 word5 = crc5 ^ words[5];
971#endif
972#endif
973#endif
974#endif
975#endif
976 words += N;
Masahiro Yamada627b0942018-01-26 11:42:01 +0900977
Daniel Boulby958f1dd2022-10-05 11:05:22 +0100978 /* Compute and update the CRC for each word. The loop should
979 get unrolled. */
980 crc0 = crc_braid_big_table[0][word0 & 0xff];
981#if N > 1
982 crc1 = crc_braid_big_table[0][word1 & 0xff];
983#if N > 2
984 crc2 = crc_braid_big_table[0][word2 & 0xff];
985#if N > 3
986 crc3 = crc_braid_big_table[0][word3 & 0xff];
987#if N > 4
988 crc4 = crc_braid_big_table[0][word4 & 0xff];
989#if N > 5
990 crc5 = crc_braid_big_table[0][word5 & 0xff];
991#endif
992#endif
993#endif
994#endif
995#endif
996 for (k = 1; k < W; k++) {
997 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
998#if N > 1
999 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
1000#if N > 2
1001 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
1002#if N > 3
1003 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
1004#if N > 4
1005 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
1006#if N > 5
1007 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
1008#endif
1009#endif
1010#endif
1011#endif
1012#endif
1013 }
1014 }
Masahiro Yamada627b0942018-01-26 11:42:01 +09001015
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001016 /*
1017 Process the last block, combining the CRCs of the N braids at the
1018 same time.
1019 */
1020 comb = crc_word_big(crc0 ^ words[0]);
1021#if N > 1
1022 comb = crc_word_big(crc1 ^ words[1] ^ comb);
1023#if N > 2
1024 comb = crc_word_big(crc2 ^ words[2] ^ comb);
1025#if N > 3
1026 comb = crc_word_big(crc3 ^ words[3] ^ comb);
1027#if N > 4
1028 comb = crc_word_big(crc4 ^ words[4] ^ comb);
1029#if N > 5
1030 comb = crc_word_big(crc5 ^ words[5] ^ comb);
1031#endif
1032#endif
1033#endif
1034#endif
1035#endif
1036 words += N;
1037 crc = byte_swap(comb);
1038 }
Masahiro Yamada627b0942018-01-26 11:42:01 +09001039
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001040 /*
1041 Update the pointer to the remaining bytes to process.
1042 */
1043 buf = (unsigned char const *)words;
1044 }
Masahiro Yamada627b0942018-01-26 11:42:01 +09001045
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001046#endif /* W */
Masahiro Yamada627b0942018-01-26 11:42:01 +09001047
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001048 /* Complete the computation of the CRC on any remaining bytes. */
1049 while (len >= 8) {
1050 len -= 8;
1051 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1052 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1053 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1054 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1055 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1056 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1057 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1058 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1059 }
1060 while (len) {
1061 len--;
1062 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1063 }
Masahiro Yamada627b0942018-01-26 11:42:01 +09001064
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001065 /* Return the CRC, post-conditioned. */
1066 return crc ^ 0xffffffff;
1067}
Masahiro Yamada627b0942018-01-26 11:42:01 +09001068
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001069#endif
Masahiro Yamada627b0942018-01-26 11:42:01 +09001070
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001071/* ========================================================================= */
1072unsigned long ZEXPORT crc32(crc, buf, len)
1073 unsigned long crc;
1074 const unsigned char FAR *buf;
1075 uInt len;
1076{
1077 return crc32_z(crc, buf, len);
1078}
Masahiro Yamada627b0942018-01-26 11:42:01 +09001079
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001080/* ========================================================================= */
1081uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1082 uLong crc1;
1083 uLong crc2;
1084 z_off64_t len2;
1085{
1086#ifdef DYNAMIC_CRC_TABLE
1087 once(&made, make_crc_table);
1088#endif /* DYNAMIC_CRC_TABLE */
1089 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
Masahiro Yamada627b0942018-01-26 11:42:01 +09001090}
1091
1092/* ========================================================================= */
1093uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1094 uLong crc1;
1095 uLong crc2;
1096 z_off_t len2;
1097{
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001098 return crc32_combine64(crc1, crc2, (z_off64_t)len2);
Masahiro Yamada627b0942018-01-26 11:42:01 +09001099}
1100
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001101/* ========================================================================= */
1102uLong ZEXPORT crc32_combine_gen64(len2)
1103 z_off64_t len2;
1104{
1105#ifdef DYNAMIC_CRC_TABLE
1106 once(&made, make_crc_table);
1107#endif /* DYNAMIC_CRC_TABLE */
1108 return x2nmodp(len2, 3);
1109}
1110
1111/* ========================================================================= */
1112uLong ZEXPORT crc32_combine_gen(len2)
1113 z_off_t len2;
1114{
1115 return crc32_combine_gen64((z_off64_t)len2);
1116}
1117
1118/* ========================================================================= */
1119uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
Masahiro Yamada627b0942018-01-26 11:42:01 +09001120 uLong crc1;
1121 uLong crc2;
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001122 uLong op;
Masahiro Yamada627b0942018-01-26 11:42:01 +09001123{
Daniel Boulby958f1dd2022-10-05 11:05:22 +01001124 return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
Masahiro Yamada627b0942018-01-26 11:42:01 +09001125}