// SPDX-License-Identifier: (GPL-2.0 or BSD-2-Clause)
/*
 * Huffman decoder, part of New Generation Entropy library
 * Copyright (C) 2013-2016, Yann Collet.
 *
 * You can contact the author at :
 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 */

/* **************************************************************
*  Compiler specifics
****************************************************************/
#define FORCE_INLINE static __always_inline

/* **************************************************************
*  Dependencies
****************************************************************/
#include "bitstream.h" /* BIT_* */
#include "fse.h"       /* header compression */
#include "huf.h"
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/string.h> /* memcpy, memset */

/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_STATIC_ASSERT(c)                                   \
	{                                                      \
		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
	} /* use only *after* variable declarations */

/*-***************************/
/*  generic DTableDesc       */
/*-***************************/

typedef struct {
	BYTE maxTableLog;
	BYTE tableType;
	BYTE tableLog;
	BYTE reserved;
} DTableDesc;

static DTableDesc HUF_getDTableDesc(const HUF_DTable *table)
{
	DTableDesc dtd;
	memcpy(&dtd, table, sizeof(dtd));
	return dtd;
}
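/*
 * Note : the DTableDesc header lives in the first cell of a HUF_DTable; the
 * actual decode elements start at DTable + 1. HUF_getDTableDesc() simply
 * memcpy()s that header out, and the readDTable functions below write it back.
 */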

/*-***************************/
/*  single-symbol decoding   */
/*-***************************/

typedef struct {
	BYTE byte;
	BYTE nbBits;
} HUF_DEltX2; /* single-symbol decoding */
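/*
 * Naming note : in this version of the library the "X2" entry points decode
 * one symbol per table cell (HUF_DEltX2.byte holds the decoded byte), while
 * the "X4" entry points further below decode up to two symbols per cell.
 * nbBits is the number of bits consumed from the bitstream for that cell.
 */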

size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
{
	U32 tableLog = 0;
	U32 nbSymbols = 0;
	size_t iSize;
	void *const dtPtr = DTable + 1;
	HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr;

	U32 *rankVal;
	BYTE *huffWeight;
	size_t spaceUsed32 = 0;

	rankVal = (U32 *)workspace + spaceUsed32;
	spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

	if ((spaceUsed32 << 2) > workspaceSize)
		return ERROR(tableLog_tooLarge);
	workspace = (U32 *)workspace + spaceUsed32;
	workspaceSize -= (spaceUsed32 << 2);
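	/*
	 * Workspace layout : spaceUsed32 counts U32 units carved out of the
	 * caller-provided workspace (rank counters followed by the weight
	 * array, rounded up to a U32 boundary by ALIGN). The remaining part
	 * of the workspace is handed down to HUF_readStats_wksp() below.
	 */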

	HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
	/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */

	iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
	if (HUF_isError(iSize))
		return iSize;

	/* Table header */
	{
		DTableDesc dtd = HUF_getDTableDesc(DTable);
		if (tableLog > (U32)(dtd.maxTableLog + 1))
			return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
		dtd.tableType = 0;
		dtd.tableLog = (BYTE)tableLog;
		memcpy(DTable, &dtd, sizeof(dtd));
	}

	/* Calculate starting value for each rank */
	{
		U32 n, nextRankStart = 0;
		for (n = 1; n < tableLog + 1; n++) {
			U32 const curr = nextRankStart;
			nextRankStart += (rankVal[n] << (n - 1));
			rankVal[n] = curr;
		}
	}
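	/*
	 * After this loop rankVal[n] holds the first DTable index for weight n :
	 * HUF_readStats_wksp() returned, in rankVal[n], the number of symbols
	 * of weight n, and each such symbol occupies (1 << (n - 1)) cells.
	 */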

	/* fill DTable */
	{
		U32 n;
		for (n = 0; n < nbSymbols; n++) {
			U32 const w = huffWeight[n];
			U32 const length = (1 << w) >> 1;
			U32 u;
			HUF_DEltX2 D;
			D.byte = (BYTE)n;
			D.nbBits = (BYTE)(tableLog + 1 - w);
			for (u = rankVal[w]; u < rankVal[w] + length; u++)
				dt[u] = D;
			rankVal[w] += length;
		}
	}
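	/*
	 * Each symbol of weight w uses (tableLog + 1 - w) bits and is
	 * replicated over length = 2^(w-1) consecutive cells, so any
	 * tableLog-bit lookup whose leading bits match the code lands on the
	 * right entry.
	 */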

	return iSize;
}

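/*
 * Decoding step : peek dtLog bits without consuming them, use the value as an
 * index into the decode table, emit the symbol stored there, then consume
 * only the nbBits that actually belong to that symbol's code.
 */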
static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog)
{
	size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
	BYTE const c = dt[val].byte;
	BIT_skipBits(Dstream, dt[val].nbBits);
	return c;
}

#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)         \
	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)

#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
	if (ZSTD_64bits())                     \
	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
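/*
 * The _1 and _2 variants only decode when the bit container is guaranteed to
 * still hold enough valid bits between two reloads : always on 64-bit hosts,
 * and for _1 also when HUF_TABLELOG_MAX is small enough for a 32-bit
 * container. Otherwise they compile to nothing and the stream is simply
 * reloaded more often.
 */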

FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
{
	BYTE *const pStart = p;

	/* up to 4 symbols at a time */
	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) {
		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
		HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
	}

	/* closer to the end */
	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

	/* no more data to retrieve from bitstream, hence no need to reload */
	while (p < pEnd)
		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

	return pEnd - pStart;
}

static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	BYTE *op = (BYTE *)dst;
	BYTE *const oend = op + dstSize;
	const void *dtPtr = DTable + 1;
	const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
	BIT_DStream_t bitD;
	DTableDesc const dtd = HUF_getDTableDesc(DTable);
	U32 const dtLog = dtd.tableLog;

	{
		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
		if (HUF_isError(errorCode))
			return errorCode;
	}

	HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);

	/* check */
	if (!BIT_endOfDStream(&bitD))
		return ERROR(corruption_detected);

	return dstSize;
}

size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	DTableDesc dtd = HUF_getDTableDesc(DTable);
	if (dtd.tableType != 0)
		return ERROR(GENERIC);
	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
}

size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	const BYTE *ip = (const BYTE *)cSrc;

	size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
	if (HUF_isError(hSize))
		return hSize;
	if (hSize >= cSrcSize)
		return ERROR(srcSize_wrong);
	ip += hSize;
	cSrcSize -= hSize;

	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
}

static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	/* Check */
	if (cSrcSize < 10)
		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
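	/*
	 * 4-stream layout : the compressed block starts with a 6-byte jump
	 * table holding three little-endian 16-bit sizes for streams 1-3; the
	 * size of stream 4 is whatever remains. Each stream decodes one
	 * quarter of dst (segmentSize bytes, the last segment possibly shorter).
	 */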

	{
		const BYTE *const istart = (const BYTE *)cSrc;
		BYTE *const ostart = (BYTE *)dst;
		BYTE *const oend = ostart + dstSize;
		const void *const dtPtr = DTable + 1;
		const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;

		/* Init */
		BIT_DStream_t bitD1;
		BIT_DStream_t bitD2;
		BIT_DStream_t bitD3;
		BIT_DStream_t bitD4;
		size_t const length1 = ZSTD_readLE16(istart);
		size_t const length2 = ZSTD_readLE16(istart + 2);
		size_t const length3 = ZSTD_readLE16(istart + 4);
		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
		const BYTE *const istart1 = istart + 6; /* jumpTable */
		const BYTE *const istart2 = istart1 + length1;
		const BYTE *const istart3 = istart2 + length2;
		const BYTE *const istart4 = istart3 + length3;
		const size_t segmentSize = (dstSize + 3) / 4;
		BYTE *const opStart2 = ostart + segmentSize;
		BYTE *const opStart3 = opStart2 + segmentSize;
		BYTE *const opStart4 = opStart3 + segmentSize;
		BYTE *op1 = ostart;
		BYTE *op2 = opStart2;
		BYTE *op3 = opStart3;
		BYTE *op4 = opStart4;
		U32 endSignal;
		DTableDesc const dtd = HUF_getDTableDesc(DTable);
		U32 const dtLog = dtd.tableLog;

		if (length4 > cSrcSize)
			return ERROR(corruption_detected); /* overflow */
		{
			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
			if (HUF_isError(errorCode))
				return errorCode;
		}
		{
			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
			if (HUF_isError(errorCode))
				return errorCode;
		}
		{
			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
			if (HUF_isError(errorCode))
				return errorCode;
		}
		{
			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
			if (HUF_isError(errorCode))
				return errorCode;
		}

		/* 16-32 symbols per loop (4-8 symbols per stream) */
		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
		for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) {
			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
			HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
			HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
			HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
			HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
			HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
			HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
			HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
			HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
		}
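		/*
		 * The four streams are decoded in an interleaved fashion,
		 * which lets the table lookups of one stream overlap work on
		 * the others; the loop exits as soon as any stream is no
		 * longer "unfinished" or the fourth output segment gets
		 * close to the end of dst.
		 */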

		/* check corruption */
		if (op1 > opStart2)
			return ERROR(corruption_detected);
		if (op2 > opStart3)
			return ERROR(corruption_detected);
		if (op3 > opStart4)
			return ERROR(corruption_detected);
		/* note : op4 supposed already verified within main loop */

		/* finish bitStreams one by one */
		HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
		HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
		HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
		HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);

		/* check */
		endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
		if (!endSignal)
			return ERROR(corruption_detected);

		/* decoded size */
		return dstSize;
	}
}

size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	DTableDesc dtd = HUF_getDTableDesc(DTable);
	if (dtd.tableType != 0)
		return ERROR(GENERIC);
	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
}

size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	const BYTE *ip = (const BYTE *)cSrc;

	size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
	if (HUF_isError(hSize))
		return hSize;
	if (hSize >= cSrcSize)
		return ERROR(srcSize_wrong);
	ip += hSize;
	cSrcSize -= hSize;

	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
}

/* *************************/
/* double-symbols decoding */
/* *************************/
typedef struct {
	U16 sequence;
	BYTE nbBits;
	BYTE length;
} HUF_DEltX4; /* double-symbols decoding */

typedef struct {
	BYTE symbol;
	BYTE weight;
} sortedSymbol_t;
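/*
 * A HUF_DEltX4 cell may describe one or two decoded symbols : `sequence`
 * holds up to two bytes in output order, `length` (1 or 2) tells how many of
 * them are valid, and `nbBits` is the total number of bits consumed. The
 * decoder blindly copies two bytes and then advances the output pointer by
 * `length`, so a one-symbol cell may harmlessly overwrite the following byte.
 */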

/* HUF_fillDTableX4Level2() :
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight,
				   const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq)
{
	HUF_DEltX4 DElt;
	U32 rankVal[HUF_TABLELOG_MAX + 1];

	/* get pre-calculated rankVal */
	memcpy(rankVal, rankValOrigin, sizeof(rankVal));

	/* fill skipped values */
	if (minWeight > 1) {
		U32 i, skipSize = rankVal[minWeight];
		ZSTD_writeLE16(&(DElt.sequence), baseSeq);
		DElt.nbBits = (BYTE)(consumed);
		DElt.length = 1;
		for (i = 0; i < skipSize; i++)
			DTable[i] = DElt;
	}

	/* fill DTable */
	{
		U32 s;
		for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */
			const U32 symbol = sortedSymbols[s].symbol;
			const U32 weight = sortedSymbols[s].weight;
			const U32 nbBits = nbBitsBaseline - weight;
			const U32 length = 1 << (sizeLog - nbBits);
			const U32 start = rankVal[weight];
			U32 i = start;
			const U32 end = start + length;

			ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
			DElt.nbBits = (BYTE)(nbBits + consumed);
			DElt.length = 2;
			do {
				DTable[i++] = DElt;
			} while (i < end); /* since length >= 1 */

			rankVal[weight] += length;
		}
	}
}
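/*
 * Level-2 fill : when the first symbol of a cell leaves spare bits in the
 * lookup window, a sub-table indexed by those remaining bits is filled with
 * two-symbol cells whose first byte is `baseSeq`. Positions whose second
 * code would not fit in the window (weight below `minWeight`) keep a
 * one-symbol fallback cell, which is what the "fill skipped values" loop does.
 */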

typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];

static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart,
			     rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline)
{
	U32 rankVal[HUF_TABLELOG_MAX + 1];
	const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
	const U32 minBits = nbBitsBaseline - maxWeight;
	U32 s;

	memcpy(rankVal, rankValOrigin, sizeof(rankVal));

	/* fill DTable */
	for (s = 0; s < sortedListSize; s++) {
		const U16 symbol = sortedList[s].symbol;
		const U32 weight = sortedList[s].weight;
		const U32 nbBits = nbBitsBaseline - weight;
		const U32 start = rankVal[weight];
		const U32 length = 1 << (targetLog - nbBits);

		if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */
			U32 sortedRank;
			int minWeight = nbBits + scaleLog;
			if (minWeight < 1)
				minWeight = 1;
			sortedRank = rankStart[minWeight];
			HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank,
					       sortedListSize - sortedRank, nbBitsBaseline, symbol);
		} else {
			HUF_DEltX4 DElt;
			ZSTD_writeLE16(&(DElt.sequence), symbol);
			DElt.nbBits = (BYTE)(nbBits);
			DElt.length = 1;
			{
				U32 const end = start + length;
				U32 u;
				for (u = start; u < end; u++)
					DTable[u] = DElt;
			}
		}
		rankVal[weight] += length;
	}
}

size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
{
	U32 tableLog, maxW, sizeOfSort, nbSymbols;
	DTableDesc dtd = HUF_getDTableDesc(DTable);
	U32 const maxTableLog = dtd.maxTableLog;
	size_t iSize;
	void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */
	HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr;
	U32 *rankStart;

	rankValCol_t *rankVal;
	U32 *rankStats;
	U32 *rankStart0;
	sortedSymbol_t *sortedSymbol;
	BYTE *weightList;
	size_t spaceUsed32 = 0;

	HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0);

	rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32);
	spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
	rankStats = (U32 *)workspace + spaceUsed32;
	spaceUsed32 += HUF_TABLELOG_MAX + 1;
	rankStart0 = (U32 *)workspace + spaceUsed32;
	spaceUsed32 += HUF_TABLELOG_MAX + 2;
	sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32);
	spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
	weightList = (BYTE *)((U32 *)workspace + spaceUsed32);
	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

	if ((spaceUsed32 << 2) > workspaceSize)
		return ERROR(tableLog_tooLarge);
	workspace = (U32 *)workspace + spaceUsed32;
	workspaceSize -= (spaceUsed32 << 2);

	rankStart = rankStart0 + 1;
	memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

	HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
	if (maxTableLog > HUF_TABLELOG_MAX)
		return ERROR(tableLog_tooLarge);
	/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */

	iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
	if (HUF_isError(iSize))
		return iSize;

	/* check result */
	if (tableLog > maxTableLog)
		return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */

	/* find maxWeight */
	for (maxW = tableLog; rankStats[maxW] == 0; maxW--) {
	} /* necessarily finds a solution before 0 */

	/* Get start index of each weight */
	{
		U32 w, nextRankStart = 0;
		for (w = 1; w < maxW + 1; w++) {
			U32 curr = nextRankStart;
			nextRankStart += rankStats[w];
			rankStart[w] = curr;
		}
		rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list */
		sizeOfSort = nextRankStart;
	}

	/* sort symbols by weight */
	{
		U32 s;
		for (s = 0; s < nbSymbols; s++) {
			U32 const w = weightList[s];
			U32 const r = rankStart[w]++;
			sortedSymbol[r].symbol = (BYTE)s;
			sortedSymbol[r].weight = (BYTE)w;
		}
		rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
	}

	/* Build rankVal */
	{
		U32 *const rankVal0 = rankVal[0];
		{
			int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */
			U32 nextRankVal = 0;
			U32 w;
			for (w = 1; w < maxW + 1; w++) {
				U32 curr = nextRankVal;
				nextRankVal += rankStats[w] << (w + rescale);
				rankVal0[w] = curr;
			}
		}
		{
			U32 const minBits = tableLog + 1 - maxW;
			U32 consumed;
			for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
				U32 *const rankValPtr = rankVal[consumed];
				U32 w;
				for (w = 1; w < maxW + 1; w++) {
					rankValPtr[w] = rankVal0[w] >> consumed;
				}
			}
		}
	}
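	/*
	 * rankVal[0] holds the start index of each weight within the full
	 * maxTableLog-sized table; rankVal[consumed] holds the same starts
	 * pre-shifted by `consumed` bits, ready to be used as rankValOrigin
	 * for level-2 sub-tables once a first symbol has eaten `consumed` bits.
	 */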

	HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1);

	dtd.tableLog = (BYTE)maxTableLog;
	dtd.tableType = 1;
	memcpy(DTable, &dtd, sizeof(dtd));
	return iSize;
}

static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
{
	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
	memcpy(op, dt + val, 2);
	BIT_skipBits(DStream, dt[val].nbBits);
	return dt[val].length;
}

static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
{
	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
	memcpy(op, dt + val, 1);
	if (dt[val].length == 1)
		BIT_skipBits(DStream, dt[val].nbBits);
	else {
		if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) {
			BIT_skipBits(DStream, dt[val].nbBits);
			if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8))
				/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
				DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8);
		}
	}
	return 1;
}
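/*
 * HUF_decodeLastSymbolX4() emits only the first byte of the cell : when a
 * single output byte remains, a two-symbol cell would overflow dst, so just
 * one byte is copied and the bits-consumed counter is clamped to the
 * container size instead of trying to split the pair's bit count.
 */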

#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr)         \
	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
	if (ZSTD_64bits())                     \
	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)

FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog)
{
	BYTE *const pStart = p;

	/* up to 8 symbols at a time */
	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) {
		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
		HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
	}

	/* closer to end : up to 2 symbols at a time */
	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2))
		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);

	while (p <= pEnd - 2)
		HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */

	if (p < pEnd)
		p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);

	return p - pStart;
}
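/*
 * Bound note : on 64-bit the fast loop may emit up to 4 cells of 2 bytes
 * each (8 bytes), so it only runs while at least sizeof(bitContainer) bytes
 * of output room remain; the tail loops then work 2 bytes at a time and the
 * final byte, if any, goes through HUF_decodeLastSymbolX4().
 */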

static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	BIT_DStream_t bitD;

	/* Init */
	{
		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
		if (HUF_isError(errorCode))
			return errorCode;
	}

	/* decode */
	{
		BYTE *const ostart = (BYTE *)dst;
		BYTE *const oend = ostart + dstSize;
		const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */
		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
		DTableDesc const dtd = HUF_getDTableDesc(DTable);
		HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
	}

	/* check */
	if (!BIT_endOfDStream(&bitD))
		return ERROR(corruption_detected);

	/* decoded size */
	return dstSize;
}

size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	DTableDesc dtd = HUF_getDTableDesc(DTable);
	if (dtd.tableType != 1)
		return ERROR(GENERIC);
	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
}

size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	const BYTE *ip = (const BYTE *)cSrc;

	size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
	if (HUF_isError(hSize))
		return hSize;
	if (hSize >= cSrcSize)
		return ERROR(srcSize_wrong);
	ip += hSize;
	cSrcSize -= hSize;

	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
}

static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	if (cSrcSize < 10)
		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */

	{
		const BYTE *const istart = (const BYTE *)cSrc;
		BYTE *const ostart = (BYTE *)dst;
		BYTE *const oend = ostart + dstSize;
		const void *const dtPtr = DTable + 1;
		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;

		/* Init */
		BIT_DStream_t bitD1;
		BIT_DStream_t bitD2;
		BIT_DStream_t bitD3;
		BIT_DStream_t bitD4;
		size_t const length1 = ZSTD_readLE16(istart);
		size_t const length2 = ZSTD_readLE16(istart + 2);
		size_t const length3 = ZSTD_readLE16(istart + 4);
		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
		const BYTE *const istart1 = istart + 6; /* jumpTable */
		const BYTE *const istart2 = istart1 + length1;
		const BYTE *const istart3 = istart2 + length2;
		const BYTE *const istart4 = istart3 + length3;
		size_t const segmentSize = (dstSize + 3) / 4;
		BYTE *const opStart2 = ostart + segmentSize;
		BYTE *const opStart3 = opStart2 + segmentSize;
		BYTE *const opStart4 = opStart3 + segmentSize;
		BYTE *op1 = ostart;
		BYTE *op2 = opStart2;
		BYTE *op3 = opStart3;
		BYTE *op4 = opStart4;
		U32 endSignal;
		DTableDesc const dtd = HUF_getDTableDesc(DTable);
		U32 const dtLog = dtd.tableLog;

		if (length4 > cSrcSize)
			return ERROR(corruption_detected); /* overflow */
		{
			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
			if (HUF_isError(errorCode))
				return errorCode;
		}
		{
			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
			if (HUF_isError(errorCode))
				return errorCode;
		}
		{
			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
			if (HUF_isError(errorCode))
				return errorCode;
		}
		{
			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
			if (HUF_isError(errorCode))
				return errorCode;
		}

		/* 16-32 symbols per loop (4-8 symbols per stream) */
		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
		for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) {
			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
			HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
			HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
			HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
			HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
			HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
			HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
			HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
			HUF_DECODE_SYMBOLX4_0(op4, &bitD4);

			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
		}

		/* check corruption */
		if (op1 > opStart2)
			return ERROR(corruption_detected);
		if (op2 > opStart3)
			return ERROR(corruption_detected);
		if (op3 > opStart4)
			return ERROR(corruption_detected);
		/* note : op4 already verified within main loop */

		/* finish bitStreams one by one */
		HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
		HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
		HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
		HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);

		/* check */
		{
			U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
			if (!endCheck)
				return ERROR(corruption_detected);
		}

		/* decoded size */
		return dstSize;
	}
}

size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	DTableDesc dtd = HUF_getDTableDesc(DTable);
	if (dtd.tableType != 1)
		return ERROR(GENERIC);
	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
}

size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	const BYTE *ip = (const BYTE *)cSrc;

	size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
	if (HUF_isError(hSize))
		return hSize;
	if (hSize >= cSrcSize)
		return ERROR(srcSize_wrong);
	ip += hSize;
	cSrcSize -= hSize;

	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
}

/* ********************************/
/* Generic decompression selector */
/* ********************************/

size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	DTableDesc const dtd = HUF_getDTableDesc(DTable);
	return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
			     : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
}

size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	DTableDesc const dtd = HUF_getDTableDesc(DTable);
	return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
			     : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
}

typedef struct {
	U32 tableTime;
	U32 decode256Time;
} algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
	/* single, double, quad */
	{{0, 0}, {1, 1}, {2, 2}},                /* Q==0 : impossible */
	{{0, 0}, {1, 1}, {2, 2}},                /* Q==1 : impossible */
	{{38, 130}, {1313, 74}, {2151, 38}},     /* Q == 2 : 12-18% */
	{{448, 128}, {1353, 74}, {2238, 41}},    /* Q == 3 : 18-25% */
	{{556, 128}, {1353, 74}, {2238, 47}},    /* Q == 4 : 25-32% */
	{{714, 128}, {1418, 74}, {2436, 53}},    /* Q == 5 : 32-38% */
	{{883, 128}, {1437, 74}, {2464, 61}},    /* Q == 6 : 38-44% */
	{{897, 128}, {1515, 75}, {2622, 68}},    /* Q == 7 : 44-50% */
	{{926, 128}, {1613, 75}, {2730, 75}},    /* Q == 8 : 50-56% */
	{{947, 128}, {1729, 77}, {3359, 77}},    /* Q == 9 : 56-62% */
	{{1107, 128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
	{{1177, 128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
	{{1242, 128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
	{{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
	{{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
	{{722, 128}, {1891, 145}, {1936, 146}},  /* Q ==15 : 93-99% */
};
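/*
 * algoTime[Q] is a rough cost model : tableTime is the fixed cost of building
 * the DTable and decode256Time the cost per 256 decoded bytes, quantized by
 * compression ratio Q. Only the "single" and "double" columns are used by
 * HUF_selectDecoder() below; the "quad" column is not used in this file.
 */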

/** HUF_selectDecoder() :
*   Tells which decoder is likely to decode faster,
*   based on a set of pre-determined metrics.
*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
{
	/* decoder timing evaluation */
	U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
	U32 const D256 = (U32)(dstSize >> 8);
	U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
	U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
	DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */

	return DTime1 < DTime0;
}
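/*
 * Worked example (illustration only) : for dstSize = 64 KB and
 * cSrcSize = 32 KB, Q = 8 and D256 = 256, so
 *   DTime0 = 926  + 128 * 256 = 33694               (single-symbol)
 *   DTime1 = 1613 +  75 * 256 = 20813, +1/8 -> 23414 (double-symbol)
 * and the double-symbol decoder is selected.
 */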

typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize);

size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	/* validation checks */
	if (dstSize == 0)
		return ERROR(dstSize_tooSmall);
	if (cSrcSize > dstSize)
		return ERROR(corruption_detected); /* invalid */
	if (cSrcSize == dstSize) {
		memcpy(dst, cSrc, dstSize);
		return dstSize;
	} /* not compressed */
	if (cSrcSize == 1) {
		memset(dst, *(const BYTE *)cSrc, dstSize);
		return dstSize;
	} /* RLE */

	{
		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
	}
}

size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	/* validation checks */
	if (dstSize == 0)
		return ERROR(dstSize_tooSmall);
	if ((cSrcSize >= dstSize) || (cSrcSize <= 1))
		return ERROR(corruption_detected); /* invalid */

	{
		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
	}
}

size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	/* validation checks */
	if (dstSize == 0)
		return ERROR(dstSize_tooSmall);
	if (cSrcSize > dstSize)
		return ERROR(corruption_detected); /* invalid */
	if (cSrcSize == dstSize) {
		memcpy(dst, cSrc, dstSize);
		return dstSize;
	} /* not compressed */
	if (cSrcSize == 1) {
		memset(dst, *(const BYTE *)cSrc, dstSize);
		return dstSize;
	} /* RLE */

	{
		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
		return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
			      : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
	}
}