blob: 5aa75db413a0680d2faba54835e14864ce119403 [file] [log] [blame]
Brandon Maierdbe88da2023-01-12 10:27:45 -06001/* ******************************************************************
2 * FSE : Finite State Entropy decoder
3 * Copyright (c) Yann Collet, Facebook, Inc.
4 *
5 * You can contact the author at :
6 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8 *
9 * This source code is licensed under both the BSD-style license (found in the
10 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11 * in the COPYING file in the root directory of this source tree).
12 * You may select, at your option, one of the above-listed licenses.
13****************************************************************** */
14
Brandon Maierdbe88da2023-01-12 10:27:45 -060015/* **************************************************************
16* Includes
17****************************************************************/
18#include "debug.h" /* assert */
19#include "bitstream.h"
20#include "compiler.h"
21#define FSE_STATIC_LINKING_ONLY
22#include "fse.h"
23#include "error_private.h"
24#define ZSTD_DEPS_NEED_MALLOC
25#include "zstd_deps.h"
26
Brandon Maierdbe88da2023-01-12 10:27:45 -060027/* **************************************************************
28* Error Management
29****************************************************************/
30#define FSE_isError ERR_isError
31#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
32
Brandon Maierdbe88da2023-01-12 10:27:45 -060033/* **************************************************************
34* Templates
35****************************************************************/
36/*
37 designed to be included
38 for type-specific functions (template emulation in C)
39 Objective is to write these functions only once, for improved maintenance
40*/
41
42/* safety checks */
43#ifndef FSE_FUNCTION_EXTENSION
44# error "FSE_FUNCTION_EXTENSION must be defined"
45#endif
46#ifndef FSE_FUNCTION_TYPE
47# error "FSE_FUNCTION_TYPE must be defined"
48#endif
49
50/* Function names */
51#define FSE_CAT(X,Y) X##Y
52#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
53#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
54
Brandon Maierdbe88da2023-01-12 10:27:45 -060055/* Function templates */
56FSE_DTable* FSE_createDTable (unsigned tableLog)
57{
58 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
59 return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
60}
61
62void FSE_freeDTable (FSE_DTable* dt)
63{
64 ZSTD_free(dt);
65}
66
67static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
68{
69 void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
70 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
71 U16* symbolNext = (U16*)workSpace;
72 BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
73
74 U32 const maxSV1 = maxSymbolValue + 1;
75 U32 const tableSize = 1 << tableLog;
76 U32 highThreshold = tableSize-1;
77
78 /* Sanity Checks */
79 if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
80 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
81 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
82
83 /* Init, lay down lowprob symbols */
84 { FSE_DTableHeader DTableH;
85 DTableH.tableLog = (U16)tableLog;
86 DTableH.fastMode = 1;
87 { S16 const largeLimit= (S16)(1 << (tableLog-1));
88 U32 s;
89 for (s=0; s<maxSV1; s++) {
90 if (normalizedCounter[s]==-1) {
91 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
92 symbolNext[s] = 1;
93 } else {
94 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
95 symbolNext[s] = normalizedCounter[s];
96 } } }
97 ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
98 }
99
100 /* Spread symbols */
101 if (highThreshold == tableSize - 1) {
102 size_t const tableMask = tableSize-1;
103 size_t const step = FSE_TABLESTEP(tableSize);
104 /* First lay down the symbols in order.
105 * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
106 * misses since small blocks generally have small table logs, so nearly
107 * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
108 * our buffer to handle the over-write.
109 */
110 {
111 U64 const add = 0x0101010101010101ull;
112 size_t pos = 0;
113 U64 sv = 0;
114 U32 s;
115 for (s=0; s<maxSV1; ++s, sv += add) {
116 int i;
117 int const n = normalizedCounter[s];
118 MEM_write64(spread + pos, sv);
119 for (i = 8; i < n; i += 8) {
120 MEM_write64(spread + pos + i, sv);
121 }
122 pos += n;
123 }
124 }
125 /* Now we spread those positions across the table.
126 * The benefit of doing it in two stages is that we avoid the the
127 * variable size inner loop, which caused lots of branch misses.
128 * Now we can run through all the positions without any branch misses.
129 * We unroll the loop twice, since that is what emperically worked best.
130 */
131 {
132 size_t position = 0;
133 size_t s;
134 size_t const unroll = 2;
135 assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
136 for (s = 0; s < (size_t)tableSize; s += unroll) {
137 size_t u;
138 for (u = 0; u < unroll; ++u) {
139 size_t const uPosition = (position + (u * step)) & tableMask;
140 tableDecode[uPosition].symbol = spread[s + u];
141 }
142 position = (position + (unroll * step)) & tableMask;
143 }
144 assert(position == 0);
145 }
146 } else {
147 U32 const tableMask = tableSize-1;
148 U32 const step = FSE_TABLESTEP(tableSize);
149 U32 s, position = 0;
150 for (s=0; s<maxSV1; s++) {
151 int i;
152 for (i=0; i<normalizedCounter[s]; i++) {
153 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
154 position = (position + step) & tableMask;
155 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
156 } }
157 if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
158 }
159
160 /* Build Decoding table */
161 { U32 u;
162 for (u=0; u<tableSize; u++) {
163 FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
164 U32 const nextState = symbolNext[symbol]++;
165 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
166 tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
167 } }
168
169 return 0;
170}
171
172size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
173{
174 return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
175}
176
Brandon Maierdbe88da2023-01-12 10:27:45 -0600177#ifndef FSE_COMMONDEFS_ONLY
178
179/*-*******************************************************
180* Decompression (Byte symbols)
181*********************************************************/
182size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
183{
184 void* ptr = dt;
185 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
186 void* dPtr = dt + 1;
187 FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
188
189 DTableH->tableLog = 0;
190 DTableH->fastMode = 0;
191
192 cell->newState = 0;
193 cell->symbol = symbolValue;
194 cell->nbBits = 0;
195
196 return 0;
197}
198
Brandon Maierdbe88da2023-01-12 10:27:45 -0600199size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
200{
201 void* ptr = dt;
202 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
203 void* dPtr = dt + 1;
204 FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
205 const unsigned tableSize = 1 << nbBits;
206 const unsigned tableMask = tableSize - 1;
207 const unsigned maxSV1 = tableMask+1;
208 unsigned s;
209
210 /* Sanity checks */
211 if (nbBits < 1) return ERROR(GENERIC); /* min size */
212
213 /* Build Decoding Table */
214 DTableH->tableLog = (U16)nbBits;
215 DTableH->fastMode = 1;
216 for (s=0; s<maxSV1; s++) {
217 dinfo[s].newState = 0;
218 dinfo[s].symbol = (BYTE)s;
219 dinfo[s].nbBits = (BYTE)nbBits;
220 }
221
222 return 0;
223}
224
225FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
226 void* dst, size_t maxDstSize,
227 const void* cSrc, size_t cSrcSize,
228 const FSE_DTable* dt, const unsigned fast)
229{
230 BYTE* const ostart = (BYTE*) dst;
231 BYTE* op = ostart;
232 BYTE* const omax = op + maxDstSize;
233 BYTE* const olimit = omax-3;
234
235 BIT_DStream_t bitD;
236 FSE_DState_t state1;
237 FSE_DState_t state2;
238
239 /* Init */
240 CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
241
242 FSE_initDState(&state1, &bitD, dt);
243 FSE_initDState(&state2, &bitD, dt);
244
245#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
246
247 /* 4 symbols per loop */
248 for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
249 op[0] = FSE_GETSYMBOL(&state1);
250
251 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
252 BIT_reloadDStream(&bitD);
253
254 op[1] = FSE_GETSYMBOL(&state2);
255
256 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
257 { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
258
259 op[2] = FSE_GETSYMBOL(&state1);
260
261 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
262 BIT_reloadDStream(&bitD);
263
264 op[3] = FSE_GETSYMBOL(&state2);
265 }
266
267 /* tail */
268 /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
269 while (1) {
270 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
271 *op++ = FSE_GETSYMBOL(&state1);
272 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
273 *op++ = FSE_GETSYMBOL(&state2);
274 break;
275 }
276
277 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
278 *op++ = FSE_GETSYMBOL(&state2);
279 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
280 *op++ = FSE_GETSYMBOL(&state1);
281 break;
282 } }
283
284 return op-ostart;
285}
286
Brandon Maierdbe88da2023-01-12 10:27:45 -0600287size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
288 const void* cSrc, size_t cSrcSize,
289 const FSE_DTable* dt)
290{
291 const void* ptr = dt;
292 const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
293 const U32 fastMode = DTableH->fastMode;
294
295 /* select fast mode (static) */
296 if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
297 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
298}
299
Brandon Maierdbe88da2023-01-12 10:27:45 -0600300size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
301{
302 return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
303}
304
305typedef struct {
306 short ncount[FSE_MAX_SYMBOL_VALUE + 1];
307 FSE_DTable dtable[1]; /* Dynamically sized */
308} FSE_DecompressWksp;
309
Brandon Maierdbe88da2023-01-12 10:27:45 -0600310FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
311 void* dst, size_t dstCapacity,
312 const void* cSrc, size_t cSrcSize,
313 unsigned maxLog, void* workSpace, size_t wkspSize,
314 int bmi2)
315{
316 const BYTE* const istart = (const BYTE*)cSrc;
317 const BYTE* ip = istart;
318 unsigned tableLog;
319 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
320 FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
321
322 DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
323 if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
324
325 /* normal FSE decoding mode */
326 {
327 size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
328 if (FSE_isError(NCountLength)) return NCountLength;
329 if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
330 assert(NCountLength <= cSrcSize);
331 ip += NCountLength;
332 cSrcSize -= NCountLength;
333 }
334
335 if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
336 workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
337 wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
338
339 CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
340
341 {
342 const void* ptr = wksp->dtable;
343 const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
344 const U32 fastMode = DTableH->fastMode;
345
346 /* select fast mode (static) */
347 if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
348 return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
349 }
350}
351
352/* Avoids the FORCE_INLINE of the _body() function. */
353static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
354{
355 return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
356}
357
358#if DYNAMIC_BMI2
359BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
360{
361 return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
362}
363#endif
364
365size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
366{
367#if DYNAMIC_BMI2
368 if (bmi2) {
369 return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
370 }
371#endif
372 (void)bmi2;
373 return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
374}
375
Brandon Maierdbe88da2023-01-12 10:27:45 -0600376typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
377
Brandon Maierdbe88da2023-01-12 10:27:45 -0600378#endif /* FSE_COMMONDEFS_ONLY */