/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
| 10 | |
/* zstd_decompress_internal:
 * objects and definitions shared within lib/decompress modules */
| 13 | |
| 14 | #ifndef ZSTD_DECOMPRESS_INTERNAL_H |
| 15 | #define ZSTD_DECOMPRESS_INTERNAL_H |
| 16 | |
Brandon Maier | dbe88da | 2023-01-12 10:27:45 -0600 | [diff] [blame] | 17 | /*-******************************************************* |
| 18 | * Dependencies |
| 19 | *********************************************************/ |
| 20 | #include "../common/mem.h" /* BYTE, U16, U32 */ |
| 21 | #include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */ |
| 22 | |
Brandon Maier | dbe88da | 2023-01-12 10:27:45 -0600 | [diff] [blame] | 23 | /*-******************************************************* |
| 24 | * Constants |
| 25 | *********************************************************/ |
| 26 | static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { |
| 27 | 0, 1, 2, 3, 4, 5, 6, 7, |
| 28 | 8, 9, 10, 11, 12, 13, 14, 15, |
| 29 | 16, 18, 20, 22, 24, 28, 32, 40, |
| 30 | 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, |
| 31 | 0x2000, 0x4000, 0x8000, 0x10000 }; |
| 32 | |
| 33 | static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { |
| 34 | 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, |
| 35 | 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, |
| 36 | 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, |
| 37 | 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; |
| 38 | |
| 39 | static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = { |
| 40 | 0, 1, 2, 3, 4, 5, 6, 7, |
| 41 | 8, 9, 10, 11, 12, 13, 14, 15, |
| 42 | 16, 17, 18, 19, 20, 21, 22, 23, |
| 43 | 24, 25, 26, 27, 28, 29, 30, 31 }; |
| 44 | |
| 45 | static UNUSED_ATTR const U32 ML_base[MaxML+1] = { |
| 46 | 3, 4, 5, 6, 7, 8, 9, 10, |
| 47 | 11, 12, 13, 14, 15, 16, 17, 18, |
| 48 | 19, 20, 21, 22, 23, 24, 25, 26, |
| 49 | 27, 28, 29, 30, 31, 32, 33, 34, |
| 50 | 35, 37, 39, 41, 43, 47, 51, 59, |
| 51 | 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, |
| 52 | 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; |
| 53 | |
Brandon Maier | dbe88da | 2023-01-12 10:27:45 -0600 | [diff] [blame] | 54 | /*-******************************************************* |
| 55 | * Decompression types |
| 56 | *********************************************************/ |
| 57 | typedef struct { |
| 58 | U32 fastMode; |
| 59 | U32 tableLog; |
| 60 | } ZSTD_seqSymbol_header; |
| 61 | |
| 62 | typedef struct { |
| 63 | U16 nextState; |
| 64 | BYTE nbAdditionalBits; |
| 65 | BYTE nbBits; |
| 66 | U32 baseValue; |
| 67 | } ZSTD_seqSymbol; |
| 68 | |
/* Number of ZSTD_seqSymbol cells in a table of the given log size :
 * (1 << log) cells, plus one extra cell. */
#define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))

/* Scratch space required while building an FSE decoding table, in bytes :
 * (MaxSeq + 1) S16 values, plus (1 << MaxFSELog) bytes, plus one U64. */
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))

/* Same size expressed as a count of U32 elements, rounded up. */
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
| 73 | |
| 74 | typedef struct { |
| 75 | ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ |
| 76 | ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ |
| 77 | ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ |
| 78 | HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ |
| 79 | U32 rep[ZSTD_REP_NUM]; |
| 80 | U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; |
| 81 | } ZSTD_entropyDTables_t; |
| 82 | |
/* ZSTD_dStage :
 * Values held by ZSTD_DCtx_s::stage.
 * Enumerators are implicitly numbered from 0, in declaration order. */
typedef enum {
    ZSTDds_getFrameHeaderSize,
    ZSTDds_decodeFrameHeader,
    ZSTDds_decodeBlockHeader,
    ZSTDds_decompressBlock,
    ZSTDds_decompressLastBlock,
    ZSTDds_checkChecksum,
    ZSTDds_decodeSkippableHeader,
    ZSTDds_skipFrame
} ZSTD_dStage;
| 87 | |
/* ZSTD_dStreamStage :
 * Values held by ZSTD_DCtx_s::streamStage.
 * zdss_init is explicitly 0; the others follow in declaration order. */
typedef enum {
    zdss_init = 0,
    zdss_loadHeader,
    zdss_read,
    zdss_load,
    zdss_flush
} ZSTD_dStreamStage;
| 90 | |
/* ZSTD_dictUses_e :
 * Dictionary usage policy, held by ZSTD_DCtx_s::dictUses. */
typedef enum {
    /* Use the dictionary indefinitely */
    ZSTD_use_indefinitely = -1,
    /* Do not use the dictionary (if one exists free it) */
    ZSTD_dont_use = 0,
    /* Use the dictionary once and set to ZSTD_dont_use */
    ZSTD_use_once = 1
} ZSTD_dictUses_e;
| 96 | |
| 97 | /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ |
| 98 | typedef struct { |
| 99 | const ZSTD_DDict** ddictPtrTable; |
| 100 | size_t ddictPtrTableSize; |
| 101 | size_t ddictPtrCount; |
| 102 | } ZSTD_DDictHashSet; |
| 103 | |
/* Requested size of the decoder's internal buffer.
 * May be overridden by defining it before this header is included. */
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
#  define ZSTD_DECODER_INTERNAL_BUFFER  (1 << 16)
#endif

/* Lower and upper bounds applied to the extra literal buffer size below. */
#define ZSTD_LBMIN 64
#define ZSTD_LBMAX (128 << 10)

/* Extra buffer, compensates when dst is not large enough to store litBuffer.
 * Its size is ZSTD_DECODER_INTERNAL_BUFFER passed through BOUNDED() together
 * with ZSTD_LBMIN and ZSTD_LBMAX. */
#define ZSTD_LITBUFFEREXTRASIZE  BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
| 113 | |
/* ZSTD_litLocation_e :
 * Where the literal buffer currently lives, held by
 * ZSTD_DCtx_s::litBufferLocation. */
typedef enum {
    /* Stored entirely within litExtraBuffer */
    ZSTD_not_in_dst = 0,
    /* Stored entirely within dst (in memory after current output write) */
    ZSTD_in_dst = 1,
    /* Split between litExtraBuffer and dst */
    ZSTD_split = 2
} ZSTD_litLocation_e;
| 119 | |
| 120 | struct ZSTD_DCtx_s |
| 121 | { |
| 122 | const ZSTD_seqSymbol* LLTptr; |
| 123 | const ZSTD_seqSymbol* MLTptr; |
| 124 | const ZSTD_seqSymbol* OFTptr; |
| 125 | const HUF_DTable* HUFptr; |
| 126 | ZSTD_entropyDTables_t entropy; |
| 127 | U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ |
| 128 | const void* previousDstEnd; /* detect continuity */ |
| 129 | const void* prefixStart; /* start of current segment */ |
| 130 | const void* virtualStart; /* virtual start of previous segment if it was just before current one */ |
| 131 | const void* dictEnd; /* end of previous segment */ |
| 132 | size_t expected; |
| 133 | ZSTD_frameHeader fParams; |
| 134 | U64 processedCSize; |
| 135 | U64 decodedSize; |
| 136 | blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ |
| 137 | ZSTD_dStage stage; |
| 138 | U32 litEntropy; |
| 139 | U32 fseEntropy; |
| 140 | struct xxh64_state xxhState; |
| 141 | size_t headerSize; |
| 142 | ZSTD_format_e format; |
| 143 | ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ |
| 144 | U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ |
| 145 | const BYTE* litPtr; |
| 146 | ZSTD_customMem customMem; |
| 147 | size_t litSize; |
| 148 | size_t rleSize; |
| 149 | size_t staticSize; |
| 150 | #if DYNAMIC_BMI2 != 0 |
| 151 | int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ |
| 152 | #endif |
| 153 | |
| 154 | /* dictionary */ |
| 155 | ZSTD_DDict* ddictLocal; |
| 156 | const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ |
| 157 | U32 dictID; |
| 158 | int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ |
| 159 | ZSTD_dictUses_e dictUses; |
| 160 | ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ |
| 161 | ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ |
| 162 | |
| 163 | /* streaming */ |
| 164 | ZSTD_dStreamStage streamStage; |
| 165 | char* inBuff; |
| 166 | size_t inBuffSize; |
| 167 | size_t inPos; |
| 168 | size_t maxWindowSize; |
| 169 | char* outBuff; |
| 170 | size_t outBuffSize; |
| 171 | size_t outStart; |
| 172 | size_t outEnd; |
| 173 | size_t lhSize; |
| 174 | U32 hostageByte; |
| 175 | int noForwardProgress; |
| 176 | ZSTD_bufferMode_e outBufferMode; |
| 177 | ZSTD_outBuffer expectedOutBuffer; |
| 178 | |
| 179 | /* workspace */ |
| 180 | BYTE* litBuffer; |
| 181 | const BYTE* litBufferEnd; |
| 182 | ZSTD_litLocation_e litBufferLocation; |
| 183 | BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */ |
| 184 | BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; |
| 185 | |
| 186 | size_t oversizedDuration; |
| 187 | |
| 188 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
| 189 | void const* dictContentBeginForFuzzing; |
| 190 | void const* dictContentEndForFuzzing; |
| 191 | #endif |
| 192 | |
| 193 | /* Tracing */ |
| 194 | }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ |
| 195 | |
| 196 | MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) { |
| 197 | #if DYNAMIC_BMI2 != 0 |
| 198 | return dctx->bmi2; |
| 199 | #else |
| 200 | (void)dctx; |
| 201 | return 0; |
| 202 | #endif |
| 203 | } |
| 204 | |
| 205 | /*-******************************************************* |
| 206 | * Shared internal functions |
| 207 | *********************************************************/ |
| 208 | |
| 209 | /*! ZSTD_loadDEntropy() : |
| 210 | * dict : must point at beginning of a valid zstd dictionary. |
| 211 | * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ |
| 212 | size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, |
| 213 | const void* const dict, size_t const dictSize); |
| 214 | |
| 215 | /*! ZSTD_checkContinuity() : |
| 216 | * check if next `dst` follows previous position, where decompression ended. |
| 217 | * If yes, do nothing (continue on current segment). |
| 218 | * If not, classify previous segment as "external dictionary", and start a new segment. |
| 219 | * This function cannot fail. */ |
| 220 | void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); |
| 221 | |
Brandon Maier | dbe88da | 2023-01-12 10:27:45 -0600 | [diff] [blame] | 222 | #endif /* ZSTD_DECOMPRESS_INTERNAL_H */ |