/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
| 10 | |
/* zstd_ddict.c :
 * concentrates all logic that needs to know the internals of the ZSTD_DDict object */
| 13 | |
/*-*******************************************************
 *  Dependencies
 *********************************************************/
| 17 | #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ |
| 18 | #include "../common/cpu.h" /* bmi2 */ |
| 19 | #include "../common/mem.h" /* low level memory routines */ |
| 20 | #define FSE_STATIC_LINKING_ONLY |
| 21 | #include "../common/fse.h" |
| 22 | #define HUF_STATIC_LINKING_ONLY |
| 23 | #include "../common/huf.h" |
| 24 | #include "zstd_decompress_internal.h" |
| 25 | #include "zstd_ddict.h" |
| 26 | |
/*-*******************************************************
 *  Types
 *********************************************************/
| 30 | struct ZSTD_DDict_s { |
| 31 | void* dictBuffer; |
| 32 | const void* dictContent; |
| 33 | size_t dictSize; |
| 34 | ZSTD_entropyDTables_t entropy; |
| 35 | U32 dictID; |
| 36 | U32 entropyPresent; |
| 37 | ZSTD_customMem cMem; |
| 38 | }; /* typedef'd to ZSTD_DDict within "zstd.h" */ |
| 39 | |
| 40 | const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) |
| 41 | { |
| 42 | assert(ddict != NULL); |
| 43 | return ddict->dictContent; |
| 44 | } |
| 45 | |
| 46 | size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) |
| 47 | { |
| 48 | assert(ddict != NULL); |
| 49 | return ddict->dictSize; |
| 50 | } |
| 51 | |
| 52 | void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) |
| 53 | { |
| 54 | DEBUGLOG(4, "ZSTD_copyDDictParameters"); |
| 55 | assert(dctx != NULL); |
| 56 | assert(ddict != NULL); |
| 57 | dctx->dictID = ddict->dictID; |
| 58 | dctx->prefixStart = ddict->dictContent; |
| 59 | dctx->virtualStart = ddict->dictContent; |
| 60 | dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; |
| 61 | dctx->previousDstEnd = dctx->dictEnd; |
| 62 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
| 63 | dctx->dictContentBeginForFuzzing = dctx->prefixStart; |
| 64 | dctx->dictContentEndForFuzzing = dctx->previousDstEnd; |
| 65 | #endif |
| 66 | if (ddict->entropyPresent) { |
| 67 | dctx->litEntropy = 1; |
| 68 | dctx->fseEntropy = 1; |
| 69 | dctx->LLTptr = ddict->entropy.LLTable; |
| 70 | dctx->MLTptr = ddict->entropy.MLTable; |
| 71 | dctx->OFTptr = ddict->entropy.OFTable; |
| 72 | dctx->HUFptr = ddict->entropy.hufTable; |
| 73 | dctx->entropy.rep[0] = ddict->entropy.rep[0]; |
| 74 | dctx->entropy.rep[1] = ddict->entropy.rep[1]; |
| 75 | dctx->entropy.rep[2] = ddict->entropy.rep[2]; |
| 76 | } else { |
| 77 | dctx->litEntropy = 0; |
| 78 | dctx->fseEntropy = 0; |
| 79 | } |
| 80 | } |
| 81 | |
Brandon Maier | dbe88da | 2023-01-12 10:27:45 -0600 | [diff] [blame] | 82 | static size_t |
| 83 | ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, |
| 84 | ZSTD_dictContentType_e dictContentType) |
| 85 | { |
| 86 | ddict->dictID = 0; |
| 87 | ddict->entropyPresent = 0; |
| 88 | if (dictContentType == ZSTD_dct_rawContent) return 0; |
| 89 | |
| 90 | if (ddict->dictSize < 8) { |
| 91 | if (dictContentType == ZSTD_dct_fullDict) |
| 92 | return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ |
| 93 | return 0; /* pure content mode */ |
| 94 | } |
| 95 | { U32 const magic = MEM_readLE32(ddict->dictContent); |
| 96 | if (magic != ZSTD_MAGIC_DICTIONARY) { |
| 97 | if (dictContentType == ZSTD_dct_fullDict) |
| 98 | return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ |
| 99 | return 0; /* pure content mode */ |
| 100 | } |
| 101 | } |
| 102 | ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); |
| 103 | |
| 104 | /* load entropy tables */ |
| 105 | RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( |
| 106 | &ddict->entropy, ddict->dictContent, ddict->dictSize)), |
| 107 | dictionary_corrupted, ""); |
| 108 | ddict->entropyPresent = 1; |
| 109 | return 0; |
| 110 | } |
| 111 | |
Brandon Maier | dbe88da | 2023-01-12 10:27:45 -0600 | [diff] [blame] | 112 | static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, |
| 113 | const void* dict, size_t dictSize, |
| 114 | ZSTD_dictLoadMethod_e dictLoadMethod, |
| 115 | ZSTD_dictContentType_e dictContentType) |
| 116 | { |
| 117 | if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { |
| 118 | ddict->dictBuffer = NULL; |
| 119 | ddict->dictContent = dict; |
| 120 | if (!dict) dictSize = 0; |
| 121 | } else { |
| 122 | void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); |
| 123 | ddict->dictBuffer = internalBuffer; |
| 124 | ddict->dictContent = internalBuffer; |
| 125 | if (!internalBuffer) return ERROR(memory_allocation); |
| 126 | ZSTD_memcpy(internalBuffer, dict, dictSize); |
| 127 | } |
| 128 | ddict->dictSize = dictSize; |
| 129 | ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ |
| 130 | |
| 131 | /* parse dictionary content */ |
| 132 | FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); |
| 133 | |
| 134 | return 0; |
| 135 | } |
| 136 | |
| 137 | ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, |
| 138 | ZSTD_dictLoadMethod_e dictLoadMethod, |
| 139 | ZSTD_dictContentType_e dictContentType, |
| 140 | ZSTD_customMem customMem) |
| 141 | { |
| 142 | if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; |
| 143 | |
| 144 | { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); |
| 145 | if (ddict == NULL) return NULL; |
| 146 | ddict->cMem = customMem; |
| 147 | { size_t const initResult = ZSTD_initDDict_internal(ddict, |
| 148 | dict, dictSize, |
| 149 | dictLoadMethod, dictContentType); |
| 150 | if (ZSTD_isError(initResult)) { |
| 151 | ZSTD_freeDDict(ddict); |
| 152 | return NULL; |
| 153 | } } |
| 154 | return ddict; |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | /*! ZSTD_createDDict() : |
| 159 | * Create a digested dictionary, to start decompression without startup delay. |
| 160 | * `dict` content is copied inside DDict. |
| 161 | * Consequently, `dict` can be released after `ZSTD_DDict` creation */ |
| 162 | ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) |
| 163 | { |
| 164 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; |
| 165 | return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); |
| 166 | } |
| 167 | |
| 168 | /*! ZSTD_createDDict_byReference() : |
| 169 | * Create a digested dictionary, to start decompression without startup delay. |
| 170 | * Dictionary content is simply referenced, it will be accessed during decompression. |
| 171 | * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ |
| 172 | ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) |
| 173 | { |
| 174 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; |
| 175 | return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); |
| 176 | } |
| 177 | |
Brandon Maier | dbe88da | 2023-01-12 10:27:45 -0600 | [diff] [blame] | 178 | const ZSTD_DDict* ZSTD_initStaticDDict( |
| 179 | void* sBuffer, size_t sBufferSize, |
| 180 | const void* dict, size_t dictSize, |
| 181 | ZSTD_dictLoadMethod_e dictLoadMethod, |
| 182 | ZSTD_dictContentType_e dictContentType) |
| 183 | { |
| 184 | size_t const neededSpace = sizeof(ZSTD_DDict) |
| 185 | + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); |
| 186 | ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; |
| 187 | assert(sBuffer != NULL); |
| 188 | assert(dict != NULL); |
| 189 | if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ |
| 190 | if (sBufferSize < neededSpace) return NULL; |
| 191 | if (dictLoadMethod == ZSTD_dlm_byCopy) { |
| 192 | ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ |
| 193 | dict = ddict+1; |
| 194 | } |
| 195 | if (ZSTD_isError( ZSTD_initDDict_internal(ddict, |
| 196 | dict, dictSize, |
| 197 | ZSTD_dlm_byRef, dictContentType) )) |
| 198 | return NULL; |
| 199 | return ddict; |
| 200 | } |
| 201 | |
Brandon Maier | dbe88da | 2023-01-12 10:27:45 -0600 | [diff] [blame] | 202 | size_t ZSTD_freeDDict(ZSTD_DDict* ddict) |
| 203 | { |
| 204 | if (ddict==NULL) return 0; /* support free on NULL */ |
| 205 | { ZSTD_customMem const cMem = ddict->cMem; |
| 206 | ZSTD_customFree(ddict->dictBuffer, cMem); |
| 207 | ZSTD_customFree(ddict, cMem); |
| 208 | return 0; |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | /*! ZSTD_estimateDDictSize() : |
| 213 | * Estimate amount of memory that will be needed to create a dictionary for decompression. |
| 214 | * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ |
| 215 | size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) |
| 216 | { |
| 217 | return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); |
| 218 | } |
| 219 | |
| 220 | size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) |
| 221 | { |
| 222 | if (ddict==NULL) return 0; /* support sizeof on NULL */ |
| 223 | return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; |
| 224 | } |
| 225 | |
| 226 | /*! ZSTD_getDictID_fromDDict() : |
| 227 | * Provides the dictID of the dictionary loaded into `ddict`. |
| 228 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. |
| 229 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ |
| 230 | unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) |
| 231 | { |
| 232 | if (ddict==NULL) return 0; |
| 233 | return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); |
| 234 | } |