Blame - lib/zstd/decompress/zstd_decompress_block.c - filogic/uboot

blob: 6e46a1bc5242b61533751207dd44e76a9a40dccf [file] [log] [blame]

Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	1	/*
				2	* Copyright (c) Yann Collet, Facebook, Inc.
				3	* All rights reserved.
				4	*
				5	* This source code is licensed under both the BSD-style license (found in the
				6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
				7	* in the COPYING file in the root directory of this source tree).
				8	* You may select, at your option, one of the above-listed licenses.
				9	*/
				10
				11	/* zstd_decompress_block :
				12	* this module takes care of decompressing _compressed_ block */
				13
				14	/*-******************************************************
				15	* Dependencies
				16	*********************************************************/
				17	#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
				18	#include "../common/compiler.h" /* prefetch */
				19	#include "../common/cpu.h" /* bmi2 */
				20	#include "../common/mem.h" /* low level memory routines */
				21	#define FSE_STATIC_LINKING_ONLY
				22	#include "../common/fse.h"
				23	#define HUF_STATIC_LINKING_ONLY
				24	#include "../common/huf.h"
				25	#include "../common/zstd_internal.h"
				26	#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
				27	#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
				28	#include "zstd_decompress_block.h"
				29
				30	/*_******************************************************
				31	* Macros
				32	**********************************************************/
				33
				34	/* These two optional macros force the use one way or another of the two
				35	* ZSTD_decompressSequences implementations. You can't force in both directions
				36	* at the same time.
				37	*/
				38	#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				39	defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				40	#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
				41	#endif
				42
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	43	/*_******************************************************
				44	* Memory operations
				45	**********************************************************/
				/* ZSTD_copy4() :
				 * Copies exactly 4 bytes from `src` to `dst` through ZSTD_memcpy(). */
				static void ZSTD_copy4(void* dst, const void* src)
				{
				    ZSTD_memcpy(dst, src, 4);
				}
				47
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	48	/*-************************************************************
				49	* Block decoding
				50	***************************************************************/
				51
				52	/*! ZSTD_getcBlockSize() :
				53	* Provides the size of compressed block from block header `src` */
				54	size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
				55	blockProperties_t* bpPtr)
				56	{
				57	RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
				58
				59	{ U32 const cBlockHeader = MEM_readLE24(src);
				60	U32 const cSize = cBlockHeader >> 3;
				61	bpPtr->lastBlock = cBlockHeader & 1;
				62	bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
				63	bpPtr->origSize = cSize; /* only useful for RLE */
				64	if (bpPtr->blockType == bt_rle) return 1;
				65	RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
				66	return cSize;
				67	}
				68	}
				69
				70	/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
				71	static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
				72	const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
				73	{
				74	if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
				75	{
				76	/* room for litbuffer to fit without read faulting */
				77	dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
				78	dctx->litBufferEnd = dctx->litBuffer + litSize;
				79	dctx->litBufferLocation = ZSTD_in_dst;
				80	}
				81	else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
				82	{
				83	/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
				84	if (splitImmediately) {
				85	/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
				86	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
				87	dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
				88	}
				89	else {
				90	/* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
				91	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
				92	dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
				93	}
				94	dctx->litBufferLocation = ZSTD_split;
				95	}
				96	else
				97	{
				98	/* fits entirely within litExtraBuffer, so no split is necessary */
				99	dctx->litBuffer = dctx->litExtraBuffer;
				100	dctx->litBufferEnd = dctx->litBuffer + litSize;
				101	dctx->litBufferLocation = ZSTD_not_in_dst;
				102	}
				103	}
				104
				105	/* Hidden declaration for fullbench */
				106	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
				107	const void* src, size_t srcSize,
				108	void* dst, size_t dstCapacity, const streaming_operation streaming);
				109	/*! ZSTD_decodeLiteralsBlock() :
				110	* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
				111	* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
				112	* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
				113	* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
				114	*
				115	* @return : nb of bytes read from src (< srcSize )
				116	* note : symbol not declared but exposed for fullbench */
				117	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
				118	const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
				119	void* dst, size_t dstCapacity, const streaming_operation streaming)
				120	{
				121	DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
				122	RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
				123
				124	{ const BYTE* const istart = (const BYTE*) src;
				125	symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
				126
				127	switch(litEncType)
				128	{
				129	case set_repeat:
				130	DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
				131	RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
				132	ZSTD_FALLTHROUGH;
				133
				134	case set_compressed:
				135	RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
				136	{ size_t lhSize, litSize, litCSize;
				137	U32 singleStream=0;
				138	U32 const lhlCode = (istart[0] >> 2) & 3;
				139	U32 const lhc = MEM_readLE32(istart);
				140	size_t hufSuccess;
				141	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
				142	switch(lhlCode)
				143	{
				144	case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
				145	/* 2 - 2 - 10 - 10 */
				146	singleStream = !lhlCode;
				147	lhSize = 3;
				148	litSize = (lhc >> 4) & 0x3FF;
				149	litCSize = (lhc >> 14) & 0x3FF;
				150	break;
				151	case 2:
				152	/* 2 - 2 - 14 - 14 */
				153	lhSize = 4;
				154	litSize = (lhc >> 4) & 0x3FFF;
				155	litCSize = lhc >> 18;
				156	break;
				157	case 3:
				158	/* 2 - 2 - 18 - 18 */
				159	lhSize = 5;
				160	litSize = (lhc >> 4) & 0x3FFFF;
				161	litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
				162	break;
				163	}
				164	RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
				165	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
				166	RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
				167	RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
				168	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
				169
				170	/* prefetch huffman table if cold */
				171	if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
				172	PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
				173	}
				174
				175	if (litEncType==set_repeat) {
				176	if (singleStream) {
				177	hufSuccess = HUF_decompress1X_usingDTable_bmi2(
				178	dctx->litBuffer, litSize, istart+lhSize, litCSize,
				179	dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
				180	} else {
				181	hufSuccess = HUF_decompress4X_usingDTable_bmi2(
				182	dctx->litBuffer, litSize, istart+lhSize, litCSize,
				183	dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
				184	}
				185	} else {
				186	if (singleStream) {
				187	#if defined(HUF_FORCE_DECOMPRESS_X2)
				188	hufSuccess = HUF_decompress1X_DCtx_wksp(
				189	dctx->entropy.hufTable, dctx->litBuffer, litSize,
				190	istart+lhSize, litCSize, dctx->workspace,
				191	sizeof(dctx->workspace));
				192	#else
				193	hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
				194	dctx->entropy.hufTable, dctx->litBuffer, litSize,
				195	istart+lhSize, litCSize, dctx->workspace,
				196	sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
				197	#endif
				198	} else {
				199	hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
				200	dctx->entropy.hufTable, dctx->litBuffer, litSize,
				201	istart+lhSize, litCSize, dctx->workspace,
				202	sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
				203	}
				204	}
				205	if (dctx->litBufferLocation == ZSTD_split)
				206	{
				207	ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
				208	ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
				209	dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
				210	dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
				211	}
				212
				213	RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
				214
				215	dctx->litPtr = dctx->litBuffer;
				216	dctx->litSize = litSize;
				217	dctx->litEntropy = 1;
				218	if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
				219	return litCSize + lhSize;
				220	}
				221
				222	case set_basic:
				223	{ size_t litSize, lhSize;
				224	U32 const lhlCode = ((istart[0]) >> 2) & 3;
				225	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
				226	switch(lhlCode)
				227	{
				228	case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
				229	lhSize = 1;
				230	litSize = istart[0] >> 3;
				231	break;
				232	case 1:
				233	lhSize = 2;
				234	litSize = MEM_readLE16(istart) >> 4;
				235	break;
				236	case 3:
				237	lhSize = 3;
				238	litSize = MEM_readLE24(istart) >> 4;
				239	break;
				240	}
				241
				242	RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
				243	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
				244	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
				245	if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
				246	RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
				247	if (dctx->litBufferLocation == ZSTD_split)
				248	{
				249	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
				250	ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
				251	}
				252	else
				253	{
				254	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
				255	}
				256	dctx->litPtr = dctx->litBuffer;
				257	dctx->litSize = litSize;
				258	return lhSize+litSize;
				259	}
				260	/* direct reference into compressed stream */
				261	dctx->litPtr = istart+lhSize;
				262	dctx->litSize = litSize;
				263	dctx->litBufferEnd = dctx->litPtr + litSize;
				264	dctx->litBufferLocation = ZSTD_not_in_dst;
				265	return lhSize+litSize;
				266	}
				267
				268	case set_rle:
				269	{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
				270	size_t litSize, lhSize;
				271	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
				272	switch(lhlCode)
				273	{
				274	case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
				275	lhSize = 1;
				276	litSize = istart[0] >> 3;
				277	break;
				278	case 1:
				279	lhSize = 2;
				280	litSize = MEM_readLE16(istart) >> 4;
				281	break;
				282	case 3:
				283	lhSize = 3;
				284	litSize = MEM_readLE24(istart) >> 4;
				285	RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
				286	break;
				287	}
				288	RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
				289	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
				290	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
				291	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
				292	if (dctx->litBufferLocation == ZSTD_split)
				293	{
				294	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
				295	ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
				296	}
				297	else
				298	{
				299	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
				300	}
				301	dctx->litPtr = dctx->litBuffer;
				302	dctx->litSize = litSize;
				303	return lhSize+1;
				304	}
				305	default:
				306	RETURN_ERROR(corruption_detected, "impossible");
				307	}
				308	}
				309	}
				310
				311	/* Default FSE distribution tables.
				312	* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
				313	* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
				314	* They were generated programmatically with following method :
				315	* - start from default distributions, present in /lib/common/zstd_internal.h
				316	* - generate tables normally, using ZSTD_buildFSETable()
				317	* - printout the content of tables
				318	* - prettify output, report below, test with fuzzer to ensure it's correct */
				319
				320	/* Default FSE distribution table for Literal Lengths */
				321	static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
				322	{ 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
				323	/* nextState, nbAddBits, nbBits, baseVal */
				324	{ 0, 0, 4, 0}, { 16, 0, 4, 0},
				325	{ 32, 0, 5, 1}, { 0, 0, 5, 3},
				326	{ 0, 0, 5, 4}, { 0, 0, 5, 6},
				327	{ 0, 0, 5, 7}, { 0, 0, 5, 9},
				328	{ 0, 0, 5, 10}, { 0, 0, 5, 12},
				329	{ 0, 0, 6, 14}, { 0, 1, 5, 16},
				330	{ 0, 1, 5, 20}, { 0, 1, 5, 22},
				331	{ 0, 2, 5, 28}, { 0, 3, 5, 32},
				332	{ 0, 4, 5, 48}, { 32, 6, 5, 64},
				333	{ 0, 7, 5, 128}, { 0, 8, 6, 256},
				334	{ 0, 10, 6, 1024}, { 0, 12, 6, 4096},
				335	{ 32, 0, 4, 0}, { 0, 0, 4, 1},
				336	{ 0, 0, 5, 2}, { 32, 0, 5, 4},
				337	{ 0, 0, 5, 5}, { 32, 0, 5, 7},
				338	{ 0, 0, 5, 8}, { 32, 0, 5, 10},
				339	{ 0, 0, 5, 11}, { 0, 0, 6, 13},
				340	{ 32, 1, 5, 16}, { 0, 1, 5, 18},
				341	{ 32, 1, 5, 22}, { 0, 2, 5, 24},
				342	{ 32, 3, 5, 32}, { 0, 3, 5, 40},
				343	{ 0, 6, 4, 64}, { 16, 6, 4, 64},
				344	{ 32, 7, 5, 128}, { 0, 9, 6, 512},
				345	{ 0, 11, 6, 2048}, { 48, 0, 4, 0},
				346	{ 16, 0, 4, 1}, { 32, 0, 5, 2},
				347	{ 32, 0, 5, 3}, { 32, 0, 5, 5},
				348	{ 32, 0, 5, 6}, { 32, 0, 5, 8},
				349	{ 32, 0, 5, 9}, { 32, 0, 5, 11},
				350	{ 32, 0, 5, 12}, { 0, 0, 6, 15},
				351	{ 32, 1, 5, 18}, { 32, 1, 5, 20},
				352	{ 32, 2, 5, 24}, { 32, 2, 5, 28},
				353	{ 32, 3, 5, 40}, { 32, 4, 5, 48},
				354	{ 0, 16, 6,65536}, { 0, 15, 6,32768},
				355	{ 0, 14, 6,16384}, { 0, 13, 6, 8192},
				356	}; /* LL_defaultDTable */
				357
				358	/* Default FSE distribution table for Offset Codes */
				359	static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
				360	{ 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
				361	/* nextState, nbAddBits, nbBits, baseVal */
				362	{ 0, 0, 5, 0}, { 0, 6, 4, 61},
				363	{ 0, 9, 5, 509}, { 0, 15, 5,32765},
				364	{ 0, 21, 5,2097149}, { 0, 3, 5, 5},
				365	{ 0, 7, 4, 125}, { 0, 12, 5, 4093},
				366	{ 0, 18, 5,262141}, { 0, 23, 5,8388605},
				367	{ 0, 5, 5, 29}, { 0, 8, 4, 253},
				368	{ 0, 14, 5,16381}, { 0, 20, 5,1048573},
				369	{ 0, 2, 5, 1}, { 16, 7, 4, 125},
				370	{ 0, 11, 5, 2045}, { 0, 17, 5,131069},
				371	{ 0, 22, 5,4194301}, { 0, 4, 5, 13},
				372	{ 16, 8, 4, 253}, { 0, 13, 5, 8189},
				373	{ 0, 19, 5,524285}, { 0, 1, 5, 1},
				374	{ 16, 6, 4, 61}, { 0, 10, 5, 1021},
				375	{ 0, 16, 5,65533}, { 0, 28, 5,268435453},
				376	{ 0, 27, 5,134217725}, { 0, 26, 5,67108861},
				377	{ 0, 25, 5,33554429}, { 0, 24, 5,16777213},
				378	}; /* OF_defaultDTable */
				379
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	380	/* Default FSE distribution table for Match Lengths */
				381	static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
				382	{ 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
				383	/* nextState, nbAddBits, nbBits, baseVal */
				384	{ 0, 0, 6, 3}, { 0, 0, 4, 4},
				385	{ 32, 0, 5, 5}, { 0, 0, 5, 6},
				386	{ 0, 0, 5, 8}, { 0, 0, 5, 9},
				387	{ 0, 0, 5, 11}, { 0, 0, 6, 13},
				388	{ 0, 0, 6, 16}, { 0, 0, 6, 19},
				389	{ 0, 0, 6, 22}, { 0, 0, 6, 25},
				390	{ 0, 0, 6, 28}, { 0, 0, 6, 31},
				391	{ 0, 0, 6, 34}, { 0, 1, 6, 37},
				392	{ 0, 1, 6, 41}, { 0, 2, 6, 47},
				393	{ 0, 3, 6, 59}, { 0, 4, 6, 83},
				394	{ 0, 7, 6, 131}, { 0, 9, 6, 515},
				395	{ 16, 0, 4, 4}, { 0, 0, 4, 5},
				396	{ 32, 0, 5, 6}, { 0, 0, 5, 7},
				397	{ 32, 0, 5, 9}, { 0, 0, 5, 10},
				398	{ 0, 0, 6, 12}, { 0, 0, 6, 15},
				399	{ 0, 0, 6, 18}, { 0, 0, 6, 21},
				400	{ 0, 0, 6, 24}, { 0, 0, 6, 27},
				401	{ 0, 0, 6, 30}, { 0, 0, 6, 33},
				402	{ 0, 1, 6, 35}, { 0, 1, 6, 39},
				403	{ 0, 2, 6, 43}, { 0, 3, 6, 51},
				404	{ 0, 4, 6, 67}, { 0, 5, 6, 99},
				405	{ 0, 8, 6, 259}, { 32, 0, 4, 4},
				406	{ 48, 0, 4, 4}, { 16, 0, 4, 5},
				407	{ 32, 0, 5, 7}, { 32, 0, 5, 8},
				408	{ 32, 0, 5, 10}, { 32, 0, 5, 11},
				409	{ 0, 0, 6, 14}, { 0, 0, 6, 17},
				410	{ 0, 0, 6, 20}, { 0, 0, 6, 23},
				411	{ 0, 0, 6, 26}, { 0, 0, 6, 29},
				412	{ 0, 0, 6, 32}, { 0, 16, 6,65539},
				413	{ 0, 15, 6,32771}, { 0, 14, 6,16387},
				414	{ 0, 13, 6, 8195}, { 0, 12, 6, 4099},
				415	{ 0, 11, 6, 2051}, { 0, 10, 6, 1027},
				416	}; /* ML_defaultDTable */
				417
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	418	static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
				419	{
				420	void* ptr = dt;
				421	ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
				422	ZSTD_seqSymbol* const cell = dt + 1;
				423
				424	DTableH->tableLog = 0;
				425	DTableH->fastMode = 0;
				426
				427	cell->nbBits = 0;
				428	cell->nextState = 0;
				429	assert(nbAddBits < 255);
				430	cell->nbAdditionalBits = nbAddBits;
				431	cell->baseValue = baseValue;
				432	}
				433
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	434	/* ZSTD_buildFSETable() :
				435	* generate FSE decoding table for one symbol (ll, ml or off)
				436	* cannot fail if input is valid =>
				437	* all inputs are presumed validated at this stage */
				438	FORCE_INLINE_TEMPLATE
				439	void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
				440	const short* normalizedCounter, unsigned maxSymbolValue,
				441	const U32* baseValue, const U8* nbAdditionalBits,
				442	unsigned tableLog, void* wksp, size_t wkspSize)
				443	{
				444	ZSTD_seqSymbol* const tableDecode = dt+1;
				445	U32 const maxSV1 = maxSymbolValue + 1;
				446	U32 const tableSize = 1 << tableLog;
				447
				448	U16* symbolNext = (U16*)wksp;
				449	BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
				450	U32 highThreshold = tableSize - 1;
				451
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	452	/* Sanity Checks */
				453	assert(maxSymbolValue <= MaxSeq);
				454	assert(tableLog <= MaxFSELog);
				455	assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
				456	(void)wkspSize;
				457	/* Init, lay down lowprob symbols */
				458	{ ZSTD_seqSymbol_header DTableH;
				459	DTableH.tableLog = tableLog;
				460	DTableH.fastMode = 1;
				461	{ S16 const largeLimit= (S16)(1 << (tableLog-1));
				462	U32 s;
				463	for (s=0; s<maxSV1; s++) {
				464	if (normalizedCounter[s]==-1) {
				465	tableDecode[highThreshold--].baseValue = s;
				466	symbolNext[s] = 1;
				467	} else {
				468	if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
				469	assert(normalizedCounter[s]>=0);
				470	symbolNext[s] = (U16)normalizedCounter[s];
				471	} } }
				472	ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
				473	}
				474
				475	/* Spread symbols */
				476	assert(tableSize <= 512);
				477	/* Specialized symbol spreading for the case when there are
				478	* no low probability (-1 count) symbols. When compressing
				479	* small blocks we avoid low probability symbols to hit this
				480	* case, since header decoding speed matters more.
				481	*/
				482	if (highThreshold == tableSize - 1) {
				483	size_t const tableMask = tableSize-1;
				484	size_t const step = FSE_TABLESTEP(tableSize);
				485	/* First lay down the symbols in order.
				486	* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
				487	* misses since small blocks generally have small table logs, so nearly
				488	* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
				489	* our buffer to handle the over-write.
				490	*/
				491	{
				492	U64 const add = 0x0101010101010101ull;
				493	size_t pos = 0;
				494	U64 sv = 0;
				495	U32 s;
				496	for (s=0; s<maxSV1; ++s, sv += add) {
				497	int i;
				498	int const n = normalizedCounter[s];
				499	MEM_write64(spread + pos, sv);
				500	for (i = 8; i < n; i += 8) {
				501	MEM_write64(spread + pos + i, sv);
				502	}
				503	pos += n;
				504	}
				505	}
				506	/* Now we spread those positions across the table.
				507	* The benefit of doing it in two stages is that we avoid the the
				508	* variable size inner loop, which caused lots of branch misses.
				509	* Now we can run through all the positions without any branch misses.
				510	* We unroll the loop twice, since that is what emperically worked best.
				511	*/
				512	{
				513	size_t position = 0;
				514	size_t s;
				515	size_t const unroll = 2;
				516	assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
				517	for (s = 0; s < (size_t)tableSize; s += unroll) {
				518	size_t u;
				519	for (u = 0; u < unroll; ++u) {
				520	size_t const uPosition = (position + (u * step)) & tableMask;
				521	tableDecode[uPosition].baseValue = spread[s + u];
				522	}
				523	position = (position + (unroll * step)) & tableMask;
				524	}
				525	assert(position == 0);
				526	}
				527	} else {
				528	U32 const tableMask = tableSize-1;
				529	U32 const step = FSE_TABLESTEP(tableSize);
				530	U32 s, position = 0;
				531	for (s=0; s<maxSV1; s++) {
				532	int i;
				533	int const n = normalizedCounter[s];
				534	for (i=0; i<n; i++) {
				535	tableDecode[position].baseValue = s;
				536	position = (position + step) & tableMask;
				537	while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
				538	} }
				539	assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
				540	}
				541
				542	/* Build Decoding table */
				543	{
				544	U32 u;
				545	for (u=0; u<tableSize; u++) {
				546	U32 const symbol = tableDecode[u].baseValue;
				547	U32 const nextState = symbolNext[symbol]++;
				548	tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
				549	tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
				550	assert(nbAdditionalBits[symbol] < 255);
				551	tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
				552	tableDecode[u].baseValue = baseValue[symbol];
				553	}
				554	}
				555	}
				556
				557	/* Avoids the FORCE_INLINE of the _body() function. */
				558	static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
				559	const short* normalizedCounter, unsigned maxSymbolValue,
				560	const U32* baseValue, const U8* nbAdditionalBits,
				561	unsigned tableLog, void* wksp, size_t wkspSize)
				562	{
				563	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
				564	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				565	}
				566
				#if DYNAMIC_BMI2
				/* ZSTD_buildFSETable_body_bmi2() :
				 * Instantiation of ZSTD_buildFSETable_body() compiled with BMI2_TARGET_ATTRIBUTE;
				 * only built when DYNAMIC_BMI2 is enabled. */
				BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
				            const short* normalizedCounter, unsigned maxSymbolValue,
				            const U32* baseValue, const U8* nbAdditionalBits,
				            unsigned tableLog, void* wksp, size_t wkspSize)
				{
				    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
				            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				}
				#endif
				577
				578	void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
				579	const short* normalizedCounter, unsigned maxSymbolValue,
				580	const U32* baseValue, const U8* nbAdditionalBits,
				581	unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
				582	{
				583	#if DYNAMIC_BMI2
				584	if (bmi2) {
				585	ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
				586	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				587	return;
				588	}
				589	#endif
				590	(void)bmi2;
				591	ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
				592	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				593	}
				594
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	595	/*! ZSTD_buildSeqTable() :
				596	* @return : nb bytes read from src,
				597	* or an error code if it fails */
				598	static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
				599	symbolEncodingType_e type, unsigned max, U32 maxLog,
				600	const void* src, size_t srcSize,
				601	const U32* baseValue, const U8* nbAdditionalBits,
				602	const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
				603	int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
				604	int bmi2)
				605	{
				606	switch(type)
				607	{
				608	case set_rle :
				609	RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
				610	RETURN_ERROR_IF(((const BYTE)src) > max, corruption_detected, "");
				611	{ U32 const symbol = (const BYTE)src;
				612	U32 const baseline = baseValue[symbol];
				613	U8 const nbBits = nbAdditionalBits[symbol];
				614	ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
				615	}
				616	*DTablePtr = DTableSpace;
				617	return 1;
				618	case set_basic :
				619	*DTablePtr = defaultTable;
				620	return 0;
				621	case set_repeat:
				622	RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
				623	/* prefetch FSE table if used */
				624	if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
				625	const void* const pStart = *DTablePtr;
				626	size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
				627	PREFETCH_AREA(pStart, pSize);
				628	}
				629	return 0;
				630	case set_compressed :
				631	{ unsigned tableLog;
				632	S16 norm[MaxSeq+1];
				633	size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
				634	RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
				635	RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
				636	ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
				637	*DTablePtr = DTableSpace;
				638	return headerSize;
				639	}
				640	default :
				641	assert(0);
				642	RETURN_ERROR(GENERIC, "impossible");
				643	}
				644	}
				645
				646	size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
				647	const void* src, size_t srcSize)
				648	{
				649	const BYTE* const istart = (const BYTE*)src;
				650	const BYTE* const iend = istart + srcSize;
				651	const BYTE* ip = istart;
				652	int nbSeq;
				653	DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
				654
				655	/* check */
				656	RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
				657
				658	/* SeqHead */
				659	nbSeq = *ip++;
				660	if (!nbSeq) {
				661	*nbSeqPtr=0;
				662	RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
				663	return 1;
				664	}
				665	if (nbSeq > 0x7F) {
				666	if (nbSeq == 0xFF) {
				667	RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
				668	nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
				669	ip+=2;
				670	} else {
				671	RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
				672	nbSeq = ((nbSeq-0x80)<<8) + *ip++;
				673	}
				674	}
				675	*nbSeqPtr = nbSeq;
				676
				677	/* FSE table descriptors */
				678	RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
				679	{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
				680	symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
				681	symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
				682	ip++;
				683
				684	/* Build DTables */
				685	{ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
				686	LLtype, MaxLL, LLFSELog,
				687	ip, iend-ip,
				688	LL_base, LL_bits,
				689	LL_defaultDTable, dctx->fseEntropy,
				690	dctx->ddictIsCold, nbSeq,
				691	dctx->workspace, sizeof(dctx->workspace),
				692	ZSTD_DCtx_get_bmi2(dctx));
				693	RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
				694	ip += llhSize;
				695	}
				696
				697	{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
				698	OFtype, MaxOff, OffFSELog,
				699	ip, iend-ip,
				700	OF_base, OF_bits,
				701	OF_defaultDTable, dctx->fseEntropy,
				702	dctx->ddictIsCold, nbSeq,
				703	dctx->workspace, sizeof(dctx->workspace),
				704	ZSTD_DCtx_get_bmi2(dctx));
				705	RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
				706	ip += ofhSize;
				707	}
				708
				709	{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
				710	MLtype, MaxML, MLFSELog,
				711	ip, iend-ip,
				712	ML_base, ML_bits,
				713	ML_defaultDTable, dctx->fseEntropy,
				714	dctx->ddictIsCold, nbSeq,
				715	dctx->workspace, sizeof(dctx->workspace),
				716	ZSTD_DCtx_get_bmi2(dctx));
				717	RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
				718	ip += mlhSize;
				719	}
				720	}
				721
				722	return ip-istart;
				723	}
				724
/* seq_t :
 * The three values making up one sequence : literal length, match length and offset. */
typedef struct {
    size_t litLength;
    size_t matchLength;
    size_t offset;
} seq_t;
				730
				731	typedef struct {
				732	size_t state;
				733	const ZSTD_seqSymbol* table;
				734	} ZSTD_fseState;
				735
				736	typedef struct {
				737	BIT_DStream_t DStream;
				738	ZSTD_fseState stateLL;
				739	ZSTD_fseState stateOffb;
				740	ZSTD_fseState stateML;
				741	size_t prevOffset[ZSTD_REP_NUM];
				742	} seqState_t;
				743
				744	/*! ZSTD_overlapCopy8() :
				745	* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
				746	* If the offset is < 8 then the offset is spread to at least 8 bytes.
				747	*
				748	* Precondition: ip <= op
				749	* Postcondition: op - op >= 8
				750	*/
				751	HINT_INLINE void ZSTD_overlapCopy8(BYTE op, BYTE const ip, size_t offset) {
				752	assert(ip <= op);
				753	if (offset < 8) {
				754	/* close range match, overlap */
				755	static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
				756	static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
				757	int const sub2 = dec64table[offset];
				758	(op)[0] = (ip)[0];
				759	(op)[1] = (ip)[1];
				760	(op)[2] = (ip)[2];
				761	(op)[3] = (ip)[3];
				762	*ip += dec32table[offset];
				763	ZSTD_copy4(op+4, ip);
				764	*ip -= sub2;
				765	} else {
				766	ZSTD_copy8(op, ip);
				767	}
				768	*ip += 8;
				769	*op += 8;
				770	assert(op - ip >= 8);
				771	}
				772
				773	/*! ZSTD_safecopy() :
				774	* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
				775	* and write up to 16 bytes past oend_w (op >= oend_w is allowed).
				776	* This function is only called in the uncommon case where the sequence is near the end of the block. It
				777	* should be fast for a single long sequence, but can be slow for several short sequences.
				778	*
				779	* @param ovtype controls the overlap detection
				780	* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
				781	* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
				782	* The src buffer must be before the dst buffer.
				783	*/
				784	static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
				785	ptrdiff_t const diff = op - ip;
				786	BYTE* const oend = op + length;
				787
				788	assert((ovtype == ZSTD_no_overlap && (diff <= -8 \|\| diff >= 8 \|\| op >= oend_w)) \|\|
				789	(ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
				790
				791	if (length < 8) {
				792	/* Handle short lengths. */
				793	while (op < oend) op++ = ip++;
				794	return;
				795	}
				796	if (ovtype == ZSTD_overlap_src_before_dst) {
				797	/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
				798	assert(length >= 8);
				799	ZSTD_overlapCopy8(&op, &ip, diff);
				800	length -= 8;
				801	assert(op - ip >= 8);
				802	assert(op <= oend);
				803	}
				804
				805	if (oend <= oend_w) {
				806	/* No risk of overwrite. */
				807	ZSTD_wildcopy(op, ip, length, ovtype);
				808	return;
				809	}
				810	if (op <= oend_w) {
				811	/* Wildcopy until we get close to the end. */
				812	assert(oend > oend_w);
				813	ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
				814	ip += oend_w - op;
				815	op += oend_w - op;
				816	}
				817	/* Handle the leftovers. */
				818	while (op < oend) op++ = ip++;
				819	}
				820
				821	/* ZSTD_safecopyDstBeforeSrc():
				822	* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
				823	* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
				824	static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
				825	ptrdiff_t const diff = op - ip;
				826	BYTE* const oend = op + length;
				827
				828	if (length < 8 \|\| diff > -8) {
				829	/* Handle short lengths, close overlaps, and dst not before src. */
				830	while (op < oend) op++ = ip++;
				831	return;
				832	}
				833
				834	if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
				835	ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
				836	ip += oend - WILDCOPY_OVERLENGTH - op;
				837	op += oend - WILDCOPY_OVERLENGTH - op;
				838	}
				839
				840	/* Handle the leftovers. */
				841	while (op < oend) op++ = ip++;
				842	}
				843
				844	/* ZSTD_execSequenceEnd():
				845	* This version handles cases that are near the end of the output buffer. It requires
				846	* more careful checks to make sure there is no overflow. By separating out these hard
				847	* and unlikely cases, we can speed up the common cases.
				848	*
				849	* NOTE: This function needs to be fast for a single long sequence, but doesn't need
				850	* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
				851	*/
				852	FORCE_NOINLINE
				853	size_t ZSTD_execSequenceEnd(BYTE* op,
				854	BYTE* const oend, seq_t sequence,
				855	const BYTE** litPtr, const BYTE* const litLimit,
				856	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				857	{
				858	BYTE* const oLitEnd = op + sequence.litLength;
				859	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				860	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				861	const BYTE* match = oLitEnd - sequence.offset;
				862	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
				863
				864	/* bounds checks : careful of address space overflow in 32-bit mode */
				865	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
				866	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
				867	assert(op < op + sequenceLength);
				868	assert(oLitEnd < op + sequenceLength);
				869
				870	/* copy literals */
				871	ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
				872	op = oLitEnd;
				873	*litPtr = iLitEnd;
				874
				875	/* copy Match */
				876	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				877	/* offset beyond prefix */
				878	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
				879	match = dictEnd - (prefixStart - match);
				880	if (match + sequence.matchLength <= dictEnd) {
				881	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				882	return sequenceLength;
				883	}
				884	/* span extDict & currentPrefixSegment */
				885	{ size_t const length1 = dictEnd - match;
				886	ZSTD_memmove(oLitEnd, match, length1);
				887	op = oLitEnd + length1;
				888	sequence.matchLength -= length1;
				889	match = prefixStart;
				890	}
				891	}
				892	ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
				893	return sequenceLength;
				894	}
				895
				896	/* ZSTD_execSequenceEndSplitLitBuffer():
				897	* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
				898	*/
				899	FORCE_NOINLINE
				900	size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
				901	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
				902	const BYTE** litPtr, const BYTE* const litLimit,
				903	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				904	{
				905	BYTE* const oLitEnd = op + sequence.litLength;
				906	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				907	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				908	const BYTE* match = oLitEnd - sequence.offset;
				909
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	910	/* bounds checks : careful of address space overflow in 32-bit mode */
				911	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
				912	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
				913	assert(op < op + sequenceLength);
				914	assert(oLitEnd < op + sequenceLength);
				915
				916	/* copy literals */
				917	RETURN_ERROR_IF(op > litPtr && op < litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
				918	ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
				919	op = oLitEnd;
				920	*litPtr = iLitEnd;
				921
				922	/* copy Match */
				923	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				924	/* offset beyond prefix */
				925	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
				926	match = dictEnd - (prefixStart - match);
				927	if (match + sequence.matchLength <= dictEnd) {
				928	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				929	return sequenceLength;
				930	}
				931	/* span extDict & currentPrefixSegment */
				932	{ size_t const length1 = dictEnd - match;
				933	ZSTD_memmove(oLitEnd, match, length1);
				934	op = oLitEnd + length1;
				935	sequence.matchLength -= length1;
				936	match = prefixStart;
				937	}
				938	}
				939	ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
				940	return sequenceLength;
				941	}
				942
				943	HINT_INLINE
				944	size_t ZSTD_execSequence(BYTE* op,
				945	BYTE* const oend, seq_t sequence,
				946	const BYTE** litPtr, const BYTE* const litLimit,
				947	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				948	{
				949	BYTE* const oLitEnd = op + sequence.litLength;
				950	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				951	BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
				952	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
				953	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				954	const BYTE* match = oLitEnd - sequence.offset;
				955
				956	assert(op != NULL /* Precondition */);
				957	assert(oend_w < oend /* No underflow */);
				958	/* Handle edge cases in a slow path:
				959	* - Read beyond end of literals
				960	* - Match end is within WILDCOPY_OVERLIMIT of oend
				961	* - 32-bit mode and the match length overflows
				962	*/
				963	if (UNLIKELY(
				964	iLitEnd > litLimit \|\|
				965	oMatchEnd > oend_w \|\|
				966	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
				967	return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
				968
				969	/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
				970	assert(op <= oLitEnd /* No overflow */);
				971	assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
				972	assert(oMatchEnd <= oend /* No underflow */);
				973	assert(iLitEnd <= litLimit /* Literal length is in bounds */);
				974	assert(oLitEnd <= oend_w /* Can wildcopy literals */);
				975	assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
				976
				977	/* Copy Literals:
				978	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
				979	* We likely don't need the full 32-byte wildcopy.
				980	*/
				981	assert(WILDCOPY_OVERLENGTH >= 16);
				982	ZSTD_copy16(op, (*litPtr));
				983	if (UNLIKELY(sequence.litLength > 16)) {
				984	ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
				985	}
				986	op = oLitEnd;
				987	litPtr = iLitEnd; / update for next sequence */
				988
				989	/* Copy Match */
				990	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				991	/* offset beyond prefix -> go into extDict */
				992	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
				993	match = dictEnd + (match - prefixStart);
				994	if (match + sequence.matchLength <= dictEnd) {
				995	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				996	return sequenceLength;
				997	}
				998	/* span extDict & currentPrefixSegment */
				999	{ size_t const length1 = dictEnd - match;
				1000	ZSTD_memmove(oLitEnd, match, length1);
				1001	op = oLitEnd + length1;
				1002	sequence.matchLength -= length1;
				1003	match = prefixStart;
				1004	}
				1005	}
				1006	/* Match within prefix of 1 or more bytes */
				1007	assert(op <= oMatchEnd);
				1008	assert(oMatchEnd <= oend_w);
				1009	assert(match >= prefixStart);
				1010	assert(sequence.matchLength >= 1);
				1011
				1012	/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
				1013	* without overlap checking.
				1014	*/
				1015	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
				1016	/* We bet on a full wildcopy for matches, since we expect matches to be
				1017	* longer than literals (in general). In silesia, ~10% of matches are longer
				1018	* than 16 bytes.
				1019	*/
				1020	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
				1021	return sequenceLength;
				1022	}
				1023	assert(sequence.offset < WILDCOPY_VECLEN);
				1024
				1025	/* Copy 8 bytes and spread the offset to be >= 8. */
				1026	ZSTD_overlapCopy8(&op, &match, sequence.offset);
				1027
				1028	/* If the match length is > 8 bytes, then continue with the wildcopy. */
				1029	if (sequence.matchLength > 8) {
				1030	assert(op < oMatchEnd);
				1031	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
				1032	}
				1033	return sequenceLength;
				1034	}
				1035
				1036	HINT_INLINE
				1037	size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
				1038	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
				1039	const BYTE** litPtr, const BYTE* const litLimit,
				1040	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				1041	{
				1042	BYTE* const oLitEnd = op + sequence.litLength;
				1043	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				1044	BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
				1045	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				1046	const BYTE* match = oLitEnd - sequence.offset;
				1047
				1048	assert(op != NULL /* Precondition */);
				1049	assert(oend_w < oend /* No underflow */);
				1050	/* Handle edge cases in a slow path:
				1051	* - Read beyond end of literals
				1052	* - Match end is within WILDCOPY_OVERLIMIT of oend
				1053	* - 32-bit mode and the match length overflows
				1054	*/
				1055	if (UNLIKELY(
				1056	iLitEnd > litLimit \|\|
				1057	oMatchEnd > oend_w \|\|
				1058	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
				1059	return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
				1060
				1061	/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
				1062	assert(op <= oLitEnd /* No overflow */);
				1063	assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
				1064	assert(oMatchEnd <= oend /* No underflow */);
				1065	assert(iLitEnd <= litLimit /* Literal length is in bounds */);
				1066	assert(oLitEnd <= oend_w /* Can wildcopy literals */);
				1067	assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
				1068
				1069	/* Copy Literals:
				1070	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
				1071	* We likely don't need the full 32-byte wildcopy.
				1072	*/
				1073	assert(WILDCOPY_OVERLENGTH >= 16);
				1074	ZSTD_copy16(op, (*litPtr));
				1075	if (UNLIKELY(sequence.litLength > 16)) {
				1076	ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
				1077	}
				1078	op = oLitEnd;
				1079	litPtr = iLitEnd; / update for next sequence */
				1080
				1081	/* Copy Match */
				1082	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				1083	/* offset beyond prefix -> go into extDict */
				1084	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
				1085	match = dictEnd + (match - prefixStart);
				1086	if (match + sequence.matchLength <= dictEnd) {
				1087	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				1088	return sequenceLength;
				1089	}
				1090	/* span extDict & currentPrefixSegment */
				1091	{ size_t const length1 = dictEnd - match;
				1092	ZSTD_memmove(oLitEnd, match, length1);
				1093	op = oLitEnd + length1;
				1094	sequence.matchLength -= length1;
				1095	match = prefixStart;
				1096	} }
				1097	/* Match within prefix of 1 or more bytes */
				1098	assert(op <= oMatchEnd);
				1099	assert(oMatchEnd <= oend_w);
				1100	assert(match >= prefixStart);
				1101	assert(sequence.matchLength >= 1);
				1102
				1103	/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
				1104	* without overlap checking.
				1105	*/
				1106	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
				1107	/* We bet on a full wildcopy for matches, since we expect matches to be
				1108	* longer than literals (in general). In silesia, ~10% of matches are longer
				1109	* than 16 bytes.
				1110	*/
				1111	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
				1112	return sequenceLength;
				1113	}
				1114	assert(sequence.offset < WILDCOPY_VECLEN);
				1115
				1116	/* Copy 8 bytes and spread the offset to be >= 8. */
				1117	ZSTD_overlapCopy8(&op, &match, sequence.offset);
				1118
				1119	/* If the match length is > 8 bytes, then continue with the wildcopy. */
				1120	if (sequence.matchLength > 8) {
				1121	assert(op < oMatchEnd);
				1122	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
				1123	}
				1124	return sequenceLength;
				1125	}
				1126
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	1127	static void
				1128	ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
				1129	{
				1130	const void* ptr = dt;
				1131	const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
				1132	DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
				1133	DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
				1134	(U32)DStatePtr->state, DTableH->tableLog);
				1135	BIT_reloadDStream(bitD);
				1136	DStatePtr->table = dt + 1;
				1137	}
				1138
				1139	FORCE_INLINE_TEMPLATE void
				1140	ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
				1141	{
				1142	size_t const lowBits = BIT_readBits(bitD, nbBits);
				1143	DStatePtr->state = nextState + lowBits;
				1144	}
				1145
				1146	/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
				1147	* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
				1148	* bits before reloading. This value is the maximum number of bytes we read
				1149	* after reloading when we are decoding long offsets.
				1150	*/
				1151	#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
				1152	(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
				1153	? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
				1154	: 0)
				1155
				1156	typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
				1157
				1158	FORCE_INLINE_TEMPLATE seq_t
				1159	ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
				1160	{
				1161	seq_t seq;
				1162	const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
				1163	const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
				1164	const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
				1165	seq.matchLength = mlDInfo->baseValue;
				1166	seq.litLength = llDInfo->baseValue;
				1167	{ U32 const ofBase = ofDInfo->baseValue;
				1168	BYTE const llBits = llDInfo->nbAdditionalBits;
				1169	BYTE const mlBits = mlDInfo->nbAdditionalBits;
				1170	BYTE const ofBits = ofDInfo->nbAdditionalBits;
				1171	BYTE const totalBits = llBits+mlBits+ofBits;
				1172
				1173	U16 const llNext = llDInfo->nextState;
				1174	U16 const mlNext = mlDInfo->nextState;
				1175	U16 const ofNext = ofDInfo->nextState;
				1176	U32 const llnbBits = llDInfo->nbBits;
				1177	U32 const mlnbBits = mlDInfo->nbBits;
				1178	U32 const ofnbBits = ofDInfo->nbBits;
				1179	/*
				1180	* As gcc has better branch and block analyzers, sometimes it is only
				1181	* valuable to mark likelyness for clang, it gives around 3-4% of
				1182	* performance.
				1183	*/
				1184
				1185	/* sequence */
				1186	{ size_t offset;
				1187	#if defined(__clang__)
				1188	if (LIKELY(ofBits > 1)) {
				1189	#else
				1190	if (ofBits > 1) {
				1191	#endif
				1192	ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
				1193	ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
				1194	assert(ofBits <= MaxOff);
				1195	if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
				1196	U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
				1197	offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
				1198	BIT_reloadDStream(&seqState->DStream);
				1199	if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
				1200	assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
				1201	} else {
				1202	offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/>0/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
				1203	if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
				1204	}
				1205	seqState->prevOffset[2] = seqState->prevOffset[1];
				1206	seqState->prevOffset[1] = seqState->prevOffset[0];
				1207	seqState->prevOffset[0] = offset;
				1208	} else {
				1209	U32 const ll0 = (llDInfo->baseValue == 0);
				1210	if (LIKELY((ofBits == 0))) {
				1211	offset = seqState->prevOffset[ll0];
				1212	seqState->prevOffset[1] = seqState->prevOffset[!ll0];
				1213	seqState->prevOffset[0] = offset;
				1214	} else {
				1215	offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
				1216	{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
				1217	temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
				1218	if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
				1219	seqState->prevOffset[1] = seqState->prevOffset[0];
				1220	seqState->prevOffset[0] = offset = temp;
				1221	} } }
				1222	seq.offset = offset;
				1223	}
				1224
				1225	#if defined(__clang__)
				1226	if (UNLIKELY(mlBits > 0))
				1227	#else
				1228	if (mlBits > 0)
				1229	#endif
				1230	seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/>0/);
				1231
				1232	if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
				1233	BIT_reloadDStream(&seqState->DStream);
				1234	if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
				1235	BIT_reloadDStream(&seqState->DStream);
				1236	/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
				1237	ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
				1238
				1239	#if defined(__clang__)
				1240	if (UNLIKELY(llBits > 0))
				1241	#else
				1242	if (llBits > 0)
				1243	#endif
				1244	seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/>0/);
				1245
				1246	if (MEM_32bits())
				1247	BIT_reloadDStream(&seqState->DStream);
				1248
				1249	DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
				1250	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
				1251
				1252	ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
				1253	ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
				1254	if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
				1255	ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
				1256	}
				1257
				1258	return seq;
				1259	}
				1260
				1261	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				1262	MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
				1263	{
				1264	size_t const windowSize = dctx->fParams.windowSize;
				1265	/* No dictionary used. */
				1266	if (dctx->dictContentEndForFuzzing == NULL) return 0;
				1267	/* Dictionary is our prefix. */
				1268	if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
				1269	/* Dictionary is not our ext-dict. */
				1270	if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
				1271	/* Dictionary is not within our window size. */
				1272	if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
				1273	/* Dictionary is active. */
				1274	return 1;
				1275	}
				1276
				1277	MEM_STATIC void ZSTD_assertValidSequence(
				1278	ZSTD_DCtx const* dctx,
				1279	BYTE const* op, BYTE const* oend,
				1280	seq_t const seq,
				1281	BYTE const* prefixStart, BYTE const* virtualStart)
				1282	{
				1283	#if DEBUGLEVEL >= 1
				1284	size_t const windowSize = dctx->fParams.windowSize;
				1285	size_t const sequenceSize = seq.litLength + seq.matchLength;
				1286	BYTE const* const oLitEnd = op + seq.litLength;
				1287	DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
				1288	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
				1289	assert(op <= oend);
				1290	assert((size_t)(oend - op) >= sequenceSize);
				1291	assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
				1292	if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
				1293	size_t const dictSize = (size_t)((char const)dctx->dictContentEndForFuzzing - (char const)dctx->dictContentBeginForFuzzing);
				1294	/* Offset must be within the dictionary. */
				1295	assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
				1296	assert(seq.offset <= windowSize + dictSize);
				1297	} else {
				1298	/* Offset must be within our window. */
				1299	assert(seq.offset <= windowSize);
				1300	}
				1301	#else
				1302	(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
				1303	#endif
				1304	}
				1305	#endif
				1306
				1307	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1308
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	1309	FORCE_INLINE_TEMPLATE size_t
				1310	DONT_VECTORIZE
				1311	ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
				1312	void* dst, size_t maxDstSize,
				1313	const void* seqStart, size_t seqSize, int nbSeq,
				1314	const ZSTD_longOffset_e isLongOffset,
				1315	const int frame)
				1316	{
				1317	const BYTE* ip = (const BYTE*)seqStart;
				1318	const BYTE* const iend = ip + seqSize;
				1319	BYTE* const ostart = (BYTE*)dst;
				1320	BYTE* const oend = ostart + maxDstSize;
				1321	BYTE* op = ostart;
				1322	const BYTE* litPtr = dctx->litPtr;
				1323	const BYTE* litBufferEnd = dctx->litBufferEnd;
				1324	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
				1325	const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
				1326	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
				1327	DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
				1328	(void)frame;
				1329
				1330	/* Regen sequences */
				1331	if (nbSeq) {
				1332	seqState_t seqState;
				1333	dctx->fseEntropy = 1;
				1334	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
				1335	RETURN_ERROR_IF(
				1336	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
				1337	corruption_detected, "");
				1338	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
				1339	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
				1340	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
				1341	assert(dst != NULL);
				1342
				1343	ZSTD_STATIC_ASSERT(
				1344	BIT_DStream_unfinished < BIT_DStream_completed &&
				1345	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
				1346	BIT_DStream_completed < BIT_DStream_overflow);
				1347
				1348	/* decompress without overrunning litPtr begins */
				1349	{
				1350	seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1351	/* Align the decompression loop to 32 + 16 bytes.
				1352	*
				1353	* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
				1354	* speed swings based on the alignment of the decompression loop. This
				1355	* performance swing is caused by parts of the decompression loop falling
				1356	* out of the DSB. The entire decompression loop should fit in the DSB,
				1357	* when it can't we get much worse performance. You can measure if you've
				1358	* hit the good case or the bad case with this perf command for some
				1359	* compressed file test.zst:
				1360	*
				1361	* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
				1362	* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
				1363	*
				1364	* If you see most cycles served out of the MITE you've hit the bad case.
				1365	* If you see most cycles served out of the DSB you've hit the good case.
				1366	* If it is pretty even then you may be in an okay case.
				1367	*
				1368	* This issue has been reproduced on the following CPUs:
				1369	* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
				1370	* Use Instruments->Counters to get DSB/MITE cycles.
				1371	* I never got performance swings, but I was able to
				1372	* go from the good case of mostly DSB to half of the
				1373	* cycles served from MITE.
				1374	* - Coffeelake: Intel i9-9900k
				1375	* - Coffeelake: Intel i7-9700k
				1376	*
				1377	* I haven't been able to reproduce the instability or DSB misses on any
				1378	* of the following CPUS:
				1379	* - Haswell
				1380	* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
				1381	* - Skylake
				1382	*
				1383	* Alignment is done for each of the three major decompression loops:
				1384	* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
				1385	* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
				1386	* - ZSTD_decompressSequences_body
				1387	* Alignment choices are made to minimize large swings on bad cases and influence on performance
				1388	* from changes external to this code, rather than to overoptimize on the current commit.
				1389	*
				1390	* If you are seeing performance stability this script can help test.
				1391	* It tests on 4 commits in zstd where I saw performance change.
				1392	*
				1393	* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
				1394	*/
				1395	#if defined(__x86_64__)
				1396	__asm__(".p2align 6");
				1397	# if __GNUC__ >= 7
				1398	/* good for gcc-7, gcc-9, and gcc-11 */
				1399	__asm__("nop");
				1400	__asm__(".p2align 5");
				1401	__asm__("nop");
				1402	__asm__(".p2align 4");
				1403	# if __GNUC__ == 8 \|\| __GNUC__ == 10
				1404	/* good for gcc-8 and gcc-10 */
				1405	__asm__("nop");
				1406	__asm__(".p2align 3");
				1407	# endif
				1408	# endif
				1409	#endif
				1410
				1411	/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
				1412	for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
				1413	size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
				1414	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1415	assert(!ZSTD_isError(oneSeqSize));
				1416	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
				1417	#endif
				1418	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
				1419	return oneSeqSize;
				1420	DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
				1421	op += oneSeqSize;
				1422	if (UNLIKELY(!--nbSeq))
				1423	break;
				1424	BIT_reloadDStream(&(seqState.DStream));
				1425	sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1426	}
				1427
				1428	/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
				1429	if (nbSeq > 0) {
				1430	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
				1431	if (leftoverLit)
				1432	{
				1433	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
				1434	ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
				1435	sequence.litLength -= leftoverLit;
				1436	op += leftoverLit;
				1437	}
				1438	litPtr = dctx->litExtraBuffer;
				1439	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1440	dctx->litBufferLocation = ZSTD_not_in_dst;
				1441	{
				1442	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
				1443	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1444	assert(!ZSTD_isError(oneSeqSize));
				1445	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
				1446	#endif
				1447	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
				1448	return oneSeqSize;
				1449	DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
				1450	op += oneSeqSize;
				1451	if (--nbSeq)
				1452	BIT_reloadDStream(&(seqState.DStream));
				1453	}
				1454	}
				1455	}
				1456
				1457	if (nbSeq > 0) /* there is remaining lit from extra buffer */
				1458	{
				1459
				1460	#if defined(__x86_64__)
				1461	__asm__(".p2align 6");
				1462	__asm__("nop");
				1463	# if __GNUC__ != 7
				1464	/* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
				1465	__asm__(".p2align 4");
				1466	__asm__("nop");
				1467	__asm__(".p2align 3");
				1468	# elif __GNUC__ >= 11
				1469	__asm__(".p2align 3");
				1470	# else
				1471	__asm__(".p2align 5");
				1472	__asm__("nop");
				1473	__asm__(".p2align 3");
				1474	# endif
				1475	#endif
				1476
				1477	for (; ; ) {
				1478	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1479	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
				1480	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1481	assert(!ZSTD_isError(oneSeqSize));
				1482	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
				1483	#endif
				1484	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
				1485	return oneSeqSize;
				1486	DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
				1487	op += oneSeqSize;
				1488	if (UNLIKELY(!--nbSeq))
				1489	break;
				1490	BIT_reloadDStream(&(seqState.DStream));
				1491	}
				1492	}
				1493
				1494	/* check if reached exact end */
				1495	DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
				1496	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
				1497	RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
				1498	/* save reps for next block */
				1499	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
				1500	}
				1501
				1502	/* last literal segment */
				1503	if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
				1504	{
				1505	size_t const lastLLSize = litBufferEnd - litPtr;
				1506	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
				1507	if (op != NULL) {
				1508	ZSTD_memmove(op, litPtr, lastLLSize);
				1509	op += lastLLSize;
				1510	}
				1511	litPtr = dctx->litExtraBuffer;
				1512	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1513	dctx->litBufferLocation = ZSTD_not_in_dst;
				1514	}
				1515	{ size_t const lastLLSize = litBufferEnd - litPtr;
				1516	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
				1517	if (op != NULL) {
				1518	ZSTD_memcpy(op, litPtr, lastLLSize);
				1519	op += lastLLSize;
				1520	}
				1521	}
				1522
				1523	return op-ostart;
				1524	}
				1525
				1526	FORCE_INLINE_TEMPLATE size_t
				1527	DONT_VECTORIZE
				1528	ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
				1529	void* dst, size_t maxDstSize,
				1530	const void* seqStart, size_t seqSize, int nbSeq,
				1531	const ZSTD_longOffset_e isLongOffset,
				1532	const int frame)
				1533	{
				1534	const BYTE* ip = (const BYTE*)seqStart;
				1535	const BYTE* const iend = ip + seqSize;
				1536	BYTE* const ostart = (BYTE*)dst;
				1537	BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
				1538	BYTE* op = ostart;
				1539	const BYTE* litPtr = dctx->litPtr;
				1540	const BYTE* const litEnd = litPtr + dctx->litSize;
				1541	const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
				1542	const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
				1543	const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
				1544	DEBUGLOG(5, "ZSTD_decompressSequences_body");
				1545	(void)frame;
				1546
				1547	/* Regen sequences */
				1548	if (nbSeq) {
				1549	seqState_t seqState;
				1550	dctx->fseEntropy = 1;
				1551	{ U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
				1552	RETURN_ERROR_IF(
				1553	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
				1554	corruption_detected, "");
				1555	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
				1556	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
				1557	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
				1558	assert(dst != NULL);
				1559
				1560	ZSTD_STATIC_ASSERT(
				1561	BIT_DStream_unfinished < BIT_DStream_completed &&
				1562	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
				1563	BIT_DStream_completed < BIT_DStream_overflow);
				1564
				1565	#if defined(__x86_64__)
				1566	__asm__(".p2align 6");
				1567	__asm__("nop");
				1568	# if __GNUC__ >= 7
				1569	__asm__(".p2align 5");
				1570	__asm__("nop");
				1571	__asm__(".p2align 3");
				1572	# else
				1573	__asm__(".p2align 4");
				1574	__asm__("nop");
				1575	__asm__(".p2align 3");
				1576	# endif
				1577	#endif
				1578
				1579	for ( ; ; ) {
				1580	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1581	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
				1582	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1583	assert(!ZSTD_isError(oneSeqSize));
				1584	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
				1585	#endif
				1586	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
				1587	return oneSeqSize;
				1588	DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
				1589	op += oneSeqSize;
				1590	if (UNLIKELY(!--nbSeq))
				1591	break;
				1592	BIT_reloadDStream(&(seqState.DStream));
				1593	}
				1594
				1595	/* check if reached exact end */
				1596	DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
				1597	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
				1598	RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
				1599	/* save reps for next block */
				1600	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
				1601	}
				1602
				1603	/* last literal segment */
				1604	{ size_t const lastLLSize = litEnd - litPtr;
				1605	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
				1606	if (op != NULL) {
				1607	ZSTD_memcpy(op, litPtr, lastLLSize);
				1608	op += lastLLSize;
				1609	}
				1610	}
				1611
				1612	return op-ostart;
				1613	}
				1614
				1615	static size_t
				1616	ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
				1617	void* dst, size_t maxDstSize,
				1618	const void* seqStart, size_t seqSize, int nbSeq,
				1619	const ZSTD_longOffset_e isLongOffset,
				1620	const int frame)
				1621	{
				1622	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1623	}
				1624
				1625	static size_t
				1626	ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
				1627	void* dst, size_t maxDstSize,
				1628	const void* seqStart, size_t seqSize, int nbSeq,
				1629	const ZSTD_longOffset_e isLongOffset,
				1630	const int frame)
				1631	{
				1632	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1633	}
				1634	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
				1635
				1636	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1637
				1638	FORCE_INLINE_TEMPLATE size_t
				1639	ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
				1640	const BYTE* const prefixStart, const BYTE* const dictEnd)
				1641	{
				1642	prefetchPos += sequence.litLength;
				1643	{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
				1644	const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
				1645	* No consequence though : memory address is only used for prefetching, not for dereferencing */
				1646	PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
				1647	}
				1648	return prefetchPos + sequence.matchLength;
				1649	}
				1650
				1651	/* This decoding function employs prefetching
				1652	* to reduce latency impact of cache misses.
				1653	* It's generally employed when block contains a significant portion of long-distance matches
				1654	* or when coupled with a "cold" dictionary */
				1655	FORCE_INLINE_TEMPLATE size_t
				1656	ZSTD_decompressSequencesLong_body(
				1657	ZSTD_DCtx* dctx,
				1658	void* dst, size_t maxDstSize,
				1659	const void* seqStart, size_t seqSize, int nbSeq,
				1660	const ZSTD_longOffset_e isLongOffset,
				1661	const int frame)
				1662	{
				1663	const BYTE* ip = (const BYTE*)seqStart;
				1664	const BYTE* const iend = ip + seqSize;
				1665	BYTE* const ostart = (BYTE*)dst;
				1666	BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
				1667	BYTE* op = ostart;
				1668	const BYTE* litPtr = dctx->litPtr;
				1669	const BYTE* litBufferEnd = dctx->litBufferEnd;
				1670	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
				1671	const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
				1672	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
				1673	(void)frame;
				1674
				1675	/* Regen sequences */
				1676	if (nbSeq) {
				1677	#define STORED_SEQS 8
				1678	#define STORED_SEQS_MASK (STORED_SEQS-1)
				1679	#define ADVANCED_SEQS STORED_SEQS
				1680	seq_t sequences[STORED_SEQS];
				1681	int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
				1682	seqState_t seqState;
				1683	int seqNb;
				1684	size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
				1685
				1686	dctx->fseEntropy = 1;
				1687	{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
				1688	assert(dst != NULL);
				1689	assert(iend >= ip);
				1690	RETURN_ERROR_IF(
				1691	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
				1692	corruption_detected, "");
				1693	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
				1694	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
				1695	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
				1696
				1697	/* prepare in advance */
				1698	for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
				1699	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1700	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
				1701	sequences[seqNb] = sequence;
				1702	}
				1703	RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
				1704
				1705	/* decompress without stomping litBuffer */
				1706	for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
				1707	seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1708	size_t oneSeqSize;
				1709
				1710	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
				1711	{
				1712	/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
				1713	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
				1714	if (leftoverLit)
				1715	{
				1716	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
				1717	ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
				1718	sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
				1719	op += leftoverLit;
				1720	}
				1721	litPtr = dctx->litExtraBuffer;
				1722	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1723	dctx->litBufferLocation = ZSTD_not_in_dst;
				1724	oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
				1725	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1726	assert(!ZSTD_isError(oneSeqSize));
				1727	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
				1728	#endif
				1729	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1730
				1731	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
				1732	sequences[seqNb & STORED_SEQS_MASK] = sequence;
				1733	op += oneSeqSize;
				1734	}
				1735	else
				1736	{
				1737	/* lit buffer is either wholly contained in first or second split, or not split at all*/
				1738	oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
				1739	ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
				1740	ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
				1741	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1742	assert(!ZSTD_isError(oneSeqSize));
				1743	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
				1744	#endif
				1745	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1746
				1747	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
				1748	sequences[seqNb & STORED_SEQS_MASK] = sequence;
				1749	op += oneSeqSize;
				1750	}
				1751	}
				1752	RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
				1753
				1754	/* finish queue */
				1755	seqNb -= seqAdvance;
				1756	for ( ; seqNb<nbSeq ; seqNb++) {
				1757	seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
				1758	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
				1759	{
				1760	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
				1761	if (leftoverLit)
				1762	{
				1763	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
				1764	ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
				1765	sequence->litLength -= leftoverLit;
				1766	op += leftoverLit;
				1767	}
				1768	litPtr = dctx->litExtraBuffer;
				1769	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1770	dctx->litBufferLocation = ZSTD_not_in_dst;
				1771	{
				1772	size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
				1773	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1774	assert(!ZSTD_isError(oneSeqSize));
				1775	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
				1776	#endif
				1777	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1778	op += oneSeqSize;
				1779	}
				1780	}
				1781	else
				1782	{
				1783	size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
				1784	ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
				1785	ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
				1786	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1787	assert(!ZSTD_isError(oneSeqSize));
				1788	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
				1789	#endif
				1790	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1791	op += oneSeqSize;
				1792	}
				1793	}
				1794
				1795	/* save reps for next block */
				1796	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
				1797	}
				1798
				1799	/* last literal segment */
				1800	if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
				1801	{
				1802	size_t const lastLLSize = litBufferEnd - litPtr;
				1803	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
				1804	if (op != NULL) {
				1805	ZSTD_memmove(op, litPtr, lastLLSize);
				1806	op += lastLLSize;
				1807	}
				1808	litPtr = dctx->litExtraBuffer;
				1809	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1810	}
				1811	{ size_t const lastLLSize = litBufferEnd - litPtr;
				1812	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
				1813	if (op != NULL) {
				1814	ZSTD_memmove(op, litPtr, lastLLSize);
				1815	op += lastLLSize;
				1816	}
				1817	}
				1818
				1819	return op-ostart;
				1820	}
				1821
				1822	static size_t
				1823	ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
				1824	void* dst, size_t maxDstSize,
				1825	const void* seqStart, size_t seqSize, int nbSeq,
				1826	const ZSTD_longOffset_e isLongOffset,
				1827	const int frame)
				1828	{
				1829	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1830	}
				1831	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
				1832
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	1833	#if DYNAMIC_BMI2
				1834
				1835	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1836	static BMI2_TARGET_ATTRIBUTE size_t
				1837	DONT_VECTORIZE
				1838	ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
				1839	void* dst, size_t maxDstSize,
				1840	const void* seqStart, size_t seqSize, int nbSeq,
				1841	const ZSTD_longOffset_e isLongOffset,
				1842	const int frame)
				1843	{
				1844	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1845	}
				1846	static BMI2_TARGET_ATTRIBUTE size_t
				1847	DONT_VECTORIZE
				1848	ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
				1849	void* dst, size_t maxDstSize,
				1850	const void* seqStart, size_t seqSize, int nbSeq,
				1851	const ZSTD_longOffset_e isLongOffset,
				1852	const int frame)
				1853	{
				1854	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1855	}
				1856	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
				1857
				1858	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1859	static BMI2_TARGET_ATTRIBUTE size_t
				1860	ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
				1861	void* dst, size_t maxDstSize,
				1862	const void* seqStart, size_t seqSize, int nbSeq,
				1863	const ZSTD_longOffset_e isLongOffset,
				1864	const int frame)
				1865	{
				1866	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1867	}
				1868	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
				1869
				1870	#endif /* DYNAMIC_BMI2 */
				1871
				1872	typedef size_t (*ZSTD_decompressSequences_t)(
				1873	ZSTD_DCtx* dctx,
				1874	void* dst, size_t maxDstSize,
				1875	const void* seqStart, size_t seqSize, int nbSeq,
				1876	const ZSTD_longOffset_e isLongOffset,
				1877	const int frame);
				1878
				1879	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1880	static size_t
				1881	ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
				1882	const void* seqStart, size_t seqSize, int nbSeq,
				1883	const ZSTD_longOffset_e isLongOffset,
				1884	const int frame)
				1885	{
				1886	DEBUGLOG(5, "ZSTD_decompressSequences");
				1887	#if DYNAMIC_BMI2
				1888	if (ZSTD_DCtx_get_bmi2(dctx)) {
				1889	return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1890	}
				1891	#endif
				1892	return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1893	}
				1894	static size_t
				1895	ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
				1896	const void* seqStart, size_t seqSize, int nbSeq,
				1897	const ZSTD_longOffset_e isLongOffset,
				1898	const int frame)
				1899	{
				1900	DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
				1901	#if DYNAMIC_BMI2
				1902	if (ZSTD_DCtx_get_bmi2(dctx)) {
				1903	return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1904	}
				1905	#endif
				1906	return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1907	}
				1908	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
				1909
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	1910	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1911	/* ZSTD_decompressSequencesLong() :
				1912	* decompression function triggered when a minimum share of offsets is considered "long",
				1913	* aka out of cache.
				1914	* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
				1915	* This function will try to mitigate main memory latency through the use of prefetching */
				1916	static size_t
				1917	ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
				1918	void* dst, size_t maxDstSize,
				1919	const void* seqStart, size_t seqSize, int nbSeq,
				1920	const ZSTD_longOffset_e isLongOffset,
				1921	const int frame)
				1922	{
				1923	DEBUGLOG(5, "ZSTD_decompressSequencesLong");
				1924	#if DYNAMIC_BMI2
				1925	if (ZSTD_DCtx_get_bmi2(dctx)) {
				1926	return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1927	}
				1928	#endif
				1929	return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1930	}
				1931	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
				1932
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	1933	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				1934	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				1935	/* ZSTD_getLongOffsetsShare() :
				1936	* condition : offTable must be valid
				1937	* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
				1938	* compared to maximum possible of (1<<OffFSELog) */
				1939	static unsigned
				1940	ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
				1941	{
				1942	const void* ptr = offTable;
				1943	U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
				1944	const ZSTD_seqSymbol* table = offTable + 1;
				1945	U32 const max = 1 << tableLog;
				1946	U32 u, total = 0;
				1947	DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
				1948
				1949	assert(max <= (1 << OffFSELog)); /* max not too large */
				1950	for (u=0; u<max; u++) {
				1951	if (table[u].nbAdditionalBits > 22) total += 1;
				1952	}
				1953
				1954	assert(tableLog <= OffFSELog);
				1955	total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
				1956
				1957	return total;
				1958	}
				1959	#endif
				1960
				1961	size_t
				1962	ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
				1963	void* dst, size_t dstCapacity,
				1964	const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
				1965	{ /* blockType == blockCompressed */
				1966	const BYTE* ip = (const BYTE*)src;
				1967	/* isLongOffset must be true if there are long offsets.
				1968	* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
				1969	* We don't expect that to be the case in 64-bit mode.
				1970	* In block mode, window size is not known, so we have to be conservative.
				1971	* (note: but it could be evaluated from current-lowLimit)
				1972	*/
				1973	ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame \|\| (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
				1974	DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
				1975
				1976	RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
				1977
				1978	/* Decode literals section */
				1979	{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
				1980	DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
				1981	if (ZSTD_isError(litCSize)) return litCSize;
				1982	ip += litCSize;
				1983	srcSize -= litCSize;
				1984	}
				1985
				1986	/* Build Decoding Tables */
				1987	{
				1988	/* These macros control at build-time which decompressor implementation
				1989	* we use. If neither is defined, we do some inspection and dispatch at
				1990	* runtime.
				1991	*/
				1992	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				1993	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				1994	int usePrefetchDecoder = dctx->ddictIsCold;
				1995	#endif
				1996	int nbSeq;
				1997	size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
				1998	if (ZSTD_isError(seqHSize)) return seqHSize;
				1999	ip += seqHSize;
				2000	srcSize -= seqHSize;
				2001
				2002	RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
				2003
				2004	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				2005	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				2006	if ( !usePrefetchDecoder
				2007	&& (!frame \|\| (dctx->fParams.windowSize > (1<<24)))
				2008	&& (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
				2009	U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
				2010	U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
				2011	usePrefetchDecoder = (shareLongOffsets >= minShare);
				2012	}
				2013	#endif
				2014
				2015	dctx->ddictIsCold = 0;
				2016
				2017	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				2018	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				2019	if (usePrefetchDecoder)
				2020	#endif
				2021	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				2022	return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
				2023	#endif
				2024
				2025	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				2026	/* else */
				2027	if (dctx->litBufferLocation == ZSTD_split)
				2028	return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
				2029	else
				2030	return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
				2031	#endif
				2032	}
				2033	}
				2034
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	2035	void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
				2036	{
				2037	if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
				2038	dctx->dictEnd = dctx->previousDstEnd;
				2039	dctx->virtualStart = (const char)dst - ((const char)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
				2040	dctx->prefixStart = dst;
				2041	dctx->previousDstEnd = dst;
				2042	}
				2043	}
				2044
Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	2045	size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
				2046	void* dst, size_t dstCapacity,
				2047	const void* src, size_t srcSize)
				2048	{
				2049	size_t dSize;
				2050	ZSTD_checkContinuity(dctx, dst, dstCapacity);
				2051	dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
				2052	dctx->previousDstEnd = (char*)dst + dSize;
				2053	return dSize;
				2054	}