Blame - lib/zstd/decompress/zstd_decompress_block.c - filogic/uboot

blob: c1913b8e7c89747c54f4896815701cc17e07febc [file] [log] [blame]

Brandon Maier	dbe88da	2023-01-12 10:27:45 -0600	[diff] [blame]	1	/*
				2	* Copyright (c) Yann Collet, Facebook, Inc.
				3	* All rights reserved.
				4	*
				5	* This source code is licensed under both the BSD-style license (found in the
				6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
				7	* in the COPYING file in the root directory of this source tree).
				8	* You may select, at your option, one of the above-listed licenses.
				9	*/
				10
				11	/* zstd_decompress_block :
				12	* this module takes care of decompressing _compressed_ block */
				13
				14	/*-******************************************************
				15	* Dependencies
				16	*********************************************************/
				17	#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
				18	#include "../common/compiler.h" /* prefetch */
				19	#include "../common/cpu.h" /* bmi2 */
				20	#include "../common/mem.h" /* low level memory routines */
				21	#define FSE_STATIC_LINKING_ONLY
				22	#include "../common/fse.h"
				23	#define HUF_STATIC_LINKING_ONLY
				24	#include "../common/huf.h"
				25	#include "../common/zstd_internal.h"
				26	#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
				27	#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
				28	#include "zstd_decompress_block.h"
				29
				30	/*_******************************************************
				31	* Macros
				32	**********************************************************/
				33
				34	/* These two optional macros force the use one way or another of the two
				35	* ZSTD_decompressSequences implementations. You can't force in both directions
				36	* at the same time.
				37	*/
				38	#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				39	defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				40	#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
				41	#endif
				42
				43
				44	/*_******************************************************
				45	* Memory operations
				46	**********************************************************/
				/* ZSTD_copy4() :
				 * Copies exactly 4 bytes from `src` to `dst` through ZSTD_memcpy. */
				static void ZSTD_copy4(void* dst, const void* src)
				{
				    ZSTD_memcpy(dst, src, 4);
				}
				48
				49
				50	/*-************************************************************
				51	* Block decoding
				52	***************************************************************/
				53
				54	/*! ZSTD_getcBlockSize() :
				55	* Provides the size of compressed block from block header `src` */
				56	size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
				57	blockProperties_t* bpPtr)
				58	{
				59	RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
				60
				61	{ U32 const cBlockHeader = MEM_readLE24(src);
				62	U32 const cSize = cBlockHeader >> 3;
				63	bpPtr->lastBlock = cBlockHeader & 1;
				64	bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
				65	bpPtr->origSize = cSize; /* only useful for RLE */
				66	if (bpPtr->blockType == bt_rle) return 1;
				67	RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
				68	return cSize;
				69	}
				70	}
				71
				72	/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
				73	static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
				74	const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
				75	{
				76	if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
				77	{
				78	/* room for litbuffer to fit without read faulting */
				79	dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
				80	dctx->litBufferEnd = dctx->litBuffer + litSize;
				81	dctx->litBufferLocation = ZSTD_in_dst;
				82	}
				83	else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
				84	{
				85	/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
				86	if (splitImmediately) {
				87	/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
				88	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
				89	dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
				90	}
				91	else {
				92	/* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
				93	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
				94	dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
				95	}
				96	dctx->litBufferLocation = ZSTD_split;
				97	}
				98	else
				99	{
				100	/* fits entirely within litExtraBuffer, so no split is necessary */
				101	dctx->litBuffer = dctx->litExtraBuffer;
				102	dctx->litBufferEnd = dctx->litBuffer + litSize;
				103	dctx->litBufferLocation = ZSTD_not_in_dst;
				104	}
				105	}
				106
				107	/* Hidden declaration for fullbench */
				108	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
				109	const void* src, size_t srcSize,
				110	void* dst, size_t dstCapacity, const streaming_operation streaming);
				111	/*! ZSTD_decodeLiteralsBlock() :
				112	* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
				113	* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
				114	* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
				115	* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
				116	*
					* The low 2 bits of the first source byte select the literals encoding (set_basic, set_rle,
					* set_compressed, set_repeat); the next 2 bits (lhlCode) select the header layout and size.
					* On success dctx->litPtr and dctx->litSize describe the decoded literals.
					*
				117	* @return : nb of bytes read from src (< srcSize )
					* or an error code produced through RETURN_ERROR_IF / RETURN_ERROR
				118	* note : symbol not declared but exposed for fullbench */
				119	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
				120	const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
				121	void* dst, size_t dstCapacity, const streaming_operation streaming)
				122	{
				123	DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
				124	RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
				125
				126	{ const BYTE* const istart = (const BYTE*) src;
				127	symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
				128
				129	switch(litEncType)
				130	{
				131	case set_repeat:
				132	DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
				133	RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
				134	ZSTD_FALLTHROUGH;
				135
				136	case set_compressed:
				137	RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
				138	{ size_t lhSize, litSize, litCSize;
				139	U32 singleStream=0;
				140	U32 const lhlCode = (istart[0] >> 2) & 3;
				141	U32 const lhc = MEM_readLE32(istart);
				142	size_t hufSuccess;
				143	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
				144	switch(lhlCode)
				145	{
				146	case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
				147	/* 2 - 2 - 10 - 10 */
				148	singleStream = !lhlCode;
				149	lhSize = 3;
				150	litSize = (lhc >> 4) & 0x3FF;
				151	litCSize = (lhc >> 14) & 0x3FF;
				152	break;
				153	case 2:
				154	/* 2 - 2 - 14 - 14 */
				155	lhSize = 4;
				156	litSize = (lhc >> 4) & 0x3FFF;
				157	litCSize = lhc >> 18;
				158	break;
				159	case 3:
				160	/* 2 - 2 - 18 - 18 */
				161	lhSize = 5;
				162	litSize = (lhc >> 4) & 0x3FFFF;
				163	litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
				164	break;
				165	}
				166	RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
				167	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
				168	RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
				169	RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
					/* splitImmediately == 0 : in the split case the whole litSize is decoded contiguously first */
				170	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
				171
				172	/* prefetch huffman table if cold */
				173	if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
				174	PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
				175	}
				176
					/* set_repeat decodes with the table referenced by dctx->HUFptr;
					* set_compressed decodes through dctx->entropy.hufTable */
				177	if (litEncType==set_repeat) {
				178	if (singleStream) {
				179	hufSuccess = HUF_decompress1X_usingDTable_bmi2(
				180	dctx->litBuffer, litSize, istart+lhSize, litCSize,
				181	dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
				182	} else {
				183	hufSuccess = HUF_decompress4X_usingDTable_bmi2(
				184	dctx->litBuffer, litSize, istart+lhSize, litCSize,
				185	dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
				186	}
				187	} else {
				188	if (singleStream) {
				189	#if defined(HUF_FORCE_DECOMPRESS_X2)
				190	hufSuccess = HUF_decompress1X_DCtx_wksp(
				191	dctx->entropy.hufTable, dctx->litBuffer, litSize,
				192	istart+lhSize, litCSize, dctx->workspace,
				193	sizeof(dctx->workspace));
				194	#else
				195	hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
				196	dctx->entropy.hufTable, dctx->litBuffer, litSize,
				197	istart+lhSize, litCSize, dctx->workspace,
				198	sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
				199	#endif
				200	} else {
				201	hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
				202	dctx->entropy.hufTable, dctx->litBuffer, litSize,
				203	istart+lhSize, litCSize, dctx->workspace,
				204	sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
				205	}
				206	}
					/* Split case : copy the last ZSTD_LITBUFFEREXTRASIZE decoded bytes into litExtraBuffer,
					* then shift the remaining bytes forward by (ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH)
					* and adjust litBuffer / litBufferEnd accordingly.
					* Note : this runs before hufSuccess is checked just below. */
				207	if (dctx->litBufferLocation == ZSTD_split)
				208	{
				209	ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
				210	ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
				211	dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
				212	dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
				213	}
				214
				215	RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
				216
				217	dctx->litPtr = dctx->litBuffer;
				218	dctx->litSize = litSize;
				219	dctx->litEntropy = 1;
				220	if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
				221	return litCSize + lhSize;
				222	}
				223
				224	case set_basic:
				225	{ size_t litSize, lhSize;
				226	U32 const lhlCode = ((istart[0]) >> 2) & 3;
				227	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
				228	switch(lhlCode)
				229	{
				230	case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
				231	lhSize = 1;
				232	litSize = istart[0] >> 3;
				233	break;
				234	case 1:
				235	lhSize = 2;
				236	litSize = MEM_readLE16(istart) >> 4;
				237	break;
				238	case 3:
				239	lhSize = 3;
				240	litSize = MEM_readLE24(istart) >> 4;
				241	break;
				242	}
				243
				244	RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
				245	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
				246	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
				247	if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
				248	RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
				249	if (dctx->litBufferLocation == ZSTD_split)
				250	{
				251	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
				252	ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
				253	}
				254	else
				255	{
				256	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
				257	}
				258	dctx->litPtr = dctx->litBuffer;
				259	dctx->litSize = litSize;
				260	return lhSize+litSize;
				261	}
				262	/* direct reference into compressed stream */
				263	dctx->litPtr = istart+lhSize;
				264	dctx->litSize = litSize;
				265	dctx->litBufferEnd = dctx->litPtr + litSize;
				266	dctx->litBufferLocation = ZSTD_not_in_dst;
				267	return lhSize+litSize;
				268	}
				269
				270	case set_rle:
				271	{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
				272	size_t litSize, lhSize;
				273	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
				274	switch(lhlCode)
				275	{
				276	case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
				277	lhSize = 1;
				278	litSize = istart[0] >> 3;
				279	break;
				280	case 1:
				281	lhSize = 2;
				282	litSize = MEM_readLE16(istart) >> 4;
				283	break;
				284	case 3:
				285	lhSize = 3;
				286	litSize = MEM_readLE24(istart) >> 4;
				287	RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
				288	break;
				289	}
				290	RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
				291	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
				292	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
				293	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
					/* the single byte following the header (istart[lhSize]) is replicated litSize times */
				294	if (dctx->litBufferLocation == ZSTD_split)
				295	{
				296	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
				297	ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
				298	}
				299	else
				300	{
				301	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
				302	}
				303	dctx->litPtr = dctx->litBuffer;
				304	dctx->litSize = litSize;
				305	return lhSize+1;
				306	}
				307	default:
				308	RETURN_ERROR(corruption_detected, "impossible");
				309	}
				310	}
				311	}
				312
				313	/* Default FSE distribution tables.
				314	* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
				315	* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
				316	* They were generated programmatically with following method :
				317	* - start from default distributions, present in /lib/common/zstd_internal.h
				318	* - generate tables normally, using ZSTD_buildFSETable()
				319	* - printout the content of tables
				320	* - prettify output, report below, test with fuzzer to ensure it's correct */
				321
				322	/* Default FSE distribution table for Literal Lengths.
					* Entry 0 is the table header; the (1<<LL_DEFAULTNORMLOG) entries after it are the decoding cells. */
				323	static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
				324	{ 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
				325	/* nextState, nbAddBits, nbBits, baseVal */
				326	{ 0, 0, 4, 0}, { 16, 0, 4, 0},
				327	{ 32, 0, 5, 1}, { 0, 0, 5, 3},
				328	{ 0, 0, 5, 4}, { 0, 0, 5, 6},
				329	{ 0, 0, 5, 7}, { 0, 0, 5, 9},
				330	{ 0, 0, 5, 10}, { 0, 0, 5, 12},
				331	{ 0, 0, 6, 14}, { 0, 1, 5, 16},
				332	{ 0, 1, 5, 20}, { 0, 1, 5, 22},
				333	{ 0, 2, 5, 28}, { 0, 3, 5, 32},
				334	{ 0, 4, 5, 48}, { 32, 6, 5, 64},
				335	{ 0, 7, 5, 128}, { 0, 8, 6, 256},
				336	{ 0, 10, 6, 1024}, { 0, 12, 6, 4096},
				337	{ 32, 0, 4, 0}, { 0, 0, 4, 1},
				338	{ 0, 0, 5, 2}, { 32, 0, 5, 4},
				339	{ 0, 0, 5, 5}, { 32, 0, 5, 7},
				340	{ 0, 0, 5, 8}, { 32, 0, 5, 10},
				341	{ 0, 0, 5, 11}, { 0, 0, 6, 13},
				342	{ 32, 1, 5, 16}, { 0, 1, 5, 18},
				343	{ 32, 1, 5, 22}, { 0, 2, 5, 24},
				344	{ 32, 3, 5, 32}, { 0, 3, 5, 40},
				345	{ 0, 6, 4, 64}, { 16, 6, 4, 64},
				346	{ 32, 7, 5, 128}, { 0, 9, 6, 512},
				347	{ 0, 11, 6, 2048}, { 48, 0, 4, 0},
				348	{ 16, 0, 4, 1}, { 32, 0, 5, 2},
				349	{ 32, 0, 5, 3}, { 32, 0, 5, 5},
				350	{ 32, 0, 5, 6}, { 32, 0, 5, 8},
				351	{ 32, 0, 5, 9}, { 32, 0, 5, 11},
				352	{ 32, 0, 5, 12}, { 0, 0, 6, 15},
				353	{ 32, 1, 5, 18}, { 32, 1, 5, 20},
				354	{ 32, 2, 5, 24}, { 32, 2, 5, 28},
				355	{ 32, 3, 5, 40}, { 32, 4, 5, 48},
				356	{ 0, 16, 6,65536}, { 0, 15, 6,32768},
				357	{ 0, 14, 6,16384}, { 0, 13, 6, 8192},
				358	}; /* LL_defaultDTable */
				359
				360	/* Default FSE distribution table for Offset Codes.
					* Entry 0 is the table header; the (1<<OF_DEFAULTNORMLOG) entries after it are the decoding cells. */
				361	static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
				362	{ 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
				363	/* nextState, nbAddBits, nbBits, baseVal */
				364	{ 0, 0, 5, 0}, { 0, 6, 4, 61},
				365	{ 0, 9, 5, 509}, { 0, 15, 5,32765},
				366	{ 0, 21, 5,2097149}, { 0, 3, 5, 5},
				367	{ 0, 7, 4, 125}, { 0, 12, 5, 4093},
				368	{ 0, 18, 5,262141}, { 0, 23, 5,8388605},
				369	{ 0, 5, 5, 29}, { 0, 8, 4, 253},
				370	{ 0, 14, 5,16381}, { 0, 20, 5,1048573},
				371	{ 0, 2, 5, 1}, { 16, 7, 4, 125},
				372	{ 0, 11, 5, 2045}, { 0, 17, 5,131069},
				373	{ 0, 22, 5,4194301}, { 0, 4, 5, 13},
				374	{ 16, 8, 4, 253}, { 0, 13, 5, 8189},
				375	{ 0, 19, 5,524285}, { 0, 1, 5, 1},
				376	{ 16, 6, 4, 61}, { 0, 10, 5, 1021},
				377	{ 0, 16, 5,65533}, { 0, 28, 5,268435453},
				378	{ 0, 27, 5,134217725}, { 0, 26, 5,67108861},
				379	{ 0, 25, 5,33554429}, { 0, 24, 5,16777213},
				380	}; /* OF_defaultDTable */
				381
				382
				383	/* Default FSE distribution table for Match Lengths.
					* Entry 0 is the table header; the (1<<ML_DEFAULTNORMLOG) entries after it are the decoding cells. */
				384	static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
				385	{ 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
				386	/* nextState, nbAddBits, nbBits, baseVal */
				387	{ 0, 0, 6, 3}, { 0, 0, 4, 4},
				388	{ 32, 0, 5, 5}, { 0, 0, 5, 6},
				389	{ 0, 0, 5, 8}, { 0, 0, 5, 9},
				390	{ 0, 0, 5, 11}, { 0, 0, 6, 13},
				391	{ 0, 0, 6, 16}, { 0, 0, 6, 19},
				392	{ 0, 0, 6, 22}, { 0, 0, 6, 25},
				393	{ 0, 0, 6, 28}, { 0, 0, 6, 31},
				394	{ 0, 0, 6, 34}, { 0, 1, 6, 37},
				395	{ 0, 1, 6, 41}, { 0, 2, 6, 47},
				396	{ 0, 3, 6, 59}, { 0, 4, 6, 83},
				397	{ 0, 7, 6, 131}, { 0, 9, 6, 515},
				398	{ 16, 0, 4, 4}, { 0, 0, 4, 5},
				399	{ 32, 0, 5, 6}, { 0, 0, 5, 7},
				400	{ 32, 0, 5, 9}, { 0, 0, 5, 10},
				401	{ 0, 0, 6, 12}, { 0, 0, 6, 15},
				402	{ 0, 0, 6, 18}, { 0, 0, 6, 21},
				403	{ 0, 0, 6, 24}, { 0, 0, 6, 27},
				404	{ 0, 0, 6, 30}, { 0, 0, 6, 33},
				405	{ 0, 1, 6, 35}, { 0, 1, 6, 39},
				406	{ 0, 2, 6, 43}, { 0, 3, 6, 51},
				407	{ 0, 4, 6, 67}, { 0, 5, 6, 99},
				408	{ 0, 8, 6, 259}, { 32, 0, 4, 4},
				409	{ 48, 0, 4, 4}, { 16, 0, 4, 5},
				410	{ 32, 0, 5, 7}, { 32, 0, 5, 8},
				411	{ 32, 0, 5, 10}, { 32, 0, 5, 11},
				412	{ 0, 0, 6, 14}, { 0, 0, 6, 17},
				413	{ 0, 0, 6, 20}, { 0, 0, 6, 23},
				414	{ 0, 0, 6, 26}, { 0, 0, 6, 29},
				415	{ 0, 0, 6, 32}, { 0, 16, 6,65539},
				416	{ 0, 15, 6,32771}, { 0, 14, 6,16387},
				417	{ 0, 13, 6, 8195}, { 0, 12, 6, 4099},
				418	{ 0, 11, 6, 2051}, { 0, 10, 6, 1027},
				419	}; /* ML_defaultDTable */
				420
				421
				422	static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
				423	{
				424	void* ptr = dt;
				425	ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
				426	ZSTD_seqSymbol* const cell = dt + 1;
				427
				428	DTableH->tableLog = 0;
				429	DTableH->fastMode = 0;
				430
				431	cell->nbBits = 0;
				432	cell->nextState = 0;
				433	assert(nbAddBits < 255);
				434	cell->nbAdditionalBits = nbAddBits;
				435	cell->baseValue = baseValue;
				436	}
				437
				438
				439	/* ZSTD_buildFSETable() :
				440	* generate FSE decoding table for one symbol (ll, ml or off)
				441	* cannot fail if input is valid =>
				442	* all inputs are presumed validated at this stage
					*
					* dt : output table; dt[0] receives the header, decoding cells start at dt[1]
					* normalizedCounter : per-symbol counts for symbols [0..maxSymbolValue]; -1 marks a low-probability symbol
					* baseValue, nbAdditionalBits : per-symbol values copied into each decoding cell
					* wksp : scratch area, used as U16 symbolNext[MaxSeq+1] followed by the byte array `spread` */
				443	FORCE_INLINE_TEMPLATE
				444	void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
				445	const short* normalizedCounter, unsigned maxSymbolValue,
				446	const U32* baseValue, const U8* nbAdditionalBits,
				447	unsigned tableLog, void* wksp, size_t wkspSize)
				448	{
				449	ZSTD_seqSymbol* const tableDecode = dt+1;
				450	U32 const maxSV1 = maxSymbolValue + 1;
				451	U32 const tableSize = 1 << tableLog;
				452
				453	U16* symbolNext = (U16*)wksp;
				454	BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
				455	U32 highThreshold = tableSize - 1;
				456
				457
				458	/* Sanity Checks */
				459	assert(maxSymbolValue <= MaxSeq);
				460	assert(tableLog <= MaxFSELog);
				461	assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
				462	(void)wkspSize;
				463	/* Init, lay down lowprob symbols */
				464	{ ZSTD_seqSymbol_header DTableH;
				465	DTableH.tableLog = tableLog;
				466	DTableH.fastMode = 1;
				467	{ S16 const largeLimit= (S16)(1 << (tableLog-1));
				468	U32 s;
				469	for (s=0; s<maxSV1; s++) {
				470	if (normalizedCounter[s]==-1) {
					/* low-probability symbol : takes one cell at the top of the table */
				471	tableDecode[highThreshold--].baseValue = s;
				472	symbolNext[s] = 1;
				473	} else {
					/* any count >= tableSize/2 disables fastMode */
				474	if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
				475	assert(normalizedCounter[s]>=0);
				476	symbolNext[s] = (U16)normalizedCounter[s];
				477	} } }
				478	ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
				479	}
				480
				481	/* Spread symbols */
				482	assert(tableSize <= 512);
				483	/* Specialized symbol spreading for the case when there are
				484	* no low probability (-1 count) symbols. When compressing
				485	* small blocks we avoid low probability symbols to hit this
				486	* case, since header decoding speed matters more.
				487	*/
				488	if (highThreshold == tableSize - 1) {
				489	size_t const tableMask = tableSize-1;
				490	size_t const step = FSE_TABLESTEP(tableSize);
				491	/* First lay down the symbols in order.
				492	* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
				493	* misses since small blocks generally have small table logs, so nearly
				494	* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
				495	* our buffer to handle the over-write.
				496	*/
				497	{
				498	U64 const add = 0x0101010101010101ull;
				499	size_t pos = 0;
				500	U64 sv = 0;
				501	U32 s;
				502	for (s=0; s<maxSV1; ++s, sv += add) {
				503	int i;
				504	int const n = normalizedCounter[s];
				505	MEM_write64(spread + pos, sv);
				506	for (i = 8; i < n; i += 8) {
				507	MEM_write64(spread + pos + i, sv);
				508	}
				509	pos += n;
				510	}
				511	}
				512	/* Now we spread those positions across the table.
				513	* The benefit of doing it in two stages is that we avoid the
				514	* variable size inner loop, which caused lots of branch misses.
				515	* Now we can run through all the positions without any branch misses.
				516	* We unroll the loop twice, since that is what empirically worked best.
				517	*/
				518	{
				519	size_t position = 0;
				520	size_t s;
				521	size_t const unroll = 2;
				522	assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
				523	for (s = 0; s < (size_t)tableSize; s += unroll) {
				524	size_t u;
				525	for (u = 0; u < unroll; ++u) {
				526	size_t const uPosition = (position + (u * step)) & tableMask;
				527	tableDecode[uPosition].baseValue = spread[s + u];
				528	}
				529	position = (position + (unroll * step)) & tableMask;
				530	}
				531	assert(position == 0);
				532	}
				533	} else {
					/* General path : step through the table, skipping the cells above
					* highThreshold that were already given to low-probability symbols. */
				534	U32 const tableMask = tableSize-1;
				535	U32 const step = FSE_TABLESTEP(tableSize);
				536	U32 s, position = 0;
				537	for (s=0; s<maxSV1; s++) {
				538	int i;
				539	int const n = normalizedCounter[s];
				540	for (i=0; i<n; i++) {
				541	tableDecode[position].baseValue = s;
				542	position = (position + step) & tableMask;
				543	while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
				544	} }
				545	assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
				546	}
				547
				548	/* Build Decoding table */
					/* At this point each cell's baseValue temporarily holds its symbol index;
					* the loop below replaces it with the final per-symbol values. */
				549	{
				550	U32 u;
				551	for (u=0; u<tableSize; u++) {
				552	U32 const symbol = tableDecode[u].baseValue;
				553	U32 const nextState = symbolNext[symbol]++;
				554	tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
				555	tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
				556	assert(nbAdditionalBits[symbol] < 255);
				557	tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
				558	tableDecode[u].baseValue = baseValue[symbol];
				559	}
				560	}
				561	}
				562
				563	/* Avoids the FORCE_INLINE of the _body() function. */
				564	static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
				565	const short* normalizedCounter, unsigned maxSymbolValue,
				566	const U32* baseValue, const U8* nbAdditionalBits,
				567	unsigned tableLog, void* wksp, size_t wkspSize)
				568	{
				569	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
				570	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				571	}
				572
				#if DYNAMIC_BMI2
				/* ZSTD_buildFSETable_body_bmi2() :
				 * Same forwarding wrapper as the default variant, compiled with
				 * BMI2_TARGET_ATTRIBUTE; only present when DYNAMIC_BMI2 is enabled. */
				BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
				        const short* normalizedCounter, unsigned maxSymbolValue,
				        const U32* baseValue, const U8* nbAdditionalBits,
				        unsigned tableLog, void* wksp, size_t wkspSize)
				{
				    ZSTD_buildFSETable_body(dt,
				                            normalizedCounter, maxSymbolValue,
				                            baseValue, nbAdditionalBits,
				                            tableLog,
				                            wksp, wkspSize);
				}
				#endif
				583
				584	void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
				585	const short* normalizedCounter, unsigned maxSymbolValue,
				586	const U32* baseValue, const U8* nbAdditionalBits,
				587	unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
				588	{
				589	#if DYNAMIC_BMI2
				590	if (bmi2) {
				591	ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
				592	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				593	return;
				594	}
				595	#endif
				596	(void)bmi2;
				597	ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
				598	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				599	}
				600
				601
				602	/*! ZSTD_buildSeqTable() :
				603	* @return : nb bytes read from src,
				604	* or an error code if it fails */
				605	static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
				606	symbolEncodingType_e type, unsigned max, U32 maxLog,
				607	const void* src, size_t srcSize,
				608	const U32* baseValue, const U8* nbAdditionalBits,
				609	const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
				610	int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
				611	int bmi2)
				612	{
				613	switch(type)
				614	{
				615	case set_rle :
				616	RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
				617	RETURN_ERROR_IF(((const BYTE)src) > max, corruption_detected, "");
				618	{ U32 const symbol = (const BYTE)src;
				619	U32 const baseline = baseValue[symbol];
				620	U8 const nbBits = nbAdditionalBits[symbol];
				621	ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
				622	}
				623	*DTablePtr = DTableSpace;
				624	return 1;
				625	case set_basic :
				626	*DTablePtr = defaultTable;
				627	return 0;
				628	case set_repeat:
				629	RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
				630	/* prefetch FSE table if used */
				631	if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
				632	const void* const pStart = *DTablePtr;
				633	size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
				634	PREFETCH_AREA(pStart, pSize);
				635	}
				636	return 0;
				637	case set_compressed :
				638	{ unsigned tableLog;
				639	S16 norm[MaxSeq+1];
				640	size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
				641	RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
				642	RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
				643	ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
				644	*DTablePtr = DTableSpace;
				645	return headerSize;
				646	}
				647	default :
				648	assert(0);
				649	RETURN_ERROR(GENERIC, "impossible");
				650	}
				651	}
				652
				653	size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
				654	const void* src, size_t srcSize)
				655	{
				656	const BYTE* const istart = (const BYTE*)src;
				657	const BYTE* const iend = istart + srcSize;
				658	const BYTE* ip = istart;
				659	int nbSeq;
				660	DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
				661
				662	/* check */
				663	RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
				664
				665	/* SeqHead */
				666	nbSeq = *ip++;
				667	if (!nbSeq) {
				668	*nbSeqPtr=0;
				669	RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
				670	return 1;
				671	}
				672	if (nbSeq > 0x7F) {
				673	if (nbSeq == 0xFF) {
				674	RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
				675	nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
				676	ip+=2;
				677	} else {
				678	RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
				679	nbSeq = ((nbSeq-0x80)<<8) + *ip++;
				680	}
				681	}
				682	*nbSeqPtr = nbSeq;
				683
				684	/* FSE table descriptors */
				685	RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
				686	{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
				687	symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
				688	symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
				689	ip++;
				690
				691	/* Build DTables */
				692	{ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
				693	LLtype, MaxLL, LLFSELog,
				694	ip, iend-ip,
				695	LL_base, LL_bits,
				696	LL_defaultDTable, dctx->fseEntropy,
				697	dctx->ddictIsCold, nbSeq,
				698	dctx->workspace, sizeof(dctx->workspace),
				699	ZSTD_DCtx_get_bmi2(dctx));
				700	RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
				701	ip += llhSize;
				702	}
				703
				704	{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
				705	OFtype, MaxOff, OffFSELog,
				706	ip, iend-ip,
				707	OF_base, OF_bits,
				708	OF_defaultDTable, dctx->fseEntropy,
				709	dctx->ddictIsCold, nbSeq,
				710	dctx->workspace, sizeof(dctx->workspace),
				711	ZSTD_DCtx_get_bmi2(dctx));
				712	RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
				713	ip += ofhSize;
				714	}
				715
				716	{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
				717	MLtype, MaxML, MLFSELog,
				718	ip, iend-ip,
				719	ML_base, ML_bits,
				720	ML_defaultDTable, dctx->fseEntropy,
				721	dctx->ddictIsCold, nbSeq,
				722	dctx->workspace, sizeof(dctx->workspace),
				723	ZSTD_DCtx_get_bmi2(dctx));
				724	RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
				725	ip += mlhSize;
				726	}
				727	}
				728
				729	return ip-istart;
				730	}
				731
				732
				/* seq_t :
				 * One sequence expressed as three size_t fields
				 * (literal length, match length, offset). */
				typedef struct {
				    size_t litLength;     /* length of the literal run */
				    size_t matchLength;   /* length of the match */
				    size_t offset;        /* match offset */
				} seq_t;
				738
				739	typedef struct {
				740	size_t state;
				741	const ZSTD_seqSymbol* table;
				742	} ZSTD_fseState;
				743
				744	typedef struct {
				745	BIT_DStream_t DStream;
				746	ZSTD_fseState stateLL;
				747	ZSTD_fseState stateOffb;
				748	ZSTD_fseState stateML;
				749	size_t prevOffset[ZSTD_REP_NUM];
				750	} seqState_t;
				751
				752	/*! ZSTD_overlapCopy8() :
				753	* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
				754	* If the offset is < 8 then the offset is spread to at least 8 bytes.
				755	*
				756	* Precondition: ip <= op
				757	* Postcondition: op - op >= 8
				758	*/
				759	HINT_INLINE void ZSTD_overlapCopy8(BYTE op, BYTE const ip, size_t offset) {
				760	assert(ip <= op);
				761	if (offset < 8) {
				762	/* close range match, overlap */
				763	static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
				764	static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
				765	int const sub2 = dec64table[offset];
				766	(op)[0] = (ip)[0];
				767	(op)[1] = (ip)[1];
				768	(op)[2] = (ip)[2];
				769	(op)[3] = (ip)[3];
				770	*ip += dec32table[offset];
				771	ZSTD_copy4(op+4, ip);
				772	*ip -= sub2;
				773	} else {
				774	ZSTD_copy8(op, ip);
				775	}
				776	*ip += 8;
				777	*op += 8;
				778	assert(op - ip >= 8);
				779	}
				780
				781	/*! ZSTD_safecopy() :
				782	* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
				783	* and write up to 16 bytes past oend_w (op >= oend_w is allowed).
				784	* This function is only called in the uncommon case where the sequence is near the end of the block. It
				785	* should be fast for a single long sequence, but can be slow for several short sequences.
				786	*
				787	* @param ovtype controls the overlap detection
				788	* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
				789	* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
				790	* The src buffer must be before the dst buffer.
				791	*/
				792	static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
				793	ptrdiff_t const diff = op - ip;
				794	BYTE* const oend = op + length;
				795
				796	assert((ovtype == ZSTD_no_overlap && (diff <= -8 \|\| diff >= 8 \|\| op >= oend_w)) \|\|
				797	(ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
				798
				799	if (length < 8) {
				800	/* Handle short lengths. */
				801	while (op < oend) op++ = ip++;
				802	return;
				803	}
				804	if (ovtype == ZSTD_overlap_src_before_dst) {
				805	/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
				806	assert(length >= 8);
				807	ZSTD_overlapCopy8(&op, &ip, diff);
				808	length -= 8;
				809	assert(op - ip >= 8);
				810	assert(op <= oend);
				811	}
				812
				813	if (oend <= oend_w) {
				814	/* No risk of overwrite. */
				815	ZSTD_wildcopy(op, ip, length, ovtype);
				816	return;
				817	}
				818	if (op <= oend_w) {
				819	/* Wildcopy until we get close to the end. */
				820	assert(oend > oend_w);
				821	ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
				822	ip += oend_w - op;
				823	op += oend_w - op;
				824	}
				825	/* Handle the leftovers. */
				826	while (op < oend) op++ = ip++;
				827	}
				828
				829	/* ZSTD_safecopyDstBeforeSrc():
				830	* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
				831	* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
				832	static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
				833	ptrdiff_t const diff = op - ip;
				834	BYTE* const oend = op + length;
				835
				836	if (length < 8 \|\| diff > -8) {
				837	/* Handle short lengths, close overlaps, and dst not before src. */
				838	while (op < oend) op++ = ip++;
				839	return;
				840	}
				841
				842	if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
				843	ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
				844	ip += oend - WILDCOPY_OVERLENGTH - op;
				845	op += oend - WILDCOPY_OVERLENGTH - op;
				846	}
				847
				848	/* Handle the leftovers. */
				849	while (op < oend) op++ = ip++;
				850	}
				851
				852	/* ZSTD_execSequenceEnd():
				853	* This version handles cases that are near the end of the output buffer. It requires
				854	* more careful checks to make sure there is no overflow. By separating out these hard
				855	* and unlikely cases, we can speed up the common cases.
				856	*
				857	* NOTE: This function needs to be fast for a single long sequence, but doesn't need
				858	* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
				859	*/
				860	FORCE_NOINLINE
				861	size_t ZSTD_execSequenceEnd(BYTE* op,
				862	BYTE* const oend, seq_t sequence,
				863	const BYTE** litPtr, const BYTE* const litLimit,
				864	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				865	{
				866	BYTE* const oLitEnd = op + sequence.litLength;
				867	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				868	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				869	const BYTE* match = oLitEnd - sequence.offset;
				870	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
				871
				872	/* bounds checks : careful of address space overflow in 32-bit mode */
				873	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
				874	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
				875	assert(op < op + sequenceLength);
				876	assert(oLitEnd < op + sequenceLength);
				877
				878	/* copy literals */
				879	ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
				880	op = oLitEnd;
				881	*litPtr = iLitEnd;
				882
				883	/* copy Match */
				884	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				885	/* offset beyond prefix */
				886	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
				887	match = dictEnd - (prefixStart - match);
				888	if (match + sequence.matchLength <= dictEnd) {
				889	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				890	return sequenceLength;
				891	}
				892	/* span extDict & currentPrefixSegment */
				893	{ size_t const length1 = dictEnd - match;
				894	ZSTD_memmove(oLitEnd, match, length1);
				895	op = oLitEnd + length1;
				896	sequence.matchLength -= length1;
				897	match = prefixStart;
				898	}
				899	}
				900	ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
				901	return sequenceLength;
				902	}
				903
				904	/* ZSTD_execSequenceEndSplitLitBuffer():
				905	* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
				906	*/
				907	FORCE_NOINLINE
				908	size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
				909	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
				910	const BYTE** litPtr, const BYTE* const litLimit,
				911	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				912	{
				913	BYTE* const oLitEnd = op + sequence.litLength;
				914	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				915	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				916	const BYTE* match = oLitEnd - sequence.offset;
				917
				918
				919	/* bounds checks : careful of address space overflow in 32-bit mode */
				920	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
				921	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
				922	assert(op < op + sequenceLength);
				923	assert(oLitEnd < op + sequenceLength);
				924
				925	/* copy literals */
				926	RETURN_ERROR_IF(op > litPtr && op < litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
				927	ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
				928	op = oLitEnd;
				929	*litPtr = iLitEnd;
				930
				931	/* copy Match */
				932	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				933	/* offset beyond prefix */
				934	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
				935	match = dictEnd - (prefixStart - match);
				936	if (match + sequence.matchLength <= dictEnd) {
				937	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				938	return sequenceLength;
				939	}
				940	/* span extDict & currentPrefixSegment */
				941	{ size_t const length1 = dictEnd - match;
				942	ZSTD_memmove(oLitEnd, match, length1);
				943	op = oLitEnd + length1;
				944	sequence.matchLength -= length1;
				945	match = prefixStart;
				946	}
				947	}
				948	ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
				949	return sequenceLength;
				950	}
				951
				952	HINT_INLINE
				953	size_t ZSTD_execSequence(BYTE* op,
				954	BYTE* const oend, seq_t sequence,
				955	const BYTE** litPtr, const BYTE* const litLimit,
				956	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				957	{
				958	BYTE* const oLitEnd = op + sequence.litLength;
				959	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				960	BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
				961	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
				962	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				963	const BYTE* match = oLitEnd - sequence.offset;
				964
				965	assert(op != NULL /* Precondition */);
				966	assert(oend_w < oend /* No underflow */);
				967	/* Handle edge cases in a slow path:
				968	* - Read beyond end of literals
				969	* - Match end is within WILDCOPY_OVERLIMIT of oend
				970	* - 32-bit mode and the match length overflows
				971	*/
				972	if (UNLIKELY(
				973	iLitEnd > litLimit \|\|
				974	oMatchEnd > oend_w \|\|
				975	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
				976	return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
				977
				978	/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
				979	assert(op <= oLitEnd /* No overflow */);
				980	assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
				981	assert(oMatchEnd <= oend /* No underflow */);
				982	assert(iLitEnd <= litLimit /* Literal length is in bounds */);
				983	assert(oLitEnd <= oend_w /* Can wildcopy literals */);
				984	assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
				985
				986	/* Copy Literals:
				987	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
				988	* We likely don't need the full 32-byte wildcopy.
				989	*/
				990	assert(WILDCOPY_OVERLENGTH >= 16);
				991	ZSTD_copy16(op, (*litPtr));
				992	if (UNLIKELY(sequence.litLength > 16)) {
				993	ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
				994	}
				995	op = oLitEnd;
				996	litPtr = iLitEnd; / update for next sequence */
				997
				998	/* Copy Match */
				999	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				1000	/* offset beyond prefix -> go into extDict */
				1001	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
				1002	match = dictEnd + (match - prefixStart);
				1003	if (match + sequence.matchLength <= dictEnd) {
				1004	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				1005	return sequenceLength;
				1006	}
				1007	/* span extDict & currentPrefixSegment */
				1008	{ size_t const length1 = dictEnd - match;
				1009	ZSTD_memmove(oLitEnd, match, length1);
				1010	op = oLitEnd + length1;
				1011	sequence.matchLength -= length1;
				1012	match = prefixStart;
				1013	}
				1014	}
				1015	/* Match within prefix of 1 or more bytes */
				1016	assert(op <= oMatchEnd);
				1017	assert(oMatchEnd <= oend_w);
				1018	assert(match >= prefixStart);
				1019	assert(sequence.matchLength >= 1);
				1020
				1021	/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
				1022	* without overlap checking.
				1023	*/
				1024	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
				1025	/* We bet on a full wildcopy for matches, since we expect matches to be
				1026	* longer than literals (in general). In silesia, ~10% of matches are longer
				1027	* than 16 bytes.
				1028	*/
				1029	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
				1030	return sequenceLength;
				1031	}
				1032	assert(sequence.offset < WILDCOPY_VECLEN);
				1033
				1034	/* Copy 8 bytes and spread the offset to be >= 8. */
				1035	ZSTD_overlapCopy8(&op, &match, sequence.offset);
				1036
				1037	/* If the match length is > 8 bytes, then continue with the wildcopy. */
				1038	if (sequence.matchLength > 8) {
				1039	assert(op < oMatchEnd);
				1040	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
				1041	}
				1042	return sequenceLength;
				1043	}
				1044
				1045	HINT_INLINE
				1046	size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
				1047	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
				1048	const BYTE** litPtr, const BYTE* const litLimit,
				1049	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				1050	{
				1051	BYTE* const oLitEnd = op + sequence.litLength;
				1052	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				1053	BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
				1054	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				1055	const BYTE* match = oLitEnd - sequence.offset;
				1056
				1057	assert(op != NULL /* Precondition */);
				1058	assert(oend_w < oend /* No underflow */);
				1059	/* Handle edge cases in a slow path:
				1060	* - Read beyond end of literals
				1061	* - Match end is within WILDCOPY_OVERLIMIT of oend
				1062	* - 32-bit mode and the match length overflows
				1063	*/
				1064	if (UNLIKELY(
				1065	iLitEnd > litLimit \|\|
				1066	oMatchEnd > oend_w \|\|
				1067	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
				1068	return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
				1069
				1070	/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
				1071	assert(op <= oLitEnd /* No overflow */);
				1072	assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
				1073	assert(oMatchEnd <= oend /* No underflow */);
				1074	assert(iLitEnd <= litLimit /* Literal length is in bounds */);
				1075	assert(oLitEnd <= oend_w /* Can wildcopy literals */);
				1076	assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
				1077
				1078	/* Copy Literals:
				1079	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
				1080	* We likely don't need the full 32-byte wildcopy.
				1081	*/
				1082	assert(WILDCOPY_OVERLENGTH >= 16);
				1083	ZSTD_copy16(op, (*litPtr));
				1084	if (UNLIKELY(sequence.litLength > 16)) {
				1085	ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
				1086	}
				1087	op = oLitEnd;
				1088	litPtr = iLitEnd; / update for next sequence */
				1089
				1090	/* Copy Match */
				1091	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				1092	/* offset beyond prefix -> go into extDict */
				1093	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
				1094	match = dictEnd + (match - prefixStart);
				1095	if (match + sequence.matchLength <= dictEnd) {
				1096	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				1097	return sequenceLength;
				1098	}
				1099	/* span extDict & currentPrefixSegment */
				1100	{ size_t const length1 = dictEnd - match;
				1101	ZSTD_memmove(oLitEnd, match, length1);
				1102	op = oLitEnd + length1;
				1103	sequence.matchLength -= length1;
				1104	match = prefixStart;
				1105	} }
				1106	/* Match within prefix of 1 or more bytes */
				1107	assert(op <= oMatchEnd);
				1108	assert(oMatchEnd <= oend_w);
				1109	assert(match >= prefixStart);
				1110	assert(sequence.matchLength >= 1);
				1111
				1112	/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
				1113	* without overlap checking.
				1114	*/
				1115	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
				1116	/* We bet on a full wildcopy for matches, since we expect matches to be
				1117	* longer than literals (in general). In silesia, ~10% of matches are longer
				1118	* than 16 bytes.
				1119	*/
				1120	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
				1121	return sequenceLength;
				1122	}
				1123	assert(sequence.offset < WILDCOPY_VECLEN);
				1124
				1125	/* Copy 8 bytes and spread the offset to be >= 8. */
				1126	ZSTD_overlapCopy8(&op, &match, sequence.offset);
				1127
				1128	/* If the match length is > 8 bytes, then continue with the wildcopy. */
				1129	if (sequence.matchLength > 8) {
				1130	assert(op < oMatchEnd);
				1131	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
				1132	}
				1133	return sequenceLength;
				1134	}
				1135
				1136
				1137	static void
				1138	ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
				1139	{
				1140	const void* ptr = dt;
				1141	const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
				1142	DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
				1143	DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
				1144	(U32)DStatePtr->state, DTableH->tableLog);
				1145	BIT_reloadDStream(bitD);
				1146	DStatePtr->table = dt + 1;
				1147	}
				1148
				1149	FORCE_INLINE_TEMPLATE void
				1150	ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
				1151	{
				1152	size_t const lowBits = BIT_readBits(bitD, nbBits);
				1153	DStatePtr->state = nextState + lowBits;
				1154	}
				1155
				1156	/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
				1157	* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
				1158	* bits before reloading. This value is the maximum number of bytes we read
				1159	* after reloading when we are decoding long offsets.
				1160	*/
				1161	#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
				1162	(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
				1163	? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
				1164	: 0)
				1165
				1166	typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
				1167
				1168	FORCE_INLINE_TEMPLATE seq_t
				1169	ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
				1170	{
				1171	seq_t seq;
				1172	const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
				1173	const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
				1174	const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
				1175	seq.matchLength = mlDInfo->baseValue;
				1176	seq.litLength = llDInfo->baseValue;
				1177	{ U32 const ofBase = ofDInfo->baseValue;
				1178	BYTE const llBits = llDInfo->nbAdditionalBits;
				1179	BYTE const mlBits = mlDInfo->nbAdditionalBits;
				1180	BYTE const ofBits = ofDInfo->nbAdditionalBits;
				1181	BYTE const totalBits = llBits+mlBits+ofBits;
				1182
				1183	U16 const llNext = llDInfo->nextState;
				1184	U16 const mlNext = mlDInfo->nextState;
				1185	U16 const ofNext = ofDInfo->nextState;
				1186	U32 const llnbBits = llDInfo->nbBits;
				1187	U32 const mlnbBits = mlDInfo->nbBits;
				1188	U32 const ofnbBits = ofDInfo->nbBits;
				1189	/*
				1190	* As gcc has better branch and block analyzers, sometimes it is only
				1191	* valuable to mark likelyness for clang, it gives around 3-4% of
				1192	* performance.
				1193	*/
				1194
				1195	/* sequence */
				1196	{ size_t offset;
				1197	#if defined(__clang__)
				1198	if (LIKELY(ofBits > 1)) {
				1199	#else
				1200	if (ofBits > 1) {
				1201	#endif
				1202	ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
				1203	ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
				1204	assert(ofBits <= MaxOff);
				1205	if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
				1206	U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
				1207	offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
				1208	BIT_reloadDStream(&seqState->DStream);
				1209	if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
				1210	assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
				1211	} else {
				1212	offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/>0/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
				1213	if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
				1214	}
				1215	seqState->prevOffset[2] = seqState->prevOffset[1];
				1216	seqState->prevOffset[1] = seqState->prevOffset[0];
				1217	seqState->prevOffset[0] = offset;
				1218	} else {
				1219	U32 const ll0 = (llDInfo->baseValue == 0);
				1220	if (LIKELY((ofBits == 0))) {
				1221	offset = seqState->prevOffset[ll0];
				1222	seqState->prevOffset[1] = seqState->prevOffset[!ll0];
				1223	seqState->prevOffset[0] = offset;
				1224	} else {
				1225	offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
				1226	{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
				1227	temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
				1228	if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
				1229	seqState->prevOffset[1] = seqState->prevOffset[0];
				1230	seqState->prevOffset[0] = offset = temp;
				1231	} } }
				1232	seq.offset = offset;
				1233	}
				1234
				1235	#if defined(__clang__)
				1236	if (UNLIKELY(mlBits > 0))
				1237	#else
				1238	if (mlBits > 0)
				1239	#endif
				1240	seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/>0/);
				1241
				1242	if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
				1243	BIT_reloadDStream(&seqState->DStream);
				1244	if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
				1245	BIT_reloadDStream(&seqState->DStream);
				1246	/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
				1247	ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
				1248
				1249	#if defined(__clang__)
				1250	if (UNLIKELY(llBits > 0))
				1251	#else
				1252	if (llBits > 0)
				1253	#endif
				1254	seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/>0/);
				1255
				1256	if (MEM_32bits())
				1257	BIT_reloadDStream(&seqState->DStream);
				1258
				1259	DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
				1260	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
				1261
				1262	ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
				1263	ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
				1264	if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
				1265	ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
				1266	}
				1267
				1268	return seq;
				1269	}
				1270
				1271	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				1272	MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
				1273	{
				1274	size_t const windowSize = dctx->fParams.windowSize;
				1275	/* No dictionary used. */
				1276	if (dctx->dictContentEndForFuzzing == NULL) return 0;
				1277	/* Dictionary is our prefix. */
				1278	if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
				1279	/* Dictionary is not our ext-dict. */
				1280	if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
				1281	/* Dictionary is not within our window size. */
				1282	if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
				1283	/* Dictionary is active. */
				1284	return 1;
				1285	}
				1286
				1287	MEM_STATIC void ZSTD_assertValidSequence(
				1288	ZSTD_DCtx const* dctx,
				1289	BYTE const* op, BYTE const* oend,
				1290	seq_t const seq,
				1291	BYTE const* prefixStart, BYTE const* virtualStart)
				1292	{
				1293	#if DEBUGLEVEL >= 1
				1294	size_t const windowSize = dctx->fParams.windowSize;
				1295	size_t const sequenceSize = seq.litLength + seq.matchLength;
				1296	BYTE const* const oLitEnd = op + seq.litLength;
				1297	DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
				1298	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
				1299	assert(op <= oend);
				1300	assert((size_t)(oend - op) >= sequenceSize);
				1301	assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
				1302	if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
				1303	size_t const dictSize = (size_t)((char const)dctx->dictContentEndForFuzzing - (char const)dctx->dictContentBeginForFuzzing);
				1304	/* Offset must be within the dictionary. */
				1305	assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
				1306	assert(seq.offset <= windowSize + dictSize);
				1307	} else {
				1308	/* Offset must be within our window. */
				1309	assert(seq.offset <= windowSize);
				1310	}
				1311	#else
				1312	(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
				1313	#endif
				1314	}
				1315	#endif
				1316
				1317	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1318
				1319
				1320	FORCE_INLINE_TEMPLATE size_t
				1321	DONT_VECTORIZE
				1322	ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
				1323	void* dst, size_t maxDstSize,
				1324	const void* seqStart, size_t seqSize, int nbSeq,
				1325	const ZSTD_longOffset_e isLongOffset,
				1326	const int frame)
				1327	{
				1328	const BYTE* ip = (const BYTE*)seqStart;
				1329	const BYTE* const iend = ip + seqSize;
				1330	BYTE* const ostart = (BYTE*)dst;
				1331	BYTE* const oend = ostart + maxDstSize;
				1332	BYTE* op = ostart;
				1333	const BYTE* litPtr = dctx->litPtr;
				1334	const BYTE* litBufferEnd = dctx->litBufferEnd;
				1335	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
				1336	const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
				1337	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
				1338	DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
				1339	(void)frame;
				1340
				1341	/* Regen sequences */
				1342	if (nbSeq) {
				1343	seqState_t seqState;
				1344	dctx->fseEntropy = 1;
				1345	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
				1346	RETURN_ERROR_IF(
				1347	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
				1348	corruption_detected, "");
				1349	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
				1350	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
				1351	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
				1352	assert(dst != NULL);
				1353
				1354	ZSTD_STATIC_ASSERT(
				1355	BIT_DStream_unfinished < BIT_DStream_completed &&
				1356	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
				1357	BIT_DStream_completed < BIT_DStream_overflow);
				1358
				1359	/* decompress without overrunning litPtr begins */
				1360	{
				1361	seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1362	/* Align the decompression loop to 32 + 16 bytes.
				1363	*
				1364	* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
				1365	* speed swings based on the alignment of the decompression loop. This
				1366	* performance swing is caused by parts of the decompression loop falling
				1367	* out of the DSB. The entire decompression loop should fit in the DSB,
				1368	* when it can't we get much worse performance. You can measure if you've
				1369	* hit the good case or the bad case with this perf command for some
				1370	* compressed file test.zst:
				1371	*
				1372	* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
				1373	* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
				1374	*
				1375	* If you see most cycles served out of the MITE you've hit the bad case.
				1376	* If you see most cycles served out of the DSB you've hit the good case.
				1377	* If it is pretty even then you may be in an okay case.
				1378	*
				1379	* This issue has been reproduced on the following CPUs:
				1380	* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
				1381	* Use Instruments->Counters to get DSB/MITE cycles.
				1382	* I never got performance swings, but I was able to
				1383	* go from the good case of mostly DSB to half of the
				1384	* cycles served from MITE.
				1385	* - Coffeelake: Intel i9-9900k
				1386	* - Coffeelake: Intel i7-9700k
				1387	*
				1388	* I haven't been able to reproduce the instability or DSB misses on any
				1389	* of the following CPUS:
				1390	* - Haswell
				1391	* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
				1392	* - Skylake
				1393	*
				1394	* Alignment is done for each of the three major decompression loops:
				1395	* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
				1396	* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
				1397	* - ZSTD_decompressSequences_body
				1398	* Alignment choices are made to minimize large swings on bad cases and influence on performance
				1399	* from changes external to this code, rather than to overoptimize on the current commit.
				1400	*
				1401	* If you are seeing performance stability this script can help test.
				1402	* It tests on 4 commits in zstd where I saw performance change.
				1403	*
				1404	* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
				1405	*/
				1406	#if defined(__x86_64__)
				1407	__asm__(".p2align 6");
				1408	# if __GNUC__ >= 7
				1409	/* good for gcc-7, gcc-9, and gcc-11 */
				1410	__asm__("nop");
				1411	__asm__(".p2align 5");
				1412	__asm__("nop");
				1413	__asm__(".p2align 4");
				1414	# if __GNUC__ == 8 \|\| __GNUC__ == 10
				1415	/* good for gcc-8 and gcc-10 */
				1416	__asm__("nop");
				1417	__asm__(".p2align 3");
				1418	# endif
				1419	# endif
				1420	#endif
				1421
				1422	/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
				1423	for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
				1424	size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
				1425	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1426	assert(!ZSTD_isError(oneSeqSize));
				1427	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
				1428	#endif
				1429	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
				1430	return oneSeqSize;
				1431	DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
				1432	op += oneSeqSize;
				1433	if (UNLIKELY(!--nbSeq))
				1434	break;
				1435	BIT_reloadDStream(&(seqState.DStream));
				1436	sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1437	}
				1438
				1439	/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
				1440	if (nbSeq > 0) {
				1441	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
				1442	if (leftoverLit)
				1443	{
				1444	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
				1445	ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
				1446	sequence.litLength -= leftoverLit;
				1447	op += leftoverLit;
				1448	}
				1449	litPtr = dctx->litExtraBuffer;
				1450	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1451	dctx->litBufferLocation = ZSTD_not_in_dst;
				1452	{
				1453	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
				1454	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1455	assert(!ZSTD_isError(oneSeqSize));
				1456	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
				1457	#endif
				1458	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
				1459	return oneSeqSize;
				1460	DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
				1461	op += oneSeqSize;
				1462	if (--nbSeq)
				1463	BIT_reloadDStream(&(seqState.DStream));
				1464	}
				1465	}
				1466	}
				1467
				1468	if (nbSeq > 0) /* there is remaining lit from extra buffer */
				1469	{
				1470
				1471	#if defined(__x86_64__)
				1472	__asm__(".p2align 6");
				1473	__asm__("nop");
				1474	# if __GNUC__ != 7
				1475	/* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
				1476	__asm__(".p2align 4");
				1477	__asm__("nop");
				1478	__asm__(".p2align 3");
				1479	# elif __GNUC__ >= 11
				1480	__asm__(".p2align 3");
				1481	# else
				1482	__asm__(".p2align 5");
				1483	__asm__("nop");
				1484	__asm__(".p2align 3");
				1485	# endif
				1486	#endif
				1487
				1488	for (; ; ) {
				1489	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1490	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
				1491	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1492	assert(!ZSTD_isError(oneSeqSize));
				1493	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
				1494	#endif
				1495	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
				1496	return oneSeqSize;
				1497	DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
				1498	op += oneSeqSize;
				1499	if (UNLIKELY(!--nbSeq))
				1500	break;
				1501	BIT_reloadDStream(&(seqState.DStream));
				1502	}
				1503	}
				1504
				1505	/* check if reached exact end */
				1506	DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
				1507	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
				1508	RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
				1509	/* save reps for next block */
				1510	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
				1511	}
				1512
				1513	/* last literal segment */
				1514	if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
				1515	{
				1516	size_t const lastLLSize = litBufferEnd - litPtr;
				1517	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
				1518	if (op != NULL) {
				1519	ZSTD_memmove(op, litPtr, lastLLSize);
				1520	op += lastLLSize;
				1521	}
				1522	litPtr = dctx->litExtraBuffer;
				1523	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1524	dctx->litBufferLocation = ZSTD_not_in_dst;
				1525	}
				1526	{ size_t const lastLLSize = litBufferEnd - litPtr;
				1527	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
				1528	if (op != NULL) {
				1529	ZSTD_memcpy(op, litPtr, lastLLSize);
				1530	op += lastLLSize;
				1531	}
				1532	}
				1533
				1534	return op-ostart;
				1535	}
				1536
				1537	FORCE_INLINE_TEMPLATE size_t
				1538	DONT_VECTORIZE
				1539	ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
				1540	void* dst, size_t maxDstSize,
				1541	const void* seqStart, size_t seqSize, int nbSeq,
				1542	const ZSTD_longOffset_e isLongOffset,
				1543	const int frame)
				1544	{
				1545	const BYTE* ip = (const BYTE*)seqStart;
				1546	const BYTE* const iend = ip + seqSize;
				1547	BYTE* const ostart = (BYTE*)dst;
				1548	BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
				1549	BYTE* op = ostart;
				1550	const BYTE* litPtr = dctx->litPtr;
				1551	const BYTE* const litEnd = litPtr + dctx->litSize;
				1552	const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
				1553	const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
				1554	const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
				1555	DEBUGLOG(5, "ZSTD_decompressSequences_body");
				1556	(void)frame;
				1557
				1558	/* Regen sequences */
				1559	if (nbSeq) {
				1560	seqState_t seqState;
				1561	dctx->fseEntropy = 1;
				1562	{ U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
				1563	RETURN_ERROR_IF(
				1564	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
				1565	corruption_detected, "");
				1566	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
				1567	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
				1568	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
				1569	assert(dst != NULL);
				1570
				1571	ZSTD_STATIC_ASSERT(
				1572	BIT_DStream_unfinished < BIT_DStream_completed &&
				1573	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
				1574	BIT_DStream_completed < BIT_DStream_overflow);
				1575
				1576	#if defined(__x86_64__)
				1577	__asm__(".p2align 6");
				1578	__asm__("nop");
				1579	# if __GNUC__ >= 7
				1580	__asm__(".p2align 5");
				1581	__asm__("nop");
				1582	__asm__(".p2align 3");
				1583	# else
				1584	__asm__(".p2align 4");
				1585	__asm__("nop");
				1586	__asm__(".p2align 3");
				1587	# endif
				1588	#endif
				1589
				1590	for ( ; ; ) {
				1591	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1592	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
				1593	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1594	assert(!ZSTD_isError(oneSeqSize));
				1595	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
				1596	#endif
				1597	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
				1598	return oneSeqSize;
				1599	DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
				1600	op += oneSeqSize;
				1601	if (UNLIKELY(!--nbSeq))
				1602	break;
				1603	BIT_reloadDStream(&(seqState.DStream));
				1604	}
				1605
				1606	/* check if reached exact end */
				1607	DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
				1608	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
				1609	RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
				1610	/* save reps for next block */
				1611	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
				1612	}
				1613
				1614	/* last literal segment */
				1615	{ size_t const lastLLSize = litEnd - litPtr;
				1616	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
				1617	if (op != NULL) {
				1618	ZSTD_memcpy(op, litPtr, lastLLSize);
				1619	op += lastLLSize;
				1620	}
				1621	}
				1622
				1623	return op-ostart;
				1624	}
				1625
				1626	static size_t
				1627	ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
				1628	void* dst, size_t maxDstSize,
				1629	const void* seqStart, size_t seqSize, int nbSeq,
				1630	const ZSTD_longOffset_e isLongOffset,
				1631	const int frame)
				1632	{
				1633	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1634	}
				1635
				1636	static size_t
				1637	ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
				1638	void* dst, size_t maxDstSize,
				1639	const void* seqStart, size_t seqSize, int nbSeq,
				1640	const ZSTD_longOffset_e isLongOffset,
				1641	const int frame)
				1642	{
				1643	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1644	}
				1645	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
				1646
				1647	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1648
				1649	FORCE_INLINE_TEMPLATE size_t
				1650	ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
				1651	const BYTE* const prefixStart, const BYTE* const dictEnd)
				1652	{
				1653	prefetchPos += sequence.litLength;
				1654	{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
				1655	const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
				1656	* No consequence though : memory address is only used for prefetching, not for dereferencing */
				1657	PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
				1658	}
				1659	return prefetchPos + sequence.matchLength;
				1660	}
				1661
				1662	/* This decoding function employs prefetching
				1663	* to reduce latency impact of cache misses.
				1664	* It's generally employed when block contains a significant portion of long-distance matches
				1665	* or when coupled with a "cold" dictionary */
				1666	FORCE_INLINE_TEMPLATE size_t
				1667	ZSTD_decompressSequencesLong_body(
				1668	ZSTD_DCtx* dctx,
				1669	void* dst, size_t maxDstSize,
				1670	const void* seqStart, size_t seqSize, int nbSeq,
				1671	const ZSTD_longOffset_e isLongOffset,
				1672	const int frame)
				1673	{
				1674	const BYTE* ip = (const BYTE*)seqStart;
				1675	const BYTE* const iend = ip + seqSize;
				1676	BYTE* const ostart = (BYTE*)dst;
				1677	BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
				1678	BYTE* op = ostart;
				1679	const BYTE* litPtr = dctx->litPtr;
				1680	const BYTE* litBufferEnd = dctx->litBufferEnd;
				1681	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
				1682	const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
				1683	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
				1684	(void)frame;
				1685
				1686	/* Regen sequences */
				1687	if (nbSeq) {
				1688	#define STORED_SEQS 8
				1689	#define STORED_SEQS_MASK (STORED_SEQS-1)
				1690	#define ADVANCED_SEQS STORED_SEQS
				1691	seq_t sequences[STORED_SEQS];
				1692	int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
				1693	seqState_t seqState;
				1694	int seqNb;
				1695	size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
				1696
				1697	dctx->fseEntropy = 1;
				1698	{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
				1699	assert(dst != NULL);
				1700	assert(iend >= ip);
				1701	RETURN_ERROR_IF(
				1702	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
				1703	corruption_detected, "");
				1704	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
				1705	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
				1706	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
				1707
				1708	/* prepare in advance */
				1709	for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
				1710	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1711	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
				1712	sequences[seqNb] = sequence;
				1713	}
				1714	RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
				1715
				1716	/* decompress without stomping litBuffer */
				1717	for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
				1718	seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
				1719	size_t oneSeqSize;
				1720
				1721	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
				1722	{
				1723	/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
				1724	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
				1725	if (leftoverLit)
				1726	{
				1727	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
				1728	ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
				1729	sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
				1730	op += leftoverLit;
				1731	}
				1732	litPtr = dctx->litExtraBuffer;
				1733	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1734	dctx->litBufferLocation = ZSTD_not_in_dst;
				1735	oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
				1736	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1737	assert(!ZSTD_isError(oneSeqSize));
				1738	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
				1739	#endif
				1740	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1741
				1742	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
				1743	sequences[seqNb & STORED_SEQS_MASK] = sequence;
				1744	op += oneSeqSize;
				1745	}
				1746	else
				1747	{
				1748	/* lit buffer is either wholly contained in first or second split, or not split at all*/
				1749	oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
				1750	ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
				1751	ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
				1752	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1753	assert(!ZSTD_isError(oneSeqSize));
				1754	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
				1755	#endif
				1756	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1757
				1758	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
				1759	sequences[seqNb & STORED_SEQS_MASK] = sequence;
				1760	op += oneSeqSize;
				1761	}
				1762	}
				1763	RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
				1764
				1765	/* finish queue */
				1766	seqNb -= seqAdvance;
				1767	for ( ; seqNb<nbSeq ; seqNb++) {
				1768	seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
				1769	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
				1770	{
				1771	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
				1772	if (leftoverLit)
				1773	{
				1774	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
				1775	ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
				1776	sequence->litLength -= leftoverLit;
				1777	op += leftoverLit;
				1778	}
				1779	litPtr = dctx->litExtraBuffer;
				1780	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1781	dctx->litBufferLocation = ZSTD_not_in_dst;
				1782	{
				1783	size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
				1784	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1785	assert(!ZSTD_isError(oneSeqSize));
				1786	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
				1787	#endif
				1788	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1789	op += oneSeqSize;
				1790	}
				1791	}
				1792	else
				1793	{
				1794	size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
				1795	ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
				1796	ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
				1797	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1798	assert(!ZSTD_isError(oneSeqSize));
				1799	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
				1800	#endif
				1801	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1802	op += oneSeqSize;
				1803	}
				1804	}
				1805
				1806	/* save reps for next block */
				1807	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
				1808	}
				1809
				1810	/* last literal segment */
				1811	if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
				1812	{
				1813	size_t const lastLLSize = litBufferEnd - litPtr;
				1814	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
				1815	if (op != NULL) {
				1816	ZSTD_memmove(op, litPtr, lastLLSize);
				1817	op += lastLLSize;
				1818	}
				1819	litPtr = dctx->litExtraBuffer;
				1820	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
				1821	}
				1822	{ size_t const lastLLSize = litBufferEnd - litPtr;
				1823	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
				1824	if (op != NULL) {
				1825	ZSTD_memmove(op, litPtr, lastLLSize);
				1826	op += lastLLSize;
				1827	}
				1828	}
				1829
				1830	return op-ostart;
				1831	}
				1832
				1833	static size_t
				1834	ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
				1835	void* dst, size_t maxDstSize,
				1836	const void* seqStart, size_t seqSize, int nbSeq,
				1837	const ZSTD_longOffset_e isLongOffset,
				1838	const int frame)
				1839	{
				1840	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1841	}
				1842	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
				1843
				1844
				1845
				1846	#if DYNAMIC_BMI2
				1847
				1848	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1849	static BMI2_TARGET_ATTRIBUTE size_t
				1850	DONT_VECTORIZE
				1851	ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
				1852	void* dst, size_t maxDstSize,
				1853	const void* seqStart, size_t seqSize, int nbSeq,
				1854	const ZSTD_longOffset_e isLongOffset,
				1855	const int frame)
				1856	{
				1857	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1858	}
				1859	static BMI2_TARGET_ATTRIBUTE size_t
				1860	DONT_VECTORIZE
				1861	ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
				1862	void* dst, size_t maxDstSize,
				1863	const void* seqStart, size_t seqSize, int nbSeq,
				1864	const ZSTD_longOffset_e isLongOffset,
				1865	const int frame)
				1866	{
				1867	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1868	}
				1869	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
				1870
				1871	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1872	static BMI2_TARGET_ATTRIBUTE size_t
				1873	ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
				1874	void* dst, size_t maxDstSize,
				1875	const void* seqStart, size_t seqSize, int nbSeq,
				1876	const ZSTD_longOffset_e isLongOffset,
				1877	const int frame)
				1878	{
				1879	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1880	}
				1881	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
				1882
				1883	#endif /* DYNAMIC_BMI2 */
				1884
				/* ZSTD_decompressSequences_t :
				 * Function-pointer type whose parameter list and return type match the
				 * ZSTD_decompressSequences*() functions defined in this file. */
				1885	typedef size_t (*ZSTD_decompressSequences_t)(
				1886	ZSTD_DCtx* dctx,
				1887	void* dst, size_t maxDstSize,
				1888	const void* seqStart, size_t seqSize, int nbSeq,
				1889	const ZSTD_longOffset_e isLongOffset,
				1890	const int frame);
				1891
				1892	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1893	static size_t
				1894	ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
				1895	const void* seqStart, size_t seqSize, int nbSeq,
				1896	const ZSTD_longOffset_e isLongOffset,
				1897	const int frame)
				1898	{
				1899	DEBUGLOG(5, "ZSTD_decompressSequences");
				1900	#if DYNAMIC_BMI2
				1901	if (ZSTD_DCtx_get_bmi2(dctx)) {
				1902	return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1903	}
				1904	#endif
				1905	return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1906	}
				1907	static size_t
				1908	ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
				1909	const void* seqStart, size_t seqSize, int nbSeq,
				1910	const ZSTD_longOffset_e isLongOffset,
				1911	const int frame)
				1912	{
				1913	DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
				1914	#if DYNAMIC_BMI2
				1915	if (ZSTD_DCtx_get_bmi2(dctx)) {
				1916	return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1917	}
				1918	#endif
				1919	return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1920	}
				1921	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
				1922
				1923
				1924	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1925	/* ZSTD_decompressSequencesLong() :
				1926	* decompression function triggered when a minimum share of offsets is considered "long",
				1927	* aka out of cache.
				1928	* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
				1929	* This function will try to mitigate main memory latency through the use of prefetching */
				1930	static size_t
				1931	ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
				1932	void* dst, size_t maxDstSize,
				1933	const void* seqStart, size_t seqSize, int nbSeq,
				1934	const ZSTD_longOffset_e isLongOffset,
				1935	const int frame)
				1936	{
				1937	DEBUGLOG(5, "ZSTD_decompressSequencesLong");
				1938	#if DYNAMIC_BMI2
				1939	if (ZSTD_DCtx_get_bmi2(dctx)) {
				1940	return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1941	}
				1942	#endif
				1943	return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1944	}
				1945	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
				1946
				1947
				1948
				1949	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				1950	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				1951	/* ZSTD_getLongOffsetsShare() :
				1952	* condition : offTable must be valid
				1953	* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
				1954	* compared to maximum possible of (1<<OffFSELog) */
				1955	static unsigned
				1956	ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
				1957	{
				1958	const void* ptr = offTable;
				1959	U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
				1960	const ZSTD_seqSymbol* table = offTable + 1;
				1961	U32 const max = 1 << tableLog;
				1962	U32 u, total = 0;
				1963	DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
				1964
				1965	assert(max <= (1 << OffFSELog)); /* max not too large */
				1966	for (u=0; u<max; u++) {
				1967	if (table[u].nbAdditionalBits > 22) total += 1;
				1968	}
				1969
				1970	assert(tableLog <= OffFSELog);
				1971	total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
				1972
				1973	return total;
				1974	}
				1975	#endif
				1976
				1977	size_t
				1978	ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
				1979	void* dst, size_t dstCapacity,
				1980	const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
				1981	{ /* blockType == blockCompressed */
				1982	const BYTE* ip = (const BYTE*)src;
				1983	/* isLongOffset must be true if there are long offsets.
				1984	* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
				1985	* We don't expect that to be the case in 64-bit mode.
				1986	* In block mode, window size is not known, so we have to be conservative.
				1987	* (note: but it could be evaluated from current-lowLimit)
				1988	*/
				1989	ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame \|\| (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
				1990	DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
				1991
				1992	RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
				1993
				1994	/* Decode literals section */
				1995	{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
				1996	DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
				1997	if (ZSTD_isError(litCSize)) return litCSize;
				1998	ip += litCSize;
				1999	srcSize -= litCSize;
				2000	}
				2001
				2002	/* Build Decoding Tables */
				2003	{
				2004	/* These macros control at build-time which decompressor implementation
				2005	* we use. If neither is defined, we do some inspection and dispatch at
				2006	* runtime.
				2007	*/
				2008	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				2009	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				2010	int usePrefetchDecoder = dctx->ddictIsCold;
				2011	#endif
				2012	int nbSeq;
				2013	size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
				2014	if (ZSTD_isError(seqHSize)) return seqHSize;
				2015	ip += seqHSize;
				2016	srcSize -= seqHSize;
				2017
				2018	RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
				2019
				2020	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				2021	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				2022	if ( !usePrefetchDecoder
				2023	&& (!frame \|\| (dctx->fParams.windowSize > (1<<24)))
				2024	&& (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
				2025	U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
				2026	U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
				2027	usePrefetchDecoder = (shareLongOffsets >= minShare);
				2028	}
				2029	#endif
				2030
				2031	dctx->ddictIsCold = 0;
				2032
				2033	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				2034	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				2035	if (usePrefetchDecoder)
				2036	#endif
				2037	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				2038	return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
				2039	#endif
				2040
				2041	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				2042	/* else */
				2043	if (dctx->litBufferLocation == ZSTD_split)
				2044	return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
				2045	else
				2046	return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
				2047	#endif
				2048	}
				2049	}
				2050
				2051
				2052	void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
				2053	{
				2054	if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
				2055	dctx->dictEnd = dctx->previousDstEnd;
				2056	dctx->virtualStart = (const char)dst - ((const char)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
				2057	dctx->prefixStart = dst;
				2058	dctx->previousDstEnd = dst;
				2059	}
				2060	}
				2061
				2062
				2063	size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
				2064	void* dst, size_t dstCapacity,
				2065	const void* src, size_t srcSize)
				2066	{
				2067	size_t dSize;
				2068	ZSTD_checkContinuity(dctx, dst, dstCapacity);
				2069	dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
				2070	dctx->previousDstEnd = (char*)dst + dSize;
				2071	return dSize;
				2072	}