Blame - lib/bzip2/bzlib_blocksort.c - filogic/uboot

blob: 36cf8436592b5a88d5f5c5cddeb0615539c5168e [file] [log] [blame]

Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	1
				2	/-------------------------------------------------------------/
				3	/--- Block sorting machinery ---/
				4	/--- blocksort.c ---/
				5	/-------------------------------------------------------------/
				6
				7	/*--
				8	This file is a part of bzip2 and/or libbzip2, a program and
				9	library for lossless, block-sorting data compression.
				10
				11	Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
				12
				13	Redistribution and use in source and binary forms, with or without
				14	modification, are permitted provided that the following conditions
				15	are met:
				16
				17	1. Redistributions of source code must retain the above copyright
				18	notice, this list of conditions and the following disclaimer.
				19
				20	2. The origin of this software must not be misrepresented; you must
				21	not claim that you wrote the original software. If you use this
				22	software in a product, an acknowledgment in the product
				23	documentation would be appreciated but is not required.
				24
				25	3. Altered source versions must be plainly marked as such, and must
				26	not be misrepresented as being the original software.
				27
				28	4. The name of the author may not be used to endorse or promote
				29	products derived from this software without specific prior written
				30	permission.
				31
				32	THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
				33	OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
				34	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				35	ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
				36	DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				37	DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
				38	GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				39	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
				40	WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				41	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				42	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				43
				44	Julian Seward, Cambridge, UK.
				45	jseward@acm.org
				46	bzip2/libbzip2 version 1.0.6 of 6 September 2010
				47	Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
				48
				49	This program is based on (at least) the work of:
				50	Mike Burrows
				51	David Wheeler
				52	Peter Fenwick
				53	Alistair Moffat
				54	Radford Neal
				55	Ian H. Witten
				56	Robert Sedgewick
				57	Jon L. Bentley
				58
				59	For more information on these sources, see the manual.
				60	--*/
				61
				62	#include "bzlib_private.h"
Simon Glass	0f2af88	2020-05-10 11:40:05 -0600	[diff] [blame]	63	#include <log.h>
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	64
				65	/---------------------------------------------/
				66	/--- Fallback O(N log(N)^2) sorting ---/
				67	/--- algorithm, for repetitive blocks ---/
				68	/---------------------------------------------/
				69
				70	/---------------------------------------------/
				71	static
				72	__inline__
				73	void fallbackSimpleSort ( UInt32* fmap,
				74	UInt32* eclass,
				75	Int32 lo,
				76	Int32 hi )
				77	{
				78	Int32 i, j, tmp;
				79	UInt32 ec_tmp;
				80
				81	if (lo == hi) return;
				82
				83	if (hi - lo > 3) {
				84	for ( i = hi-4; i >= lo; i-- ) {
				85	tmp = fmap[i];
				86	ec_tmp = eclass[tmp];
				87	for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
				88	fmap[j-4] = fmap[j];
				89	fmap[j-4] = tmp;
				90	}
				91	}
				92
				93	for ( i = hi-1; i >= lo; i-- ) {
				94	tmp = fmap[i];
				95	ec_tmp = eclass[tmp];
				96	for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
				97	fmap[j-1] = fmap[j];
				98	fmap[j-1] = tmp;
				99	}
				100	}
				101
				102
				103	/---------------------------------------------/
				104	#define fswap(zz1, zz2) \
				105	{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
				106
				107	#define fvswap(zzp1, zzp2, zzn) \
				108	{ \
				109	Int32 yyp1 = (zzp1); \
				110	Int32 yyp2 = (zzp2); \
				111	Int32 yyn = (zzn); \
				112	while (yyn > 0) { \
				113	fswap(fmap[yyp1], fmap[yyp2]); \
				114	yyp1++; yyp2++; yyn--; \
				115	} \
				116	}
				117
				118
				119	#define fmin(a,b) ((a) < (b)) ? (a) : (b)
				120
				121	#define fpush(lz,hz) { stackLo[sp] = lz; \
				122	stackHi[sp] = hz; \
				123	sp++; }
				124
				125	#define fpop(lz,hz) { sp--; \
				126	lz = stackLo[sp]; \
				127	hz = stackHi[sp]; }
				128
				129	#define FALLBACK_QSORT_SMALL_THRESH 10
				130	#define FALLBACK_QSORT_STACK_SIZE 100
				131
				132
				133	static
				134	void fallbackQSort3 ( UInt32* fmap,
				135	UInt32* eclass,
				136	Int32 loSt,
				137	Int32 hiSt )
				138	{
				139	Int32 unLo, unHi, ltLo, gtHi, n, m;
				140	Int32 sp, lo, hi;
				141	UInt32 med, r, r3;
				142	Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
				143	Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
				144
				145	r = 0;
				146
				147	sp = 0;
				148	fpush ( loSt, hiSt );
				149
				150	while (sp > 0) {
				151
				152	AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
				153
				154	fpop ( lo, hi );
				155	if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
				156	fallbackSimpleSort ( fmap, eclass, lo, hi );
				157	continue;
				158	}
				159
				160	/* Random partitioning. Median of 3 sometimes fails to
				161	avoid bad cases. Median of 9 seems to help but
				162	looks rather expensive. This too seems to work but
				163	is cheaper. Guidance for the magic constants
				164	7621 and 32768 is taken from Sedgewick's algorithms
				165	book, chapter 35.
				166	*/
				167	r = ((r * 7621) + 1) % 32768;
				168	r3 = r % 3;
				169	if (r3 == 0) med = eclass[fmap[lo]]; else
				170	if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
				171	med = eclass[fmap[hi]];
				172
				173	unLo = ltLo = lo;
				174	unHi = gtHi = hi;
				175
				176	while (1) {
				177	while (1) {
				178	if (unLo > unHi) break;
				179	n = (Int32)eclass[fmap[unLo]] - (Int32)med;
				180	if (n == 0) {
				181	fswap(fmap[unLo], fmap[ltLo]);
				182	ltLo++; unLo++;
				183	continue;
				184	};
				185	if (n > 0) break;
				186	unLo++;
				187	}
				188	while (1) {
				189	if (unLo > unHi) break;
				190	n = (Int32)eclass[fmap[unHi]] - (Int32)med;
				191	if (n == 0) {
				192	fswap(fmap[unHi], fmap[gtHi]);
				193	gtHi--; unHi--;
				194	continue;
				195	};
				196	if (n < 0) break;
				197	unHi--;
				198	}
				199	if (unLo > unHi) break;
				200	fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
				201	}
				202
				203	AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
				204
				205	if (gtHi < ltLo) continue;
				206
				207	n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
				208	m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
				209
				210	n = lo + unLo - ltLo - 1;
				211	m = hi - (gtHi - unHi) + 1;
				212
				213	if (n - lo > hi - m) {
				214	fpush ( lo, n );
				215	fpush ( m, hi );
				216	} else {
				217	fpush ( m, hi );
				218	fpush ( lo, n );
				219	}
				220	}
				221	}
				222
				223	#undef fmin
				224	#undef fpush
				225	#undef fpop
				226	#undef fswap
				227	#undef fvswap
				228	#undef FALLBACK_QSORT_SMALL_THRESH
				229	#undef FALLBACK_QSORT_STACK_SIZE
				230
				231
				232	/---------------------------------------------/
				233	/* Pre:
				234	nblock > 0
				235	eclass exists for [0 .. nblock-1]
				236	((UChar*)eclass) [0 .. nblock-1] holds block
				237	ptr exists for [0 .. nblock-1]
				238
				239	Post:
				240	((UChar*)eclass) [0 .. nblock-1] holds block
				241	All other areas of eclass destroyed
				242	fmap [0 .. nblock-1] holds sorted order
				243	bhtab [ 0 .. 2+(nblock/32) ] destroyed
				244	*/
				245
				246	#define SET_BH(zz) bhtab[(zz) >> 5] \|= (1 << ((zz) & 31))
				247	#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
				248	#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
				249	#define WORD_BH(zz) bhtab[(zz) >> 5]
				250	#define UNALIGNED_BH(zz) ((zz) & 0x01f)
				251
				252	static
				253	void fallbackSort ( UInt32* fmap,
				254	UInt32* eclass,
				255	UInt32* bhtab,
				256	Int32 nblock,
				257	Int32 verb )
				258	{
				259	Int32 ftab[257];
				260	Int32 ftabCopy[256];
				261	Int32 H, i, j, k, l, r, cc, cc1;
				262	Int32 nNotDone;
				263	Int32 nBhtab;
				264	UChar* eclass8 = (UChar*)eclass;
				265
				266	/*--
				267	Initial 1-char radix sort to generate
				268	initial fmap and initial BH bits.
				269	--*/
				270	if (verb >= 4)
				271	VPrintf0 ( " bucket sorting ...\n" );
				272	for (i = 0; i < 257; i++) ftab[i] = 0;
				273	for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
				274	for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
				275	for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
				276
				277	for (i = 0; i < nblock; i++) {
				278	j = eclass8[i];
				279	k = ftab[j] - 1;
				280	ftab[j] = k;
				281	fmap[k] = i;
				282	}
				283
				284	nBhtab = 2 + (nblock / 32);
				285	for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
				286	for (i = 0; i < 256; i++) SET_BH(ftab[i]);
				287
				288	/*--
				289	Inductively refine the buckets. Kind-of an
				290	"exponential radix sort" (!), inspired by the
				291	Manber-Myers suffix array construction algorithm.
				292	--*/
				293
				294	/-- set sentinel bits for block-end detection --/
				295	for (i = 0; i < 32; i++) {
				296	SET_BH(nblock + 2*i);
				297	CLEAR_BH(nblock + 2*i + 1);
				298	}
				299
				300	/-- the log(N) loop --/
				301	H = 1;
				302	while (1) {
				303
				304	if (verb >= 4)
				305	VPrintf1 ( " depth %6d has ", H );
				306
				307	j = 0;
				308	for (i = 0; i < nblock; i++) {
				309	if (ISSET_BH(i)) j = i;
				310	k = fmap[i] - H; if (k < 0) k += nblock;
				311	eclass[k] = j;
				312	}
				313
				314	nNotDone = 0;
				315	r = -1;
				316	while (1) {
				317
				318	/-- find the next non-singleton bucket --/
				319	k = r + 1;
				320	while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
				321	if (ISSET_BH(k)) {
				322	while (WORD_BH(k) == 0xffffffff) k += 32;
				323	while (ISSET_BH(k)) k++;
				324	}
				325	l = k - 1;
				326	if (l >= nblock) break;
				327	while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
				328	if (!ISSET_BH(k)) {
				329	while (WORD_BH(k) == 0x00000000) k += 32;
				330	while (!ISSET_BH(k)) k++;
				331	}
				332	r = k - 1;
				333	if (r >= nblock) break;
				334
				335	/-- now [l, r] bracket current bucket --/
				336	if (r > l) {
				337	nNotDone += (r - l + 1);
				338	fallbackQSort3 ( fmap, eclass, l, r );
				339
				340	/-- scan bucket and generate header bits-- /
				341	cc = -1;
				342	for (i = l; i <= r; i++) {
				343	cc1 = eclass[fmap[i]];
				344	if (cc != cc1) { SET_BH(i); cc = cc1; };
				345	}
				346	}
				347	}
				348
				349	if (verb >= 4)
				350	VPrintf1 ( "%6d unresolved strings\n", nNotDone );
				351
				352	H *= 2;
				353	if (H > nblock \|\| nNotDone == 0) break;
				354	}
				355
				356	/*--
				357	Reconstruct the original block in
				358	eclass8 [0 .. nblock-1], since the
				359	previous phase destroyed it.
				360	--*/
				361	if (verb >= 4)
				362	VPrintf0 ( " reconstructing block ...\n" );
				363	j = 0;
				364	for (i = 0; i < nblock; i++) {
				365	while (ftabCopy[j] == 0) j++;
				366	ftabCopy[j]--;
				367	eclass8[fmap[i]] = (UChar)j;
				368	}
				369	AssertH ( j < 256, 1005 );
				370	}
				371
				372	#undef SET_BH
				373	#undef CLEAR_BH
				374	#undef ISSET_BH
				375	#undef WORD_BH
				376	#undef UNALIGNED_BH
				377
				378
				379	/---------------------------------------------/
				380	/--- The main, O(N^2 log(N)) sorting ---/
				381	/--- algorithm. Faster for "normal" ---/
				382	/--- non-repetitive blocks. ---/
				383	/---------------------------------------------/
				384
				385	/---------------------------------------------/
				386	static
				387	__inline__
				388	Bool mainGtU ( UInt32 i1,
				389	UInt32 i2,
				390	UChar* block,
				391	UInt16* quadrant,
				392	UInt32 nblock,
				393	Int32* budget )
				394	{
				395	Int32 k;
				396	UChar c1, c2;
				397	UInt16 s1, s2;
				398
				399	AssertD ( i1 != i2, "mainGtU" );
				400	/* 1 */
				401	c1 = block[i1]; c2 = block[i2];
				402	if (c1 != c2) return (c1 > c2);
				403	i1++; i2++;
				404	/* 2 */
				405	c1 = block[i1]; c2 = block[i2];
				406	if (c1 != c2) return (c1 > c2);
				407	i1++; i2++;
				408	/* 3 */
				409	c1 = block[i1]; c2 = block[i2];
				410	if (c1 != c2) return (c1 > c2);
				411	i1++; i2++;
				412	/* 4 */
				413	c1 = block[i1]; c2 = block[i2];
				414	if (c1 != c2) return (c1 > c2);
				415	i1++; i2++;
				416	/* 5 */
				417	c1 = block[i1]; c2 = block[i2];
				418	if (c1 != c2) return (c1 > c2);
				419	i1++; i2++;
				420	/* 6 */
				421	c1 = block[i1]; c2 = block[i2];
				422	if (c1 != c2) return (c1 > c2);
				423	i1++; i2++;
				424	/* 7 */
				425	c1 = block[i1]; c2 = block[i2];
				426	if (c1 != c2) return (c1 > c2);
				427	i1++; i2++;
				428	/* 8 */
				429	c1 = block[i1]; c2 = block[i2];
				430	if (c1 != c2) return (c1 > c2);
				431	i1++; i2++;
				432	/* 9 */
				433	c1 = block[i1]; c2 = block[i2];
				434	if (c1 != c2) return (c1 > c2);
				435	i1++; i2++;
				436	/* 10 */
				437	c1 = block[i1]; c2 = block[i2];
				438	if (c1 != c2) return (c1 > c2);
				439	i1++; i2++;
				440	/* 11 */
				441	c1 = block[i1]; c2 = block[i2];
				442	if (c1 != c2) return (c1 > c2);
				443	i1++; i2++;
				444	/* 12 */
				445	c1 = block[i1]; c2 = block[i2];
				446	if (c1 != c2) return (c1 > c2);
				447	i1++; i2++;
				448
				449	k = nblock + 8;
				450
				451	do {
				452	/* 1 */
				453	c1 = block[i1]; c2 = block[i2];
				454	if (c1 != c2) return (c1 > c2);
				455	s1 = quadrant[i1]; s2 = quadrant[i2];
				456	if (s1 != s2) return (s1 > s2);
				457	i1++; i2++;
				458	/* 2 */
				459	c1 = block[i1]; c2 = block[i2];
				460	if (c1 != c2) return (c1 > c2);
				461	s1 = quadrant[i1]; s2 = quadrant[i2];
				462	if (s1 != s2) return (s1 > s2);
				463	i1++; i2++;
				464	/* 3 */
				465	c1 = block[i1]; c2 = block[i2];
				466	if (c1 != c2) return (c1 > c2);
				467	s1 = quadrant[i1]; s2 = quadrant[i2];
				468	if (s1 != s2) return (s1 > s2);
				469	i1++; i2++;
				470	/* 4 */
				471	c1 = block[i1]; c2 = block[i2];
				472	if (c1 != c2) return (c1 > c2);
				473	s1 = quadrant[i1]; s2 = quadrant[i2];
				474	if (s1 != s2) return (s1 > s2);
				475	i1++; i2++;
				476	/* 5 */
				477	c1 = block[i1]; c2 = block[i2];
				478	if (c1 != c2) return (c1 > c2);
				479	s1 = quadrant[i1]; s2 = quadrant[i2];
				480	if (s1 != s2) return (s1 > s2);
				481	i1++; i2++;
				482	/* 6 */
				483	c1 = block[i1]; c2 = block[i2];
				484	if (c1 != c2) return (c1 > c2);
				485	s1 = quadrant[i1]; s2 = quadrant[i2];
				486	if (s1 != s2) return (s1 > s2);
				487	i1++; i2++;
				488	/* 7 */
				489	c1 = block[i1]; c2 = block[i2];
				490	if (c1 != c2) return (c1 > c2);
				491	s1 = quadrant[i1]; s2 = quadrant[i2];
				492	if (s1 != s2) return (s1 > s2);
				493	i1++; i2++;
				494	/* 8 */
				495	c1 = block[i1]; c2 = block[i2];
				496	if (c1 != c2) return (c1 > c2);
				497	s1 = quadrant[i1]; s2 = quadrant[i2];
				498	if (s1 != s2) return (s1 > s2);
				499	i1++; i2++;
				500
				501	if (i1 >= nblock) i1 -= nblock;
				502	if (i2 >= nblock) i2 -= nblock;
				503
				504	k -= 8;
				505	(*budget)--;
				506	}
				507	while (k >= 0);
				508
				509	return False;
				510	}
				511
				512
				513	/---------------------------------------------/
				514	/*--
				515	Knuth's increments seem to work better
				516	than Incerpi-Sedgewick here. Possibly
				517	because the number of elems to sort is
				518	usually small, typically <= 20.
				519	--*/
				520	static
				521	Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
				522	9841, 29524, 88573, 265720,
				523	797161, 2391484 };
				524
				525	static
				526	void mainSimpleSort ( UInt32* ptr,
				527	UChar* block,
				528	UInt16* quadrant,
				529	Int32 nblock,
				530	Int32 lo,
				531	Int32 hi,
				532	Int32 d,
				533	Int32* budget )
				534	{
				535	Int32 i, j, h, bigN, hp;
				536	UInt32 v;
				537
				538	bigN = hi - lo + 1;
				539	if (bigN < 2) return;
				540
				541	hp = 0;
				542	while (incs[hp] < bigN) hp++;
				543	hp--;
				544
				545	for (; hp >= 0; hp--) {
				546	h = incs[hp];
				547
				548	i = lo + h;
				549	while (True) {
				550
				551	/-- copy 1 --/
				552	if (i > hi) break;
				553	v = ptr[i];
				554	j = i;
				555	while ( mainGtU (
				556	ptr[j-h]+d, v+d, block, quadrant, nblock, budget
				557	) ) {
				558	ptr[j] = ptr[j-h];
				559	j = j - h;
				560	if (j <= (lo + h - 1)) break;
				561	}
				562	ptr[j] = v;
				563	i++;
				564
				565	/-- copy 2 --/
				566	if (i > hi) break;
				567	v = ptr[i];
				568	j = i;
				569	while ( mainGtU (
				570	ptr[j-h]+d, v+d, block, quadrant, nblock, budget
				571	) ) {
				572	ptr[j] = ptr[j-h];
				573	j = j - h;
				574	if (j <= (lo + h - 1)) break;
				575	}
				576	ptr[j] = v;
				577	i++;
				578
				579	/-- copy 3 --/
				580	if (i > hi) break;
				581	v = ptr[i];
				582	j = i;
				583	while ( mainGtU (
				584	ptr[j-h]+d, v+d, block, quadrant, nblock, budget
				585	) ) {
				586	ptr[j] = ptr[j-h];
				587	j = j - h;
				588	if (j <= (lo + h - 1)) break;
				589	}
				590	ptr[j] = v;
				591	i++;
				592
				593	if (*budget < 0) return;
				594	}
				595	}
				596	}
				597
				598
				599	/---------------------------------------------/
				600	/*--
				601	The following is an implementation of
				602	an elegant 3-way quicksort for strings,
				603	described in a paper "Fast Algorithms for
				604	Sorting and Searching Strings", by Robert
				605	Sedgewick and Jon L. Bentley.
				606	--*/
				607
				608	#define mswap(zz1, zz2) \
				609	{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
				610
				611	#define mvswap(zzp1, zzp2, zzn) \
				612	{ \
				613	Int32 yyp1 = (zzp1); \
				614	Int32 yyp2 = (zzp2); \
				615	Int32 yyn = (zzn); \
				616	while (yyn > 0) { \
				617	mswap(ptr[yyp1], ptr[yyp2]); \
				618	yyp1++; yyp2++; yyn--; \
				619	} \
				620	}
				621
				622	static
				623	__inline__
				624	UChar mmed3 ( UChar a, UChar b, UChar c )
				625	{
				626	UChar t;
				627	if (a > b) { t = a; a = b; b = t; };
				628	if (b > c) {
				629	b = c;
				630	if (a > b) b = a;
				631	}
				632	return b;
				633	}
				634
				635	#define mmin(a,b) ((a) < (b)) ? (a) : (b)
				636
				637	#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
				638	stackHi[sp] = hz; \
				639	stackD [sp] = dz; \
				640	sp++; }
				641
				642	#define mpop(lz,hz,dz) { sp--; \
				643	lz = stackLo[sp]; \
				644	hz = stackHi[sp]; \
				645	dz = stackD [sp]; }
				646
				647
				648	#define mnextsize(az) (nextHi[az]-nextLo[az])
				649
				650	#define mnextswap(az,bz) \
				651	{ Int32 tz; \
				652	tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
				653	tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
				654	tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
				655
				656
				657	#define MAIN_QSORT_SMALL_THRESH 20
				658	#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
				659	#define MAIN_QSORT_STACK_SIZE 100
				660
				661	static
				662	void mainQSort3 ( UInt32* ptr,
				663	UChar* block,
				664	UInt16* quadrant,
				665	Int32 nblock,
				666	Int32 loSt,
				667	Int32 hiSt,
				668	Int32 dSt,
				669	Int32* budget )
				670	{
				671	Int32 unLo, unHi, ltLo, gtHi, n, m, med;
				672	Int32 sp, lo, hi, d;
				673
				674	Int32 stackLo[MAIN_QSORT_STACK_SIZE];
				675	Int32 stackHi[MAIN_QSORT_STACK_SIZE];
				676	Int32 stackD [MAIN_QSORT_STACK_SIZE];
				677
				678	Int32 nextLo[3];
				679	Int32 nextHi[3];
				680	Int32 nextD [3];
				681
				682	sp = 0;
				683	mpush ( loSt, hiSt, dSt );
				684
				685	while (sp > 0) {
				686
				687	AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
				688
				689	mpop ( lo, hi, d );
				690	if (hi - lo < MAIN_QSORT_SMALL_THRESH \|\|
				691	d > MAIN_QSORT_DEPTH_THRESH) {
				692	mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
				693	if (*budget < 0) return;
				694	continue;
				695	}
				696
				697	med = (Int32)
				698	mmed3 ( block[ptr[ lo ]+d],
				699	block[ptr[ hi ]+d],
				700	block[ptr[ (lo+hi)>>1 ]+d] );
				701
				702	unLo = ltLo = lo;
				703	unHi = gtHi = hi;
				704
				705	while (True) {
				706	while (True) {
				707	if (unLo > unHi) break;
				708	n = ((Int32)block[ptr[unLo]+d]) - med;
				709	if (n == 0) {
				710	mswap(ptr[unLo], ptr[ltLo]);
				711	ltLo++; unLo++; continue;
				712	};
				713	if (n > 0) break;
				714	unLo++;
				715	}
				716	while (True) {
				717	if (unLo > unHi) break;
				718	n = ((Int32)block[ptr[unHi]+d]) - med;
				719	if (n == 0) {
				720	mswap(ptr[unHi], ptr[gtHi]);
				721	gtHi--; unHi--; continue;
				722	};
				723	if (n < 0) break;
				724	unHi--;
				725	}
				726	if (unLo > unHi) break;
				727	mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
				728	}
				729
				730	AssertD ( unHi == unLo-1, "mainQSort3(2)" );
				731
				732	if (gtHi < ltLo) {
				733	mpush(lo, hi, d+1 );
				734	continue;
				735	}
				736
				737	n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
				738	m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
				739
				740	n = lo + unLo - ltLo - 1;
				741	m = hi - (gtHi - unHi) + 1;
				742
				743	nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
				744	nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
				745	nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
				746
				747	if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
				748	if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
				749	if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
				750
				751	AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
				752	AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
				753
				754	mpush (nextLo[0], nextHi[0], nextD[0]);
				755	mpush (nextLo[1], nextHi[1], nextD[1]);
				756	mpush (nextLo[2], nextHi[2], nextD[2]);
				757	}
				758	}
				759
				760	#undef mswap
				761	#undef mvswap
				762	#undef mpush
				763	#undef mpop
				764	#undef mmin
				765	#undef mnextsize
				766	#undef mnextswap
				767	#undef MAIN_QSORT_SMALL_THRESH
				768	#undef MAIN_QSORT_DEPTH_THRESH
				769	#undef MAIN_QSORT_STACK_SIZE
				770
				771
				772	/---------------------------------------------/
				773	/* Pre:
				774	nblock > N_OVERSHOOT
				775	block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
				776	((UChar*)block32) [0 .. nblock-1] holds block
				777	ptr exists for [0 .. nblock-1]
				778
				779	Post:
				780	((UChar*)block32) [0 .. nblock-1] holds block
				781	All other areas of block32 destroyed
				782	ftab [0 .. 65536 ] destroyed
				783	ptr [0 .. nblock-1] holds sorted order
				784	if (*budget < 0), sorting was abandoned
				785	*/
				786
				787	#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
				788	#define SETMASK (1 << 21)
				789	#define CLEARMASK (~(SETMASK))
				790
				791	static
				792	void mainSort ( UInt32* ptr,
				793	UChar* block,
				794	UInt16* quadrant,
				795	UInt32* ftab,
				796	Int32 nblock,
				797	Int32 verb,
				798	Int32* budget )
				799	{
				800	Int32 i, j, k, ss, sb;
				801	Int32 runningOrder[256];
				802	Bool bigDone[256];
				803	Int32 copyStart[256];
				804	Int32 copyEnd [256];
				805	UChar c1;
				806	Int32 numQSorted;
				807	UInt16 s;
				808	if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
				809
				810	/-- set up the 2-byte frequency table --/
				811	for (i = 65536; i >= 0; i--) ftab[i] = 0;
				812
				813	j = block[0] << 8;
				814	i = nblock-1;
				815	for (; i >= 3; i -= 4) {
				816	quadrant[i] = 0;
				817	j = (j >> 8) \| ( ((UInt16)block[i]) << 8);
				818	ftab[j]++;
				819	quadrant[i-1] = 0;
				820	j = (j >> 8) \| ( ((UInt16)block[i-1]) << 8);
				821	ftab[j]++;
				822	quadrant[i-2] = 0;
				823	j = (j >> 8) \| ( ((UInt16)block[i-2]) << 8);
				824	ftab[j]++;
				825	quadrant[i-3] = 0;
				826	j = (j >> 8) \| ( ((UInt16)block[i-3]) << 8);
				827	ftab[j]++;
				828	}
				829	for (; i >= 0; i--) {
				830	quadrant[i] = 0;
				831	j = (j >> 8) \| ( ((UInt16)block[i]) << 8);
				832	ftab[j]++;
				833	}
				834
				835	/-- (emphasises close relationship of block & quadrant) --/
				836	for (i = 0; i < BZ_N_OVERSHOOT; i++) {
				837	block [nblock+i] = block[i];
				838	quadrant[nblock+i] = 0;
				839	}
				840
				841	if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
				842
				843	/-- Complete the initial radix sort --/
				844	for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
				845
				846	s = block[0] << 8;
				847	i = nblock-1;
				848	for (; i >= 3; i -= 4) {
				849	s = (s >> 8) \| (block[i] << 8);
				850	j = ftab[s] -1;
				851	ftab[s] = j;
				852	ptr[j] = i;
				853	s = (s >> 8) \| (block[i-1] << 8);
				854	j = ftab[s] -1;
				855	ftab[s] = j;
				856	ptr[j] = i-1;
				857	s = (s >> 8) \| (block[i-2] << 8);
				858	j = ftab[s] -1;
				859	ftab[s] = j;
				860	ptr[j] = i-2;
				861	s = (s >> 8) \| (block[i-3] << 8);
				862	j = ftab[s] -1;
				863	ftab[s] = j;
				864	ptr[j] = i-3;
				865	}
				866	for (; i >= 0; i--) {
				867	s = (s >> 8) \| (block[i] << 8);
				868	j = ftab[s] -1;
				869	ftab[s] = j;
				870	ptr[j] = i;
				871	}
				872
				873	/*--
				874	Now ftab contains the first loc of every small bucket.
				875	Calculate the running order, from smallest to largest
				876	big bucket.
				877	--*/
				878	for (i = 0; i <= 255; i++) {
				879	bigDone [i] = False;
				880	runningOrder[i] = i;
				881	}
				882
				883	{
				884	Int32 vv;
				885	Int32 h = 1;
				886	do h = 3 * h + 1; while (h <= 256);
				887	do {
				888	h = h / 3;
				889	for (i = h; i <= 255; i++) {
				890	vv = runningOrder[i];
				891	j = i;
				892	while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
				893	runningOrder[j] = runningOrder[j-h];
				894	j = j - h;
				895	if (j <= (h - 1)) goto zero;
				896	}
				897	zero:
				898	runningOrder[j] = vv;
				899	}
				900	} while (h != 1);
				901	}
				902
				903	/*--
				904	The main sorting loop.
				905	--*/
				906
				907	numQSorted = 0;
				908
				909	for (i = 0; i <= 255; i++) {
				910
				911	/*--
				912	Process big buckets, starting with the least full.
				913	Basically this is a 3-step process in which we call
				914	mainQSort3 to sort the small buckets [ss, j], but
				915	also make a big effort to avoid the calls if we can.
				916	--*/
				917	ss = runningOrder[i];
				918
				919	/*--
				920	Step 1:
				921	Complete the big bucket [ss] by quicksorting
				922	any unsorted small buckets [ss, j], for j != ss.
				923	Hopefully previous pointer-scanning phases have already
				924	completed many of the small buckets [ss, j], so
				925	we don't have to sort them at all.
				926	--*/
				927	for (j = 0; j <= 255; j++) {
				928	if (j != ss) {
				929	sb = (ss << 8) + j;
				930	if ( ! (ftab[sb] & SETMASK) ) {
				931	Int32 lo = ftab[sb] & CLEARMASK;
				932	Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
				933	if (hi > lo) {
				934	if (verb >= 4)
				935	VPrintf4 ( " qsort [0x%x, 0x%x] "
				936	"done %d this %d\n",
				937	ss, j, numQSorted, hi - lo + 1 );
				938	mainQSort3 (
				939	ptr, block, quadrant, nblock,
				940	lo, hi, BZ_N_RADIX, budget
				941	);
				942	numQSorted += (hi - lo + 1);
				943	if (*budget < 0) return;
				944	}
				945	}
				946	ftab[sb] \|= SETMASK;
				947	}
				948	}
				949
				950	AssertH ( !bigDone[ss], 1006 );
				951
				952	/*--
				953	Step 2:
				954	Now scan this big bucket [ss] so as to synthesise the
				955	sorted order for small buckets [t, ss] for all t,
				956	including, magically, the bucket [ss,ss] too.
				957	This will avoid doing Real Work in subsequent Step 1's.
				958	--*/
				959	{
				960	for (j = 0; j <= 255; j++) {
				961	copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
				962	copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
				963	}
				964	for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
				965	k = ptr[j]-1; if (k < 0) k += nblock;
				966	c1 = block[k];
				967	if (!bigDone[c1])
				968	ptr[ copyStart[c1]++ ] = k;
				969	}
				970	for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
				971	k = ptr[j]-1; if (k < 0) k += nblock;
				972	c1 = block[k];
				973	if (!bigDone[c1])
				974	ptr[ copyEnd[c1]-- ] = k;
				975	}
				976	}
				977
				978	AssertH ( (copyStart[ss]-1 == copyEnd[ss])
				979	\|\|
				980	/* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
				981	Necessity for this case is demonstrated by compressing
				982	a sequence of approximately 48.5 million of character
				983	251; 1.0.0/1.0.1 will then die here. */
				984	(copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
				985	1007 )
				986
				987	for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] \|= SETMASK;
				988
				989	/*--
				990	Step 3:
				991	The [ss] big bucket is now done. Record this fact,
				992	and update the quadrant descriptors. Remember to
				993	update quadrants in the overshoot area too, if
				994	necessary. The "if (i < 255)" test merely skips
				995	this updating for the last bucket processed, since
				996	updating for the last bucket is pointless.
				997
				998	The quadrant array provides a way to incrementally
				999	cache sort orderings, as they appear, so as to
				1000	make subsequent comparisons in fullGtU() complete
				1001	faster. For repetitive blocks this makes a big
				1002	difference (but not big enough to be able to avoid
				1003	the fallback sorting mechanism, exponential radix sort).
				1004
				1005	The precise meaning is: at all times:
				1006
				1007	for 0 <= i < nblock and 0 <= j <= nblock
				1008
				1009	if block[i] != block[j],
				1010
				1011	then the relative values of quadrant[i] and
				1012	quadrant[j] are meaningless.
				1013
				1014	else {
				1015	if quadrant[i] < quadrant[j]
				1016	then the string starting at i lexicographically
				1017	precedes the string starting at j
				1018
				1019	else if quadrant[i] > quadrant[j]
				1020	then the string starting at j lexicographically
				1021	precedes the string starting at i
				1022
				1023	else
				1024	the relative ordering of the strings starting
				1025	at i and j has not yet been determined.
				1026	}
				1027	--*/
				1028	bigDone[ss] = True;
				1029
				1030	if (i < 255) {
				1031	Int32 bbStart = ftab[ss << 8] & CLEARMASK;
				1032	Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
				1033	Int32 shifts = 0;
				1034
				1035	while ((bbSize >> shifts) > 65534) shifts++;
				1036
				1037	for (j = bbSize-1; j >= 0; j--) {
				1038	Int32 a2update = ptr[bbStart + j];
				1039	UInt16 qVal = (UInt16)(j >> shifts);
				1040	quadrant[a2update] = qVal;
				1041	if (a2update < BZ_N_OVERSHOOT)
				1042	quadrant[a2update + nblock] = qVal;
				1043	}
				1044	AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
				1045	}
				1046
				1047	}
				1048
				1049	if (verb >= 4)
				1050	VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
				1051	nblock, numQSorted, nblock - numQSorted );
				1052	}
				1053
				1054	#undef BIGFREQ
				1055	#undef SETMASK
				1056	#undef CLEARMASK
				1057
				1058
				1059	/---------------------------------------------/
				1060	/* Pre:
				1061	nblock > 0
				1062	arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
				1063	((UChar*)arr2) [0 .. nblock-1] holds block
				1064	arr1 exists for [0 .. nblock-1]
				1065
				1066	Post:
				1067	((UChar*)arr2) [0 .. nblock-1] holds block
				1068	All other areas of block destroyed
				1069	ftab [ 0 .. 65536 ] destroyed
				1070	arr1 [0 .. nblock-1] holds sorted order
				1071	*/
				1072	void BZ2_blockSort ( EState* s )
				1073	{
				1074	UInt32* ptr = s->ptr;
				1075	UChar* block = s->block;
				1076	UInt32* ftab = s->ftab;
				1077	Int32 nblock = s->nblock;
				1078	Int32 verb = s->verbosity;
				1079	Int32 wfact = s->workFactor;
				1080	UInt16* quadrant;
				1081	Int32 budget;
				1082	Int32 budgetInit;
				1083	Int32 i;
				1084
				1085	if (nblock < 10000) {
				1086	fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
				1087	} else {
				1088	/* Calculate the location for quadrant, remembering to get
				1089	the alignment right. Assumes that &(block[0]) is at least
				1090	2-byte aligned -- this should be ok since block is really
				1091	the first section of arr2.
				1092	*/
				1093	i = nblock+BZ_N_OVERSHOOT;
				1094	if (i & 1) i++;
				1095	quadrant = (UInt16*)(&(block[i]));
				1096
				1097	/* (wfact-1) / 3 puts the default-factor-30
				1098	transition point at very roughly the same place as
				1099	with v0.1 and v0.9.0.
				1100	Not that it particularly matters any more, since the
				1101	resulting compressed stream is now the same regardless
				1102	of whether or not we use the main sort or fallback sort.
				1103	*/
				1104	if (wfact < 1 ) wfact = 1;
				1105	if (wfact > 100) wfact = 100;
				1106	budgetInit = nblock * ((wfact-1) / 3);
				1107	budget = budgetInit;
				1108
				1109	mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
				1110	if (verb >= 3)
				1111	VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
				1112	budgetInit - budget,
				1113	nblock,
				1114	(float)(budgetInit - budget) /
				1115	(float)(nblock==0 ? 1 : nblock) );
				1116	if (budget < 0) {
				1117	if (verb >= 2)
				1118	VPrintf0 ( " too repetitive; using fallback"
				1119	" sorting algorithm\n" );
				1120	fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
				1121	}
				1122	}
				1123
				1124	s->origPtr = -1;
				1125	for (i = 0; i < s->nblock; i++)
				1126	if (ptr[i] == 0)
				1127	{ s->origPtr = i; break; };
				1128
				1129	AssertH( s->origPtr != -1, 1003 );
				1130	}
				1131
				1132
				1133	/-------------------------------------------------------------/
				1134	/--- end blocksort.c ---/
				1135	/-------------------------------------------------------------/