Blame - lib/bzip2/bzlib_blocksort.c - filogic/uboot

blob: 278552150222908beeb7e96e87b69890400d619e [file] [log] [blame]

Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	1
/*-------------------------------------------------------------*/
/*--- Block sorting machinery                               ---*/
/*---                                           blocksort.c ---*/
/*-------------------------------------------------------------*/
				6
				7	/*--
				8	This file is a part of bzip2 and/or libbzip2, a program and
				9	library for lossless, block-sorting data compression.
				10
				11	Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
				12
				13	Redistribution and use in source and binary forms, with or without
				14	modification, are permitted provided that the following conditions
				15	are met:
				16
				17	1. Redistributions of source code must retain the above copyright
				18	notice, this list of conditions and the following disclaimer.
				19
				20	2. The origin of this software must not be misrepresented; you must
				21	not claim that you wrote the original software. If you use this
				22	software in a product, an acknowledgment in the product
				23	documentation would be appreciated but is not required.
				24
				25	3. Altered source versions must be plainly marked as such, and must
				26	not be misrepresented as being the original software.
				27
				28	4. The name of the author may not be used to endorse or promote
				29	products derived from this software without specific prior written
				30	permission.
				31
				32	THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
				33	OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
				34	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				35	ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
				36	DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				37	DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
				38	GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				39	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
				40	WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				41	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				42	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				43
				44	Julian Seward, Cambridge, UK.
				45	jseward@acm.org
				46	bzip2/libbzip2 version 1.0.6 of 6 September 2010
				47	Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
				48
				49	This program is based on (at least) the work of:
				50	Mike Burrows
				51	David Wheeler
				52	Peter Fenwick
				53	Alistair Moffat
				54	Radford Neal
				55	Ian H. Witten
				56	Robert Sedgewick
				57	Jon L. Bentley
				58
				59	For more information on these sources, see the manual.
				60	--*/
				61
				62	#include "bzlib_private.h"
				63
/*---------------------------------------------*/
/*--- Fallback O(N log(N)^2) sorting        ---*/
/*--- algorithm, for repetitive blocks      ---*/
/*---------------------------------------------*/

/*---------------------------------------------*/
				70	static
				71	__inline__
				72	void fallbackSimpleSort ( UInt32* fmap,
				73	UInt32* eclass,
				74	Int32 lo,
				75	Int32 hi )
				76	{
				77	Int32 i, j, tmp;
				78	UInt32 ec_tmp;
				79
				80	if (lo == hi) return;
				81
				82	if (hi - lo > 3) {
				83	for ( i = hi-4; i >= lo; i-- ) {
				84	tmp = fmap[i];
				85	ec_tmp = eclass[tmp];
				86	for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
				87	fmap[j-4] = fmap[j];
				88	fmap[j-4] = tmp;
				89	}
				90	}
				91
				92	for ( i = hi-1; i >= lo; i-- ) {
				93	tmp = fmap[i];
				94	ec_tmp = eclass[tmp];
				95	for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
				96	fmap[j-1] = fmap[j];
				97	fmap[j-1] = tmp;
				98	}
				99	}
				100
				101
/*---------------------------------------------*/
				103	#define fswap(zz1, zz2) \
				104	{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
				105
				106	#define fvswap(zzp1, zzp2, zzn) \
				107	{ \
				108	Int32 yyp1 = (zzp1); \
				109	Int32 yyp2 = (zzp2); \
				110	Int32 yyn = (zzn); \
				111	while (yyn > 0) { \
				112	fswap(fmap[yyp1], fmap[yyp2]); \
				113	yyp1++; yyp2++; yyn--; \
				114	} \
				115	}
				116
				117
				118	#define fmin(a,b) ((a) < (b)) ? (a) : (b)
				119
				120	#define fpush(lz,hz) { stackLo[sp] = lz; \
				121	stackHi[sp] = hz; \
				122	sp++; }
				123
				124	#define fpop(lz,hz) { sp--; \
				125	lz = stackLo[sp]; \
				126	hz = stackHi[sp]; }
				127
				128	#define FALLBACK_QSORT_SMALL_THRESH 10
				129	#define FALLBACK_QSORT_STACK_SIZE 100
				130
				131
				132	static
				133	void fallbackQSort3 ( UInt32* fmap,
				134	UInt32* eclass,
				135	Int32 loSt,
				136	Int32 hiSt )
				137	{
				138	Int32 unLo, unHi, ltLo, gtHi, n, m;
				139	Int32 sp, lo, hi;
				140	UInt32 med, r, r3;
				141	Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
				142	Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
				143
				144	r = 0;
				145
				146	sp = 0;
				147	fpush ( loSt, hiSt );
				148
				149	while (sp > 0) {
				150
				151	AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
				152
				153	fpop ( lo, hi );
				154	if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
				155	fallbackSimpleSort ( fmap, eclass, lo, hi );
				156	continue;
				157	}
				158
				159	/* Random partitioning. Median of 3 sometimes fails to
				160	avoid bad cases. Median of 9 seems to help but
				161	looks rather expensive. This too seems to work but
				162	is cheaper. Guidance for the magic constants
				163	7621 and 32768 is taken from Sedgewick's algorithms
				164	book, chapter 35.
				165	*/
				166	r = ((r * 7621) + 1) % 32768;
				167	r3 = r % 3;
				168	if (r3 == 0) med = eclass[fmap[lo]]; else
				169	if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
				170	med = eclass[fmap[hi]];
				171
				172	unLo = ltLo = lo;
				173	unHi = gtHi = hi;
				174
				175	while (1) {
				176	while (1) {
				177	if (unLo > unHi) break;
				178	n = (Int32)eclass[fmap[unLo]] - (Int32)med;
				179	if (n == 0) {
				180	fswap(fmap[unLo], fmap[ltLo]);
				181	ltLo++; unLo++;
				182	continue;
				183	};
				184	if (n > 0) break;
				185	unLo++;
				186	}
				187	while (1) {
				188	if (unLo > unHi) break;
				189	n = (Int32)eclass[fmap[unHi]] - (Int32)med;
				190	if (n == 0) {
				191	fswap(fmap[unHi], fmap[gtHi]);
				192	gtHi--; unHi--;
				193	continue;
				194	};
				195	if (n < 0) break;
				196	unHi--;
				197	}
				198	if (unLo > unHi) break;
				199	fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
				200	}
				201
				202	AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
				203
				204	if (gtHi < ltLo) continue;
				205
				206	n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
				207	m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
				208
				209	n = lo + unLo - ltLo - 1;
				210	m = hi - (gtHi - unHi) + 1;
				211
				212	if (n - lo > hi - m) {
				213	fpush ( lo, n );
				214	fpush ( m, hi );
				215	} else {
				216	fpush ( m, hi );
				217	fpush ( lo, n );
				218	}
				219	}
				220	}
				221
/* The helper macros and constants above are private to
   fallbackQSort3; retire them so they are not visible to the rest
   of the file. */
#undef fmin
#undef fpush
#undef fpop
#undef fswap
#undef fvswap
#undef FALLBACK_QSORT_SMALL_THRESH
#undef FALLBACK_QSORT_STACK_SIZE
				229
				230
/*---------------------------------------------*/
/* Pre:
      nblock > 0
      eclass exists for [0 .. nblock-1]
      ((UChar*)eclass) [0 .. nblock-1] holds block
      fmap exists for [0 .. nblock-1]

   Post:
      ((UChar*)eclass) [0 .. nblock-1] holds block
      All other areas of eclass destroyed
      fmap [0 .. nblock-1] holds sorted order
      bhtab [ 0 .. 2+(nblock/32) ] destroyed
*/
				244
				245	#define SET_BH(zz) bhtab[(zz) >> 5] \|= (1 << ((zz) & 31))
				246	#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
				247	#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
				248	#define WORD_BH(zz) bhtab[(zz) >> 5]
				249	#define UNALIGNED_BH(zz) ((zz) & 0x01f)
				250
/*
 * Fallback block sort.  On entry the low nblock bytes of eclass
 * (viewed as UChar) hold the block; on exit fmap[0 .. nblock-1] holds
 * the sorted order and the block bytes have been rebuilt in place.
 *
 * fmap   - output permutation, filled by the initial radix pass and
 *          then refined bucket by bucket
 * eclass - doubles as the block bytes (via eclass8) and, during the
 *          refinement loop, as the per-position bucket key
 * bhtab  - bit vector marking the first position of each bucket;
 *          indices 0 .. 2+(nblock/32)-1 are cleared and then reused
 * nblock - number of bytes in the block
 * verb   - verbosity; progress is printed when verb >= 4
 */
static
void fallbackSort ( UInt32* fmap,
                    UInt32* eclass,
                    UInt32* bhtab,
                    Int32   nblock,
                    Int32   verb )
{
   Int32 ftab[257];
   Int32 ftabCopy[256];
   Int32 H, i, j, k, l, r, cc, cc1;
   Int32 nNotDone;
   Int32 nBhtab;
   UChar* eclass8 = (UChar*)eclass;

   /*--
      Initial 1-char radix sort to generate
      initial fmap and initial BH bits.
   --*/
   if (verb >= 4)
      VPrintf0 ( " bucket sorting ...\n" );
   /* histogram of byte values; ftabCopy keeps the raw counts so the
      block can be rebuilt at the end */
   for (i = 0; i < 257;    i++) ftab[i] = 0;
   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
   /* running totals: ftab[c] becomes one past the end of bucket c */
   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];

   /* place each position into its bucket, filling from the bucket's
      end downwards; afterwards ftab[c] is the start of bucket c */
   for (i = 0; i < nblock; i++) {
      j = eclass8[i];
      k = ftab[j] - 1;
      ftab[j] = k;
      fmap[k] = i;
   }

   /* clear the bit vector, then flag the first slot of every bucket */
   nBhtab = 2 + (nblock / 32);
   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
   for (i = 0; i < 256; i++) SET_BH(ftab[i]);

   /*--
      Inductively refine the buckets.  Kind-of an
      "exponential radix sort" (!), inspired by the
      Manber-Myers suffix array construction algorithm.
   --*/

   /*-- set sentinel bits for block-end detection --*/
   /* alternating set/clear bits just past nblock, so both the
      "skip set bits" and "skip clear bits" scans below terminate */
   for (i = 0; i < 32; i++) {
      SET_BH(nblock + 2*i);
      CLEAR_BH(nblock + 2*i + 1);
   }

   /*-- the log(N) loop --*/
   H = 1;
   while (1) {

      if (verb >= 4)
         VPrintf1 ( " depth %6d has ", H );

      /* j tracks the start of the bucket containing slot i; record
         it as the key of the position H places before fmap[i],
         wrapping around the block */
      j = 0;
      for (i = 0; i < nblock; i++) {
         if (ISSET_BH(i)) j = i;
         k = fmap[i] - H; if (k < 0) k += nblock;
         eclass[k] = j;
      }

      nNotDone = 0;
      r = -1;
      while (1) {

         /*-- find the next non-singleton bucket --*/
         /* skip a run of set bits: bit by bit up to a word boundary,
            then whole all-ones words, then bit by bit again */
         k = r + 1;
         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
         if (ISSET_BH(k)) {
            while (WORD_BH(k) == 0xffffffff) k += 32;
            while (ISSET_BH(k)) k++;
         }
         l = k - 1;
         if (l >= nblock) break;
         /* skip the following run of clear bits the same way */
         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
         if (!ISSET_BH(k)) {
            while (WORD_BH(k) == 0x00000000) k += 32;
            while (!ISSET_BH(k)) k++;
         }
         r = k - 1;
         if (r >= nblock) break;

         /*-- now [l, r] bracket current bucket --*/
         if (r > l) {
            nNotDone += (r - l + 1);
            fallbackQSort3 ( fmap, eclass, l, r );

            /*-- scan bucket and generate header bits --*/
            /* a change of key between neighbours starts a new bucket */
            cc = -1;
            for (i = l; i <= r; i++) {
               cc1 = eclass[fmap[i]];
               if (cc != cc1) { SET_BH(i); cc = cc1; };
            }
         }
      }

      if (verb >= 4)
         VPrintf1 ( "%6d unresolved strings\n", nNotDone );

      /* double the comparison depth; stop once it exceeds the block
         length or no bucket needed sorting in this round */
      H *= 2;
      if (H > nblock || nNotDone == 0) break;
   }

   /*--
      Reconstruct the original block in
      eclass8 [0 .. nblock-1], since the
      previous phase destroyed it.
   --*/
   if (verb >= 4)
      VPrintf0 ( " reconstructing block ...\n" );
   /* walk the sorted order, handing out byte values in ascending
      order according to the saved counts */
   j = 0;
   for (i = 0; i < nblock; i++) {
      while (ftabCopy[j] == 0) j++;
      ftabCopy[j]--;
      eclass8[fmap[i]] = (UChar)j;
   }
   AssertH ( j < 256, 1005 );
}
				370
/* The bit-vector helpers are private to fallbackSort; retire them. */
#undef       SET_BH
#undef     CLEAR_BH
#undef     ISSET_BH
#undef      WORD_BH
#undef UNALIGNED_BH
				376
				377
/*---------------------------------------------*/
/*--- The main, O(N^2 log(N)) sorting       ---*/
/*--- algorithm.  Faster for "normal"       ---*/
/*--- non-repetitive blocks.                ---*/
/*---------------------------------------------*/

/*---------------------------------------------*/
/*
 * Compare the byte strings that start at block[i1] and block[i2].
 * Returns non-zero (c1 > c2, or s1 > s2) at the first position where
 * the two differ, and False if no difference is found within the
 * bounded number of rounds.
 *
 * The first 12 positions are compared on block[] alone.  After that
 * the loop works in groups of 8, comparing block[] and then
 * quadrant[] at each position.  Both indices are wrapped back into
 * [0, nblock) only after a full group, so reads can run a little past
 * nblock.
 * NOTE(review): this presumably relies on the caller having mirrored
 * the start of block/quadrant past the end (mainSort copies
 * BZ_N_OVERSHOOT entries) -- confirm BZ_N_OVERSHOOT covers the
 * overrun.
 *
 * *budget is decremented once per group of 8 in the loop; callers
 * test it for < 0 to abandon the sort.
 */
static
__inline__
Bool mainGtU ( UInt32  i1,
               UInt32  i2,
               UChar*  block,
               UInt16* quadrant,
               UInt32  nblock,
               Int32*  budget )
{
   Int32  k;
   UChar  c1, c2;
   UInt16 s1, s2;

   AssertD ( i1 != i2, "mainGtU" );
   /* 1 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 2 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 3 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 4 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 5 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 6 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 7 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 8 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 9 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 10 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 11 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;
   /* 12 */
   c1 = block[i1]; c2 = block[i2];
   if (c1 != c2) return (c1 > c2);
   i1++; i2++;

   /* k bounds the number of rounds below: it starts at nblock + 8
      and drops by 8 per round until it goes negative */
   k = nblock + 8;

   do {
      /* 1 */
      c1 = block[i1]; c2 = block[i2];
      if (c1 != c2) return (c1 > c2);
      s1 = quadrant[i1]; s2 = quadrant[i2];
      if (s1 != s2) return (s1 > s2);
      i1++; i2++;
      /* 2 */
      c1 = block[i1]; c2 = block[i2];
      if (c1 != c2) return (c1 > c2);
      s1 = quadrant[i1]; s2 = quadrant[i2];
      if (s1 != s2) return (s1 > s2);
      i1++; i2++;
      /* 3 */
      c1 = block[i1]; c2 = block[i2];
      if (c1 != c2) return (c1 > c2);
      s1 = quadrant[i1]; s2 = quadrant[i2];
      if (s1 != s2) return (s1 > s2);
      i1++; i2++;
      /* 4 */
      c1 = block[i1]; c2 = block[i2];
      if (c1 != c2) return (c1 > c2);
      s1 = quadrant[i1]; s2 = quadrant[i2];
      if (s1 != s2) return (s1 > s2);
      i1++; i2++;
      /* 5 */
      c1 = block[i1]; c2 = block[i2];
      if (c1 != c2) return (c1 > c2);
      s1 = quadrant[i1]; s2 = quadrant[i2];
      if (s1 != s2) return (s1 > s2);
      i1++; i2++;
      /* 6 */
      c1 = block[i1]; c2 = block[i2];
      if (c1 != c2) return (c1 > c2);
      s1 = quadrant[i1]; s2 = quadrant[i2];
      if (s1 != s2) return (s1 > s2);
      i1++; i2++;
      /* 7 */
      c1 = block[i1]; c2 = block[i2];
      if (c1 != c2) return (c1 > c2);
      s1 = quadrant[i1]; s2 = quadrant[i2];
      if (s1 != s2) return (s1 > s2);
      i1++; i2++;
      /* 8 */
      c1 = block[i1]; c2 = block[i2];
      if (c1 != c2) return (c1 > c2);
      s1 = quadrant[i1]; s2 = quadrant[i2];
      if (s1 != s2) return (s1 > s2);
      i1++; i2++;

      /* wrap both positions back into the block */
      if (i1 >= nblock) i1 -= nblock;
      if (i2 >= nblock) i2 -= nblock;

      k -= 8;
      /* charge one unit of work per group of 8 */
      (*budget)--;
   }
      while (k >= 0);

   /* no difference found within the bounded number of rounds */
   return False;
}
				510
				511
/*---------------------------------------------*/
				513	/*--
				514	Knuth's increments seem to work better
				515	than Incerpi-Sedgewick here. Possibly
				516	because the number of elems to sort is
				517	usually small, typically <= 20.
				518	--*/
				519	static
				520	Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
				521	9841, 29524, 88573, 265720,
				522	797161, 2391484 };
				523
				524	static
				525	void mainSimpleSort ( UInt32* ptr,
				526	UChar* block,
				527	UInt16* quadrant,
				528	Int32 nblock,
				529	Int32 lo,
				530	Int32 hi,
				531	Int32 d,
				532	Int32* budget )
				533	{
				534	Int32 i, j, h, bigN, hp;
				535	UInt32 v;
				536
				537	bigN = hi - lo + 1;
				538	if (bigN < 2) return;
				539
				540	hp = 0;
				541	while (incs[hp] < bigN) hp++;
				542	hp--;
				543
				544	for (; hp >= 0; hp--) {
				545	h = incs[hp];
				546
				547	i = lo + h;
				548	while (True) {
				549
				550	/-- copy 1 --/
				551	if (i > hi) break;
				552	v = ptr[i];
				553	j = i;
				554	while ( mainGtU (
				555	ptr[j-h]+d, v+d, block, quadrant, nblock, budget
				556	) ) {
				557	ptr[j] = ptr[j-h];
				558	j = j - h;
				559	if (j <= (lo + h - 1)) break;
				560	}
				561	ptr[j] = v;
				562	i++;
				563
				564	/-- copy 2 --/
				565	if (i > hi) break;
				566	v = ptr[i];
				567	j = i;
				568	while ( mainGtU (
				569	ptr[j-h]+d, v+d, block, quadrant, nblock, budget
				570	) ) {
				571	ptr[j] = ptr[j-h];
				572	j = j - h;
				573	if (j <= (lo + h - 1)) break;
				574	}
				575	ptr[j] = v;
				576	i++;
				577
				578	/-- copy 3 --/
				579	if (i > hi) break;
				580	v = ptr[i];
				581	j = i;
				582	while ( mainGtU (
				583	ptr[j-h]+d, v+d, block, quadrant, nblock, budget
				584	) ) {
				585	ptr[j] = ptr[j-h];
				586	j = j - h;
				587	if (j <= (lo + h - 1)) break;
				588	}
				589	ptr[j] = v;
				590	i++;
				591
				592	if (*budget < 0) return;
				593	}
				594	}
				595	}
				596
				597
/*---------------------------------------------*/
				599	/*--
				600	The following is an implementation of
				601	an elegant 3-way quicksort for strings,
				602	described in a paper "Fast Algorithms for
				603	Sorting and Searching Strings", by Robert
				604	Sedgewick and Jon L. Bentley.
				605	--*/
				606
				607	#define mswap(zz1, zz2) \
				608	{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
				609
				610	#define mvswap(zzp1, zzp2, zzn) \
				611	{ \
				612	Int32 yyp1 = (zzp1); \
				613	Int32 yyp2 = (zzp2); \
				614	Int32 yyn = (zzn); \
				615	while (yyn > 0) { \
				616	mswap(ptr[yyp1], ptr[yyp2]); \
				617	yyp1++; yyp2++; yyn--; \
				618	} \
				619	}
				620
				621	static
				622	__inline__
				623	UChar mmed3 ( UChar a, UChar b, UChar c )
				624	{
				625	UChar t;
				626	if (a > b) { t = a; a = b; b = t; };
				627	if (b > c) {
				628	b = c;
				629	if (a > b) b = a;
				630	}
				631	return b;
				632	}
				633
				634	#define mmin(a,b) ((a) < (b)) ? (a) : (b)
				635
				636	#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
				637	stackHi[sp] = hz; \
				638	stackD [sp] = dz; \
				639	sp++; }
				640
				641	#define mpop(lz,hz,dz) { sp--; \
				642	lz = stackLo[sp]; \
				643	hz = stackHi[sp]; \
				644	dz = stackD [sp]; }
				645
				646
				647	#define mnextsize(az) (nextHi[az]-nextLo[az])
				648
				649	#define mnextswap(az,bz) \
				650	{ Int32 tz; \
				651	tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
				652	tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
				653	tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
				654
				655
				656	#define MAIN_QSORT_SMALL_THRESH 20
				657	#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
				658	#define MAIN_QSORT_STACK_SIZE 100
				659
				660	static
				661	void mainQSort3 ( UInt32* ptr,
				662	UChar* block,
				663	UInt16* quadrant,
				664	Int32 nblock,
				665	Int32 loSt,
				666	Int32 hiSt,
				667	Int32 dSt,
				668	Int32* budget )
				669	{
				670	Int32 unLo, unHi, ltLo, gtHi, n, m, med;
				671	Int32 sp, lo, hi, d;
				672
				673	Int32 stackLo[MAIN_QSORT_STACK_SIZE];
				674	Int32 stackHi[MAIN_QSORT_STACK_SIZE];
				675	Int32 stackD [MAIN_QSORT_STACK_SIZE];
				676
				677	Int32 nextLo[3];
				678	Int32 nextHi[3];
				679	Int32 nextD [3];
				680
				681	sp = 0;
				682	mpush ( loSt, hiSt, dSt );
				683
				684	while (sp > 0) {
				685
				686	AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
				687
				688	mpop ( lo, hi, d );
				689	if (hi - lo < MAIN_QSORT_SMALL_THRESH \|\|
				690	d > MAIN_QSORT_DEPTH_THRESH) {
				691	mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
				692	if (*budget < 0) return;
				693	continue;
				694	}
				695
				696	med = (Int32)
				697	mmed3 ( block[ptr[ lo ]+d],
				698	block[ptr[ hi ]+d],
				699	block[ptr[ (lo+hi)>>1 ]+d] );
				700
				701	unLo = ltLo = lo;
				702	unHi = gtHi = hi;
				703
				704	while (True) {
				705	while (True) {
				706	if (unLo > unHi) break;
				707	n = ((Int32)block[ptr[unLo]+d]) - med;
				708	if (n == 0) {
				709	mswap(ptr[unLo], ptr[ltLo]);
				710	ltLo++; unLo++; continue;
				711	};
				712	if (n > 0) break;
				713	unLo++;
				714	}
				715	while (True) {
				716	if (unLo > unHi) break;
				717	n = ((Int32)block[ptr[unHi]+d]) - med;
				718	if (n == 0) {
				719	mswap(ptr[unHi], ptr[gtHi]);
				720	gtHi--; unHi--; continue;
				721	};
				722	if (n < 0) break;
				723	unHi--;
				724	}
				725	if (unLo > unHi) break;
				726	mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
				727	}
				728
				729	AssertD ( unHi == unLo-1, "mainQSort3(2)" );
				730
				731	if (gtHi < ltLo) {
				732	mpush(lo, hi, d+1 );
				733	continue;
				734	}
				735
				736	n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
				737	m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
				738
				739	n = lo + unLo - ltLo - 1;
				740	m = hi - (gtHi - unHi) + 1;
				741
				742	nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
				743	nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
				744	nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
				745
				746	if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
				747	if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
				748	if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
				749
				750	AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
				751	AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
				752
				753	mpush (nextLo[0], nextHi[0], nextD[0]);
				754	mpush (nextLo[1], nextHi[1], nextD[1]);
				755	mpush (nextLo[2], nextHi[2], nextD[2]);
				756	}
				757	}
				758
/* The helper macros and constants above are private to mainQSort3;
   retire them so they are not visible to the rest of the file. */
#undef mswap
#undef mvswap
#undef mpush
#undef mpop
#undef mmin
#undef mnextsize
#undef mnextswap
#undef MAIN_QSORT_SMALL_THRESH
#undef MAIN_QSORT_DEPTH_THRESH
#undef MAIN_QSORT_STACK_SIZE
				769
				770
/*---------------------------------------------*/
				772	/* Pre:
				773	nblock > N_OVERSHOOT
				774	block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
				775	((UChar*)block32) [0 .. nblock-1] holds block
				776	ptr exists for [0 .. nblock-1]
				777
				778	Post:
				779	((UChar*)block32) [0 .. nblock-1] holds block
				780	All other areas of block32 destroyed
				781	ftab [0 .. 65536 ] destroyed
				782	ptr [0 .. nblock-1] holds sorted order
				783	if (*budget < 0), sorting was abandoned
				784	*/
				785
				786	#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
				787	#define SETMASK (1 << 21)
				788	#define CLEARMASK (~(SETMASK))
				789
/*
 * Main block sort.  Radix-sorts all positions on their first two
 * bytes into 65536 "small buckets" (ftab holds the bucket starts),
 * then processes the 256 "big buckets" (one per first byte) from
 * least to most populated: unsorted small buckets are quicksorted
 * with mainQSort3, and each finished big bucket is scanned to derive
 * the order of other small buckets without sorting them.
 *
 * ptr      - output: ptr[0 .. nblock-1] receives the sorted order
 * block    - block bytes; the first BZ_N_OVERSHOOT bytes are mirrored
 *            to block[nblock ..] by this function
 * quadrant - cached ordering hints consulted by mainGtU; zeroed here
 *            and updated as big buckets complete
 * ftab     - work table of 65537 entries; overwritten
 * nblock   - number of bytes in the block
 * verb     - verbosity; progress is printed when verb >= 4
 * budget   - work allowance; the function returns early, leaving the
 *            sort unfinished, as soon as *budget goes negative
 */
static
void mainSort ( UInt32* ptr,
                UChar*  block,
                UInt16* quadrant,
                UInt32* ftab,
                Int32   nblock,
                Int32   verb,
                Int32*  budget )
{
   Int32  i, j, k, ss, sb;
   Int32  runningOrder[256];
   Bool   bigDone[256];
   Int32  copyStart[256];
   Int32  copyEnd  [256];
   UChar  c1;
   Int32  numQSorted;
   UInt16 s;
   if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );

   /*-- set up the 2-byte frequency table --*/
   for (i = 65536; i >= 0; i--) ftab[i] = 0;

   /* Walk the block backwards keeping a rolling 16-bit key in j:
      the byte at position i in the high half and the byte that
      follows it in the low half (block[0] follows the last byte).
      The first loop handles four positions per iteration, the second
      mops up the remainder.  quadrant is zeroed on the way. */
   j = block[0] << 8;
   i = nblock-1;
   for (; i >= 3; i -= 4) {
      quadrant[i] = 0;
      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
      ftab[j]++;
      quadrant[i-1] = 0;
      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
      ftab[j]++;
      quadrant[i-2] = 0;
      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
      ftab[j]++;
      quadrant[i-3] = 0;
      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
      ftab[j]++;
   }
   for (; i >= 0; i--) {
      quadrant[i] = 0;
      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
      ftab[j]++;
   }

   /*-- (emphasises close relationship of block & quadrant) --*/
   /* mirror the start of the block past its end, and clear the
      matching quadrant slots */
   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
      block   [nblock+i] = block[i];
      quadrant[nblock+i] = 0;
   }

   if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );

   /*-- Complete the initial radix sort --*/
   /* running totals: ftab[key] becomes one past the end of its
      small bucket */
   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];

   /* second backwards walk with the same rolling key, dropping each
      position into its bucket from the bucket's end downwards */
   s = block[0] << 8;
   i = nblock-1;
   for (; i >= 3; i -= 4) {
      s = (s >> 8) | (block[i] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i;
      s = (s >> 8) | (block[i-1] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i-1;
      s = (s >> 8) | (block[i-2] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i-2;
      s = (s >> 8) | (block[i-3] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i-3;
   }
   for (; i >= 0; i--) {
      s = (s >> 8) | (block[i] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i;
   }

   /*--
      Now ftab contains the first loc of every small bucket.
      Calculate the running order, from smallest to largest
      big bucket.
   --*/
   for (i = 0; i <= 255; i++) {
      bigDone     [i] = False;
      runningOrder[i] = i;
   }

   /* shell sort of runningOrder[] by big-bucket population, using
      gaps of the form 3h+1 */
   {
      Int32 vv;
      Int32 h = 1;
      do h = 3 * h + 1; while (h <= 256);
      do {
         h = h / 3;
         for (i = h; i <= 255; i++) {
            vv = runningOrder[i];
            j = i;
            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
               runningOrder[j] = runningOrder[j-h];
               j = j - h;
               if (j <= (h - 1)) goto zero;
            }
            zero:
            runningOrder[j] = vv;
         }
      } while (h != 1);
   }

   /*--
      The main sorting loop.
   --*/

   numQSorted = 0;

   for (i = 0; i <= 255; i++) {

      /*--
         Process big buckets, starting with the least full.
         Basically this is a 3-step process in which we call
         mainQSort3 to sort the small buckets [ss, j], but
         also make a big effort to avoid the calls if we can.
      --*/
      ss = runningOrder[i];

      /*--
         Step 1:
         Complete the big bucket [ss] by quicksorting
         any unsorted small buckets [ss, j], for j != ss.
         Hopefully previous pointer-scanning phases have already
         completed many of the small buckets [ss, j], so
         we don't have to sort them at all.
      --*/
      for (j = 0; j <= 255; j++) {
         if (j != ss) {
            sb = (ss << 8) + j;
            if ( ! (ftab[sb] & SETMASK) ) {
               Int32 lo = ftab[sb]   & CLEARMASK;
               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
               if (hi > lo) {
                  if (verb >= 4)
                     VPrintf4 ( " qsort [0x%x, 0x%x] "
                                "done %d this %d\n",
                                ss, j, numQSorted, hi - lo + 1 );
                  mainQSort3 (
                     ptr, block, quadrant, nblock,
                     lo, hi, BZ_N_RADIX, budget
                  );
                  numQSorted += (hi - lo + 1);
                  /* allowance exhausted: give up, leaving ptr[]
                     only partly sorted */
                  if (*budget < 0) return;
               }
            }
            ftab[sb] |= SETMASK;
         }
      }

      AssertH ( !bigDone[ss], 1006 );

      /*--
         Step 2:
         Now scan this big bucket [ss] so as to synthesise the
         sorted order for small buckets [t, ss] for all t,
         including, magically, the bucket [ss,ss] too.
         This will avoid doing Real Work in subsequent Step 1's.
      --*/
      {
         for (j = 0; j <= 255; j++) {
            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
         }
         /* forward scan from the start of the big bucket: the
            predecessor of each entry is appended to the front of
            bucket [c1, ss] */
         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
            k = ptr[j]-1; if (k < 0) k += nblock;
            c1 = block[k];
            if (!bigDone[c1])
               ptr[ copyStart[c1]++ ] = k;
         }
         /* backward scan from the end of the big bucket: the same,
            filling bucket [c1, ss] from the back */
         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
            k = ptr[j]-1; if (k < 0) k += nblock;
            c1 = block[k];
            if (!bigDone[c1])
               ptr[ copyEnd[c1]-- ] = k;
         }
      }

      /* NOTE(review): no ';' follows this AssertH -- presumably the
         macro expands to a complete braced statement; confirm in
         bzlib_private.h. */
      AssertH ( (copyStart[ss]-1 == copyEnd[ss])
                ||
                /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
                   Necessity for this case is demonstrated by compressing
                   a sequence of approximately 48.5 million of character
                   251; 1.0.0/1.0.1 will then die here. */
                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
                1007 )

      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;

      /*--
         Step 3:
         The [ss] big bucket is now done.  Record this fact,
         and update the quadrant descriptors.  Remember to
         update quadrants in the overshoot area too, if
         necessary.  The "if (i < 255)" test merely skips
         this updating for the last bucket processed, since
         updating for the last bucket is pointless.

         The quadrant array provides a way to incrementally
         cache sort orderings, as they appear, so as to
         make subsequent comparisons in mainGtU() complete
         faster.  For repetitive blocks this makes a big
         difference (but not big enough to be able to avoid
         the fallback sorting mechanism, exponential radix sort).

         The precise meaning is: at all times:

            for 0 <= i < nblock and 0 <= j <= nblock

            if block[i] != block[j],

               then the relative values of quadrant[i] and
                    quadrant[j] are meaningless.

               else {
                  if quadrant[i] < quadrant[j]
                     then the string starting at i lexicographically
                     precedes the string starting at j

                  else if quadrant[i] > quadrant[j]
                     then the string starting at j lexicographically
                     precedes the string starting at i

                  else
                     the relative ordering of the strings starting
                     at i and j has not yet been determined.
               }
      --*/
      bigDone[ss] = True;

      if (i < 255) {
         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
         Int32 shifts   = 0;

         /* scale ranks down until they fit in a UInt16 */
         while ((bbSize >> shifts) > 65534) shifts++;

         for (j = bbSize-1; j >= 0; j--) {
            Int32 a2update     = ptr[bbStart + j];
            UInt16 qVal        = (UInt16)(j >> shifts);
            quadrant[a2update] = qVal;
            /* keep the mirrored copy past the end in step */
            if (a2update < BZ_N_OVERSHOOT)
               quadrant[a2update + nblock] = qVal;
         }
         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
      }

   }

   if (verb >= 4)
      VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
                 nblock, numQSorted, nblock - numQSorted );
}
				1052
/* BIGFREQ, SETMASK and CLEARMASK are private to mainSort; retire
   them. */
#undef BIGFREQ
#undef SETMASK
#undef CLEARMASK
				1056
				1057
/*---------------------------------------------*/
				1059	/* Pre:
				1060	nblock > 0
				1061	arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
				1062	((UChar*)arr2) [0 .. nblock-1] holds block
				1063	arr1 exists for [0 .. nblock-1]
				1064
				1065	Post:
				1066	((UChar*)arr2) [0 .. nblock-1] holds block
				1067	All other areas of block destroyed
				1068	ftab [ 0 .. 65536 ] destroyed
				1069	arr1 [0 .. nblock-1] holds sorted order
				1070	*/
				1071	void BZ2_blockSort ( EState* s )
				1072	{
				1073	UInt32* ptr = s->ptr;
				1074	UChar* block = s->block;
				1075	UInt32* ftab = s->ftab;
				1076	Int32 nblock = s->nblock;
				1077	Int32 verb = s->verbosity;
				1078	Int32 wfact = s->workFactor;
				1079	UInt16* quadrant;
				1080	Int32 budget;
				1081	Int32 budgetInit;
				1082	Int32 i;
				1083
				1084	if (nblock < 10000) {
				1085	fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
				1086	} else {
				1087	/* Calculate the location for quadrant, remembering to get
				1088	the alignment right. Assumes that &(block[0]) is at least
				1089	2-byte aligned -- this should be ok since block is really
				1090	the first section of arr2.
				1091	*/
				1092	i = nblock+BZ_N_OVERSHOOT;
				1093	if (i & 1) i++;
				1094	quadrant = (UInt16*)(&(block[i]));
				1095
				1096	/* (wfact-1) / 3 puts the default-factor-30
				1097	transition point at very roughly the same place as
				1098	with v0.1 and v0.9.0.
				1099	Not that it particularly matters any more, since the
				1100	resulting compressed stream is now the same regardless
				1101	of whether or not we use the main sort or fallback sort.
				1102	*/
				1103	if (wfact < 1 ) wfact = 1;
				1104	if (wfact > 100) wfact = 100;
				1105	budgetInit = nblock * ((wfact-1) / 3);
				1106	budget = budgetInit;
				1107
				1108	mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
				1109	if (verb >= 3)
				1110	VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
				1111	budgetInit - budget,
				1112	nblock,
				1113	(float)(budgetInit - budget) /
				1114	(float)(nblock==0 ? 1 : nblock) );
				1115	if (budget < 0) {
				1116	if (verb >= 2)
				1117	VPrintf0 ( " too repetitive; using fallback"
				1118	" sorting algorithm\n" );
				1119	fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
				1120	}
				1121	}
				1122
				1123	s->origPtr = -1;
				1124	for (i = 0; i < s->nblock; i++)
				1125	if (ptr[i] == 0)
				1126	{ s->origPtr = i; break; };
				1127
				1128	AssertH( s->origPtr != -1, 1003 );
				1129	}
				1130
				1131
/*-------------------------------------------------------------*/
/*--- end                                       blocksort.c ---*/
/*-------------------------------------------------------------*/