Blame - lib/bzip2/bzlib_blocksort.c - filogic/uboot

blob: 2408ee90c18920b7f0ebbe859f4476f0f0118450 [file] [log] [blame]

Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	1
				2	/-------------------------------------------------------------/
				3	/--- Block sorting machinery ---/
				4	/--- blocksort.c ---/
				5	/-------------------------------------------------------------/
				6
				7	/*--
				8	This file is a part of bzip2 and/or libbzip2, a program and
				9	library for lossless, block-sorting data compression.
				10
				11	Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
				12
				13	Redistribution and use in source and binary forms, with or without
				14	modification, are permitted provided that the following conditions
				15	are met:
				16
				17	1. Redistributions of source code must retain the above copyright
				18	notice, this list of conditions and the following disclaimer.
				19
				20	2. The origin of this software must not be misrepresented; you must
				21	not claim that you wrote the original software. If you use this
				22	software in a product, an acknowledgment in the product
				23	documentation would be appreciated but is not required.
				24
				25	3. Altered source versions must be plainly marked as such, and must
				26	not be misrepresented as being the original software.
				27
				28	4. The name of the author may not be used to endorse or promote
				29	products derived from this software without specific prior written
				30	permission.
				31
				32	THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
				33	OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
				34	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				35	ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
				36	DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				37	DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
				38	GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				39	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
				40	WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				41	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				42	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				43
				44	Julian Seward, Cambridge, UK.
				45	jseward@acm.org
				46	bzip2/libbzip2 version 1.0.6 of 6 September 2010
				47	Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
				48
				49	This program is based on (at least) the work of:
				50	Mike Burrows
				51	David Wheeler
				52	Peter Fenwick
				53	Alistair Moffat
				54	Radford Neal
				55	Ian H. Witten
				56	Robert Sedgewick
				57	Jon L. Bentley
				58
				59	For more information on these sources, see the manual.
				60	--*/
				61
				62	#include "bzlib_private.h"
Simon Glass	0f2af88	2020-05-10 11:40:05 -0600	[diff] [blame]	63	#include <log.h>
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	64
				65	/---------------------------------------------/
				66	/--- Fallback O(N log(N)^2) sorting ---/
				67	/--- algorithm, for repetitive blocks ---/
				68	/---------------------------------------------/
				69
				70	/---------------------------------------------/
				71	static
				72	__inline__
				73	void fallbackSimpleSort ( UInt32* fmap,
				74	UInt32* eclass,
				75	Int32 lo,
				76	Int32 hi )
				77	{
				78	Int32 i, j, tmp;
				79	UInt32 ec_tmp;
				80
				81	if (lo == hi) return;
				82
				83	if (hi - lo > 3) {
				84	for ( i = hi-4; i >= lo; i-- ) {
				85	tmp = fmap[i];
				86	ec_tmp = eclass[tmp];
				87	for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
				88	fmap[j-4] = fmap[j];
				89	fmap[j-4] = tmp;
				90	}
				91	}
				92
				93	for ( i = hi-1; i >= lo; i-- ) {
				94	tmp = fmap[i];
				95	ec_tmp = eclass[tmp];
				96	for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
				97	fmap[j-1] = fmap[j];
				98	fmap[j-1] = tmp;
				99	}
				100	}
				101
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	102	/---------------------------------------------/
				103	#define fswap(zz1, zz2) \
				104	{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
				105
				106	#define fvswap(zzp1, zzp2, zzn) \
				107	{ \
				108	Int32 yyp1 = (zzp1); \
				109	Int32 yyp2 = (zzp2); \
				110	Int32 yyn = (zzn); \
				111	while (yyn > 0) { \
				112	fswap(fmap[yyp1], fmap[yyp2]); \
				113	yyp1++; yyp2++; yyn--; \
				114	} \
				115	}
				116
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	117	#define fmin(a,b) ((a) < (b)) ? (a) : (b)
				118
				119	#define fpush(lz,hz) { stackLo[sp] = lz; \
				120	stackHi[sp] = hz; \
				121	sp++; }
				122
				123	#define fpop(lz,hz) { sp--; \
				124	lz = stackLo[sp]; \
				125	hz = stackHi[sp]; }
				126
				127	#define FALLBACK_QSORT_SMALL_THRESH 10
				128	#define FALLBACK_QSORT_STACK_SIZE 100
				129
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	130	static
				131	void fallbackQSort3 ( UInt32* fmap,
				132	UInt32* eclass,
				133	Int32 loSt,
				134	Int32 hiSt )
				135	{
				136	Int32 unLo, unHi, ltLo, gtHi, n, m;
				137	Int32 sp, lo, hi;
				138	UInt32 med, r, r3;
				139	Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
				140	Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
				141
				142	r = 0;
				143
				144	sp = 0;
				145	fpush ( loSt, hiSt );
				146
				147	while (sp > 0) {
				148
				149	AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
				150
				151	fpop ( lo, hi );
				152	if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
				153	fallbackSimpleSort ( fmap, eclass, lo, hi );
				154	continue;
				155	}
				156
				157	/* Random partitioning. Median of 3 sometimes fails to
				158	avoid bad cases. Median of 9 seems to help but
				159	looks rather expensive. This too seems to work but
				160	is cheaper. Guidance for the magic constants
				161	7621 and 32768 is taken from Sedgewick's algorithms
				162	book, chapter 35.
				163	*/
				164	r = ((r * 7621) + 1) % 32768;
				165	r3 = r % 3;
				166	if (r3 == 0) med = eclass[fmap[lo]]; else
				167	if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
				168	med = eclass[fmap[hi]];
				169
				170	unLo = ltLo = lo;
				171	unHi = gtHi = hi;
				172
				173	while (1) {
				174	while (1) {
				175	if (unLo > unHi) break;
				176	n = (Int32)eclass[fmap[unLo]] - (Int32)med;
				177	if (n == 0) {
				178	fswap(fmap[unLo], fmap[ltLo]);
				179	ltLo++; unLo++;
				180	continue;
				181	};
				182	if (n > 0) break;
				183	unLo++;
				184	}
				185	while (1) {
				186	if (unLo > unHi) break;
				187	n = (Int32)eclass[fmap[unHi]] - (Int32)med;
				188	if (n == 0) {
				189	fswap(fmap[unHi], fmap[gtHi]);
				190	gtHi--; unHi--;
				191	continue;
				192	};
				193	if (n < 0) break;
				194	unHi--;
				195	}
				196	if (unLo > unHi) break;
				197	fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
				198	}
				199
				200	AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
				201
				202	if (gtHi < ltLo) continue;
				203
				204	n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
				205	m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
				206
				207	n = lo + unLo - ltLo - 1;
				208	m = hi - (gtHi - unHi) + 1;
				209
				210	if (n - lo > hi - m) {
				211	fpush ( lo, n );
				212	fpush ( m, hi );
				213	} else {
				214	fpush ( m, hi );
				215	fpush ( lo, n );
				216	}
				217	}
				218	}
				219
				220	#undef fmin
				221	#undef fpush
				222	#undef fpop
				223	#undef fswap
				224	#undef fvswap
				225	#undef FALLBACK_QSORT_SMALL_THRESH
				226	#undef FALLBACK_QSORT_STACK_SIZE
				227
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	228	/---------------------------------------------/
				229	/* Pre:
				230	nblock > 0
				231	eclass exists for [0 .. nblock-1]
				232	((UChar*)eclass) [0 .. nblock-1] holds block
				233	ptr exists for [0 .. nblock-1]
				234
				235	Post:
				236	((UChar*)eclass) [0 .. nblock-1] holds block
				237	All other areas of eclass destroyed
				238	fmap [0 .. nblock-1] holds sorted order
				239	bhtab [ 0 .. 2+(nblock/32) ] destroyed
				240	*/
				241
				242	#define SET_BH(zz) bhtab[(zz) >> 5] \|= (1 << ((zz) & 31))
				243	#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
				244	#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
				245	#define WORD_BH(zz) bhtab[(zz) >> 5]
				246	#define UNALIGNED_BH(zz) ((zz) & 0x01f)
				247
				248	static
				249	void fallbackSort ( UInt32* fmap,
				250	UInt32* eclass,
				251	UInt32* bhtab,
				252	Int32 nblock,
				253	Int32 verb )
				254	{
				255	Int32 ftab[257];
				256	Int32 ftabCopy[256];
				257	Int32 H, i, j, k, l, r, cc, cc1;
				258	Int32 nNotDone;
				259	Int32 nBhtab;
				260	UChar* eclass8 = (UChar*)eclass;
				261
				262	/*--
				263	Initial 1-char radix sort to generate
				264	initial fmap and initial BH bits.
				265	--*/
				266	if (verb >= 4)
				267	VPrintf0 ( " bucket sorting ...\n" );
				268	for (i = 0; i < 257; i++) ftab[i] = 0;
				269	for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
				270	for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
				271	for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
				272
				273	for (i = 0; i < nblock; i++) {
				274	j = eclass8[i];
				275	k = ftab[j] - 1;
				276	ftab[j] = k;
				277	fmap[k] = i;
				278	}
				279
				280	nBhtab = 2 + (nblock / 32);
				281	for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
				282	for (i = 0; i < 256; i++) SET_BH(ftab[i]);
				283
				284	/*--
				285	Inductively refine the buckets. Kind-of an
				286	"exponential radix sort" (!), inspired by the
				287	Manber-Myers suffix array construction algorithm.
				288	--*/
				289
				290	/-- set sentinel bits for block-end detection --/
				291	for (i = 0; i < 32; i++) {
				292	SET_BH(nblock + 2*i);
				293	CLEAR_BH(nblock + 2*i + 1);
				294	}
				295
				296	/-- the log(N) loop --/
				297	H = 1;
				298	while (1) {
				299
				300	if (verb >= 4)
				301	VPrintf1 ( " depth %6d has ", H );
				302
				303	j = 0;
				304	for (i = 0; i < nblock; i++) {
				305	if (ISSET_BH(i)) j = i;
				306	k = fmap[i] - H; if (k < 0) k += nblock;
				307	eclass[k] = j;
				308	}
				309
				310	nNotDone = 0;
				311	r = -1;
				312	while (1) {
				313
				314	/-- find the next non-singleton bucket --/
				315	k = r + 1;
				316	while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
				317	if (ISSET_BH(k)) {
				318	while (WORD_BH(k) == 0xffffffff) k += 32;
				319	while (ISSET_BH(k)) k++;
				320	}
				321	l = k - 1;
				322	if (l >= nblock) break;
				323	while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
				324	if (!ISSET_BH(k)) {
				325	while (WORD_BH(k) == 0x00000000) k += 32;
				326	while (!ISSET_BH(k)) k++;
				327	}
				328	r = k - 1;
				329	if (r >= nblock) break;
				330
				331	/-- now [l, r] bracket current bucket --/
				332	if (r > l) {
				333	nNotDone += (r - l + 1);
				334	fallbackQSort3 ( fmap, eclass, l, r );
				335
				336	/-- scan bucket and generate header bits-- /
				337	cc = -1;
				338	for (i = l; i <= r; i++) {
				339	cc1 = eclass[fmap[i]];
				340	if (cc != cc1) { SET_BH(i); cc = cc1; };
				341	}
				342	}
				343	}
				344
				345	if (verb >= 4)
				346	VPrintf1 ( "%6d unresolved strings\n", nNotDone );
				347
				348	H *= 2;
				349	if (H > nblock \|\| nNotDone == 0) break;
				350	}
				351
				352	/*--
				353	Reconstruct the original block in
				354	eclass8 [0 .. nblock-1], since the
				355	previous phase destroyed it.
				356	--*/
				357	if (verb >= 4)
				358	VPrintf0 ( " reconstructing block ...\n" );
				359	j = 0;
				360	for (i = 0; i < nblock; i++) {
				361	while (ftabCopy[j] == 0) j++;
				362	ftabCopy[j]--;
				363	eclass8[fmap[i]] = (UChar)j;
				364	}
				365	AssertH ( j < 256, 1005 );
				366	}
				367
				368	#undef SET_BH
				369	#undef CLEAR_BH
				370	#undef ISSET_BH
				371	#undef WORD_BH
				372	#undef UNALIGNED_BH
				373
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	374	/---------------------------------------------/
				375	/--- The main, O(N^2 log(N)) sorting ---/
				376	/--- algorithm. Faster for "normal" ---/
				377	/--- non-repetitive blocks. ---/
				378	/---------------------------------------------/
				379
				380	/---------------------------------------------/
				381	static
				382	__inline__
				383	Bool mainGtU ( UInt32 i1,
				384	UInt32 i2,
				385	UChar* block,
				386	UInt16* quadrant,
				387	UInt32 nblock,
				388	Int32* budget )
				389	{
				390	Int32 k;
				391	UChar c1, c2;
				392	UInt16 s1, s2;
				393
				394	AssertD ( i1 != i2, "mainGtU" );
				395	/* 1 */
				396	c1 = block[i1]; c2 = block[i2];
				397	if (c1 != c2) return (c1 > c2);
				398	i1++; i2++;
				399	/* 2 */
				400	c1 = block[i1]; c2 = block[i2];
				401	if (c1 != c2) return (c1 > c2);
				402	i1++; i2++;
				403	/* 3 */
				404	c1 = block[i1]; c2 = block[i2];
				405	if (c1 != c2) return (c1 > c2);
				406	i1++; i2++;
				407	/* 4 */
				408	c1 = block[i1]; c2 = block[i2];
				409	if (c1 != c2) return (c1 > c2);
				410	i1++; i2++;
				411	/* 5 */
				412	c1 = block[i1]; c2 = block[i2];
				413	if (c1 != c2) return (c1 > c2);
				414	i1++; i2++;
				415	/* 6 */
				416	c1 = block[i1]; c2 = block[i2];
				417	if (c1 != c2) return (c1 > c2);
				418	i1++; i2++;
				419	/* 7 */
				420	c1 = block[i1]; c2 = block[i2];
				421	if (c1 != c2) return (c1 > c2);
				422	i1++; i2++;
				423	/* 8 */
				424	c1 = block[i1]; c2 = block[i2];
				425	if (c1 != c2) return (c1 > c2);
				426	i1++; i2++;
				427	/* 9 */
				428	c1 = block[i1]; c2 = block[i2];
				429	if (c1 != c2) return (c1 > c2);
				430	i1++; i2++;
				431	/* 10 */
				432	c1 = block[i1]; c2 = block[i2];
				433	if (c1 != c2) return (c1 > c2);
				434	i1++; i2++;
				435	/* 11 */
				436	c1 = block[i1]; c2 = block[i2];
				437	if (c1 != c2) return (c1 > c2);
				438	i1++; i2++;
				439	/* 12 */
				440	c1 = block[i1]; c2 = block[i2];
				441	if (c1 != c2) return (c1 > c2);
				442	i1++; i2++;
				443
				444	k = nblock + 8;
				445
				446	do {
				447	/* 1 */
				448	c1 = block[i1]; c2 = block[i2];
				449	if (c1 != c2) return (c1 > c2);
				450	s1 = quadrant[i1]; s2 = quadrant[i2];
				451	if (s1 != s2) return (s1 > s2);
				452	i1++; i2++;
				453	/* 2 */
				454	c1 = block[i1]; c2 = block[i2];
				455	if (c1 != c2) return (c1 > c2);
				456	s1 = quadrant[i1]; s2 = quadrant[i2];
				457	if (s1 != s2) return (s1 > s2);
				458	i1++; i2++;
				459	/* 3 */
				460	c1 = block[i1]; c2 = block[i2];
				461	if (c1 != c2) return (c1 > c2);
				462	s1 = quadrant[i1]; s2 = quadrant[i2];
				463	if (s1 != s2) return (s1 > s2);
				464	i1++; i2++;
				465	/* 4 */
				466	c1 = block[i1]; c2 = block[i2];
				467	if (c1 != c2) return (c1 > c2);
				468	s1 = quadrant[i1]; s2 = quadrant[i2];
				469	if (s1 != s2) return (s1 > s2);
				470	i1++; i2++;
				471	/* 5 */
				472	c1 = block[i1]; c2 = block[i2];
				473	if (c1 != c2) return (c1 > c2);
				474	s1 = quadrant[i1]; s2 = quadrant[i2];
				475	if (s1 != s2) return (s1 > s2);
				476	i1++; i2++;
				477	/* 6 */
				478	c1 = block[i1]; c2 = block[i2];
				479	if (c1 != c2) return (c1 > c2);
				480	s1 = quadrant[i1]; s2 = quadrant[i2];
				481	if (s1 != s2) return (s1 > s2);
				482	i1++; i2++;
				483	/* 7 */
				484	c1 = block[i1]; c2 = block[i2];
				485	if (c1 != c2) return (c1 > c2);
				486	s1 = quadrant[i1]; s2 = quadrant[i2];
				487	if (s1 != s2) return (s1 > s2);
				488	i1++; i2++;
				489	/* 8 */
				490	c1 = block[i1]; c2 = block[i2];
				491	if (c1 != c2) return (c1 > c2);
				492	s1 = quadrant[i1]; s2 = quadrant[i2];
				493	if (s1 != s2) return (s1 > s2);
				494	i1++; i2++;
				495
				496	if (i1 >= nblock) i1 -= nblock;
				497	if (i2 >= nblock) i2 -= nblock;
				498
				499	k -= 8;
				500	(*budget)--;
				501	}
				502	while (k >= 0);
				503
				504	return False;
				505	}
				506
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	507	/---------------------------------------------/
				508	/*--
				509	Knuth's increments seem to work better
				510	than Incerpi-Sedgewick here. Possibly
				511	because the number of elems to sort is
				512	usually small, typically <= 20.
				513	--*/
				514	static
				515	Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
				516	9841, 29524, 88573, 265720,
				517	797161, 2391484 };
				518
				519	static
				520	void mainSimpleSort ( UInt32* ptr,
				521	UChar* block,
				522	UInt16* quadrant,
				523	Int32 nblock,
				524	Int32 lo,
				525	Int32 hi,
				526	Int32 d,
				527	Int32* budget )
				528	{
				529	Int32 i, j, h, bigN, hp;
				530	UInt32 v;
				531
				532	bigN = hi - lo + 1;
				533	if (bigN < 2) return;
				534
				535	hp = 0;
				536	while (incs[hp] < bigN) hp++;
				537	hp--;
				538
				539	for (; hp >= 0; hp--) {
				540	h = incs[hp];
				541
				542	i = lo + h;
				543	while (True) {
				544
				545	/-- copy 1 --/
				546	if (i > hi) break;
				547	v = ptr[i];
				548	j = i;
				549	while ( mainGtU (
				550	ptr[j-h]+d, v+d, block, quadrant, nblock, budget
				551	) ) {
				552	ptr[j] = ptr[j-h];
				553	j = j - h;
				554	if (j <= (lo + h - 1)) break;
				555	}
				556	ptr[j] = v;
				557	i++;
				558
				559	/-- copy 2 --/
				560	if (i > hi) break;
				561	v = ptr[i];
				562	j = i;
				563	while ( mainGtU (
				564	ptr[j-h]+d, v+d, block, quadrant, nblock, budget
				565	) ) {
				566	ptr[j] = ptr[j-h];
				567	j = j - h;
				568	if (j <= (lo + h - 1)) break;
				569	}
				570	ptr[j] = v;
				571	i++;
				572
				573	/-- copy 3 --/
				574	if (i > hi) break;
				575	v = ptr[i];
				576	j = i;
				577	while ( mainGtU (
				578	ptr[j-h]+d, v+d, block, quadrant, nblock, budget
				579	) ) {
				580	ptr[j] = ptr[j-h];
				581	j = j - h;
				582	if (j <= (lo + h - 1)) break;
				583	}
				584	ptr[j] = v;
				585	i++;
				586
				587	if (*budget < 0) return;
				588	}
				589	}
				590	}
				591
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	592	/---------------------------------------------/
				593	/*--
				594	The following is an implementation of
				595	an elegant 3-way quicksort for strings,
				596	described in a paper "Fast Algorithms for
				597	Sorting and Searching Strings", by Robert
				598	Sedgewick and Jon L. Bentley.
				599	--*/
				600
				601	#define mswap(zz1, zz2) \
				602	{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
				603
				604	#define mvswap(zzp1, zzp2, zzn) \
				605	{ \
				606	Int32 yyp1 = (zzp1); \
				607	Int32 yyp2 = (zzp2); \
				608	Int32 yyn = (zzn); \
				609	while (yyn > 0) { \
				610	mswap(ptr[yyp1], ptr[yyp2]); \
				611	yyp1++; yyp2++; yyn--; \
				612	} \
				613	}
				614
				615	static
				616	__inline__
				617	UChar mmed3 ( UChar a, UChar b, UChar c )
				618	{
				619	UChar t;
				620	if (a > b) { t = a; a = b; b = t; };
				621	if (b > c) {
				622	b = c;
				623	if (a > b) b = a;
				624	}
				625	return b;
				626	}
				627
				628	#define mmin(a,b) ((a) < (b)) ? (a) : (b)
				629
				630	#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
				631	stackHi[sp] = hz; \
				632	stackD [sp] = dz; \
				633	sp++; }
				634
				635	#define mpop(lz,hz,dz) { sp--; \
				636	lz = stackLo[sp]; \
				637	hz = stackHi[sp]; \
				638	dz = stackD [sp]; }
				639
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	640	#define mnextsize(az) (nextHi[az]-nextLo[az])
				641
				642	#define mnextswap(az,bz) \
				643	{ Int32 tz; \
				644	tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
				645	tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
				646	tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
				647
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	648	#define MAIN_QSORT_SMALL_THRESH 20
				649	#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
				650	#define MAIN_QSORT_STACK_SIZE 100
				651
				652	static
				653	void mainQSort3 ( UInt32* ptr,
				654	UChar* block,
				655	UInt16* quadrant,
				656	Int32 nblock,
				657	Int32 loSt,
				658	Int32 hiSt,
				659	Int32 dSt,
				660	Int32* budget )
				661	{
				662	Int32 unLo, unHi, ltLo, gtHi, n, m, med;
				663	Int32 sp, lo, hi, d;
				664
				665	Int32 stackLo[MAIN_QSORT_STACK_SIZE];
				666	Int32 stackHi[MAIN_QSORT_STACK_SIZE];
				667	Int32 stackD [MAIN_QSORT_STACK_SIZE];
				668
				669	Int32 nextLo[3];
				670	Int32 nextHi[3];
				671	Int32 nextD [3];
				672
				673	sp = 0;
				674	mpush ( loSt, hiSt, dSt );
				675
				676	while (sp > 0) {
				677
				678	AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
				679
				680	mpop ( lo, hi, d );
				681	if (hi - lo < MAIN_QSORT_SMALL_THRESH \|\|
				682	d > MAIN_QSORT_DEPTH_THRESH) {
				683	mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
				684	if (*budget < 0) return;
				685	continue;
				686	}
				687
				688	med = (Int32)
				689	mmed3 ( block[ptr[ lo ]+d],
				690	block[ptr[ hi ]+d],
				691	block[ptr[ (lo+hi)>>1 ]+d] );
				692
				693	unLo = ltLo = lo;
				694	unHi = gtHi = hi;
				695
				696	while (True) {
				697	while (True) {
				698	if (unLo > unHi) break;
				699	n = ((Int32)block[ptr[unLo]+d]) - med;
				700	if (n == 0) {
				701	mswap(ptr[unLo], ptr[ltLo]);
				702	ltLo++; unLo++; continue;
				703	};
				704	if (n > 0) break;
				705	unLo++;
				706	}
				707	while (True) {
				708	if (unLo > unHi) break;
				709	n = ((Int32)block[ptr[unHi]+d]) - med;
				710	if (n == 0) {
				711	mswap(ptr[unHi], ptr[gtHi]);
				712	gtHi--; unHi--; continue;
				713	};
				714	if (n < 0) break;
				715	unHi--;
				716	}
				717	if (unLo > unHi) break;
				718	mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
				719	}
				720
				721	AssertD ( unHi == unLo-1, "mainQSort3(2)" );
				722
				723	if (gtHi < ltLo) {
				724	mpush(lo, hi, d+1 );
				725	continue;
				726	}
				727
				728	n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
				729	m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
				730
				731	n = lo + unLo - ltLo - 1;
				732	m = hi - (gtHi - unHi) + 1;
				733
				734	nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
				735	nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
				736	nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
				737
				738	if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
				739	if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
				740	if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
				741
				742	AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
				743	AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
				744
				745	mpush (nextLo[0], nextHi[0], nextD[0]);
				746	mpush (nextLo[1], nextHi[1], nextD[1]);
				747	mpush (nextLo[2], nextHi[2], nextD[2]);
				748	}
				749	}
				750
				751	#undef mswap
				752	#undef mvswap
				753	#undef mpush
				754	#undef mpop
				755	#undef mmin
				756	#undef mnextsize
				757	#undef mnextswap
				758	#undef MAIN_QSORT_SMALL_THRESH
				759	#undef MAIN_QSORT_DEPTH_THRESH
				760	#undef MAIN_QSORT_STACK_SIZE
				761
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	762	/---------------------------------------------/
				763	/* Pre:
				764	nblock > N_OVERSHOOT
				765	block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
				766	((UChar*)block32) [0 .. nblock-1] holds block
				767	ptr exists for [0 .. nblock-1]
				768
				769	Post:
				770	((UChar*)block32) [0 .. nblock-1] holds block
				771	All other areas of block32 destroyed
				772	ftab [0 .. 65536 ] destroyed
				773	ptr [0 .. nblock-1] holds sorted order
				774	if (*budget < 0), sorting was abandoned
				775	*/
				776
				777	#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
				778	#define SETMASK (1 << 21)
				779	#define CLEARMASK (~(SETMASK))
				780
				781	static
				782	void mainSort ( UInt32* ptr,
				783	UChar* block,
				784	UInt16* quadrant,
				785	UInt32* ftab,
				786	Int32 nblock,
				787	Int32 verb,
				788	Int32* budget )
				789	{
				790	Int32 i, j, k, ss, sb;
				791	Int32 runningOrder[256];
				792	Bool bigDone[256];
				793	Int32 copyStart[256];
				794	Int32 copyEnd [256];
				795	UChar c1;
				796	Int32 numQSorted;
				797	UInt16 s;
				798	if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
				799
				800	/-- set up the 2-byte frequency table --/
				801	for (i = 65536; i >= 0; i--) ftab[i] = 0;
				802
				803	j = block[0] << 8;
				804	i = nblock-1;
				805	for (; i >= 3; i -= 4) {
				806	quadrant[i] = 0;
				807	j = (j >> 8) \| ( ((UInt16)block[i]) << 8);
				808	ftab[j]++;
				809	quadrant[i-1] = 0;
				810	j = (j >> 8) \| ( ((UInt16)block[i-1]) << 8);
				811	ftab[j]++;
				812	quadrant[i-2] = 0;
				813	j = (j >> 8) \| ( ((UInt16)block[i-2]) << 8);
				814	ftab[j]++;
				815	quadrant[i-3] = 0;
				816	j = (j >> 8) \| ( ((UInt16)block[i-3]) << 8);
				817	ftab[j]++;
				818	}
				819	for (; i >= 0; i--) {
				820	quadrant[i] = 0;
				821	j = (j >> 8) \| ( ((UInt16)block[i]) << 8);
				822	ftab[j]++;
				823	}
				824
				825	/-- (emphasises close relationship of block & quadrant) --/
				826	for (i = 0; i < BZ_N_OVERSHOOT; i++) {
				827	block [nblock+i] = block[i];
				828	quadrant[nblock+i] = 0;
				829	}
				830
				831	if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
				832
				833	/-- Complete the initial radix sort --/
				834	for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
				835
				836	s = block[0] << 8;
				837	i = nblock-1;
				838	for (; i >= 3; i -= 4) {
				839	s = (s >> 8) \| (block[i] << 8);
				840	j = ftab[s] -1;
				841	ftab[s] = j;
				842	ptr[j] = i;
				843	s = (s >> 8) \| (block[i-1] << 8);
				844	j = ftab[s] -1;
				845	ftab[s] = j;
				846	ptr[j] = i-1;
				847	s = (s >> 8) \| (block[i-2] << 8);
				848	j = ftab[s] -1;
				849	ftab[s] = j;
				850	ptr[j] = i-2;
				851	s = (s >> 8) \| (block[i-3] << 8);
				852	j = ftab[s] -1;
				853	ftab[s] = j;
				854	ptr[j] = i-3;
				855	}
				856	for (; i >= 0; i--) {
				857	s = (s >> 8) \| (block[i] << 8);
				858	j = ftab[s] -1;
				859	ftab[s] = j;
				860	ptr[j] = i;
				861	}
				862
				863	/*--
				864	Now ftab contains the first loc of every small bucket.
				865	Calculate the running order, from smallest to largest
				866	big bucket.
				867	--*/
				868	for (i = 0; i <= 255; i++) {
				869	bigDone [i] = False;
				870	runningOrder[i] = i;
				871	}
				872
				873	{
				874	Int32 vv;
				875	Int32 h = 1;
				876	do h = 3 * h + 1; while (h <= 256);
				877	do {
				878	h = h / 3;
				879	for (i = h; i <= 255; i++) {
				880	vv = runningOrder[i];
				881	j = i;
				882	while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
				883	runningOrder[j] = runningOrder[j-h];
				884	j = j - h;
				885	if (j <= (h - 1)) goto zero;
				886	}
				887	zero:
				888	runningOrder[j] = vv;
				889	}
				890	} while (h != 1);
				891	}
				892
				893	/*--
				894	The main sorting loop.
				895	--*/
				896
				897	numQSorted = 0;
				898
				899	for (i = 0; i <= 255; i++) {
				900
				901	/*--
				902	Process big buckets, starting with the least full.
				903	Basically this is a 3-step process in which we call
				904	mainQSort3 to sort the small buckets [ss, j], but
				905	also make a big effort to avoid the calls if we can.
				906	--*/
				907	ss = runningOrder[i];
				908
				909	/*--
				910	Step 1:
				911	Complete the big bucket [ss] by quicksorting
				912	any unsorted small buckets [ss, j], for j != ss.
				913	Hopefully previous pointer-scanning phases have already
				914	completed many of the small buckets [ss, j], so
				915	we don't have to sort them at all.
				916	--*/
				917	for (j = 0; j <= 255; j++) {
				918	if (j != ss) {
				919	sb = (ss << 8) + j;
				920	if ( ! (ftab[sb] & SETMASK) ) {
				921	Int32 lo = ftab[sb] & CLEARMASK;
				922	Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
				923	if (hi > lo) {
				924	if (verb >= 4)
				925	VPrintf4 ( " qsort [0x%x, 0x%x] "
				926	"done %d this %d\n",
				927	ss, j, numQSorted, hi - lo + 1 );
				928	mainQSort3 (
				929	ptr, block, quadrant, nblock,
				930	lo, hi, BZ_N_RADIX, budget
				931	);
				932	numQSorted += (hi - lo + 1);
				933	if (*budget < 0) return;
				934	}
				935	}
				936	ftab[sb] \|= SETMASK;
				937	}
				938	}
				939
				940	AssertH ( !bigDone[ss], 1006 );
				941
				942	/*--
				943	Step 2:
				944	Now scan this big bucket [ss] so as to synthesise the
				945	sorted order for small buckets [t, ss] for all t,
				946	including, magically, the bucket [ss,ss] too.
				947	This will avoid doing Real Work in subsequent Step 1's.
				948	--*/
				949	{
				950	for (j = 0; j <= 255; j++) {
				951	copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
				952	copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
				953	}
				954	for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
				955	k = ptr[j]-1; if (k < 0) k += nblock;
				956	c1 = block[k];
				957	if (!bigDone[c1])
				958	ptr[ copyStart[c1]++ ] = k;
				959	}
				960	for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
				961	k = ptr[j]-1; if (k < 0) k += nblock;
				962	c1 = block[k];
				963	if (!bigDone[c1])
				964	ptr[ copyEnd[c1]-- ] = k;
				965	}
				966	}
				967
				968	AssertH ( (copyStart[ss]-1 == copyEnd[ss])
				969	\|\|
				970	/* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
				971	Necessity for this case is demonstrated by compressing
				972	a sequence of approximately 48.5 million of character
				973	251; 1.0.0/1.0.1 will then die here. */
				974	(copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
				975	1007 )
				976
				977	for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] \|= SETMASK;
				978
				979	/*--
				980	Step 3:
				981	The [ss] big bucket is now done. Record this fact,
				982	and update the quadrant descriptors. Remember to
				983	update quadrants in the overshoot area too, if
				984	necessary. The "if (i < 255)" test merely skips
				985	this updating for the last bucket processed, since
				986	updating for the last bucket is pointless.
				987
				988	The quadrant array provides a way to incrementally
				989	cache sort orderings, as they appear, so as to
				990	make subsequent comparisons in fullGtU() complete
				991	faster. For repetitive blocks this makes a big
				992	difference (but not big enough to be able to avoid
				993	the fallback sorting mechanism, exponential radix sort).
				994
				995	The precise meaning is: at all times:
				996
				997	for 0 <= i < nblock and 0 <= j <= nblock
				998
				999	if block[i] != block[j],
				1000
				1001	then the relative values of quadrant[i] and
				1002	quadrant[j] are meaningless.
				1003
				1004	else {
				1005	if quadrant[i] < quadrant[j]
				1006	then the string starting at i lexicographically
				1007	precedes the string starting at j
				1008
				1009	else if quadrant[i] > quadrant[j]
				1010	then the string starting at j lexicographically
				1011	precedes the string starting at i
				1012
				1013	else
				1014	the relative ordering of the strings starting
				1015	at i and j has not yet been determined.
				1016	}
				1017	--*/
				1018	bigDone[ss] = True;
				1019
				1020	if (i < 255) {
				1021	Int32 bbStart = ftab[ss << 8] & CLEARMASK;
				1022	Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
				1023	Int32 shifts = 0;
				1024
				1025	while ((bbSize >> shifts) > 65534) shifts++;
				1026
				1027	for (j = bbSize-1; j >= 0; j--) {
				1028	Int32 a2update = ptr[bbStart + j];
				1029	UInt16 qVal = (UInt16)(j >> shifts);
				1030	quadrant[a2update] = qVal;
				1031	if (a2update < BZ_N_OVERSHOOT)
				1032	quadrant[a2update + nblock] = qVal;
				1033	}
				1034	AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
				1035	}
				1036
				1037	}
				1038
				1039	if (verb >= 4)
				1040	VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
				1041	nblock, numQSorted, nblock - numQSorted );
				1042	}
				1043
				1044	#undef BIGFREQ
				1045	#undef SETMASK
				1046	#undef CLEARMASK
				1047
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	1048	/---------------------------------------------/
				1049	/* Pre:
				1050	nblock > 0
				1051	arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
				1052	((UChar*)arr2) [0 .. nblock-1] holds block
				1053	arr1 exists for [0 .. nblock-1]
				1054
				1055	Post:
				1056	((UChar*)arr2) [0 .. nblock-1] holds block
				1057	All other areas of block destroyed
				1058	ftab [ 0 .. 65536 ] destroyed
				1059	arr1 [0 .. nblock-1] holds sorted order
				1060	*/
				1061	void BZ2_blockSort ( EState* s )
				1062	{
				1063	UInt32* ptr = s->ptr;
				1064	UChar* block = s->block;
				1065	UInt32* ftab = s->ftab;
				1066	Int32 nblock = s->nblock;
				1067	Int32 verb = s->verbosity;
				1068	Int32 wfact = s->workFactor;
				1069	UInt16* quadrant;
				1070	Int32 budget;
				1071	Int32 budgetInit;
				1072	Int32 i;
				1073
				1074	if (nblock < 10000) {
				1075	fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
				1076	} else {
				1077	/* Calculate the location for quadrant, remembering to get
				1078	the alignment right. Assumes that &(block[0]) is at least
				1079	2-byte aligned -- this should be ok since block is really
				1080	the first section of arr2.
				1081	*/
				1082	i = nblock+BZ_N_OVERSHOOT;
				1083	if (i & 1) i++;
				1084	quadrant = (UInt16*)(&(block[i]));
				1085
				1086	/* (wfact-1) / 3 puts the default-factor-30
				1087	transition point at very roughly the same place as
				1088	with v0.1 and v0.9.0.
				1089	Not that it particularly matters any more, since the
				1090	resulting compressed stream is now the same regardless
				1091	of whether or not we use the main sort or fallback sort.
				1092	*/
				1093	if (wfact < 1 ) wfact = 1;
				1094	if (wfact > 100) wfact = 100;
				1095	budgetInit = nblock * ((wfact-1) / 3);
				1096	budget = budgetInit;
				1097
				1098	mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
				1099	if (verb >= 3)
				1100	VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
				1101	budgetInit - budget,
				1102	nblock,
				1103	(float)(budgetInit - budget) /
				1104	(float)(nblock==0 ? 1 : nblock) );
				1105	if (budget < 0) {
				1106	if (verb >= 2)
				1107	VPrintf0 ( " too repetitive; using fallback"
				1108	" sorting algorithm\n" );
				1109	fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
				1110	}
				1111	}
				1112
				1113	s->origPtr = -1;
				1114	for (i = 0; i < s->nblock; i++)
				1115	if (ptr[i] == 0)
				1116	{ s->origPtr = i; break; };
				1117
				1118	AssertH( s->origPtr != -1, 1003 );
				1119	}
				1120
Simon Glass	0a30e42	2016-01-05 09:31:00 -0700	[diff] [blame]	1121	/-------------------------------------------------------------/
				1122	/--- end blocksort.c ---/
				1123	/-------------------------------------------------------------/