blob: baac4047943eb307731264d1a48a5c6b2991f816 [file] [log] [blame]
Willy Tarreauae5f7da2007-05-13 11:40:04 +02001/*
2 This file only show how many operations a hash is able to handle.
3 It don't show the distribution nor collisions.
4
5 gcc -Wall -O3 -o test_hashes test_hashes.c
6 ./test_hashes |sort -k 3 -r
7 */
8#include <sys/time.h>
9#include <time.h>
10#include <stdlib.h>
11#include <stdbool.h>
12#include <string.h>
13#include <stdio.h>
14//#include <stdint.h>
15
16
17static struct timeval timeval_current(void)
18{
19 struct timeval tv;
20 gettimeofday(&tv, NULL);
21 return tv;
22}
23
24static double timeval_elapsed(struct timeval *tv)
25{
26 struct timeval tv2 = timeval_current();
27 return (tv2.tv_sec - tv->tv_sec) +
28 (tv2.tv_usec - tv->tv_usec)*1.0e-6;
29}
30
31#define HAPROXY_BACKENDS 4
32
33unsigned long haproxy_uri_hash(char *uri, int uri_len){
34
35 unsigned long hash = 0;
36 int c;
37
38 while (uri_len--) {
39 c = *uri++;
40 if (c == '?')
41 break;
42 hash = c + (hash << 6) + (hash << 16) - hash;
43 }
44
45 return hash%HAPROXY_BACKENDS; /* I assume 4 active backends */
46} /* end haproxy_hash() */
47
48/*
49 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
50 */
51unsigned sax_hash ( void *key, int len )
52{
53 unsigned char *p = key;
54 unsigned h = 0;
55 int i;
56
57 for ( i = 0; i < len; i++ )
58 h ^= ( h << 5 ) + ( h >> 2 ) + p[i];
59
60 return h;
61}
62
63#include <arpa/inet.h>
64/* len 4 for ipv4 and 16 for ipv6 */
65unsigned int haproxy_server_hash(const char *addr, int len){
66 unsigned int h, l;
67 l = h = 0;
68
69 while ((l + sizeof (int)) <= len) {
70 h ^= ntohl(*(unsigned int *)(&addr[l]));
71 l += sizeof (int);
72 }
73 return h %= HAPROXY_BACKENDS;
74}/* end haproxy_server_hash() */
75
76
77int hashpjw(const void *key) {
78
79 const char *ptr;
80 unsigned int val;
81 /*********************************************************************
82 * *
83 * Hash the key by performing a number of bit operations on it. *
84 * *
85 *********************************************************************/
86
87 val = 0;
88 ptr = key;
89
90 while (*ptr != '\0') {
91
92 int tmp;
93
94 val = (val << 4) + (*ptr);
95
96 if((tmp = (val & 0xf0000000))) {
97 val = val ^ (tmp >> 24);
98 val = val ^ tmp;
99 }
100 ptr++;
101 }/* end while */
102
103 return val;
104}/* end hashpjw */
105
106static unsigned long
107hash_djbx33(
108 register unsigned char *key,
109 register size_t len)
110{
111 register unsigned long hash = 5381;
112
113 /* the hash unrolled eight times */
114 for (; len >= 8; len -= 8) {
115 hash = ((hash << 5) + hash) + *key++;
116 hash = ((hash << 5) + hash) + *key++;
117 hash = ((hash << 5) + hash) + *key++;
118 hash = ((hash << 5) + hash) + *key++;
119 hash = ((hash << 5) + hash) + *key++;
120 hash = ((hash << 5) + hash) + *key++;
121 hash = ((hash << 5) + hash) + *key++;
122 hash = ((hash << 5) + hash) + *key++;
123 }
124 switch (len) {
125 case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
126 case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
127 case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
128 case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
129 case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
130 case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
131 case 1: hash = ((hash << 5) + hash) + *key++; break;
132 default: /* case 0: */ break;
133 }
134 return hash;
135}
136
137typedef unsigned long int ub4; /* unsigned 4-byte quantities */
138typedef unsigned char ub1; /* unsigned 1-byte quantities */
139
140ub4 bernstein(ub1 *key, ub4 len, ub4 level){
141 ub4 hash = level;
142 ub4 i;
143 for (i=0; i<len; ++i) hash = 33*hash + key[i];
144 return hash;
145}
146
147/*
148 * http://www.azillionmonkeys.com/qed/hash.html
149 */
150#undef get16bits
151#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
152 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
153#define get16bits(d) (*((const uint16_t *) (d)))
154#endif
155
156#if !defined (get16bits)
157#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
158 +(uint32_t)(((const uint8_t *)(d))[0]) )
159#endif
160
161/*
162 * This function has a hole of 11 unused bits in bytes 2 and 3 of each block of
163 * 32 bits.
164 */
165uint32_t SuperFastHash (const char * data, int len) {
166uint32_t hash = len, tmp;
167int rem;
168
169 if (len <= 0 || data == NULL) return 0;
170
171 rem = len & 3;
172 len >>= 2;
173
174 /* Main loop */
175 for (;len > 0; len--) {
176 hash += get16bits (data);
177 tmp = (get16bits (data+2) << 11) ^ hash;
178 hash = (hash << 16) ^ tmp;
179 data += 2*sizeof (uint16_t);
180 hash += hash >> 11;
181 }
182
183 /* Handle end cases */
184 switch (rem) {
185 case 3: hash += get16bits (data);
186 hash ^= hash << 16;
187 hash ^= data[sizeof (uint16_t)] << 18;
188 hash += hash >> 11;
189 break;
190 case 2: hash += get16bits (data);
191 hash ^= hash << 11;
192 hash += hash >> 17;
193 break;
194 case 1: hash += *data;
195 hash ^= hash << 10;
196 hash += hash >> 1;
197 }
198
199 /* Force "avalanching" of final 127 bits */
200 hash ^= hash << 3;
201 hash += hash >> 5;
202 hash ^= hash << 4;
203 hash += hash >> 17;
204 hash ^= hash << 25;
205 hash += hash >> 6;
206
207 return hash;
208}
209
210/*
211 * This variant uses all bits from the input block, and is about 15% faster.
212 */
213uint32_t SuperFastHash2 (const char * data, int len) {
214uint32_t hash = len, tmp;
215int rem;
216
217 if (len <= 0 || data == NULL) return 0;
218
219 rem = len & 3;
220 len >>= 2;
221
222 /* Main loop */
223 for (;len > 0; len--) {
224 register uint32_t next;
225 next = get16bits(data+2);
226 hash += get16bits(data);
227 tmp = ((next << 11) | (next >> 21)) ^ hash;
228 hash = (hash << 16) ^ tmp;
229 data += 2*sizeof (uint16_t);
230 hash += hash >> 11;
231 }
232
233 /* Handle end cases */
234 switch (rem) {
235 case 3: hash += get16bits (data);
236 hash ^= hash << 16;
237 hash ^= data[sizeof (uint16_t)] << 18;
238 hash += hash >> 11;
239 break;
240 case 2: hash += get16bits (data);
241 hash ^= hash << 11;
242 hash += hash >> 17;
243 break;
244 case 1: hash += *data;
245 hash ^= hash << 10;
246 hash += hash >> 1;
247 }
248
249 /* Force "avalanching" of final 127 bits */
250 hash ^= hash << 3;
251 hash += hash >> 5;
252 hash ^= hash << 4;
253 hash += hash >> 17;
254 hash ^= hash << 25;
255 hash += hash >> 6;
256
257 return hash;
258}
259
260/*
261 * 32 bit FNV-0 hash type
262 */
263typedef unsigned long Fnv32_t;
264
265/*
266 * fnv_32a_str - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a string
267 *
268 * input:
269 * str - string to hash
270 * hval - previous hash value or 0 if first call
271 *
272 * returns:
273 * 32 bit hash as a static hash type
274 *
275 * NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the
276 * hval arg on the first call to either fnv_32a_buf() or fnv_32a_str().
277 */
278Fnv32_t
279fnv_32a_str(char *str, Fnv32_t hval)
280{
281 unsigned char *s = (unsigned char *)str; /* unsigned string */
282
283 /*
284 * FNV-1a hash each octet in the buffer
285 */
286 while (*s) {
287
288 /* xor the bottom with the current octet */
289 hval ^= (Fnv32_t)*s++;
290
291/* #define NO_FNV_GCC_OPTIMIZATION */
292 /* multiply by the 32 bit FNV magic prime mod 2^32 */
293#if defined(NO_FNV_GCC_OPTIMIZATION)
294 /*
295 * 32 bit magic FNV-1a prime
296 */
297#define FNV_32_PRIME ((Fnv32_t)0x01000193)
298 hval *= FNV_32_PRIME;
299#else
300 hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
301#endif
302 }
303
304 /* return our new hash value */
305 return hval;
306}
307
308/*
309 * from lookup3.c, by Bob Jenkins, May 2006, Public Domain.
310 */
311
312#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
313
314/*
315-------------------------------------------------------------------------------
316mix -- mix 3 32-bit values reversibly.
317
318This is reversible, so any information in (a,b,c) before mix() is
319still in (a,b,c) after mix().
320
321If four pairs of (a,b,c) inputs are run through mix(), or through
322mix() in reverse, there are at least 32 bits of the output that
323are sometimes the same for one pair and different for another pair.
324This was tested for:
325* pairs that differed by one bit, by two bits, in any combination
326 of top bits of (a,b,c), or in any combination of bottom bits of
327 (a,b,c).
328* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
329 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
330 is commonly produced by subtraction) look like a single 1-bit
331 difference.
332* the base values were pseudorandom, all zero but one bit set, or
333 all zero plus a counter that starts at zero.
334
335Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
336satisfy this are
337 4 6 8 16 19 4
338 9 15 3 18 27 15
339 14 9 3 7 17 3
340Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
341for "differ" defined as + with a one-bit base and a two-bit delta. I
342used http://burtleburtle.net/bob/hash/avalanche.html to choose
343the operations, constants, and arrangements of the variables.
344
345This does not achieve avalanche. There are input bits of (a,b,c)
346that fail to affect some output bits of (a,b,c), especially of a. The
347most thoroughly mixed value is c, but it doesn't really even achieve
348avalanche in c.
349
350This allows some parallelism. Read-after-writes are good at doubling
351the number of bits affected, so the goal of mixing pulls in the opposite
352direction as the goal of parallelism. I did what I could. Rotates
353seem to cost as much as shifts on every machine I could lay my hands
354on, and rotates are much kinder to the top and bottom bits, so I used
355rotates.
356-------------------------------------------------------------------------------
357*/
358#define mix(a,b,c) \
359{ \
360 a -= c; a ^= rot(c, 4); c += b; \
361 b -= a; b ^= rot(a, 6); a += c; \
362 c -= b; c ^= rot(b, 8); b += a; \
363 a -= c; a ^= rot(c,16); c += b; \
364 b -= a; b ^= rot(a,19); a += c; \
365 c -= b; c ^= rot(b, 4); b += a; \
366}
367
368/*
369-------------------------------------------------------------------------------
370final -- final mixing of 3 32-bit values (a,b,c) into c
371
372Pairs of (a,b,c) values differing in only a few bits will usually
373produce values of c that look totally different. This was tested for
374* pairs that differed by one bit, by two bits, in any combination
375 of top bits of (a,b,c), or in any combination of bottom bits of
376 (a,b,c).
377* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
378 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
379 is commonly produced by subtraction) look like a single 1-bit
380 difference.
381* the base values were pseudorandom, all zero but one bit set, or
382 all zero plus a counter that starts at zero.
383
384These constants passed:
385 14 11 25 16 4 14 24
386 12 14 25 16 4 14 24
387and these came close:
388 4 8 15 26 3 22 24
389 10 8 15 26 3 22 24
390 11 8 15 26 3 22 24
391-------------------------------------------------------------------------------
392*/
393#define final(a,b,c) \
394{ \
395 c ^= b; c -= rot(b,14); \
396 a ^= c; a -= rot(c,11); \
397 b ^= a; b -= rot(a,25); \
398 c ^= b; c -= rot(b,16); \
399 a ^= c; a -= rot(c,4); \
400 b ^= a; b -= rot(a,14); \
401 c ^= b; c -= rot(b,24); \
402}
403
404/*
405--------------------------------------------------------------------
406 This works on all machines. To be useful, it requires
407 -- that the key be an array of uint32_t's, and
408 -- that the length be the number of uint32_t's in the key
409
410 The function hashword() is identical to hashlittle() on little-endian
411 machines, and identical to hashbig() on big-endian machines,
412 except that the length has to be measured in uint32_ts rather than in
413 bytes. hashlittle() is more complicated than hashword() only because
414 hashlittle() has to dance around fitting the key bytes into registers.
415--------------------------------------------------------------------
416*/
417uint32_t hashword(
418const uint32_t *k, /* the key, an array of uint32_t values */
419size_t length, /* the length of the key, in uint32_ts */
420uint32_t initval) /* the previous hash, or an arbitrary value */
421{
422 uint32_t a,b,c;
423
424 /* Set up the internal state */
425 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
426
427 /*------------------------------------------------- handle most of the key */
428 while (length > 3)
429 {
430 a += k[0];
431 b += k[1];
432 c += k[2];
433 mix(a,b,c);
434 length -= 3;
435 k += 3;
436 }
437
438 /*------------------------------------------- handle the last 3 uint32_t's */
439 switch(length) /* all the case statements fall through */
440 {
441 case 3 : c+=k[2];
442 case 2 : b+=k[1];
443 case 1 : a+=k[0];
444 final(a,b,c);
445 case 0: /* case 0: nothing left to add */
446 break;
447 }
448 /*------------------------------------------------------ report the result */
449 return c;
450}
451
452/* from K&R book site 139 */
453#define HASHSIZE 101
454
455unsigned kr_hash(char *s){
456 unsigned hashval;
457
458 for(hashval = 0; *s != '\0';s++)
459 hashval = *s + 31 * hashval;
460
461 return hashval % HASHSIZE;
462
463} /* end kr_hash() */
464
465unsigned fnv_hash ( void *key, int len )
466{
467 unsigned char *p = key;
468 unsigned h = 2166136261;
469 int i;
470
471 for ( i = 0; i < len; i++ )
472 h = ( h * 16777619 ) ^ p[i];
473
474 return h;
475}
476
477unsigned oat_hash ( void *key, int len )
478{
479 unsigned char *p = key;
480 unsigned h = 0;
481 int i;
482
483 for ( i = 0; i < len; i++ ) {
484 h += p[i];
485 h += ( h << 10 );
486 h ^= ( h >> 6 );
487 }
488
489 h += ( h << 3 );
490 h ^= ( h >> 11 );
491 h += ( h << 15 );
492
493 return h;
494}
495
496
497#define run_test(fct, args) { \
498 unsigned long loop, count; \
499 volatile unsigned long result; \
500 double delta; \
501 struct timeval tv; \
502 fprintf(stderr, "Starting %s\n", #fct); \
503 tv = timeval_current(); \
504 count = 0; \
505 do { \
506 delta = timeval_elapsed(&tv); \
507 for (loop = 0; loop < 1000; loop++) { \
508 result = fct args; \
509 count++; \
510 } \
511 } while (delta < 1.0); \
512 fprintf(stdout, "%-20s : %10.0f run/sec\n", #fct, count/delta); \
513 fflush(stdout); \
514}
515
516int main(){
517
518 char **start;
519 int len;
520
521 char *urls[] = {
522 "http://www.microsoft.com/shared/core/1/webservice/navigation.asmx/DisplayDownlevelNavHtml",
523 NULL
524 };
525
526 start = urls;
527 len = strlen(*urls);
528
529 run_test(SuperFastHash2, (*urls, len));
530 run_test(SuperFastHash, (*urls, len));
531 run_test(haproxy_uri_hash, (*urls, len));
532 run_test(haproxy_server_hash, (*urls, len));
533 run_test(hashpjw, (*urls));
534 run_test(hash_djbx33, ((unsigned char *)*urls, len));
535 run_test(bernstein, ((unsigned char *)*urls, len, 4));
536 run_test(fnv_32a_str, (*urls, 0));
537 run_test(hashword, ((const uint32_t *)*urls,strlen(*urls),0));
538 run_test(kr_hash, (*urls));
539 run_test(sax_hash, (*urls, len));
540 run_test(fnv_hash, (*urls, len));
541 run_test(oat_hash, (*urls, len));
542
543 return 0;
544
545}/* end main() */