blob: 39cb965f375215a2830b26cc7998c3fb8b5e7691 [file] [log] [blame]
Willy Tarreauae5f7da2007-05-13 11:40:04 +02001/*
2 This file only show how many operations a hash is able to handle.
3 It don't show the distribution nor collisions.
4
5 gcc -Wall -O3 -o test_hashes test_hashes.c
6 ./test_hashes |sort -k 3 -r
7 */
8#include <sys/time.h>
9#include <time.h>
10#include <stdlib.h>
11#include <stdbool.h>
12#include <string.h>
13#include <stdio.h>
14//#include <stdint.h>
15
16
17static struct timeval timeval_current(void)
18{
19 struct timeval tv;
20 gettimeofday(&tv, NULL);
21 return tv;
22}
23
24static double timeval_elapsed(struct timeval *tv)
25{
26 struct timeval tv2 = timeval_current();
27 return (tv2.tv_sec - tv->tv_sec) +
28 (tv2.tv_usec - tv->tv_usec)*1.0e-6;
29}
30
31#define HAPROXY_BACKENDS 4
32
33unsigned long haproxy_uri_hash(char *uri, int uri_len){
34
35 unsigned long hash = 0;
36 int c;
37
38 while (uri_len--) {
39 c = *uri++;
40 if (c == '?')
41 break;
42 hash = c + (hash << 6) + (hash << 16) - hash;
43 }
44
45 return hash%HAPROXY_BACKENDS; /* I assume 4 active backends */
46} /* end haproxy_hash() */
47
48/*
49 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
50 */
51unsigned sax_hash ( void *key, int len )
52{
53 unsigned char *p = key;
54 unsigned h = 0;
55 int i;
56
57 for ( i = 0; i < len; i++ )
58 h ^= ( h << 5 ) + ( h >> 2 ) + p[i];
59
60 return h;
61}
62
63#include <arpa/inet.h>
64/* len 4 for ipv4 and 16 for ipv6 */
65unsigned int haproxy_server_hash(const char *addr, int len){
66 unsigned int h, l;
67 l = h = 0;
68
69 while ((l + sizeof (int)) <= len) {
70 h ^= ntohl(*(unsigned int *)(&addr[l]));
71 l += sizeof (int);
72 }
73 return h %= HAPROXY_BACKENDS;
74}/* end haproxy_server_hash() */
75
76
77int hashpjw(const void *key) {
78
79 const char *ptr;
80 unsigned int val;
81 /*********************************************************************
82 * *
83 * Hash the key by performing a number of bit operations on it. *
84 * *
85 *********************************************************************/
86
87 val = 0;
88 ptr = key;
89
90 while (*ptr != '\0') {
91
92 int tmp;
93
94 val = (val << 4) + (*ptr);
95
96 if((tmp = (val & 0xf0000000))) {
97 val = val ^ (tmp >> 24);
98 val = val ^ tmp;
99 }
100 ptr++;
101 }/* end while */
102
103 return val;
104}/* end hashpjw */
105
106static unsigned long
107hash_djbx33(
108 register unsigned char *key,
109 register size_t len)
110{
111 register unsigned long hash = 5381;
112
113 /* the hash unrolled eight times */
114 for (; len >= 8; len -= 8) {
115 hash = ((hash << 5) + hash) + *key++;
116 hash = ((hash << 5) + hash) + *key++;
117 hash = ((hash << 5) + hash) + *key++;
118 hash = ((hash << 5) + hash) + *key++;
119 hash = ((hash << 5) + hash) + *key++;
120 hash = ((hash << 5) + hash) + *key++;
121 hash = ((hash << 5) + hash) + *key++;
122 hash = ((hash << 5) + hash) + *key++;
123 }
124 switch (len) {
125 case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
126 case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
127 case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
128 case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
129 case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
130 case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
131 case 1: hash = ((hash << 5) + hash) + *key++; break;
132 default: /* case 0: */ break;
133 }
134 return hash;
135}
136
137typedef unsigned long int ub4; /* unsigned 4-byte quantities */
138typedef unsigned char ub1; /* unsigned 1-byte quantities */
139
140ub4 bernstein(ub1 *key, ub4 len, ub4 level){
141 ub4 hash = level;
142 ub4 i;
143 for (i=0; i<len; ++i) hash = 33*hash + key[i];
144 return hash;
145}
146
147/*
148 * http://www.azillionmonkeys.com/qed/hash.html
149 */
150#undef get16bits
151#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
152 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
153#define get16bits(d) (*((const uint16_t *) (d)))
154#endif
155
156#if !defined (get16bits)
157#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
158 +(uint32_t)(((const uint8_t *)(d))[0]) )
159#endif
160
Willy Tarreauae5f7da2007-05-13 11:40:04 +0200161uint32_t SuperFastHash (const char * data, int len) {
162uint32_t hash = len, tmp;
163int rem;
164
165 if (len <= 0 || data == NULL) return 0;
166
167 rem = len & 3;
168 len >>= 2;
169
170 /* Main loop */
171 for (;len > 0; len--) {
172 hash += get16bits (data);
173 tmp = (get16bits (data+2) << 11) ^ hash;
174 hash = (hash << 16) ^ tmp;
175 data += 2*sizeof (uint16_t);
176 hash += hash >> 11;
177 }
178
179 /* Handle end cases */
180 switch (rem) {
181 case 3: hash += get16bits (data);
182 hash ^= hash << 16;
183 hash ^= data[sizeof (uint16_t)] << 18;
184 hash += hash >> 11;
185 break;
186 case 2: hash += get16bits (data);
187 hash ^= hash << 11;
188 hash += hash >> 17;
189 break;
190 case 1: hash += *data;
191 hash ^= hash << 10;
192 hash += hash >> 1;
193 }
194
195 /* Force "avalanching" of final 127 bits */
196 hash ^= hash << 3;
197 hash += hash >> 5;
198 hash ^= hash << 4;
199 hash += hash >> 17;
200 hash ^= hash << 25;
201 hash += hash >> 6;
202
203 return hash;
204}
205
206/*
Willy Tarreauab28b8b2007-09-09 21:13:47 +0200207 * This variant is about 15% faster.
Willy Tarreauae5f7da2007-05-13 11:40:04 +0200208 */
209uint32_t SuperFastHash2 (const char * data, int len) {
210uint32_t hash = len, tmp;
211int rem;
212
213 if (len <= 0 || data == NULL) return 0;
214
215 rem = len & 3;
216 len >>= 2;
217
218 /* Main loop */
219 for (;len > 0; len--) {
220 register uint32_t next;
221 next = get16bits(data+2);
222 hash += get16bits(data);
Willy Tarreauab28b8b2007-09-09 21:13:47 +0200223 tmp = (next << 11) ^ hash;
Willy Tarreauae5f7da2007-05-13 11:40:04 +0200224 hash = (hash << 16) ^ tmp;
225 data += 2*sizeof (uint16_t);
226 hash += hash >> 11;
227 }
228
229 /* Handle end cases */
230 switch (rem) {
231 case 3: hash += get16bits (data);
232 hash ^= hash << 16;
233 hash ^= data[sizeof (uint16_t)] << 18;
234 hash += hash >> 11;
235 break;
236 case 2: hash += get16bits (data);
237 hash ^= hash << 11;
238 hash += hash >> 17;
239 break;
240 case 1: hash += *data;
241 hash ^= hash << 10;
242 hash += hash >> 1;
243 }
244
245 /* Force "avalanching" of final 127 bits */
246 hash ^= hash << 3;
247 hash += hash >> 5;
248 hash ^= hash << 4;
249 hash += hash >> 17;
250 hash ^= hash << 25;
251 hash += hash >> 6;
252
253 return hash;
254}
255
256/*
257 * 32 bit FNV-0 hash type
258 */
259typedef unsigned long Fnv32_t;
260
261/*
262 * fnv_32a_str - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a string
263 *
264 * input:
265 * str - string to hash
266 * hval - previous hash value or 0 if first call
267 *
268 * returns:
269 * 32 bit hash as a static hash type
270 *
271 * NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the
272 * hval arg on the first call to either fnv_32a_buf() or fnv_32a_str().
273 */
274Fnv32_t
275fnv_32a_str(char *str, Fnv32_t hval)
276{
277 unsigned char *s = (unsigned char *)str; /* unsigned string */
278
279 /*
280 * FNV-1a hash each octet in the buffer
281 */
282 while (*s) {
283
284 /* xor the bottom with the current octet */
285 hval ^= (Fnv32_t)*s++;
286
287/* #define NO_FNV_GCC_OPTIMIZATION */
288 /* multiply by the 32 bit FNV magic prime mod 2^32 */
289#if defined(NO_FNV_GCC_OPTIMIZATION)
290 /*
291 * 32 bit magic FNV-1a prime
292 */
293#define FNV_32_PRIME ((Fnv32_t)0x01000193)
294 hval *= FNV_32_PRIME;
295#else
296 hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
297#endif
298 }
299
300 /* return our new hash value */
301 return hval;
302}
303
304/*
305 * from lookup3.c, by Bob Jenkins, May 2006, Public Domain.
306 */
307
308#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
309
310/*
311-------------------------------------------------------------------------------
312mix -- mix 3 32-bit values reversibly.
313
314This is reversible, so any information in (a,b,c) before mix() is
315still in (a,b,c) after mix().
316
317If four pairs of (a,b,c) inputs are run through mix(), or through
318mix() in reverse, there are at least 32 bits of the output that
319are sometimes the same for one pair and different for another pair.
320This was tested for:
321* pairs that differed by one bit, by two bits, in any combination
322 of top bits of (a,b,c), or in any combination of bottom bits of
323 (a,b,c).
324* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
325 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
326 is commonly produced by subtraction) look like a single 1-bit
327 difference.
328* the base values were pseudorandom, all zero but one bit set, or
329 all zero plus a counter that starts at zero.
330
331Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
332satisfy this are
333 4 6 8 16 19 4
334 9 15 3 18 27 15
335 14 9 3 7 17 3
336Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
337for "differ" defined as + with a one-bit base and a two-bit delta. I
338used http://burtleburtle.net/bob/hash/avalanche.html to choose
339the operations, constants, and arrangements of the variables.
340
341This does not achieve avalanche. There are input bits of (a,b,c)
342that fail to affect some output bits of (a,b,c), especially of a. The
343most thoroughly mixed value is c, but it doesn't really even achieve
344avalanche in c.
345
346This allows some parallelism. Read-after-writes are good at doubling
347the number of bits affected, so the goal of mixing pulls in the opposite
348direction as the goal of parallelism. I did what I could. Rotates
349seem to cost as much as shifts on every machine I could lay my hands
350on, and rotates are much kinder to the top and bottom bits, so I used
351rotates.
352-------------------------------------------------------------------------------
353*/
354#define mix(a,b,c) \
355{ \
356 a -= c; a ^= rot(c, 4); c += b; \
357 b -= a; b ^= rot(a, 6); a += c; \
358 c -= b; c ^= rot(b, 8); b += a; \
359 a -= c; a ^= rot(c,16); c += b; \
360 b -= a; b ^= rot(a,19); a += c; \
361 c -= b; c ^= rot(b, 4); b += a; \
362}
363
364/*
365-------------------------------------------------------------------------------
366final -- final mixing of 3 32-bit values (a,b,c) into c
367
368Pairs of (a,b,c) values differing in only a few bits will usually
369produce values of c that look totally different. This was tested for
370* pairs that differed by one bit, by two bits, in any combination
371 of top bits of (a,b,c), or in any combination of bottom bits of
372 (a,b,c).
373* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
374 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
375 is commonly produced by subtraction) look like a single 1-bit
376 difference.
377* the base values were pseudorandom, all zero but one bit set, or
378 all zero plus a counter that starts at zero.
379
380These constants passed:
381 14 11 25 16 4 14 24
382 12 14 25 16 4 14 24
383and these came close:
384 4 8 15 26 3 22 24
385 10 8 15 26 3 22 24
386 11 8 15 26 3 22 24
387-------------------------------------------------------------------------------
388*/
389#define final(a,b,c) \
390{ \
391 c ^= b; c -= rot(b,14); \
392 a ^= c; a -= rot(c,11); \
393 b ^= a; b -= rot(a,25); \
394 c ^= b; c -= rot(b,16); \
395 a ^= c; a -= rot(c,4); \
396 b ^= a; b -= rot(a,14); \
397 c ^= b; c -= rot(b,24); \
398}
399
400/*
401--------------------------------------------------------------------
402 This works on all machines. To be useful, it requires
403 -- that the key be an array of uint32_t's, and
404 -- that the length be the number of uint32_t's in the key
405
406 The function hashword() is identical to hashlittle() on little-endian
407 machines, and identical to hashbig() on big-endian machines,
408 except that the length has to be measured in uint32_ts rather than in
409 bytes. hashlittle() is more complicated than hashword() only because
410 hashlittle() has to dance around fitting the key bytes into registers.
411--------------------------------------------------------------------
412*/
413uint32_t hashword(
414const uint32_t *k, /* the key, an array of uint32_t values */
415size_t length, /* the length of the key, in uint32_ts */
416uint32_t initval) /* the previous hash, or an arbitrary value */
417{
418 uint32_t a,b,c;
419
420 /* Set up the internal state */
421 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
422
423 /*------------------------------------------------- handle most of the key */
424 while (length > 3)
425 {
426 a += k[0];
427 b += k[1];
428 c += k[2];
429 mix(a,b,c);
430 length -= 3;
431 k += 3;
432 }
433
434 /*------------------------------------------- handle the last 3 uint32_t's */
435 switch(length) /* all the case statements fall through */
436 {
437 case 3 : c+=k[2];
438 case 2 : b+=k[1];
439 case 1 : a+=k[0];
440 final(a,b,c);
441 case 0: /* case 0: nothing left to add */
442 break;
443 }
444 /*------------------------------------------------------ report the result */
445 return c;
446}
447
448/* from K&R book site 139 */
449#define HASHSIZE 101
450
451unsigned kr_hash(char *s){
452 unsigned hashval;
453
454 for(hashval = 0; *s != '\0';s++)
455 hashval = *s + 31 * hashval;
456
457 return hashval % HASHSIZE;
458
459} /* end kr_hash() */
460
461unsigned fnv_hash ( void *key, int len )
462{
463 unsigned char *p = key;
464 unsigned h = 2166136261;
465 int i;
466
467 for ( i = 0; i < len; i++ )
468 h = ( h * 16777619 ) ^ p[i];
469
470 return h;
471}
472
473unsigned oat_hash ( void *key, int len )
474{
475 unsigned char *p = key;
476 unsigned h = 0;
477 int i;
478
479 for ( i = 0; i < len; i++ ) {
480 h += p[i];
481 h += ( h << 10 );
482 h ^= ( h >> 6 );
483 }
484
485 h += ( h << 3 );
486 h ^= ( h >> 11 );
487 h += ( h << 15 );
488
489 return h;
490}
491
Willy Tarreauab28b8b2007-09-09 21:13:47 +0200492unsigned wt_hash ( void *key, int len )
493{
494 unsigned char *p = key;
495 unsigned h = 0x783c965aUL;
496 unsigned step = 16;
497
498 for (; len > 0; len--) {
499 h ^= *p * 9;
500 p++;
501 h = (h << step) | (h >> (32-step));
502 step ^= h;
503 step &= 0x1F;
504 }
505
506 return h;
507}
508
Willy Tarreauae5f7da2007-05-13 11:40:04 +0200509
510#define run_test(fct, args) { \
511 unsigned long loop, count; \
512 volatile unsigned long result; \
513 double delta; \
514 struct timeval tv; \
515 fprintf(stderr, "Starting %s\n", #fct); \
516 tv = timeval_current(); \
517 count = 0; \
518 do { \
519 delta = timeval_elapsed(&tv); \
520 for (loop = 0; loop < 1000; loop++) { \
521 result = fct args; \
522 count++; \
523 } \
524 } while (delta < 1.0); \
525 fprintf(stdout, "%-20s : %10.0f run/sec\n", #fct, count/delta); \
526 fflush(stdout); \
527}
528
529int main(){
530
531 char **start;
532 int len;
533
534 char *urls[] = {
535 "http://www.microsoft.com/shared/core/1/webservice/navigation.asmx/DisplayDownlevelNavHtml",
536 NULL
537 };
538
539 start = urls;
540 len = strlen(*urls);
541
Willy Tarreauab28b8b2007-09-09 21:13:47 +0200542 run_test(wt_hash, (*urls, len));
Willy Tarreauae5f7da2007-05-13 11:40:04 +0200543 run_test(SuperFastHash2, (*urls, len));
544 run_test(SuperFastHash, (*urls, len));
545 run_test(haproxy_uri_hash, (*urls, len));
546 run_test(haproxy_server_hash, (*urls, len));
547 run_test(hashpjw, (*urls));
548 run_test(hash_djbx33, ((unsigned char *)*urls, len));
549 run_test(bernstein, ((unsigned char *)*urls, len, 4));
550 run_test(fnv_32a_str, (*urls, 0));
551 run_test(hashword, ((const uint32_t *)*urls,strlen(*urls),0));
552 run_test(kr_hash, (*urls));
553 run_test(sax_hash, (*urls, len));
554 run_test(fnv_hash, (*urls, len));
555 run_test(oat_hash, (*urls, len));
556
557 return 0;
558
559}/* end main() */