blob: 8bb2d488fc424e418516ad495b2991c0648cc0fe [file] [log] [blame]
Willy Tarreau53cfa0e2008-04-12 22:28:32 +02001/*
2 * Integer hashing tests. These functions work with 32-bit integers, so are
3 * perfectly suited for IPv4 addresses. A few tests show that they may also
4 * be chained for larger keys (eg: IPv6), this way :
5 * f(x[0-3]) = f(f(f(f(x[0])^x[1])^x[2])^x[3])
6 *
7 * See also bob jenkin's site for more info on hashing, and check perfect
8 * hashing for constants (eg: header names).
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include <arpa/inet.h>
14#include <math.h>
15
16#define NSERV 8
17#define MAXLINE 1000
18
19
20int counts_id[NSERV][NSERV];
21uint32_t hash_id( uint32_t a)
22{
23 return a;
24}
25
26/* Full-avalanche integer hashing function from Thomas Wang, suitable for use
27 * with a modulo. See below, worth a read !
28 * http://www.concentric.net/~Ttwang/tech/inthash.htm
29 *
30 * See also tests performed by Bob Jenkins (says it's faster than his) :
31 * http://burtleburtle.net/bob/hash/integer.html
32 *
33 * This function is small and fast. It does not seem as smooth as bj6 though.
34 * About 0x40 bytes, 6 shifts.
35 */
36int counts_tw1[NSERV][NSERV];
37uint32_t hash_tw1(uint32_t a)
38{
39 a += ~(a<<15);
40 a ^= (a>>10);
41 a += (a<<3);
42 a ^= (a>>6);
43 a += ~(a<<11);
44 a ^= (a>>16);
45 return a;
46}
47
48/* Thomas Wang's mix function. The multiply is optimized away by the compiler
49 * on most platforms.
50 * It is about equivalent to the one above.
51 */
52int counts_tw2[NSERV][NSERV];
53uint32_t hash_tw2(uint32_t a)
54{
55 a = ~a + (a << 15);
56 a = a ^ (a >> 12);
57 a = a + (a << 2);
58 a = a ^ (a >> 4);
59 a = a * 2057;
60 a = a ^ (a >> 16);
61 return a;
62}
63
64/* Thomas Wang's multiplicative hash function. About 0x30 bytes, and it is
65 * extremely fast on recent processors with a fast multiply. However, it
66 * must not be used on low bits only, as multiples of 0x00100010 only return
67 * even values !
68 */
69int counts_tw3[NSERV][NSERV];
70uint32_t hash_tw3(uint32_t a)
71{
72 a = (a ^ 61) ^ (a >> 16);
73 a = a + (a << 3);
74 a = a ^ (a >> 4);
75 a = a * 0x27d4eb2d;
76 a = a ^ (a >> 15);
77 return a;
78}
79
80
81/* Full-avalanche integer hashing function from Bob Jenkins, suitable for use
82 * with a modulo. It has a very smooth distribution.
83 * http://burtleburtle.net/bob/hash/integer.html
84 * About 0x50 bytes, 6 shifts.
85 */
86int counts_bj6[NSERV][NSERV];
Willy Tarreaua5323242008-04-13 09:27:00 +020087int counts_bj6x[NSERV][NSERV];
Willy Tarreau53cfa0e2008-04-12 22:28:32 +020088uint32_t hash_bj6(uint32_t a)
89{
90 a = (a+0x7ed55d16) + (a<<12);
91 a = (a^0xc761c23c) ^ (a>>19);
92 a = (a+0x165667b1) + (a<<5);
93 a = (a+0xd3a2646c) ^ (a<<9);
94 a = (a+0xfd7046c5) + (a<<3);
95 a = (a^0xb55a4f09) ^ (a>>16);
96 return a;
97}
98
99/* Similar function with one more shift and no magic number. It is slightly
100 * slower but provides the overall smoothest distribution.
101 * About 0x40 bytes, 7 shifts.
102 */
103int counts_bj7[NSERV][NSERV];
Willy Tarreaua5323242008-04-13 09:27:00 +0200104int counts_bj7x[NSERV][NSERV];
Willy Tarreau53cfa0e2008-04-12 22:28:32 +0200105uint32_t hash_bj7(uint32_t a)
106{
107 a -= (a<<6);
108 a ^= (a>>17);
109 a -= (a<<9);
110 a ^= (a<<4);
111 a -= (a<<3);
112 a ^= (a<<10);
113 a ^= (a>>15);
114 return a;
115}
116
117
118void count_hash_results(unsigned long hash, int counts[NSERV][NSERV]) {
119 int srv, nsrv;
120
121 for (nsrv = 0; nsrv < NSERV; nsrv++) {
122 srv = hash % (nsrv + 1);
123 counts[nsrv][srv]++;
124 }
125}
126
127void dump_hash_results(char *name, int counts[NSERV][NSERV]) {
128 int srv, nsrv;
129 double err, total_err, max_err;
130
131 printf("%s:\n", name);
132 for (nsrv = 0; nsrv < NSERV; nsrv++) {
133 total_err = 0.0;
134 max_err = 0.0;
135 printf("%02d srv: ", nsrv+1);
136 for (srv = 0; srv <= nsrv; srv++) {
137 err = 100.0*(counts[nsrv][srv] - (double)counts[0][0]/(nsrv+1)) / (double)counts[0][0];
138 //printf("%6d ", counts[nsrv][srv]);
Willy Tarreaua5323242008-04-13 09:27:00 +0200139 printf("% 3.1f%%%c ", err,
140 counts[nsrv][srv]?' ':'*'); /* display '*' when a server is never selected */
Willy Tarreau53cfa0e2008-04-12 22:28:32 +0200141 err = fabs(err);
142 total_err += err;
143 if (err > max_err)
144 max_err = err;
145 }
146 total_err /= (double)(nsrv+1);
147 for (srv = nsrv+1; srv < NSERV; srv++)
Willy Tarreaua5323242008-04-13 09:27:00 +0200148 printf(" ");
Willy Tarreau53cfa0e2008-04-12 22:28:32 +0200149 printf(" avg_err=%3.1f, max_err=%3.1f\n", total_err, max_err);
150 }
151 printf("\n");
152}
153
154int main() {
155 int nr;
156 unsigned int address = 0;
157 unsigned int mask = ~0;
158
159 memset(counts_id, 0, sizeof(counts_id));
160 memset(counts_tw1, 0, sizeof(counts_tw1));
161 memset(counts_tw2, 0, sizeof(counts_tw2));
162 memset(counts_tw3, 0, sizeof(counts_tw3));
163 memset(counts_bj6, 0, sizeof(counts_bj6));
164 memset(counts_bj7, 0, sizeof(counts_bj7));
165
Willy Tarreaua5323242008-04-13 09:27:00 +0200166 address = 0x10000000;
167 mask = 0xffffff00; // user mask to apply to addresses
168 for (nr = 0; nr < 0x10; nr++) {
169 //address += ~nr; // semi-random addresses.
Willy Tarreau53cfa0e2008-04-12 22:28:32 +0200170 //address += 1;
Willy Tarreaua5323242008-04-13 09:27:00 +0200171 address += 0x00000100;
172 //address += 0x11111111;
Willy Tarreau53cfa0e2008-04-12 22:28:32 +0200173 //address += 7;
174 //address += 8;
175 //address += 256;
176 //address += 65536;
177 //address += 131072;
178 //address += 0x00100010; // this increment kills tw3 !
179 count_hash_results(hash_id (address & mask), counts_id); // 0.69s / 100M
180 count_hash_results(hash_tw1(address & mask), counts_tw1); // 1.04s / 100M
181 count_hash_results(hash_tw2(address & mask), counts_tw2); // 1.13s / 100M
182 count_hash_results(hash_tw3(address & mask), counts_tw3); // 1.01s / 100M
183 count_hash_results(hash_bj6(address & mask), counts_bj6); // 1.07s / 100M
184 count_hash_results(hash_bj7(address & mask), counts_bj7); // 1.20s / 100M
Willy Tarreaua5323242008-04-13 09:27:00 +0200185 /* adding the original address after the hash reduces the error
186 * rate in in presence of very small data sets (eg: 16 source
187 * addresses for 8 servers). In this case, bj7 is very good.
188 */
189 count_hash_results(hash_bj6(address & mask)+(address&mask), counts_bj6x); // 1.07s / 100M
190 count_hash_results(hash_bj7(address & mask)+(address&mask), counts_bj7x); // 1.20s / 100M
Willy Tarreau53cfa0e2008-04-12 22:28:32 +0200191 }
192
193 dump_hash_results("hash_id", counts_id);
194 dump_hash_results("hash_tw1", counts_tw1);
195 dump_hash_results("hash_tw2", counts_tw2);
196 dump_hash_results("hash_tw3", counts_tw3);
197 dump_hash_results("hash_bj6", counts_bj6);
Willy Tarreaua5323242008-04-13 09:27:00 +0200198 dump_hash_results("hash_bj6x", counts_bj6x);
Willy Tarreau53cfa0e2008-04-12 22:28:32 +0200199 dump_hash_results("hash_bj7", counts_bj7);
Willy Tarreaua5323242008-04-13 09:27:00 +0200200 dump_hash_results("hash_bj7x", counts_bj7x);
Willy Tarreau53cfa0e2008-04-12 22:28:32 +0200201 return 0;
202}