Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Integer hashing tests. These functions work with 32-bit integers, so are |
| 3 | * perfectly suited for IPv4 addresses. A few tests show that they may also |
| 4 | * be chained for larger keys (eg: IPv6), this way : |
| 5 | * f(x[0-3]) = f(f(f(f(x[0])^x[1])^x[2])^x[3]) |
| 6 | * |
| 7 | * See also bob jenkin's site for more info on hashing, and check perfect |
| 8 | * hashing for constants (eg: header names). |
| 9 | */ |
| 10 | |
| 11 | #include <stdio.h> |
| 12 | #include <string.h> |
| 13 | #include <arpa/inet.h> |
| 14 | #include <math.h> |
| 15 | |
| 16 | #define NSERV 8 |
| 17 | #define MAXLINE 1000 |
| 18 | |
| 19 | |
| 20 | int counts_id[NSERV][NSERV]; |
| 21 | uint32_t hash_id( uint32_t a) |
| 22 | { |
| 23 | return a; |
| 24 | } |
| 25 | |
| 26 | /* Full-avalanche integer hashing function from Thomas Wang, suitable for use |
| 27 | * with a modulo. See below, worth a read ! |
| 28 | * http://www.concentric.net/~Ttwang/tech/inthash.htm |
| 29 | * |
| 30 | * See also tests performed by Bob Jenkins (says it's faster than his) : |
| 31 | * http://burtleburtle.net/bob/hash/integer.html |
| 32 | * |
| 33 | * This function is small and fast. It does not seem as smooth as bj6 though. |
| 34 | * About 0x40 bytes, 6 shifts. |
| 35 | */ |
| 36 | int counts_tw1[NSERV][NSERV]; |
| 37 | uint32_t hash_tw1(uint32_t a) |
| 38 | { |
| 39 | a += ~(a<<15); |
| 40 | a ^= (a>>10); |
| 41 | a += (a<<3); |
| 42 | a ^= (a>>6); |
| 43 | a += ~(a<<11); |
| 44 | a ^= (a>>16); |
| 45 | return a; |
| 46 | } |
| 47 | |
| 48 | /* Thomas Wang's mix function. The multiply is optimized away by the compiler |
| 49 | * on most platforms. |
| 50 | * It is about equivalent to the one above. |
| 51 | */ |
| 52 | int counts_tw2[NSERV][NSERV]; |
| 53 | uint32_t hash_tw2(uint32_t a) |
| 54 | { |
| 55 | a = ~a + (a << 15); |
| 56 | a = a ^ (a >> 12); |
| 57 | a = a + (a << 2); |
| 58 | a = a ^ (a >> 4); |
| 59 | a = a * 2057; |
| 60 | a = a ^ (a >> 16); |
| 61 | return a; |
| 62 | } |
| 63 | |
| 64 | /* Thomas Wang's multiplicative hash function. About 0x30 bytes, and it is |
| 65 | * extremely fast on recent processors with a fast multiply. However, it |
| 66 | * must not be used on low bits only, as multiples of 0x00100010 only return |
| 67 | * even values ! |
| 68 | */ |
| 69 | int counts_tw3[NSERV][NSERV]; |
| 70 | uint32_t hash_tw3(uint32_t a) |
| 71 | { |
| 72 | a = (a ^ 61) ^ (a >> 16); |
| 73 | a = a + (a << 3); |
| 74 | a = a ^ (a >> 4); |
| 75 | a = a * 0x27d4eb2d; |
| 76 | a = a ^ (a >> 15); |
| 77 | return a; |
| 78 | } |
| 79 | |
| 80 | |
| 81 | /* Full-avalanche integer hashing function from Bob Jenkins, suitable for use |
| 82 | * with a modulo. It has a very smooth distribution. |
| 83 | * http://burtleburtle.net/bob/hash/integer.html |
| 84 | * About 0x50 bytes, 6 shifts. |
| 85 | */ |
| 86 | int counts_bj6[NSERV][NSERV]; |
Willy Tarreau | a532324 | 2008-04-13 09:27:00 +0200 | [diff] [blame] | 87 | int counts_bj6x[NSERV][NSERV]; |
Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 88 | uint32_t hash_bj6(uint32_t a) |
| 89 | { |
| 90 | a = (a+0x7ed55d16) + (a<<12); |
| 91 | a = (a^0xc761c23c) ^ (a>>19); |
| 92 | a = (a+0x165667b1) + (a<<5); |
| 93 | a = (a+0xd3a2646c) ^ (a<<9); |
| 94 | a = (a+0xfd7046c5) + (a<<3); |
| 95 | a = (a^0xb55a4f09) ^ (a>>16); |
| 96 | return a; |
| 97 | } |
| 98 | |
| 99 | /* Similar function with one more shift and no magic number. It is slightly |
| 100 | * slower but provides the overall smoothest distribution. |
| 101 | * About 0x40 bytes, 7 shifts. |
| 102 | */ |
| 103 | int counts_bj7[NSERV][NSERV]; |
Willy Tarreau | a532324 | 2008-04-13 09:27:00 +0200 | [diff] [blame] | 104 | int counts_bj7x[NSERV][NSERV]; |
Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 105 | uint32_t hash_bj7(uint32_t a) |
| 106 | { |
| 107 | a -= (a<<6); |
| 108 | a ^= (a>>17); |
| 109 | a -= (a<<9); |
| 110 | a ^= (a<<4); |
| 111 | a -= (a<<3); |
| 112 | a ^= (a<<10); |
| 113 | a ^= (a>>15); |
| 114 | return a; |
| 115 | } |
| 116 | |
| 117 | |
| 118 | void count_hash_results(unsigned long hash, int counts[NSERV][NSERV]) { |
| 119 | int srv, nsrv; |
| 120 | |
| 121 | for (nsrv = 0; nsrv < NSERV; nsrv++) { |
| 122 | srv = hash % (nsrv + 1); |
| 123 | counts[nsrv][srv]++; |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | void dump_hash_results(char *name, int counts[NSERV][NSERV]) { |
| 128 | int srv, nsrv; |
| 129 | double err, total_err, max_err; |
| 130 | |
| 131 | printf("%s:\n", name); |
| 132 | for (nsrv = 0; nsrv < NSERV; nsrv++) { |
| 133 | total_err = 0.0; |
| 134 | max_err = 0.0; |
| 135 | printf("%02d srv: ", nsrv+1); |
| 136 | for (srv = 0; srv <= nsrv; srv++) { |
| 137 | err = 100.0*(counts[nsrv][srv] - (double)counts[0][0]/(nsrv+1)) / (double)counts[0][0]; |
| 138 | //printf("%6d ", counts[nsrv][srv]); |
Willy Tarreau | a532324 | 2008-04-13 09:27:00 +0200 | [diff] [blame] | 139 | printf("% 3.1f%%%c ", err, |
| 140 | counts[nsrv][srv]?' ':'*'); /* display '*' when a server is never selected */ |
Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 141 | err = fabs(err); |
| 142 | total_err += err; |
| 143 | if (err > max_err) |
| 144 | max_err = err; |
| 145 | } |
| 146 | total_err /= (double)(nsrv+1); |
| 147 | for (srv = nsrv+1; srv < NSERV; srv++) |
Willy Tarreau | a532324 | 2008-04-13 09:27:00 +0200 | [diff] [blame] | 148 | printf(" "); |
Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 149 | printf(" avg_err=%3.1f, max_err=%3.1f\n", total_err, max_err); |
| 150 | } |
| 151 | printf("\n"); |
| 152 | } |
| 153 | |
| 154 | int main() { |
| 155 | int nr; |
| 156 | unsigned int address = 0; |
| 157 | unsigned int mask = ~0; |
| 158 | |
| 159 | memset(counts_id, 0, sizeof(counts_id)); |
| 160 | memset(counts_tw1, 0, sizeof(counts_tw1)); |
| 161 | memset(counts_tw2, 0, sizeof(counts_tw2)); |
| 162 | memset(counts_tw3, 0, sizeof(counts_tw3)); |
| 163 | memset(counts_bj6, 0, sizeof(counts_bj6)); |
| 164 | memset(counts_bj7, 0, sizeof(counts_bj7)); |
| 165 | |
Willy Tarreau | a532324 | 2008-04-13 09:27:00 +0200 | [diff] [blame] | 166 | address = 0x10000000; |
| 167 | mask = 0xffffff00; // user mask to apply to addresses |
| 168 | for (nr = 0; nr < 0x10; nr++) { |
| 169 | //address += ~nr; // semi-random addresses. |
Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 170 | //address += 1; |
Willy Tarreau | a532324 | 2008-04-13 09:27:00 +0200 | [diff] [blame] | 171 | address += 0x00000100; |
| 172 | //address += 0x11111111; |
Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 173 | //address += 7; |
| 174 | //address += 8; |
| 175 | //address += 256; |
| 176 | //address += 65536; |
| 177 | //address += 131072; |
| 178 | //address += 0x00100010; // this increment kills tw3 ! |
| 179 | count_hash_results(hash_id (address & mask), counts_id); // 0.69s / 100M |
| 180 | count_hash_results(hash_tw1(address & mask), counts_tw1); // 1.04s / 100M |
| 181 | count_hash_results(hash_tw2(address & mask), counts_tw2); // 1.13s / 100M |
| 182 | count_hash_results(hash_tw3(address & mask), counts_tw3); // 1.01s / 100M |
| 183 | count_hash_results(hash_bj6(address & mask), counts_bj6); // 1.07s / 100M |
| 184 | count_hash_results(hash_bj7(address & mask), counts_bj7); // 1.20s / 100M |
Willy Tarreau | a532324 | 2008-04-13 09:27:00 +0200 | [diff] [blame] | 185 | /* adding the original address after the hash reduces the error |
| 186 | * rate in in presence of very small data sets (eg: 16 source |
| 187 | * addresses for 8 servers). In this case, bj7 is very good. |
| 188 | */ |
| 189 | count_hash_results(hash_bj6(address & mask)+(address&mask), counts_bj6x); // 1.07s / 100M |
| 190 | count_hash_results(hash_bj7(address & mask)+(address&mask), counts_bj7x); // 1.20s / 100M |
Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 191 | } |
| 192 | |
| 193 | dump_hash_results("hash_id", counts_id); |
| 194 | dump_hash_results("hash_tw1", counts_tw1); |
| 195 | dump_hash_results("hash_tw2", counts_tw2); |
| 196 | dump_hash_results("hash_tw3", counts_tw3); |
| 197 | dump_hash_results("hash_bj6", counts_bj6); |
Willy Tarreau | a532324 | 2008-04-13 09:27:00 +0200 | [diff] [blame] | 198 | dump_hash_results("hash_bj6x", counts_bj6x); |
Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 199 | dump_hash_results("hash_bj7", counts_bj7); |
Willy Tarreau | a532324 | 2008-04-13 09:27:00 +0200 | [diff] [blame] | 200 | dump_hash_results("hash_bj7x", counts_bj7x); |
Willy Tarreau | 53cfa0e | 2008-04-12 22:28:32 +0200 | [diff] [blame] | 201 | return 0; |
| 202 | } |