| /* |
| * Integer hashing tests. These functions work with 32-bit integers, so are |
| * perfectly suited for IPv4 addresses. A few tests show that they may also |
| * be chained for larger keys (eg: IPv6), this way : |
| * f(x[0-3]) = f(f(f(f(x[0])^x[1])^x[2])^x[3]) |
| * |
| * See also bob jenkin's site for more info on hashing, and check perfect |
| * hashing for constants (eg: header names). |
| */ |
| |
| #include <stdio.h> |
| #include <string.h> |
| #include <arpa/inet.h> |
| #include <math.h> |
| |
| #define NSERV 8 |
| #define MAXLINE 1000 |
| |
| |
| int counts_id[NSERV][NSERV]; |
| uint32_t hash_id( uint32_t a) |
| { |
| return a; |
| } |
| |
| /* Full-avalanche integer hashing function from Thomas Wang, suitable for use |
| * with a modulo. See below, worth a read ! |
| * http://www.concentric.net/~Ttwang/tech/inthash.htm |
| * |
| * See also tests performed by Bob Jenkins (says it's faster than his) : |
| * http://burtleburtle.net/bob/hash/integer.html |
| * |
| * This function is small and fast. It does not seem as smooth as bj6 though. |
| * About 0x40 bytes, 6 shifts. |
| */ |
| int counts_tw1[NSERV][NSERV]; |
| uint32_t hash_tw1(uint32_t a) |
| { |
| a += ~(a<<15); |
| a ^= (a>>10); |
| a += (a<<3); |
| a ^= (a>>6); |
| a += ~(a<<11); |
| a ^= (a>>16); |
| return a; |
| } |
| |
| /* Thomas Wang's mix function. The multiply is optimized away by the compiler |
| * on most platforms. |
| * It is about equivalent to the one above. |
| */ |
| int counts_tw2[NSERV][NSERV]; |
| uint32_t hash_tw2(uint32_t a) |
| { |
| a = ~a + (a << 15); |
| a = a ^ (a >> 12); |
| a = a + (a << 2); |
| a = a ^ (a >> 4); |
| a = a * 2057; |
| a = a ^ (a >> 16); |
| return a; |
| } |
| |
| /* Thomas Wang's multiplicative hash function. About 0x30 bytes, and it is |
| * extremely fast on recent processors with a fast multiply. However, it |
| * must not be used on low bits only, as multiples of 0x00100010 only return |
| * even values ! |
| */ |
| int counts_tw3[NSERV][NSERV]; |
| uint32_t hash_tw3(uint32_t a) |
| { |
| a = (a ^ 61) ^ (a >> 16); |
| a = a + (a << 3); |
| a = a ^ (a >> 4); |
| a = a * 0x27d4eb2d; |
| a = a ^ (a >> 15); |
| return a; |
| } |
| |
| |
| /* Full-avalanche integer hashing function from Bob Jenkins, suitable for use |
| * with a modulo. It has a very smooth distribution. |
| * http://burtleburtle.net/bob/hash/integer.html |
| * About 0x50 bytes, 6 shifts. |
| */ |
| int counts_bj6[NSERV][NSERV]; |
| int counts_bj6x[NSERV][NSERV]; |
| uint32_t hash_bj6(uint32_t a) |
| { |
| a = (a+0x7ed55d16) + (a<<12); |
| a = (a^0xc761c23c) ^ (a>>19); |
| a = (a+0x165667b1) + (a<<5); |
| a = (a+0xd3a2646c) ^ (a<<9); |
| a = (a+0xfd7046c5) + (a<<3); |
| a = (a^0xb55a4f09) ^ (a>>16); |
| return a; |
| } |
| |
| /* Similar function with one more shift and no magic number. It is slightly |
| * slower but provides the overall smoothest distribution. |
| * About 0x40 bytes, 7 shifts. |
| */ |
| int counts_bj7[NSERV][NSERV]; |
| int counts_bj7x[NSERV][NSERV]; |
| uint32_t hash_bj7(uint32_t a) |
| { |
| a -= (a<<6); |
| a ^= (a>>17); |
| a -= (a<<9); |
| a ^= (a<<4); |
| a -= (a<<3); |
| a ^= (a<<10); |
| a ^= (a>>15); |
| return a; |
| } |
| |
| |
| void count_hash_results(unsigned long hash, int counts[NSERV][NSERV]) { |
| int srv, nsrv; |
| |
| for (nsrv = 0; nsrv < NSERV; nsrv++) { |
| srv = hash % (nsrv + 1); |
| counts[nsrv][srv]++; |
| } |
| } |
| |
| void dump_hash_results(char *name, int counts[NSERV][NSERV]) { |
| int srv, nsrv; |
| double err, total_err, max_err; |
| |
| printf("%s:\n", name); |
| for (nsrv = 0; nsrv < NSERV; nsrv++) { |
| total_err = 0.0; |
| max_err = 0.0; |
| printf("%02d srv: ", nsrv+1); |
| for (srv = 0; srv <= nsrv; srv++) { |
| err = 100.0*(counts[nsrv][srv] - (double)counts[0][0]/(nsrv+1)) / (double)counts[0][0]; |
| //printf("%6d ", counts[nsrv][srv]); |
| printf("% 3.1f%%%c ", err, |
| counts[nsrv][srv]?' ':'*'); /* display '*' when a server is never selected */ |
| err = fabs(err); |
| total_err += err; |
| if (err > max_err) |
| max_err = err; |
| } |
| total_err /= (double)(nsrv+1); |
| for (srv = nsrv+1; srv < NSERV; srv++) |
| printf(" "); |
| printf(" avg_err=%3.1f, max_err=%3.1f\n", total_err, max_err); |
| } |
| printf("\n"); |
| } |
| |
| int main() { |
| int nr; |
| unsigned int address = 0; |
| unsigned int mask = ~0; |
| |
| memset(counts_id, 0, sizeof(counts_id)); |
| memset(counts_tw1, 0, sizeof(counts_tw1)); |
| memset(counts_tw2, 0, sizeof(counts_tw2)); |
| memset(counts_tw3, 0, sizeof(counts_tw3)); |
| memset(counts_bj6, 0, sizeof(counts_bj6)); |
| memset(counts_bj7, 0, sizeof(counts_bj7)); |
| |
| address = 0x10000000; |
| mask = 0xffffff00; // user mask to apply to addresses |
| for (nr = 0; nr < 0x10; nr++) { |
| //address += ~nr; // semi-random addresses. |
| //address += 1; |
| address += 0x00000100; |
| //address += 0x11111111; |
| //address += 7; |
| //address += 8; |
| //address += 256; |
| //address += 65536; |
| //address += 131072; |
| //address += 0x00100010; // this increment kills tw3 ! |
| count_hash_results(hash_id (address & mask), counts_id); // 0.69s / 100M |
| count_hash_results(hash_tw1(address & mask), counts_tw1); // 1.04s / 100M |
| count_hash_results(hash_tw2(address & mask), counts_tw2); // 1.13s / 100M |
| count_hash_results(hash_tw3(address & mask), counts_tw3); // 1.01s / 100M |
| count_hash_results(hash_bj6(address & mask), counts_bj6); // 1.07s / 100M |
| count_hash_results(hash_bj7(address & mask), counts_bj7); // 1.20s / 100M |
| /* adding the original address after the hash reduces the error |
| * rate in in presence of very small data sets (eg: 16 source |
| * addresses for 8 servers). In this case, bj7 is very good. |
| */ |
| count_hash_results(hash_bj6(address & mask)+(address&mask), counts_bj6x); // 1.07s / 100M |
| count_hash_results(hash_bj7(address & mask)+(address&mask), counts_bj7x); // 1.20s / 100M |
| } |
| |
| dump_hash_results("hash_id", counts_id); |
| dump_hash_results("hash_tw1", counts_tw1); |
| dump_hash_results("hash_tw2", counts_tw2); |
| dump_hash_results("hash_tw3", counts_tw3); |
| dump_hash_results("hash_bj6", counts_bj6); |
| dump_hash_results("hash_bj6x", counts_bj6x); |
| dump_hash_results("hash_bj7", counts_bj7); |
| dump_hash_results("hash_bj7x", counts_bj7x); |
| return 0; |
| } |