MEDIUM: backend: Enhance hash-type directive with an algorithm options

Summary:
In testing at tumblr, we found that using djb2 hashing instead of the
default sdbm hashing resulted is better workload distribution to our backends.

This commit implements a change, that allows the user to specify the hash
function they want to use. It does not limit itself to consistent hashing
scenarios.

The supported hash functions are sdbm (default), and djb2.

For a discussion of the feature and analysis, see mailing list thread
"Consistent hashing alternative to sdbm" :

      http://marc.info/?l=haproxy&m=138213693909219

Note: This change does NOT make changes to new features, for instance,
applying an avalance hashing always being performed before applying
consistent hashing.
diff --git a/src/backend.c b/src/backend.c
index 48c8761..bf4a81d 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -23,6 +23,7 @@
 #include <common/compat.h>
 #include <common/config.h>
 #include <common/debug.h>
+#include <common/hash.h>
 #include <common/ticks.h>
 #include <common/time.h>
 
@@ -51,6 +52,25 @@
 #include <proto/stream_interface.h>
 #include <proto/task.h>
 
+/* helper function to invoke the correct hash method */
+static unsigned long gen_hash(const struct proxy* px, const char* key, unsigned long len)
+{
+	unsigned long hash;
+
+	switch (px->lbprm.algo & BE_LB_HASH_FUNC) {
+	case BE_LB_HFCN_DJB2:
+		hash = hash_djb2(key, len);
+		break;
+	case BE_LB_HFCN_SDBM:
+		/* this is the default hash function */
+	default:
+		hash = hash_sdbm(key, len);
+		break;
+	}
+
+	return hash;
+}
+
 /*
  * This function recounts the number of usable active and backup servers for
  * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
@@ -153,6 +173,7 @@
 	unsigned long hash = 0;
 	int c;
 	int slashes = 0;
+	const char *start, *end;
 
 	if (px->lbprm.tot_weight == 0)
 		return NULL;
@@ -164,8 +185,9 @@
 	if (px->uri_len_limit)
 		uri_len = MIN(uri_len, px->uri_len_limit);
 
+	start = end = uri;
 	while (uri_len--) {
-		c = *uri++;
+		c = *end++;
 		if (c == '/') {
 			slashes++;
 			if (slashes == px->uri_dirs_depth1) /* depth+1 */
@@ -173,9 +195,10 @@
 		}
 		else if (c == '?' && !px->uri_whole)
 			break;
-
-		hash = c + (hash << 6) + (hash << 16) - hash;
 	}
+
+	hash = gen_hash(px, start, (end - start));
+
 	if ((px->lbprm.algo & BE_LB_HASH_TYPE) != BE_LB_HASH_MAP)
 		hash = full_hash(hash);
  hash_done:
@@ -197,6 +220,7 @@
 struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
 {
 	unsigned long hash = 0;
+	const char *start, *end;
 	const char *p;
 	const char *params;
 	int plen;
@@ -223,15 +247,18 @@
 				 * skip the equal symbol
 				 */
 				p += plen + 1;
+				start = end = p;
 				uri_len -= plen + 1;
 
-				while (uri_len && *p != '&') {
-					hash = *p + (hash << 6) + (hash << 16) - hash;
+				while (uri_len && *end != '&') {
 					uri_len--;
-					p++;
+					end++;
 				}
+				hash = gen_hash(px, start, (end - start));
+
 				if ((px->lbprm.algo & BE_LB_HASH_TYPE) != BE_LB_HASH_MAP)
 					hash = full_hash(hash);
+
 				if (px->lbprm.algo & BE_LB_LKUP_CHTREE)
 					return chash_get_server_hash(px, hash);
 				else
@@ -263,6 +290,7 @@
 	unsigned long    len  = msg->body_len;
 	const char      *params = b_ptr(req->buf, (int)(msg->sov - req->buf->o));
 	const char      *p    = params;
+	const char      *start, *end;
 
 	if (len > buffer_len(req->buf) - msg->sov)
 		len = buffer_len(req->buf) - msg->sov;
@@ -282,12 +310,13 @@
 				 * skip the equal symbol
 				 */
 				p += plen + 1;
+				start = end = p;
 				len -= plen + 1;
 
-				while (len && *p != '&') {
+				while (len && *end != '&') {
 					if (unlikely(!HTTP_IS_TOKEN(*p))) {
 						/* if in a POST, body must be URI encoded or it's not a URI.
-						 * Do not interprete any possible binary data as a parameter.
+						 * Do not interpret any possible binary data as a parameter.
 						 */
 						if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
 							break;
@@ -295,13 +324,15 @@
 									      * This body does not contain parameters.
 									      */
 					}
-					hash = *p + (hash << 6) + (hash << 16) - hash;
 					len--;
-					p++;
+					end++;
 					/* should we break if vlen exceeds limit? */
 				}
+				hash = gen_hash(px, start, (end - start));
+
 				if ((px->lbprm.algo & BE_LB_HASH_TYPE) != BE_LB_HASH_MAP)
 					hash = full_hash(hash);
+
 				if (px->lbprm.algo & BE_LB_LKUP_CHTREE)
 					return chash_get_server_hash(px, hash);
 				else
@@ -338,6 +369,7 @@
 	unsigned long    len;
 	struct hdr_ctx   ctx;
 	const char      *p;
+	const char *start, *end;
 
 	/* tot_weight appears to mean srv_count */
 	if (px->lbprm.tot_weight == 0)
@@ -362,14 +394,11 @@
 	len = ctx.vlen;
 	p = (char *)ctx.line + ctx.val;
 	if (!px->hh_match_domain) {
-		while (len) {
-			hash = *p + (hash << 6) + (hash << 16) - hash;
-			len--;
-			p++;
-		}
+		hash = gen_hash(px, p, len);
 	} else {
 		int dohash = 0;
 		p += len - 1;
+		start = end = p;
 		/* special computation, use only main domain name, not tld/host
 		 * going back from the end of string, start hashing at first
 		 * dot stop at next.
@@ -378,17 +407,20 @@
 		 */
 		while (len) {
 			if (*p == '.') {
-				if (!dohash)
+				if (!dohash) {
 					dohash = 1;
+					start = end = p - 1;
+				}
 				else
 					break;
 			} else {
 				if (dohash)
-					hash = *p + (hash << 6) + (hash << 16) - hash;
+					start--;
 			}
 			len--;
 			p--;
 		}
+		hash = gen_hash(px, start, (end - start));
 	}
 	if ((px->lbprm.algo & BE_LB_HASH_TYPE) != BE_LB_HASH_MAP)
 		hash = full_hash(hash);
@@ -405,7 +437,6 @@
 	unsigned long    hash = 0;
 	struct proxy    *px   = s->be;
 	unsigned long    len;
-	const char      *p;
 	int              ret;
 	struct sample    smp;
 	int rewind;
@@ -433,12 +464,8 @@
 	/* Found a the hh_name in the headers.
 	 * we will compute the hash based on this value ctx.val.
 	 */
-	p = smp.data.str.str;
-	while (len) {
-		hash = *p + (hash << 6) + (hash << 16) - hash;
-		len--;
-		p++;
-	}
+	hash = gen_hash(px, smp.data.str.str, len);
+
 	if ((px->lbprm.algo & BE_LB_HASH_TYPE) != BE_LB_HASH_MAP)
 		hash = full_hash(hash);
  hash_done:
@@ -1318,7 +1345,6 @@
 			}
 			curproxy->hh_match_domain = 1;
 		}
-
 	}
 	else if (!strncmp(args[0], "rdp-cookie", 10)) {
 		curproxy->lbprm.algo &= ~BE_LB_ALGO;
diff --git a/src/cfgparse.c b/src/cfgparse.c
index 41c1949..051f600 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -4085,19 +4085,18 @@
 		}
 	}
 	else if (!strcmp(args[0], "hash-type")) { /* set hashing method */
+		curproxy->lbprm.algo &= ~(BE_LB_HASH_TYPE | BE_LB_HASH_FUNC);
+
 		if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
 			err_code |= ERR_WARN;
 
 		if (strcmp(args[1], "consistent") == 0) {	/* use consistent hashing */
-			curproxy->lbprm.algo &= ~BE_LB_HASH_TYPE;
 			curproxy->lbprm.algo |= BE_LB_HASH_CONS;
 		}
 		else if (strcmp(args[1], "map-based") == 0) {	/* use map-based hashing */
-			curproxy->lbprm.algo &= ~BE_LB_HASH_TYPE;
 			curproxy->lbprm.algo |= BE_LB_HASH_MAP;
 		}
 		else if (strcmp(args[1], "avalanche") == 0) {	/* use full hash before map-based hashing */
-			curproxy->lbprm.algo &= ~BE_LB_HASH_TYPE;
 			curproxy->lbprm.algo |= BE_LB_HASH_AVAL;
 		}
 		else {
@@ -4105,6 +4104,19 @@
 			err_code |= ERR_ALERT | ERR_FATAL;
 			goto out;
 		}
+
+		/* set the hash function to use */
+		if (!*args[2]) {
+			curproxy->lbprm.algo |= BE_LB_HFCN_SDBM;
+		} else if (!strcmp(args[2], "sdbm")) {
+			curproxy->lbprm.algo |= BE_LB_HFCN_SDBM;
+		} else if (!strcmp(args[2], "djb2")) {
+			curproxy->lbprm.algo |= BE_LB_HFCN_DJB2;
+		} else {
+			Alert("parsing [%s:%d] : '%s' only supports 'sdbm' and 'djb2' hash functions.\n", file, linenum, args[0]);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+		}
 	}
 	else if (!strcmp(args[0], "server") || !strcmp(args[0], "default-server")) {  /* server address */
 		int cur_arg;
diff --git a/src/hash.c b/src/hash.c
new file mode 100644
index 0000000..8da3003
--- /dev/null
+++ b/src/hash.c
@@ -0,0 +1,61 @@
+/*
+ * Hash function implementation
+ *
+ * See mailing list thread on "Consistent hashing alternative to sdbm"
+ * http://marc.info/?l=haproxy&m=138213693909219
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+
+#include <common/hash.h>
+
+
+unsigned long hash_djb2(const char *key, int len)
+{
+	unsigned long hash = 5381;
+
+	/* the hash unrolled eight times */
+	for (; len >= 8; len -= 8) {
+		hash = ((hash << 5) + hash) + *key++;
+		hash = ((hash << 5) + hash) + *key++;
+		hash = ((hash << 5) + hash) + *key++;
+		hash = ((hash << 5) + hash) + *key++;
+		hash = ((hash << 5) + hash) + *key++;
+		hash = ((hash << 5) + hash) + *key++;
+		hash = ((hash << 5) + hash) + *key++;
+		hash = ((hash << 5) + hash) + *key++;
+	}
+	switch (len) {
+		case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+		case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+		case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+		case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+		case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+		case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+		case 1: hash = ((hash << 5) + hash) + *key++; break;
+		default: /* case 0: */ break;
+	}
+	return hash;
+}
+
+unsigned long hash_sdbm(const char *key, int len)
+{
+	unsigned long hash = 0;
+	int c;
+
+	while (len--) {
+		c = *key++;
+		hash = c + (hash << 6) + (hash << 16) - hash;
+	}
+
+	return hash;
+}
+
+