[MEDIUM] introduce the "url_param" balance method

Some applications do not have a strict persistence requirement, yet
persistence is still desirable for performance reasons, due to local
caches on the servers. For various reasons, some of these applications
cannot rely on cookies, and their last resort is to use a parameter
passed in the URL.

The new 'url_param' balance method is there to solve this issue. It
accepts a parameter name which is looked up in the URL, and the value
of that parameter is then hashed to select a server. If the parameter
is not found, then the round robin algorithm is used in order to provide
a normal load balancing across the servers for the first requests. It
would have been possible to use a source IP hash instead, but since such
applications are generally buried behind multiple levels of
reverse-proxies, it would not provide a good balance.

The doc has been updated, and two regression testing configurations
have been added.
diff --git a/doc/haproxy-en.txt b/doc/haproxy-en.txt
index 3e8ce10..64a63ac 100644
--- a/doc/haproxy-en.txt
+++ b/doc/haproxy-en.txt
@@ -986,6 +986,26 @@
         server squid1 192.168.1.1
         server squid2 192.168.1.2
 
+Version 1.3.14 introduced the "balance url_param" method. It hashes the value
+of a parameter passed in the URL to select the server. This is mostly useful
+for applications which do not have strict persistence requirements, but for
+which persistence still provides a performance boost due to local caching.
+Some of these applications may not be able to use a cookie for whatever reason,
+but may be able to look for a parameter passed in the URL. If the parameter is
+missing from the URL, then the 'round robin' method applies.
+
+Example :
+---------
+
+# Hash the "basket_id" argument from the URL to determine the server
+
+    listen http_proxy
+        bind :3128
+        mode http
+        balance url_param basket_id
+        server ebiz1 192.168.1.1
+        server ebiz2 192.168.1.2
+
 
 3.1) Server monitoring
 ----------------------
diff --git a/doc/haproxy-fr.txt b/doc/haproxy-fr.txt
index 7974546..8e9bde9 100644
--- a/doc/haproxy-fr.txt
+++ b/doc/haproxy-fr.txt
@@ -994,6 +994,27 @@
         server squid1 192.168.1.1
         server squid2 192.168.1.2
 
+La version 1.3.14 a apporté une nouvelle méthode 'balance url_param'. Elle
+consiste à se baser sur un paramètre passé dans l'URL pour effectuer un hachage
+utilisé pour déterminer le serveur à utiliser. Ceci est principalement utile
+pour des applications n'ayant pas une exigence stricte de persistance, mais
+pour lesquelles elle procure un gain de performance notable dans des
+environnements où il n'est pas toujours possible d'utiliser des cookies. En cas
+d'absence du paramètre dans l'URL, alors une répartition de type 'round robin'
+est effectuée.
+
+Exemple :
+---------
+
+# hache le paramètre "basket_id" dans l'URL pour déterminer le serveur
+
+    listen http_proxy
+        bind :3128
+        mode http
+        balance url_param basket_id
+        server ebiz1 192.168.1.1
+        server ebiz2 192.168.1.2
+
 
 3.1) Surveillance des serveurs
 ------------------------------
diff --git a/include/types/backend.h b/include/types/backend.h
index 3c5273b..9089469 100644
--- a/include/types/backend.h
+++ b/include/types/backend.h
@@ -64,6 +64,7 @@
 #define PR_O_BALANCE_SH 0x20000000      /* balance on source IP hash */
 #define PR_O_BALANCE_L4 0x30000000      /* mask to match layer4-based algorithms */
 #define PR_O_BALANCE_UH 0x40000000      /* balance on URI hash */
+#define PR_O_BALANCE_PH 0x50000000      /* balance on URL parameter hash */
 #define PR_O_BALANCE_L7 0x40000000      /* mask to match layer7-based algorithms */
 #define PR_O_BALANCE    0x70000000      /* mask to extract BALANCE algorithm */
 
diff --git a/include/types/proxy.h b/include/types/proxy.h
index 04d1162..bb8b872 100644
--- a/include/types/proxy.h
+++ b/include/types/proxy.h
@@ -87,6 +87,8 @@
 	int srv_rr_idx;				/* next server to be elected in round robin mode */
 	char *cookie_name;			/* name of the cookie to look for */
 	int  cookie_len;			/* strlen(cookie_name), computed only once */
+	char *url_param_name;			/* name of the URL parameter used for hashing */
+	int  url_param_len;			/* strlen(url_param_name), computed only once */
 	char *appsession_name;			/* name of the cookie to look for */
 	int  appsession_name_len;		/* strlen(appsession_name), computed only once */
 	int  appsession_len;			/* length of the appsession cookie value to be used */
diff --git a/src/backend.c b/src/backend.c
index 0cb76f0..2ae25c3 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -134,6 +134,69 @@
 	px->map_state &= ~PR_MAP_RECALC;
 }
 
+/*
+ * This function picks a server for the proxy <px> by hashing the value of the
+ * URL parameter named px->url_param_name, which helps keep a client on the
+ * same server when cookies are not usable. The query string starts after the
+ * first '?' in <uri> (<uri_len> bytes, not necessarily NUL-terminated) and is
+ * scanned one '&'-separated parameter at a time; the value is hashed up to
+ * the next '&' or the end of the URI. It returns NULL if the server map is
+ * empty, if there is no '?', or if the parameter is not found. Otherwise the
+ * entry px->srv_map[hash % px->srv_map_sz] is returned.
+ */
+struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
+{
+	unsigned long hash = 0;
+	char *p;
+	int plen;
+
+	if (px->map_state & PR_MAP_RECALC)
+		recalc_server_map(px);
+
+	if (px->srv_map_sz == 0)
+		return NULL;
+
+	p = memchr(uri, '?', uri_len);
+	if (!p)
+		return NULL;
+	p++;	/* step over the '?' */
+
+	uri_len -= (p - uri);	/* bytes left after the '?' */
+	plen = px->url_param_len;
+
+	if (uri_len <= plen)	/* too short to hold the name plus '=' */
+		return NULL;
+
+	while (uri_len > plen) {
+		/* <p> is at the start of a parameter; p[plen] is within bounds */
+		if (p[plen] == '=') {
+			/* <uri> keeps the name start, <p> moves past the '=' */
+			uri = p;
+			p += plen + 1;
+			uri_len -= plen + 1;
+			if (memcmp(uri, px->url_param_name, plen) == 0) {
+				/* name matched at <uri>: hash the value at <p>
+				 * up to the next '&' or the end of the URI
+				 */
+				while (uri_len && *p != '&') {
+					hash = *p + (hash << 6) + (hash << 16) - hash;	/* hash * 65599 + *p */
+					uri_len--;
+					p++;
+				}
+				return px->srv_map[hash % px->srv_map_sz];	/* empty value: hash is 0 */
+			}
+		}
+
+		/* no match here: resume right after the next '&', if any */
+		uri = p;
+		p = memchr(uri, '&', uri_len);
+		if (!p)
+			return NULL;
+		p++;
+		uri_len -= (p - uri);
+	}
+	return NULL;	/* what remains is too short to hold the name plus '=' */
+}
 
 /*
  * This function marks the session as 'assigned' in direct or dispatch modes,
@@ -197,6 +260,18 @@
 				s->srv = get_server_uh(s->be,
 						       s->txn.req.sol + s->txn.req.sl.rq.u,
 						       s->txn.req.sl.rq.u_l);
+				break;
+			case PR_O_BALANCE_PH:
+				/* URL Parameter hashing */
+				s->srv = get_server_ph(s->be,
+						       s->txn.req.sol + s->txn.req.sl.rq.u,
+						       s->txn.req.sl.rq.u_l);
+				if (!s->srv) {
+					/* parameter not found, fall back to round robin */
+					s->srv = get_server_rr_with_conns(s->be);
+					if (!s->srv)
+						return SRV_STATUS_FULL;
+				}
 				break;
 			default:
 				/* unknown balancing algorithm */
@@ -762,8 +837,20 @@
 		curproxy->options &= ~PR_O_BALANCE;
 		curproxy->options |= PR_O_BALANCE_UH;
 	}
+	else if (!strcmp(args[0], "url_param")) {
+		if (!*args[1]) {
+			snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
+			return -1;
+		}
+		curproxy->options &= ~PR_O_BALANCE;
+		curproxy->options |= PR_O_BALANCE_PH;
+		if (curproxy->url_param_name)
+			free(curproxy->url_param_name);
+		curproxy->url_param_name = strdup(args[1]);
+		curproxy->url_param_len = strlen(args[1]);
+	}
 	else {
-		snprintf(err, errlen, "'balance' only supports 'roundrobin', 'source' and 'uri' options.");
+		snprintf(err, errlen, "'balance' only supports 'roundrobin', 'source', 'uri' and 'url_param' options.");
 		return -1;
 	}
 	return 0;
diff --git a/src/cfgparse.c b/src/cfgparse.c
index b98089e..5e6ce4e 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -606,6 +606,10 @@
 			if (defproxy.cookie_name)
 				curproxy->cookie_name = strdup(defproxy.cookie_name);
 			curproxy->cookie_len = defproxy.cookie_len;
+
+			if (defproxy.url_param_name)
+				curproxy->url_param_name = strdup(defproxy.url_param_name);
+			curproxy->url_param_len = defproxy.url_param_len;
 		}
 
 		if (curproxy->cap & PR_CAP_RS) {
@@ -651,6 +655,7 @@
 		 */
 		if (defproxy.check_req)     free(defproxy.check_req);
 		if (defproxy.cookie_name)   free(defproxy.cookie_name);
+		if (defproxy.url_param_name) free(defproxy.url_param_name);
 		if (defproxy.capture_name)  free(defproxy.capture_name);
 		if (defproxy.monitor_uri)   free(defproxy.monitor_uri);
 		if (defproxy.defbe.name)    free(defproxy.defbe.name);
diff --git a/src/haproxy.c b/src/haproxy.c
index 5152a3c..eaf491e 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -627,6 +627,9 @@
 		if (p->cookie_name)
 			free(p->cookie_name);
 
+		if (p->url_param_name)
+			free(p->url_param_name);
+
 		if (p->capture_name)
 			free(p->capture_name);
 
diff --git a/tests/test-balance.cfg b/tests/test-balance.cfg
new file mode 100644
index 0000000..f46e531
--- /dev/null
+++ b/tests/test-balance.cfg
@@ -0,0 +1,65 @@
+# This is a test configuration.
+# It tests the "balance" parser for several parameter combinations.
+
+
+global
+	maxconn		10000
+
+defaults
+	balance
+
+defaults
+	balance roundrobin
+
+defaults
+	balance source
+
+defaults
+	balance uri
+
+defaults
+	balance url_param foo
+
+defaults
+	mode tcp
+
+# must produce an error
+#defaults
+#	balance url_param
+
+backend tcp-bk1
+	balance
+
+backend tcp-bk2
+	balance roundrobin
+
+backend tcp-bk3
+	balance source
+
+backend tcp-bk4
+	balance uri
+
+backend tcp-bk5
+	balance url_param foo
+
+defaults
+	mode http
+
+backend http-bk1
+	balance
+
+backend http-bk2
+	balance roundrobin
+
+backend http-bk3
+	balance source
+
+backend http-bk4
+	balance uri
+
+backend http-bk5
+	balance url_param foo
+
+# must produce an error
+#backend http-bk6
+#	balance url_param
diff --git a/tests/test-url-hash.cfg b/tests/test-url-hash.cfg
new file mode 100644
index 0000000..d4285ff
--- /dev/null
+++ b/tests/test-url-hash.cfg
@@ -0,0 +1,23 @@
+# This is a test configuration.
+# It exercises the "url_param" balance algorithm, which hashes the
+# value of a URL parameter named "foo" to select the server.
+
+global
+	maxconn 100
+	log		127.0.0.1 local0
+
+listen  vip1
+	log		global
+        bind		:8000
+        mode		http
+        maxconn		100
+        clitimeout	5000
+	contimeout	5000
+	srvtimeout	5000
+	balance		url_param foo
+	server		srv1 127.0.0.1:80
+	server		srv2 127.0.0.1:80
+
+	# control activity this way
+	stats		uri /stat
+