[MAJOR] implement parameter hashing for POST requests

This patch extends the "url_param" load balancing method by introducing
the "check_post" option. Using this option enables analysis of the beginning
of POST requests to search for the specified URL parameter.

The patch also fixes a few minor typos in comments that were discovered
during code review.
diff --git a/src/backend.c b/src/backend.c
index 436a4d9..38f81d2 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -16,6 +16,7 @@
 #include <stdlib.h>
 #include <syslog.h>
 #include <string.h>
+#include <ctype.h>
 
 #include <common/compat.h>
 #include <common/config.h>
@@ -1122,42 +1123,41 @@
  * are shared but cookies are not usable. If the parameter is not found, NULL
  * is returned. If any server is found, it will be returned. If no valid server
  * is found, NULL is returned.
- *
  */
 struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
 {
 	unsigned long hash = 0;
-	char *p;
+	const char *p;
+	const char *params;
 	int plen;
 
+	/* when tot_weight is 0 then so is srv_count */
 	if (px->lbprm.tot_weight == 0)
 		return NULL;
 
+	if ((p = memchr(uri, '?', uri_len)) == NULL)
+		return NULL;
+
 	if (px->lbprm.map.state & PR_MAP_RECALC)
 		recalc_server_map(px);
 
-	p = memchr(uri, '?', uri_len);
-	if (!p)
-		return NULL;
 	p++;
 
 	uri_len -= (p - uri);
 	plen = px->url_param_len;
-
-	if (uri_len <= plen)
-		return NULL;
+	params = p;
 
 	while (uri_len > plen) {
 		/* Look for the parameter name followed by an equal symbol */
-		if (p[plen] == '=') {
-			/* skip the equal symbol */
-			uri = p;
-			p += plen + 1;
-			uri_len -= plen + 1;
-			if (memcmp(uri, px->url_param_name, plen) == 0) {
-				/* OK, we have the parameter here at <uri>, and
+		if (params[plen] == '=') {
+			if (memcmp(params, px->url_param_name, plen) == 0) {
+				/* OK, we have the parameter here at <params>, and
 				 * the value after the equal sign, at <p>
+				 * skip the equal symbol
 				 */
+				p += plen + 1;
+				uri_len -= plen + 1;
+
 				while (uri_len && *p != '&') {
 					hash = *p + (hash << 6) + (hash << 16) - hash;
 					uri_len--;
@@ -1165,19 +1165,117 @@
 				}
 				return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
 			}
+		}
+		/* skip to next parameter */
+		p = memchr(params, '&', uri_len);
+		if (!p)
+			return NULL;
+		p++;
+		uri_len -= (p - params);
+		params = p;
+	}
+	return NULL;
+}
+
+/*
+ * this does the same as the previous server_ph, but check the body contents
+ */
+struct server *get_server_ph_post(struct session *s)
+{
+	unsigned long    hash = 0;
+	struct http_txn *txn  = &s->txn;
+	struct buffer   *req  = s->req;
+	struct http_msg *msg  = &txn->req;
+	struct proxy    *px   = s->be;
+	unsigned int     plen = px->url_param_len;
+
+	/* tot_weight appears to mean srv_count */
+	if (px->lbprm.tot_weight == 0)
+		return NULL;
+
+        unsigned long body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
+        unsigned long len  = req->total - body;
+        const char *params = req->data + body;
+
+	if ( len == 0 )
+		return NULL;
+
+	if (px->lbprm.map.state & PR_MAP_RECALC)
+		recalc_server_map(px);
+
+	struct hdr_ctx ctx;
+	ctx.idx = 0;
+
+	/* if the message is chunked, we skip the chunk size, but use the value as len */
+	http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
+	if ( ctx.idx && strncasecmp(ctx.line+ctx.val,"chunked",ctx.vlen)==0) {
+		unsigned int chunk = 0;
+		while ( params < req->rlim && !HTTP_IS_CRLF(*params)) {
+			char c = *params;
+			if (ishex(c)) {
+				unsigned int hex = toupper(c) - '0';
+				if ( hex > 9 )
+					hex -= 'A' - '9' - 1;
+				chunk = (chunk << 4) | hex;
+			}
+			else
+				return NULL;
+			params++;
+			len--;
 		}
+		/* spec says we get CRLF */
+		if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
+			params += 2;
+		else
+			return NULL;
+		/* ok we have some encoded length, just inspect the first chunk */
+		len = chunk;
+	}
 
+	const char   *p = params;
+
+	while (len > plen) {
+		/* Look for the parameter name followed by an equal symbol */
+		if (params[plen] == '=') {
+			if (memcmp(params, px->url_param_name, plen) == 0) {
+				/* OK, we have the parameter here at <params>, and
+				 * the value after the equal sign, at <p>
+				 * skip the equal symbol
+				 */
+				p += plen + 1;
+				len -= plen + 1;
+
+				while (len && *p != '&') {
+					if (unlikely(!HTTP_IS_TOKEN(*p))) {
+					/* if in a POST, body must be URI encoded or its not a URI.
+					 * Do not interprete any possible binary data as a parameter.
+					 */
+						if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
+							break;
+						return NULL;                 /* oh, no; this is not uri-encoded.
+									      * This body does not contain parameters.
+									      */
+					}
+					hash = *p + (hash << 6) + (hash << 16) - hash;
+					len--;
+					p++;
+					/* should we break if vlen exceeds limit? */
+				}
+				return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
+			}
+		}
 		/* skip to next parameter */
-		uri = p;
-		p = memchr(uri, '&', uri_len);
+		p = memchr(params, '&', len);
 		if (!p)
 			return NULL;
 		p++;
-		uri_len -= (p - uri);
+		len -= (p - params);
+		params = p;
 	}
 	return NULL;
 }
 
+
 /*
  * This function marks the session as 'assigned' in direct or dispatch modes,
  * or tries to assign one in balance mode, according to the algorithm. It does
@@ -1254,9 +1352,15 @@
 				break;
 			case BE_LB_ALGO_PH:
 				/* URL Parameter hashing */
-				s->srv = get_server_ph(s->be,
-						       s->txn.req.sol + s->txn.req.sl.rq.u,
-						       s->txn.req.sl.rq.u_l);
+				if (s->txn.meth == HTTP_METH_POST &&
+                                    memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
+                                           s->txn.req.sl.rq.u_l ) == NULL)
+					s->srv = get_server_ph_post(s);
+				else
+					s->srv = get_server_ph(s->be,
+							       s->txn.req.sol + s->txn.req.sl.rq.u,
+							       s->txn.req.sl.rq.u_l);
+
 				if (!s->srv) {
 					/* parameter not found, fall back to round robin on the map */
 					s->srv = get_server_rr_with_conns(s->be, srvtoavoid);
@@ -1620,7 +1724,7 @@
 			return SN_ERR_RESOURCE;
 		}
 	}
-	
+
 	if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
 	    (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
 
@@ -1879,6 +1983,21 @@
 			free(curproxy->url_param_name);
 		curproxy->url_param_name = strdup(args[1]);
 		curproxy->url_param_len = strlen(args[1]);
+		if ( *args[2] ) {
+			if (strcmp(args[2], "check_post")) {
+				snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
+				return -1;
+			}
+			if (*args[3]) {
+				/* TODO: maybe issue a warning if there is no value, no digits or too long */
+				curproxy->url_param_post_limit = str2ui(args[3]);
+			}
+			/* if no limit, or faul value in args[3], then default to a moderate wordlen */
+			if (!curproxy->url_param_post_limit)
+				curproxy->url_param_post_limit = 48;
+			else if ( curproxy->url_param_post_limit < 3 )
+				curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
+		}
 	}
 	else {
 		snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri' and 'url_param' options.");