url32+src - like base32+src but whole url including parameters

I need to limit traffic to each URL from each source address, much like
base32+src but keyed on the whole URL including its parameters (this came
from looking at the recent 'Haproxy rate limit per matching request' thread).

Attached is a patch that seems to do the job; it is a copy-and-paste of the
base32 functions.

The url32 function seems to work too: using two machines to request the
same URL, abusing it from either one locks me out of both when the key is
url32, and locks out only the abusing one when the key is url32+src.

Neil
diff --git a/src/proto_http.c b/src/proto_http.c
index a8b7728..e825167 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -9462,8 +9462,8 @@
  * the Host header followed by the path component if it begins with a slash ('/').
  * This means that '*' will not be added, resulting in exactly the first Host
  * entry. If no Host header is found, then the path is used. The resulting value
- * is hashed using the url hash followed by a full avalanche hash and provides a
- * 32-bit integer value. This fetch is useful for tracking per-URL activity on
+ * is hashed using the path hash followed by a full avalanche hash and provides a
+ * 32-bit integer value. This fetch is useful for tracking per-path activity on
  * high-traffic sites without having to store whole paths.
  */
 static int
@@ -9508,9 +9508,9 @@
 }
 
 /* This concatenates the source address with the 32-bit hash of the Host and
- * URL as returned by smp_fetch_base32(). The idea is to have per-source and
- * per-url counters. The result is a binary block from 8 to 20 bytes depending
- * on the source address length. The URL hash is stored before the address so
+ * path as returned by smp_fetch_base32(). The idea is to have per-source and
+ * per-path counters. The result is a binary block from 8 to 20 bytes depending
+ * on the source address length. The path hash is stored before the address so
  * that in environments where IPv6 is insignificant, truncating the output to
  * 8 bytes would still work.
  */
@@ -10040,6 +10040,95 @@
 	return ret;
 }
 
+/* This produces a 32-bit hash of the concatenation of the first occurrence of
+ * the Host header followed by the request URL from its path component onwards,
+ * query string included, provided that the path begins with a slash ('/').
+ * This means that '*' will not be added, resulting in exactly the first Host
+ * entry. If no Host header is found, then only the URL is used. The resulting
+ * value goes through full_hash() and is returned as a 32-bit unsigned integer
+ * (SMP_T_UINT). This fetch is useful for tracking per-URL activity on
+ * high-traffic sites without having to store whole URLs. It differs from the
+ * base32 fetch in that the URL parameters are hashed as well as the path.
+ */
+static int
+smp_fetch_url32(struct proxy *px, struct session *l4, void *l7, unsigned int opt,
+                 const struct arg *args, struct sample *smp)
+{
+	struct http_txn *txn = l7;
+	struct hdr_ctx ctx;
+	unsigned int hash = 0;
+	char *ptr, *beg, *end;
+	int len;
+
+	CHECK_HTTP_MESSAGE_FIRST();
+
+	ctx.idx = 0;
+	if (http_find_header2("Host", 4, txn->req.chn->buf->p + txn->req.sol, &txn->hdr_idx, &ctx)) {
+		/* OK we have the header value in ctx.line+ctx.val for ctx.vlen bytes */
+		ptr = ctx.line + ctx.val;
+		len = ctx.vlen;
+		while (len--)
+			hash = *(ptr++) + (hash << 6) + (hash << 16) - hash;
+	}
+
+	/* now retrieve the path; <end> is the URI offset plus its length, so no
+	 * scan for '?' is needed: parameters get hashed along with the path.
+	 */
+	end = txn->req.chn->buf->p + txn->req.sol + txn->req.sl.rq.u + txn->req.sl.rq.u_l;
+	beg = http_get_path(txn);
+	if (!beg)
+		beg = end;
+
+	if (beg < end && *beg == '/') {
+		while (beg < end)
+			hash = *(beg++) + (hash << 6) + (hash << 16) - hash;
+	}
+	hash = full_hash(hash);
+
+	smp->type = SMP_T_UINT;
+	smp->data.uint = hash;
+	smp->flags = SMP_F_VOL_1ST;
+	return 1;
+}
+
+/* This concatenates the 32-bit hash of the Host and URL as returned by
+ * smp_fetch_url32() with the source address, giving per-source and per-url
+ * counters. The result is a binary block whose length depends on the source
+ * address length; the URL hash is stored before the address so that where
+ * IPv6 is insignificant, truncating the output to 8 bytes would still work.
+ * Returns 0 if the hash fetch fails or the family is not AF_INET/AF_INET6.
+ */
+static int
+smp_fetch_url32_src(struct proxy *px, struct session *l4, void *l7, unsigned int opt,
+                     const struct arg *args, struct sample *smp)
+{
+	struct chunk *temp;
+
+	if (!smp_fetch_url32(px, l4, l7, opt, args, smp))
+		return 0;
+
+	temp = get_trash_chunk();
+	memcpy(temp->str + temp->len, &smp->data.uint, sizeof(smp->data.uint));
+	temp->len += sizeof(smp->data.uint);
+
+	switch (l4->si[0].conn->addr.from.ss_family) {
+	case AF_INET:
+		memcpy(temp->str + temp->len, &((struct sockaddr_in *)&l4->si[0].conn->addr.from)->sin_addr, 4);
+		temp->len += 4;
+		break;
+	case AF_INET6:
+		memcpy(temp->str + temp->len, &((struct sockaddr_in6 *)(&l4->si[0].conn->addr.from))->sin6_addr, 16);
+		temp->len += 16;
+		break;
+	default:
+		return 0;
+	}
+
+	smp->data.str = *temp;
+	smp->type = SMP_T_BIN;
+	return 1;
+}
+
 /* This function is used to validate the arguments passed to any "hdr" fetch
  * keyword. These keywords support an optional positive or negative occurrence
  * number. We must ensure that the number is greater than -MAX_HDR_HISTORY. It
@@ -10259,6 +10348,8 @@
 
 	{ "status",          smp_fetch_stcode,         0,                NULL,    SMP_T_UINT, SMP_USE_HRSHP },
 	{ "url",             smp_fetch_url,            0,                NULL,    SMP_T_CSTR, SMP_USE_HRQHV },
+	{ "url32",           smp_fetch_url32,          0,                NULL,    SMP_T_UINT, SMP_USE_HRQHV },
+	{ "url32+src",       smp_fetch_url32_src,      0,                NULL,    SMP_T_BIN,  SMP_USE_HRQHV },
 	{ "url_ip",          smp_fetch_url_ip,         0,                NULL,    SMP_T_IPV4, SMP_USE_HRQHV },
 	{ "url_port",        smp_fetch_url_port,       0,                NULL,    SMP_T_UINT, SMP_USE_HRQHV },
 	{ "url_param",       smp_fetch_url_param,      ARG2(1,STR,STR),  NULL,    SMP_T_CSTR, SMP_USE_HRQHV },