[MAJOR] implement parameter hashing for POST requests
This patch extends the "url_param" load balancing method by introducing
the "check_post" option. Using this option enables analysis of the beginning
of POST requests to search for the specified URL parameter.
The patch also fixes a few minor typos in comments that were discovered
during code review.
diff --git a/src/backend.c b/src/backend.c
index 436a4d9..38f81d2 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -16,6 +16,7 @@
#include <stdlib.h>
#include <syslog.h>
#include <string.h>
+#include <ctype.h>
#include <common/compat.h>
#include <common/config.h>
@@ -1122,42 +1123,41 @@
* are shared but cookies are not usable. If the parameter is not found, NULL
* is returned. If any server is found, it will be returned. If no valid server
* is found, NULL is returned.
- *
*/
struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
{
unsigned long hash = 0;
- char *p;
+ const char *p;
+ const char *params;
int plen;
+ /* when tot_weight is 0 then so is srv_count */
if (px->lbprm.tot_weight == 0)
return NULL;
+ if ((p = memchr(uri, '?', uri_len)) == NULL)
+ return NULL;
+
if (px->lbprm.map.state & PR_MAP_RECALC)
recalc_server_map(px);
- p = memchr(uri, '?', uri_len);
- if (!p)
- return NULL;
p++;
uri_len -= (p - uri);
plen = px->url_param_len;
-
- if (uri_len <= plen)
- return NULL;
+ params = p;
while (uri_len > plen) {
/* Look for the parameter name followed by an equal symbol */
- if (p[plen] == '=') {
- /* skip the equal symbol */
- uri = p;
- p += plen + 1;
- uri_len -= plen + 1;
- if (memcmp(uri, px->url_param_name, plen) == 0) {
- /* OK, we have the parameter here at <uri>, and
+ if (params[plen] == '=') {
+ if (memcmp(params, px->url_param_name, plen) == 0) {
+ /* OK, we have the parameter here at <params>, and
* the value after the equal sign, at <p>
+ * skip the equal symbol
*/
+ p += plen + 1;
+ uri_len -= plen + 1;
+
while (uri_len && *p != '&') {
hash = *p + (hash << 6) + (hash << 16) - hash;
uri_len--;
@@ -1165,19 +1165,117 @@
}
return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
}
+ }
+ /* skip to next parameter */
+ p = memchr(params, '&', uri_len);
+ if (!p)
+ return NULL;
+ p++;
+ uri_len -= (p - params);
+ params = p;
+ }
+ return NULL;
+}
+
+/*
+ * this does the same as the previous get_server_ph, but checks the body contents
+ */
+struct server *get_server_ph_post(struct session *s)
+{
+ unsigned long hash = 0;
+ struct http_txn *txn = &s->txn;
+ struct buffer *req = s->req;
+ struct http_msg *msg = &txn->req;
+ struct proxy *px = s->be;
+ unsigned int plen = px->url_param_len;
+
+ /* tot_weight appears to mean srv_count */
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ unsigned long body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
+ unsigned long len = req->total - body;
+ const char *params = req->data + body;
+
+ if ( len == 0 )
+ return NULL;
+
+ if (px->lbprm.map.state & PR_MAP_RECALC)
+ recalc_server_map(px);
+
+ struct hdr_ctx ctx;
+ ctx.idx = 0;
+
+ /* if the message is chunked, we skip the chunk size, but use the value as len */
+ http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
+ if ( ctx.idx && strncasecmp(ctx.line+ctx.val,"chunked",ctx.vlen)==0) {
+ unsigned int chunk = 0;
+ while ( params < req->rlim && !HTTP_IS_CRLF(*params)) {
+ char c = *params;
+ if (ishex(c)) {
+ unsigned int hex = toupper(c) - '0';
+ if ( hex > 9 )
+ hex -= 'A' - '9' - 1;
+ chunk = (chunk << 4) | hex;
+ }
+ else
+ return NULL;
+ params++;
+ len--;
}
+ /* spec says we get CRLF */
+ if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
+ params += 2;
+ else
+ return NULL;
+ /* ok we have some encoded length, just inspect the first chunk */
+ len = chunk;
+ }
+ const char *p = params;
+
+ while (len > plen) {
+ /* Look for the parameter name followed by an equal symbol */
+ if (params[plen] == '=') {
+ if (memcmp(params, px->url_param_name, plen) == 0) {
+ /* OK, we have the parameter here at <params>, and
+ * the value after the equal sign, at <p>
+ * skip the equal symbol
+ */
+ p += plen + 1;
+ len -= plen + 1;
+
+ while (len && *p != '&') {
+ if (unlikely(!HTTP_IS_TOKEN(*p))) {
+ /* if in a POST, body must be URI encoded or it's not a URI.
+ * Do not interpret any possible binary data as a parameter.
+ */
+ if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
+ break;
+ return NULL; /* oh, no; this is not uri-encoded.
+ * This body does not contain parameters.
+ */
+ }
+ hash = *p + (hash << 6) + (hash << 16) - hash;
+ len--;
+ p++;
+ /* should we break if vlen exceeds limit? */
+ }
+ return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
+ }
+ }
/* skip to next parameter */
- uri = p;
- p = memchr(uri, '&', uri_len);
+ p = memchr(params, '&', len);
if (!p)
return NULL;
p++;
- uri_len -= (p - uri);
+ len -= (p - params);
+ params = p;
}
return NULL;
}
+
/*
* This function marks the session as 'assigned' in direct or dispatch modes,
* or tries to assign one in balance mode, according to the algorithm. It does
@@ -1254,9 +1352,15 @@
break;
case BE_LB_ALGO_PH:
/* URL Parameter hashing */
- s->srv = get_server_ph(s->be,
- s->txn.req.sol + s->txn.req.sl.rq.u,
- s->txn.req.sl.rq.u_l);
+ if (s->txn.meth == HTTP_METH_POST &&
+ memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
+ s->txn.req.sl.rq.u_l ) == NULL)
+ s->srv = get_server_ph_post(s);
+ else
+ s->srv = get_server_ph(s->be,
+ s->txn.req.sol + s->txn.req.sl.rq.u,
+ s->txn.req.sl.rq.u_l);
+
if (!s->srv) {
/* parameter not found, fall back to round robin on the map */
s->srv = get_server_rr_with_conns(s->be, srvtoavoid);
@@ -1620,7 +1724,7 @@
return SN_ERR_RESOURCE;
}
}
-
+
if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
(errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
@@ -1879,6 +1983,21 @@
free(curproxy->url_param_name);
curproxy->url_param_name = strdup(args[1]);
curproxy->url_param_len = strlen(args[1]);
+ if ( *args[2] ) {
+ if (strcmp(args[2], "check_post")) {
+ snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
+ return -1;
+ }
+ if (*args[3]) {
+ /* TODO: maybe issue a warning if there is no value, no digits or too long */
+ curproxy->url_param_post_limit = str2ui(args[3]);
+ }
+ /* if no limit, or faulty value in args[3], then default to a moderate wordlen */
+ if (!curproxy->url_param_post_limit)
+ curproxy->url_param_post_limit = 48;
+ else if ( curproxy->url_param_post_limit < 3 )
+ curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
+ }
}
else {
snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri' and 'url_param' options.");
diff --git a/src/client.c b/src/client.c
index bff5cd9..410c3f0 100644
--- a/src/client.c
+++ b/src/client.c
@@ -232,7 +232,8 @@
if (p->mode == PR_MODE_HTTP) {
txn->status = -1;
-
+ txn->req.hdr_content_len = 0LL;
+ txn->rsp.hdr_content_len = 0LL;
txn->req.msg_state = HTTP_MSG_RQBEFORE; /* at the very beginning of the request */
txn->rsp.msg_state = HTTP_MSG_RPBEFORE; /* at the very beginning of the response */
txn->req.sol = txn->req.eol = NULL;
diff --git a/src/ev_poll.c b/src/ev_poll.c
index 0166bd6..54cd138 100644
--- a/src/ev_poll.c
+++ b/src/ev_poll.c
@@ -102,8 +102,8 @@
#define FDSETS_ARE_INT_ALIGNED
#ifdef FDSETS_ARE_INT_ALIGNED
-#define WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
-#ifdef WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
+#define WE_REALLY_KNOW_THAT_FDSETS_ARE_INTS
+#ifdef WE_REALLY_KNOW_THAT_FDSETS_ARE_INTS
sr = (rn >> count) & 1;
sw = (wn >> count) & 1;
#else
diff --git a/src/proto_http.c b/src/proto_http.c
index 3f8e0ac..2c07030 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -287,7 +287,7 @@
};
/* It is about twice as fast on recent architectures to lookup a byte in a
- * table than two perform a boolean AND or OR between two tests. Refer to
+ * table than to perform a boolean AND or OR between two tests. Refer to
* RFC2616 for those chars.
*/
@@ -2065,6 +2065,83 @@
goto return_bad_req;
t->flags |= SN_CONN_CLOSED;
}
+ /* Before we switch to data, was assignment set in manage_client_side_cookie?
+ * If not assigned, perhaps we are balancing on url_param, but this is a
+ * POST; and the parameters are in the body, maybe scan there to find our server.
+ * (unless headers overflowed the buffer?)
+ */
+ if (!(t->flags & (SN_ASSIGNED|SN_DIRECT)) &&
+ t->txn.meth == HTTP_METH_POST && t->be->url_param_name != NULL &&
+ t->be->url_param_post_limit != 0 && req->total < BUFSIZE &&
+ memchr(msg->sol + msg->sl.rq.u, '?', msg->sl.rq.u_l) == NULL) {
+ /* are there enough bytes here? total == l || r || rlim ?
+ * len is unsigned, but eoh is int,
+ * how many bytes of body have we received?
+ * eoh is the first empty line of the header
+ */
+ /* already established CRLF or LF at eoh, move to start of message, find message length in buffer */
+ unsigned long len = req->total - (msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1);
+
+ /* If we have HTTP/1.1 and Expect: 100-continue, then abort.
+ * We can't assume responsibility for the server's decision,
+ * on this URI and header set. See rfc2616: 14.20, 8.2.3,
+ * We also can't change our mind later, about which server to choose, so round robin.
+ */
+ if ((likely(msg->sl.rq.v_l == 8) && req->data[msg->som + msg->sl.rq.v + 7] == '1')) {
+ struct hdr_ctx ctx;
+ ctx.idx = 0;
+ /* Expect is allowed in 1.1, look for it */
+ http_find_header2("Expect", 6, msg->sol, &txn->hdr_idx, &ctx);
+ if (ctx.idx != 0 &&
+ unlikely(ctx.vlen == 12 && strncasecmp(ctx.line+ctx.val,"100-continue",12)==0))
+ /* We can't reliably stall and wait for data, because of
+ * .NET clients that don't conform to rfc2616; so, no need for
+ * the next block to check length expectations.
+ * We could send 100 status back to the client, but then we need to
+ * re-write headers, and send the message. And this isn't the right
+ * place for that action.
+ * TODO: support Expect elsewhere and delete this block.
+ */
+ goto end_check_maybe_wait_for_body;
+ }
+ if ( likely(len > t->be->url_param_post_limit) ) {
+ /* nothing to do, we got enough */
+ } else {
+ /* limit implies we are supposed to need this many bytes
+ * to find the parameter. Let's see how many bytes we can wait for.
+ */
+ long long hint = len;
+ struct hdr_ctx ctx;
+ ctx.idx = 0;
+ http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
+ if (unlikely(ctx.idx && strncasecmp(ctx.line+ctx.val,"chunked",7)==0)) {
+ t->srv_state = SV_STANALYZE;
+ } else {
+ ctx.idx = 0;
+ http_find_header2("Content-Length", 14, msg->sol, &txn->hdr_idx, &ctx);
+ /* now if we have a length, we'll take the hint */
+ if ( ctx.idx ) {
+ /* We have Content-Length */
+ if ( strl2llrc(ctx.line+ctx.val,ctx.vlen, &hint) )
+ hint = 0; /* parse failure, untrusted client */
+ else {
+ if ( hint > 0 )
+ msg->hdr_content_len = hint;
+ else
+ hint = 0; /* bad client, sent negative length */
+ }
+ }
+ /* but limited to what we care about, maybe we don't expect any entity data (hint == 0) */
+ if ( t->be->url_param_post_limit < hint )
+ hint = t->be->url_param_post_limit;
+ /* now do we really need to buffer more data? */
+ if ( len < hint )
+ t->srv_state = SV_STANALYZE;
+ /* else... There are no body bytes to wait for */
+ }
+ }
+ }
+ end_check_maybe_wait_for_body:
/*************************************************************
* OK, that's finished for the headers. We have done what we *
@@ -2436,7 +2513,12 @@
//EV_FD_ISSET(t->srv_fd, DIR_RD), EV_FD_ISSET(t->srv_fd, DIR_WR)
//);
if (s == SV_STIDLE) {
- if (c == CL_STHEADERS)
+ /* NOTE: The client processor may switch to SV_STANALYZE, which switches back to SV_STIDLE.
+ * This is logically after CL_STHEADERS completed, CL_STDATA has started, but
+ * we need to defer server selection until more data arrives, if possible.
+ * This is rare, and only if balancing on parameter hash with values in the entity of a POST
+ */
+ if (c == CL_STHEADERS )
return 0; /* stay in idle, waiting for data to reach the client side */
else if (c == CL_STCLOSE || c == CL_STSHUTW ||
(c == CL_STSHUTR &&
@@ -3531,6 +3613,60 @@
}
return 0;
}
+ else if (s == SV_STANALYZE){
+ /* this server state is set by the client to study the body for server assignment */
+
+ /* Have we been through this long enough to timeout? */
+ if (!tv_isle(&req->rex, &now)) {
+ /* balance url_param check_post should have been the only to get into this.
+ * just wait for data, check to compare how much
+ */
+ struct http_msg * msg = &t->txn.req;
+ unsigned long body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 :msg->eoh + 1;
+ unsigned long len = req->total - body;
+ long long limit = t->be->url_param_post_limit;
+ struct hdr_ctx ctx;
+ ctx.idx = 0;
+ /* now if we have a length, we'll take the hint */
+ http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
+ if ( ctx.idx && strncasecmp(ctx.line+ctx.val,"chunked",ctx.vlen)==0) {
+ unsigned int chunk = 0;
+ while ( body < req->total && !HTTP_IS_CRLF(msg->sol[body])) {
+ char c = msg->sol[body];
+ if (ishex(c)) {
+ unsigned int hex = toupper(c) - '0';
+ if ( hex > 9 )
+ hex -= 'A' - '9' - 1;
+ chunk = (chunk << 4) | hex;
+ }
+ else break;
+ body++;
+ len--;
+ }
+ if ( body == req->total )
+ return 0; /* end of buffer? data missing! */
+
+ if ( memcmp(msg->sol+body, "\r\n", 2) != 0 )
+ return 0; /* chunked encoding len ends with CRLF, and we don't have it yet */
+
+ /* if we support more than one chunk here, we have to do it again when assigning server
+ 1. how much entity data do we have? new var
+ 2. should save entity_start, entity_cursor, elen & rlen in req; so we don't repeat scanning here
+ 3. test if elen > limit, or set new limit to elen if 0 (end of entity found)
+ */
+
+ if ( chunk < limit )
+ limit = chunk; /* only reading one chunk */
+ } else {
+ if ( msg->hdr_content_len < limit )
+ limit = msg->hdr_content_len;
+ }
+ if ( len < limit )
+ return 0;
+ }
+ t->srv_state=SV_STIDLE;
+ return 1;
+ }
else { /* SV_STCLOSE : nothing to do */
if ((global.mode & MODE_DEBUG) && (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))) {
int len;
@@ -3549,7 +3685,7 @@
* called with s->cli_state == CL_STSHUTR. Right now, only statistics can be
* produced. It stops by itself by unsetting the SN_SELF_GEN flag from the
* session, which it uses to keep on being called when there is free space in
- * the buffer, of simply by letting an empty buffer upon return. It returns 1
+ * the buffer, or simply by letting an empty buffer upon return. It returns 1
* if it changes the session state from CL_STSHUTR, otherwise 0.
*/
int produce_content(struct session *s)
@@ -3640,7 +3776,7 @@
/* Swithing Proxy */
t->be = (struct proxy *) exp->replace;
- /* right now, the backend switch is not too much complicated
+ /* right now, the backend switch is not overly complicated
* because we have associated req_cap and rsp_cap to the
* frontend, and the beconn will be updated later.
*/