MEDIUM: http-rules: Add wait-for-body action on request and response side

Historically, an option was added to wait for the request payload (option
http-buffer-request). This option has 2 drawbacks. First, it is an ON/OFF
option for the whole proxy. It cannot be enabled on demand depending on the
message. Second, as its name suggests, it only works on the request side. The
only option to wait for the response payload was to write a dedicated
filter. While it is an acceptable solution for complex applications, it is a
bit overkill to simply match strings in the body.

To make everyone happy, this patch adds a dedicated HTTP action to wait for
the message payload, for the request or the response depending on whether it
is used in an http-request or an http-response ruleset. The time to wait is
configurable and, optionally, so is the minimum payload size to receive
before it stops waiting.

Both the http action and the old http analyzer rely on the same internal
function.
diff --git a/src/http_act.c b/src/http_act.c
index 838deb5..e71a369 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -2072,6 +2072,111 @@
 	return ACT_RET_PRS_OK;
 }
 
+
+
+/* This function executes a wait-for-body action. It waits for the message
+ * payload for a max configured time (.arg.p[0]) and optionally for only the
+ * first <arg.p[1]> bytes (0 means no limit). It relies on the
+ * http_wait_for_msg_body() function. It returns ACT_RET_CONT when conditions
+ * are met to stop waiting. Otherwise ACT_RET_YIELD is returned to wait for
+ * more data. ACT_RET_INV is returned if a parsing error is raised by a lower
+ * level and ACT_RET_ERR if an internal error occurred. Finally ACT_RET_ABRT
+ * is returned when a timeout occurred.
+ */
+static enum act_return http_action_wait_for_body(struct act_rule *rule, struct proxy *px,
+						 struct session *sess, struct stream *s, int flags)
+{
+	struct channel *chn = ((rule->from == ACT_F_HTTP_REQ) ? &s->req : &s->res); /* channel matching the ruleset side */
+	unsigned int time = (uintptr_t)rule->arg.act.p[0]; /* max time to wait */
+	unsigned int bytes = (uintptr_t)rule->arg.act.p[1]; /* min payload size, 0 = no limit */
+
+	switch (http_wait_for_msg_body(s, chn, time, bytes)) { /* map HTTP_RULE_RES_* to ACT_RET_* */
+	case HTTP_RULE_RES_CONT:
+		return ACT_RET_CONT;
+	case HTTP_RULE_RES_YIELD:
+		return ACT_RET_YIELD;
+	case HTTP_RULE_RES_BADREQ:
+		return ACT_RET_INV;
+	case HTTP_RULE_RES_ERROR:
+		return ACT_RET_ERR;
+	case HTTP_RULE_RES_ABRT:
+		return ACT_RET_ABRT;
+	default: /* any other result is reported as an internal error */
+		return ACT_RET_ERR;
+	}
+}
+
+/* Parse a "wait-for-body" action ("time <time> [ at-least <bytes> ]"). It
+ * returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error (<err> is set).
+ */
+static enum act_parse_ret parse_http_wait_for_body(const char **args, int *orig_arg, struct proxy *px,
+						   struct act_rule *rule, char **err)
+{
+	int cur_arg;
+	unsigned int time, bytes;
+	const char *res;
+
+	cur_arg = *orig_arg;
+	if (!*args[cur_arg]) { /* no argument at all */
+		memprintf(err, "expects time <time> [ at-least <bytes> ]");
+		return ACT_RET_PRS_ERR;
+	}
+
+	time = UINT_MAX; /* Sentinel: still UINT_MAX after the loop means "time" was not given */
+	bytes = 0; /* Default value, wait all the body */
+	while (*(args[cur_arg])) {
+		if (strcmp(args[cur_arg], "time") == 0) {
+			if (!*args[cur_arg + 1]) {
+				memprintf(err, "missing argument for '%s'", args[cur_arg]);
+				return ACT_RET_PRS_ERR;
+			}
+			res = parse_time_err(args[cur_arg+1], &time, TIME_UNIT_MS);
+			if (res == PARSE_TIME_OVER) {
+				memprintf(err, "time overflow (maximum value is 2147483647 ms or ~24.8 days)");
+				return ACT_RET_PRS_ERR;
+			}
+			if (res == PARSE_TIME_UNDER) {
+				memprintf(err, "time underflow (minimum non-null value is 1 ms)");
+				return ACT_RET_PRS_ERR;
+			}
+			if (res) { /* any other non-NULL result points to the offending character */
+				memprintf(err, "unexpected character '%c'", *res);
+				return ACT_RET_PRS_ERR;
+			}
+			cur_arg++; /* skip the value; the keyword itself is skipped at the end of the loop */
+		}
+		else if (strcmp(args[cur_arg], "at-least") == 0) {
+			if (!*args[cur_arg + 1]) {
+				memprintf(err, "missing argument for '%s'", args[cur_arg]);
+				return ACT_RET_PRS_ERR;
+			}
+			res = parse_size_err(args[cur_arg+1], &bytes);
+			if (res) { /* non-NULL result points to the offending character */
+				memprintf(err, "unexpected character '%c'", *res);
+				return ACT_RET_PRS_ERR;
+			}
+			cur_arg++; /* skip the value; the keyword itself is skipped at the end of the loop */
+		}
+		else
+			break; /* stop parsing at the first unknown keyword */
+		cur_arg++;
+	}
+
+	if (time == UINT_MAX) { /* "time" is mandatory */
+		memprintf(err, "expects time <time> [ at-least <bytes> ]");
+		return ACT_RET_PRS_ERR;
+	}
+
+	rule->arg.act.p[0] = (void *)(uintptr_t)time; /* read back by http_action_wait_for_body() */
+	rule->arg.act.p[1] = (void *)(uintptr_t)bytes;
+
+	*orig_arg = cur_arg; /* report the first argument not consumed here */
+
+	rule->action = ACT_CUSTOM;
+	rule->action_ptr = http_action_wait_for_body;
+	return ACT_RET_PRS_OK;
+}
+
 /************************************************************************/
 /*   All supported http-request action keywords must be declared here.  */
 /************************************************************************/
@@ -2112,6 +2217,7 @@
 		{ "tarpit",           parse_http_deny,                 0 },
 		{ "track-sc",         parse_http_track_sc,             1 },
 		{ "set-timeout",      parse_http_set_timeout,          0 },
+		{ "wait-for-body",    parse_http_wait_for_body,        0 },
 		{ NULL, NULL }
 	}
 };
@@ -2141,6 +2247,7 @@
 		{ "set-tos",         parse_http_set_tos,        0 },
 		{ "strict-mode",     parse_http_strict_mode,    0 },
 		{ "track-sc",        parse_http_track_sc,       1 },
+		{ "wait-for-body",   parse_http_wait_for_body,  0 },
 		{ NULL, NULL }
 	}
 };
diff --git a/src/http_ana.c b/src/http_ana.c
index 51445e9..33d8b7f 100644
--- a/src/http_ana.c
+++ b/src/http_ana.c
@@ -913,65 +913,23 @@
 	struct session *sess = s->sess;
 	struct http_txn *txn = s->txn;
 	struct http_msg *msg = &s->txn->req;
-	struct htx *htx;
 
 	DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
 
-	htx = htxbuf(&req->buf);
 
-	if (htx->flags & HTX_FL_PARSING_ERROR)
+	switch (http_wait_for_msg_body(s, req, s->be->timeout.httpreq, 0)) {
+	case HTTP_RULE_RES_CONT:
+		goto http_end;
+	case HTTP_RULE_RES_YIELD:
+		goto missing_data_or_waiting;
+	case HTTP_RULE_RES_BADREQ:
 		goto return_bad_req;
-	if (htx->flags & HTX_FL_PROCESSING_ERROR)
+	case HTTP_RULE_RES_ERROR:
 		goto return_int_err;
-
-	/* Do nothing for bodyless and CONNECT requests */
-	if (txn->meth == HTTP_METH_CONNECT || (msg->flags & HTTP_MSGF_BODYLESS))
-		goto http_end;
-
-	/* We have to parse the HTTP request body to find any required data.
-	 * "balance url_param check_post" should have been the only way to get
-	 * into this. We were brought here after HTTP header analysis, so all
-	 * related structures are ready.
-	 */
-
-	if (msg->msg_state < HTTP_MSG_DATA) {
-		if (http_handle_expect_hdr(s, htx, msg) == -1)
-			goto return_int_err;
-	}
-
-	msg->msg_state = HTTP_MSG_DATA;
-
-	/* Now we're in HTTP_MSG_DATA. We just need to know if all data have
-	 * been received or if the buffer is full.
-	 */
-	if ((htx->flags & HTX_FL_EOM) || htx_get_tail_type(htx) > HTX_BLK_DATA ||
-	    channel_htx_full(req, htx, global.tune.maxrewrite))
-		goto http_end;
-
-	if ((req->flags & CF_READ_TIMEOUT) || tick_is_expired(req->analyse_exp, now_ms)) {
-		txn->status = 408;
-		if (!(s->flags & SF_ERR_MASK))
-			s->flags |= SF_ERR_CLITO;
-		_HA_ATOMIC_ADD(&sess->fe->fe_counters.failed_req, 1);
-		if (sess->listener && sess->listener->counters)
-			_HA_ATOMIC_ADD(&sess->listener->counters->failed_req, 1);
+	case HTTP_RULE_RES_ABRT:
 		goto return_prx_cond;
-	}
-
-	/* we get here if we need to wait for more data */
-	if (!(req->flags & (CF_SHUTR | CF_READ_ERROR))) {
-		/* Not enough data. We'll re-use the http-request
-		 * timeout here. Ideally, we should set the timeout
-		 * relative to the accept() date. We just set the
-		 * request timeout once at the beginning of the
-		 * request.
-		 */
-		channel_dont_connect(req);
-		if (!tick_isset(req->analyse_exp))
-			req->analyse_exp = tick_add_ifset(now_ms, s->be->timeout.httpreq);
-		DBG_TRACE_DEVEL("waiting for more data",
-				STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
-		return 0;
+	default:
+		goto return_int_err;
 	}
 
  http_end:
@@ -982,6 +940,12 @@
 	DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
 	return 1;
 
+ missing_data_or_waiting:
+	channel_dont_connect(req);
+	DBG_TRACE_DEVEL("waiting for more data",
+			STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+	return 0;
+
  return_int_err:
 	txn->status = 500;
 	if (!(s->flags & SF_ERR_MASK))
@@ -991,7 +955,7 @@
 		_HA_ATOMIC_ADD(&s->be->be_counters.internal_errors, 1);
 	if (sess->listener && sess->listener->counters)
 		_HA_ATOMIC_ADD(&sess->listener->counters->internal_errors, 1);
-	goto return_prx_cond;
+	goto return_prx_err;
 
  return_bad_req: /* let's centralize all bad requests */
 	txn->status = 400;
@@ -1000,9 +964,11 @@
 		_HA_ATOMIC_ADD(&sess->listener->counters->failed_req, 1);
 	/* fall through */
 
- return_prx_cond:
+ return_prx_err:
 	http_reply_and_close(s, txn->status, http_error_message(s));
+	/* fall through */
 
+ return_prx_cond:
 	if (!(s->flags & SF_ERR_MASK))
 		s->flags |= SF_ERR_PRXCOND;
 	if (!(s->flags & SF_FINST_MASK))
@@ -4134,6 +4100,115 @@
 	return 1;
 }
 
+/* This function waits for the message payload at most <time> milliseconds (may
+ * be set to TICK_ETERNITY). It stops waiting if at least <bytes> bytes of the
+ * payload are received (0 means no limit). It returns HTTP_RULE_* depending on
+ * the result:
+ *
+ *   - HTTP_RULE_RES_CONT when conditions are met to stop waiting
+ *   - HTTP_RULE_RES_YIELD to wait for more data
+ *   - HTTP_RULE_RES_ABRT when a timeout occurred
+ *   - HTTP_RULE_RES_BADREQ if a parsing error is raised by a lower level
+ *   - HTTP_RULE_RES_ERROR if an internal error occurred
+ *
+ * If a timeout occurred, this function is responsible for emitting the right
+ * response to the client, depending on the channel (408 on request side, 504
+ * on response side). All other errors must be handled by the caller.
+ */
+enum rule_result http_wait_for_msg_body(struct stream *s, struct channel *chn,
+					unsigned int time, unsigned int bytes)
+{
+	struct session *sess = s->sess;
+	struct http_txn *txn = s->txn;
+	struct http_msg *msg = ((chn->flags & CF_ISRESP) ? &txn->rsp : &txn->req);
+	struct htx *htx;
+	enum rule_result ret = HTTP_RULE_RES_CONT; /* default result: stop waiting */
+
+	htx = htxbuf(&chn->buf);
+
+	if (htx->flags & HTX_FL_PARSING_ERROR) {
+		ret = HTTP_RULE_RES_BADREQ;
+		goto end;
+	}
+	if (htx->flags & HTX_FL_PROCESSING_ERROR) {
+		ret = HTTP_RULE_RES_ERROR;
+		goto end;
+	}
+
+	/* Do nothing for bodyless messages and CONNECT requests */
+	if (txn->meth == HTTP_METH_CONNECT || (msg->flags & HTTP_MSGF_BODYLESS))
+		goto end;
+
+	if (!(chn->flags & CF_ISRESP) && msg->msg_state < HTTP_MSG_DATA) { /* request side only */
+		if (http_handle_expect_hdr(s, htx, msg) == -1) {
+			ret = HTTP_RULE_RES_ERROR;
+			goto end;
+		}
+	}
+
+	msg->msg_state = HTTP_MSG_DATA;
+
+	/* Now we're in HTTP_MSG_DATA. We just need to know if all data have
+	 * been received or if the buffer is full.
+	 */
+	if ((htx->flags & HTX_FL_EOM) || htx_get_tail_type(htx) > HTX_BLK_DATA ||
+	    channel_htx_full(chn, htx, global.tune.maxrewrite))
+		goto end;
+
+	if (bytes) { /* a minimum payload size was requested */
+		struct htx_blk *blk;
+		unsigned int len = 0;
+
+		for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+			if (htx_get_blk_type(blk) != HTX_BLK_DATA)
+				continue; /* only DATA blocks count as payload */
+			len += htx_get_blksz(blk);
+			if (len >= bytes)
+				goto end;
+		}
+	}
+
+	if ((chn->flags & CF_READ_TIMEOUT) || tick_is_expired(chn->analyse_exp, now_ms)) {
+		if (!(chn->flags & CF_ISRESP))
+			goto abort_req; /* 408 on the request side */
+		goto abort_res; /* 504 on the response side */
+	}
+
+	/* we get here if we need to wait for more data */
+	if (!(chn->flags & (CF_SHUTR | CF_READ_ERROR))) { /* otherwise <ret> stays HTTP_RULE_RES_CONT */
+		if (!tick_isset(chn->analyse_exp)) /* set the expiration date only if not already set */
+			chn->analyse_exp = tick_add_ifset(now_ms, time);
+		ret = HTTP_RULE_RES_YIELD;
+	}
+
+  end:
+	return ret;
+
+  abort_req:
+	txn->status = 408;
+	if (!(s->flags & SF_ERR_MASK))
+		s->flags |= SF_ERR_CLITO;
+	if (!(s->flags & SF_FINST_MASK))
+		s->flags |= SF_FINST_D;
+	_HA_ATOMIC_ADD(&sess->fe->fe_counters.failed_req, 1);
+	if (sess->listener && sess->listener->counters)
+		_HA_ATOMIC_ADD(&sess->listener->counters->failed_req, 1);
+	http_reply_and_close(s, txn->status, http_error_message(s));
+	ret = HTTP_RULE_RES_ABRT;
+	goto end;
+
+  abort_res:
+	txn->status = 504;
+	if (!(s->flags & SF_ERR_MASK))
+		s->flags |= SF_ERR_SRVTO;
+	if (!(s->flags & SF_FINST_MASK))
+		s->flags |= SF_FINST_D;
+	stream_inc_http_fail_ctr(s);
+	http_reply_and_close(s, txn->status, http_error_message(s));
+	ret = HTTP_RULE_RES_ABRT;
+	goto end;
+}
+
 void http_perform_server_redirect(struct stream *s, struct stream_interface *si)
 {
 	struct channel *req = &s->req;