BUG/MAJOR: tcpcheck: Allocate input and output buffers from the buffer pool

Historically, the input and output buffers of a check are allocated by hand
during the startup, with a specific size (not necessarily the same than
other buffers). But since the recent refactoring of the checks to rely
exclusively on the tcp-checks and to use the underlying mux layer, this part
is totally buggy. Indeed, because these buffers are now passed to a mux,
they maybe be swapped if a zero-copy is possible. In fact, for now it is
only possible in h2_rcv_buf(). Thus the bug concretely only exists if a h2
health-check is performed. But, it is a latent bug for other muxes.

Another problem is the size of these buffers. because it may differ for the
other buffer size, it might be source of bugs.

Finally, for configurations with hundreds of thousands of servers, having 2
buffers per check always allocated may be an issue.

To fix the bug, we now allocate these buffers when required using the buffer
pool. Thus not-running checks don't waste memory and muxes may swap them if
possible. The only drawback is the check buffers have now always the same
size than buffers used by the streams. This deprecates indirectly the
"tune.chksize" global option.

In addition, the http-check regtest have been update to perform some h2
health-checks.

Many thanks to @VigneshSP94 for its help on this bug.

This patch should solve the issue #936. It relies on the commit "MINOR:
tcpcheck: Don't handle anymore in-progress send rules in tcpcheck_main".
Both must be backport as far as 2.2.

bla

(cherry picked from commit b381a505c1010bb11abbe7b31e8d2307c4dab541)
Signed-off-by: William Lallemand <wlallemand@haproxy.org>
diff --git a/src/check.c b/src/check.c
index 9836d36..337030f 100644
--- a/src/check.c
+++ b/src/check.c
@@ -37,6 +37,7 @@
 #include <haproxy/chunk.h>
 #include <haproxy/dgram.h>
 #include <haproxy/dns.h>
+#include <haproxy/dynbuf-t.h>
 #include <haproxy/extcheck.h>
 #include <haproxy/fd.h>
 #include <haproxy/global.h>
@@ -866,8 +867,6 @@
 		set_server_check_status(check, HCHK_STATUS_START, NULL);
 
 		check->state |= CHK_ST_INPROGRESS;
-		b_reset(&check->bi);
-		b_reset(&check->bo);
 
 		task_set_affinity(t, tid_bit);
 
@@ -936,7 +935,9 @@
 			}
 		}
 		task_set_affinity(t, MAX_THREADS_MASK);
-		check->state &= ~CHK_ST_INPROGRESS;
+		check_release_buf(check, &check->bi);
+		check_release_buf(check, &check->bo);
+		check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC);
 
 		if (check->server) {
 			rv = 0;
@@ -961,18 +962,65 @@
 /**************************************************************************/
 /************************** Init/deinit checks ****************************/
 /**************************************************************************/
-const char *init_check(struct check *check, int type)
+/*
+ * Tries to grab a buffer and to re-enables processing on check <target>. The
+ * check flags are used to figure what buffer was requested. It returns 1 if the
+ * allocation succeeds, in which case the I/O tasklet is woken up, or 0 if it's
+ * impossible to wake up and we prefer to be woken up later.
+ */
+int check_buf_available(void *target)
 {
-	check->type = type;
+	struct check *check = target;
+
+	if ((check->state & CHK_ST_IN_ALLOC) && b_alloc_margin(&check->bi, 0)) {
+		check->state &= ~CHK_ST_IN_ALLOC;
+		tasklet_wakeup(check->wait_list.tasklet);
+		return 1;
+	}
+	if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc_margin(&check->bo, 0)) {
+		check->state &= ~CHK_ST_OUT_ALLOC;
+		tasklet_wakeup(check->wait_list.tasklet);
+		return 1;
+	}
+
+	return 0;
+}
 
-	b_reset(&check->bi); check->bi.size = global.tune.chksize;
-	b_reset(&check->bo); check->bo.size = global.tune.chksize;
+/*
+ * Allocate a buffer. If if fails, it adds the check in buffer wait queue.
+ */
+struct buffer *check_get_buf(struct check *check, struct buffer *bptr)
+{
+	struct buffer *buf = NULL;
 
-	check->bi.area = calloc(check->bi.size, sizeof(*check->bi.area));
-	check->bo.area = calloc(check->bo.size, sizeof(*check->bo.area));
+	if (likely(!MT_LIST_ADDED(&check->buf_wait.list)) &&
+	    unlikely((buf = b_alloc_margin(bptr, 0)) == NULL)) {
+		check->buf_wait.target = check;
+		check->buf_wait.wakeup_cb = check_buf_available;
+		MT_LIST_ADDQ(&buffer_wq, &check->buf_wait.list);
+	}
+	return buf;
+}
+
+/*
+ * Release a buffer, if any, and try to wake up entities waiting in the buffer
+ * wait queue.
+ */
+void check_release_buf(struct check *check, struct buffer *bptr)
+{
+	if (bptr->size) {
+		b_free(bptr);
+		offer_buffers(check->buf_wait.target, tasks_run_queue);
+	}
+}
+
+const char *init_check(struct check *check, int type)
+{
+	check->type = type;
 
-	if (!check->bi.area || !check->bo.area)
-		return "out of memory while allocating check buffer";
+	check->bi = BUF_NULL;
+	check->bo = BUF_NULL;
+	MT_LIST_INIT(&check->buf_wait.list);
 
 	check->wait_list.tasklet = tasklet_new();
 	if (!check->wait_list.tasklet)
@@ -989,8 +1037,8 @@
 	if (check->wait_list.tasklet)
 		tasklet_free(check->wait_list.tasklet);
 
-	free(check->bi.area);
-	free(check->bo.area);
+	check_release_buf(check, &check->bi);
+	check_release_buf(check, &check->bo);
 	if (check->cs) {
 		free(check->cs->conn);
 		check->cs->conn = NULL;
diff --git a/src/tcpcheck.c b/src/tcpcheck.c
index 964dcf8..accc699 100644
--- a/src/tcpcheck.c
+++ b/src/tcpcheck.c
@@ -993,6 +993,10 @@
 	 *   3: release and replace the old one on success
 	 */
 
+	/* Always release input and output buffer when a new connect is evaluated */
+	check_release_buf(check, &check->bi);
+	check_release_buf(check, &check->bo);
+
 	/* 2- prepare new connection */
 	cs = cs_new(NULL, (s ? &s->obj_type : &proxy->obj_type));
 	if (!cs) {
@@ -1222,13 +1226,23 @@
 	struct buffer *tmp = NULL;
 	struct htx *htx = NULL;
 
+	if (check->state & CHK_ST_OUT_ALLOC) {
+		ret = TCPCHK_EVAL_WAIT;
+		goto out;
+	}
+
+	if (!check_get_buf(check, &check->bo)) {
+		check->state |= CHK_ST_OUT_ALLOC;
+		ret = TCPCHK_EVAL_WAIT;
+		goto out;
+	}
+
 	/* Data already pending in the output buffer, send them now */
 	if (b_data(&check->bo))
 		goto do_send;
 
-	/* reset the read & write buffer */
-	b_reset(&check->bi);
-	b_reset(&check->bo);
+	/* Always release input buffer when a new send is evaluated */
+	check_release_buf(check, &check->bi);
 
 	switch (send->type) {
 	case TCPCHK_SEND_STRING:
@@ -1372,6 +1386,8 @@
 
   out:
 	free_trash_chunk(tmp);
+	if (!b_data(&check->bo) || ret == TCPCHK_EVAL_STOP)
+		check_release_buf(check, &check->bo);
 	return ret;
 
   error_htx:
@@ -1414,6 +1430,14 @@
 	if (cs->flags & CS_FL_EOS)
 		goto end_recv;
 
+	if (check->state & CHK_ST_IN_ALLOC)
+		goto wait_more_data;
+
+	if (!check_get_buf(check, &check->bi)) {
+		check->state |= CHK_ST_IN_ALLOC;
+		goto wait_more_data;
+	}
+
 	/* errors on the connection and the conn-stream were already checked */
 
 	/* prepare to detect if the mux needs more room */
@@ -1461,6 +1485,8 @@
 	}
 
   out:
+	if (!b_data(&check->bi) || ret == TCPCHK_EVAL_STOP)
+		check_release_buf(check, &check->bi);
 	return ret;
 
   stop:
@@ -2115,6 +2141,10 @@
 	if ((conn && conn->flags & CO_FL_ERROR) || (cs && cs->flags & CS_FL_ERROR))
 		chk_report_conn_err(check, errno, 0);
 
+	/* the tcpcheck is finished, release in/out buffer now */
+	check_release_buf(check, &check->bi);
+	check_release_buf(check, &check->bo);
+
   out:
 	return retcode;
 }