BUG/MEDIUM: tcpcheck: Don't destroy connection in the wake callback context

When a tcpcheck ruleset uses multiple connections, the existing one must be
closed and destroyed before opening a new one. This part is handled in the
tcpcheck_main() function, when it is called from the wake callback function
(wake_srv_chk). But this is a problem, because the wake callback may itself
be called from the mux layer. This means a mux may call the wake callback
function of the data layer, which may in turn release the connection and
the mux. It is easy to see why this is hazardous. And indeed, depending on
the scheduling, it leads to crashes.
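
To make the hazard concrete, here is a minimal, self-contained sketch of
that call chain. All names (struct conn, struct mux, data_wake,
mux_io_handler) are simplified for illustration and are not HAProxy's real
API:

    #include <stdio.h>
    #include <stdlib.h>

    struct mux;

    struct conn {
        struct mux *mux;
        /* data-layer wake callback; returns < 0 if it released everything */
        int (*wake)(struct conn *c);
    };

    struct mux {
        struct conn *conn;
        int events;
    };

    /* The problematic pattern: the data layer frees conn and mux right
     * here, while its caller (the mux) is still running. */
    static int data_wake(struct conn *c)
    {
        free(c->mux);
        free(c);
        return -1;  /* tell the caller that nothing may be touched anymore */
    }

    static void mux_io_handler(struct mux *m)
    {
        if (m->conn->wake(m->conn) < 0)
            return;     /* m and m->conn are freed: bail out immediately */
        m->events = 0;  /* reaching this after the free() would be a use-after-free */
    }

    int main(void)
    {
        struct mux  *m = calloc(1, sizeof(*m));
        struct conn *c = calloc(1, sizeof(*c));

        m->conn = c;
        c->mux  = m;
        c->wake = data_wake;
        /* A real mux runs much more code after the wake call and cannot
         * always bail out this cleanly, hence the crashes. */
        mux_io_handler(m);
        puts("done");
        return 0;
    }

Here the -1 return value is the only thing keeping the handler away from
the freed memory; the fix below removes the need for that by never freeing
anything from this context.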

Thus, we must avoid releasing the connection in the wake callback context
and move this part to the check's process function instead. To do so, we
rely on the CHK_ST_CLOSE_CONN flag. When a connection must be replaced by a
new one, this flag is set on the check in the tcpcheck_main() function and
the check's task is woken up. The connection is then really closed in the
process_chk_conn() function.
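
The idea is simply to defer the destructive part to the task context. A
rough, self-contained sketch of that pattern, using made-up names
(pseudo_check, CLOSE_CONN, process_check) rather than HAProxy's real types:

    #include <stdio.h>
    #include <stdlib.h>

    #define CLOSE_CONN 0x0100  /* "connection must be closed" flag */

    struct pseudo_conn { int fd; };

    struct pseudo_check {
        unsigned int state;        /* flag bits, like check->state */
        struct pseudo_conn *conn;  /* current connection, if any */
    };

    /* Called from I/O context (e.g. the wake callback): it must NOT free
     * the connection here because a lower layer may still be using it; it
     * only marks it and lets the task do the work (task_wakeup() in
     * HAProxy). */
    static void request_conn_close(struct pseudo_check *check)
    {
        check->state |= CLOSE_CONN;
    }

    /* Called later from the task context, where no lower layer still holds
     * a pointer to the connection: it is now safe to release it. */
    static void process_check(struct pseudo_check *check)
    {
        if (check->state & CLOSE_CONN) {
            free(check->conn);  /* cs_destroy() in the real code */
            check->conn = NULL;
            check->state &= ~CLOSE_CONN;
            /* the ruleset may now open its next connection */
        }
    }

    int main(void)
    {
        struct pseudo_check check = { 0, calloc(1, sizeof(struct pseudo_conn)) };

        request_conn_close(&check);  /* I/O context: only sets the flag */
        process_check(&check);       /* task context: actually closes */
        printf("conn=%p state=%#x\n", (void *)check.conn, check.state);
        return 0;
    }

The key property is that the release only ever runs from the check's
process function, never from code a lower layer is still executing.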

This patch must be backported as far as 2.2, with some adaptations since
the code is not exactly the same.

(cherry picked from commit 8f100427c4bf56acff967e313cd9cb8d42da035d)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
diff --git a/include/haproxy/check-t.h b/include/haproxy/check-t.h
index 274e162..b78a9f9 100644
--- a/include/haproxy/check-t.h
+++ b/include/haproxy/check-t.h
@@ -52,6 +52,7 @@
 #define CHK_ST_PORT_MISS        0x0020  /* check can't be send because no port is configured to run it */
 #define CHK_ST_IN_ALLOC         0x0040  /* check blocked waiting for input buffer allocation */
 #define CHK_ST_OUT_ALLOC        0x0080  /* check blocked waiting for output buffer allocation */
+#define CHK_ST_CLOSE_CONN       0x0100  /* check is waiting for the connection to be closed */
 
 /* check status */
 enum healthcheck_status {
diff --git a/src/check.c b/src/check.c
index f56854b..b95b4ae 100644
--- a/src/check.c
+++ b/src/check.c
@@ -794,13 +794,17 @@
 		task_wakeup(check->task, TASK_WOKEN_IO);
 	}
 
-	if (check->result != CHK_RES_UNKNOWN) {
+	if (check->result != CHK_RES_UNKNOWN || ret == -1) {
 		/* Check complete or aborted. If connection not yet closed do it
 		 * now and wake the check task up to be sure the result is
 		 * handled ASAP. */
 		conn_sock_drain(conn);
 		cs_close(cs);
 		ret = -1;
+
+		if (check->wait_list.events)
+			cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list);
+
 		/* We may have been scheduled to run, and the
 		 * I/O handler expects to have a cs, so remove
 		 * the tasklet
@@ -883,8 +887,18 @@
 			if ((conn->flags & CO_FL_ERROR) || cs->flags & CS_FL_ERROR || expired) {
 				chk_report_conn_err(check, 0, expired);
 			}
-			else
-				goto out_unlock; /* timeout not reached, wait again */
+			else {
+				if (check->state & CHK_ST_CLOSE_CONN) {
+					cs_destroy(cs);
+					cs = NULL;
+					conn = NULL;
+					check->cs = NULL;
+					check->state &= ~CHK_ST_CLOSE_CONN;
+					tcpcheck_main(check);
+				}
+				if (check->result == CHK_RES_UNKNOWN)
+					goto out_unlock; /* timeout not reached, wait again */
+			}
 		}
 
 		/* check complete or aborted */
diff --git a/src/tcpcheck.c b/src/tcpcheck.c
index 7c9bd78..f85e0b3 100644
--- a/src/tcpcheck.c
+++ b/src/tcpcheck.c
@@ -984,20 +984,20 @@
 	struct tcpcheck_rule *next;
 	int status, port;
 
-	/* For a connect action we'll create a new connection. We may also have
-	 * to kill a previous one. But we don't want to leave *without* a
+	/* For a connect action we'll create a new connection. The previous one,
+	 * if any, should have been killed. We don't want to leave *without* a
 	 * connection if we came here from the connection layer, hence with a
 	 * connection.  Thus we'll proceed in the following order :
 	 *   1: close but not release previous connection (handled by the caller)
-	 *   2: try to get a new connection
-	 *   3: release and replace the old one on success
+	 *   2: wait for the connection to be released (handled by process_chk_conn())
+	 *   3: try to get a new connection
 	 */
 
 	/* Always release input and output buffer when a new connect is evaluated */
 	check_release_buf(check, &check->bi);
 	check_release_buf(check, &check->bo);
 
-	/* 2- prepare new connection */
+	/* prepare new connection */
 	cs = cs_new(NULL, (s ? &s->obj_type : &proxy->obj_type));
 	if (!cs) {
 		chunk_printf(&trash, "TCPCHK error allocating connection at step %d",
@@ -1009,19 +1009,6 @@
 		goto out;
 	}
 
-	/* 3- release and replace the old one on success */
-	if (check->cs) {
-		if (check->wait_list.events)
-			check->cs->conn->mux->unsubscribe(check->cs, check->wait_list.events,
-							  &check->wait_list);
-
-		/* We may have been scheduled to run, and the I/O handler
-		 * expects to have a cs, so remove the tasklet
-		 */
-		tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
-		cs_destroy(check->cs);
-	}
-
 	tasklet_set_tid(check->wait_list.tasklet, tid);
 
 	check->cs = cs;
@@ -1980,30 +1967,38 @@
 	/* 2- check if we are waiting for the connection establishment. It only
 	 *    happens during TCPCHK_ACT_CONNECT. */
 	if (check->current_step && check->current_step->action == TCPCHK_ACT_CONNECT) {
-		if (conn->flags & CO_FL_WAIT_XPRT) {
-			struct tcpcheck_rule *next;
+		if (!cs)
+			rule = check->current_step;
+		else if (cs && (check->state & CHK_ST_CLOSE_CONN)) {
+			/* We are still waiting for the connection to be closed */
+			goto out;
+		}
+		else {
+			if (conn->flags & CO_FL_WAIT_XPRT) {
+				struct tcpcheck_rule *next;
 
-			next = get_next_tcpcheck_rule(check->tcpcheck_rules, check->current_step);
-			if (next && next->action == TCPCHK_ACT_SEND) {
-				if (!(check->wait_list.events & SUB_RETRY_SEND))
-					conn->mux->subscribe(cs, SUB_RETRY_SEND, &check->wait_list);
-				goto out;
-			}
-			else {
-				eval_ret = tcpcheck_eval_recv(check, check->current_step);
-				if (eval_ret == TCPCHK_EVAL_STOP)
-					goto out_end_tcpcheck;
-				else if (eval_ret == TCPCHK_EVAL_WAIT)
+				next = get_next_tcpcheck_rule(check->tcpcheck_rules, check->current_step);
+				if (next && next->action == TCPCHK_ACT_SEND) {
+					if (!(check->wait_list.events & SUB_RETRY_SEND))
+						conn->mux->subscribe(cs, SUB_RETRY_SEND, &check->wait_list);
 					goto out;
-				last_read = ((conn->flags & CO_FL_ERROR) || (cs->flags & (CS_FL_ERROR|CS_FL_EOS)));
-				must_read = 0;
+				}
+				else {
+					eval_ret = tcpcheck_eval_recv(check, check->current_step);
+					if (eval_ret == TCPCHK_EVAL_STOP)
+						goto out_end_tcpcheck;
+					else if (eval_ret == TCPCHK_EVAL_WAIT)
+						goto out;
+					last_read = ((conn->flags & CO_FL_ERROR) || (cs->flags & (CS_FL_ERROR|CS_FL_EOS)));
+					must_read = 0;
+				}
 			}
-		}
 
-                if (check->proxy->timeout.check)
-                    check->task->expire = tick_add_ifset(now_ms, check->proxy->timeout.check);
+			if (check->proxy->timeout.check)
+				check->task->expire = tick_add_ifset(now_ms, check->proxy->timeout.check);
 
-		rule = LIST_NEXT(&check->current_step->list, typeof(rule), list);
+			rule = LIST_NEXT(&check->current_step->list, typeof(rule), list);
+		}
 	}
 
 	/* 3- check if a rule must be resume. It happens if check->current_step
@@ -2043,13 +2038,20 @@
 		check->code = 0;
 		switch (rule->action) {
 		case TCPCHK_ACT_CONNECT:
+			/* Not the first connection, release it first */
+			if (check->cs && check->current_step != rule) {
+				check->state |= CHK_ST_CLOSE_CONN;
+				retcode = -1;
+			}
+
 			check->current_step = rule;
 
-			/* close but not release yet previous connection  */
-			if (check->cs) {
-				cs_close(check->cs);
-				retcode = -1; /* do not reuse the fd in the caller! */
+			/* We are still waiting for the connection to be closed */
+			if (check->cs && (check->state & CHK_ST_CLOSE_CONN)) {
+				eval_ret = TCPCHK_EVAL_WAIT;
+				break;
 			}
+
 			eval_ret = tcpcheck_eval_connect(check, rule);
 
 			/* Refresh conn-stream and connection */