BUG/MINOR: checks: Handle synchronous connect when a tcpcheck is started A connection may be synchronously established. In the tcpcheck context, it may be a problem if several connections come one after another. In this case, there is no event to close the very first connection before starting the next one. The checks is thus blocked and timed out, a L7 timeout error is reported. To fix the bug, when a tcpcheck is started, we immediately evaluate its state. Most of time, nothing is performed and we must wait. But it is thus possible to handle the result of a successfull connection. This patch should fix the issue #1234. It must be backported as far as 2.2. (cherry picked from commit 92017a3215545c26cd9baad11a1cdfb2859acc6c) Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>

commit: 67da481a105f1c030eb971818a6b43bb2b5ba0bf [log] [tgz]
author: Christopher Faulet <cfaulet@haproxy.com> Thu May 06 16:01:18 2021 +0200
committer: Christopher Faulet <cfaulet@haproxy.com> Fri May 07 15:46:21 2021 +0200
tree: a26cf735d29fd4a4605d050f3b579d325d4ac339
parent: d2d264fe64dfc5d9eaa690371d86c972149023ac [diff]
diff --git a/src/check.c b/src/check.c
index 9073628..4be202a 100644
--- a/src/check.c
+++ b/src/check.c

@@ -852,13 +852,14 @@
 {
 	struct check *check = context;
 	struct proxy *proxy = check->proxy;
-	struct conn_stream *cs = check->cs;
-	struct connection *conn = cs_conn(cs);
+	struct conn_stream *cs;
+	struct connection *conn;
 	int rv;
 	int expired = tick_is_expired(t->expire, now_ms);
 
 	if (check->server)
 		HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
+
 	if (!(check->state & CHK_ST_INPROGRESS)) {
 		/* no check currently running */
 		if (!expired) /* woke up too early */
@@ -881,91 +882,93 @@
 
 		check->current_step = NULL;
 		tcpcheck_main(check);
-		goto out_unlock;
+		expired = 0;
 	}
-	else {
-		/* there was a test running.
-		 * First, let's check whether there was an uncaught error,
-		 * which can happen on connect timeout or error.
-		 */
-		if (check->result == CHK_RES_UNKNOWN) {
-			if ((conn->flags & CO_FL_ERROR) || cs->flags & CS_FL_ERROR || expired) {
-				chk_report_conn_err(check, 0, expired);
-			}
-			else {
-				if (check->state & CHK_ST_CLOSE_CONN) {
-					cs_destroy(cs);
-					cs = NULL;
-					conn = NULL;
-					check->cs = NULL;
-					check->state &= ~CHK_ST_CLOSE_CONN;
-					tcpcheck_main(check);
-				}
-				if (check->result == CHK_RES_UNKNOWN)
-					goto out_unlock; /* timeout not reached, wait again */
+
+	cs = check->cs;
+	conn = cs_conn(cs);
+
+	/* there was a test running.
+	 * First, let's check whether there was an uncaught error,
+	 * which can happen on connect timeout or error.
+	 */
+	if (check->result == CHK_RES_UNKNOWN) {
+		if ((conn && ((conn->flags & CO_FL_ERROR) || (cs->flags & CS_FL_ERROR))) || expired) {
+			chk_report_conn_err(check, 0, expired);
+		}
+		else {
+			if (check->state & CHK_ST_CLOSE_CONN) {
+				cs_destroy(cs);
+				cs = NULL;
+				conn = NULL;
+				check->cs = NULL;
+				check->state &= ~CHK_ST_CLOSE_CONN;
+				tcpcheck_main(check);
 			}
+			if (check->result == CHK_RES_UNKNOWN)
+				goto out_unlock; /* timeout not reached, wait again */
 		}
+	}
 
-		/* check complete or aborted */
+	/* check complete or aborted */
 
-		check->current_step = NULL;
+	check->current_step = NULL;
 
-		if (conn && conn->xprt) {
-			/* The check was aborted and the connection was not yet closed.
-			 * This can happen upon timeout, or when an external event such
-			 * as a failed response coupled with "observe layer7" caused the
-			 * server state to be suddenly changed.
-			 */
-			conn_sock_drain(conn);
-			cs_close(cs);
-		}
+	if (conn && conn->xprt) {
+		/* The check was aborted and the connection was not yet closed.
+		 * This can happen upon timeout, or when an external event such
+		 * as a failed response coupled with "observe layer7" caused the
+		 * server state to be suddenly changed.
+		 */
+		conn_sock_drain(conn);
+		cs_close(cs);
+	}
 
-		if (cs) {
-			if (check->wait_list.events)
-				cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list);
-			/* We may have been scheduled to run, and the
-			 * I/O handler expects to have a cs, so remove
-			 * the tasklet
-			 */
-			tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
-			cs_destroy(cs);
-			cs = check->cs = NULL;
-			conn = NULL;
-		}
+	if (cs) {
+		if (check->wait_list.events)
+			cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list);
+		/* We may have been scheduled to run, and the
+		 * I/O handler expects to have a cs, so remove
+		 * the tasklet
+		 */
+		tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
+		cs_destroy(cs);
+		cs = check->cs = NULL;
+		conn = NULL;
+	}
 
-		if (check->sess != NULL) {
-			vars_prune(&check->vars, check->sess, NULL);
-			session_free(check->sess);
-			check->sess = NULL;
-		}
+	if (check->sess != NULL) {
+		vars_prune(&check->vars, check->sess, NULL);
+		session_free(check->sess);
+		check->sess = NULL;
+	}
 
-		if (check->server) {
-			if (check->result == CHK_RES_FAILED) {
-				/* a failure or timeout detected */
-				check_notify_failure(check);
-			}
-			else if (check->result == CHK_RES_CONDPASS) {
-				/* check is OK but asks for stopping mode */
-				check_notify_stopping(check);
-			}
-			else if (check->result == CHK_RES_PASSED) {
-				/* a success was detected */
-				check_notify_success(check);
-			}
+	if (check->server) {
+		if (check->result == CHK_RES_FAILED) {
+			/* a failure or timeout detected */
+			check_notify_failure(check);
+		}
+		else if (check->result == CHK_RES_CONDPASS) {
+			/* check is OK but asks for stopping mode */
+			check_notify_stopping(check);
 		}
-		task_set_affinity(t, MAX_THREADS_MASK);
-		check_release_buf(check, &check->bi);
-		check_release_buf(check, &check->bo);
-		check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC);
+		else if (check->result == CHK_RES_PASSED) {
+			/* a success was detected */
+			check_notify_success(check);
+		}
+	}
+	task_set_affinity(t, MAX_THREADS_MASK);
+	check_release_buf(check, &check->bi);
+	check_release_buf(check, &check->bo);
+	check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC);
 
-		if (check->server) {
-			rv = 0;
-			if (global.spread_checks > 0) {
-				rv = srv_getinter(check) * global.spread_checks / 100;
-				rv -= (int) (2 * rv * (ha_random32() / 4294967295.0));
-			}
-			t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
+	if (check->server) {
+		rv = 0;
+		if (global.spread_checks > 0) {
+			rv = srv_getinter(check) * global.spread_checks / 100;
+			rv -= (int) (2 * rv * (ha_random32() / 4294967295.0));
 		}
+		t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
 	}
 
  reschedule:
commit	67da481a105f1c030eb971818a6b43bb2b5ba0bf	[log] [tgz]
author	Christopher Faulet <cfaulet@haproxy.com>	Thu May 06 16:01:18 2021 +0200
committer	Christopher Faulet <cfaulet@haproxy.com>	Fri May 07 15:46:21 2021 +0200
tree	a26cf735d29fd4a4605d050f3b579d325d4ac339
parent	d2d264fe64dfc5d9eaa690371d86c972149023ac [diff]