MAJOR: checks: completely use the connection transport layer With this change, we now use the connection's transport layer to receive and send data during health checks. It even becomes possible to send data in multiple times, which was not possible before. The transport layer used is the same as the one used for the traffic, unless a specific address and/or port is specified for the checks using "port" or "addr", in which case the transport layer defaults to raw_sock. An option will be provided to force SSL checks on different IP/ports later. Connection errors and timeouts are still reported. Some situations where strerror() was able to report a precise error after a failed connect() in the past might not be reported with as much precision anymore, but the error message was already meaningless. During the tests, no situation was found where a message became less precise.

commit: f150317671be5374fe64c81a8cc940ca7ba8cbcf [log] [tgz]
author: Willy Tarreau <w@1wt.eu> Fri Sep 28 19:39:36 2012 +0200
committer: Willy Tarreau <w@1wt.eu> Fri Oct 05 00:33:14 2012 +0200
tree: 684cd4fe18eab506f3981cd215e9e0025a9bb7eb
parent: f4288ee4bacadeea288c3819031a37ff9d908c20 [diff]
diff --git a/src/cfgparse.c b/src/cfgparse.c
index 0376520..57139ea 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c

@@ -6294,6 +6294,8 @@
 				newsrv->ssl_ctx.reused_sess = NULL;
 				if (newsrv->use_ssl)
 					newsrv->xprt = &ssl_sock;
+				if (newsrv->check.use_ssl)
+					newsrv->check.xprt = &ssl_sock;
 				newsrv->ssl_ctx.ctx = SSL_CTX_new(SSLv23_client_method());
 				if(!newsrv->ssl_ctx.ctx) {
 

diff --git a/src/checks.c b/src/checks.c
index bf77574..52f70d2 100644
--- a/src/checks.c
+++ b/src/checks.c

@@ -771,10 +771,8 @@
 	int fd = conn->t.sock.fd;
 	struct task *t = s->check.task;
 
-	if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR)) {
+	if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR))
 		conn->flags |= CO_FL_ERROR;
-		return;
-	}
 
 	if (unlikely(conn->flags & CO_FL_ERROR)) {
 		int skerr, err = errno;
@@ -796,78 +794,23 @@
 		if (s->check.bo->o) {
 			int ret;
 
-			ret = send(fd, bo_ptr(s->check.bo), s->check.bo->o, MSG_DONTWAIT | MSG_NOSIGNAL);
-			if (ret > 0) {
-				if (conn->flags & CO_FL_WAIT_L4_CONN)
-					conn->flags &= ~CO_FL_WAIT_L4_CONN;
-
-				s->check.bo->o -= ret;
-				if (!s->check.bo->o) {
-					/* full request sent, we allow up to <timeout.check> if nonzero for a response */
-					if (s->proxy->timeout.check) {
-						t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
-						task_queue(t);
-					}
-					__conn_data_want_recv(conn);   /* prepare for reading reply */
-					goto out_nowake;
-				}
-				/* some data remains */
-				goto out_poll;
-			}
-			else if (ret == 0 || errno == EAGAIN)
-				goto out_poll;
-			else {
-				switch (errno) {
-					case ECONNREFUSED:
-					case ENETUNREACH:
-						set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
-						break;
-
-					default:
-						set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno));
-				}
-
-				goto out_error;
+			ret = conn->xprt->snd_buf(conn, s->check.bo, MSG_DONTWAIT | MSG_NOSIGNAL);
+			if (conn->flags & CO_FL_ERROR) {
+				set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
+				goto out_wakeup;
 			}
 		}
-		else {
-			/* We have no data to send to check the connection, and
-			 * getsockopt() will not inform us whether the connection
-			 * is still pending. So we'll reuse connect() to check the
-			 * state of the socket. This has the advantage of givig us
-			 * the following info :
-			 *  - error
-			 *  - connecting (EALREADY, EINPROGRESS)
-			 *  - connected (EISCONN, 0)
-			 */
-
-			struct sockaddr_storage sa;
-
-			if (is_addr(&s->check.addr))
-				sa = s->check.addr;
-			else
-				sa = s->addr;
-
-			set_host_port(&sa, s->check.port);
-
-			if (connect(fd, (struct sockaddr *)&sa, get_addr_len(&sa)) == 0)
-				errno = 0;
 
-			if (errno == EALREADY || errno == EINPROGRESS)
-				goto out_poll;
-
-			if (errno && errno != EISCONN) {
-				set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
-				goto out_error;
+		if (!s->check.bo->o) {
+			/* full request sent, we allow up to <timeout.check> if nonzero for a response */
+			if (s->proxy->timeout.check) {
+				t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
+				task_queue(t);
 			}
-
-			if (conn->flags & CO_FL_WAIT_L4_CONN)
-				conn->flags &= ~CO_FL_WAIT_L4_CONN;
-
-			/* good TCP connection is enough */
-			set_server_check_status(s, HCHK_STATUS_L4OK, NULL);
-			goto out_wakeup;
+			__conn_data_want_recv(conn);   /* prepare for reading reply */
+			goto out_nowake;
 		}
+		goto out_poll;
 	}
  out_wakeup:
 	task_wakeup(t, TASK_WOKEN_IO);
@@ -901,16 +844,13 @@
 {
 	int len;
 	struct server *s = conn->owner;
-	int fd = conn->t.sock.fd;
 	struct task *t = s->check.task;
 	char *desc;
 	int done;
 	unsigned short msglen;
 
-	if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR)) {
+	if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR))
 		conn->flags |= CO_FL_ERROR;
-		return;
-	}
 
 	if (unlikely((s->result & SRV_CHK_ERROR) || (conn->flags & CO_FL_ERROR))) {
 		/* in case of TCP only, this tells us if the connection failed */
@@ -934,31 +874,22 @@
 	 */
 
 	done = 0;
-	for (len = 0; s->check.bi->i < s->check.bi->size; s->check.bi->i += len) {
-		len = recv(fd, s->check.bi->data + s->check.bi->i, s->check.bi->size - s->check.bi->i, 0);
-		if (len <= 0)
-			break;
-	}
 
-	if (len == 0)
-		done = 1; /* connection hangup received */
-	else if (len < 0 && errno != EAGAIN) {
-		/* Report network errors only if we got no other data. Otherwise
-		 * we'll let the upper layers decide whether the response is OK
-		 * or not. It is very common that an RST sent by the server is
-		 * reported as an error just after the last data chunk.
-		 */
+	len = conn->xprt->rcv_buf(conn, s->check.bi, s->check.bi->size);
+	if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH)) {
 		done = 1;
-		if (!s->check.bi->i) {
+		if ((conn->flags & CO_FL_ERROR) && !s->check.bi->i) {
+			/* Report network errors only if we got no other data. Otherwise
+			 * we'll let the upper layers decide whether the response is OK
+			 * or not. It is very common that an RST sent by the server is
+			 * reported as an error just after the last data chunk.
+			 */
 			if (!(s->result & SRV_CHK_ERROR))
 				set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
 			goto out_wakeup;
 		}
 	}
 
-	if (len >= 0 && (conn->flags & CO_FL_WAIT_L4_CONN))
-		conn->flags &= ~CO_FL_WAIT_L4_CONN;
-
 	/* Intermediate or complete response received.
 	 * Terminate string in check.bi->data buffer.
 	 */
@@ -1237,7 +1168,10 @@
 	s->check.bi->i = 0;
 
 	/* Close the connection... */
-	shutdown(fd, SHUT_RDWR);
+	if (conn->xprt && conn->xprt->shutw)
+		conn->xprt->shutw(conn, 0);
+	if (!(conn->flags & (CO_FL_WAIT_L4_CONN|CO_FL_SOCK_WR_SH)))
+		shutdown(conn->t.sock.fd, SHUT_RDWR);
 	__conn_data_stop_recv(conn);
 	task_wakeup(t, TASK_WOKEN_IO);
 	return;
@@ -1453,7 +1387,34 @@
 		goto new_chk;
 	}
 	else {
-		/* there was a test running */
+		/* there was a test running.
+		 * First, let's check whether there was an uncaught error,
+		 * which can happen on connect timeout or error.
+		 */
+		if (s->result == SRV_CHK_UNKNOWN) {
+			if (conn->flags & CO_FL_CONNECTED) {
+				/* good TCP connection is enough */
+				if (s->check.use_ssl)
+					set_server_check_status(s, HCHK_STATUS_L6OK, NULL);
+				else
+					set_server_check_status(s, HCHK_STATUS_L4OK, NULL);
+			}
+			else if (conn->flags & CO_FL_WAIT_L4_CONN) {
+				/* L4 failed */
+				if (conn->flags & CO_FL_ERROR)
+					set_server_check_status(s, HCHK_STATUS_L4CON, NULL);
+				else
+					set_server_check_status(s, HCHK_STATUS_L4TOUT, NULL);
+			}
+			else if (conn->flags & CO_FL_WAIT_L6_CONN) {
+				/* L6 failed */
+				if (conn->flags & CO_FL_ERROR)
+					set_server_check_status(s, HCHK_STATUS_L6RSP, NULL);
+				else
+					set_server_check_status(s, HCHK_STATUS_L6TOUT, NULL);
+			}
+		}
+
 		if ((s->result & (SRV_CHK_ERROR|SRV_CHK_RUNNING)) == SRV_CHK_RUNNING) { /* good server detected */
 			/* we may have to add/remove this server from the LB group */
 			if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) {
commit	f150317671be5374fe64c81a8cc940ca7ba8cbcf	[log] [tgz]
author	Willy Tarreau <w@1wt.eu>	Fri Sep 28 19:39:36 2012 +0200
committer	Willy Tarreau <w@1wt.eu>	Fri Oct 05 00:33:14 2012 +0200
tree	684cd4fe18eab506f3981cd215e9e0025a9bb7eb
parent	f4288ee4bacadeea288c3819031a37ff9d908c20 [diff]