MAJOR: checks: completely use the connection transport layer
With this change, we now use the connection's transport layer to receive
and send data during health checks. It even becomes possible to send data
in several passes, which was not possible before.
The transport layer used is the same as the one used for the traffic, unless
a specific address and/or port is specified for the checks using "port" or
"addr", in which case the transport layer defaults to raw_sock. An option
will be provided to force SSL checks on different IP/ports later.
Connection errors and timeouts are still reported.
In the past, strerror() was sometimes able to report a precise error after
a failed connect(). Such situations might no longer be reported with as much
precision, but the error message was already meaningless. During the tests,
no situation was found where a message became less precise.
diff --git a/src/cfgparse.c b/src/cfgparse.c
index 0376520..57139ea 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -6294,6 +6294,8 @@
newsrv->ssl_ctx.reused_sess = NULL;
if (newsrv->use_ssl)
newsrv->xprt = &ssl_sock;
+ if (newsrv->check.use_ssl)
+ newsrv->check.xprt = &ssl_sock;
newsrv->ssl_ctx.ctx = SSL_CTX_new(SSLv23_client_method());
if(!newsrv->ssl_ctx.ctx) {
diff --git a/src/checks.c b/src/checks.c
index bf77574..52f70d2 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -771,10 +771,8 @@
int fd = conn->t.sock.fd;
struct task *t = s->check.task;
- if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR)) {
+ if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR))
conn->flags |= CO_FL_ERROR;
- return;
- }
if (unlikely(conn->flags & CO_FL_ERROR)) {
int skerr, err = errno;
@@ -796,78 +794,23 @@
if (s->check.bo->o) {
int ret;
- ret = send(fd, bo_ptr(s->check.bo), s->check.bo->o, MSG_DONTWAIT | MSG_NOSIGNAL);
- if (ret > 0) {
- if (conn->flags & CO_FL_WAIT_L4_CONN)
- conn->flags &= ~CO_FL_WAIT_L4_CONN;
-
- s->check.bo->o -= ret;
- if (!s->check.bo->o) {
- /* full request sent, we allow up to <timeout.check> if nonzero for a response */
- if (s->proxy->timeout.check) {
- t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
- task_queue(t);
- }
- __conn_data_want_recv(conn); /* prepare for reading reply */
- goto out_nowake;
- }
- /* some data remains */
- goto out_poll;
- }
- else if (ret == 0 || errno == EAGAIN)
- goto out_poll;
- else {
- switch (errno) {
- case ECONNREFUSED:
- case ENETUNREACH:
- set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
- break;
-
- default:
- set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno));
- }
-
- goto out_error;
+ ret = conn->xprt->snd_buf(conn, s->check.bo, MSG_DONTWAIT | MSG_NOSIGNAL);
+ if (conn->flags & CO_FL_ERROR) {
+ set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
+ goto out_wakeup;
}
}
- else {
- /* We have no data to send to check the connection, and
- * getsockopt() will not inform us whether the connection
- * is still pending. So we'll reuse connect() to check the
- * state of the socket. This has the advantage of givig us
- * the following info :
- * - error
- * - connecting (EALREADY, EINPROGRESS)
- * - connected (EISCONN, 0)
- */
-
- struct sockaddr_storage sa;
-
- if (is_addr(&s->check.addr))
- sa = s->check.addr;
- else
- sa = s->addr;
-
- set_host_port(&sa, s->check.port);
-
- if (connect(fd, (struct sockaddr *)&sa, get_addr_len(&sa)) == 0)
- errno = 0;
- if (errno == EALREADY || errno == EINPROGRESS)
- goto out_poll;
-
- if (errno && errno != EISCONN) {
- set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
- goto out_error;
+ if (!s->check.bo->o) {
+ /* full request sent, we allow up to <timeout.check> if nonzero for a response */
+ if (s->proxy->timeout.check) {
+ t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
+ task_queue(t);
}
-
- if (conn->flags & CO_FL_WAIT_L4_CONN)
- conn->flags &= ~CO_FL_WAIT_L4_CONN;
-
- /* good TCP connection is enough */
- set_server_check_status(s, HCHK_STATUS_L4OK, NULL);
- goto out_wakeup;
+ __conn_data_want_recv(conn); /* prepare for reading reply */
+ goto out_nowake;
}
+ goto out_poll;
}
out_wakeup:
task_wakeup(t, TASK_WOKEN_IO);
@@ -901,16 +844,13 @@
{
int len;
struct server *s = conn->owner;
- int fd = conn->t.sock.fd;
struct task *t = s->check.task;
char *desc;
int done;
unsigned short msglen;
- if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR)) {
+ if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR))
conn->flags |= CO_FL_ERROR;
- return;
- }
if (unlikely((s->result & SRV_CHK_ERROR) || (conn->flags & CO_FL_ERROR))) {
/* in case of TCP only, this tells us if the connection failed */
@@ -934,31 +874,22 @@
*/
done = 0;
- for (len = 0; s->check.bi->i < s->check.bi->size; s->check.bi->i += len) {
- len = recv(fd, s->check.bi->data + s->check.bi->i, s->check.bi->size - s->check.bi->i, 0);
- if (len <= 0)
- break;
- }
- if (len == 0)
- done = 1; /* connection hangup received */
- else if (len < 0 && errno != EAGAIN) {
- /* Report network errors only if we got no other data. Otherwise
- * we'll let the upper layers decide whether the response is OK
- * or not. It is very common that an RST sent by the server is
- * reported as an error just after the last data chunk.
- */
+ len = conn->xprt->rcv_buf(conn, s->check.bi, s->check.bi->size);
+ if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH)) {
done = 1;
- if (!s->check.bi->i) {
+ if ((conn->flags & CO_FL_ERROR) && !s->check.bi->i) {
+ /* Report network errors only if we got no other data. Otherwise
+ * we'll let the upper layers decide whether the response is OK
+ * or not. It is very common that an RST sent by the server is
+ * reported as an error just after the last data chunk.
+ */
if (!(s->result & SRV_CHK_ERROR))
set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
goto out_wakeup;
}
}
- if (len >= 0 && (conn->flags & CO_FL_WAIT_L4_CONN))
- conn->flags &= ~CO_FL_WAIT_L4_CONN;
-
/* Intermediate or complete response received.
* Terminate string in check.bi->data buffer.
*/
@@ -1237,7 +1168,10 @@
s->check.bi->i = 0;
/* Close the connection... */
- shutdown(fd, SHUT_RDWR);
+ if (conn->xprt && conn->xprt->shutw)
+ conn->xprt->shutw(conn, 0);
+ if (!(conn->flags & (CO_FL_WAIT_L4_CONN|CO_FL_SOCK_WR_SH)))
+ shutdown(conn->t.sock.fd, SHUT_RDWR);
__conn_data_stop_recv(conn);
task_wakeup(t, TASK_WOKEN_IO);
return;
@@ -1453,7 +1387,34 @@
goto new_chk;
}
else {
- /* there was a test running */
+ /* there was a test running.
+ * First, let's check whether there was an uncaught error,
+ * which can happen on connect timeout or error.
+ */
+ if (s->result == SRV_CHK_UNKNOWN) {
+ if (conn->flags & CO_FL_CONNECTED) {
+ /* good TCP connection is enough */
+ if (s->check.use_ssl)
+ set_server_check_status(s, HCHK_STATUS_L6OK, NULL);
+ else
+ set_server_check_status(s, HCHK_STATUS_L4OK, NULL);
+ }
+ else if (conn->flags & CO_FL_WAIT_L4_CONN) {
+ /* L4 failed */
+ if (conn->flags & CO_FL_ERROR)
+ set_server_check_status(s, HCHK_STATUS_L4CON, NULL);
+ else
+ set_server_check_status(s, HCHK_STATUS_L4TOUT, NULL);
+ }
+ else if (conn->flags & CO_FL_WAIT_L6_CONN) {
+ /* L6 failed */
+ if (conn->flags & CO_FL_ERROR)
+ set_server_check_status(s, HCHK_STATUS_L6RSP, NULL);
+ else
+ set_server_check_status(s, HCHK_STATUS_L6TOUT, NULL);
+ }
+ }
+
if ((s->result & (SRV_CHK_ERROR|SRV_CHK_RUNNING)) == SRV_CHK_RUNNING) { /* good server detected */
/* we may have to add/remove this server from the LB group */
if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) {