MAJOR: checks: make use of the connection layer to send checks
This is a first step, we now use the connection layer without the data
layers (send/recv are still used by hand). The connection is established
using tcp_connect_server() and raw_sock is assumed and forced for now.
fdtab is not manipulated anymore and polling is managed via the connection
layer.
It becomes quite clear that the server needs a second ->ctrl and ->xprt
dedicated to the checks.
diff --git a/src/cfgparse.c b/src/cfgparse.c
index af89219..e4c4111 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -3978,7 +3978,6 @@
newsrv->uweight = newsrv->iweight
= curproxy->defsrv.iweight;
- newsrv->curfd = -1; /* no health-check in progress */
newsrv->health = newsrv->rise; /* up, but will fall down at first failure */
cur_arg = 3;
@@ -4590,6 +4589,7 @@
goto out;
}
+ newsrv->check_conn->t.sock.fd = -1; /* no check in progress yet */
newsrv->check_status = HCHK_STATUS_INI;
newsrv->state |= SRV_CHECKED;
}
diff --git a/src/checks.c b/src/checks.c
index cb59d58..af11c1d 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -44,6 +44,7 @@
#include <proto/proto_http.h>
#include <proto/proto_tcp.h>
#include <proto/proxy.h>
+#include <proto/raw_sock.h>
#include <proto/server.h>
#include <proto/session.h>
#include <proto/stream_interface.h>
@@ -764,13 +765,18 @@
* it sends the request. In other cases, it calls set_server_check_status()
* to set s->check_status, s->check_duration and s->result.
*/
-static void event_srv_chk_w(int fd)
+static void event_srv_chk_w(struct connection *conn)
{
- __label__ out_wakeup, out_nowake, out_poll, out_error;
- struct task *t = fdtab[fd].owner;
- struct server *s = t->context;
+ struct server *s = conn->owner;
+ int fd = conn->t.sock.fd;
+ struct task *t = s->check;
+
+ if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR)) {
+ conn->flags |= CO_FL_ERROR;
+ return;
+ }
- if (unlikely((s->check_conn->flags & CO_FL_ERROR) || (fdtab[fd].ev & FD_POLL_ERR))) {
+ if (unlikely(conn->flags & CO_FL_ERROR)) {
int skerr, err = errno;
socklen_t lskerr = sizeof(skerr);
@@ -781,8 +787,10 @@
goto out_error;
}
- /* here, we know that the connection is established */
+ if (conn->flags & CO_FL_HANDSHAKE)
+ return;
+ /* here, we know that the connection is established */
if (!(s->result & SRV_CHK_ERROR)) {
/* we don't want to mark 'UP' a server on which we detected an error earlier */
if (s->proxy->options2 & PR_O2_CHK_ANY) {
@@ -813,12 +821,15 @@
ret = send(fd, check_req, check_len, MSG_DONTWAIT | MSG_NOSIGNAL);
if (ret == check_len) {
+ if (conn->flags & CO_FL_WAIT_L4_CONN)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
/* we allow up to <timeout.check> if nonzero for a responce */
if (s->proxy->timeout.check) {
t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
task_queue(t);
}
- fd_want_recv(fd); /* prepare for reading reply */
+ __conn_data_want_recv(conn); /* prepare for reading reply */
goto out_nowake;
}
else if (ret == 0 || errno == EAGAIN)
@@ -868,6 +879,9 @@
goto out_error;
}
+ if (conn->flags & CO_FL_WAIT_L4_CONN)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
/* good TCP connection is enough */
set_server_check_status(s, HCHK_STATUS_L4OK, NULL);
goto out_wakeup;
@@ -876,17 +890,13 @@
out_wakeup:
task_wakeup(t, TASK_WOKEN_IO);
out_nowake:
- fd_stop_send(fd); /* nothing more to write */
- fdtab[fd].ev &= ~FD_POLL_OUT;
+ __conn_data_stop_send(conn); /* nothing more to write */
return;
out_poll:
- /* The connection is still pending. We'll have to poll it
- * before attempting to go further. */
- fdtab[fd].ev &= ~FD_POLL_OUT;
- fd_poll_send(fd);
+ __conn_data_poll_send(conn);
return;
out_error:
- s->check_conn->flags |= CO_FL_ERROR;
+ conn->flags |= CO_FL_ERROR;
goto out_wakeup;
}
@@ -905,17 +915,22 @@
* call it with a proper error status like HCHK_STATUS_L7STS, HCHK_STATUS_L6RSP,
* etc.
*/
-static void event_srv_chk_r(int fd)
+static void event_srv_chk_r(struct connection *conn)
{
- __label__ out_wakeup;
int len;
- struct task *t = fdtab[fd].owner;
- struct server *s = t->context;
+ struct server *s = conn->owner;
+ int fd = conn->t.sock.fd;
+ struct task *t = s->check;
char *desc;
int done;
unsigned short msglen;
+ if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_WAIT_WR)) {
+ conn->flags |= CO_FL_ERROR;
+ return;
+ }
+
- if (unlikely((s->result & SRV_CHK_ERROR) || (s->check_conn->flags & CO_FL_ERROR))) {
+ if (unlikely((s->result & SRV_CHK_ERROR) || (conn->flags & CO_FL_ERROR))) {
/* in case of TCP only, this tells us if the connection failed */
if (!(s->result & SRV_CHK_ERROR))
set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
@@ -923,6 +938,9 @@
goto out_wakeup;
}
+ if (conn->flags & CO_FL_HANDSHAKE)
+ return;
+
/* Warning! Linux returns EAGAIN on SO_ERROR if data are still available
* but the connection was closed on the remote end. Fortunately, recv still
* works correctly and we don't need to do the getsockopt() on linux.
@@ -956,6 +974,9 @@
}
}
+ if (len >= 0 && (conn->flags & CO_FL_WAIT_L4_CONN))
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
/* Intermediate or complete response received.
* Terminate string in check_data buffer.
*/
@@ -1227,7 +1248,7 @@
out_wakeup:
if (s->result & SRV_CHK_ERROR)
- s->check_conn->flags |= CO_FL_ERROR;
+ conn->flags |= CO_FL_ERROR;
/* Reset the check buffer... */
*s->check_data = '\0';
@@ -1235,32 +1256,35 @@
/* Close the connection... */
shutdown(fd, SHUT_RDWR);
- fd_stop_recv(fd);
+ __conn_data_stop_recv(conn);
task_wakeup(t, TASK_WOKEN_IO);
- fdtab[fd].ev &= ~FD_POLL_IN;
return;
wait_more_data:
- fdtab[fd].ev &= ~FD_POLL_IN;
- fd_poll_recv(fd);
+ __conn_data_poll_recv(conn);
}
-/* I/O call back for the health checks. Returns 0. */
-static int check_iocb(int fd)
+/*
+ * This function is used only for server health-checks. It handles connection
+ * status updates including errors. If necessary, it wakes the check task up.
+ * It always returns 0.
+ */
+static int wake_srv_chk(struct connection *conn)
{
- int e;
+ struct server *s = conn->owner;
- if (!fdtab[fd].owner)
- return 0;
+ if (unlikely(conn->flags & CO_FL_ERROR))
+ task_wakeup(s->check, TASK_WOKEN_IO);
- e = fdtab[fd].ev;
- if (e & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR))
- event_srv_chk_r(fd);
- if (e & (FD_POLL_OUT | FD_POLL_ERR))
- event_srv_chk_w(fd);
return 0;
}
+struct data_cb check_conn_cb = {
+ .recv = event_srv_chk_r,
+ .send = event_srv_chk_w,
+ .wake = wake_srv_chk,
+};
+
/*
* updates the server's weight during a warmup stage. Once the final weight is
* reached, the task automatically stops. Note that any server status change
@@ -1312,9 +1336,10 @@
{
int attempts = 0;
struct server *s = t->context;
- struct sockaddr_storage sa;
+ struct connection *conn = s->check_conn;
int fd;
int rv;
+ int ret;
new_chk:
if (attempts++ > 0) {
@@ -1323,7 +1348,8 @@
t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
return t;
}
- fd = s->curfd;
+
+ fd = conn->t.sock.fd;
if (fd < 0) { /* no check currently running */
if (!tick_is_expired(t->expire, now_ms)) /* woke up too early */
return t;
@@ -1339,188 +1365,58 @@
/* we'll initiate a new check */
set_server_check_status(s, HCHK_STATUS_START, NULL);
- if ((fd = socket(s->addr.ss_family, SOCK_STREAM, IPPROTO_TCP)) != -1) {
- if ((fd < global.maxsock) &&
- (fcntl(fd, F_SETFL, O_NONBLOCK) != -1) &&
- (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) != -1)) {
- //fprintf(stderr, "process_chk: 3\n");
-
- if (s->proxy->options & PR_O_TCP_NOLING) {
- /* We don't want to useless data */
- setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
- }
-
- if (is_addr(&s->check_addr))
- /* we'll connect to the check addr specified on the server */
- sa = s->check_addr;
- else
- /* we'll connect to the addr on the server */
- sa = s->addr;
-
- set_host_port(&sa, s->check_port);
-
- /* allow specific binding :
- * - server-specific at first
- * - proxy-specific next
- */
- if (s->state & SRV_BIND_SRC) {
- struct sockaddr_storage *remote = NULL;
- int ret, flags = 0;
-
-#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
- if ((s->state & SRV_TPROXY_MASK) == SRV_TPROXY_ADDR) {
- remote = &s->tproxy_addr;
- flags = 3;
- }
-#endif
-#ifdef SO_BINDTODEVICE
- /* Note: this might fail if not CAP_NET_RAW */
- if (s->iface_name)
- setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
- s->iface_name, s->iface_len + 1);
-#endif
- if (s->sport_range) {
- int bind_attempts = 10; /* should be more than enough to find a spare port */
- struct sockaddr_storage src;
-
- ret = 1;
- src = s->source_addr;
+ set_target_server(&conn->target, s);
+ conn_prepare(conn, &check_conn_cb, s->proto, &raw_sock, s);
- do {
- /* note: in case of retry, we may have to release a previously
- * allocated port, hence this loop's construct.
- */
- port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
- fdinfo[fd].port_range = NULL;
-
- if (!bind_attempts)
- break;
- bind_attempts--;
-
- fdinfo[fd].local_port = port_range_alloc_port(s->sport_range);
- if (!fdinfo[fd].local_port)
- break;
-
- fdinfo[fd].port_range = s->sport_range;
- set_host_port(&src, fdinfo[fd].local_port);
-
- ret = tcp_bind_socket(fd, flags, &src, remote);
- } while (ret != 0); /* binding NOK */
- }
- else {
- ret = tcp_bind_socket(fd, flags, &s->source_addr, remote);
- }
+ if (is_addr(&s->check_addr))
+ /* we'll connect to the check addr specified on the server */
+ conn->addr.to = s->check_addr;
+ else
+ /* we'll connect to the addr on the server */
+ conn->addr.to = s->addr;
- if (ret) {
- set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
- switch (ret) {
- case 1:
- Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
- s->proxy->id, s->id);
- break;
- case 2:
- Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
- s->proxy->id, s->id);
- break;
- }
- }
- }
- else if (s->proxy->options & PR_O_BIND_SRC) {
- struct sockaddr_storage *remote = NULL;
- int ret, flags = 0;
+ set_host_port(&conn->addr.to, s->check_port);
-#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
- if ((s->proxy->options & PR_O_TPXY_MASK) == PR_O_TPXY_ADDR) {
- remote = &s->proxy->tproxy_addr;
- flags = 3;
- }
-#endif
-#ifdef SO_BINDTODEVICE
- /* Note: this might fail if not CAP_NET_RAW */
- if (s->proxy->iface_name)
- setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
- s->proxy->iface_name, s->proxy->iface_len + 1);
-#endif
- ret = tcp_bind_socket(fd, flags, &s->proxy->source_addr, remote);
- if (ret) {
- set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
- switch (ret) {
- case 1:
- Alert("Cannot bind to source address before connect() for %s '%s'. Aborting.\n",
- proxy_type_str(s->proxy), s->proxy->id);
- break;
- case 2:
- Alert("Cannot bind to tproxy source address before connect() for %s '%s'. Aborting.\n",
- proxy_type_str(s->proxy), s->proxy->id);
- break;
- }
- }
- }
+ /* It can return one of :
+ * - SN_ERR_NONE if everything's OK
+ * - SN_ERR_SRVTO if there are no more servers
+ * - SN_ERR_SRVCL if the connection was refused by the server
+ * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SN_ERR_INTERNAL for any other purely internal errors
+ * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
+ */
+ ret = tcp_connect_server(conn, 1);
+ conn->flags |= CO_FL_WAKE_DATA;
- if (s->result == SRV_CHK_UNKNOWN) {
-#if defined(TCP_QUICKACK)
- /* disabling tcp quick ack now allows
- * the request to leave the machine with
- * the first ACK.
- */
- if (s->proxy->options2 & PR_O2_SMARTCON)
- setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, (char *) &zero, sizeof(zero));
-#endif
- if ((connect(fd, (struct sockaddr *)&sa, get_addr_len(&sa)) != -1) || (errno == EINPROGRESS)) {
- /* OK, connection in progress or established */
-
- //fprintf(stderr, "process_chk: 4\n");
-
- s->curfd = fd; /* that's how we know a test is in progress ;-) */
- s->check_conn->flags = CO_FL_WAIT_L4_CONN; /* TCP connection pending */
- fd_insert(fd);
- fdtab[fd].owner = t;
- fdtab[fd].iocb = &check_iocb;
- fd_want_send(fd); /* for connect status */
-#ifdef DEBUG_FULL
- assert (!EV_FD_ISSET(fd, DIR_RD));
-#endif
- //fprintf(stderr, "process_chk: 4+, %lu\n", __tv_to_ms(&s->proxy->timeout.connect));
- /* we allow up to min(inter, timeout.connect) for a connection
- * to establish but only when timeout.check is set
- * as it may be to short for a full check otherwise
- */
- t->expire = tick_add(now_ms, MS_TO_TICKS(s->inter));
+ switch (ret) {
+ case SN_ERR_NONE:
+ break;
+ case SN_ERR_SRVTO: /* ETIMEDOUT */
+ case SN_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
+ set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
+ break;
+ case SN_ERR_PRXCOND:
+ case SN_ERR_RESOURCE:
+ case SN_ERR_INTERNAL:
+ set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
+ break;
+ }
- if (s->proxy->timeout.check && s->proxy->timeout.connect) {
- int t_con = tick_add(now_ms, s->proxy->timeout.connect);
- t->expire = tick_first(t->expire, t_con);
- }
- return t;
- }
- else if (errno != EALREADY && errno != EISCONN && errno != EAGAIN) {
- /* a real error */
+ if (s->result == SRV_CHK_UNKNOWN) {
+ /* connection attempt was started */
- switch (errno) {
- /* FIXME: is it possible to get ECONNREFUSED/ENETUNREACH with O_NONBLOCK? */
- case ECONNREFUSED:
- case ENETUNREACH:
- set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
- break;
+ /* we allow up to min(inter, timeout.connect) for a connection
+ * to establish but only when timeout.check is set
+ * as it may be to short for a full check otherwise
+ */
+ t->expire = tick_add(now_ms, MS_TO_TICKS(s->inter));
- default:
- set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno));
- }
- }
- }
+ if (s->proxy->timeout.check && s->proxy->timeout.connect) {
+ int t_con = tick_add(now_ms, s->proxy->timeout.connect);
+ t->expire = tick_first(t->expire, t_con);
}
- port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
- fdinfo[fd].port_range = NULL;
- close(fd); /* socket creation error */
- }
- else
- set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno));
-
- if (s->result == SRV_CHK_UNKNOWN) { /* nothing done */
- //fprintf(stderr, "process_chk: 6\n");
- while (tick_is_expired(t->expire, now_ms))
- t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
- goto new_chk; /* may be we should initialize a new check */
+ return t;
}
/* here, we have seen a failure */
@@ -1566,8 +1462,10 @@
set_server_up(s);
}
- s->curfd = -1; /* no check running anymore */
- fd_delete(fd);
+ conn->t.sock.fd = -1; /* no check running anymore */
+ conn_xprt_close(conn);
+ if (conn->ctrl)
+ fd_delete(fd);
rv = 0;
if (global.spread_checks > 0) {
@@ -1579,7 +1477,7 @@
}
else if ((s->result & SRV_CHK_ERROR) || tick_is_expired(t->expire, now_ms)) {
if (!(s->result & SRV_CHK_ERROR)) {
- if (!EV_FD_ISSET(fd, DIR_RD)) {
+ if (conn->flags & CO_FL_WAIT_L4_CONN) {
set_server_check_status(s, HCHK_STATUS_L4TOUT, NULL);
} else {
if ((s->proxy->options2 & PR_O2_CHK_ANY) == PR_O2_SSL3_CHK)
@@ -1596,8 +1494,10 @@
}
else
set_server_down(s);
- s->curfd = -1;
- fd_delete(fd);
+ conn->t.sock.fd = -1;
+ conn_xprt_close(conn);
+ if (conn->ctrl)
+ fd_delete(fd);
rv = 0;
if (global.spread_checks > 0) {