[MEDIUM] implement "http-check disable-on-404" for graceful shutdown
When an HTTP server returns "404 not found", it indicates that at least
part of it is still running. For this reason, it can be convenient for
application administrators to be able to treat code 404 as a valid
response from a server which no longer wants to participate in load
balancing. This is useful to seamlessly exclude a server from a farm
without acting on the load balancer. For instance, let's consider that
haproxy checks for the "/alive" file. To enable load balancing on a
server, the admin would simply do :
# touch /var/www/alive
And to disable the server, he would simply do :
# rm /var/www/alive
Another immediate gain from doing this is that it is now possible to
send NOTICE messages instead of ALERT messages when a server is first
disabled, then goes down. This provides a graceful shutdown method.
To enable this behaviour, specify "http-check disable-on-404" in the
backend.
diff --git a/include/types/proxy.h b/include/types/proxy.h
index 2a75fad..fd48e70 100644
--- a/include/types/proxy.h
+++ b/include/types/proxy.h
@@ -102,6 +102,7 @@
#define PR_O_TCPSPLICE 0x08000000 /* delegate data transfer to linux kernel's tcp_splice */
#define PR_O_CONTSTATS 0x10000000 /* continous counters */
#define PR_O_HTTP_PROXY 0x20000000 /* Enable session to use HTTP proxy operations */
+#define PR_O_DISABLE404 0x40000000 /* Disable a server on a 404 response to a health-check */
/* This structure is used to apply fast weighted round robin on a server group */
struct fwrr_group {
diff --git a/include/types/server.h b/include/types/server.h
index 47c152c..b50e193 100644
--- a/include/types/server.h
+++ b/include/types/server.h
@@ -41,6 +41,7 @@
#define SRV_MAPPORTS 0x0004 /* this server uses mapped ports */
#define SRV_BIND_SRC 0x0008 /* this server uses a specific source address */
#define SRV_CHECKED 0x0010 /* this server needs to be checked */
+#define SRV_GOINGDOWN 0x0080 /* this server says that it's going down (404); must not share a bit with SRV_TPROXY_* (0x0020/0x0040). NOTE(review): confirm 0x0080 is unused in the rest of this header */
#define SRV_TPROXY_ADDR 0x0020 /* bind to this non-local address to reach this server */
#define SRV_TPROXY_CIP 0x0040 /* bind to the client's IP address to reach this server */
diff --git a/src/backend.c b/src/backend.c
index a8676b3..cc12f7a 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -60,6 +60,8 @@
{
if (!weight)
return 0;
+ if (state & SRV_GOINGDOWN)
+ return 0;
if (!(state & SRV_RUNNING))
return 0;
return 1;
@@ -193,7 +195,8 @@
int max = 0;
best = NULL;
for (cur = px->srv; cur; cur = cur->next) {
- if ((cur->state & (SRV_RUNNING | SRV_BACKUP)) == flag) {
+ if (flag == (cur->state &
+ (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
int v;
/* If we are forced to return only one server, we don't want to
diff --git a/src/cfgparse.c b/src/cfgparse.c
index affb956..f283332 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -1292,6 +1292,19 @@
/* enable reconnections to dispatch */
curproxy->options |= PR_O_REDISP;
}
+ else if (!strcmp(args[0], "http-check")) {
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ return 0;
+
+ if (strcmp(args[1], "disable-on-404") == 0) {
+ /* enable a graceful server shutdown on an HTTP 404 response */
+ curproxy->options |= PR_O_DISABLE404;
+ }
+ else {
+ Alert("parsing [%s:%d] : '%s' only supports 'disable-on-404'.\n", file, linenum, args[0]);
+ return -1;
+ }
+ }
#ifdef TPROXY
else if (!strcmp(args[0], "transparent")) {
/* enable transparent proxy connections */
@@ -2525,6 +2538,11 @@
Warning("parsing %s : Layer 7 hash not possible for %s '%s'. Falling back to round robin.\n",
file, proxy_type_str(curproxy), curproxy->id);
}
+ if (curproxy->options & PR_O_DISABLE404) {
+ curproxy->options &= ~PR_O_DISABLE404;
+ Warning("parsing %s : '%s' will be ignored for %s '%s' (requires HTTP mode).\n",
+ file, "disable-on-404", proxy_type_str(curproxy), curproxy->id);
+ }
}
if (curproxy->mode == PR_MODE_HEALTH) { /* TCP PROXY or HEALTH CHECK */
@@ -2540,6 +2558,11 @@
file, curproxy->id);
cfgerr++;
}
+ if ((curproxy->options & PR_O_DISABLE404) && !(curproxy->options & PR_O_HTTP_CHK)) {
+ curproxy->options &= ~PR_O_DISABLE404;
+ Warning("parsing %s : '%s' will be ignored for %s '%s' (requires 'option httpchk').\n",
+ file, "disable-on-404", proxy_type_str(curproxy), curproxy->id);
+ }
}
/* if a default backend was specified, let's find it */
diff --git a/src/checks.c b/src/checks.c
index e02b891..d3aa6c8 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -45,6 +45,69 @@
#include <import/ip_tproxy.h>
#endif
+/* Marks backend <be> as having just gone down: stores the current time in
+ * last_change, increments down_trans, then reports "no server available"
+ * through Alert() and a LOG_EMERG log. It does not check server counts itself. */
+static void set_backend_down(struct proxy *be)
+{
+ be->last_change = now.tv_sec;
+ be->down_trans++;
+
+ Alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
+ send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
+}
+
+/* Walks the pending-connection list of server <s>. For each session whose
+ * backend has PR_O_REDISP set, the server assignment is cleared, the pendconn
+ * is freed and the task is woken up. Others are left queued. Returns the count. */
+static int redistribute_pending(struct server *s)
+{
+ struct pendconn *pc, *pc_bck, *pc_end;
+ int xferred = 0;
+
+ FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) {
+ struct session *sess = pc->sess;
+ if (sess->be->options & PR_O_REDISP) {
+ /* The REDISP option was specified. We will ignore
+ * cookie and force to balance or use the dispatcher.
+ */
+ sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
+ sess->srv = NULL; /* it's left to the dispatcher to choose a server */
+ http_flush_cookie_flags(&sess->txn);
+ pendconn_free(pc);
+ task_wakeup(sess->task);
+ xferred++;
+ }
+ }
+ return xferred;
+}
+
+/* Takes pending connections obtained through pendconn_from_px(s->proxy) and
+ * assigns each one to server <s>, freeing the pendconn and waking the session's
+ * task. Nothing is taken when s->eweight is zero. When s->maxconn is set, at
+ * most srv_dynamic_maxconn(s) connections are taken; otherwise the loop runs
+ * until no pendconn is returned. Returns the number of connections taken. */
+static int check_for_pending(struct server *s)
+{
+ int xferred;
+
+ if (!s->eweight)
+ return 0;
+
+ for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) {
+ struct session *sess;
+ struct pendconn *p;
+
+ p = pendconn_from_px(s->proxy);
+ if (!p)
+ break;
+ p->sess->srv = s;
+ sess = p->sess;
+ pendconn_free(p);
+ task_wakeup(sess->task);
+ }
+ return xferred;
+}
/* Sets server <s> down, notifies by all available means, recounts the
* remaining servers on the proxy and transfers queued sessions whenever
@@ -53,36 +116,20 @@
*/
static void set_server_down(struct server *s)
{
- struct pendconn *pc, *pc_bck, *pc_end;
- struct session *sess;
int xferred;
if (s->health == s->rise) {
+ int srv_was_paused = s->state & SRV_GOINGDOWN;
s->last_change = now.tv_sec;
- s->state &= ~SRV_RUNNING;
+ s->state &= ~(SRV_RUNNING | SRV_GOINGDOWN);
s->proxy->lbprm.set_server_status_down(s);
/* we might have sessions queued on this server and waiting for
* a connection. Those which are redispatchable will be queued
* to another server or to the proxy itself.
*/
- xferred = 0;
- FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) {
- sess = pc->sess;
- if ((sess->be->options & PR_O_REDISP)) {
- /* The REDISP option was specified. We will ignore
- * cookie and force to balance or use the dispatcher.
- */
- sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
- sess->srv = NULL; /* it's left to the dispatcher to choose a server */
- http_flush_cookie_flags(&sess->txn);
- pendconn_free(pc);
- task_wakeup(sess->task);
- xferred++;
- }
- }
-
+ xferred = redistribute_pending(s);
sprintf(trash, "%sServer %s/%s is DOWN. %d active and %d backup servers left.%s"
" %d sessions active, %d requeued, %d remaining in queue.\n",
s->state & SRV_BACKUP ? "Backup " : "",
@@ -91,15 +138,16 @@
s->cur_sess, xferred, s->nbpend);
Warning("%s", trash);
- send_log(s->proxy, LOG_ALERT, "%s", trash);
- if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
- s->proxy->last_change = now.tv_sec;
- s->proxy->down_trans++;
+ /* we don't send an alert if the server was previously paused */
+ if (srv_was_paused)
+ send_log(s->proxy, LOG_NOTICE, "%s", trash);
+ else
+ send_log(s->proxy, LOG_ALERT, "%s", trash);
- Alert("%s '%s' has no server available!\n", proxy_type_str(s->proxy), s->proxy->id);
- send_log(s->proxy, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(s->proxy), s->proxy->id);
- }
+ /* a paused server was already taken out of the LB set when it was disabled (backend-down was reported then if needed), so skip it here to avoid counting the same outage twice */
+ if (!srv_was_paused && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
s->down_trans++;
}
s->health = 0; /* failure */
@@ -260,6 +308,12 @@
/* check the reply : HTTP/1.X 2xx and 3xx are OK */
if (trash[9] == '2' || trash[9] == '3')
s->result |= SRV_CHK_RUNNING;
+ else if ((s->proxy->options & PR_O_DISABLE404) &&
+ (s->state & SRV_RUNNING) &&
+ (memcmp(&trash[9], "404", 3) == 0)) {
+ /* 404 may be accepted as "stopping" only if the server was up */
+ s->result |= SRV_CHK_RUNNING | SRV_CHK_DISABLE;
+ }
else
s->result |= SRV_CHK_ERROR;
}
@@ -301,6 +355,7 @@
__label__ new_chk, out;
struct server *s = t->context;
struct sockaddr_in sa;
+ int xferred;
int fd;
int rv;
@@ -475,12 +530,63 @@
if ((s->result & (SRV_CHK_ERROR|SRV_CHK_RUNNING)) == SRV_CHK_RUNNING) { /* good server detected */
//fprintf(stderr, "process_chk: 9\n");
+ /* we may have to add/remove this server from the LB group */
+ if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) {
+ if ((s->state & SRV_GOINGDOWN) &&
+ ((s->result & (SRV_CHK_RUNNING|SRV_CHK_DISABLE)) == SRV_CHK_RUNNING)) {
+ /* server enabled again */
+ s->state &= ~SRV_GOINGDOWN;
+ s->proxy->lbprm.set_server_status_up(s);
+
+ /* check if we can handle some connections queued at the proxy. We
+ * will take as many as we can handle.
+ */
+ xferred = check_for_pending(s);
+
+ sprintf(trash,
+ "Load-balancing on %sServer %s/%s is enabled again. %d active and %d backup servers online.%s"
+ " %d sessions requeued, %d total in queue.\n",
+ s->state & SRV_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id, s->proxy->srv_act, s->proxy->srv_bck,
+ (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
+ xferred, s->nbpend);
+
+ Warning("%s", trash);
+ send_log(s->proxy, LOG_NOTICE, "%s", trash);
+ }
+ else if (!(s->state & SRV_GOINGDOWN) &&
+ ((s->result & (SRV_CHK_RUNNING | SRV_CHK_DISABLE)) ==
+ (SRV_CHK_RUNNING | SRV_CHK_DISABLE))) {
+ /* server disabled */
+ s->state |= SRV_GOINGDOWN;
+ s->proxy->lbprm.set_server_status_down(s);
+
+ /* we might have sessions queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = redistribute_pending(s);
+
+ sprintf(trash,
+ "Load-balancing on %sServer %s/%s is disabled. %d active and %d backup servers online.%s"
+ " %d sessions requeued, %d total in queue.\n",
+ s->state & SRV_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id, s->proxy->srv_act, s->proxy->srv_bck,
+ (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
+ xferred, s->nbpend);
+
+ Warning("%s", trash);
+
+ send_log(s->proxy, LOG_NOTICE, "%s", trash);
+ if (!s->proxy->srv_bck && !s->proxy->srv_act)
+ set_backend_down(s->proxy);
+ }
+ }
+
if (s->health < s->rise + s->fall - 1) {
s->health++; /* was bad, stays for a while */
if (s->health == s->rise) {
- int xferred;
-
if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
if (s->proxy->last_change < now.tv_sec) // ignore negative times
s->proxy->down_time += now.tv_sec - s->proxy->last_change;
@@ -497,18 +603,7 @@
/* check if we can handle some connections queued at the proxy. We
* will take as many as we can handle.
*/
- for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) {
- struct session *sess;
- struct pendconn *p;
-
- p = pendconn_from_px(s->proxy);
- if (!p)
- break;
- p->sess->srv = s;
- sess = p->sess;
- pendconn_free(p);
- task_wakeup(sess->task);
- }
+ xferred = check_for_pending(s);
sprintf(trash,
"%sServer %s/%s is UP. %d active and %d backup servers online.%s"