MAJOR: servers: propagate server status changes asynchronously.
In order to prepare multi-thread development, code was re-worked
to propagate changes asynchronously.
Servers with pending status changes are registered in a list,
which is processed and emptied only once per 'run poll' loop iteration.
Operational status changes are performed before administrative
status changes.
In case of multiple operational status changes or admin status
changes in the same 'run poll' loop iteration, those changes are
merged so that only the final targeted status is applied.
diff --git a/include/proto/server.h b/include/proto/server.h
index 0b238a5..1a649f5 100644
--- a/include/proto/server.h
+++ b/include/proto/server.h
@@ -50,6 +50,9 @@
int srv_set_addr_via_libc(struct server *srv, int *err_code);
int srv_init_addr(void);
struct server *cli_find_server(struct appctx *appctx, char *arg);
+void servers_update_status(void);
+
+extern struct list updated_servers;
/* functions related to server name resolution */
int snr_update_srv_status(struct server *s, int has_no_ip);
diff --git a/include/types/server.h b/include/types/server.h
index 75349d7..75054a0 100644
--- a/include/types/server.h
+++ b/include/types/server.h
@@ -299,6 +299,9 @@
int nb_high;
} tmpl_info;
struct dns_srvrq *srvrq; /* Pointer representing the DNS SRV requeest, if any */
+ struct list update_status; /* to attach to list of servers changing status */
+ char op_st_chg_reason[48]; /* operational status change's reason */
+ char adm_st_chg_cause[48]; /* administrative status change's cause */
};
/* Descriptor for a "server" keyword. The ->parse() function returns 0 in case of
diff --git a/src/haproxy.c b/src/haproxy.c
index 2fa00a4..a5e4a38 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -2182,6 +2182,9 @@
cur_poller.poll(&cur_poller, next);
fd_process_cached_events();
applet_run_active();
+
+ /* Commit server status changes */
+ servers_update_status();
}
}
diff --git a/src/server.c b/src/server.c
index b5904e9..65c2c14 100644
--- a/src/server.c
+++ b/src/server.c
@@ -44,6 +44,8 @@
#include <proto/dns.h>
#include <netinet/tcp.h>
+struct list updated_servers = LIST_HEAD_INIT(updated_servers);
+
static void srv_update_state(struct server *srv, int version, char **params);
static int srv_apply_lastaddr(struct server *srv, int *err_code);
static int srv_set_fqdn(struct server *srv, const char *fqdn);
@@ -819,44 +821,17 @@
void srv_set_stopped(struct server *s, const char *reason)
{
struct server *srv;
- int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
- int srv_was_stopping = (s->next_state == SRV_ST_STOPPING);
- int log_level;
- int xferred;
- if ((s->next_admin & SRV_ADMF_MAINT) || s->next_state == SRV_ST_STOPPED)
+ if ((s->cur_admin & SRV_ADMF_MAINT) || s->next_state == SRV_ST_STOPPED)
return;
- s->last_change = now.tv_sec;
s->next_state = SRV_ST_STOPPED;
- if (s->proxy->lbprm.set_server_status_down)
- s->proxy->lbprm.set_server_status_down(s);
-
- if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
- srv_shutdown_streams(s, SF_ERR_DOWN);
-
- /* we might have streams queued on this server and waiting for
- * a connection. Those which are redispatchable will be queued
- * to another server or to the proxy itself.
- */
- xferred = pendconn_redistribute(s);
-
- chunk_printf(&trash,
- "%sServer %s/%s is DOWN", s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id);
-
- srv_append_status(&trash, s, reason, xferred, 0);
- Warning("%s.\n", trash.str);
-
- /* we don't send an alert if the server was previously paused */
- log_level = srv_was_stopping ? LOG_NOTICE : LOG_ALERT;
- send_log(s->proxy, log_level, "%s.\n", trash.str);
- send_email_alert(s, log_level, "%s", trash.str);
-
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
+ if (reason)
+ strlcpy2(s->op_st_chg_reason, reason, sizeof(s->op_st_chg_reason));
- s->counters.down_trans++;
+ /* Register changes to be applied asynchronously */
+ if (LIST_ISEMPTY(&s->update_status))
+ LIST_ADDQ(&updated_servers, &s->update_status);
for (srv = s->trackers; srv; srv = srv->tracknext)
srv_set_stopped(srv, NULL);
@@ -873,55 +848,23 @@
void srv_set_running(struct server *s, const char *reason)
{
struct server *srv;
- int xferred;
- if (s->next_admin & SRV_ADMF_MAINT)
+ if (s->cur_admin & SRV_ADMF_MAINT)
return;
if (s->next_state == SRV_ST_STARTING || s->next_state == SRV_ST_RUNNING)
return;
- if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
- if (s->proxy->last_change < now.tv_sec) // ignore negative times
- s->proxy->down_time += now.tv_sec - s->proxy->last_change;
- s->proxy->last_change = now.tv_sec;
- }
-
- if (s->next_state == SRV_ST_STOPPED && s->last_change < now.tv_sec) // ignore negative times
- s->down_time += now.tv_sec - s->last_change;
-
- s->last_change = now.tv_sec;
-
s->next_state = SRV_ST_STARTING;
- if (s->slowstart > 0)
- task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
- else
- s->next_state = SRV_ST_RUNNING;
-
- server_recalc_eweight(s);
-
- /* If the server is set with "on-marked-up shutdown-backup-sessions",
- * and it's not a backup server and its effective weight is > 0,
- * then it can accept new connections, so we shut down all streams
- * on all backup servers.
- */
- if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
- !(s->flags & SRV_F_BACKUP) && s->next_eweight)
- srv_shutdown_backup_streams(s->proxy, SF_ERR_UP);
-
- /* check if we can handle some connections queued at the proxy. We
- * will take as many as we can handle.
- */
- xferred = pendconn_grab_from_px(s);
+ if (reason)
+ strlcpy2(s->op_st_chg_reason, reason, sizeof(s->op_st_chg_reason));
- chunk_printf(&trash,
- "%sServer %s/%s is UP", s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id);
+ if (s->slowstart <= 0)
+ s->next_state = SRV_ST_RUNNING;
- srv_append_status(&trash, s, reason, xferred, 0);
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
- send_email_alert(s, LOG_NOTICE, "%s", trash.str);
+ /* Register changes to be applied asynchronously */
+ if (LIST_ISEMPTY(&s->update_status))
+ LIST_ADDQ(&updated_servers, &s->update_status);
for (srv = s->trackers; srv; srv = srv->tracknext)
srv_set_running(srv, NULL);
@@ -938,36 +881,20 @@
void srv_set_stopping(struct server *s, const char *reason)
{
struct server *srv;
- int xferred;
- if (s->next_admin & SRV_ADMF_MAINT)
+ if (s->cur_admin & SRV_ADMF_MAINT)
return;
if (s->next_state == SRV_ST_STOPPING)
return;
- s->last_change = now.tv_sec;
s->next_state = SRV_ST_STOPPING;
- if (s->proxy->lbprm.set_server_status_down)
- s->proxy->lbprm.set_server_status_down(s);
-
- /* we might have streams queued on this server and waiting for
- * a connection. Those which are redispatchable will be queued
- * to another server or to the proxy itself.
- */
- xferred = pendconn_redistribute(s);
-
- chunk_printf(&trash,
- "%sServer %s/%s is stopping", s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id);
-
- srv_append_status(&trash, s, reason, xferred, 0);
-
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
+ if (reason)
+ strlcpy2(s->op_st_chg_reason, reason, sizeof(s->op_st_chg_reason));
- if (!s->proxy->srv_bck && !s->proxy->srv_act)
- set_backend_down(s->proxy);
+ /* Register changes to be applied asynchronously */
+ if (LIST_ISEMPTY(&s->update_status))
+ LIST_ADDQ(&updated_servers, &s->update_status);
for (srv = s->trackers; srv; srv = srv->tracknext)
srv_set_stopping(srv, NULL);
@@ -983,9 +910,7 @@
*/
void srv_set_admin_flag(struct server *s, enum srv_admin mode, const char *cause)
{
- struct check *check = &s->check;
struct server *srv;
- int xferred;
if (!mode)
return;
@@ -995,100 +920,18 @@
return;
s->next_admin |= mode;
+ if (cause)
+ strlcpy2(s->adm_st_chg_cause, cause, sizeof(s->adm_st_chg_cause));
+
+ /* Register changes to be applied asynchronously */
+ if (LIST_ISEMPTY(&s->update_status))
+ LIST_ADDQ(&updated_servers, &s->update_status);
/* stop going down if the equivalent flag was already present (forced or inherited) */
if (((mode & SRV_ADMF_MAINT) && (s->next_admin & ~mode & SRV_ADMF_MAINT)) ||
((mode & SRV_ADMF_DRAIN) && (s->next_admin & ~mode & SRV_ADMF_DRAIN)))
return;
- /* Maintenance must also disable health checks */
- if (mode & SRV_ADMF_MAINT) {
- if (s->check.state & CHK_ST_ENABLED) {
- s->check.state |= CHK_ST_PAUSED;
- check->health = 0;
- }
-
- if (s->next_state == SRV_ST_STOPPED) { /* server was already down */
- chunk_printf(&trash,
- "%sServer %s/%s was DOWN and now enters maintenance%s%s%s",
- s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id,
- cause ? " (" : "", cause ? cause : "", cause ? ")" : "");
-
- srv_append_status(&trash, s, NULL, -1, (mode & SRV_ADMF_FMAINT));
-
- if (!(global.mode & MODE_STARTING)) {
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
- }
- }
- else { /* server was still running */
- int srv_was_stopping = (s->next_state == SRV_ST_STOPPING) || (s->next_admin & SRV_ADMF_DRAIN);
- int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
-
- check->health = 0; /* failure */
- s->last_change = now.tv_sec;
- s->next_state = SRV_ST_STOPPED;
- if (s->proxy->lbprm.set_server_status_down)
- s->proxy->lbprm.set_server_status_down(s);
-
- if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
- srv_shutdown_streams(s, SF_ERR_DOWN);
-
- /* we might have streams queued on this server and waiting for
- * a connection. Those which are redispatchable will be queued
- * to another server or to the proxy itself.
- */
- xferred = pendconn_redistribute(s);
-
- chunk_printf(&trash,
- "%sServer %s/%s is going DOWN for maintenance%s%s%s",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id,
- cause ? " (" : "", cause ? cause : "", cause ? ")" : "");
-
- srv_append_status(&trash, s, NULL, xferred, (mode & SRV_ADMF_FMAINT));
-
- if (!(global.mode & MODE_STARTING)) {
- Warning("%s.\n", trash.str);
- send_log(s->proxy, srv_was_stopping ? LOG_NOTICE : LOG_ALERT, "%s.\n", trash.str);
- }
-
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
-
- s->counters.down_trans++;
- }
- }
-
- /* drain state is applied only if not yet in maint */
- if ((mode & SRV_ADMF_DRAIN) && !(s->next_admin & SRV_ADMF_MAINT)) {
- int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
-
- s->last_change = now.tv_sec;
- if (s->proxy->lbprm.set_server_status_down)
- s->proxy->lbprm.set_server_status_down(s);
-
- /* we might have streams queued on this server and waiting for
- * a connection. Those which are redispatchable will be queued
- * to another server or to the proxy itself.
- */
- xferred = pendconn_redistribute(s);
-
- chunk_printf(&trash, "%sServer %s/%s enters drain state%s%s%s",
- s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id,
- cause ? " (" : "", cause ? cause : "", cause ? ")" : "");
-
- srv_append_status(&trash, s, NULL, xferred, (mode & SRV_ADMF_FDRAIN));
-
- if (!(global.mode & MODE_STARTING)) {
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
- send_email_alert(s, LOG_NOTICE, "%s", trash.str);
- }
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
- }
-
/* compute the inherited flag to propagate */
if (mode & SRV_ADMF_MAINT)
mode = SRV_ADMF_IMAINT;
@@ -1107,9 +950,7 @@
*/
void srv_clr_admin_flag(struct server *s, enum srv_admin mode)
{
- struct check *check = &s->check;
struct server *srv;
- int xferred = -1;
if (!mode)
return;
@@ -1120,196 +961,9 @@
s->next_admin &= ~mode;
- if (s->next_admin & SRV_ADMF_MAINT) {
- /* remaining in maintenance mode, let's inform precisely about the
- * situation.
- */
- if (mode & SRV_ADMF_FMAINT) {
- chunk_printf(&trash,
- "%sServer %s/%s is leaving forced maintenance but remains in maintenance",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id);
-
- if (s->track) /* normally it's mandatory here */
- chunk_appendf(&trash, " via %s/%s",
- s->track->proxy->id, s->track->id);
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
- }
- if (mode & SRV_ADMF_RMAINT) {
- chunk_printf(&trash,
- "%sServer %s/%s ('%s') resolves again but remains in maintenance",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id, s->hostname);
-
- if (s->track) /* normally it's mandatory here */
- chunk_appendf(&trash, " via %s/%s",
- s->track->proxy->id, s->track->id);
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
- }
- else if (mode & SRV_ADMF_IMAINT) {
- chunk_printf(&trash,
- "%sServer %s/%s remains in forced maintenance",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id);
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
- }
- /* don't report anything when leaving drain mode and remaining in maintenance */
- }
- else if (mode & SRV_ADMF_MAINT) {
- /* OK here we're leaving maintenance, we have many things to check,
- * because the server might possibly be coming back up depending on
- * its state. In practice, leaving maintenance means that we should
- * immediately turn to UP (more or less the slowstart) under the
- * following conditions :
- * - server is neither checked nor tracked
- * - server tracks another server which is not checked
- * - server tracks another server which is already up
- * Which sums up as something simpler :
- * "either the tracking server is up or the server's checks are disabled
- * or up". Otherwise we only re-enable health checks. There's a special
- * case associated to the stopping state which can be inherited. Note
- * that the server might still be in drain mode, which is naturally dealt
- * with by the lower level functions.
- */
-
- if (s->check.state & CHK_ST_ENABLED) {
- s->check.state &= ~CHK_ST_PAUSED;
- check->health = check->rise; /* start OK but check immediately */
- }
-
- if ((!s->track || s->track->next_state != SRV_ST_STOPPED) &&
- (!(s->agent.state & CHK_ST_ENABLED) || (s->agent.health >= s->agent.rise)) &&
- (!(s->check.state & CHK_ST_ENABLED) || (s->check.health >= s->check.rise))) {
-
- if (s->track && s->track->next_state == SRV_ST_STOPPING) {
- if (s->last_change < now.tv_sec) // ignore negative times
- s->down_time += now.tv_sec - s->last_change;
-
- s->last_change = now.tv_sec;
- s->next_state = SRV_ST_STOPPING;
- }
- else {
- if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
- if (s->proxy->last_change < now.tv_sec) // ignore negative times
- s->proxy->down_time += now.tv_sec - s->proxy->last_change;
- s->proxy->last_change = now.tv_sec;
- }
-
- if (s->last_change < now.tv_sec) // ignore negative times
- s->down_time += now.tv_sec - s->last_change;
-
- s->last_change = now.tv_sec;
- s->next_state = SRV_ST_STARTING;
- if (s->slowstart > 0)
- task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
- else
- s->next_state = SRV_ST_RUNNING;
-
- server_recalc_eweight(s);
-
- /* If the server is set with "on-marked-up shutdown-backup-sessions",
- * and it's not a backup server and its effective weight is > 0,
- * then it can accept new connections, so we shut down all streams
- * on all backup servers.
- */
- if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
- !(s->flags & SRV_F_BACKUP) && s->next_eweight)
- srv_shutdown_backup_streams(s->proxy, SF_ERR_UP);
-
- /* check if we can handle some connections queued at the proxy. We
- * will take as many as we can handle.
- */
- xferred = pendconn_grab_from_px(s);
- }
-
- }
-
- if (mode & SRV_ADMF_FMAINT) {
- chunk_printf(&trash,
- "%sServer %s/%s is %s/%s (leaving forced maintenance)",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id,
- (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
- (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
- }
- else if (mode & SRV_ADMF_RMAINT) {
- chunk_printf(&trash,
- "%sServer %s/%s ('%s') is %s/%s (resolves again)",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id, s->hostname,
- (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
- (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
- }
- else {
- chunk_printf(&trash,
- "%sServer %s/%s is %s/%s (leaving maintenance)",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id,
- (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
- (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
- srv_append_status(&trash, s, NULL, xferred, 0);
- }
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
- }
- else if ((mode & SRV_ADMF_DRAIN) && (s->next_admin & SRV_ADMF_DRAIN)) {
- /* remaining in drain mode after removing one of its flags */
-
- if (mode & SRV_ADMF_FDRAIN) {
- chunk_printf(&trash,
- "%sServer %s/%s is leaving forced drain but remains in drain mode",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id);
-
- if (s->track) /* normally it's mandatory here */
- chunk_appendf(&trash, " via %s/%s",
- s->track->proxy->id, s->track->id);
- }
- else {
- chunk_printf(&trash,
- "%sServer %s/%s remains in forced drain mode",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id);
- }
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
- }
- else if (mode & SRV_ADMF_DRAIN) {
- /* OK completely leaving drain mode */
- if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
- if (s->proxy->last_change < now.tv_sec) // ignore negative times
- s->proxy->down_time += now.tv_sec - s->proxy->last_change;
- s->proxy->last_change = now.tv_sec;
- }
-
- if (s->last_change < now.tv_sec) // ignore negative times
- s->down_time += now.tv_sec - s->last_change;
- s->last_change = now.tv_sec;
- server_recalc_eweight(s);
-
- if (mode & SRV_ADMF_FDRAIN) {
- chunk_printf(&trash,
- "%sServer %s/%s is %s (leaving forced drain)",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id,
- (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
- }
- else {
- chunk_printf(&trash,
- "%sServer %s/%s is %s (leaving drain)",
- s->flags & SRV_F_BACKUP ? "Backup " : "",
- s->proxy->id, s->id,
- (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
- if (s->track) /* normally it's mandatory here */
- chunk_appendf(&trash, " via %s/%s",
- s->track->proxy->id, s->track->id);
- }
- Warning("%s.\n", trash.str);
- send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
- }
+ /* Register changes to be applied asynchronously */
+ if (LIST_ISEMPTY(&s->update_status))
+ LIST_ADDQ(&updated_servers, &s->update_status);
/* stop going down if the equivalent flag is still present (forced or inherited) */
if (((mode & SRV_ADMF_MAINT) && (s->next_admin & SRV_ADMF_MAINT)) ||
@@ -1426,17 +1080,9 @@
sv->next_eweight = (sv->uweight * w + px->lbprm.wmult - 1) / px->lbprm.wmult;
- /* now propagate the status change to any LB algorithms */
- if (px->lbprm.update_server_eweight)
- px->lbprm.update_server_eweight(sv);
- else if (srv_willbe_usable(sv)) {
- if (px->lbprm.set_server_status_up)
- px->lbprm.set_server_status_up(sv);
- }
- else {
- if (px->lbprm.set_server_status_down)
- px->lbprm.set_server_status_down(sv);
- }
+ /* Register changes to be applied asynchronously */
+ if (LIST_ISEMPTY(&sv->update_status))
+ LIST_ADDQ(&updated_servers, &sv->update_status);
}
/*
@@ -1846,6 +1492,7 @@
LIST_INIT(&srv->priv_conns);
LIST_INIT(&srv->idle_conns);
LIST_INIT(&srv->safe_conns);
+ LIST_INIT(&srv->update_status);
srv->next_state = SRV_ST_RUNNING; /* early server setup */
srv->last_change = now.tv_sec;
@@ -4760,6 +4407,516 @@
cli_register_kw(&cli_kws);
}
+
+/*
+ * This function applies a server's pending status changes; it
+ * is designed to be called asynchronously.
+ *
+ */
+void srv_update_status(struct server *s)
+{
+ struct check *check = &s->check;
+ int xferred;
+ struct proxy *px = s->proxy;
+ int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
+ int srv_was_stopping = (s->cur_state == SRV_ST_STOPPING) || (s->cur_admin & SRV_ADMF_DRAIN);
+ int log_level;
+ struct chunk *tmptrash = NULL;
+
+
+ /* If maint is not currently set, we try to apply pending state changes */
+ if (!(s->cur_admin & SRV_ADMF_MAINT)) {
+ int next_admin;
+
+ /* Backup next admin */
+ next_admin = s->next_admin;
+
+ /* restore current admin state */
+ s->next_admin = s->cur_admin;
+
+ if ((s->cur_state != SRV_ST_STOPPED) && (s->next_state == SRV_ST_STOPPED)) {
+ s->last_change = now.tv_sec;
+ if (s->proxy->lbprm.set_server_status_down)
+ s->proxy->lbprm.set_server_status_down(s);
+
+ if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
+ srv_shutdown_streams(s, SF_ERR_DOWN);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is DOWN", s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ srv_append_status(tmptrash, s, *s->op_st_chg_reason ? s->op_st_chg_reason : NULL, xferred, 0);
+ Warning("%s.\n", tmptrash->str);
+
+ /* we don't send an alert if the server was previously paused */
+ log_level = srv_was_stopping ? LOG_NOTICE : LOG_ALERT;
+ send_log(s->proxy, log_level, "%s.\n", tmptrash->str);
+ send_email_alert(s, log_level, "%s", tmptrash->str);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+
+ s->counters.down_trans++;
+ }
+ else if ((s->cur_state != SRV_ST_STOPPING) && (s->next_state == SRV_ST_STOPPING)) {
+ s->last_change = now.tv_sec;
+ if (s->proxy->lbprm.set_server_status_down)
+ s->proxy->lbprm.set_server_status_down(s);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is stopping", s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ srv_append_status(tmptrash, s, *s->op_st_chg_reason ? s->op_st_chg_reason : NULL, xferred, 0);
+
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+ }
+ else if (((s->cur_state != SRV_ST_RUNNING) && (s->next_state == SRV_ST_RUNNING))
+ || ((s->cur_state != SRV_ST_STARTING) && (s->next_state == SRV_ST_STARTING))) {
+ if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
+ if (s->proxy->last_change < now.tv_sec) // ignore negative times
+ s->proxy->down_time += now.tv_sec - s->proxy->last_change;
+ s->proxy->last_change = now.tv_sec;
+ }
+
+ if (s->next_state == SRV_ST_STOPPED && s->last_change < now.tv_sec) // ignore negative times
+ s->down_time += now.tv_sec - s->last_change;
+
+ s->last_change = now.tv_sec;
+ if (s->next_state == SRV_ST_STARTING)
+ task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
+
+ server_recalc_eweight(s);
+ /* now propagate the status change to any LB algorithms */
+ if (px->lbprm.update_server_eweight)
+ px->lbprm.update_server_eweight(s);
+ else if (srv_willbe_usable(s)) {
+ if (px->lbprm.set_server_status_up)
+ px->lbprm.set_server_status_up(s);
+ }
+ else {
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+ }
+
+ /* If the server is set with "on-marked-up shutdown-backup-sessions",
+ * and it's not a backup server and its effective weight is > 0,
+ * then it can accept new connections, so we shut down all streams
+ * on all backup servers.
+ */
+ if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
+ !(s->flags & SRV_F_BACKUP) && s->next_eweight)
+ srv_shutdown_backup_streams(s->proxy, SF_ERR_UP);
+
+ /* check if we can handle some connections queued at the proxy. We
+ * will take as many as we can handle.
+ */
+ xferred = pendconn_grab_from_px(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is UP", s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ srv_append_status(tmptrash, s, *s->op_st_chg_reason ? s->op_st_chg_reason : NULL, xferred, 0);
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ send_email_alert(s, LOG_NOTICE, "%s", tmptrash->str);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+ }
+ else if (s->cur_eweight != s->next_eweight) {
+ /* now propagate the status change to any LB algorithms */
+ if (px->lbprm.update_server_eweight)
+ px->lbprm.update_server_eweight(s);
+ else if (srv_willbe_usable(s)) {
+ if (px->lbprm.set_server_status_up)
+ px->lbprm.set_server_status_up(s);
+ }
+ else {
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+ }
+
+ s->next_admin = next_admin;
+ }
+
+
+ /* Now we try to apply pending admin changes */
+
+ /* Maintenance must also disable health checks */
+ if (!(s->cur_admin & SRV_ADMF_MAINT) && (s->next_admin & SRV_ADMF_MAINT)) {
+ if (s->check.state & CHK_ST_ENABLED) {
+ s->check.state |= CHK_ST_PAUSED;
+ check->health = 0;
+ }
+
+ if (s->cur_state == SRV_ST_STOPPED) { /* server was already down */
+ chunk_printf(tmptrash,
+ "%sServer %s/%s was DOWN and now enters maintenance%s%s%s",
+ s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id,
+ *(s->adm_st_chg_cause) ? " (" : "", s->adm_st_chg_cause, *(s->adm_st_chg_cause) ? ")" : "");
+
+ srv_append_status(tmptrash, s, NULL, -1, (s->next_admin & SRV_ADMF_FMAINT));
+
+ if (!(global.mode & MODE_STARTING)) {
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ }
+ }
+ else { /* server was still running */
+ check->health = 0; /* failure */
+ s->last_change = now.tv_sec;
+ if (s->proxy->lbprm.set_server_status_down)
+ s->proxy->lbprm.set_server_status_down(s);
+
+ s->next_state = SRV_ST_STOPPED;
+ if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
+ srv_shutdown_streams(s, SF_ERR_DOWN);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is going DOWN for maintenance%s%s%s",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ *(s->adm_st_chg_cause) ? " (" : "", s->adm_st_chg_cause, *(s->adm_st_chg_cause) ? ")" : "");
+
+ srv_append_status(tmptrash, s, NULL, xferred, (s->next_admin & SRV_ADMF_FMAINT));
+
+ if (!(global.mode & MODE_STARTING)) {
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, srv_was_stopping ? LOG_NOTICE : LOG_ALERT, "%s.\n", tmptrash->str);
+ }
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+
+ s->counters.down_trans++;
+ }
+ }
+ else if ((s->cur_admin & SRV_ADMF_MAINT) && !(s->next_admin & SRV_ADMF_MAINT)) {
+ /* OK here we're leaving maintenance, we have many things to check,
+ * because the server might possibly be coming back up depending on
+ * its state. In practice, leaving maintenance means that we should
+ * immediately turn to UP (more or less the slowstart) under the
+ * following conditions :
+ * - server is neither checked nor tracked
+ * - server tracks another server which is not checked
+ * - server tracks another server which is already up
+ * Which sums up as something simpler :
+ * "either the tracking server is up or the server's checks are disabled
+ * or up". Otherwise we only re-enable health checks. There's a special
+ * case associated to the stopping state which can be inherited. Note
+ * that the server might still be in drain mode, which is naturally dealt
+ * with by the lower level functions.
+ */
+
+ if (s->check.state & CHK_ST_ENABLED) {
+ s->check.state &= ~CHK_ST_PAUSED;
+ check->health = check->rise; /* start OK but check immediately */
+ }
+
+ if ((!s->track || s->track->next_state != SRV_ST_STOPPED) &&
+ (!(s->agent.state & CHK_ST_ENABLED) || (s->agent.health >= s->agent.rise)) &&
+ (!(s->check.state & CHK_ST_ENABLED) || (s->check.health >= s->check.rise))) {
+ if (s->track && s->track->next_state == SRV_ST_STOPPING) {
+ s->next_state = SRV_ST_STOPPING;
+ }
+ else {
+ s->next_state = SRV_ST_STARTING;
+ if (s->slowstart > 0)
+ task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
+ else
+ s->next_state = SRV_ST_RUNNING;
+ }
+
+ }
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ if (!(s->next_admin & SRV_ADMF_FMAINT) && (s->cur_admin & SRV_ADMF_FMAINT)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s/%s (leaving forced maintenance)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
+ (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
+ }
+ if (!(s->next_admin & SRV_ADMF_RMAINT) && (s->cur_admin & SRV_ADMF_RMAINT)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s ('%s') is %s/%s (resolves again)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id, s->hostname,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
+ (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
+ }
+ if (!(s->next_admin & SRV_ADMF_IMAINT) && (s->cur_admin & SRV_ADMF_IMAINT)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s/%s (leaving maintenance)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
+ (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
+ }
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ server_recalc_eweight(s);
+ /* now propagate the status change to any LB algorithms */
+ if (px->lbprm.update_server_eweight)
+ px->lbprm.update_server_eweight(s);
+ else if (srv_willbe_usable(s)) {
+ if (px->lbprm.set_server_status_up)
+ px->lbprm.set_server_status_up(s);
+ }
+ else {
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+
+ }
+ else if (s->next_admin & SRV_ADMF_MAINT) {
+ /* remaining in maintenance mode, let's inform precisely about the
+ * situation.
+ */
+ if (!(s->next_admin & SRV_ADMF_FMAINT) && (s->cur_admin & SRV_ADMF_FMAINT)) {
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is leaving forced maintenance but remains in maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ }
+ if (!(s->next_admin & SRV_ADMF_RMAINT) && (s->cur_admin & SRV_ADMF_RMAINT)) {
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s ('%s') resolves again but remains in maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id, s->hostname);
+
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ }
+ else if (!(s->next_admin & SRV_ADMF_IMAINT) && (s->cur_admin & SRV_ADMF_IMAINT)) {
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s remains in forced maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+ }
+ /* don't report anything when leaving drain mode and remaining in maintenance */
+
+ s->cur_admin = s->next_admin;
+ }
+
+ if (!(s->next_admin & SRV_ADMF_MAINT)) {
+ if (!(s->cur_admin & SRV_ADMF_DRAIN) && (s->next_admin & SRV_ADMF_DRAIN)) {
+ /* drain state is applied only if not yet in maint */
+
+ s->last_change = now.tv_sec;
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash, "%sServer %s/%s enters drain state%s%s%s",
+ s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id,
+ *(s->adm_st_chg_cause) ? " (" : "", s->adm_st_chg_cause, *(s->adm_st_chg_cause) ? ")" : "");
+
+ srv_append_status(tmptrash, s, NULL, xferred, (s->next_admin & SRV_ADMF_FDRAIN));
+
+ if (!(global.mode & MODE_STARTING)) {
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ send_email_alert(s, LOG_NOTICE, "%s", tmptrash->str);
+ }
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy);
+ }
+ else if ((s->cur_admin & SRV_ADMF_DRAIN) && !(s->next_admin & SRV_ADMF_DRAIN)) {
+ /* OK completely leaving drain mode */
+ if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
+ if (s->proxy->last_change < now.tv_sec) // ignore negative times
+ s->proxy->down_time += now.tv_sec - s->proxy->last_change;
+ s->proxy->last_change = now.tv_sec;
+ }
+
+ if (s->last_change < now.tv_sec) // ignore negative times
+ s->down_time += now.tv_sec - s->last_change;
+ s->last_change = now.tv_sec;
+ server_recalc_eweight(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ if (!(s->next_admin & SRV_ADMF_FDRAIN)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s (leaving forced drain)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
+ }
+ else {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s (leaving drain)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ }
+
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ /* now propagate the status change to any LB algorithms */
+ if (px->lbprm.update_server_eweight)
+ px->lbprm.update_server_eweight(s);
+ else if (srv_willbe_usable(s)) {
+ if (px->lbprm.set_server_status_up)
+ px->lbprm.set_server_status_up(s);
+ }
+ else {
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+ }
+ }
+ else if ((s->next_admin & SRV_ADMF_DRAIN)) {
+ /* remaining in drain mode after removing one of its flags */
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ if (!(s->next_admin & SRV_ADMF_FDRAIN)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is leaving forced drain but remains in drain mode",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ }
+ else {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s remains in forced drain mode",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+ }
+ Warning("%s.\n", tmptrash->str);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
+ free_trash_chunk(tmptrash);
+ tmptrash = NULL;
+ }
+
+ /* commit new admin status */
+
+ s->cur_admin = s->next_admin;
+ }
+ }
+
+ /* Re-set log strings to empty */
+ *s->op_st_chg_reason = 0;
+ *s->adm_st_chg_cause = 0;
+}
+/* Applies the pending status change of every server currently linked in the
+ * <updated_servers> list, then detaches that server from the list.
+ */
+void servers_update_status(void)
+{
+	struct server *srv, *next;
+
+	list_for_each_entry_safe(srv, next, &updated_servers, update_status) {
+		srv_update_status(srv);         /* commit while still linked */
+		LIST_DEL(&srv->update_status);  /* then unlink from the list */
+		LIST_INIT(&srv->update_status);
+	}
+}
+
/*
* Local variables:
* c-indent-level: 8