MINOR: checks: add on-marked-up option
This implements the feature, discussed in an earlier thread, of killing
connections on backup servers when a non-backup server comes back up. For
example, you can use this to route to a MySQL master & slave and ensure
clients don't stay on the slave after the master goes from down->up. I've done
some minimal testing and it seems to work.
[WT: added session flag & doc, moved the killing after logging the server UP,
and ensured that the new server is really usable]
diff --git a/doc/configuration.txt b/doc/configuration.txt
index b7ca28d..2a4f081 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -6970,12 +6970,35 @@
on-marked-down <action>
Modify what occurs when a server is marked down.
Currently one action is available:
- - shutdown-sessions: Shutdown peer sessions
+ - shutdown-sessions: Shutdown peer sessions. When this setting is enabled,
+ all connections to the server are immediately terminated when the server
+ goes down. It might be used if the health check detects more complex cases
+ than a simple connection status, and long timeouts would cause the service
+ to remain unresponsive for too long a time. For instance, a health check
+ might detect that a database is stuck and that there's no chance to reuse
+ existing connections anymore. Connections killed this way are logged with
+ a 'D' termination code (for "Down").
Actions are disabled by default
Supported in default-server: Yes
+on-marked-up <action>
+ Modify what occurs when a server is marked up.
+ Currently one action is available:
+ - shutdown-backup-sessions: Shutdown sessions on all backup servers. This is
+ done only if the server is not in backup state and if it is not disabled
+ (it must have an effective weight > 0). This can be used sometimes to force
+ an active server to take all the traffic back after recovery when dealing
+ with long sessions (eg: LDAP, SQL, ...). Doing this can cause more trouble
+ than it tries to solve (eg: incomplete transactions), so use this feature
+ with extreme care. Sessions killed because a server comes up are logged
+ with a 'U' termination code (for "Up").
+
+ Actions are disabled by default
+
+ Supported in default-server: Yes
+
port <port>
Using the "port" parameter, it becomes possible to use a different port to
send health-checks. On some servers, it may be desirable to dedicate a port
@@ -9407,6 +9430,10 @@
D : the session was killed by haproxy because the server was detected
as down and was configured to kill all connections when going down.
+ U : the session was killed by haproxy on this backup server because an
+ active server was detected as up and was configured to kill all
+ backup connections when going up.
+
K : the session was actively killed by an admin operating on haproxy.
c : the client-side timeout expired while waiting for the client to
diff --git a/include/types/checks.h b/include/types/checks.h
index fd15c95..6dcecf5 100644
--- a/include/types/checks.h
+++ b/include/types/checks.h
@@ -75,11 +75,15 @@
enum {
HANA_ONMARKEDDOWN_NONE = 0,
-
HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS, /* Shutdown peer sessions */
};
enum {
+ HANA_ONMARKEDUP_NONE = 0,
+ HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS, /* Shutdown sessions on all backup servers */
+};
+
+enum {
HANA_OBS_NONE = 0,
HANA_OBS_LAYER4, /* Observe L4 - for example tcp */
diff --git a/include/types/server.h b/include/types/server.h
index aa2c4f8..1885eab 100644
--- a/include/types/server.h
+++ b/include/types/server.h
@@ -120,7 +120,8 @@
int rise, fall; /* time in iterations */
int consecutive_errors_limit; /* number of consecutive errors that triggers an event */
short observe, onerror; /* observing mode: one of HANA_OBS_*; what to do on error: on of ANA_ONERR_* */
- short onmarkeddown; /* what to do when marked down: on of HANA_ONMARKEDDOWN_* */
+ short onmarkeddown; /* what to do when marked down: one of HANA_ONMARKEDDOWN_* */
+ short onmarkedup; /* what to do when marked up: one of HANA_ONMARKEDUP_* */
int inter, fastinter, downinter; /* checks: time in milliseconds */
int slowstart; /* slowstart time in seconds (ms in the conf) */
int result; /* health-check result : SRV_CHK_* */
diff --git a/include/types/session.h b/include/types/session.h
index f1b7451..a098002 100644
--- a/include/types/session.h
+++ b/include/types/session.h
@@ -67,6 +67,7 @@
#define SN_ERR_INTERNAL 0x00007000 /* the proxy encountered an internal error */
#define SN_ERR_DOWN 0x00008000 /* the proxy killed a session because the backend became unavailable */
#define SN_ERR_KILLED 0x00009000 /* the proxy killed a session because it was asked to do so */
+#define SN_ERR_UP 0x0000a000 /* the proxy killed a session because a preferred backend became available */
#define SN_ERR_MASK 0x0000f000 /* mask to get only session error flags */
#define SN_ERR_SHIFT 12 /* bit shift */
diff --git a/src/cfgparse.c b/src/cfgparse.c
index a95f06c..7a0e978 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -4406,6 +4406,18 @@
cur_arg += 2;
}
+ else if (!strcmp(args[cur_arg], "on-marked-up")) {
+ if (!strcmp(args[cur_arg + 1], "shutdown-backup-sessions"))
+ newsrv->onmarkedup = HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS;
+ else {
+ Alert("parsing [%s:%d]: '%s' expects 'shutdown-backup-sessions' but got '%s'\n",
+ file, linenum, args[cur_arg], args[cur_arg + 1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ cur_arg += 2;
+ }
else if (!strcmp(args[cur_arg], "error-limit")) {
if (!*args[cur_arg + 1]) {
Alert("parsing [%s:%d]: '%s' expects an integer argument.\n",
diff --git a/src/checks.c b/src/checks.c
index 03354a7..d0e5b6d 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -358,17 +358,32 @@
return xferred;
}
-/* Shutdown connections when their server goes down.
+/* Shutdown all connections of a server. The caller must pass a termination
+ * code in <why>, which must be one of SN_ERR_* indicating the reason for the
+ * shutdown.
*/
-static void shutdown_sessions(struct server *srv)
+static void shutdown_sessions(struct server *srv, int why)
{
struct session *session, *session_bck;
list_for_each_entry_safe(session, session_bck, &srv->actconns, by_srv)
if (session->srv_conn == srv)
- session_shutdown(session, SN_ERR_DOWN);
+ session_shutdown(session, why);
}
+/* Shutdown all connections of all backup servers of a proxy. The caller must
+ * pass a termination code in <why>, which must be one of SN_ERR_* indicating
+ * the reason for the shutdown.
+ */
+static void shutdown_backup_sessions(struct proxy *px, int why)
+{
+ struct server *srv;
+
+ for (srv = px->srv; srv != NULL; srv = srv->next)
+ if (srv->state & SRV_BACKUP)
+ shutdown_sessions(srv, why);
+}
+
/* Sets server <s> down, notifies by all available means, recounts the
* remaining servers on the proxy and transfers queued sessions whenever
* possible to other servers. It automatically recomputes the number of
@@ -394,7 +409,7 @@
s->proxy->lbprm.set_server_status_down(s);
if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
- shutdown_sessions(s);
+ shutdown_sessions(s, SN_ERR_DOWN);
/* we might have sessions queued on this server and waiting for
* a connection. Those which are redispatchable will be queued
@@ -480,6 +495,15 @@
if (s->proxy->lbprm.set_server_status_up)
s->proxy->lbprm.set_server_status_up(s);
+ /* If the server is set with "on-marked-up shutdown-backup-sessions",
+ * and it's not a backup server and its effective weight is > 0,
+ * then it can accept new connections, so we shut down all sessions
+ * on all backup servers.
+ */
+ if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
+ !(s->state & SRV_BACKUP) && s->eweight)
+ shutdown_backup_sessions(s->proxy, SN_ERR_UP);
+
/* check if we can handle some connections queued at the proxy. We
* will take as many as we can handle.
*/
diff --git a/src/log.c b/src/log.c
index 15035ae..2a3cd16 100644
--- a/src/log.c
+++ b/src/log.c
@@ -50,7 +50,7 @@
"warning", "notice", "info", "debug"
};
-const char sess_term_cond[10] = "-cCsSPRIDK"; /* normal, CliTo, CliErr, SrvTo, SrvErr, PxErr, Resource, Internal, Down, Killed */
+const char sess_term_cond[16] = "-cCsSPRIDKUIIIII"; /* normal, CliTo, CliErr, SrvTo, SrvErr, PxErr, Resource, Internal, Down, Killed, Up, -- */
const char sess_fin_state[8] = "-RCHDLQT"; /* cliRequest, srvConnect, srvHeader, Data, Last, Queue, Tarpit */