MINOR: listener: support another thread dispatch mode: "fair"
This new algorithm for rebalancing incoming connections to multiple
threads is simpler: instead of considering the threads' load, it will
only cycle through all of them, offering a fair share of the traffic to
each thread. It may be well suited for short-lived connections but is
also convenient for very large thread counts where it's not certain
that the least loaded thread will always be found.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 82721f4..a4c9672 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -3023,16 +3023,23 @@
clicking). There should be no reason for changing this value. Please check
tune.ssl.maxrecord below.
-tune.listener.multi-queue { on | off }
- Enables ('on') or disables ('off') the listener's multi-queue accept which
- spreads the incoming traffic to all threads a "bind" line is allowed to run
- on instead of taking them for itself. This provides a smoother traffic
+tune.listener.multi-queue { on | fair | off }
+ Enables ('on' / 'fair') or disables ('off') the listener's multi-queue accept
+ which spreads the incoming traffic to all threads a "bind" line is allowed to
+ run on instead of taking them for itself. This provides a smoother traffic
distribution and scales much better, especially in environments where threads
may be unevenly loaded due to external activity (network interrupts colliding
- with one thread for example). This option is enabled by default, but it may
- be forcefully disabled for troubleshooting or for situations where it is
- estimated that the operating system already provides a good enough
- distribution and connections are extremely short-lived.
+ with one thread for example). The default mode, "on", optimizes the choice of
+ a thread by picking in a sample the one with the fewest connections. It is
+ often the best choice when connections are long-lived as it manages to keep
+ all threads busy. A second mode, "fair", instead cycles through all threads
+ regardless of their instant load level. It can be better suited for short-
+ lived connections, or on machines with very large numbers of threads where
+ the probability of finding the least loaded thread with the first mode is low.
+ Finally it is possible to forcefully disable the redistribution mechanism
+ using "off" for troubleshooting, or for situations where connections are
+ short-lived and it is estimated that the operating system already provides a
+ good enough distribution. The default is "on".
tune.lua.forced-yield <number>
This directive forces the Lua engine to execute a yield each <number> of
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h
index b7b00ba..af7f26c 100644
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -66,7 +66,7 @@
#define GTUNE_USE_SYSTEMD (1<<10)
#define GTUNE_BUSY_POLLING (1<<11)
-#define GTUNE_LISTENER_MQ (1<<12)
+/* unused: (1<<12) */
#define GTUNE_SET_DUMPABLE (1<<13)
#define GTUNE_USE_EVPORTS (1<<14)
#define GTUNE_STRICT_LIMITS (1<<15)
@@ -81,6 +81,9 @@
#define GTUNE_QUIC_SOCK_PER_CONN (1<<24)
#define GTUNE_NO_QUIC (1<<25)
#define GTUNE_USE_FAST_FWD (1<<26)
+#define GTUNE_LISTENER_MQ_FAIR (1<<27)
+#define GTUNE_LISTENER_MQ_OPT (1<<28)
+#define GTUNE_LISTENER_MQ_ANY (GTUNE_LISTENER_MQ_FAIR | GTUNE_LISTENER_MQ_OPT)
/* SSL server verify mode */
enum {
diff --git a/src/haproxy.c b/src/haproxy.c
index fd43c96..5306113 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -187,7 +187,7 @@
}
},
.tune = {
- .options = GTUNE_LISTENER_MQ,
+ .options = GTUNE_LISTENER_MQ_OPT,
.bufsize = (BUFSIZE + 2*sizeof(void *) - 1) & -(2*sizeof(void *)),
.maxrewrite = MAXREWRITE,
.reserved_bufs = RESERVED_BUFS,
diff --git a/src/listener.c b/src/listener.c
index d6e58ce..e441eff 100644
--- a/src/listener.c
+++ b/src/listener.c
@@ -1099,7 +1099,7 @@
#if defined(USE_THREAD)
mask = l->rx.bind_thread & _HA_ATOMIC_LOAD(&tg->threads_enabled);
- if (atleast2(mask) && (global.tune.options & GTUNE_LISTENER_MQ) && !stopping) {
+ if (atleast2(mask) && (global.tune.options & GTUNE_LISTENER_MQ_ANY) && !stopping) {
struct accept_queue_ring *ring;
unsigned int t, t0, t1, t2;
int base = tg->base;
@@ -1140,6 +1140,14 @@
t1 += my_ffsl(m1) - 1;
}
+ /* if running in round-robin mode ("fair"), we don't need
+ * to go further.
+ */
+ if ((global.tune.options & GTUNE_LISTENER_MQ_ANY) == GTUNE_LISTENER_MQ_FAIR) {
+ t = t1;
+ goto updt_t1;
+ }
+
if (unlikely(!(m2 & (1UL << t2)) || t1 == t2)) {
/* highest bit not set */
if (!m2)
@@ -1184,6 +1192,7 @@
}
else {
t = t1;
+ updt_t1:
t1++;
if (t1 >= LONGBITS)
t1 = 0;
@@ -1898,7 +1907,7 @@
return 0;
}
-/* config parser for global "tune.listener.multi-queue", accepts "on" or "off" */
+/* config parser for global "tune.listener.multi-queue", accepts "on", "fair" or "off" */
static int cfg_parse_tune_listener_mq(char **args, int section_type, struct proxy *curpx,
const struct proxy *defpx, const char *file, int line,
char **err)
@@ -1907,11 +1916,13 @@
return -1;
if (strcmp(args[1], "on") == 0)
- global.tune.options |= GTUNE_LISTENER_MQ;
+ global.tune.options = (global.tune.options & ~GTUNE_LISTENER_MQ_ANY) | GTUNE_LISTENER_MQ_OPT;
+ else if (strcmp(args[1], "fair") == 0)
+ global.tune.options = (global.tune.options & ~GTUNE_LISTENER_MQ_ANY) | GTUNE_LISTENER_MQ_FAIR;
else if (strcmp(args[1], "off") == 0)
- global.tune.options &= ~GTUNE_LISTENER_MQ;
+ global.tune.options &= ~GTUNE_LISTENER_MQ_ANY;
else {
- memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ memprintf(err, "'%s' expects either 'on', 'fair', or 'off' but got '%s'.", args[0], args[1]);
return -1;
}
return 0;