MINOR: listener: add a new global tune.listener.default-shards setting
This new setting accepts "by-process", "by-group" and "by-thread", and
dictates how listeners are sharded by default when no "shards" setting
is specified on a "bind" line. The default remains "by-process", but
"by-group" should be much more efficient with many threads, while
changing nothing for single-group setups.
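
For example, making "by-group" the default for every "bind" line only
requires this in the global section:

    global
        tune.listener.default-shards by-group

Any "bind" line may still override it with an explicit "shards" setting.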
diff --git a/doc/configuration.txt b/doc/configuration.txt
index d337c88..8fbe88a 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -3023,6 +3023,30 @@
clicking). There should be no reason for changing this value. Please check
tune.ssl.maxrecord below.
+tune.listener.default-shards { by-process | by-thread | by-group }
+ Normally, all "bind" lines will create a single shard, that is, a single
+ socket that all threads of the process will listen to. With many threads,
+ this is not very efficient, and may even induce significant overhead in the
+ kernel when updating the polling state or distributing events to the various
+ threads. Modern operating systems support balancing of incoming connections,
+ a mechanism that permits multiple sockets to be bound to the same address
+ and port, with the kernel evenly distributing all incoming connections
+ among these sockets so that each thread only sees the connections that are
+ waiting in the socket it is bound to. This significantly reduces
+ kernel-side overhead and increases performance in the incoming connection
+ path. This is usually enabled in HAProxy using the "shards" setting on "bind"
+ lines, which defaults to 1, meaning that each listener will be unique in the
+ process. On systems with many processors, it may be more convenient to change
+ the default setting to "by-thread" in order to always create one listening
+ socket per thread, or "by-group" in order to always create one listening
+ socket per thread group. Be careful about file descriptor usage with
+ "by-thread", as each listener will need as many sockets as there are threads.
+ Also, some operating systems (e.g. FreeBSD) are limited to no more than 256
+ sockets on the same address. Note that "by-group" will remain equivalent to
+ "by-process" for default configurations involving a single thread group, and
+ will fall back to sharing the same socket on systems that do not support this
+ mechanism. As such, it is the recommended setting.
+
tune.listener.multi-queue { on | fair | off }
Enables ('on' / 'fair') or disables ('off') the listener's multi-queue accept
which spreads the incoming traffic to all threads a "bind" line is allowed to
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h
index 3a7b53b..e7d02fe 100644
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -168,6 +168,7 @@
size_t pool_cache_size; /* per-thread cache size per pool (defaults to CONFIG_HAP_POOL_CACHE_SIZE) */
unsigned short idle_timer; /* how long before an empty buffer is considered idle (ms) */
int nb_stk_ctr; /* number of stick counters, defaults to MAX_SESS_STKCTR */
+ int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */
#ifdef USE_QUIC
unsigned int quic_backend_max_idle_timeout;
unsigned int quic_frontend_max_idle_timeout;
diff --git a/src/haproxy.c b/src/haproxy.c
index b1574ab..739183a 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -205,6 +205,7 @@
.idle_timer = 1000, /* 1 second */
#endif
.nb_stk_ctr = MAX_SESS_STKCTR,
+ .default_shards = 1, /* "by-process" = one shard per listener */
#ifdef USE_QUIC
.quic_backend_max_idle_timeout = QUIC_TP_DFLT_BACK_MAX_IDLE_TIMEOUT,
.quic_frontend_max_idle_timeout = QUIC_TP_DFLT_FRONT_MAX_IDLE_TIMEOUT,
diff --git a/src/listener.c b/src/listener.c
index bfb5ece..d5390ed 100644
--- a/src/listener.c
+++ b/src/listener.c
@@ -1918,7 +1918,7 @@
bind_conf->settings.ux.uid = -1;
bind_conf->settings.ux.gid = -1;
bind_conf->settings.ux.mode = 0;
- bind_conf->settings.shards = 1;
+ bind_conf->settings.shards = global.tune.default_shards;
bind_conf->xprt = xprt;
bind_conf->frontend = fe;
bind_conf->analysers = fe->fe_req_ana;
@@ -2298,6 +2298,27 @@
return 0;
}
+/* config parser for global "tune.listener.default-shards" */
+static int cfg_parse_tune_listener_shards(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "by-thread") == 0)
+ global.tune.default_shards = -1;
+ else if (strcmp(args[1], "by-group") == 0)
+ global.tune.default_shards = -2;
+ else if (strcmp(args[1], "by-process") == 0)
+ global.tune.default_shards = 1;
+ else {
+ memprintf(err, "'%s' expects either 'by-process', 'by-group', or 'by-thread' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
/* config parser for global "tune.listener.multi-queue", accepts "on", "fair" or "off" */
static int cfg_parse_tune_listener_mq(char **args, int section_type, struct proxy *curpx,
const struct proxy *defpx, const char *file, int line,
@@ -2366,6 +2387,7 @@
/* config keyword parsers */
static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.listener.default-shards", cfg_parse_tune_listener_shards },
{ CFG_GLOBAL, "tune.listener.multi-queue", cfg_parse_tune_listener_mq },
{ 0, NULL, NULL }
}};
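
Note: below is a minimal illustrative sketch (not part of the patch) of how
a consumer of the stored value can decode the sentinels set by the parser
above; the helper name and parameters are hypothetical, and the actual
resolution is performed later in the listener setup code:

    /* Hypothetical helper: turn the configured shard setting into an
     * effective shard count for a "bind" line. -1 encodes "by-thread"
     * (one socket per thread), -2 encodes "by-group" (one socket per
     * thread group), and any positive value is an explicit count
     * (1 = "by-process", the default).
     */
    static int resolve_default_shards(int shards, int bound_threads, int bound_groups)
    {
        if (shards == -1)
            return bound_threads;
        if (shards == -2)
            return bound_groups;
        return shards;
    }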