MEDIUM: config: permit starting a bind on multiple groups at once

Now it is possible for a "bind" line to span multiple thread groups. When
this happens, the first listener becomes the reference and is entirely
set up, and the subsequent ones are duplicated from this reference so
that they can be registered in distinct groups. The reference is always
set up and started first, so it is guaranteed to be available when the
other listeners are started.
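
As an illustration, here is a minimal configuration sketch that triggers
this behavior (the port, thread counts and group/thread mapping below are
arbitrary assumptions for the example, not part of this patch):

    global
        nbthread 8
        thread-groups 2

    frontend www
        # this single "bind" line covers threads in both groups, so one
        # listener per group will be created, the first one serving as
        # the reference for the other one
        bind :8080 thread 1/1-4,2/1-4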

The documentation was updated to reflect this new possibility, its
limitations and impacts, and the differences from the "shards" option.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index a4c9672..396fc5f 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -15394,10 +15394,21 @@
   an absolute notation or a relative one, as those not set will be resolved at
   the end of the parsing.
 
+  It is important to know that each listener described by a "bind" line creates
+  at least one socket represented by at least one file descriptor. Since file
+  descriptors cannot span multiple thread groups, if a "bind" line specifies a
+  thread range that covers more than one group, several file descriptors will
+  automatically be created so that there is at least one per group. Technically
+  speaking they all refer to the same socket in the kernel, but they will get a
+  distinct identifier in haproxy and will even have a dedicated stats entry if
+  "option socket-stats" is used.
+
   The main purpose is to have multiple bind lines sharing the same IP:port but
   not the same thread in a listener, so that the system can distribute the
   incoming connections into multiple queues, bypassing haproxy's internal queue
   load balancing. Currently Linux 3.9 and above is known for supporting this.
+  See also the "shards" keyword above that automates duplication of "bind"
+  lines and their assignment to multiple groups of threads.
 
 tls-ticket-keys <keyfile>
   Sets the TLS ticket keys file to load the keys from. The keys need to be 48
diff --git a/src/cfgparse.c b/src/cfgparse.c
index f229e3b..a8b6bb8 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -2995,10 +2995,10 @@
 
 			/* apply thread masks and groups to all receivers */
 			list_for_each_entry(li, &bind_conf->listeners, by_bind) {
-				struct listener *new_li;
+				struct listener *new_li, *ref;
 				struct thread_set new_ts;
-				int shard, shards, todo, done, grp;
-				ulong mask, bit;
+				int shard, shards, todo, done, grp, dups;
+				ulong mask, gmask, bit;
 
 				shards = bind_conf->settings.shards;
 				todo = thread_set_count(&bind_conf->thread_set);
@@ -3042,17 +3042,55 @@
 
 					BUG_ON(!new_ts.grps); // no more bits left unassigned
 
-					if (atleast2(new_ts.grps)) {
-						ha_alert("Proxy '%s': shard number %d spans %d groups in 'bind %s' at [%s:%d]\n",
-							 curproxy->id, shard, my_popcountl(new_ts.grps), bind_conf->arg, bind_conf->file, bind_conf->line);
-						cfgerr++;
-						err_code |= ERR_FATAL | ERR_ALERT;
-						goto out;
-					}
+					/* Create all required listeners for all bound groups. If more than one group is
+					 * needed, the first receiver serves as a reference, and subsequent ones point to
+					 * it. We already have a listener available in <new_li> so we only allocate a new
+					 * one if we're not on the last one. We count the remaining groups by copying their
+					 * mask into <gmask> and dropping the lowest bit at the end of each iteration until
+					 * none is left. Ah yes, it's not pretty :-/
+					 */
+					ref = new_li;
+					gmask = new_ts.grps;
+					for (dups = 0; gmask; dups++) {
+						/* assign the first (and only) thread and group */
+						new_li->rx.bind_thread = thread_set_nth_tmask(&new_ts, dups);
+						new_li->rx.bind_tgroup = thread_set_nth_group(&new_ts, dups);
+
+						if (dups) {
+							/* it has been allocated already in the previous round */
+							shard_info_attach(&new_li->rx, ref->rx.shard_info);
+							new_li->rx.flags |= RX_F_MUST_DUP;
+						}
+
+						gmask &= gmask - 1; // drop lowest bit
+						if (gmask) {
+							/* yet another listener expected in this shard, let's
+							 * chain it.
+							 */
+							struct listener *tmp_li = clone_listener(new_li);
 
-					/* assign the first (and only) thread and group */
-					new_li->rx.bind_thread = thread_set_nth_tmask(&new_ts, 0);
-					new_li->rx.bind_tgroup = thread_set_nth_group(&new_ts, 0);
+							if (!tmp_li) {
+								ha_alert("Out of memory while trying to allocate extra listener for group %u of shard %d in %s %s\n",
+									 new_li->rx.bind_tgroup, shard, proxy_type_str(curproxy), curproxy->id);
+								cfgerr++;
+								err_code |= ERR_FATAL | ERR_ALERT;
+								goto out;
+							}
+
+							/* if we're forced to create at least two listeners, we have to
+							 * allocate a shared shard_info that is linked to by the reference
+							 * and every other listener, so we'll create it here.
+							 */
+							if (!shard_info_attach(&ref->rx, NULL)) {
+								ha_alert("Out of memory while trying to allocate shard_info for listener for group %u of shard %d in %s %s\n",
+									 new_li->rx.bind_tgroup, shard, proxy_type_str(curproxy), curproxy->id);
+								cfgerr++;
+								err_code |= ERR_FATAL | ERR_ALERT;
+								goto out;
+							}
+							new_li = tmp_li;
+						}
+					}
 					done -= todo;
 
 					shard++;
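
For reference, the group enumeration idiom used in the loop above relies
on clearing the lowest set bit with "gmask &= gmask - 1". Below is a
minimal standalone sketch of that technique (not haproxy code; the mask
value is a made-up example and __builtin_ctzl is a GCC/Clang builtin):

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical group mask: bits 0, 2 and 3 set, i.e.
             * groups 1, 3 and 4 are bound
             */
            unsigned long gmask = 0x0d;
            int dups;

            for (dups = 0; gmask; dups++) {
                    /* the lowest set bit designates the next group */
                    int grp = __builtin_ctzl(gmask) + 1;

                    printf("dup #%d -> listener for group %d\n", dups, grp);
                    gmask &= gmask - 1; /* drop lowest bit: group done */
            }
            return 0;
    }

Each iteration handles exactly one group, and the loop naturally stops
once every bound group has received its listener, which is what the
parsing code above depends on.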