MINOR: listener: always compare the local thread as well

By comparing the local thread's load with that of the least loaded
thread, we can further improve fairness while also improving locality,
since it allows a small ratio of connections not to be migrated. This
is visible in CPU usage with long connections on very large thread
counts (224) and high bandwidth (200G). The cost of checking the local
thread's load remains fairly low, so there's no reason not to do this.
We continue to update the index when we select the local thread,
because it means that the two other threads were both more loaded, so
we'd rather find better ones.
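
For illustration, here is a minimal standalone sketch of the selection
rule this patch introduces. It only models the decision itself, not the
listener/shard structures: q0 stands for the local thread's load (queue
length plus active connections), q1/q2 for the loads of the two
candidate threads, and the pick_thread() helper and struct names are
purely illustrative, not taken from the actual code.

/* Standalone sketch of the thread selection rule (illustrative only). */
#include <stdio.h>

struct pick {
	int thr;      /* chosen thread id */
	int migrate;  /* 0 if we stayed on the local thread */
};

static struct pick pick_thread(int local, int q0,
                               int t1, int q1,
                               int t2, int q2)
{
	struct pick p;

	if (q1 < q2) {
		/* t1 is the least loaded remote candidate; keep it unless
		 * the local thread is at least as good, in which case we
		 * avoid the migration entirely.
		 */
		p.thr = (q0 <= q1) ? local : t1;
	}
	else if (q1 > q2) {
		/* symmetric case: t2 is the least loaded remote candidate */
		p.thr = (q0 <= q2) ? local : t2;
	}
	else {
		/* q1 == q2: t1 is picked by default; the local thread must
		 * be strictly better than both to win the tie.
		 */
		p.thr = (q0 < q1) ? local : t1;
	}
	p.migrate = (p.thr != local);
	return p;
}

int main(void)
{
	/* local thread 0 with load 3 vs remote threads 5 (load 3) and
	 * 9 (load 7): the best remote is only as good as us, so we stay
	 * local and avoid the migration.
	 */
	struct pick p = pick_thread(0, 3, 5, 3, 9, 7);

	printf("chosen thread: %d, migrate: %d\n", p.thr, p.migrate);
	return 0;
}

Note the <= versus < distinction: when the two remote candidates are
equally loaded, the local thread only wins the tie when it is strictly
better than both, which matches the comment added in the patch below.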
diff --git a/src/listener.c b/src/listener.c
index 0385094..a68cb12 100644
--- a/src/listener.c
+++ b/src/listener.c
@@ -1229,7 +1229,7 @@
 			 */
 			thr_idx_ptr = l->rx.shard_info ? &((struct listener *)(l->rx.shard_info->ref->owner))->thr_idx : &l->thr_idx;
 			while (1) {
-				int q1, q2;
+				int q0, q1, q2;
 
 				/* calculate r1/g1/t1 first (ascending idx) */
 				n0 = _HA_ATOMIC_LOAD(thr_idx_ptr);
@@ -1337,12 +1337,15 @@
 
 				/* here we have (r1,g1,t1) that designate the first receiver, its
 				 * thread group and local thread, and (r2,g2,t2) that designate
-				 * the second receiver, its thread group and local thread.
+				 * the second receiver, its thread group and local thread. We'll
+				 * also consider the local thread with q0.
 				 */
+				q0 = accept_queue_ring_len(&accept_queue_rings[tid]);
 				q1 = accept_queue_ring_len(&accept_queue_rings[g1->base + t1]);
 				q2 = accept_queue_ring_len(&accept_queue_rings[g2->base + t2]);
 
 				/* add to this the currently active connections */
+				q0 += _HA_ATOMIC_LOAD(&l->thr_conn[ti->ltid]);
 				if (l->rx.shard_info) {
 					q1 += _HA_ATOMIC_LOAD(&((struct listener *)l->rx.shard_info->members[r1]->owner)->thr_conn[t1]);
 					q2 += _HA_ATOMIC_LOAD(&((struct listener *)l->rx.shard_info->members[r2]->owner)->thr_conn[t2]);
@@ -1361,12 +1364,17 @@
 				 *   q1 = q2 : both are equally loaded, thus we pick t1
 				 *             and update t1 as it will become more loaded
 				 *             than t2.
+				 * On top of that, if in the end the current thread appears
+				 * to be as good of a deal, we'll prefer it over a foreign
+				 * one as it will improve locality and avoid a migration.
 				 */
 
 				if (q1 - q2 < 0) {
 					t = g1->base + t1;
+					if (q0 <= q1)
+						t = tid;
 
-					if (l->rx.shard_info)
+					if (l->rx.shard_info && t != tid)
 						new_li = l->rx.shard_info->members[r1]->owner;
 
 					t2--;
@@ -1378,15 +1386,19 @@
 				}
 				else if (q1 - q2 > 0) {
 					t = g2->base + t2;
+					if (q0 <= q2)
+						t = tid;
 
-					if (l->rx.shard_info)
+					if (l->rx.shard_info && t != tid)
 						new_li = l->rx.shard_info->members[r2]->owner;
 					goto updt_t1;
 				}
-				else {
+				else { // q1 == q2
 					t = g1->base + t1;
+					if (q0 < q1) // local must be strictly better than both
+						t = tid;
 
-					if (l->rx.shard_info)
+					if (l->rx.shard_info && t != tid)
 						new_li = l->rx.shard_info->members[r1]->owner;
 				updt_t1:
 					t1++;