BUG/MEDIUM: peers: Fix synchro for huge number of tables

The number of updates sent at once was limited to avoid looping for too
long when emitting updates, for instance when the buffer size is huge or
when there are many tables to sync. The limit is configurable and is set
to 200 by default. However, this fix introduced a bug: it is impossible
to synchronize two peers if the number of tables is higher than this
limit. Thus, by default, it is not possible to sync two peers when there
are more than 200 tables to sync.

Technically speaking, a teaching process is finished when we have looped
over all tables without sending any new update message. Because the number
of updates per call is limited, the loop is split across several calls.
However, the restart point for the next loop is always the last table for
which an update message was emitted. Thus, with more tables than the
limit, the loop never reaches its end point.

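For illustration only, here is a condensed sketch of the old loop (not the
literal code), showing why the finish point can never be reached once the
number of tables exceeds the limit:

    /* old behaviour, simplified */
    st = last_local_table->next;          /* last table that emitted updates */
    while (1) {
            ...                           /* teach <st>, may emit updates and
                                           * move peer->last_local_table */
            if (st == last_local_table)
                    break;                /* finish point */
            st = st->next;
            if (++updates_sent >= peers_max_updates_at_once)
                    return -1;            /* interrupted: the next call restarts
                                           * from peer->last_local_table, not
                                           * from <st>, so tables beyond the
                                           * limit may never be reached */
    }
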
Worse, in conjunction with the bug fixed by "BUG/MEDIUM: peers: Be sure to
always refresh recconnect timer in sync task", it is possible to trigger
the watchdog, because the applet may be woken up in a loop, each time
leaving while requesting more room even though its buffer is empty.

To fix the issue, the restart conditions for a teaching loop were changed.
If the teach process is interrupted, we now save the restart point, called
stop_local_table. It is the last table evaluated during the previous loop.
This restart point is reset when the teach process is finished.

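Condensed from the diff below, the restart logic now becomes:

    if (!peer->stop_local_table)
            peer->stop_local_table = last_local_table;
    st = peer->stop_local_table->next;    /* resume where the previous call stopped */
    while (1) {
            ...                           /* teach <st>, may emit updates */
            if (st == last_local_table) {
                    peer->stop_local_table = NULL;  /* teach process finished */
                    break;
            }
            peer->stop_local_table = st;  /* <st> was evaluated: restart from
                                           * <st->next> on the next call */
            if (++updates >= peers_max_updates_at_once)
                    return -1;            /* interrupted, will resume after <st> */
            st = st->next;
    }
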
In addition, the updates_sent variable in peer_send_msgs() was renamed to
updates to avoid ambiguity. Indeed, this variable is incremented whether
or not messages are sent.

This patch must be backported as far as 2.6.
(cherry picked from commit 60e7116be0d01edc5117bb2c514c363e5cee7def)
Signed-off-by: Amaury Denoyelle <adenoyelle@haproxy.com>
diff --git a/include/haproxy/peers-t.h b/include/haproxy/peers-t.h
index 9535518..42199ff 100644
--- a/include/haproxy/peers-t.h
+++ b/include/haproxy/peers-t.h
@@ -77,7 +77,8 @@
uint32_t coll; /* connection collisions counter */
struct appctx *appctx; /* the appctx running it */
struct shared_table *remote_table;
- struct shared_table *last_local_table;
+ struct shared_table *last_local_table; /* Last table that emitted update messages during a teach process */
+ struct shared_table *stop_local_table; /* last evaluated table, used as restart point for the next teach process */
struct shared_table *tables;
struct server *srv;
struct dcache *dcache; /* dictionary cache */
diff --git a/src/peers.c b/src/peers.c
index 2eb262b..c4760e6 100644
--- a/src/peers.c
+++ b/src/peers.c
@@ -1057,7 +1057,7 @@
flush_dcache(peer);
/* Re-init current table pointers to force announcement on re-connect */
- peer->remote_table = peer->last_local_table = NULL;
+ peer->remote_table = peer->last_local_table = peer->stop_local_table = NULL;
peer->appctx = NULL;
if (peer->flags & PEER_F_LEARN_ASSIGN) {
/* unassign current peer for learning */
@@ -2574,7 +2574,25 @@
* Returns 1 if succeeded, or -1 or 0 if failed.
* -1 means an internal error occurred, 0 is for a peer protocol error leading
* to a peer state change (from the peer I/O handler point of view).
+ *
+ * - peer->last_local_table is the last table for which we sent an update
+ *   message.
+ *
+ * - peer->stop_local_table is the last evaluated table. It is unset when the
+ *   teaching process starts. But we use it as a
+ *   restart point when the loop is interrupted. It is
+ *   especially useful when the number of tables exceeds
+ *   the peers_max_updates_at_once value.
+ *
+ * When a teaching loop is started, the peer's last_local_table is saved in a
+ * local variable. This variable is used as a finish point. When the current
+ * table is equal to it, it means all tables were evaluated, all updates were
+ * sent and the teaching process is finished.
+ *
+ * peer->stop_local_table is always NULL when the teaching process begins. It is
+ * only reset at the end. In the meantime, it always points to a table.
*/
+
static inline int peer_send_msgs(struct appctx *appctx,
struct peer *peer, struct peers *peers)
{
@@ -2596,17 +2614,18 @@
if (peer->tables) {
struct shared_table *st;
struct shared_table *last_local_table;
- int updates_sent = 0;
+ int updates = 0;
last_local_table = peer->last_local_table;
if (!last_local_table)
last_local_table = peer->tables;
- st = last_local_table->next;
+ if (!peer->stop_local_table)
+ peer->stop_local_table = last_local_table;
+ st = peer->stop_local_table->next;
while (1) {
if (!st)
st = peer->tables;
-
/* It remains some updates to ack */
if (st->last_get != st->last_acked) {
repl = peer_send_ackmsg(st, appctx);
@@ -2624,6 +2643,7 @@
repl = peer_send_teach_process_msgs(appctx, peer, st);
if (repl <= 0) {
HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ peer->stop_local_table = peer->last_local_table;
return repl;
}
}
@@ -2632,29 +2652,43 @@
else if (!(peer->flags & PEER_F_TEACH_FINISHED)) {
if (!(st->flags & SHTABLE_F_TEACH_STAGE1)) {
repl = peer_send_teach_stage1_msgs(appctx, peer, st);
- if (repl <= 0)
+ if (repl <= 0) {
+ peer->stop_local_table = peer->last_local_table;
return repl;
+ }
}
if (!(st->flags & SHTABLE_F_TEACH_STAGE2)) {
repl = peer_send_teach_stage2_msgs(appctx, peer, st);
- if (repl <= 0)
+ if (repl <= 0) {
+ peer->stop_local_table = peer->last_local_table;
return repl;
+ }
}
}
- if (st == last_local_table)
+ if (st == last_local_table) {
+ peer->stop_local_table = NULL;
break;
- st = st->next;
+ }
- updates_sent++;
- if (updates_sent >= peers_max_updates_at_once) {
+ /* This one is to be sure to restart from <st->next> if we are interrupted
+ * because of peer_send_teach_stage2_msgs or because the buffer is full
+ * when sending an ackmsg. In both cases the current <st> was evaluated and
+ * we must restart from <st->next>
+ */
+ peer->stop_local_table = st;
+
+ updates++;
+ if (updates >= peers_max_updates_at_once) {
/* pretend we're full so that we get back ASAP */
struct stconn *sc = appctx_sc(appctx);
sc_need_room(sc, 0);
return -1;
}
+
+ st = st->next;
}
}