BUG/MEDIUM: peers: Fix synchro for huge number of tables

The number of updates sent at once was limited to avoid looping for too
long when emitting updates, for instance when the buffer size is huge or
when there are many tables to sync. The limit is configurable and is set
to 200 by default. However, this fix introduced a bug: it is impossible
to synchronize two peers if the number of tables is higher than this
limit. Thus, by default, it is not possible to sync two peers when there
are more than 200 tables to sync.

Technically speaking, a teaching process is finished when we have looped
over all tables without sending any new update message. Because the number
of updates per call is limited, the loop is split across several calls.
However, the restart point for the next loop is always the last table for
which an update message was emitted. Thus, with more tables than the
limit, the loop never reaches its end point.

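For illustration only, here is a condensed sketch of the old loop (not the
literal code), showing why the finish point can never be reached once the
number of tables exceeds the limit:

    /* old behaviour, simplified */
    st = last_local_table->next;          /* last table that emitted updates */
    while (1) {
            ...                           /* teach <st>, may emit updates and
                                           * move peer->last_local_table */
            if (st == last_local_table)
                    break;                /* finish point */
            st = st->next;
            if (++updates_sent >= peers_max_updates_at_once)
                    return -1;            /* interrupted: the next call restarts
                                           * from peer->last_local_table, not
                                           * from <st>, so tables beyond the
                                           * limit may never be reached */
    }
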
Worse, in conjunction with the bug fixed by "BUG/MEDIUM: peers: Be sure to
always refresh recconnect timer in sync task", it is possible to trigger
the watchdog, because the applet may be woken up in a loop, each time
leaving while requesting more room even though its buffer is empty.

To fix the issue, the restart conditions for a teaching loop were changed.
If the teach process is interrupted, we now save the restart point, called
stop_local_table. It is the last table evaluated during the previous loop.
This restart point is reset when the teach process is finished.

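Condensed from the diff below, the restart logic now becomes:

    if (!peer->stop_local_table)
            peer->stop_local_table = last_local_table;
    st = peer->stop_local_table->next;    /* resume where the previous call stopped */
    while (1) {
            ...                           /* teach <st>, may emit updates */
            if (st == last_local_table) {
                    peer->stop_local_table = NULL;  /* teach process finished */
                    break;
            }
            peer->stop_local_table = st;  /* <st> was evaluated: restart from
                                           * <st->next> on the next call */
            if (++updates >= peers_max_updates_at_once)
                    return -1;            /* interrupted, will resume after <st> */
            st = st->next;
    }
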
In addition, the updates_sent variable in peer_send_msgs() was renamed to
updates to avoid ambiguity. Indeed, this variable is incremented whether
or not messages are sent.

This patch must be backported as far as 2.6.
(cherry picked from commit 60e7116be0d01edc5117bb2c514c363e5cee7def)
Signed-off-by: Amaury Denoyelle <adenoyelle@haproxy.com>
diff --git a/include/haproxy/peers-t.h b/include/haproxy/peers-t.h
index 9535518..42199ff 100644
--- a/include/haproxy/peers-t.h
+++ b/include/haproxy/peers-t.h
@@ -77,7 +77,8 @@
uint32_t coll; /* connection collisions counter */
struct appctx *appctx; /* the appctx running it */
struct shared_table *remote_table;
- struct shared_table *last_local_table;
+ struct shared_table *last_local_table; /* Last table that emitted update messages during a teach process */
+ struct shared_table *stop_local_table; /* last evaluated table, used as restart point for the next teach process */
struct shared_table *tables;
struct server *srv;
struct dcache *dcache; /* dictionary cache */
diff --git a/src/peers.c b/src/peers.c
index 2eb262b..c4760e6 100644
--- a/src/peers.c
+++ b/src/peers.c
@@ -1057,7 +1057,7 @@
flush_dcache(peer);
/* Re-init current table pointers to force announcement on re-connect */
- peer->remote_table = peer->last_local_table = NULL;
+ peer->remote_table = peer->last_local_table = peer->stop_local_table = NULL;
peer->appctx = NULL;
if (peer->flags & PEER_F_LEARN_ASSIGN) {
/* unassign current peer for learning */
@@ -2574,7 +2574,25 @@
* Returns 1 if succeeded, or -1 or 0 if failed.
* -1 means an internal error occurred, 0 is for a peer protocol error leading
* to a peer state change (from the peer I/O handler point of view).
+ *
+ * - peer->last_local_table is the last table for which we sent an update
+ *   message.
+ *
+ * - peer->stop_local_table is the last evaluated table. It is unset when the
+ *   teaching process starts. But we use it as a
+ *   restart point when the loop is interrupted. It is
+ *   especially useful when the number of tables exceeds
+ *   the peers_max_updates_at_once value.
+ *
+ * When a teaching loop is started, the peer's last_local_table is saved in a
+ * local variable. This variable is used as a finish point. When the current
+ * table is equal to it, it means all tables were evaluated, all updates were
+ * sent and the teaching process is finished.
+ *
+ * peer->stop_local_table is always NULL when the teaching process begins. It is
+ * only reset at the end. In the meantime, it always points to a table.
*/
+
static inline int peer_send_msgs(struct appctx *appctx,
struct peer *peer, struct peers *peers)
{
@@ -2596,17 +2614,18 @@
if (peer->tables) {
struct shared_table *st;
struct shared_table *last_local_table;
- int updates_sent = 0;
+ int updates = 0;
last_local_table = peer->last_local_table;
if (!last_local_table)
last_local_table = peer->tables;
- st = last_local_table->next;
+ if (!peer->stop_local_table)
+ peer->stop_local_table = last_local_table;
+ st = peer->stop_local_table->next;
while (1) {
if (!st)
st = peer->tables;
-
/* It remains some updates to ack */
if (st->last_get != st->last_acked) {
repl = peer_send_ackmsg(st, appctx);
@@ -2624,6 +2643,7 @@
repl = peer_send_teach_process_msgs(appctx, peer, st);
if (repl <= 0) {
HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ peer->stop_local_table = peer->last_local_table;
return repl;
}
}
@@ -2632,29 +2652,43 @@
else if (!(peer->flags & PEER_F_TEACH_FINISHED)) {
if (!(st->flags & SHTABLE_F_TEACH_STAGE1)) {
repl = peer_send_teach_stage1_msgs(appctx, peer, st);
- if (repl <= 0)
+ if (repl <= 0) {
+ peer->stop_local_table = peer->last_local_table;
return repl;
+ }
}
if (!(st->flags & SHTABLE_F_TEACH_STAGE2)) {
repl = peer_send_teach_stage2_msgs(appctx, peer, st);
- if (repl <= 0)
+ if (repl <= 0) {
+ peer->stop_local_table = peer->last_local_table;
return repl;
+ }
}
}
- if (st == last_local_table)
+ if (st == last_local_table) {
+ peer->stop_local_table = NULL;
break;
- st = st->next;
+ }
- updates_sent++;
- if (updates_sent >= peers_max_updates_at_once) {
+ /* This one is to be sure to restart from <st->next> if we are interrupted
+ * because of peer_send_teach_stage2_msgs or because the buffer is full
+ * when sending an ackmsg. In both cases the current <st> was evaluated and
+ * we must restart from <st->next>
+ */
+ peer->stop_local_table = st;
+
+ updates++;
+ if (updates >= peers_max_updates_at_once) {
/* pretend we're full so that we get back ASAP */
struct stconn *sc = appctx_sc(appctx);
sc_need_room(sc, 0);
return -1;
}
+
+ st = st->next;
}
}