BUG/MINOR: server: don't miss proxy stats update on server state transitions
backend "down" stats logic has been duplicated multiple times in
srv_update_status(), resulting in the logic now being error-prone.
For example, the following bugfix was needed to compensate for a
copy-paste introduced bug: d332f139
("BUG/MINOR: server: update last_change on maint->ready transitions too")
While the above patch works great, we actually forgot to update the
proxy downtime like it is done for other down->up transitions...
This is simply illustrating that the current design is error-prone,
it is very easy to miss something in this area.
To properly update the proxy downtime stats on the maint->ready transition,
to cleanup srv_update_status() and to prevent similar bugs from being
introduced in the future, proxy/backend stats update are now automatically
performed at the end of the server state change if needed.
Thus we can remove existing updates that were performed at various places
within the function, this simplifies things a bit.
This patch depends on:
- "MINOR: server: explicitly commit state change in srv_update_status()"
This could be backported to all stable versions.
Backport notes:
2.2:
Replace
struct task *srv_cleanup_toremove_conns(struct task *task, void *context, unsigned int state)
by
struct task *srv_cleanup_toremove_connections(struct task *task, void *context, unsigned short state)
(cherry picked from commit e80ddb18a8bba52649896b8f894bf91ef8e53ca6)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 36306b2d6c6bb99c3c6d84cc0ad797aece381a4e)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 8fb6c73786080f29d04545256d08b33dfbfe5268)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
diff --git a/src/server.c b/src/server.c
index 5065112..0fe53db 100644
--- a/src/server.c
+++ b/src/server.c
@@ -4946,9 +4946,6 @@
free_trash_chunk(tmptrash);
tmptrash = NULL;
}
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
-
s->counters.down_trans++;
}
else if ((s->cur_state != SRV_ST_STOPPING) && (s->next_state == SRV_ST_STOPPING)) {
@@ -4976,17 +4973,9 @@
free_trash_chunk(tmptrash);
tmptrash = NULL;
}
-
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
}
else if (((s->cur_state != SRV_ST_RUNNING) && (s->next_state == SRV_ST_RUNNING))
|| ((s->cur_state != SRV_ST_STARTING) && (s->next_state == SRV_ST_STARTING))) {
- if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
- if (s->proxy->last_change < now.tv_sec) // ignore negative times
- s->proxy->down_time += now.tv_sec - s->proxy->last_change;
- s->proxy->last_change = now.tv_sec;
- }
if (s->cur_state == SRV_ST_STOPPED && s->last_change < now.tv_sec) // ignore negative times
s->down_time += now.tv_sec - s->last_change;
@@ -5037,9 +5026,6 @@
free_trash_chunk(tmptrash);
tmptrash = NULL;
}
-
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
}
else if (s->cur_eweight != s->next_eweight) {
/* now propagate the status change to any LB algorithms */
@@ -5053,9 +5039,6 @@
if (px->lbprm.set_server_status_down)
px->lbprm.set_server_status_down(s);
}
-
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
}
s->next_admin = next_admin;
@@ -5131,9 +5114,6 @@
free_trash_chunk(tmptrash);
tmptrash = NULL;
}
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
-
s->counters.down_trans++;
}
}
@@ -5153,7 +5133,6 @@
* that the server might still be in drain mode, which is naturally dealt
* with by the lower level functions.
*/
-
if (s->check.state & CHK_ST_ENABLED) {
s->check.state &= ~CHK_ST_PAUSED;
check->health = check->rise; /* start OK but check immediately */
@@ -5224,11 +5203,6 @@
px->lbprm.set_server_status_down(s);
}
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
- else if (!prev_srv_count && (s->proxy->srv_bck || s->proxy->srv_act))
- s->proxy->last_change = now.tv_sec;
-
/* If the server is set with "on-marked-up shutdown-backup-sessions",
* and it's not a backup server and its effective weight is > 0,
* then it can accept new connections, so we shut down all streams
@@ -5332,17 +5306,9 @@
free_trash_chunk(tmptrash);
tmptrash = NULL;
}
-
- if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
- set_backend_down(s->proxy);
}
else if ((s->cur_admin & SRV_ADMF_DRAIN) && !(s->next_admin & SRV_ADMF_DRAIN)) {
/* OK completely leaving drain mode */
- if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
- if (s->proxy->last_change < now.tv_sec) // ignore negative times
- s->proxy->down_time += now.tv_sec - s->proxy->last_change;
- s->proxy->last_change = now.tv_sec;
- }
if (s->last_change < now.tv_sec) // ignore negative times
s->down_time += now.tv_sec - s->last_change;
@@ -5425,6 +5391,18 @@
* by some lb state change function), so we don't miss anything
*/
srv_lb_commit_status(s);
+
+ /* check if backend stats must be updated due to the server state change */
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy); /* backend going down */
+ else if (!prev_srv_count && (s->proxy->srv_bck || s->proxy->srv_act)) {
+ /* backend was down and is back up again:
+ * no helper function, updating last_change and backend downtime stats
+ */
+ if (s->proxy->last_change < now.tv_sec) // ignore negative times
+ s->proxy->down_time += now.tv_sec - s->proxy->last_change;
+ s->proxy->last_change = now.tv_sec;
+ }
}
struct task *srv_cleanup_toremove_conns(struct task *task, void *context, unsigned int state)