BUG/MINOR: server: don't miss proxy stats update on server state transitions

The backend "down" stats logic has been duplicated multiple times in
srv_update_status(), which makes the logic error-prone.

For example, the following bugfix was needed to compensate for a bug
introduced by copy-paste:

  d332f139 ("BUG/MINOR: server: update last_change on maint->ready transitions too")

While the above patch works great, we actually forgot to update the proxy
downtime as is done for other down->up transitions...

This simply illustrates that the current design is error-prone: it is very
easy to miss something in this area.

To properly update the proxy downtime stats on the maint->ready transition,
to clean up srv_update_status(), and to prevent similar bugs from being
introduced in the future, proxy/backend stats updates are now automatically
performed at the end of the server state change if needed.

Thus we can remove the existing updates that were performed at various places
within the function, which simplifies things a bit.

This patch depends on:
  - "MINOR: server: explicitly commit state change in srv_update_status()"

This could be backported to all stable versions.

Backport notes:

2.2:

Replace

  struct task *srv_cleanup_toremove_conns(struct task *task, void *context, unsigned int state)

by

  struct task *srv_cleanup_toremove_connections(struct task *task, void *context, unsigned short state)

(cherry picked from commit e80ddb18a8bba52649896b8f894bf91ef8e53ca6)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 36306b2d6c6bb99c3c6d84cc0ad797aece381a4e)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 8fb6c73786080f29d04545256d08b33dfbfe5268)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>

commit: 9aba5c0495edd66ff67ad50e7c9bc76f1934956a [log] [tgz]
author: Aurelien DARRAGON <adarragon@haproxy.com> Mon Apr 17 15:30:11 2023 +0200
committer: Christopher Faulet <cfaulet@haproxy.com> Fri May 26 10:26:32 2023 +0200
tree: 27877b73e069ba64726b5991983713ea668bc3a4
parent: 674ba6776a167e5c7ba455bca738dac0546bb2bf [diff] [blame]
diff --git a/src/server.c b/src/server.c
index 5065112..0fe53db 100644
--- a/src/server.c
+++ b/src/server.c

@@ -4946,9 +4946,6 @@
 				free_trash_chunk(tmptrash);
 				tmptrash = NULL;
 			}
-			if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
-				set_backend_down(s->proxy);
-
 			s->counters.down_trans++;
 		}
 		else if ((s->cur_state != SRV_ST_STOPPING) && (s->next_state == SRV_ST_STOPPING)) {
@@ -4976,17 +4973,9 @@
 				free_trash_chunk(tmptrash);
 				tmptrash = NULL;
 			}
-
-			if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
-				set_backend_down(s->proxy);
 		}
 		else if (((s->cur_state != SRV_ST_RUNNING) && (s->next_state == SRV_ST_RUNNING))
 			 || ((s->cur_state != SRV_ST_STARTING) && (s->next_state == SRV_ST_STARTING))) {
-			if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
-				if (s->proxy->last_change < now.tv_sec)		// ignore negative times
-					s->proxy->down_time += now.tv_sec - s->proxy->last_change;
-				s->proxy->last_change = now.tv_sec;
-			}
 
 			if (s->cur_state == SRV_ST_STOPPED && s->last_change < now.tv_sec)	// ignore negative times
 				s->down_time += now.tv_sec - s->last_change;
@@ -5037,9 +5026,6 @@
 				free_trash_chunk(tmptrash);
 				tmptrash = NULL;
 			}
-
-			if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
-				set_backend_down(s->proxy);
 		}
 		else if (s->cur_eweight != s->next_eweight) {
 			/* now propagate the status change to any LB algorithms */
@@ -5053,9 +5039,6 @@
 				if (px->lbprm.set_server_status_down)
 					px->lbprm.set_server_status_down(s);
 			}
-
-			if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
-				set_backend_down(s->proxy);
 		}
 
 		s->next_admin = next_admin;
@@ -5131,9 +5114,6 @@
 				free_trash_chunk(tmptrash);
 				tmptrash = NULL;
 			}
-			if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
-				set_backend_down(s->proxy);
-
 			s->counters.down_trans++;
 		}
 	}
@@ -5153,7 +5133,6 @@
 		 * that the server might still be in drain mode, which is naturally dealt
 		 * with by the lower level functions.
 		 */
-
 		if (s->check.state & CHK_ST_ENABLED) {
 			s->check.state &= ~CHK_ST_PAUSED;
 			check->health = check->rise; /* start OK but check immediately */
@@ -5224,11 +5203,6 @@
 				px->lbprm.set_server_status_down(s);
 		}
 
-		if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
-			set_backend_down(s->proxy);
-		else if (!prev_srv_count && (s->proxy->srv_bck || s->proxy->srv_act))
-			s->proxy->last_change = now.tv_sec;
-
 		/* If the server is set with "on-marked-up shutdown-backup-sessions",
 		 * and it's not a backup server and its effective weight is > 0,
 		 * then it can accept new connections, so we shut down all streams
@@ -5332,17 +5306,9 @@
 				free_trash_chunk(tmptrash);
 				tmptrash = NULL;
 			}
-
-			if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
-				set_backend_down(s->proxy);
 		}
 		else if ((s->cur_admin & SRV_ADMF_DRAIN) && !(s->next_admin & SRV_ADMF_DRAIN)) {
 			/* OK completely leaving drain mode */
-			if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
-				if (s->proxy->last_change < now.tv_sec)         // ignore negative times
-					s->proxy->down_time += now.tv_sec - s->proxy->last_change;
-				s->proxy->last_change = now.tv_sec;
-			}
 
 			if (s->last_change < now.tv_sec)                        // ignore negative times
 				s->down_time += now.tv_sec - s->last_change;
@@ -5425,6 +5391,18 @@
 	 * by some lb state change function), so we don't miss anything
 	 */
 	srv_lb_commit_status(s);
+
+	/* check if backend stats must be updated due to the server state change */
+	if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+		set_backend_down(s->proxy); /* backend going down */
+	else if (!prev_srv_count && (s->proxy->srv_bck || s->proxy->srv_act)) {
+		/* backend was down and is back up again:
+		 * no helper function, updating last_change and backend downtime stats
+		 */
+		if (s->proxy->last_change < now.tv_sec)         // ignore negative times
+			s->proxy->down_time += now.tv_sec - s->proxy->last_change;
+		s->proxy->last_change = now.tv_sec;
+	}
 }
 
 struct task *srv_cleanup_toremove_conns(struct task *task, void *context, unsigned int state)
commit	9aba5c0495edd66ff67ad50e7c9bc76f1934956a	[log] [tgz]
author	Aurelien DARRAGON <adarragon@haproxy.com>	Mon Apr 17 15:30:11 2023 +0200
committer	Christopher Faulet <cfaulet@haproxy.com>	Fri May 26 10:26:32 2023 +0200
tree	27877b73e069ba64726b5991983713ea668bc3a4
parent	674ba6776a167e5c7ba455bca738dac0546bb2bf [diff] [blame]