[MEDIUM]: Prevent redispatcher from selecting the same server, version #3

When haproxy decides that session needs to be redispatched it chose a server,
but there is no guarantee for it to be a different one. So, it often
happens that selected server is exactly the same that it was previously, so
a client ends up with a 503 error anyway, especially when one sever has
much bigger weight than others.

Changes from the previous version:
 - drop stupid and unnecessary SN_DIRECT changes

 - assign_server(): use srvtoavoid to keep the old server and clear s->srv
    so SRV_STATUS_NOSRV guarantees that t->srv == NULL (again)
    and get_server_rr_with_conns has chances to work (previously
    we were passing a NULL here)

 - srv_redispatch_connect(): remove t->srv->cum_sess and t->srv->failed_conns
   incrementing as t->srv was guaranteed to be NULL

 - add avoididx to get_server_rr_with_conns. I hope I correctly understand this code.

 - fix http_flush_cookie_flags() and move it to assign_server_and_queue()
   directly. The code here was supposed to set CK_DOWN and clear CK_VALID,
   but: (TX_CK_VALID | TX_CK_DOWN) == TX_CK_VALID == TX_CK_MASK so:
	if ((txn->flags & TX_CK_MASK) == TX_CK_VALID)
		txn->flags ^= (TX_CK_VALID | TX_CK_DOWN);
   was really a:
	if ((txn->flags & TX_CK_MASK) == TX_CK_VALID)
		txn->flags &= TX_CK_VALID

   Now haproxy logs "--DI" after redispatching connection.

 - defer srv->redispatches++ and s->be->redispatches++ so there
   are called only if a conenction was redispatched, not only
   supposed to.

 - don't increment lbconn if redispatcher selected the same sarver

 - don't count unsuccessfully redispatched connections as redispatched
   connections

 - don't count redispatched connections as errors, so:

 - the number of connections effectively served by a server is:
 srv->cum_sess - srv->failed_conns - srv->retries - srv->redispatches
   and
 SUM(servers->failed_conns) == be->failed_conns

 - requires the "Don't increment server connections too much + fix retries" patch

 - needs little more testing and probably some discussion so reverting to the RFC state

Tests #1:
 retries 4
 redispatch

i) 1 server(s): b (wght=1, down)
  b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0
  -> request failed

ii) server(s): b (wght=1, down), u (wght=1, down)
  b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1
  u) sessions=1, lbtot=1, err_conn=1, retr=0, redis=0
  -> request FAILED

iii) 2 server(s): b (wght=1, down), u (wght=1, up)
  b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1
  u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0
  -> request OK

iv) 2 server(s): b (wght=100, down), u (wght=1, up)
  b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1
  u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0
  -> request OK

v) 1 server(s): b (down for first 4 SYNS)
  b) sessions=5, lbtot=1, err_conn=0, retr=4, redis=0
  -> request OK

Tests #2:
 retries 4

i) 1 server(s): b (down)
  b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0
  -> request FAILED
diff --git a/include/proto/backend.h b/include/proto/backend.h
index 444e53a..c4ffe85 100644
--- a/include/proto/backend.h
+++ b/include/proto/backend.h
@@ -50,10 +50,10 @@
  * If any server is found, it will be returned and px->lbprm.map.rr_idx will be updated
  * to point to the next server. If no valid server is found, NULL is returned.
  */
-static inline struct server *get_server_rr_with_conns(struct proxy *px)
+static inline struct server *get_server_rr_with_conns(struct proxy *px, struct server *srvtoavoid)
 {
-	int newidx;
-	struct server *srv;
+	int newidx, avoididx;
+	struct server *srv, *avoided;
 
 	if (px->lbprm.tot_weight == 0)
 		return NULL;
@@ -65,17 +65,28 @@
 		px->lbprm.map.rr_idx = 0;
 	newidx = px->lbprm.map.rr_idx;
 
+	avoided = NULL;
 	do {
 		srv = px->lbprm.map.srv[newidx++];
 		if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) {
-			px->lbprm.map.rr_idx = newidx;
-			return srv;
+			/* make sure it is not the server we are try to exclude... */
+			if (srv != srvtoavoid) {
+				px->lbprm.map.rr_idx = newidx;
+				return srv;
+			}
+
+			avoided = srv;	/* ...but remember that is was selected yet avoided */
+			avoididx = newidx;
 		}
 		if (newidx == px->lbprm.tot_weight)
 			newidx = 0;
 	} while (newidx != px->lbprm.map.rr_idx);
 
-	return NULL;
+	if (avoided)
+		px->lbprm.map.rr_idx = avoididx;
+
+	/* return NULL or srvtoavoid if found */
+	return avoided;
 }
 
 
diff --git a/include/proto/proto_http.h b/include/proto/proto_http.h
index da4ea0e..523471d 100644
--- a/include/proto/proto_http.h
+++ b/include/proto/proto_http.h
@@ -82,15 +82,6 @@
 int stats_check_uri_auth(struct session *t, struct proxy *backend);
 void init_proto_http();
 
-/* used to clear the cookie flags when a transaction failed on the server
- * designed by the cookie. We clear the CK_VALID bit and set the CK_DOWN.
- */
-static inline void http_flush_cookie_flags(struct http_txn *txn)
-{
-	if ((txn->flags & TX_CK_MASK) == TX_CK_VALID)
-		txn->flags ^= (TX_CK_VALID | TX_CK_DOWN);
-}
-
 #endif /* _PROTO_PROTO_HTTP_H */
 
 /*
diff --git a/src/backend.c b/src/backend.c
index b1452b7..1cd2a19 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -727,11 +727,10 @@
  * the init tree if appropriate. If both trees are empty, return NULL.
  * Saturated servers are skipped and requeued.
  */
-static struct server *fwrr_get_next_server(struct proxy *p)
+static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 {
-	struct server *srv;
+	struct server *srv, *full, *avoided;
 	struct fwrr_group *grp;
-	struct server *full;
 	int switched;
 
 	if (p->srv_act)
@@ -744,6 +743,7 @@
 		return NULL;
 
 	switched = 0;
+	avoided = NULL;
 	full = NULL; /* NULL-terminated list of saturated servers */
 	while (1) {
 		/* if we see an empty group, let's first try to collect weights
@@ -759,8 +759,13 @@
 			srv = fwrr_get_server_from_group(grp);
 			if (srv)
 				break;
-			if (switched)
+			if (switched) {
+				if (avoided) {
+					srv = avoided;
+					break;
+				}
 				goto requeue_servers;
+			}
 			switched = 1;
 			fwrr_switch_trees(grp);
 
@@ -774,10 +779,15 @@
 		fwrr_update_position(grp, srv);
 		fwrr_dequeue_srv(srv);
 		grp->curr_pos++;
-		if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv))
-			break;
+		if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) {
+			/* make sure it is not the server we are trying to exclude... */
+			if (srv != srvtoavoid || avoided)
+				break;
+
+			avoided = srv; /* ...but remember that is was selected yet avoided */
+		}
 
-		/* the server is saturated, let's chain it for later reinsertion */
+		/* the server is saturated or avoided, let's chain it for later reinsertion */
 		srv->next_full = full;
 		full = srv;
 	}
@@ -786,6 +796,9 @@
 	fwrr_queue_srv(srv);
 
  requeue_servers:
+	/* Requeue all extracted servers. If full==srv then it was
+	 * avoided (unsucessfully) and chained, omit it now.
+	 */
 	if (unlikely(full != NULL)) {
 		if (switched) {
 			/* the tree has switched, requeue all extracted servers
@@ -793,7 +806,8 @@
 			 * their weight matters.
 			 */
 			do {
-				fwrr_queue_by_weight(grp->init, full);
+				if (likely(full != srv))
+					fwrr_queue_by_weight(grp->init, full);
 				full = full->next_full;
 			} while (full);
 		} else {
@@ -801,7 +815,8 @@
 			 * so that they regain their expected place.
 			 */
 			do {
-				fwrr_queue_srv(full);
+				if (likely(full != srv))
+					fwrr_queue_srv(full);
 				full = full->next_full;
 			} while (full);
 		}
@@ -893,10 +908,16 @@
 
 int assign_server(struct session *s)
 {
+
+	struct server *srvtoavoid;
+
 #ifdef DEBUG_FULL
 	fprintf(stderr,"assign_server : s=%p\n",s);
 #endif
 
+	srvtoavoid = s->srv;
+	s->srv = NULL;
+
 	if (s->pend_pos)
 		return SRV_STATUS_INTERNAL;
 
@@ -914,7 +935,7 @@
 
 			switch (s->be->lbprm.algo & BE_LB_ALGO) {
 			case BE_LB_ALGO_RR:
-				s->srv = fwrr_get_next_server(s->be);
+				s->srv = fwrr_get_next_server(s->be, srvtoavoid);
 				if (!s->srv)
 					return SRV_STATUS_FULL;
 				break;
@@ -943,7 +964,7 @@
 						       s->txn.req.sl.rq.u_l);
 				if (!s->srv) {
 					/* parameter not found, fall back to round robin on the map */
-					s->srv = get_server_rr_with_conns(s->be);
+					s->srv = get_server_rr_with_conns(s->be, srvtoavoid);
 					if (!s->srv)
 						return SRV_STATUS_FULL;
 				}
@@ -952,8 +973,10 @@
 				/* unknown balancing algorithm */
 				return SRV_STATUS_INTERNAL;
 			}
-			s->be->cum_lbconn++;
-			s->srv->cum_lbconn++;
+			if (s->srv != srvtoavoid) {
+				s->be->cum_lbconn++;
+				s->srv->cum_lbconn++;
+			}
 		}
 		else if (s->be->options & PR_O_HTTP_PROXY) {
 			if (!s->srv_addr.sin_addr.s_addr)
@@ -1053,6 +1076,7 @@
 int assign_server_and_queue(struct session *s)
 {
 	struct pendconn *p;
+	struct server *srv;
 	int err;
 
 	if (s->pend_pos)
@@ -1067,9 +1091,8 @@
 		}
 
 		if (s->srv && s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue) {
+			/* it's left to the dispatcher to choose a server */
 			s->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
-			s->srv = NULL;
-			http_flush_cookie_flags(&s->txn);
 		} else {
 			/* a server does not need to be assigned, perhaps because we're in
 			 * direct mode, or in dispatch or transparent modes where the server
@@ -1088,7 +1111,32 @@
 	}
 
 	/* a server needs to be assigned */
+	srv = s->srv;
 	err = assign_server(s);
+
+	if (srv) {
+		if (srv != s->srv) {
+			/* This session was previously dispatched to another server:
+			 *  - set TX_CK_DOWN if txn.flags was TX_CK_VALID
+			 *  - set SN_REDISP if it was successfully redispatched
+			 *  - increment srv->redispatches and be->redispatches
+			 */
+
+			if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
+				s->txn.flags &= ~TX_CK_MASK;
+				s->txn.flags |= TX_CK_DOWN;
+			}
+
+			s->flags |= SN_REDISP;
+
+			srv->redispatches++;
+			s->be->redispatches++;
+		} else {
+			srv->retries++;
+			s->be->retries++;
+		}
+	}
+
 	switch (err) {
 	case SRV_STATUS_OK:
 		if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
@@ -1414,17 +1462,11 @@
 	if (may_dequeue_tasks(t->srv, t->be))
 		task_wakeup(t->srv->queue_mgt);
 
-	if (t->srv) {
-		t->srv->cum_sess++;
-		t->srv->failed_conns++;
-		t->srv->redispatches++;
-	}
-	t->be->redispatches++;
+	if (t->srv)
+		t->srv->cum_sess++;		//FIXME?
 
+	/* it's left to the dispatcher to choose a server */
 	t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
-	t->flags |= SN_REDISP;
-	t->srv = NULL; /* it's left to the dispatcher to choose a server */
-	http_flush_cookie_flags(&t->txn);
 	return 0;
 }
 
@@ -1454,10 +1496,7 @@
 		tv_eternity(&t->req->cex);
 		srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
 				   503, error_message(t, HTTP_ERR_503));
-		if (t->srv)
-			t->srv->cum_sess++;
-		if (t->srv)
-			t->srv->failed_conns++;
+
 		t->be->failed_conns++;
 
 		return 1;
diff --git a/src/checks.c b/src/checks.c
index 53b7e54..0b8756b 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -72,14 +72,9 @@
 			 * cookie and force to balance or use the dispatcher.
 			 */
 
-			sess->srv->redispatches++;
-			sess->be->redispatches++;
-
+			/* it's left to the dispatcher to choose a server */
 			sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
-			sess->flags |= SN_REDISP;
 
-			sess->srv = NULL; /* it's left to the dispatcher to choose a server */
-			http_flush_cookie_flags(&sess->txn);
 			pendconn_free(pc);
 			task_wakeup(sess->task);
 			xferred++;
diff --git a/src/proto_http.c b/src/proto_http.c
index cd88905..f685201 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -2627,16 +2627,8 @@
 				if (may_dequeue_tasks(t->srv, t->be))
 					task_wakeup(t->srv->queue_mgt);
 
-				if (t->srv) {
-					t->srv->failed_conns++;
-					t->srv->redispatches++;
-				}
-				t->be->redispatches++;
-
+				/* it's left to the dispatcher to choose a server */
 				t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
-				t->flags |= SN_REDISP;
-				t->srv = NULL; /* it's left to the dispatcher to choose a server */
-				http_flush_cookie_flags(txn);
 
 				/* first, get a connection */
 				if (srv_redispatch_connect(t))