[MEDIUM]: Prevent redispatcher from selecting the same server, version #3
When haproxy decides that session needs to be redispatched it chose a server,
but there is no guarantee for it to be a different one. So, it often
happens that selected server is exactly the same that it was previously, so
a client ends up with a 503 error anyway, especially when one sever has
much bigger weight than others.
Changes from the previous version:
- drop stupid and unnecessary SN_DIRECT changes
- assign_server(): use srvtoavoid to keep the old server and clear s->srv
so SRV_STATUS_NOSRV guarantees that t->srv == NULL (again)
and get_server_rr_with_conns has chances to work (previously
we were passing a NULL here)
- srv_redispatch_connect(): remove t->srv->cum_sess and t->srv->failed_conns
incrementing as t->srv was guaranteed to be NULL
- add avoididx to get_server_rr_with_conns. I hope I correctly understand this code.
- fix http_flush_cookie_flags() and move it to assign_server_and_queue()
directly. The code here was supposed to set CK_DOWN and clear CK_VALID,
but: (TX_CK_VALID | TX_CK_DOWN) == TX_CK_VALID == TX_CK_MASK so:
if ((txn->flags & TX_CK_MASK) == TX_CK_VALID)
txn->flags ^= (TX_CK_VALID | TX_CK_DOWN);
was really a:
if ((txn->flags & TX_CK_MASK) == TX_CK_VALID)
txn->flags &= TX_CK_VALID
Now haproxy logs "--DI" after redispatching connection.
- defer srv->redispatches++ and s->be->redispatches++ so there
are called only if a conenction was redispatched, not only
supposed to.
- don't increment lbconn if redispatcher selected the same sarver
- don't count unsuccessfully redispatched connections as redispatched
connections
- don't count redispatched connections as errors, so:
- the number of connections effectively served by a server is:
srv->cum_sess - srv->failed_conns - srv->retries - srv->redispatches
and
SUM(servers->failed_conns) == be->failed_conns
- requires the "Don't increment server connections too much + fix retries" patch
- needs little more testing and probably some discussion so reverting to the RFC state
Tests #1:
retries 4
redispatch
i) 1 server(s): b (wght=1, down)
b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0
-> request failed
ii) server(s): b (wght=1, down), u (wght=1, down)
b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1
u) sessions=1, lbtot=1, err_conn=1, retr=0, redis=0
-> request FAILED
iii) 2 server(s): b (wght=1, down), u (wght=1, up)
b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1
u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0
-> request OK
iv) 2 server(s): b (wght=100, down), u (wght=1, up)
b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1
u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0
-> request OK
v) 1 server(s): b (down for first 4 SYNS)
b) sessions=5, lbtot=1, err_conn=0, retr=4, redis=0
-> request OK
Tests #2:
retries 4
i) 1 server(s): b (down)
b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0
-> request FAILED
diff --git a/include/proto/backend.h b/include/proto/backend.h
index 444e53a..c4ffe85 100644
--- a/include/proto/backend.h
+++ b/include/proto/backend.h
@@ -50,10 +50,10 @@
* If any server is found, it will be returned and px->lbprm.map.rr_idx will be updated
* to point to the next server. If no valid server is found, NULL is returned.
*/
-static inline struct server *get_server_rr_with_conns(struct proxy *px)
+static inline struct server *get_server_rr_with_conns(struct proxy *px, struct server *srvtoavoid)
{
- int newidx;
- struct server *srv;
+ int newidx, avoididx;
+ struct server *srv, *avoided;
if (px->lbprm.tot_weight == 0)
return NULL;
@@ -65,17 +65,28 @@
px->lbprm.map.rr_idx = 0;
newidx = px->lbprm.map.rr_idx;
+ avoided = NULL;
do {
srv = px->lbprm.map.srv[newidx++];
if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) {
- px->lbprm.map.rr_idx = newidx;
- return srv;
+ /* make sure it is not the server we are try to exclude... */
+ if (srv != srvtoavoid) {
+ px->lbprm.map.rr_idx = newidx;
+ return srv;
+ }
+
+ avoided = srv; /* ...but remember that is was selected yet avoided */
+ avoididx = newidx;
}
if (newidx == px->lbprm.tot_weight)
newidx = 0;
} while (newidx != px->lbprm.map.rr_idx);
- return NULL;
+ if (avoided)
+ px->lbprm.map.rr_idx = avoididx;
+
+ /* return NULL or srvtoavoid if found */
+ return avoided;
}
diff --git a/include/proto/proto_http.h b/include/proto/proto_http.h
index da4ea0e..523471d 100644
--- a/include/proto/proto_http.h
+++ b/include/proto/proto_http.h
@@ -82,15 +82,6 @@
int stats_check_uri_auth(struct session *t, struct proxy *backend);
void init_proto_http();
-/* used to clear the cookie flags when a transaction failed on the server
- * designed by the cookie. We clear the CK_VALID bit and set the CK_DOWN.
- */
-static inline void http_flush_cookie_flags(struct http_txn *txn)
-{
- if ((txn->flags & TX_CK_MASK) == TX_CK_VALID)
- txn->flags ^= (TX_CK_VALID | TX_CK_DOWN);
-}
-
#endif /* _PROTO_PROTO_HTTP_H */
/*
diff --git a/src/backend.c b/src/backend.c
index b1452b7..1cd2a19 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -727,11 +727,10 @@
* the init tree if appropriate. If both trees are empty, return NULL.
* Saturated servers are skipped and requeued.
*/
-static struct server *fwrr_get_next_server(struct proxy *p)
+static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
{
- struct server *srv;
+ struct server *srv, *full, *avoided;
struct fwrr_group *grp;
- struct server *full;
int switched;
if (p->srv_act)
@@ -744,6 +743,7 @@
return NULL;
switched = 0;
+ avoided = NULL;
full = NULL; /* NULL-terminated list of saturated servers */
while (1) {
/* if we see an empty group, let's first try to collect weights
@@ -759,8 +759,13 @@
srv = fwrr_get_server_from_group(grp);
if (srv)
break;
- if (switched)
+ if (switched) {
+ if (avoided) {
+ srv = avoided;
+ break;
+ }
goto requeue_servers;
+ }
switched = 1;
fwrr_switch_trees(grp);
@@ -774,10 +779,15 @@
fwrr_update_position(grp, srv);
fwrr_dequeue_srv(srv);
grp->curr_pos++;
- if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv))
- break;
+ if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) {
+ /* make sure it is not the server we are trying to exclude... */
+ if (srv != srvtoavoid || avoided)
+ break;
+
+ avoided = srv; /* ...but remember that is was selected yet avoided */
+ }
- /* the server is saturated, let's chain it for later reinsertion */
+ /* the server is saturated or avoided, let's chain it for later reinsertion */
srv->next_full = full;
full = srv;
}
@@ -786,6 +796,9 @@
fwrr_queue_srv(srv);
requeue_servers:
+ /* Requeue all extracted servers. If full==srv then it was
+ * avoided (unsucessfully) and chained, omit it now.
+ */
if (unlikely(full != NULL)) {
if (switched) {
/* the tree has switched, requeue all extracted servers
@@ -793,7 +806,8 @@
* their weight matters.
*/
do {
- fwrr_queue_by_weight(grp->init, full);
+ if (likely(full != srv))
+ fwrr_queue_by_weight(grp->init, full);
full = full->next_full;
} while (full);
} else {
@@ -801,7 +815,8 @@
* so that they regain their expected place.
*/
do {
- fwrr_queue_srv(full);
+ if (likely(full != srv))
+ fwrr_queue_srv(full);
full = full->next_full;
} while (full);
}
@@ -893,10 +908,16 @@
int assign_server(struct session *s)
{
+
+ struct server *srvtoavoid;
+
#ifdef DEBUG_FULL
fprintf(stderr,"assign_server : s=%p\n",s);
#endif
+ srvtoavoid = s->srv;
+ s->srv = NULL;
+
if (s->pend_pos)
return SRV_STATUS_INTERNAL;
@@ -914,7 +935,7 @@
switch (s->be->lbprm.algo & BE_LB_ALGO) {
case BE_LB_ALGO_RR:
- s->srv = fwrr_get_next_server(s->be);
+ s->srv = fwrr_get_next_server(s->be, srvtoavoid);
if (!s->srv)
return SRV_STATUS_FULL;
break;
@@ -943,7 +964,7 @@
s->txn.req.sl.rq.u_l);
if (!s->srv) {
/* parameter not found, fall back to round robin on the map */
- s->srv = get_server_rr_with_conns(s->be);
+ s->srv = get_server_rr_with_conns(s->be, srvtoavoid);
if (!s->srv)
return SRV_STATUS_FULL;
}
@@ -952,8 +973,10 @@
/* unknown balancing algorithm */
return SRV_STATUS_INTERNAL;
}
- s->be->cum_lbconn++;
- s->srv->cum_lbconn++;
+ if (s->srv != srvtoavoid) {
+ s->be->cum_lbconn++;
+ s->srv->cum_lbconn++;
+ }
}
else if (s->be->options & PR_O_HTTP_PROXY) {
if (!s->srv_addr.sin_addr.s_addr)
@@ -1053,6 +1076,7 @@
int assign_server_and_queue(struct session *s)
{
struct pendconn *p;
+ struct server *srv;
int err;
if (s->pend_pos)
@@ -1067,9 +1091,8 @@
}
if (s->srv && s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue) {
+ /* it's left to the dispatcher to choose a server */
s->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
- s->srv = NULL;
- http_flush_cookie_flags(&s->txn);
} else {
/* a server does not need to be assigned, perhaps because we're in
* direct mode, or in dispatch or transparent modes where the server
@@ -1088,7 +1111,32 @@
}
/* a server needs to be assigned */
+ srv = s->srv;
err = assign_server(s);
+
+ if (srv) {
+ if (srv != s->srv) {
+ /* This session was previously dispatched to another server:
+ * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
+ * - set SN_REDISP if it was successfully redispatched
+ * - increment srv->redispatches and be->redispatches
+ */
+
+ if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
+ s->txn.flags &= ~TX_CK_MASK;
+ s->txn.flags |= TX_CK_DOWN;
+ }
+
+ s->flags |= SN_REDISP;
+
+ srv->redispatches++;
+ s->be->redispatches++;
+ } else {
+ srv->retries++;
+ s->be->retries++;
+ }
+ }
+
switch (err) {
case SRV_STATUS_OK:
if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
@@ -1414,17 +1462,11 @@
if (may_dequeue_tasks(t->srv, t->be))
task_wakeup(t->srv->queue_mgt);
- if (t->srv) {
- t->srv->cum_sess++;
- t->srv->failed_conns++;
- t->srv->redispatches++;
- }
- t->be->redispatches++;
+ if (t->srv)
+ t->srv->cum_sess++; //FIXME?
+ /* it's left to the dispatcher to choose a server */
t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
- t->flags |= SN_REDISP;
- t->srv = NULL; /* it's left to the dispatcher to choose a server */
- http_flush_cookie_flags(&t->txn);
return 0;
}
@@ -1454,10 +1496,7 @@
tv_eternity(&t->req->cex);
srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
503, error_message(t, HTTP_ERR_503));
- if (t->srv)
- t->srv->cum_sess++;
- if (t->srv)
- t->srv->failed_conns++;
+
t->be->failed_conns++;
return 1;
diff --git a/src/checks.c b/src/checks.c
index 53b7e54..0b8756b 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -72,14 +72,9 @@
* cookie and force to balance or use the dispatcher.
*/
- sess->srv->redispatches++;
- sess->be->redispatches++;
-
+ /* it's left to the dispatcher to choose a server */
sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
- sess->flags |= SN_REDISP;
- sess->srv = NULL; /* it's left to the dispatcher to choose a server */
- http_flush_cookie_flags(&sess->txn);
pendconn_free(pc);
task_wakeup(sess->task);
xferred++;
diff --git a/src/proto_http.c b/src/proto_http.c
index cd88905..f685201 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -2627,16 +2627,8 @@
if (may_dequeue_tasks(t->srv, t->be))
task_wakeup(t->srv->queue_mgt);
- if (t->srv) {
- t->srv->failed_conns++;
- t->srv->redispatches++;
- }
- t->be->redispatches++;
-
+ /* it's left to the dispatcher to choose a server */
t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
- t->flags |= SN_REDISP;
- t->srv = NULL; /* it's left to the dispatcher to choose a server */
- http_flush_cookie_flags(txn);
/* first, get a connection */
if (srv_redispatch_connect(t))