BUG/MAJOR: server: fix deadlock when changing maxconn via agent-check
The server_parse_maxconn_change_request locks the server lock. However,
this function can be called via agent-checks or lua code which already
lock it. This bug has been introduced by the following commit :
commit 79a88ba3d09f7e2b73ae27cb5d24cc087a548fa6
BUG/MAJOR: server: prevent deadlock when using 'set maxconn server'
This commit tried to fix another deadlock with can occur because
previoulsy server_parse_maxconn_change_request requires the server lock
to be held. However, it may call internally process_srv_queue which also
locks the server lock. The locking policy has thus been updated. The fix
is functional for the CLI 'set maxconn' but fails to address the
agent-check / lua counterparts.
This new issue is fixed in two steps :
- changes from the above commit have been reverted. This means that
server_parse_maxconn_change_request must again be called with the
server lock.
- to counter the deadlock fixed by the above commit, process_srv_queue
now takes an argument to render the server locking optional if the
caller already held it. This is only used by
server_parse_maxconn_change_request.
The above commit was subject to backport up to 1.8. Thus this commit
must be backported in every release where it is already present.
(cherry picked from commit 0274286dd3768c0d5e58588a1cb7e7e710fbc9d4)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit caaafd055edcbceb3d57ed9672b90900ec52a203)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
diff --git a/include/haproxy/queue.h b/include/haproxy/queue.h
index f61181a..a5d52b3 100644
--- a/include/haproxy/queue.h
+++ b/include/haproxy/queue.h
@@ -34,7 +34,7 @@
struct pendconn *pendconn_add(struct stream *strm);
int pendconn_dequeue(struct stream *strm);
-void process_srv_queue(struct server *s);
+void process_srv_queue(struct server *s, int server_locked);
unsigned int srv_dynamic_maxconn(const struct server *s);
int pendconn_redistribute(struct server *s);
int pendconn_grab_from_px(struct server *s);
diff --git a/include/haproxy/stream.h b/include/haproxy/stream.h
index 2606c99..ab870fa 100644
--- a/include/haproxy/stream.h
+++ b/include/haproxy/stream.h
@@ -398,7 +398,7 @@
((s->be->lbprm.algo & BE_LB_KIND) == BE_LB_KIND_RR)))) {
sess_change_server(s, NULL);
if (may_dequeue_tasks(objt_server(s->target), s->be))
- process_srv_queue(objt_server(s->target));
+ process_srv_queue(objt_server(s->target), 0);
s->flags &= ~(SF_DIRECT | SF_ASSIGNED | SF_ADDR_SET);
si->state = SI_ST_REQ;
diff --git a/src/backend.c b/src/backend.c
index 3181e74..c716ebb 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -819,7 +819,7 @@
sess_change_server(s, srv);
} else {
if (may_dequeue_tasks(conn_slot, s->be))
- process_srv_queue(conn_slot);
+ process_srv_queue(conn_slot, 0);
}
}
@@ -1734,7 +1734,7 @@
/* release other streams waiting for this server */
if (may_dequeue_tasks(srv, s->be))
- process_srv_queue(srv);
+ process_srv_queue(srv, 0);
return 1;
}
/* if we get here, it's because we got SRV_STATUS_OK, which also
@@ -1810,7 +1810,7 @@
/* release other streams waiting for this server */
sess_change_server(s, NULL);
if (may_dequeue_tasks(srv, s->be))
- process_srv_queue(srv);
+ process_srv_queue(srv, 0);
/* Failed and not retryable. */
si_shutr(si);
@@ -2138,7 +2138,7 @@
_HA_ATOMIC_ADD(&s->be->be_counters.failed_conns, 1);
sess_change_server(s, NULL);
if (may_dequeue_tasks(objt_server(s->target), s->be))
- process_srv_queue(objt_server(s->target));
+ process_srv_queue(objt_server(s->target), 0);
/* shutw is enough so stop a connecting socket */
si_shutw(si);
diff --git a/src/cli.c b/src/cli.c
index 2460e53..014a166 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -2416,7 +2416,7 @@
HA_ATOMIC_SUB(&__objt_server(s->target)->cur_sess, 1);
}
if (may_dequeue_tasks(__objt_server(s->target), be))
- process_srv_queue(__objt_server(s->target));
+ process_srv_queue(__objt_server(s->target), 0);
}
s->target = NULL;
diff --git a/src/queue.c b/src/queue.c
index fb49676..1ef0607 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -336,14 +336,16 @@
}
/* Manages a server's connection queue. This function will try to dequeue as
- * many pending streams as possible, and wake them up.
+ * many pending streams as possible, and wake them up. <server_locked> must
+ * only be set if the caller already hold the server lock.
*/
-void process_srv_queue(struct server *s)
+void process_srv_queue(struct server *s, int server_locked)
{
struct proxy *p = s->proxy;
int maxconn;
- HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ if (!server_locked)
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
maxconn = srv_dynamic_maxconn(s);
while (s->served < maxconn) {
@@ -352,7 +354,8 @@
break;
}
HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
- HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ if (!server_locked)
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
}
/* Adds the stream <strm> to the pending connection queue of server <strm>->srv
diff --git a/src/server.c b/src/server.c
index 3569fd1..b6092b1 100644
--- a/src/server.c
+++ b/src/server.c
@@ -1382,16 +1382,17 @@
else if (end[0] != '\0')
return "Trailing garbage in maxconn string";
- HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
if (sv->maxconn == sv->minconn) { // static maxconn
sv->maxconn = sv->minconn = v;
} else { // dynamic maxconn
sv->maxconn = v;
}
- HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ /* server_parse_maxconn_change_request requires the server lock held.
+ * Specify it to process_srv_queue to prevent a deadlock.
+ */
if (may_dequeue_tasks(sv, sv->proxy))
- process_srv_queue(sv);
+ process_srv_queue(sv, 1);
return NULL;
}
@@ -4638,10 +4639,14 @@
if (!sv)
return 1;
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+
warning = server_parse_maxconn_change_request(sv, args[4]);
if (warning)
cli_err(appctx, warning);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+
return 1;
}
diff --git a/src/stream.c b/src/stream.c
index 4e14e8f..c569e9b 100644
--- a/src/stream.c
+++ b/src/stream.c
@@ -575,7 +575,7 @@
_HA_ATOMIC_SUB(&__objt_server(s->target)->cur_sess, 1);
}
if (may_dequeue_tasks(objt_server(s->target), s->be))
- process_srv_queue(objt_server(s->target));
+ process_srv_queue(objt_server(s->target), 0);
}
if (unlikely(s->srv_conn)) {
@@ -1706,7 +1706,7 @@
}
sess_change_server(s, NULL);
if (may_dequeue_tasks(srv, s->be))
- process_srv_queue(srv);
+ process_srv_queue(srv, 0);
}
}
diff --git a/src/tcpcheck.c b/src/tcpcheck.c
index f889dff..896d4ee 100644
--- a/src/tcpcheck.c
+++ b/src/tcpcheck.c
@@ -913,6 +913,9 @@
cs += strlen("maxconn:");
+ /* This is safe to call server_parse_maxconn_change_request
+ * because the server lock is held during the check.
+ */
msg = server_parse_maxconn_change_request(check->server, cs);
if (!wrn || !*wrn)
wrn = msg;