BUG/MAJOR: queue: better protect a pendconn being picked from the proxy
The locking in the dequeuing process was significantly improved by commit
49667c14b ("MEDIUM: queue: take the proxy lock only during the px queue
accesses") in that it tries hard to limit the time during which the
proxy's queue lock is held to the strict minimum. Unfortunately it's not
enough anymore, because we take up the task and manipulate a few pendconn
elements after releasing the proxy's lock (while we're under the server's
lock) but the task will not necessarily hold the server lock since it may
not have successfully found one (e.g. timeout in the backend queue). As
such, stream_free() calling pendconn_free() may release the pendconn
immediately after the proxy's lock is released while the other thread
currently proceeding with the dequeuing tries to wake up the owner's
task and dies in task_wakeup().
One solution consists in releasing le proxy's lock later. But tests have
shown that we'd have to sacrifice a significant share of the performance
gained with the patch above (roughly a 20% loss).
This patch takes another approach. It adds a "del_lock" to each pendconn
struct, that allows to keep it referenced while the proxy's lock is being
released. It's mostly a serialization lock like a refcount, just to maintain
the pendconn alive till the task_wakeup() call is complete. This way we can
continue to release the proxy's lock early while keeping this one. It had
to be added to the few points where we're about to free a pendconn, namely
in pendconn_dequeue() and pendconn_unlink(). This way we continue to
release the proxy's lock very early and there is no performance degradation.
This lock may only be held under the queue's lock to prevent lock
inversion.
No backport is needed since the patch above was merged in 2.5-dev only.
diff --git a/include/haproxy/queue-t.h b/include/haproxy/queue-t.h
index 8bdc00b..2667449 100644
--- a/include/haproxy/queue-t.h
+++ b/include/haproxy/queue-t.h
@@ -37,6 +37,7 @@
struct queue *queue; /* the queue the entry is queued into */
struct server *target; /* the server that was assigned, = srv except if srv==NULL */
struct eb32_node node;
+ __decl_thread(HA_SPINLOCK_T del_lock); /* use before removal, always under queue's lock */
};
struct queue {
diff --git a/src/queue.c b/src/queue.c
index 6d3aa9a..92692ab 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -172,7 +172,9 @@
* connection is not really dequeued. It will be done during process_stream().
* This function takes all the required locks for the operation. The pendconn
* must be valid, though it doesn't matter if it was already unlinked. Prefer
- * pendconn_cond_unlink() to first check <p>.
+ * pendconn_cond_unlink() to first check <p>. It also forces a serialization
+ * on p->del_lock to make sure another thread currently waking it up finishes
+ * first.
*/
void pendconn_unlink(struct pendconn *p)
{
@@ -184,10 +186,14 @@
oldidx = _HA_ATOMIC_LOAD(&p->queue->idx);
HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
+ HA_SPIN_LOCK(QUEUE_LOCK, &p->del_lock);
+
if (p->node.node.leaf_p) {
eb32_delete(&p->node);
done = 1;
}
+
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &p->del_lock);
HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);
if (done) {
@@ -244,8 +250,8 @@
*
* The proxy's queue will be consulted only if px_ok is non-zero.
*
- * This function must only be called if the server queue _AND_ the proxy queue
- * are locked (if px_ok is set). Today it is only called by process_srv_queue.
+ * This function must only be called if the server queue is locked _AND_ the
+ * proxy queue is not. Today it is only called by process_srv_queue.
* When a pending connection is dequeued, this function returns 1 if a pendconn
* is dequeued, otherwise 0.
*/
@@ -298,27 +304,51 @@
goto use_p;
use_pp:
- /* Let's switch from the server pendconn to the proxy pendconn */
+ /* we'd like to release the proxy lock ASAP to let other threads
+ * work with other servers. But for this we must first hold the
+ * pendconn alive to prevent a removal from its owning stream.
+ */
+ HA_SPIN_LOCK(QUEUE_LOCK, &pp->del_lock);
+
+ /* now the element won't go, we can release the proxy */
__pendconn_unlink_prx(pp);
- HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
+
+ pp->strm_flags |= SF_ASSIGNED;
+ pp->target = srv;
+ stream_add_srv_conn(pp->strm, srv);
+
+ /* we must wake the task up before releasing the lock as it's the only
+ * way to make sure the task still exists. The pendconn cannot vanish
+ * under us since the task will need to take the lock anyway and to wait
+ * if it wakes up on a different thread.
+ */
+ task_wakeup(pp->strm->task, TASK_WOKEN_RES);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &pp->del_lock);
+
_HA_ATOMIC_DEC(&px->queue.length);
_HA_ATOMIC_INC(&px->queue.idx);
- p = pp;
- goto unlinked;
+ return 1;
+
use_p:
+ /* we don't need the px queue lock anymore, we have the server's lock */
if (pp)
- HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
- __pendconn_unlink_srv(p);
- _HA_ATOMIC_DEC(&srv->queue.length);
- _HA_ATOMIC_INC(&srv->queue.idx);
- unlinked:
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
+
p->strm_flags |= SF_ASSIGNED;
p->target = srv;
-
stream_add_srv_conn(p->strm, srv);
+ /* we must wake the task up before releasing the lock as it's the only
+ * way to make sure the task still exists. The pendconn cannot vanish
+ * under us since the task will need to take the lock anyway and to wait
+ * if it wakes up on a different thread.
+ */
task_wakeup(p->strm->task, TASK_WOKEN_RES);
+ __pendconn_unlink_srv(p);
+ _HA_ATOMIC_DEC(&srv->queue.length);
+ _HA_ATOMIC_INC(&srv->queue.idx);
return 1;
}
@@ -405,6 +435,7 @@
p->node.key = MAKE_KEY(strm->priority_class, strm->priority_offset);
p->strm = strm;
p->strm_flags = strm->flags;
+ HA_SPIN_INIT(&p->del_lock);
strm->pend_pos = p;
px = strm->be;
@@ -556,6 +587,10 @@
is_unlinked = !p->node.node.leaf_p;
pendconn_queue_unlock(p);
+ /* serialize to make sure the element was finished processing */
+ HA_SPIN_LOCK(QUEUE_LOCK, &p->del_lock);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &p->del_lock);
+
if (!is_unlinked)
return 1;