MEDIUM: pipe/thread: maintain a per-thread local cache of recently used pipes
In order to completely remove the pipe locking cost and try to reuse
hot pipes, each thread now maintains a local cache of recently used pipes
that is no larger than its share of the total (maxpipes/nbthreads). All
extra pipes are instead refilled into the global pool. Allocations are
served from the local pool first, and fall back to the global one before
a new pipe is created. This completely removes the observed pipe locking
cost at high bit rates, which was still around 5-6%.
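
For illustration, below is a minimal standalone sketch of the pattern
(not the actual haproxy code): a thread-local LIFO of free objects capped
at the thread's share of a global budget, falling back to a lock-protected
shared list. The names (obj, obj_get, obj_put, max_objs, nb_threads) are
invented for this sketch, and a pthread mutex stands in for the spinlock
used in the real code:

#include <pthread.h>

struct obj {
	struct obj *next;
};

static int max_objs = 1024;              /* assumed global budget (maxpipes) */
static int nb_threads = 8;               /* assumed thread count (nbthreads) */

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj *global_free;          /* shared free list, lock-protected */

static __thread struct obj *local_free;  /* this thread's private cache */
static __thread int local_free_cnt;      /* number of objects cached above */

struct obj *obj_get(void)
{
	struct obj *o = local_free;

	if (o) {
		/* fast path: pop from the local cache, no lock needed */
		local_free = o->next;
		local_free_cnt--;
		return o;
	}

	/* slow path: try the shared pool under the lock */
	pthread_mutex_lock(&global_lock);
	o = global_free;
	if (o)
		global_free = o->next;
	pthread_mutex_unlock(&global_lock);
	return o; /* NULL means the caller must allocate a fresh one */
}

void obj_put(struct obj *o)
{
	/* keep the object locally while the cache stays below this
	 * thread's share of the budget (max_objs / nb_threads)
	 */
	if (local_free_cnt * nb_threads < max_objs) {
		o->next = local_free;
		local_free = o;
		local_free_cnt++;
		return;
	}

	/* local cache full: refill the shared pool instead */
	pthread_mutex_lock(&global_lock);
	o->next = global_free;
	global_free = o;
	pthread_mutex_unlock(&global_lock);
}

The fast path touches only thread-local data, so no lock or atomic
operation is needed until the local cache is exhausted or full.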
diff --git a/src/pipe.c b/src/pipe.c
index 5b0fdb4..35cd0fe 100644
--- a/src/pipe.c
+++ b/src/pipe.c
@@ -26,6 +26,9 @@
 
 __decl_spinlock(pipes_lock); /* lock used to protect pipes list */
 
+static THREAD_LOCAL int local_pipes_free = 0; /* #cache objects */
+static THREAD_LOCAL struct pipe *local_pipes = NULL;
+
 int pipes_used = 0;             /* # of pipes in use (2 fds each) */
 int pipes_free = 0;             /* # of pipes unused */
 
@@ -37,6 +40,15 @@
 	struct pipe *ret = NULL;
 	int pipefd[2];
 
+	ret = local_pipes;
+	if (likely(ret)) {
+		local_pipes = ret->next;
+		local_pipes_free--;
+		HA_ATOMIC_SUB(&pipes_free, 1);
+		HA_ATOMIC_ADD(&pipes_used, 1);
+		goto out;
+	}
+
 	if (likely(pipes_live)) {
 		HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
 		ret = pipes_live;
@@ -96,6 +108,15 @@
 		return;
 	}
 
+	if (likely(local_pipes_free * global.nbthread < global.maxpipes)) {
+		p->next = local_pipes;
+		local_pipes = p;
+		local_pipes_free++;
+		HA_ATOMIC_ADD(&pipes_free, 1);
+		HA_ATOMIC_SUB(&pipes_used, 1);
+		return;
+	}
+
 	HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
 	p->next = pipes_live;
 	pipes_live = p;
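
Note: the put-side check multiplies the cache size by the number of
threads instead of comparing it against maxpipes/nbthreads, so the test
costs a single multiplication and avoids an integer division on every
release. A small sketch of the resulting cap, using assumed values
(1024 pipes, 8 threads; the real numbers come from the process
configuration):

#include <stdio.h>

int main(void)
{
	int maxpipes = 1024;    /* assumed total pipe budget */
	int nbthread = 8;       /* assumed number of threads */
	int cached;             /* pipes already in the local cache */

	/* same test as the put path above: keep the pipe locally only
	 * while the cache is below the thread's share of the budget
	 */
	for (cached = 126; cached <= 129; cached++)
		printf("cache=%d -> %s\n", cached,
		       (cached * nbthread < maxpipes) ?
		       "keep locally" : "refill global pool");

	/* prints "keep locally" up to cached=127, so the cache never
	 * grows beyond 128 = maxpipes/nbthreads entries
	 */
	return 0;
}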