MEDIUM: pipe/thread: reduce the locking overhead

In a quick test involving splicing, we can see that get_pipe() and
put_pipe() together consume up to 12% of the CPU. That's not surprizing
considering how much work is performed under the lock, including the
pipe struct allocation, the pipe creation and its initialization. Same
for releasing, we don't need a lock there to call close() nor to free
to the pool.

Changing this alone was enough to cut the overhead in half. A better
approach should consist in having a per-thread pipe cache, which will
also help keep pages hot in the CPU caches.
diff --git a/src/pipe.c b/src/pipe.c
index 9190ba9..5b0fdb4 100644
--- a/src/pipe.c
+++ b/src/pipe.c
@@ -37,13 +37,17 @@
 	struct pipe *ret = NULL;
 	int pipefd[2];
 
-	HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
 	if (likely(pipes_live)) {
+		HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
 		ret = pipes_live;
-		pipes_live = pipes_live->next;
-		pipes_free--;
-		pipes_used++;
-		goto out;
+		if (likely(ret)) {
+			pipes_live = ret->next;
+			pipes_free--;
+			pipes_used++;
+		}
+		HA_SPIN_UNLOCK(PIPES_LOCK, &pipes_lock);
+		if (ret)
+			goto out;
 	}
 
 	if (pipes_used >= global.maxpipes)
@@ -65,30 +69,20 @@
 	ret->prod = pipefd[1];
 	ret->cons = pipefd[0];
 	ret->next = NULL;
-	pipes_used++;
+	HA_ATOMIC_ADD(&pipes_used, 1);
  out:
-	HA_SPIN_UNLOCK(PIPES_LOCK, &pipes_lock);
 	return ret;
 }
 
-static inline void __kill_pipe(struct pipe *p)
-{
-	close(p->prod);
-	close(p->cons);
-	pool_free(pool_head_pipe, p);
-	pipes_used--;
-	return;
-}
-
 /* destroy a pipe, possibly because an error was encountered on it. Its FDs
  * will be closed and it will not be reinjected into the live pool.
  */
 void kill_pipe(struct pipe *p)
 {
-	HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
-	__kill_pipe(p);
-	HA_SPIN_UNLOCK(PIPES_LOCK, &pipes_lock);
-	return;
+	close(p->prod);
+	close(p->cons);
+	pool_free(pool_head_pipe, p);
+	HA_ATOMIC_SUB(&pipes_used, 1);
 }
 
 /* put back a unused pipe into the live pool. If it still has data in it, it is
@@ -97,16 +91,16 @@
  */
 void put_pipe(struct pipe *p)
 {
-	HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
-	if (p->data) {
-		__kill_pipe(p);
-		goto out;
+	if (unlikely(p->data)) {
+		kill_pipe(p);
+		return;
 	}
+
+	HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
 	p->next = pipes_live;
 	pipes_live = p;
 	pipes_free++;
 	pipes_used--;
- out:
 	HA_SPIN_UNLOCK(PIPES_LOCK, &pipes_lock);
 }