MINOR: pools: release the pool's lock during the malloc/free calls

The malloc() and free() calls, and especially the underlying mmap() and
munmap() syscalls, can occasionally take a huge amount of time and even
cause the thread to sleep. This is particularly visible when haproxy is
compiled with DEBUG_UAF, which causes every single pool allocation and
free to allocate and release pages. In this case, when using the locked
pools, the watchdog can occasionally fire under high contention
(typically when requesting 40000 1M objects in parallel over 8 threads),
and "perf top" then shows that 50% of the CPU time is spent in mmap()
and munmap(). The reason the watchdog fires is that some threads spin on
the pool lock while it is held by other threads waiting in mmap() or
munmap().
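
To illustrate the problem, here is a minimal sketch of the old
DEBUG_UAF free path (simplified, not the actual haproxy code:
pthread_spin_* stands in for the HA_SPIN_LOCK/HA_SPIN_UNLOCK macros,
and a bare munmap() models what pool_free_area() does):

  #include <pthread.h>
  #include <sys/mman.h>
  #include <stddef.h>

  /* assume pthread_spin_init() was called at startup */
  static pthread_spinlock_t pool_lock;

  static void old_uaf_free(void *area, size_t len)
  {
          pthread_spin_lock(&pool_lock);
          /* ... accounting: pool->allocated--, pool->used-- ... */
          munmap(area, len);  /* syscall runs with the lock held:
                               * every other thread spins on
                               * pool_lock and the watchdog sees
                               * no progress */
          pthread_spin_unlock(&pool_lock);
  }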

This patch modifies the code so that the pool lock is released during
these syscalls. Not only does this allow other threads to try to
allocate their data in parallel, it also considerably reduces the lock
contention.
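
The resulting pattern, as a minimal sketch under the same stand-ins as
above (with a hypothetical singly-linked free list whose first
pointer-sized bytes link to the next area, much like POOL_LINK() does),
unlinks each entry under the lock but releases it with the lock
dropped:

  static void *free_list;

  static void flush_free_list(size_t area_len)
  {
          void *temp;

          while (1) {
                  pthread_spin_lock(&pool_lock);
                  temp = free_list;
                  if (!temp) {
                          pthread_spin_unlock(&pool_lock);
                          break;
                  }
                  free_list = *(void **)temp; /* unlink under the lock */
                  pthread_spin_unlock(&pool_lock);
                  munmap(temp, area_len);     /* syscall runs unlocked */
          }
  }

This way another thread can grab the lock between two releases instead
of spinning for the whole duration of the loop.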

Note that the locked pools are only used on small architectures where
high thread counts would not make sense, so this will not provide any
benefit in the general case. However, it makes the debugging versions
way more stable, which is always appreciated.
diff --git a/include/common/memory.h b/include/common/memory.h
index 8e61156..8fc7cc8 100644
--- a/include/common/memory.h
+++ b/include/common/memory.h
@@ -473,7 +473,6 @@
 static inline void pool_free(struct pool_head *pool, void *ptr)
 {
         if (likely(ptr != NULL)) {
-		HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
 #ifdef DEBUG_MEMORY_POOLS
 		/* we'll get late corruption if we refill to the wrong pool or double-free */
 		if (*POOL_LINK(pool, ptr) != (void *)pool)
@@ -481,16 +480,20 @@
 #endif
 
 #ifndef DEBUG_UAF /* normal pool behaviour */
+		HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
 		*POOL_LINK(pool, ptr) = (void *)pool->free_list;
-                pool->free_list = (void *)ptr;
+		pool->free_list = (void *)ptr;
+		pool->used--;
+		HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
 #else  /* release the entry for real to detect use after free */
 		/* ensure we crash on double free or free of a const area*/
 		*(uint32_t *)ptr = 0xDEADADD4;
 		pool_free_area(ptr, pool->size + POOL_EXTRA);
+		HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
 		pool->allocated--;
-#endif /* DEBUG_UAF */
-                pool->used--;
+		pool->used--;
 		HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
+#endif /* DEBUG_UAF */
 	}
 }
 #endif /* CONFIG_HAP_LOCKLESS_POOLS */
diff --git a/src/memory.c b/src/memory.c
index edacb00..8fed2f4 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -335,7 +335,9 @@
 			return NULL;
 		}
 
+		HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
 		ptr = pool_alloc_area(pool->size + POOL_EXTRA);
+		HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
 		if (!ptr) {
 			pool->failed++;
 			if (failed) {
@@ -373,21 +375,24 @@
  */
 void pool_flush(struct pool_head *pool)
 {
-	void *temp, *next;
+	void *temp;
+
 	if (!pool)
 		return;
 
-	HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
-	next = pool->free_list;
-	while (next) {
-		temp = next;
-		next = *POOL_LINK(pool, temp);
+	while (1) {
+		HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
+		temp = pool->free_list;
+		if (!temp) {
+			HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
+			break;
+		}
+		pool->free_list = *POOL_LINK(pool, temp);
 		pool->allocated--;
+		HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
 		pool_free_area(temp, pool->size + POOL_EXTRA);
 	}
-	pool->free_list = next;
-	HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
-	/* here, we should have pool->allocate == pool->used */
+	/* here, we should have pool->allocated == pool->used */
 }
 
 /*
@@ -419,7 +424,11 @@
 			temp = next;
 			next = *POOL_LINK(entry, temp);
 			entry->allocated--;
+			if (entry != pool_ctx)
+				HA_SPIN_UNLOCK(POOL_LOCK, &entry->lock);
 			pool_free_area(temp, entry->size + POOL_EXTRA);
+			if (entry != pool_ctx)
+				HA_SPIN_LOCK(POOL_LOCK, &entry->lock);
 		}
 		entry->free_list = next;
 		if (entry != pool_ctx)