REORG: pools: uninline the UAF allocator and force-inline the rest

pool-os.h relies on a number of includes solely because the
pool_alloc_area() function was inlined, and it was only inlined
because we want the normal version to be inlined so that the memory
profiler can track the calling places. It's worth noting that this
tracking already does not work at -O0, where plain "inline" is
ignored, and that when UAF is enabled we do not care at all about
profiling.
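
To illustrate the mechanism (a hypothetical sketch, not HAProxy's
actual profiler code): the profiler hooks the allocator and credits
each allocation to the return address seen inside the hook, so the
hooked call must be emitted directly in the real caller for the
attribution to be useful. Assuming forceinline is the usual
always_inline idiom:

    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define forceinline inline __attribute__((always_inline))

    /* stand-in for a profiling allocator hook: it credits the
     * allocation to the place that called it.
     */
    static __attribute__((noinline)) void *prof_malloc(size_t size)
    {
        printf("alloc(%zu) from %p\n", size, __builtin_return_address(0));
        return malloc(size);
    }

    /* force-inlined even at -O0, so the prof_malloc() call lands
     * directly in each caller and every calling place gets its own
     * profile entry. Left as a plain function, every allocation
     * would be credited to pool_alloc_area() itself.
     */
    static forceinline void *pool_alloc_area(size_t size)
    {
        return prof_malloc(size);
    }

    int main(void)
    {
        free(pool_alloc_area(123));
        return 0;
    }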

This patch does two things at once:
  - force-inline the functions so that pool_alloc_area() is still
    inlined at -O0 to help track malloc() users;

  - uninline the UAF versions of these functions (which rely on
    mmap/munmap) and move them to pool.c so that all the unneeded
    includes can be removed.

Doing so reduces the total build size by ~270kB, or 0.15%.
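
For context, the UAF allocator moved below (unchanged) rounds each
allocation up to a full 4kB page and pads it so that the returned
object stays 16-byte aligned and ends within 16 bytes of the end of
the mapping, with the area's start address stored just below it as an
underflow canary whenever the padding leaves room for one. A
standalone check of that arithmetic (illustration only, not part of
the patch):

    #include <assert.h>
    #include <stddef.h>

    int main(void)
    {
        size_t size = 100;

        /* same expressions as in pool_alloc_area_uaf() below */
        size_t pad = (4096 - size) & 0xFF0;   /* here: 3984 */
        size_t map = (size + 4095) & -4096;   /* here: 4096 */

        assert(pad % 16 == 0);           /* 16-byte alignment preserved */
        assert(pad + size <= map);       /* object fits in the mapping */
        assert(map - (pad + size) < 16); /* and ends near its end */
        return 0;
    }
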
diff --git a/include/haproxy/pool-os.h b/include/haproxy/pool-os.h
index dc86f53..ef3c935 100644
--- a/include/haproxy/pool-os.h
+++ b/include/haproxy/pool-os.h
@@ -22,11 +22,8 @@
 #ifndef _HAPROXY_POOL_OS_H
 #define _HAPROXY_POOL_OS_H
 
-#include <sys/mman.h>
 #include <stdlib.h>
 #include <haproxy/api.h>
-#include <haproxy/pool-t.h>
-#include <haproxy/thread.h>
 
 
 #ifndef DEBUG_UAF
@@ -36,7 +33,7 @@
 /* allocates an area of size <size> and returns it. The semantics are similar
  * to those of malloc().
  */
-static inline void *pool_alloc_area(size_t size)
+static forceinline void *pool_alloc_area(size_t size)
 {
 	return malloc(size);
 }
@@ -45,7 +42,7 @@
  * semantics are identical to free() except that the size is specified and
  * may be ignored.
  */
-static inline void pool_free_area(void *area, size_t __maybe_unused size)
+static forceinline void pool_free_area(void *area, size_t __maybe_unused size)
 {
 	free(area);
 }
@@ -54,56 +51,25 @@
 
 /************* use-after-free allocator *************/
 
+void *pool_alloc_area_uaf(size_t size);
+void pool_free_area_uaf(void *area, size_t size);
+
+
 /* allocates an area of size <size> and returns it. The semantics are similar
- * to those of malloc(). However the allocation is rounded up to 4kB so that a
- * full page is allocated. This ensures the object can be freed alone so that
- * future dereferences are easily detected. The returned object is always
- * 16-bytes aligned to avoid issues with unaligned structure objects. In case
- * some padding is added, the area's start address is copied at the end of the
- * padding to help detect underflows.
+ * to those of malloc().
  */
-static inline void *pool_alloc_area(size_t size)
+static forceinline void *pool_alloc_area(size_t size)
 {
-	size_t pad = (4096 - size) & 0xFF0;
-	int isolated;
-	void *ret;
-
-	isolated = thread_isolated();
-	if (!isolated)
-		thread_harmless_now();
-	ret = mmap(NULL, (size + 4095) & -4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-	if (ret != MAP_FAILED) {
-		/* let's dereference the page before returning so that the real
-		 * allocation in the system is performed without holding the lock.
-		 */
-		*(int *)ret = 0;
-		if (pad >= sizeof(void *))
-			*(void **)(ret + pad - sizeof(void *)) = ret + pad;
-		ret += pad;
-	} else {
-		ret = NULL;
-	}
-	if (!isolated)
-		thread_harmless_end();
-	return ret;
+	return pool_alloc_area_uaf(size);
 }
 
 /* frees an area <area> of size <size> allocated by pool_alloc_area(). The
- * semantics are identical to free() except that the size must absolutely match
- * the one passed to pool_alloc_area(). In case some padding is added, the
- * area's start address is compared to the one at the end of the padding, and
- * a segfault is triggered if they don't match, indicating an underflow.
+ * semantics are identical to free() except that the size is specified and
+ * may be ignored.
  */
-static inline void pool_free_area(void *area, size_t size)
+static forceinline void pool_free_area(void *area, size_t size)
 {
-	size_t pad = (4096 - size) & 0xFF0;
-
-	if (pad >= sizeof(void *) && *(void **)(area - sizeof(void *)) != area)
-		ABORT_NOW();
-
-	thread_harmless_now();
-	munmap(area - pad, (size + 4095) & -4096);
-	thread_harmless_end();
+	pool_free_area_uaf(area, size);
 }
 
 #endif /* DEBUG_UAF */
diff --git a/src/pool.c b/src/pool.c
index e6669fb..4bfe014 100644
--- a/src/pool.c
+++ b/src/pool.c
@@ -9,6 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  *
  */
+
+#include <sys/mman.h>
 #include <errno.h>
 
 #include <haproxy/activity.h>
@@ -420,6 +422,65 @@
 
 #endif /* CONFIG_HAP_POOLS */
 
+
+#ifdef DEBUG_UAF
+
+/************* use-after-free allocator *************/
+
+/* allocates an area of size <size> and returns it. The semantics are similar
+ * to those of malloc(). However the allocation is rounded up to 4kB so that a
+ * full page is allocated. This ensures the object can be freed alone so that
+ * future dereferences are easily detected. The returned object is always
+ * 16-byte aligned to avoid issues with unaligned structure objects. In case
+ * some padding is added, the area's start address is copied at the end of the
+ * padding to help detect underflows.
+ */
+void *pool_alloc_area_uaf(size_t size)
+{
+	size_t pad = (4096 - size) & 0xFF0;
+	int isolated;
+	void *ret;
+
+	isolated = thread_isolated();
+	if (!isolated)
+		thread_harmless_now();
+	ret = mmap(NULL, (size + 4095) & -4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (ret != MAP_FAILED) {
+		/* let's dereference the page before returning so that the real
+		 * allocation in the system is performed without holding the lock.
+		 */
+		*(int *)ret = 0;
+		if (pad >= sizeof(void *))
+			*(void **)(ret + pad - sizeof(void *)) = ret + pad;
+		ret += pad;
+	} else {
+		ret = NULL;
+	}
+	if (!isolated)
+		thread_harmless_end();
+	return ret;
+}
+
+/* frees an area <area> of size <size> allocated by pool_alloc_area(). The
+ * semantics are identical to free() except that the size must absolutely match
+ * the one passed to pool_alloc_area(). In case some padding is added, the
+ * area's start address is compared to the one at the end of the padding, and
+ * a segfault is triggered if they don't match, indicating an underflow.
+ */
+void pool_free_area_uaf(void *area, size_t size)
+{
+	size_t pad = (4096 - size) & 0xFF0;
+
+	if (pad >= sizeof(void *) && *(void **)(area - sizeof(void *)) != area)
+		ABORT_NOW();
+
+	thread_harmless_now();
+	munmap(area - pad, (size + 4095) & -4096);
+	thread_harmless_end();
+}
+
+#endif /* DEBUG_UAF */
+
 /*
  * This function destroys a pool by freeing it completely, unless it's still
  * in use. This should be called only under extreme circumstances. It always