MEDIUM: pools: replace CONFIG_HAP_POOLS with a runtime "NO_CACHE" flag.
Like previous patches, this replaces the build-time code paths that were
conditioned by CONFIG_HAP_POOLS with runtime paths conditioned by
!POOL_DBG_NO_CACHE. One trivial test had to be added in the hot path in
__pool_alloc() to refrain from calling pool_get_from_cache(), and another
one in __pool_free() to avoid calling pool_put_to_cache().
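
For illustration only, here is a minimal standalone sketch (not HAProxy
code; every name below is an invented stand-in) of the shape this runtime
hot-path test takes: a bit in a global debug word replaces the old
build-time #ifdef.

#include <stdio.h>
#include <stdlib.h>

#define DBG_NO_CACHE 0x20u              /* stand-in for POOL_DBG_NO_CACHE */

static unsigned int dbg_flags;          /* stand-in for pool_debugging */

static void *cache_get(void)            /* stand-in for pool_get_from_cache() */
{
	puts("trying the thread-local cache");
	return NULL;                    /* pretend the cache is empty */
}

static void *alloc_obj(size_t size)     /* stand-in for __pool_alloc() */
{
	void *p = NULL;

	/* the one extra hot-path test: skip the cache entirely when disabled */
	if (!(dbg_flags & DBG_NO_CACHE))
		p = cache_get();
	if (!p)
		p = malloc(size);       /* stand-in for pool_alloc_nocache() */
	return p;
}

int main(void)
{
	dbg_flags |= DBG_NO_CACHE;      /* e.g. set at startup when caching is off */
	free(alloc_obj(64));
	return 0;
}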
All cache-specific functions were instrumented with a BUG_ON() to make
sure we never call them with the cache disabled. Additionally, the cache[]
array was not initialized (remains NULL) so that we can later drop it
if not needed. It's particularly huge and should be turned into a dynamic
allocation, with a pointer to a per-thread area where all the objects are
located. This will solve the memory usage issue and improve locality, or
even help deal better with NUMA machines once each thread uses its own
arena.
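
Below is a similarly hedged, standalone sketch of that guard pattern,
with assert() standing in for HAProxy's BUG_ON() and all names invented
for the example.

#include <assert.h>
#include <stdio.h>

#define DBG_NO_CACHE 0x20u              /* stand-in for POOL_DBG_NO_CACHE */

static unsigned int dbg_flags;          /* stand-in for pool_debugging */

static void cache_only_operation(void)  /* any cache-specific helper */
{
	/* must never be reached while the thread-local cache is disabled */
	assert(!(dbg_flags & DBG_NO_CACHE));  /* stand-in for BUG_ON() */
	puts("touching the per-thread cache");
}

int main(void)
{
	cache_only_operation();         /* fine: the cache is enabled by default */
	dbg_flags |= DBG_NO_CACHE;
	/* calling cache_only_operation() now would abort via the assertion */
	return 0;
}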
diff --git a/include/haproxy/pool-t.h b/include/haproxy/pool-t.h
index c8f7d46..0a8f9dd 100644
--- a/include/haproxy/pool-t.h
+++ b/include/haproxy/pool-t.h
@@ -46,6 +46,7 @@
#define POOL_DBG_COLD_FIRST 0x00000004 // pick cold objects first
#define POOL_DBG_INTEGRITY 0x00000008 // perform integrity checks on cache
#define POOL_DBG_NO_GLOBAL 0x00000010 // disable global pools
+#define POOL_DBG_NO_CACHE 0x00000020 // disable thread-local pool caches
/* This is the head of a thread-local cache */
@@ -115,9 +116,7 @@
/* 32-bit hole here */
struct list list; /* list of all known pools */
char name[12]; /* name of the pool */
-#ifdef CONFIG_HAP_POOLS
struct pool_cache_head cache[MAX_THREADS]; /* pool caches */
-#endif
} __attribute__((aligned(64)));
#endif /* _HAPROXY_POOL_T_H */
diff --git a/include/haproxy/pool.h b/include/haproxy/pool.h
index 8497cfa..acab81d 100644
--- a/include/haproxy/pool.h
+++ b/include/haproxy/pool.h
@@ -133,8 +133,6 @@
void __pool_free(struct pool_head *pool, void *ptr);
-#ifdef CONFIG_HAP_POOLS
-
/****************** Thread-local cache management ******************/
extern THREAD_LOCAL size_t pool_cache_bytes; /* total cache size */
@@ -162,7 +160,7 @@
{
uint alloc, used;
- if (unlikely(pool_debugging & POOL_DBG_NO_GLOBAL))
+ if (unlikely(pool_debugging & (POOL_DBG_NO_CACHE|POOL_DBG_NO_GLOBAL)))
return 0;
alloc = HA_ATOMIC_LOAD(&pool->allocated);
@@ -186,13 +184,16 @@
/* Tries to retrieve an object from the local pool cache corresponding to pool
* <pool>. If none is available, tries to allocate from the shared cache if any
- * and returns NULL if nothing is available.
+ * and returns NULL if nothing is available. Must not be used when pools are
+ * disabled.
*/
static inline void *pool_get_from_cache(struct pool_head *pool, const void *caller)
{
struct pool_cache_item *item;
struct pool_cache_head *ph;
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
ph = &pool->cache[tid];
if (unlikely(LIST_ISEMPTY(&ph->list))) {
if (!(pool_debugging & POOL_DBG_NO_GLOBAL))
@@ -231,22 +232,6 @@
return item;
}
-#else /* CONFIG_HAP_POOLS */
-
-/* no cache pools implementation */
-
-static inline void *pool_get_from_cache(struct pool_head *pool, const void *caller)
-{
- return NULL;
-}
-
-static inline void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
-{
- pool_free_nocache(pool, ptr);
-}
-
-#endif /* CONFIG_HAP_POOLS */
-
/****************** Common high-level code ******************/
diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h
index dde3fe9..2c01480 100644
--- a/include/haproxy/tinfo-t.h
+++ b/include/haproxy/tinfo-t.h
@@ -85,9 +85,7 @@
uint8_t tl_class_mask; /* bit mask of non-empty tasklets classes */
// 7 bytes hole here
-#ifdef CONFIG_HAP_POOLS
- struct list pool_lru_head; /* oldest objects */
-#endif
+ struct list pool_lru_head; /* oldest objects in thread-local pool caches */
struct list buffer_wq; /* buffer waiters */
struct list streams; /* list of streams attached to this thread */
diff --git a/src/pool.c b/src/pool.c
index 82f51b5..cb1f768 100644
--- a/src/pool.c
+++ b/src/pool.c
@@ -29,11 +29,9 @@
#include <haproxy/tools.h>
-#ifdef CONFIG_HAP_POOLS
/* These ones are initialized per-thread on startup by init_pools() */
THREAD_LOCAL size_t pool_cache_bytes = 0; /* total cache size */
THREAD_LOCAL size_t pool_cache_count = 0; /* #cache objects */
-#endif
static struct list pools __read_mostly = LIST_HEAD_INIT(pools);
int mem_poison_byte __read_mostly = -1;
@@ -53,6 +51,9 @@
#ifdef CONFIG_HAP_NO_GLOBAL_POOLS
POOL_DBG_NO_GLOBAL |
#endif
+#ifndef CONFIG_HAP_POOLS
+ POOL_DBG_NO_CACHE |
+#endif
0;
static int mem_fail_rate __read_mostly = 0;
@@ -206,15 +207,16 @@
size = ((size + POOL_EXTRA + align - 1) & -align) - POOL_EXTRA;
}
-#ifdef CONFIG_HAP_POOLS
- /* we'll store two lists there, we need the room for this. This is
- * guaranteed by the test above, except if MEM_F_EXACT is set, or if
- * the only EXTRA part is in fact the one that's stored in the cache
- * in addition to the pci struct.
- */
- if (size + POOL_EXTRA - POOL_EXTRA_CALLER < sizeof(struct pool_cache_item))
- size = sizeof(struct pool_cache_item) + POOL_EXTRA_CALLER - POOL_EXTRA;
-#endif
+ if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
+ /* we'll store two lists there, we need the room for this. This is
+ * guaranteed by the test above, except if MEM_F_EXACT is set, or if
+ * the only EXTRA part is in fact the one that's stored in the cache
+ * in addition to the pci struct.
+ */
+ if (size + POOL_EXTRA - POOL_EXTRA_CALLER < sizeof(struct pool_cache_item))
+ size = sizeof(struct pool_cache_item) + POOL_EXTRA_CALLER - POOL_EXTRA;
+ }
+
/* TODO: thread: we do not lock pool list for now because all pools are
* created during HAProxy startup (so before threads creation) */
start = &pools;
@@ -254,14 +256,14 @@
pool->flags = flags;
LIST_APPEND(start, &pool->list);
-#ifdef CONFIG_HAP_POOLS
- /* update per-thread pool cache if necessary */
- for (thr = 0; thr < MAX_THREADS; thr++) {
- LIST_INIT(&pool->cache[thr].list);
- pool->cache[thr].tid = thr;
- pool->cache[thr].pool = pool;
+ if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
+ /* update per-thread pool cache if necessary */
+ for (thr = 0; thr < MAX_THREADS; thr++) {
+ LIST_INIT(&pool->cache[thr].list);
+ pool->cache[thr].tid = thr;
+ pool->cache[thr].pool = pool;
+ }
}
-#endif
}
pool->users++;
return pool;
@@ -336,8 +338,6 @@
}
-#ifdef CONFIG_HAP_POOLS
-
/* Updates <pch>'s fill_pattern and fills the free area after <item> with it,
* up to <size> bytes. The item part is left untouched.
*/
@@ -386,7 +386,8 @@
* pool <pool>. The shared pool is refilled with these objects in the limit
* of the number of acceptable objects, and the rest will be released to the
* OS. It is not a problem is <count> is larger than the number of objects in
- * the local cache. The counters are automatically updated.
+ * the local cache. The counters are automatically updated. Must not be used
+ * with pools disabled.
*/
static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head *ph, uint count)
{
@@ -396,6 +397,8 @@
uint cluster = 0;
uint to_free_max;
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
/* Note: this will be zero when global pools are disabled */
to_free_max = pool_releasable(pool);
@@ -440,11 +443,14 @@
* or the total size of the local cache is no more than 75% of its maximum (i.e.
* we don't want a single cache to use all the cache for itself). For this, the
* list is scanned in reverse. If <full> is non-null, all objects are evicted.
+ * Must not be used when pools are disabled.
*/
void pool_evict_from_local_cache(struct pool_head *pool, int full)
{
struct pool_cache_head *ph = &pool->cache[tid];
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
while ((ph->count && full) ||
(ph->count >= CONFIG_HAP_POOL_CLUSTER_SIZE &&
ph->count >= 16 + pool_cache_count / 8 &&
@@ -454,7 +460,7 @@
}
/* Evicts some of the oldest objects from the local cache, pushing them to the
- * global pool.
+ * global pool. Must not be used when pools are disabled.
*/
void pool_evict_from_local_caches()
{
@@ -462,6 +468,8 @@
struct pool_cache_head *ph;
struct pool_head *pool;
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
do {
item = LIST_PREV(&th_ctx->pool_lru_head, struct pool_cache_item *, by_lru);
BUG_ON(&item->by_lru == &th_ctx->pool_lru_head);
@@ -482,13 +490,16 @@
* shared cache, which itself may decide to release some of them to the OS.
* While it is unspecified what the object becomes past this point, it is
* guaranteed to be released from the users' perpective. A caller address may
- * be passed and stored into the area when DEBUG_POOL_TRACING is set.
+ * be passed and stored into the area when DEBUG_POOL_TRACING is set. Must not
+ * be used with pools disabled.
*/
void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
{
struct pool_cache_item *item = (struct pool_cache_item *)ptr;
struct pool_cache_head *ph = &pool->cache[tid];
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
LIST_INSERT(&ph->list, &item->by_pool);
LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru);
POOL_DEBUG_TRACE_CALLER(pool, item, caller);
@@ -510,7 +521,8 @@
* This is only used when pools are in use and shared pools are enabled. No
* malloc() is attempted, and poisonning is never performed. The purpose is to
* get the fastest possible refilling so that the caller can easily check if
- * the cache has enough objects for its use.
+ * the cache has enough objects for its use. Must not be used when pools are
+ * disabled.
*/
void pool_refill_local_from_shared(struct pool_head *pool, struct pool_cache_head *pch)
{
@@ -518,6 +530,8 @@
struct pool_item *ret, *down;
uint count;
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
/* we'll need to reference the first element to figure the next one. We
* must temporarily lock it so that nobody allocates then releases it,
* or the dereference could fail.
@@ -588,7 +602,7 @@
{
struct pool_item *next, *temp, *down;
- if (!pool)
+ if (!pool || (pool_debugging & (POOL_DBG_NO_CACHE|POOL_DBG_NO_GLOBAL)))
return;
/* The loop below atomically detaches the head of the free list and
@@ -650,21 +664,6 @@
thread_release();
}
-#else /* CONFIG_HAP_POOLS */
-
-/* legacy stuff */
-void pool_flush(struct pool_head *pool)
-{
-}
-
-/* This function might ask the malloc library to trim its buffers. */
-void pool_gc(struct pool_head *pool_ctx)
-{
- trim_all_pools();
-}
-
-#endif /* CONFIG_HAP_POOLS */
-
/*
* Returns a pointer to type <type> taken from the pool <pool_type> or
* dynamically allocated. In the first case, <pool_type> is updated to point to
@@ -683,8 +682,9 @@
#if defined(DEBUG_POOL_TRACING)
caller = __builtin_return_address(0);
#endif
- if (!p)
+ if (likely(!(pool_debugging & POOL_DBG_NO_CACHE)) && !p)
p = pool_get_from_cache(pool, caller);
+
if (unlikely(!p))
p = pool_alloc_nocache(pool);
@@ -711,6 +711,12 @@
/* we'll get late corruption if we refill to the wrong pool or double-free */
POOL_DEBUG_CHECK_MARK(pool, ptr);
POOL_DEBUG_RESET_MARK(pool, ptr);
+
+ if (unlikely(pool_debugging & POOL_DBG_NO_CACHE)) {
+ pool_free_nocache(pool, ptr);
+ return;
+ }
+
pool_put_to_cache(pool, ptr, caller);
}
@@ -775,9 +781,9 @@
void *pool_destroy(struct pool_head *pool)
{
if (pool) {
-#ifdef CONFIG_HAP_POOLS
- pool_evict_from_local_cache(pool, 1);
-#endif
+ if (!(pool_debugging & POOL_DBG_NO_CACHE))
+ pool_evict_from_local_cache(pool, 1);
+
pool_flush(pool);
if (pool->used)
return pool;
@@ -806,30 +812,24 @@
struct pool_head *entry;
unsigned long allocated, used;
int nbpools;
-#ifdef CONFIG_HAP_POOLS
unsigned long cached_bytes = 0;
uint cached = 0;
-#endif
allocated = used = nbpools = 0;
chunk_printf(&trash, "Dumping pools usage. Use SIGQUIT to flush them.\n");
list_for_each_entry(entry, &pools, list) {
-#ifdef CONFIG_HAP_POOLS
- int i;
- for (cached = i = 0; i < global.nbthread; i++)
- cached += entry->cache[i].count;
- cached_bytes += cached * entry->size;
-#endif
+ if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
+ int i;
+ for (cached = i = 0; i < global.nbthread; i++)
+ cached += entry->cache[i].count;
+ cached_bytes += cached * entry->size;
+ }
chunk_appendf(&trash, " - Pool %s (%u bytes) : %u allocated (%u bytes), %u used"
-#ifdef CONFIG_HAP_POOLS
" (~%u by thread caches)"
-#endif
", needed_avg %u, %u failures, %u users, @%p%s\n",
entry->name, entry->size, entry->allocated,
entry->size * entry->allocated, entry->used,
-#ifdef CONFIG_HAP_POOLS
cached,
-#endif
swrate_avg(entry->needed_avg, POOL_AVG_SAMPLES), entry->failed,
entry->users, entry,
(entry->flags & MEM_F_SHARED) ? " [SHARED]" : "");
@@ -839,14 +839,9 @@
nbpools++;
}
chunk_appendf(&trash, "Total: %d pools, %lu bytes allocated, %lu used"
-#ifdef CONFIG_HAP_POOLS
" (~%lu by thread caches)"
-#endif
".\n",
- nbpools, allocated, used
-#ifdef CONFIG_HAP_POOLS
- , cached_bytes
-#endif
+ nbpools, allocated, used, cached_bytes
);
}
@@ -923,13 +918,12 @@
/* Initializes all per-thread arrays on startup */
static void init_pools()
{
-#ifdef CONFIG_HAP_POOLS
int thr;
for (thr = 0; thr < MAX_THREADS; thr++) {
LIST_INIT(&ha_thread_ctx[thr].pool_lru_head);
}
-#endif
+
detect_allocator();
}