DEBUG: pools: add new build option DEBUG_POOL_INTEGRITY

When enabled, objects picked from the cache are checked for corruption
by comparing their contents against a pattern that was placed when they
were inserted into the cache. Objects are also allocated in reverse
order, from the oldest one to the most recent, so as to maximize the
ability to detect such corruption. The goal is to detect writes after
free (or possibly hardware memory corruptions). Contrary to DEBUG_UAF,
this cannot detect reads after free, but it may detect later
corruptions and does not consume extra memory. The CPU usage will
increase a bit due to the cost of filling/checking the area and to the
preference for the cold cache instead of the hot one, though not as
much as with DEBUG_UAF. This option is meant to be usable in production.
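
For illustration only (this sketch is not part of the patch), the following
standalone program shows the principle: on free the area is filled with a
pattern that advances by ~0UL/3 so that about half of the bits change between
two consecutive frees, and on allocation all words are compared so that any
mismatch reveals a write performed after the free. The fill()/check() helpers
and the fixed 8-word buffer are invented for the example; in the patch the
filled area starts right after the struct pool_cache_item embedded at the
head of the freed object.

#include <stdio.h>
#include <stdlib.h>

static unsigned long fill_pattern; /* per-cache counter in the real code */

/* fill <words> words of <area> with the next pattern value */
static void fill(unsigned long *area, size_t words)
{
	unsigned long u = fill_pattern += ~0UL / 3; /* flips ~half the bits */
	size_t i;

	for (i = 0; i < words; i++)
		area[i] = u;
}

/* verify that all words still carry the same value as the first one */
static void check(const unsigned long *area, size_t words)
{
	size_t i;

	for (i = 1; i < words; i++) {
		if (area[i] != area[0]) {
			fprintf(stderr, "corruption at word %zu\n", i);
			abort();
		}
	}
}

int main(void)
{
	unsigned long buf[8];

	fill(buf, 8);   /* object enters the cache */
	buf[3] ^= 1;    /* simulated write-after-free: one bit flipped */
	check(buf, 8);  /* object leaves the cache: this aborts */
	return 0;
}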
diff --git a/Makefile b/Makefile
index 54f3ba4..af0f5cc 100644
--- a/Makefile
+++ b/Makefile
@@ -231,7 +231,7 @@
 # passed as-is to CFLAGS). Please check sources for their exact meaning or do
 # not use them at all. Some even more obscure ones might also be available
 # without appearing here. Currently defined DEBUG macros include DEBUG_FULL,
-# DEBUG_MEM_STATS, DEBUG_DONT_SHARE_POOLS, DEBUG_FD,
+# DEBUG_MEM_STATS, DEBUG_DONT_SHARE_POOLS, DEBUG_FD, DEBUG_POOL_INTEGRITY,
 # DEBUG_NO_POOLS, DEBUG_FAIL_ALLOC, DEBUG_STRICT_NOCRASH, DEBUG_HPACK,
 # DEBUG_AUTH, DEBUG_SPOE, DEBUG_UAF, DEBUG_THREAD, DEBUG_STRICT, DEBUG_DEV,
 # DEBUG_TASK, DEBUG_MEMORY_POOLS.
diff --git a/doc/internals/api/pools.txt b/doc/internals/api/pools.txt
index 381cf95..f3a0141 100644
--- a/doc/internals/api/pools.txt
+++ b/doc/internals/api/pools.txt
@@ -177,6 +177,19 @@
  | 32-bytes long. This is the smallest size that a pool may be, and any smaller
  | size will automatically be rounded up to this size.
 
+When build option DEBUG_POOL_INTEGRITY is set, the area of the object that
+follows the two list elements, up to the end given by pool->size, is filled
+with pseudo-random words during pool_put_to_cache(), and these words are
+compared with each other during pool_get_from_cache(); the process crashes
+if any bit differs, since this indicates that the memory area was modified
+after the free. The pseudo-random pattern is in fact incremented by (~0)/3
+upon each free so that roughly half of the bits change each time, which
+maximizes the likelihood of detecting a single bit flip in either direction.
+In order to avoid an immediate reuse and to maximize the time the object
+spends in the cache, when this option is set, objects are picked from the
+cache starting from the oldest one instead of the freshest one. This way
+even late memory corruptions have a chance to be detected.
+
 When build option DEBUG_MEMORY_POOLS is set, pool objects are allocated with
 one extra pointer compared to the requested size, so that the bytes that follow
 the memory area point to the pool descriptor itself as long as the object is
@@ -487,6 +500,19 @@
         use-after-free conditions by crashing the program at the first abnormal
         access. This should not be used in production.
 
+DEBUG_POOL_INTEGRITY
+        When enabled, objects picked from the cache are checked for corruption
+        by comparing their contents against a pattern that was placed when they
+        were inserted into the cache. Objects are also allocated in reverse
+        order, from the oldest one to the most recent, so as to maximize the
+        ability to detect such corruption. The goal is to detect writes after
+        free (or possibly hardware memory corruptions). Contrary to DEBUG_UAF,
+        this cannot detect reads after free, but it may detect later
+        corruptions and does not consume extra memory. The CPU usage will
+        increase a bit due to the cost of filling/checking the area and to the
+        preference for the cold cache instead of the hot one, though not as
+        much as with DEBUG_UAF. This option is meant to be usable in production.
+
 DEBUG_MEM_STATS
         When enabled, all malloc/calloc/realloc/strdup/free calls are accounted
         for per call place (file+line number), and may be displayed or reset on
diff --git a/include/haproxy/pool-t.h b/include/haproxy/pool-t.h
index f6b74e3..283cc9f 100644
--- a/include/haproxy/pool-t.h
+++ b/include/haproxy/pool-t.h
@@ -45,6 +45,9 @@
 struct pool_cache_head {
 	struct list list;    /* head of objects in this pool */
 	unsigned int count;  /* number of objects in this pool */
+#if defined(DEBUG_POOL_INTEGRITY)
+	ulong fill_pattern;  /* pattern used to fill the area on free */
+#endif
 } THREAD_ALIGNED(64);
 
 /* This represents one item stored in the thread-local cache. <by_pool> links
diff --git a/include/haproxy/pool.h b/include/haproxy/pool.h
index b001f34..61428a8 100644
--- a/include/haproxy/pool.h
+++ b/include/haproxy/pool.h
@@ -180,6 +180,64 @@
  * cache first, then from the second level if it exists.
  */
 
+#if defined(DEBUG_POOL_INTEGRITY)
+
+/* Updates <pch>'s fill_pattern and fills the free area after <item> with it,
+ * up to <size> bytes. The item part is left untouched.
+ */
+static inline void pool_fill_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size)
+{
+	ulong *ptr = (ulong *)item;
+	uint ofs;
+	ulong u;
+
+	if (size <= sizeof(*item))
+		return;
+
+	/* Upgrade the fill_pattern to change about half of the bits
+	 * (to be sure to catch static flag corruption), and apply it.
+	 */
+	u = pch->fill_pattern += ~0UL / 3; // 0x55...55
+	ofs = sizeof(*item) / sizeof(*ptr);
+	while (ofs < size / sizeof(*ptr))
+		ptr[ofs++] = u;
+}
+
+/* Checks a pool_cache_item's integrity after extracting it from the cache. It
+ * must have been previously initialized using pool_fill_pattern(). If any
+ * corruption is detected, the function provokes an immediate crash.
+ */
+static inline void pool_check_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size)
+{
+	const ulong *ptr = (const ulong *)item;
+	uint ofs;
+	ulong u;
+
+	if (size <= sizeof(*item))
+		return;
+
+	/* let's check that all words past *item are equal */
+	ofs = sizeof(*item) / sizeof(*ptr);
+	u = ptr[ofs++];
+	while (ofs < size / sizeof(*ptr)) {
+		if (unlikely(ptr[ofs] != u))
+			ABORT_NOW();
+		ofs++;
+	}
+}
+
+#else
+
+static inline void pool_fill_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size)
+{
+}
+
+static inline void pool_check_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size)
+{
+}
+
+#endif
+
 /* Tries to retrieve an object from the local pool cache corresponding to pool
  * <pool>. If none is available, tries to allocate from the shared cache, and
  * returns NULL if nothing is available.
@@ -196,7 +254,17 @@
 			return NULL;
 	}
 
+#if defined(DEBUG_POOL_INTEGRITY)
+	/* allocate oldest objects first so as to keep them as long as possible
+	 * in the cache before they are reused, thus maximizing the chance to
+	 * detect an overwrite.
+	 */
+	item = LIST_PREV(&ph->list, typeof(item), by_pool);
+	pool_check_pattern(ph, item, pool->size);
+#else
+	/* allocate hottest objects first */
 	item = LIST_NEXT(&ph->list, typeof(item), by_pool);
+#endif
 	LIST_DELETE(&item->by_pool);
 	LIST_DELETE(&item->by_lru);
 
diff --git a/src/pool.c b/src/pool.c
index a3b14d2..06089c3 100644
--- a/src/pool.c
+++ b/src/pool.c
@@ -319,6 +319,7 @@
 
 	while (released < count && !LIST_ISEMPTY(&ph->list)) {
 		item = LIST_PREV(&ph->list, typeof(item), by_pool);
+		pool_check_pattern(ph, item, pool->size);
 		LIST_DELETE(&item->by_pool);
 		LIST_DELETE(&item->by_lru);
 
@@ -399,6 +400,7 @@
 	LIST_INSERT(&ph->list, &item->by_pool);
 	LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru);
 	ph->count++;
+	pool_fill_pattern(ph, item, pool->size);
 	pool_cache_count++;
 	pool_cache_bytes += pool->size;
 
@@ -470,6 +472,7 @@
 		LIST_INSERT(&pch->list, &item->by_pool);
 		LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru);
 		count++;
+		pool_fill_pattern(pch, item, pool->size);
 	}
 	HA_ATOMIC_ADD(&pool->used, count);
 	pch->count += count;