MINOR: debug: add a new "debug dev memstats" command

Now when building with -DDEBUG_MEM_STATS, some malloc/calloc/strdup/realloc
stats are kept per file+line number and may be displayed and even reset on
the CLI using "debug dev memstats". This makes it easy to track potential
leakers or abnormal usage.
diff --git a/include/haproxy/bug.h b/include/haproxy/bug.h
index d164c4c..99e585e 100644
--- a/include/haproxy/bug.h
+++ b/include/haproxy/bug.h
@@ -68,6 +68,96 @@
 #define BUG_ON(cond)
 #endif
 
+
+#if defined(DEBUG_MEM_STATS)
+#include <stdlib.h>
+#include <string.h>
+
+/* Memory allocation statistics: one struct mem_stats per wrapped call site is
+ * placed in a global "mem_stats" section. This will not work with some linkers.
+ */
+enum {
+	MEM_STATS_TYPE_UNSET  = 0,	/* 0: type of an entry that no wrapper initialized */
+	MEM_STATS_TYPE_CALLOC,	/* entry declared by the calloc() wrapper */
+	MEM_STATS_TYPE_FREE,	/* entry declared by the __free() wrapper */
+	MEM_STATS_TYPE_MALLOC,	/* entry declared by the malloc() wrapper */
+	MEM_STATS_TYPE_REALLOC,	/* entry declared by the realloc() wrapper */
+	MEM_STATS_TYPE_STRDUP,	/* entry declared by the strdup() wrapper */
+};
+
+struct mem_stats {	/* counters and identity of one wrapped call site */
+	size_t calls;	/* number of times the call site was executed */
+	size_t size;	/* sum of the requested sizes; never updated by __free() */
+	const char *file;	/* __FILE__ of the call site */
+	int line;	/* __LINE__ of the call site */
+	int type;	/* one of MEM_STATS_TYPE_* */
+};
+
+#define calloc(x,y)  ({	/* calloc() wrapper counting calls and requested bytes per call site */ \
+	size_t __x = (x); size_t __y = (y);	/* each argument is evaluated only once */ \
+	static struct mem_stats _ __attribute__((used,__section__("mem_stats"))) = { \
+		.file = __FILE__, .line = __LINE__,	/* identify this call site */ \
+		.type = MEM_STATS_TYPE_CALLOC,				\
+	};								\
+	__asm__(".globl __start_mem_stats");	/* bounds later walked by "debug dev memstats" */ \
+	__asm__(".globl __stop_mem_stats");				\
+	_HA_ATOMIC_ADD(&_.calls, 1);					\
+	_HA_ATOMIC_ADD(&_.size, __x * __y);	/* counted before the call, whatever its result */ \
+	calloc(__x,__y);	/* not re-expanded here; its result is the value of the macro */ \
+})
+
+#define __free(x)  ({	/* free() wrapper counting calls per call site; only explicit __free() calls are counted */ \
+	void *__x = (x);	/* the argument is evaluated only once */	\
+	static struct mem_stats _ __attribute__((used,__section__("mem_stats"))) = { \
+		.file = __FILE__, .line = __LINE__,	/* identify this call site */ \
+		.type = MEM_STATS_TYPE_FREE,				\
+	};								\
+	__asm__(".globl __start_mem_stats");	/* bounds later walked by "debug dev memstats" */ \
+	__asm__(".globl __stop_mem_stats");				\
+	_HA_ATOMIC_ADD(&_.calls, 1);	/* no size is accounted for releases */	\
+	free(__x);							\
+})
+
+#define malloc(x)  ({	/* malloc() wrapper counting calls and requested bytes per call site */ \
+	size_t __x = (x);	/* the argument is evaluated only once */	\
+	static struct mem_stats _ __attribute__((used,__section__("mem_stats"))) = { \
+		.file = __FILE__, .line = __LINE__,	/* identify this call site */ \
+		.type = MEM_STATS_TYPE_MALLOC,				\
+	};								\
+	__asm__(".globl __start_mem_stats");	/* bounds later walked by "debug dev memstats" */ \
+	__asm__(".globl __stop_mem_stats");				\
+	_HA_ATOMIC_ADD(&_.calls, 1);					\
+	_HA_ATOMIC_ADD(&_.size, __x);	/* counted before the call, whatever its result */ \
+	malloc(__x);	/* not re-expanded here; its result is the value of the macro */ \
+})
+
+#define realloc(x,y)  ({	/* realloc() wrapper counting calls and requested bytes per call site */ \
+	void *__x = (x); size_t __y = (y);	/* each argument is evaluated only once */ \
+	static struct mem_stats _ __attribute__((used,__section__("mem_stats"))) = { \
+		.file = __FILE__, .line = __LINE__,	/* identify this call site */ \
+		.type = MEM_STATS_TYPE_REALLOC,				\
+	};								\
+	__asm__(".globl __start_mem_stats");	/* bounds later walked by "debug dev memstats" */ \
+	__asm__(".globl __stop_mem_stats");				\
+	_HA_ATOMIC_ADD(&_.calls, 1);					\
+	_HA_ATOMIC_ADD(&_.size, __y);	/* the whole new size is added, not the size difference */ \
+	realloc(__x,__y);	/* not re-expanded here; its result is the value of the macro */ \
+})
+
+#define strdup(x)  ({	/* strdup() wrapper counting calls and string lengths per call site */ \
+	const char *__x = (x); size_t __y = strlen(__x); /* x is passed to strlen(): must not be NULL */ \
+	static struct mem_stats _ __attribute__((used,__section__("mem_stats"))) = { \
+		.file = __FILE__, .line = __LINE__,	/* identify this call site */ \
+		.type = MEM_STATS_TYPE_STRDUP,				\
+	};								\
+	__asm__(".globl __start_mem_stats");	/* bounds later walked by "debug dev memstats" */ \
+	__asm__(".globl __stop_mem_stats");				\
+	_HA_ATOMIC_ADD(&_.calls, 1);					\
+	_HA_ATOMIC_ADD(&_.size, __y);	/* string length only, the trailing zero is not counted */ \
+	strdup(__x);	/* not re-expanded here; its result is the value of the macro */ \
+})
+#endif /* DEBUG_MEM_STATS*/
+
 #endif /* _HAPROXY_BUG_H */
 
 /*
diff --git a/src/debug.c b/src/debug.c
index 6f46591..e1a07f0 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -673,6 +673,107 @@
 	return 1;
 }
 
+#if defined(DEBUG_MEM_STATS)
+/* CLI parser for the "debug dev memstats" command; args[3] may be "reset" or "all" */
+static int debug_parse_cli_memstats(char **args, char *payload, struct appctx *appctx, void *private)
+{
+	extern __attribute__((__weak__)) struct mem_stats __start_mem_stats;	/* first entry to visit */
+	extern __attribute__((__weak__)) struct mem_stats __stop_mem_stats;	/* end of the entries, not visited */
+
+	if (!cli_has_level(appctx, ACCESS_LVL_OPER))	/* any use is checked against ACCESS_LVL_OPER */
+		return 1;
+
+	if (strcmp(args[3], "reset") == 0) {
+		struct mem_stats *ptr;
+
+		if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))	/* resetting is checked against ACCESS_LVL_ADMIN */
+			return 1;
+
+		for (ptr = &__start_mem_stats; ptr < &__stop_mem_stats; ptr++) {
+			_HA_ATOMIC_STORE(&ptr->calls, 0);	/* only the counters are cleared; */
+			_HA_ATOMIC_STORE(&ptr->size, 0);	/* file, line and type are left untouched */
+		}
+		return 1;	/* nothing is dumped after a reset */
+	}
+
+	if (strcmp(args[3], "all") == 0)
+		appctx->ctx.cli.i0 = 1;	/* the I/O handler will also list entries whose counters are all zero */
+
+	/* otherwise proceed with the dump from p0 (included) to p1 (excluded) */
+	appctx->ctx.cli.p0 = &__start_mem_stats;
+	appctx->ctx.cli.p1 = &__stop_mem_stats;
+	return 0;
+}
+
+/* CLI I/O handler for the "debug dev memstats" command. Dumps the mem_stats
+ * structs stored between p0 (included) and p1 (excluded): all of them if i0
+ * is non-zero, otherwise only those with a non-zero size or calls count.
+ */
+static int debug_iohandler_memstats(struct appctx *appctx)
+{
+	struct stream_interface *si = appctx->owner;
+	struct mem_stats *ptr = appctx->ctx.cli.p0;
+	int ret = 1;	/* 1 is returned unless the output gets blocked below */
+
+	if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+		goto end;	/* nothing is dumped when one of these flags is set */
+
+	chunk_reset(&trash);
+
+	/* one line is emitted per entry; when it cannot be pushed, the current
+	 * position is saved in p0, where a later call would start again.
+	 */
+	for (ptr = appctx->ctx.cli.p0; ptr != appctx->ctx.cli.p1; ptr++) {
+		const char *type;
+		const char *name;
+		const char *p;
+
+		if (!ptr->size && !ptr->calls && !appctx->ctx.cli.i0)
+			continue;	/* unused entries are only listed when i0 is set */
+
+		/* basename only: keep what follows the last '/' of the file name */
+		for (p = name = ptr->file; *p; p++) {
+			if (*p == '/')
+				name = p + 1;
+		}
+
+		switch (ptr->type) {
+		case MEM_STATS_TYPE_CALLOC:  type = "CALLOC";  break;
+		case MEM_STATS_TYPE_FREE:    type = "FREE";    break;
+		case MEM_STATS_TYPE_MALLOC:  type = "MALLOC";  break;
+		case MEM_STATS_TYPE_REALLOC: type = "REALLOC"; break;
+		case MEM_STATS_TYPE_STRDUP:  type = "STRDUP";  break;
+		default:                     type = "UNSET";   break;
+		}
+
+		/* Emit "file:line" left-aligned then pad it with spaces until
+		 * trash holds 25 bytes, so that the next columns line up.
+		 * An earlier single-call variant used "%20s:%-5d" instead,
+		 * which right-aligns the file name; it is not used anymore.
+		 * Longer "file:line" pairs are emitted as-is, unpadded.
+		 */
+
+		chunk_printf(&trash, "%s:%d", name, ptr->line);
+		while (trash.data < 25)
+			trash.area[trash.data++] = ' ';
+		chunk_appendf(&trash, "%7s  size: %12lu  calls: %9lu  size/call: %6lu\n",
+			     type,
+			     (unsigned long)ptr->size, (unsigned long)ptr->calls,
+			     (unsigned long)(ptr->calls ? (ptr->size / ptr->calls) : 0));	/* 0 instead of dividing by zero */
+
+		if (ci_putchk(si_ic(si), &trash) == -1) {
+			si_rx_room_blk(si);
+			appctx->ctx.cli.p0 = ptr;	/* this entry was not sent: remember it */
+			ret = 0;	/* 0 reports that the dump is not complete */
+			break;
+		}
+	}
+
+ end:
+	return ret;
+}
+
+#endif
+
 #ifndef USE_THREAD_DUMP
 
 /* This function dumps all threads' state to the trash. This version is the
@@ -810,6 +911,9 @@
 	{{ "debug", "dev", "hex",   NULL }, "debug dev hex   <addr> [len]: dump a memory area",              debug_parse_cli_hex,   NULL, NULL, NULL, ACCESS_EXPERT },
 	{{ "debug", "dev", "log",   NULL }, "debug dev log   [msg] ...   : send this msg to global logs",    debug_parse_cli_log,   NULL, NULL, NULL, ACCESS_EXPERT },
 	{{ "debug", "dev", "loop",  NULL }, "debug dev loop  [ms]        : loop this long",                  debug_parse_cli_loop,  NULL, NULL, NULL, ACCESS_EXPERT },
+#if defined(DEBUG_MEM_STATS)
+	{{ "debug", "dev", "memstats", NULL }, "debug dev memstats [reset|all] : dump/reset memory statistics",    debug_parse_cli_memstats, debug_iohandler_memstats, NULL, NULL, ACCESS_EXPERT },
+#endif
 	{{ "debug", "dev", "panic", NULL }, "debug dev panic             : immediately trigger a panic",     debug_parse_cli_panic, NULL, NULL, NULL, ACCESS_EXPERT },
 	{{ "debug", "dev", "stream",NULL }, "debug dev stream ...        : show/manipulate stream flags",    debug_parse_cli_stream,NULL, NULL, NULL, ACCESS_EXPERT },
 	{{ "debug", "dev", "tkill", NULL }, "debug dev tkill [thr] [sig] : send signal to thread",           debug_parse_cli_tkill, NULL, NULL, NULL, ACCESS_EXPERT },