MINOR: debug: add a balance of alloc - free at the end of the memstats dump

When digging into suspected memory leaks, it's cumbersome to manually
count the number of allocation and free calls. Here we're adding a
summary line at the end of the dump showing the sum of allocs minus the
sum of frees, excluding realloc since we can't know how much it releases
upon each call. This means that when doing many realloc+free the balance
may be negative, but in practice there are very few reallocs so that's
not a problem. Also the size/call is signed and corresponds to the
average size allocated (i.e. leaked) per call.

It seems to work reasonably well for now:

  > debug dev memstats match buf
  quic_conn.c:2978       P_FREE  size:   1239547904  calls:     75656  size/call:  16384 buffer
  quic_conn.c:2960      P_ALLOC  size:   1239547904  calls:     75656  size/call:  16384 buffer
  mux_quic.c:393        P_ALLOC  size:   9112780800  calls:    556200  size/call:  16384 buffer
  mux_quic.c:383        P_ALLOC  size:  17783193600  calls:   1085400  size/call:  16384 buffer
  mux_quic.c:159         P_FREE  size:   8935833600  calls:    545400  size/call:  16384 buffer
  mux_quic.c:142         P_FREE  size:   9112780800  calls:    556200  size/call:  16384 buffer
  h3.c:776              P_ALLOC  size:   8935833600  calls:    545400  size/call:  16384 buffer
  quic_stream.c:166      P_FREE  size:    975241216  calls:     59524  size/call:  16384 buffer
  quic_stream.c:127      P_FREE  size:   7960592384  calls:    485876  size/call:  16384 buffer
  stream.c:772           P_FREE  size:      8798208  calls:       537  size/call:  16384 buffer
  stream.c:768           P_FREE  size:      2424832  calls:       148  size/call:  16384 buffer
  stream.c:751          P_ALLOC  size:   8852062208  calls:    540287  size/call:  16384 buffer
  stream.c:641           P_FREE  size:   8849162240  calls:    540110  size/call:  16384 buffer
  stream.c:640           P_FREE  size:   8847360000  calls:    540000  size/call:  16384 buffer
  channel.h:850         P_ALLOC  size:      2441216  calls:       149  size/call:  16384 buffer
  channel.h:850         P_ALLOC  size:      5914624  calls:       361  size/call:  16384 buffer
  dynbuf.c:55            P_FREE  size:        32768  calls:         2  size/call:  16384 buffer
  Total                 BALANCE  size:            0  calls:   5606906  size/call:      0 (excl. realloc)

Let's see how useful this becomes over time.
diff --git a/src/debug.c b/src/debug.c
index 8868797..ba39630 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -1239,7 +1239,9 @@
 	struct mem_stats *start, *stop; /* begin/end of dump */
 	char *match;                    /* non-null if a name prefix is specified */
 	int show_all;                   /* show all entries if non-null */
-	int width;
+	int width;                      /* 1st column width */
+	long tot_size;                  /* sum of alloc-free */
+	ulong tot_calls;                /* sum of calls */
 };
 
 /* CLI parser for the "debug dev memstats" command. Sets a dev_mem_ctx shown above. */
@@ -1341,6 +1343,7 @@
 		const char *p;
 		const char *info = NULL;
 		const char *func = NULL;
+		int direction = 0; // neither alloc nor free (e.g. realloc)
 
 		if (!ptr->size && !ptr->calls && !ctx->show_all)
 			continue;
@@ -1354,13 +1357,13 @@
 		func = ptr->caller.func;
 
 		switch (ptr->caller.what) {
-		case MEM_STATS_TYPE_CALLOC:  type = "CALLOC";  break;
-		case MEM_STATS_TYPE_FREE:    type = "FREE";    break;
-		case MEM_STATS_TYPE_MALLOC:  type = "MALLOC";  break;
+		case MEM_STATS_TYPE_CALLOC:  type = "CALLOC";  direction =  1; break;
+		case MEM_STATS_TYPE_FREE:    type = "FREE";    direction = -1; break;
+		case MEM_STATS_TYPE_MALLOC:  type = "MALLOC";  direction =  1; break;
 		case MEM_STATS_TYPE_REALLOC: type = "REALLOC"; break;
-		case MEM_STATS_TYPE_STRDUP:  type = "STRDUP";  break;
-		case MEM_STATS_TYPE_P_ALLOC: type = "P_ALLOC"; if (ptr->extra) info = ((const struct pool_head *)ptr->extra)->name; break;
-		case MEM_STATS_TYPE_P_FREE:  type = "P_FREE";  if (ptr->extra) info = ((const struct pool_head *)ptr->extra)->name; break;
+		case MEM_STATS_TYPE_STRDUP:  type = "STRDUP";  direction =  1; break;
+		case MEM_STATS_TYPE_P_ALLOC: type = "P_ALLOC"; direction =  1; if (ptr->extra) info = ((const struct pool_head *)ptr->extra)->name; break;
+		case MEM_STATS_TYPE_P_FREE:  type = "P_FREE";  direction = -1; if (ptr->extra) info = ((const struct pool_head *)ptr->extra)->name; break;
 		default:                     type = "UNSET";   break;
 		}
 
@@ -1395,10 +1398,35 @@
 		if (applet_putchk(appctx, &trash) == -1) {
 			ctx->start = ptr;
 			ret = 0;
-			break;
+			goto end;
+		}
+		if (direction > 0) {
+			ctx->tot_size  += (ulong)ptr->size;
+			ctx->tot_calls += (ulong)ptr->calls;
+		}
+		else if (direction < 0) {
+			ctx->tot_size  -= (ulong)ptr->size;
+			ctx->tot_calls += (ulong)ptr->calls;
 		}
 	}
 
+	/* now dump a summary */
+	chunk_reset(&trash);
+	chunk_appendf(&trash, "Total");
+	while (trash.data < ctx->width)
+		trash.area[trash.data++] = ' ';
+
+	chunk_appendf(&trash, "%7s  size: %12ld  calls: %9lu  size/call: %6ld %s\n",
+		      "BALANCE",
+		      ctx->tot_size, ctx->tot_calls,
+		      (long)(ctx->tot_calls ? (ctx->tot_size / ctx->tot_calls) : 0),
+		      "(excl. realloc)");
+
+	if (applet_putchk(appctx, &trash) == -1) {
+		ctx->start = ptr;
+		ret = 0;
+		goto end;
+	}
  end:
 	return ret;
 }