MINOR: cli/debug: add a thread dump function

The new function ha_thread_dump() will dump debugging info about all known
threads. The current thread will contain a bit more info. The long-term goal
is to make it possible to use it in signal handlers to improve the accuracy
of some dumps.

Since the function dumps its output into the trash, it was trivial to add
a new "show threads" command to the CLI.
diff --git a/Makefile b/Makefile
index f598e26..258e5a1 100644
--- a/Makefile
+++ b/Makefile
@@ -811,7 +811,7 @@
        src/xxhash.o src/hpack-enc.o src/h2.o src/freq_ctr.o src/lru.o         \
        src/protocol.o src/arg.o src/hpack-huff.o src/hdr_idx.o src/base64.o   \
        src/hash.o src/mailers.o src/activity.o src/http_msg.o src/version.o   \
-       src/mworker.o src/mworker-prog.o
+       src/mworker.o src/mworker-prog.o src/debug.o
 
 EBTREE_OBJS = $(EBTREE_DIR)/ebtree.o $(EBTREE_DIR)/eb32sctree.o \
               $(EBTREE_DIR)/eb32tree.o $(EBTREE_DIR)/eb64tree.o \
diff --git a/doc/management.txt b/doc/management.txt
index 9d5e3e2..ecaf729 100644
--- a/doc/management.txt
+++ b/doc/management.txt
@@ -2512,6 +2512,15 @@
           | fgrep 'key=' | cut -d' ' -f2 | cut -d= -f2 > abusers-ip.txt
           ( or | awk '/key/{ print a[split($2,a,"=")]; }' )
 
+show threads
+  Dumps some internal states and structures for each thread, that may be useful
+  to help developers understand a problem. The output tries to be readable by
+  showing one block per thread, with a bit more info for the current thread.
+  The output format is purposely not documented so that it can easily evolve
+  as new needs are identified, without having to maintain any backwards
+  compatibility, and just like with "show activity", the values are only
+  meaningful with the code at hand.
+
 show tls-keys [id|*]
   Dump all loaded TLS ticket keys references. The TLS ticket key reference ID
   and the file from which the keys have been loaded is shown. Both of those
diff --git a/include/common/debug.h b/include/common/debug.h
index 014e5ee..ae6cdc8 100644
--- a/include/common/debug.h
+++ b/include/common/debug.h
@@ -81,6 +81,11 @@
 		##args);                                           \
         } while (0)
 
+/* debugging dump helpers implemented in src/debug.c; both write to the trash */
+struct task;
+void ha_task_dump(const struct task *task, const char *pfx);
+void ha_thread_dump(int thr);
+
 /* This one is useful to automatically apply poisonning on an area returned
  * by malloc(). Only "p_" is required to make it work, and to define a poison
  * byte using -dM.
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 0000000..54f91d6
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,133 @@
+/*
+ * Process debugging functions.
+ *
+ * Copyright 2000-2019 Willy Tarreau <willy@haproxy.org>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <signal.h>
+#include <time.h>
+#include <stdio.h>
+
+#include <common/config.h>
+#include <common/debug.h>
+#include <common/hathreads.h>
+#include <common/initcall.h>
+#include <common/standard.h>
+
+#include <types/global.h>
+
+#include <proto/cli.h>
+#include <proto/fd.h>
+#include <proto/stream_interface.h>
+#include <proto/task.h>
+
+/* Resets the trash then fills it with the known state of thread <thr>: one
+ * summary block that can be dumped from any thread, followed by the current
+ * task's details (via ha_task_dump()) only when <thr> is the calling thread.
+ */
+void ha_thread_dump(int thr)
+{
+	unsigned long mask = 1UL << thr;
+	int is_self = (thr == tid);
+
+	chunk_reset(&trash);
+	chunk_appendf(&trash,
+	              "%c Thread %-2u: act=%d glob=%d wq=%d rq=%d tl=%d tlsz=%d rqsz=%d\n"
+	              "             fdcache=%d prof=%d",
+	              is_self ? '*' : ' ', thr + 1,
+	              (active_tasks_mask & mask) != 0,
+	              (global_tasks_mask & mask) != 0,
+	              !eb_is_empty(&task_per_thread[thr].timers),
+	              !eb_is_empty(&task_per_thread[thr].rqueue),
+	              !LIST_ISEMPTY(&task_per_thread[thr].task_list),
+	              task_per_thread[thr].task_list_size,
+	              task_per_thread[thr].rqueue_size,
+	              (fd_cache_mask & mask) != 0,
+	              (task_profiling_mask & mask) != 0);
+
+#ifdef USE_THREAD
+	chunk_appendf(&trash,
+	              " harmless=%d wantrdv=%d",
+	              (threads_harmless_mask & mask) != 0,
+	              (threads_want_rdv_mask & mask) != 0);
+#endif
+
+	chunk_appendf(&trash, "\n");
+
+	/* the remaining fields can only be dumped by the thread itself */
+	if (is_self) {
+		chunk_appendf(&trash, "             curr_task=");
+		ha_task_dump(curr_task, "             ");
+	}
+}
+
+
+/* Appends to the trash some information about <task> (which may be either a
+ * task or a tasklet), prefixing every line except the first with <pfx>. The
+ * first line starts with the pointer itself. <task> is dereferenced without
+ * any check, so callers must pass a valid pointer. The trash is only appended.
+ */
+void ha_task_dump(const struct task *task, const char *pfx)
+{
+	chunk_appendf(&trash,
+	              "%p (%s) calls=%u last=%llu%s\n",
+	              task, TASK_IS_TASKLET(task) ? "tasklet" : "task",
+	              task->calls,
+	              task->call_date ? (unsigned long long)(now_mono_time() - task->call_date) : 0,
+	              task->call_date ? " ns ago" : "");
+
+	chunk_appendf(&trash, "%s"
+	              "  fct=%p (%s) ctx=%p\n",
+	              pfx,
+	              task->process,
+	              task->process == process_stream ? "process_stream" :
+	              task->process == task_run_applet ? "task_run_applet" :
+	              task->process == si_cs_io_cb ? "si_cs_io_cb" :
+	              "?",
+	              task->context);
+}
+
+
+/* CLI I/O handler for "show threads": calls ha_thread_dump() for each thread
+ * and pushes the trash with ci_putchk() after each one. It returns 0 if the
+ * output buffer is full and it needs to be called again, otherwise non-zero.
+ * The resume index is kept in appctx->st1 and reloaded when st0 is non-zero.
+ */
+static int cli_io_handler_show_threads(struct appctx *appctx)
+{
+	struct stream_interface *si = appctx->owner;
+	int thr = 0;
+
+	/* bail out when the channel carries CF_WRITE_ERROR or CF_SHUTW */
+	if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
+		return 1;
+
+	if (appctx->st0)
+		thr = appctx->st1;
+
+	for (; thr < global.nbthread; thr++) {
+		ha_thread_dump(thr);
+		if (ci_putchk(si_ic(si), &trash) != -1)
+			continue;
+
+		/* failed: remember where we stopped and ask to be called again */
+		si_rx_room_blk(si);
+		appctx->st1 = thr;
+		return 0;
+	}
+	return 1;
+}
+
+/* CLI keyword table: binds "show threads" to cli_io_handler_show_threads() */
+static struct cli_kw_list cli_kws = {{ },{
+	{ { "show", "threads", NULL },    "show threads   : show some threads debugging information",   NULL, cli_io_handler_show_threads, NULL },
+	{{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);