MINOR: activity: also report collected tasks stats in "show profiling"

"show profiling" will now dump the stats collected by the scheduler if
profiling was previously enabled. This will immediately make it obvious
what functions are responsible for others' high latencies or which ones
are suffering from others, and should help spot issues like undesired
wakeups.

Example:

Per-task CPU profiling              : on      # set profiling tasks {on|auto|off}
Tasks activity:
  function                      calls   cpu_tot   cpu_avg   lat_tot   lat_avg
  si_cs_io_cb                 5569479   23.37s    4.196us      -         -
  h1_io_cb                    5558654   13.60s    2.446us      -         -
  process_stream               250841   1.476s    5.882us   3.499s    13.95us
  main+0x9e670                    198      -         -      5.526ms   27.91us
  task_run_applet                  17   1.509ms   88.77us   205.8us   12.11us
  srv_cleanup_idle_connections     12   44.51us   3.708us   25.71us   2.142us
  main+0x158c80                     9   48.72us   5.413us      -         -
  srv_cleanup_toremove_connections  5   165.1us   33.02us   123.6us   24.72us
diff --git a/doc/management.txt b/doc/management.txt
index d221e18..99b3e59 100644
--- a/doc/management.txt
+++ b/doc/management.txt
@@ -2310,7 +2310,9 @@
 
 show profiling
   Dumps the current profiling settings, one per line, as well as the command
-  needed to change them.
+  needed to change them. When tasks profiling is enabled, some per-function
+  statistics collected by the scheduler will also be emitted, with a summary
+  covering the number of calls, total/avg CPU time and total/avg latency.
 
 show resolvers [<resolvers section id>]
   Dump statistics for the given resolvers section, or all resolvers sections
diff --git a/src/activity.c b/src/activity.c
index 0d64a36..e0e6693 100644
--- a/src/activity.c
+++ b/src/activity.c
@@ -97,13 +97,29 @@
 	return 1;
 }
 
+static int cmp_sched_activity(const void *a, const void *b)
+{
+	const struct sched_activity *l = (const struct sched_activity *)a;
+	const struct sched_activity *r = (const struct sched_activity *)b;
+
+	if (l->calls > r->calls)
+		return -1;
+	else if (l->calls < r->calls)
+		return 1;
+	else
+		return 0;
+}
+
 /* This function dumps all profiling settings. It returns 0 if the output
  * buffer is full and it needs to be called again, otherwise non-zero.
  */
 static int cli_io_handler_show_profiling(struct appctx *appctx)
 {
+	struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
 	struct stream_interface *si = appctx->owner;
+	struct buffer *name_buffer = get_trash_chunk();
 	const char *str;
+	int i, max;
 
 	if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
 		return 1;
@@ -117,10 +133,38 @@
 	default:                 str="off"; break;
 	}
 
+	memcpy(tmp_activity, sched_activity, sizeof(tmp_activity));
+	qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity);
+
 	chunk_printf(&trash,
 	             "Per-task CPU profiling              : %s      # set profiling tasks {on|auto|off}\n",
 	             str);
 
+	chunk_appendf(&trash, "Tasks activity:\n"
+		      "  function                      calls   cpu_tot   cpu_avg   lat_tot   lat_avg\n");
+
+	for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
+		chunk_reset(name_buffer);
+
+		if (!tmp_activity[i].func)
+			chunk_printf(name_buffer, "other");
+		else
+			resolve_sym_name(name_buffer, "", tmp_activity[i].func);
+
+		/* reserve 35 chars for name+' '+#calls, knowing that longer names
+		 * are often used for less often called functions.
+		 */
+		max = 35 - name_buffer->data;
+		if (max < 1)
+			max = 1;
+		chunk_appendf(&trash, "  %s%*llu", name_buffer->area, max, (unsigned long long)tmp_activity[i].calls);
+
+		print_time_short(&trash, "   ", tmp_activity[i].cpu_time, "");
+		print_time_short(&trash, "   ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
+		print_time_short(&trash, "   ", tmp_activity[i].lat_time, "");
+		print_time_short(&trash, "   ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
+	}
+
 	if (ci_putchk(si_ic(si), &trash) == -1) {
 		/* failed, try again */
 		si_rx_room_blk(si);