MINOR: tasks: also compute the tasklet latency when DEBUG_TASK is set

It is extremely useful to be able to observe the wakeup latency of some
important I/O operations, so let's accept to inflate the tasklet struct
by 8 extra bytes when DEBUG_TASK is set. With just this we have enough
to get live reports like this:

  $ socat - /tmp/sock1 <<< "show profiling"
  Per-task CPU profiling              : on      # set profiling tasks {on|auto|off}
  Tasks activity:
    function                      calls   cpu_tot   cpu_avg   lat_tot   lat_avg
    si_cs_io_cb                 8099492   4.833s    596.0ns   8.974m    66.48us
    h1_io_cb                    7460365   11.55s    1.548us   2.477m    19.92us
    process_stream              7383828   22.79s    3.086us   18.39m    149.5us
    h1_timeout_task                4157      -         -      348.4ms   83.81us
    srv_cleanup_toremove_connections751   39.70ms   52.86us   10.54ms   14.04us
    srv_cleanup_idle_connections     21   1.405ms   66.89us   30.82us   1.467us
    task_run_applet                  16   1.058ms   66.13us   446.2us   27.89us
    accept_queue_process              7   34.53us   4.933us   333.1us   47.58us
diff --git a/include/haproxy/task-t.h b/include/haproxy/task-t.h
index 3df4bf6..34d9a45 100644
--- a/include/haproxy/task-t.h
+++ b/include/haproxy/task-t.h
@@ -127,6 +127,9 @@
 struct tasklet {
 	TASK_COMMON;			/* must be at the beginning! */
 	struct list list;
+#ifdef DEBUG_TASK
+	uint64_t call_date;		/* date of the last tasklet wakeup or call */
+#endif
 	int tid;                        /* TID of the tasklet owner, <0 if local */
 };
 
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index ce64182..c717420 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -368,6 +368,8 @@
 	tl->debug.caller_idx = !tl->debug.caller_idx;
 	tl->debug.caller_file[tl->debug.caller_idx] = file;
 	tl->debug.caller_line[tl->debug.caller_idx] = line;
+	if (task_profiling_mask & tid_bit)
+		tl->call_date = now_mono_time();
 #endif
 	__tasklet_wakeup_on(tl, thr);
 }
diff --git a/src/task.c b/src/task.c
index 9249068..e33120f 100644
--- a/src/task.c
+++ b/src/task.c
@@ -505,6 +505,9 @@
 
 				profile_entry = sched_activity_entry(sched_activity, t->process);
 				before = now_mono_time();
+#ifdef DEBUG_TASK
+				HA_ATOMIC_ADD(&profile_entry->lat_time, before - ((struct tasklet *)t)->call_date);
+#endif
 				process(t, ctx, state);
 				HA_ATOMIC_ADD(&profile_entry->calls, 1);
 				HA_ATOMIC_ADD(&profile_entry->cpu_time, now_mono_time() - before);