MEDIUM: tasks/activity: collect per-task statistics when profiling is enabled

Now when the profiling is enabled, the scheduler wlil update per-function
task-level statistics on number of calls, cpu usage and lateny, that could
later be checked using "show profiling". This will immediately make it
obvious what functions are responsible for others' high latencies or which
ones are suffering from others, and should help spot issues like undesired
wakeups. For now the stats are only collected but not reported (though they
are readable from sched_activity[] under gdb).
diff --git a/src/task.c b/src/task.c
index 92c771f..31f3102 100644
--- a/src/task.c
+++ b/src/task.c
@@ -400,6 +400,7 @@
 	struct list *tl_queues = sched->tasklets;
 	struct task *t;
 	uint8_t budget_mask = (1 << TL_CLASSES) - 1;
+	struct sched_activity *profile_entry = NULL;
 	unsigned int done = 0;
 	unsigned int queue;
 	unsigned short state;
@@ -465,7 +466,18 @@
 			__ha_barrier_store();
 			state = _HA_ATOMIC_XCHG(&t->state, state);
 			__ha_barrier_atomic_store();
-			process(t, ctx, state);
+
+			if (unlikely(task_profiling_mask & tid_bit)) {
+				uint64_t before;
+
+				profile_entry = sched_activity_entry(sched_activity, t->process);
+				before = now_mono_time();
+				process(t, ctx, state);
+				HA_ATOMIC_ADD(&profile_entry->calls, 1);
+				HA_ATOMIC_ADD(&profile_entry->cpu_time, now_mono_time() - before);
+			} else {
+				process(t, ctx, state);
+			}
 			done++;
 			sched->current = NULL;
 			__ha_barrier_store();
@@ -482,9 +494,13 @@
 		_HA_ATOMIC_SUB(&task_per_thread[tid].task_list_size, 1);
 		if (unlikely(t->call_date)) {
 			uint64_t now_ns = now_mono_time();
+			uint64_t lat = now_ns - t->call_date;
 
-			t->lat_time += now_ns - t->call_date;
+			t->lat_time += lat;
 			t->call_date = now_ns;
+			profile_entry = sched_activity_entry(sched_activity, t->process);
+			HA_ATOMIC_ADD(&profile_entry->lat_time, lat);
+			HA_ATOMIC_ADD(&profile_entry->calls, 1);
 		}
 
 		__ha_barrier_store();
@@ -515,8 +531,11 @@
 		 */
 		if (t != NULL) {
 			if (unlikely(t->call_date)) {
-				t->cpu_time += now_mono_time() - t->call_date;
+				uint64_t cpu = now_mono_time() - t->call_date;
+
+				t->cpu_time += cpu;
 				t->call_date = 0;
+				HA_ATOMIC_ADD(&profile_entry->cpu_time, cpu);
 			}
 
 			state = _HA_ATOMIC_AND(&t->state, ~TASK_RUNNING);