DEBUG: task: simplify the caller recording in DEBUG_TASK

Instead of storing an index that's swapped at every call, let's use the
two pointers as a shifting history. Now we have a permanent "caller"
field that records the last caller, and an optional prev_caller in the
debug section enabled by DEBUG_TASK that keeps a copy of the previous
caller one. This way, not only it's much easier to follow what's
happening during debugging, but it saves 8 bytes in the struct task in
debug mode and still keeps it under 2 cache lines in nominal mode, and
this will finally be usable everywhere and later in profiling.

The caller_idx was also used as a hint that the entry was freed, in order
to detect wakeup-after-free. This was changed by setting caller to -1
instead and preserving its value in caller[1].

Finally, the operations were made atomic. That's not critical but since
it's used for debugging and race conditions represent a significant part
of the issues in multi-threaded mode, it seems wise to at least eliminate
some possible factors of faulty analysis.
diff --git a/include/haproxy/task-t.h b/include/haproxy/task-t.h
index acaea2c..0c2dcc8 100644
--- a/include/haproxy/task-t.h
+++ b/include/haproxy/task-t.h
@@ -84,10 +84,10 @@
 };
 
 #ifdef DEBUG_TASK
+/* prev_caller keeps a copy of the previous value of the <caller> field. */
 #define TASK_DEBUG_STORAGE                   \
 	struct {                             \
-		const struct ha_caller *caller[2]; \
-		int caller_idx;              \
+		const struct ha_caller *prev_caller; \
 	} debug
 #else
 #define TASK_DEBUG_STORAGE
@@ -108,6 +108,7 @@
 		unsigned int calls; /* number of times process was called */ \
 		struct task *(*process)(struct task *t, void *ctx, unsigned int state); /* the function which processes the task */ \
 		void *context; /* the task's context */			\
+		const struct ha_caller *caller;	 /* call place of last wakeup(); 0 on init, -1 on free */ \
 		TASK_DEBUG_STORAGE;					\
 	}
 
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index b388d01..b9e733f 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -193,11 +193,10 @@
 	state = _HA_ATOMIC_OR_FETCH(&t->state, f);
 	while (!(state & (TASK_RUNNING | TASK_QUEUED))) {
 		if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_QUEUED)) {
+			caller = HA_ATOMIC_XCHG(&t->caller, caller);
+			BUG_ON((ulong)caller & 1);
 #ifdef DEBUG_TASK
-			if ((unsigned int)t->debug.caller_idx > 1)
-				ABORT_NOW();
-			t->debug.caller_idx = !t->debug.caller_idx;
-			t->debug.caller[t->debug.caller_idx] = caller;
+			HA_ATOMIC_STORE(&t->debug.prev_caller, caller);
 #endif
 			__task_wakeup(t);
 			break;
@@ -232,11 +231,10 @@
 	}
 
 	if ((new_state & ~state) & TASK_QUEUED) {
+		caller = HA_ATOMIC_XCHG(&t->caller, caller);
+		BUG_ON((ulong)caller & 1);
 #ifdef DEBUG_TASK
-		if ((unsigned int)t->debug.caller_idx > 1)
-			ABORT_NOW();
-		t->debug.caller_idx = !t->debug.caller_idx;
-		t->debug.caller[t->debug.caller_idx] = caller;
+		HA_ATOMIC_STORE(&t->debug.prev_caller, caller);
 #endif
 		__task_wakeup(t);
 	}
@@ -356,11 +354,10 @@
 	} while (!_HA_ATOMIC_CAS(&tl->state, &state, state | TASK_IN_LIST));
 
 	/* at this point we're the first ones to add this task to the list */
+	caller = HA_ATOMIC_XCHG(&tl->caller, caller);
+	BUG_ON((ulong)caller & 1);
 #ifdef DEBUG_TASK
-	if ((unsigned int)tl->debug.caller_idx > 1)
-		ABORT_NOW();
-	tl->debug.caller_idx = !tl->debug.caller_idx;
-	tl->debug.caller[tl->debug.caller_idx] = caller;
+	HA_ATOMIC_STORE(&tl->debug.prev_caller, caller);
 #endif
 	if (_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING)
 		tl->wake_date = now_mono_time();
@@ -411,11 +408,10 @@
 	BUG_ON_HOT(task_in_rq(t));
 
 	/* at this point we're the first ones to add this task to the list */
+	caller = HA_ATOMIC_XCHG(&t->caller, caller);
+	BUG_ON((ulong)caller & 1);
 #ifdef DEBUG_TASK
-	if ((unsigned int)t->debug.caller_idx > 1)
-		ABORT_NOW();
-	t->debug.caller_idx = !t->debug.caller_idx;
-	t->debug.caller[t->debug.caller_idx] = caller;
+	HA_ATOMIC_STORE(&t->debug.prev_caller, caller);
 #endif
 	if (_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING)
 		t->wake_date = now_mono_time();
@@ -446,11 +442,10 @@
 	} while (!_HA_ATOMIC_CAS(&tl->state, &state, state | TASK_IN_LIST));
 
 	/* at this point we're the first one to add this task to the list */
+	caller = HA_ATOMIC_XCHG(&tl->caller, caller);
+	BUG_ON((ulong)caller & 1);
 #ifdef DEBUG_TASK
-	if ((unsigned int)tl->debug.caller_idx > 1)
-		ABORT_NOW();
-	tl->debug.caller_idx = !tl->debug.caller_idx;
-	tl->debug.caller[tl->debug.caller_idx] = caller;
+	HA_ATOMIC_STORE(&tl->debug.prev_caller, caller);
 #endif
 	if (th_ctx->flags & TH_FL_TASK_PROFILING)
 		tl->wake_date = now_mono_time();
@@ -462,7 +457,7 @@
  */
 #ifdef DEBUG_TASK
 #define DEBUG_TASK_PRINT_CALLER(t) do {						\
-	const struct ha_caller *__caller = (t)->debug.caller[(t)->debug.caller_idx];	\
+	const struct ha_caller *__caller = (t)->caller;			\
 	printf("%s woken up from %s(%s:%d)\n", __FUNCTION__,		\
 	       __caller ? __caller->func : NULL, \
 	       __caller ? __caller->file : NULL, \
@@ -506,9 +501,7 @@
 	t->calls = 0;
 	t->wake_date = 0;
 	t->expire = TICK_ETERNITY;
-#ifdef DEBUG_TASK
-	t->debug.caller_idx = 0;
-#endif
+	t->caller = NULL;
 	return t;
 }
 
@@ -523,9 +516,7 @@
 	t->process = NULL;
 	t->tid = -1;
 	t->wake_date = 0;
-#ifdef DEBUG_TASK
-	t->debug.caller_idx = 0;
-#endif
+	t->caller = NULL;
 	LIST_INIT(&t->list);
 }
 
@@ -588,11 +579,11 @@
 	}
 	BUG_ON(task_in_wq(t) || task_in_rq(t));
 
+	BUG_ON((ulong)t->caller & 1);
 #ifdef DEBUG_TASK
-	if ((unsigned int)t->debug.caller_idx > 1)
-		ABORT_NOW();
-	t->debug.caller_idx |= 2; // keep parity and make sure to crash if used after free
+	HA_ATOMIC_STORE(&t->debug.prev_caller, HA_ATOMIC_LOAD(&t->caller));
 #endif
+	HA_ATOMIC_STORE(&t->caller, (void*)1); // make sure to crash if used after free
 
 	pool_free(pool_head_task, t);
 	th_ctx->nb_tasks--;
@@ -631,11 +622,11 @@
 	if (MT_LIST_DELETE(list_to_mt_list(&tl->list)))
 		_HA_ATOMIC_DEC(&ha_thread_ctx[tl->tid >= 0 ? tl->tid : tid].rq_total);
 
+	BUG_ON((ulong)tl->caller & 1);
 #ifdef DEBUG_TASK
-	if ((unsigned int)tl->debug.caller_idx > 1)
-		ABORT_NOW();
-	tl->debug.caller_idx |= 2; // keep parity and make sure to crash if used after free
+	HA_ATOMIC_STORE(&tl->debug.prev_caller, HA_ATOMIC_LOAD(&tl->caller));
 #endif
+	HA_ATOMIC_STORE(&tl->caller, (void*)1); // make sure to crash if used after free
 	pool_free(pool_head_tasklet, tl);
 	if (unlikely(stopping))
 		pool_flush(pool_head_tasklet);