MAJOR: task: use t->tid instead of ffsl(t->thread_mask) to take the thread ID

At several places we need to figure out the ID of the first thread
allowed to run a task. Until now this was done using
my_ffsl(t->thread_mask) - 1, but since we now have the thread ID stored
in the task, let's use it instead. This is tagged major because it starts
to assume that tid<0 is strictly equivalent to atleast2(thread_mask), and
that, as such, the current thread is among the allowed ones.
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index 2d86452..66add67 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -438,9 +438,12 @@
 static inline void _task_instant_wakeup(struct task *t, unsigned int f, const char *file, int line)
 {
 	struct tasklet *tl = (struct tasklet *)t;
-	int thr = my_ffsl(t->thread_mask) - 1;
+	int thr = t->tid;
 	unsigned int state;
 
+	if (thr < 0)
+		thr = tid;
+
 	/* first, let's update the task's state with the wakeup condition */
 	state = _HA_ATOMIC_OR_FETCH(&tl->state, f);
 
diff --git a/src/peers.c b/src/peers.c
index 13f75ad..41cf7d8 100644
--- a/src/peers.c
+++ b/src/peers.c
@@ -1040,7 +1040,7 @@
 	if (!peers || !peer->appctx)
 		return;
 
-	thr = my_ffsl(peer->appctx->t->thread_mask) - 1;
+	thr = peer->appctx->t->tid;
 	HA_ATOMIC_DEC(&peers->applet_count[thr]);
 
 	if (peer->appctx->st0 == PEER_SESS_ST_WAITMSG)
diff --git a/src/task.c b/src/task.c
index 19df263..cbf657a 100644
--- a/src/task.c
+++ b/src/task.c
@@ -85,7 +85,7 @@
 			 * Note: that's a task so it must be accounted for as such. Pick
 			 * the task's first thread for the job.
 			 */
-			thr = my_ffsl(t->thread_mask) - 1;
+			thr = t->tid >= 0 ? t->tid : tid;
 
 			/* Beware: tasks that have never run don't have their ->list empty yet! */
 			MT_LIST_APPEND(&ha_thread_ctx[thr].shared_tasklet_list,
@@ -230,9 +230,10 @@
 void __task_wakeup(struct task *t)
 {
 	struct eb_root *root = &th_ctx->rqueue;
+	int thr __maybe_unused = t->tid >= 0 ? t->tid : tid;
 
 #ifdef USE_THREAD
-	if (t->thread_mask != tid_bit && global.nbthread != 1) {
+	if (thr != tid) {
 		root = &rqueue;
 
 		_HA_ATOMIC_INC(&grq_total);
@@ -262,7 +263,7 @@
 	eb32sc_insert(root, &t->rq, t->thread_mask);
 
 #ifdef USE_THREAD
-	if (root == &rqueue) {
+	if (thr != tid) {
 		_HA_ATOMIC_OR(&t->state, TASK_GLOBAL);
 		HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock);
 
@@ -273,9 +274,8 @@
 		     (t->thread_mask & all_threads_mask))) {
 			unsigned long m = (t->thread_mask & all_threads_mask) &~ tid_bit;
 
-			m = (m & (m - 1)) ^ m; // keep lowest bit set
 			_HA_ATOMIC_AND(&sleeping_thread_mask, ~m);
-			wake_thread(my_ffsl(m) - 1);
+			wake_thread(thr);
 		}
 	}
 #endif