MINOR: task: introduce a thread-local "sched" variable for local scheduler stuff

The aim is to gather all scheduler information related to the current
thread in one place. It simply points to task_per_thread[tid] so that
this indexing operation doesn't have to be performed each time. We save
around 1.2 kB of code on performance-sensitive paths and increase the
request rate by almost 1%.
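
The change boils down to the following pattern; a minimal sketch with
hypothetical names (per_thread_ctx, ctx_array, thread_init and hot_path
are illustration only, not HAProxy identifiers):

    /* Each thread caches a pointer to its per-thread slot once at
     * startup; all hot-path accesses then dereference the cached
     * pointer instead of re-indexing the array with the thread id. */
    struct per_thread_ctx { int rqueue_size; };
    static struct per_thread_ctx ctx_array[64];
    static __thread struct per_thread_ctx *ctx = &ctx_array[0];

    static void thread_init(int tid)
    {
        ctx = &ctx_array[tid];  /* done once per thread */
    }

    static void hot_path(void)
    {
        ctx->rqueue_size++;     /* was: ctx_array[tid].rqueue_size++ */
    }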
diff --git a/include/proto/task.h b/include/proto/task.h
index b0e631a..511d3cd 100644
--- a/include/proto/task.h
+++ b/include/proto/task.h
@@ -91,7 +91,7 @@
 extern struct pool_head *pool_head_task;
 extern struct pool_head *pool_head_tasklet;
 extern struct pool_head *pool_head_notification;
-extern THREAD_LOCAL struct task *curr_task; /* task currently running or NULL */
+extern THREAD_LOCAL struct task_per_thread *sched; /* current thread's scheduler context */
 #ifdef USE_THREAD
 extern struct eb_root timers;      /* sorted timers tree, global */
 extern struct eb_root rqueue;      /* tree constituting the run queue */
@@ -132,11 +132,11 @@
 	struct eb_root *root;
 
 	if (t->thread_mask == tid_bit || global.nbthread == 1)
-		root = &task_per_thread[tid].rqueue;
+		root = &sched->rqueue;
 	else
 		root = &rqueue;
 #else
-	struct eb_root *root = &task_per_thread[tid].rqueue;
+	struct eb_root *root = &sched->rqueue;
 #endif
 
 	state = _HA_ATOMIC_OR(&t->state, f);
@@ -201,7 +201,7 @@
 		global_rqueue_size--;
 	} else
 #endif
-		task_per_thread[tid].rqueue_size--;
+		sched->rqueue_size--;
 	eb32sc_delete(&t->rq);
 	if (likely(t->nice))
 		_HA_ATOMIC_SUB(&niced_tasks, 1);
@@ -236,7 +236,7 @@
  */
 static inline void tasklet_insert_into_tasklet_list(struct tasklet *tl)
 {
-	if (MT_LIST_ADDQ(&task_per_thread[tid].task_list, &tl->list) == 1)
+	if (MT_LIST_ADDQ(&sched->task_list, &tl->list) == 1)
 		_HA_ATOMIC_ADD(&tasks_run_queue, 1);
 }
 
@@ -317,8 +317,8 @@
  */
 static inline void __task_free(struct task *t)
 {
-	if (t == curr_task) {
-		curr_task = NULL;
+	if (t == sched->current) {
+		sched->current = NULL;
 		__ha_barrier_store();
 	}
 	pool_free(pool_head_task, t);
@@ -346,7 +346,7 @@
 	/* There's no need to protect t->state with a lock, as the task
 	 * has to run on the current thread.
 	 */
-	if (t == curr_task || !(t->state & (TASK_QUEUED | TASK_RUNNING)))
+	if (t == sched->current || !(t->state & (TASK_QUEUED | TASK_RUNNING)))
 		__task_free(t);
 	else
 		t->process = NULL;
@@ -401,7 +401,7 @@
 #endif
 	{
 		if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key))
-			__task_queue(task, &task_per_thread[tid].timers);
+			__task_queue(task, &sched->timers);
 	}
 }
 
@@ -434,7 +434,7 @@
 
 		task->expire = when;
 		if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key))
-			__task_queue(task, &task_per_thread[tid].timers);
+			__task_queue(task, &sched->timers);
 	}
 }
 
@@ -537,8 +537,8 @@
 static inline int thread_has_tasks(void)
 {
 	return (!!(global_tasks_mask & tid_bit) |
-	        (task_per_thread[tid].rqueue_size > 0) |
-	        !MT_LIST_ISEMPTY(&task_per_thread[tid].task_list));
+	        (sched->rqueue_size > 0) |
+	        !MT_LIST_ISEMPTY(&sched->task_list));
 }
 
 /* adds list item <item> to work list <work> and wake up the associated task */
diff --git a/include/types/task.h b/include/types/task.h
index aac3a6c..40304ee 100644
--- a/include/types/task.h
+++ b/include/types/task.h
@@ -64,6 +64,7 @@
 	struct mt_list task_list; /* List of tasks to be run, mixing tasks and tasklets */
 	int task_list_size;     /* Number of tasks in the task_list */
 	int rqueue_size;        /* Number of elements in the per-thread run queue */
+	struct task *current;   /* current task (not tasklet) */
 	__attribute__((aligned(64))) char end[0];
 };
 
diff --git a/src/debug.c b/src/debug.c
index a2b99c9..1a1301c 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -77,7 +77,7 @@
 		return;
 
 	chunk_appendf(buf, "             curr_task=");
-	ha_task_dump(buf, curr_task, "             ");
+	ha_task_dump(buf, sched->current, "             ");
 }
 
 
diff --git a/src/haproxy.c b/src/haproxy.c
index ee6dd27..f577104 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -2659,6 +2659,7 @@
 	__decl_hathreads(static pthread_cond_t  init_cond  = PTHREAD_COND_INITIALIZER);
 
 	ha_set_tid((unsigned long)data);
+	sched = &task_per_thread[tid];
 
 #if (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
 #ifdef USE_THREAD
diff --git a/src/task.c b/src/task.c
index 5aa9ae4..0d81871 100644
--- a/src/task.c
+++ b/src/task.c
@@ -41,7 +41,7 @@
 unsigned int nb_tasks_cur = 0;     /* copy of the tasks count */
 unsigned int niced_tasks = 0;      /* number of niced tasks in the run queue */
 
-THREAD_LOCAL struct task *curr_task = NULL; /* task (not tasklet) currently running or NULL */
+THREAD_LOCAL struct task_per_thread *sched = &task_per_thread[0]; /* scheduler context for the current thread */
 
 __decl_aligned_spinlock(rq_lock); /* spin lock related to run queue */
 __decl_aligned_rwlock(wq_lock);   /* RW lock related to the wait queue */
@@ -159,7 +159,7 @@
  */
 int wake_expired_tasks()
 {
-	struct task_per_thread * const tt = &task_per_thread[tid]; // thread's tasks
+	struct task_per_thread * const tt = sched; // thread's tasks
 	struct task *task;
 	struct eb32_node *eb;
 	int ret = TICK_ETERNITY;
@@ -300,7 +300,7 @@
  */
 void process_runnable_tasks()
 {
-	struct task_per_thread * const tt = &task_per_thread[tid]; // thread's tasks
+	struct task_per_thread * const tt = sched;
 	struct eb32sc_node *lrq = NULL; // next local run queue entry
 	struct eb32sc_node *grq = NULL; // next global run queue entry
 	struct task *t;
@@ -418,7 +418,7 @@
 			t->call_date = now_ns;
 		}
 
-		curr_task = t;
+		sched->current = t;
 		__ha_barrier_store();
 		if (likely(process == process_stream))
 			t = process_stream(t, ctx, state);
@@ -426,7 +426,7 @@
 			t = process(t, ctx, state);
 		else {
 			__task_free(t);
-			curr_task = NULL;
+			sched->current = NULL;
 			__ha_barrier_store();
 			/* We don't want max_processed to be decremented if
 			 * we're just freeing a destroyed task, we should only
@@ -434,7 +434,7 @@
 			 */
 			continue;
 		}
-		curr_task = NULL;
+		sched->current = NULL;
 		__ha_barrier_store();
 		/* If there is a pending state  we have to wake up the task
 		 * immediately, else we defer it into wait queue