MINOR: task: make rqueue_ticks atomic

The runqueue ticks counter is per-thread and wasn't initially meant to
be shared. We'll soon have to share it, so let's make it atomic. It's
only updated when waking up a task, and no performance difference was
observed. It was moved within the thread_ctx struct so that it doesn't
pollute the local cache line when it's later updated by other threads.
diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h
index b2395a4..df5ac7a 100644
--- a/include/haproxy/tinfo-t.h
+++ b/include/haproxy/tinfo-t.h
@@ -79,7 +79,6 @@
 	struct eb_root timers;              /* tree constituting the per-thread wait queue */
 	struct eb_root rqueue;              /* tree constituting the per-thread run queue */
 	struct task *current;               /* current task (not tasklet) */
-	unsigned int rqueue_ticks;          /* Insertion counter for the run queue */
 	int current_queue;                  /* points to current tasklet list being run, -1 if none */
 	unsigned int nb_tasks;              /* number of tasks allocated on this thread */
 	uint flags;                         /* thread flags, TH_FL_* */
@@ -96,11 +95,13 @@
 	// third cache line here on 64 bits: accessed mostly using atomic ops
 	ALWAYS_ALIGN(64);
 	struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */
+	unsigned int rqueue_ticks;          /* Insertion counter for the run queue */
 	unsigned int rq_total;              /* total size of the run queue, prio_tree + tasklets */
 	int tasks_in_list;                  /* Number of tasks in the per-thread tasklets list */
+	uint idle_pct;                      /* idle to total ratio over last sample (percent) */
+
 	uint64_t prev_cpu_time;             /* previous per thread CPU time */
 	uint64_t prev_mono_time;            /* previous system wide monotonic time  */
-	uint idle_pct;                      /* idle to total ratio over last sample (percent) */
 	ALWAYS_ALIGN(128);
 };
 
diff --git a/src/task.c b/src/task.c
index b371b89..247ed80 100644
--- a/src/task.c
+++ b/src/task.c
@@ -249,7 +249,7 @@
 #endif
 	{
 		_HA_ATOMIC_INC(&th_ctx->rq_total);
-		t->rq.key = ++th_ctx->rqueue_ticks;
+		t->rq.key = _HA_ATOMIC_ADD_FETCH(&th_ctx->rqueue_ticks, 1);
 	}
 
 	if (likely(t->nice)) {
@@ -854,7 +854,7 @@
 		 */
 
 		if (!lrq) {
-			lrq = eb32sc_lookup_ge(&tt->rqueue, tt->rqueue_ticks - TIMER_LOOK_BACK, tid_bit);
+			lrq = eb32sc_lookup_ge(&tt->rqueue, _HA_ATOMIC_LOAD(&tt->rqueue_ticks) - TIMER_LOOK_BACK, tid_bit);
 			if (unlikely(!lrq))
 				lrq = eb32sc_first(&tt->rqueue, tid_bit);
 		}