BUG/MEDIUM: tasks: Make sure we set TASK_QUEUED before adding a task to the rq.

Make sure we set TASK_QUEUED in every case before adding the task to the
run queue. task_wakeup() now checks if either TASK_QUEUED or TASK_RUNNING
is set, and if neither is set, add TASK_QUEUED and effectively add the task
to the runqueue.
No longer use __task_wakeup() anywhere except in task_wakeup(), always use
task_wakeup() instead.
With the old code, process_runnable_task() may re-add a task in the runqueue
without setting the TASK_QUEUED flag, and there were race conditions that could
lead to a task having the TASK_QUEUED flag but not in the runqueue, thus
being unschedulable.

This should be backported to 1.9.
diff --git a/include/proto/task.h b/include/proto/task.h
index fdf4d6a..1d4f911 100644
--- a/include/proto/task.h
+++ b/include/proto/task.h
@@ -151,11 +151,12 @@
 	struct eb_root *root = &task_per_thread[tid].rqueue;
 #endif
 
-	f |= TASK_QUEUED;
-	state = t->state;
-	while (!_HA_ATOMIC_CAS(&t->state, &state, state | f))
-		;
-	if (!(state & TASK_QUEUED))
+	state = _HA_ATOMIC_OR(&t->state, f);
+	while (!(state & (TASK_RUNNING | TASK_QUEUED))) {
+		if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_QUEUED))
+			break;
+	}
+	if (!(state & (TASK_QUEUED | TASK_RUNNING)))
 		__task_wakeup(t, root);
 }
 
diff --git a/src/task.c b/src/task.c
index 33c50c4..56ba69e 100644
--- a/src/task.c
+++ b/src/task.c
@@ -66,8 +66,6 @@
  */
 void __task_wakeup(struct task *t, struct eb_root *root)
 {
-	void *expected = NULL;
-
 #ifdef USE_THREAD
 	if (root == &rqueue) {
 		HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock);
@@ -76,40 +74,6 @@
 	/* Make sure if the task isn't in the runqueue, nobody inserts it
 	 * in the meanwhile.
 	 */
-redo:
-	if (unlikely(!_HA_ATOMIC_CAS(&t->rq.node.leaf_p, &expected, (void *)0x1))) {
-#ifdef USE_THREAD
-		if (root == &rqueue)
-			HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock);
-#endif
-		return;
-	}
-	/* There's a small race condition, when running a task, the thread
-	 * first sets TASK_RUNNING, and then unlink the task.
-	 * If an another thread calls task_wakeup() for the same task,
-	 * it may set t->state before TASK_RUNNING was set, and then try
-	 * to set t->rq.nod.leaf_p after it was unlinked.
-	 * To make sure it is not a problem, we check if TASK_RUNNING is set
-	 * again. If it is, we unset t->rq.node.leaf_p.
-	 * We then check for TASK_RUNNING a third time. If it is still there,
-	 * then we can give up, the task will be re-queued later if it needs
-	 * to be. If it's not there, and there is still something in t->state,
-	 * then we have to requeue.
-	 */
-	if (((volatile unsigned short)(t->state)) & TASK_RUNNING) {
-		unsigned short state;
-		t->rq.node.leaf_p = NULL;
-		__ha_barrier_full();
-
-		state = (volatile unsigned short)(t->state);
-		if (unlikely(state != 0 && !(state & TASK_RUNNING)))
-			goto redo;
-#ifdef USE_THREAD
-		if (root == &rqueue)
-			HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock);
-#endif
-		return;
-	}
 	_HA_ATOMIC_ADD(&tasks_run_queue, 1);
 #ifdef USE_THREAD
 	if (root == &rqueue) {
@@ -443,12 +407,7 @@
 
 			state = _HA_ATOMIC_AND(&t->state, ~TASK_RUNNING);
 			if (state)
-#ifdef USE_THREAD
-				__task_wakeup(t, ((t->thread_mask & all_threads_mask) == tid_bit) ?
-				    &task_per_thread[tid].rqueue : &rqueue);
-#else
-				__task_wakeup(t, &task_per_thread[tid].rqueue);
-#endif
+				task_wakeup(t, 0);
 			else
 				task_queue(t);
 		}