MINOR: task: add a new task_instant_wakeup() function
This function's purpose is to wake up either a local or remote task,
bypassing the tree-based run queue. It is meant for fast wakeups that
are supposed to be equivalent to those used with tasklets, i.e. a task
had to pause some processing and can now complete it (typically because
a resource became available again). In all cases, it's important to keep in mind
that the task must have gone through the regular scheduling path before
being blocked, otherwise the task priorities would be ignored.
The reason for this is that some wakeups are massively inter-thread
(e.g. server queues), and these inter-thread wakeups cause huge
contention on the shared runqueue lock. A user reported 47% CPU spent
in process_runnable_tasks with only 32 threads and 80k requests in
queues. With this mechanism, purely one-to-one wakeups can avoid
taking the lock thanks to the mt_list used for the shared tasklet
queue.
Right now the shared tasklet queue moves everything to the TL_URGENT
queue. It's not dramatic but it would seem better to have a new shared
list dedicated to tasks, and that would deliver into TL_NORMAL, for an
even better fairness. This could be improved in the future.
diff --git a/include/haproxy/task-t.h b/include/haproxy/task-t.h
index 9cdfa31..f71b1ec 100644
--- a/include/haproxy/task-t.h
+++ b/include/haproxy/task-t.h
@@ -104,6 +104,11 @@
struct task {
TASK_COMMON; /* must be at the beginning! */
struct eb32sc_node rq; /* ebtree node used to hold the task in the run queue */
+ /* WARNING: the struct task is often aliased as a struct tasklet when
+ * it is NOT in the run queue. The tasklet has its struct list here
+ * where rq starts and this works because both are exclusive. Never
+ * ever reorder these fields without taking this into account!
+ */
struct eb32_node wq; /* ebtree node used to hold the task in the wait queue */
int expire; /* next expiration date for this task, in ticks */
short nice; /* task prio from -1024 to +1024 */
@@ -118,6 +123,11 @@
struct tasklet {
TASK_COMMON; /* must be at the beginning! */
struct list list;
+ /* WARNING: the struct task is often aliased as a struct tasklet when
+ * it is NOT in the run queue. The task has its rq node here where
+ * list starts and this works because both are exclusive. Never ever
+ * reorder these fields without taking this into account!
+ */
#ifdef DEBUG_TASK
uint64_t call_date; /* date of the last tasklet wakeup or call */
#endif
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index 6bd1272..b3e0544 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -414,6 +414,50 @@
*/
#define tasklet_wakeup(tl) _tasklet_wakeup_on(tl, (tl)->tid, __FILE__, __LINE__)
+/* Instantly wakes up task <t> on its owner thread (first bit set in its
+ * thread_mask), even if not the current one, bypassing the run queue, after
+ * ORing wakeup flags <f> into its state. This avoids contention in the global
+ * run queue for massively remote tasks (e.g. server queues) which were already
+ * subject to the priority ordering once (e.g. before entering process_stream).
+ * The task goes directly into the shared mt_list as a tasklet and will run as
+ * TL_URGENT. Nothing is queued if it's already TASK_QUEUED or TASK_RUNNING.
+ * <file> and <line> identify the caller and are only stored under DEBUG_TASK.
+ */
+#define task_instant_wakeup(t, f) _task_instant_wakeup(t, f, __FILE__, __LINE__)
+static inline void _task_instant_wakeup(struct task *t, unsigned int f, const char *file, int line)
+{
+ struct tasklet *tl = (struct tasklet *)t;
+ int thr = my_ffsl(t->thread_mask) - 1;
+ unsigned int state;
+
+ /* first, atomically OR the wakeup condition <f> into the task's state */
+ state = _HA_ATOMIC_OR_FETCH(&tl->state, f);
+
+ /* next we need to make sure the task was not/will not be added to the
+ * run queue because the tasklet list's mt_list uses the same storage
+ * as the task's rq node. A failed CAS re-checks <state> and retries.
+ */
+ do {
+ /* do nothing if someone else already queued it or is running it */
+ if (state & (TASK_QUEUED|TASK_RUNNING))
+ return;
+ } while (!_HA_ATOMIC_CAS(&tl->state, &state, state | TASK_QUEUED));
+
+ BUG_ON_HOT(task_in_rq(t));
+
+ /* we set TASK_QUEUED ourselves, so we're the first ones to add this task to the list */
+#ifdef DEBUG_TASK
+ if ((unsigned int)tl->debug.caller_idx > 1)
+ ABORT_NOW();
+ tl->debug.caller_idx = !tl->debug.caller_idx;
+ tl->debug.caller_file[tl->debug.caller_idx] = file;
+ tl->debug.caller_line[tl->debug.caller_idx] = line;
+ if (task_profiling_mask & tid_bit)
+ tl->call_date = now_mono_time();
+#endif
+ __tasklet_wakeup_on(tl, thr);
+}
+
/* This macro shows the current function name and the last known caller of the
* task (or tasklet) wakeup.
*/