CLEANUP: task: split the large tasklet_wakeup_on() function in two
This function has become large with the multi-queue scheduler. We need
to keep the fast path and the debugging parts inlined, but the rest now
moves to task.c, just as was done for task_wakeup(). This has reduced
the code size by 6kB, since the large, always context-dependent parts
are no longer inlined at every call site, and as a side effect it has
increased the overall performance by 1%.
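For illustration, the inline part that remains in task.h now reduces to
roughly the shape below. This is a minimal sketch only, assuming the
HAProxy task.h context: the TASK_IN_LIST test and the _HA_ATOMIC_CAS
loop are an assumption based on the comment added in task.c, and the
DEBUG_TASK caller bookkeeping and any wrapper macro are omitted:

    static inline void tasklet_wakeup_on(struct tasklet *tl, int thr)
    {
        unsigned int state = tl->state;

        /* fast path, stays inlined: nothing to do if already queued */
        do {
            if (state & TASK_IN_LIST)
                return;
        } while (!_HA_ATOMIC_CAS(&tl->state, &state, state | TASK_IN_LIST));

        /* slow path, now out of line in task.c: pick the proper class or
         * target thread queue and account for it
         */
        __tasklet_wakeup_on(tl, thr);
    }
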
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index 018d97e..f1440d0 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -105,6 +105,7 @@
__decl_thread(extern HA_SPINLOCK_T rq_lock); /* spin lock related to run queue */
__decl_thread(extern HA_RWLOCK_T wq_lock); /* RW lock related to the wait queue */
+void __tasklet_wakeup_on(struct tasklet *tl, int thr);
void task_kill(struct task *t);
void __task_wakeup(struct task *t);
void __task_queue(struct task *task, struct eb_root *wq);
@@ -375,36 +376,7 @@
tl->debug.caller_file[tl->debug.caller_idx] = file;
tl->debug.caller_line[tl->debug.caller_idx] = line;
#endif
-
- if (likely(thr < 0)) {
- /* this tasklet runs on the caller thread */
- if (tl->state & TASK_SELF_WAKING) {
- LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
- sched->tl_class_mask |= 1 << TL_BULK;
- }
- else if ((struct task *)tl == sched->current) {
- _HA_ATOMIC_OR(&tl->state, TASK_SELF_WAKING);
- LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
- sched->tl_class_mask |= 1 << TL_BULK;
- }
- else if (sched->current_queue < 0) {
- LIST_ADDQ(&sched->tasklets[TL_URGENT], &tl->list);
- sched->tl_class_mask |= 1 << TL_URGENT;
- }
- else {
- LIST_ADDQ(&sched->tasklets[sched->current_queue], &tl->list);
- sched->tl_class_mask |= 1 << sched->current_queue;
- }
- _HA_ATOMIC_ADD(&sched->rq_total, 1);
- } else {
- /* this tasklet runs on a specific thread. */
- MT_LIST_ADDQ(&task_per_thread[thr].shared_tasklet_list, (struct mt_list *)&tl->list);
- _HA_ATOMIC_ADD(&task_per_thread[thr].rq_total, 1);
- if (sleeping_thread_mask & (1UL << thr)) {
- _HA_ATOMIC_AND(&sleeping_thread_mask, ~(1UL << thr));
- wake_thread(thr);
- }
- }
+ __tasklet_wakeup_on(tl, thr);
}
/* schedules tasklet <tl> to run onto the thread designated by tl->tid, which
diff --git a/src/task.c b/src/task.c
index f91cd70..dbc6701 100644
--- a/src/task.c
+++ b/src/task.c
@@ -105,6 +105,44 @@
}
}
+/* Do not call this one directly; use tasklet_wakeup_on() instead, as this is
+ * its slow path. tasklet_wakeup_on() performs some preliminary checks and
+ * sets TASK_IN_LIST before calling this one. A negative <thr> designates
+ * the current thread.
+ */
+void __tasklet_wakeup_on(struct tasklet *tl, int thr)
+{
+ if (likely(thr < 0)) {
+ /* this tasklet runs on the caller thread */
+ if (tl->state & TASK_SELF_WAKING) {
+ LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
+ sched->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if ((struct task *)tl == sched->current) {
+ _HA_ATOMIC_OR(&tl->state, TASK_SELF_WAKING);
+ LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
+ sched->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if (sched->current_queue < 0) {
+ LIST_ADDQ(&sched->tasklets[TL_URGENT], &tl->list);
+ sched->tl_class_mask |= 1 << TL_URGENT;
+ }
+ else {
+ LIST_ADDQ(&sched->tasklets[sched->current_queue], &tl->list);
+ sched->tl_class_mask |= 1 << sched->current_queue;
+ }
+ _HA_ATOMIC_ADD(&sched->rq_total, 1);
+ } else {
+ /* this tasklet runs on a specific thread. */
+ MT_LIST_ADDQ(&task_per_thread[thr].shared_tasklet_list, (struct mt_list *)&tl->list);
+ _HA_ATOMIC_ADD(&task_per_thread[thr].rq_total, 1);
+ if (sleeping_thread_mask & (1UL << thr)) {
+ _HA_ATOMIC_AND(&sleeping_thread_mask, ~(1UL << thr));
+ wake_thread(thr);
+ }
+ }
+}
+
/* Puts the task <t> in run queue at a position depending on t->nice. <t> is
* returned. The nice value assigns boosts in 32th of the run queue size. A
* nice value of -1024 sets the task to -tasks_run_queue*32, while a nice value
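
For context, here is a purely illustrative sketch of how the (unchanged)
entry point is called; the negative-<thr> convention comes from the
comment added in task.c above, and example_wakeups() is a hypothetical
helper, not part of this patch:

    /* Illustrative only: both calls end up in the out-of-line
     * __tasklet_wakeup_on() added by this patch.
     */
    static void example_wakeups(struct tasklet *tl, int target_thr)
    {
        tasklet_wakeup_on(tl, -1);          /* run on the calling thread */
        tasklet_wakeup_on(tl, target_thr);  /* run on thread <target_thr> */
    }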