MEDIUM: tasks: implement 3 different tasklet classes with their own queues

We used to mix high-latency tasks and low-latency tasklets in the same
list, and even to refill bulk tasklets there, causing some unfairness in
certain situations (e.g. poll-less transfers between many connections
saturating a machine with similarly sized input and output network
interfaces).

This patch changes the mechanism to split the load into 3 lists according
to the task/tasklet's desired class (a sketch of the assumed per-thread
layout follows the list):
  - URGENT: this is mainly for tasklets used as deferred callbacks
  - NORMAL: this is for regular tasks
  - BULK: this is for bulk tasks/tasklets
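
For clarity, here is a minimal sketch of the per-thread layout this change
assumes; the names follow the diff below, but the header-side declaration
is not part of this excerpt, so treat it as illustrative only:

  /* sketch only: the real declarations live in the task headers */
  enum {
          TL_URGENT = 0,   /* mainly tasklets used as deferred callbacks */
          TL_NORMAL = 1,   /* regular tasks */
          TL_BULK   = 2,   /* bulk tasks/tasklets */
          TL_CLASSES       /* number of classes, must remain last */
  };

  struct task_per_thread {
          struct list tasklets[TL_CLASSES];   /* one queue per class, replacing task_list */
          struct mt_list shared_tasklet_list; /* tasklets woken up by other threads */
          unsigned int task_list_size;        /* ... remaining fields unchanged ... */
  };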

Arbitrary ratios of max_processed are picked from each of these lists in
turn, and whatever budget one list leaves unused may be consumed by the
next one. After some quick tests, the following setup gave apparently good
results both for raw TCP with splicing and for H2-to-H1 request rate (a
toy walk-through of the arithmetic follows the list):

  - 0 to 75% for urgent
  - 12.5 to 50% for normal
  - 12.5% to what remains for bulk
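
As a toy walk-through of the arithmetic (standalone, numbers invented, not
part of the patch):

  #include <stdio.h>

  int main(void)
  {
          unsigned int max_processed = 200;  /* example budget */

          /* urgent: up to 3/4 of the budget */
          printf("urgent <= %u\n", 3 * (max_processed + 1) / 4);  /* 150 */
          max_processed -= 30;               /* suppose urgent only ran 30 */

          /* normal: up to half of whatever urgent left */
          printf("normal <= %u\n", (max_processed + 1) / 2);      /* (170+1)/2 = 85 */
          max_processed -= 85;               /* suppose normal ran all 85 */

          /* bulk: everything that remains */
          printf("bulk   <= %u\n", max_processed);                /* 85 */
          return 0;
  }

When urgent is saturated it runs 150 of the 200, leaving normal at most
(50+1)/2 = 25, i.e. the 12.5% floor; when urgent is idle, normal may take
up to 100, i.e. the 50% cap.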

Bulk is not used yet.
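
Nothing enqueues into TL_BULK yet; a future producer would presumably go
through the reworked helper exactly as the scheduler does for TL_NORMAL in
the hunk below, e.g. (hypothetical caller):

  /* hypothetical, not part of this patch */
  tasklet_insert_into_tasklet_list(&tt->tasklets[TL_BULK], tl);
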
diff --git a/src/task.c b/src/task.c
index a3c581a..2219262 100644
--- a/src/task.c
+++ b/src/task.c
@@ -427,7 +427,7 @@
 	 */
 	tmp_list = MT_LIST_BEHEAD(&sched->shared_tasklet_list);
 	if (tmp_list)
-		LIST_SPLICE_END_DETACHED(&sched->task_list, (struct list *)tmp_list);
+		LIST_SPLICE_END_DETACHED(&sched->tasklets[TL_URGENT], (struct list *)tmp_list);
 
 	tasks_run_queue_cur = tasks_run_queue; /* keep a copy for reporting */
 	nb_tasks_cur = nb_tasks;
@@ -436,9 +436,15 @@
 	if (likely(niced_tasks))
 		max_processed = (max_processed + 3) / 4;
 
+	/* run up to 3*max_processed/4 urgent tasklets */
+	done = run_tasks_from_list(&tt->tasklets[TL_URGENT], 3*(max_processed + 1) / 4);
+	max_processed -= done;
+
+	/* pick up to (max_processed + 1) / 2 regular tasks from prio-ordered run queues */
+
 	/* Note: the grq lock is always held when grq is not null */
 
-	while (tt->task_list_size < max_processed) {
+	while (tt->task_list_size < (max_processed + 1) / 2) {
 		if ((global_tasks_mask & tid_bit) && !grq) {
 #ifdef USE_THREAD
 			HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock);
@@ -489,7 +495,7 @@
 		/* Make sure the entry doesn't appear to be in a list */
 		LIST_INIT(&((struct tasklet *)t)->list);
 		/* And add it to the local task list */
-		tasklet_insert_into_tasklet_list((struct tasklet *)t);
+		tasklet_insert_into_tasklet_list(&tt->tasklets[TL_NORMAL], (struct tasklet *)t);
 		tt->task_list_size++;
 		activity[tid].tasksw++;
 	}
@@ -500,10 +506,17 @@
 		grq = NULL;
 	}
 
-	done = run_tasks_from_list(&tt->task_list, max_processed);
+	/* run between max_processed/8 and max_processed/2 regular tasks */
+	done = run_tasks_from_list(&tt->tasklets[TL_NORMAL], (max_processed + 1) / 2);
 	max_processed -= done;
 
-	if (!LIST_ISEMPTY(&tt->task_list))
+	/* run between max_processed/8 and max_processed bulk tasklets */
+	done = run_tasks_from_list(&tt->tasklets[TL_BULK], max_processed);
+	max_processed -= done;
+
+	if (!LIST_ISEMPTY(&sched->tasklets[TL_URGENT]) |
+	    !LIST_ISEMPTY(&sched->tasklets[TL_NORMAL]) |
+	    !LIST_ISEMPTY(&sched->tasklets[TL_BULK]))
 		activity[tid].long_rq++;
 }
 
@@ -607,7 +620,9 @@
 #endif
 	memset(&task_per_thread, 0, sizeof(task_per_thread));
 	for (i = 0; i < MAX_THREADS; i++) {
-		LIST_INIT(&task_per_thread[i].task_list);
+		LIST_INIT(&task_per_thread[i].tasklets[TL_URGENT]);
+		LIST_INIT(&task_per_thread[i].tasklets[TL_NORMAL]);
+		LIST_INIT(&task_per_thread[i].tasklets[TL_BULK]);
 		MT_LIST_INIT(&task_per_thread[i].shared_tasklet_list);
 	}
 }