MINOR: task: split the tasklet vs task code in process_runnable_tasks()

A number of tests in process_runnable_tasks() are performed on tasklets
even though they can never apply to them (various handlers, destroyed
task or not, arguments, results, ...). Instead let's have a single
TASK_IS_TASKLET() test and call the tasklet processing function directly,
skipping all the rest.

It now becomes apparent that the only unneeded code is the update to
curr_task, which is never used for tasklets except for opportunistic
reporting in the debug handler. That reporting can only catch
si_cs_io_cb, which in practice doesn't appear in any report, so the
extra cost incurred there is pointless.

This change alone removes 700 bytes of code, mostly in
process_runnable_tasks(), and increases performance by about
1%.
diff --git a/include/proto/task.h b/include/proto/task.h
index da7d892..b0e631a 100644
--- a/include/proto/task.h
+++ b/include/proto/task.h
@@ -359,10 +359,6 @@
 			_HA_ATOMIC_SUB(&tasks_run_queue, 1);
 	}
 
-	if ((struct task *)tl == curr_task) {
-		curr_task = NULL;
-		__ha_barrier_store();
-	}
 	pool_free(pool_head_tasklet, tl);
 	if (unlikely(stopping))
 		pool_flush(pool_head_tasklet);
diff --git a/src/task.c b/src/task.c
index 1ad6b01..5aa9ae4 100644
--- a/src/task.c
+++ b/src/task.c
@@ -41,7 +41,7 @@
 unsigned int nb_tasks_cur = 0;     /* copy of the tasks count */
 unsigned int niced_tasks = 0;      /* number of niced tasks in the run queue */
 
-THREAD_LOCAL struct task *curr_task = NULL; /* task currently running or NULL */
+THREAD_LOCAL struct task *curr_task = NULL; /* task (not tasklet) currently running or NULL */
 
 __decl_aligned_spinlock(rq_lock); /* spin lock related to run queue */
 __decl_aligned_rwlock(wq_lock);   /* RW lock related to the wait queue */
@@ -395,8 +395,6 @@
 			break;
 		state = _HA_ATOMIC_XCHG(&t->state, TASK_RUNNING);
 		__ha_barrier_atomic_store();
-		if (!TASK_IS_TASKLET(t))
-			_HA_ATOMIC_SUB(&tt->task_list_size, 1);
 
 		ti->flags &= ~TI_FL_STUCK; // this thread is still running
 		activity[tid].ctxsw++;
@@ -404,19 +402,28 @@
 		process = t->process;
 		t->calls++;
 
-		if (unlikely(!TASK_IS_TASKLET(t) && t->call_date)) {
+		if (TASK_IS_TASKLET(t)) {
+			process(NULL, ctx, state);
+			max_processed--;
+			continue;
+		}
+
+		/* OK then this is a regular task */
+
+		_HA_ATOMIC_SUB(&tt->task_list_size, 1);
+		if (unlikely(t->call_date)) {
 			uint64_t now_ns = now_mono_time();
 
 			t->lat_time += now_ns - t->call_date;
 			t->call_date = now_ns;
 		}
 
-		curr_task = (struct task *)t;
+		curr_task = t;
 		__ha_barrier_store();
 		if (likely(process == process_stream))
 			t = process_stream(t, ctx, state);
 		else if (process != NULL)
-			t = process(TASK_IS_TASKLET(t) ? NULL : t, ctx, state);
+			t = process(t, ctx, state);
 		else {
 			__task_free(t);
 			curr_task = NULL;