MEDIUM: tasks/activity: combine the called function with the caller
Now instead of getting aggregate stats per called function, we have
them per function AND per call place. The "byaddr" sort considers
the function pointer first, then the call count, so that dominant
callers of a given callee are instantly spotted. This allows to get
sorted outputs like this:
Tasks activity:
function calls cpu_tot cpu_avg lat_tot lat_avg
h1_io_cb 17357952 40.91s 2.357us 4.849m 16.76us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup
sc_conn_io_cb 10357182 6.297s 607.0ns 27.93m 161.8us <- sc_app_chk_rcv_conn@src/stconn.c:762 tasklet_wakeup
process_stream 9891131 1.809m 10.97us 53.61m 325.2us <- sc_notify@src/stconn.c:1209 task_wakeup
process_stream 9823934 1.887m 11.52us 48.31m 295.1us <- stream_new@src/stream.c:563 task_wakeup
sc_conn_io_cb 9347863 16.59s 1.774us 6.143m 39.43us <- h1_wake_stream_for_recv@src/mux_h1.c:2600 tasklet_wakeup
h1_io_cb 501344 1.848s 3.686us 6.544m 783.2us <- conn_subscribe@src/connection.c:732 tasklet_wakeup
sc_conn_io_cb 239717 492.3ms 2.053us 3.213m 804.3us <- qcs_notify_send@src/mux_quic.c:529 tasklet_wakeup
h2_io_cb 173019 4.204s 24.30us 40.95s 236.7us <- h2_snd_buf@src/mux_h2.c:6712 tasklet_wakeup
h2_io_cb 149487 424.3ms 2.838us 14.63s 97.87us <- h2c_restart_reading@src/mux_h2.c:856 tasklet_wakeup
other 101893 4.626s 45.40us 14.84s 145.7us
quic_lstnr_dghdlr 94389 614.0ms 6.504us 30.54s 323.6us <- quic_lstnr_dgram_dispatch@src/quic_sock.c:255 tasklet_wakeup
quic_conn_app_io_cb 92205 3.735s 40.51us 390.9ms 4.239us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after
qc_io_cb 50355 19.01s 377.5us 10.65s 211.4us <- qc_treat_acked_tx_frm@src/xprt_quic.c:1695 tasklet_wakeup
h1_io_cb 44427 155.0ms 3.489us 21.50s 484.0us <- h1_takeover@src/mux_h1.c:4085 tasklet_wakeup
qc_io_cb 9018 4.924s 546.0us 3.084s 342.0us <- qc_stream_desc_ack@src/quic_stream.c:128 tasklet_wakeup
h1_timeout_task 3236 1.172ms 362.0ns 1.119s 345.9us <- h1_release@src/mux_h1.c:1087 task_wakeup
h1_io_cb 2804 7.974ms 2.843us 1.980s 706.0us <- sock_conn_iocb@src/sock.c:849 tasklet_wakeup
sc_conn_io_cb 2804 33.44ms 11.92us 2.597s 926.2us <- h1_wake_stream_for_send@src/mux_h1.c:2610 tasklet_wakeup
qc_io_cb 2623 2.669s 1.017ms 1.347s 513.5us <- h3_snd_buf@src/h3.c:1084 tasklet_wakeup
qc_process_timer 662 526.4us 795.0ns 1.081s 1.633ms <- wake_expired_tasks@src/task.c:344 task_wakeup
quic_conn_app_io_cb 648 12.62ms 19.47us 225.7ms 348.2us <- qc_process_timer@src/xprt_quic.c:4635 tasklet_wakeup
accept_queue_process 286 1.571ms 5.494us 72.55ms 253.7us <- listener_accept@src/listener.c:1099 tasklet_wakeup
process_resolvers 176 157.8us 896.0ns 7.835ms 44.52us <- wake_expired_tasks@src/task.c:429 task_drop_running
qc_io_cb 167 10.71ms 64.12us 32.47ms 194.4us <- qc_process_timer@src/xprt_quic.c:4602 tasklet_wakeup
sc_conn_io_cb 123 80.05us 650.0ns 50.35ms 409.4us <- qcs_notify_recv@src/mux_quic.c:519 tasklet_wakeup
h2_timeout_task 32 30.69us 958.0ns 9.038ms 282.4us <- h2_release@src/mux_h2.c:1191 task_wakeup
task_run_applet 24 33.79ms 1.408ms 5.838ms 243.3us <- sc_applet_create@src/stconn.c:489 appctx_wakeup
accept_queue_process 17 56.34us 3.314us 7.505ms 441.5us <- accept_queue_process@src/listener.c:165 tasklet_wakeup
srv_cleanup_toremove_conns 16 1.133ms 70.81us 5.685ms 355.3us <- srv_cleanup_idle_conns@src/server.c:5948 task_wakeup
srv_cleanup_idle_conns 16 74.57us 4.660us 2.797ms 174.8us <- wake_expired_tasks@src/task.c:429 task_drop_running
quic_conn_app_io_cb 12 786.9us 65.58us 2.042ms 170.1us <- qc_process_timer@src/xprt_quic.c:4589 tasklet_wakeup
sc_conn_io_cb 9 20.55us 2.283us 2.475ms 275.0us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup
h2_io_cb 8 34.12us 4.265us 1.784ms 223.0us <- h2_do_shutw@src/mux_h2.c:4656 tasklet_wakeup
task_run_applet 4 6.615ms 1.654ms 2.306us 576.0ns <- sc_app_chk_snd_applet@src/stconn.c:996 appctx_wakeup
quic_conn_io_cb 4 4.278ms 1.069ms 6.469us 1.617us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after
qc_io_cb 2 20.81us 10.40us 4.943us 2.471us <- qc_init@src/mux_quic.c:2057 tasklet_wakeup
quic_conn_app_io_cb 2 752.9us 376.4us 63.97us 31.99us <- qc_xprt_start@src/xprt_quic.c:7122 tasklet_wakeup
quic_accept_run 2 13.84us 6.920us 172.8us 86.42us <- quic_accept_push_qc@src/quic_sock.c:458 tasklet_wakeup
qc_idle_timer_task 2 295.0us 147.5us 8.761us 4.380us <- wake_expired_tasks@src/task.c:344 task_wakeup
qc_io_cb 1 867.1us 867.1us 812.8us 812.8us <- qcs_consume@src/mux_quic.c:800 tasklet_wakeup
... and calls sorted by address like this:
Tasks activity:
function calls cpu_tot cpu_avg lat_tot lat_avg
task_run_applet 23 32.73ms 1.423ms 5.837ms 253.8us <- sc_applet_create@src/stconn.c:489 appctx_wakeup
task_run_applet 4 6.615ms 1.654ms 2.306us 576.0ns <- sc_app_chk_snd_applet@src/stconn.c:996 appctx_wakeup
accept_queue_process 285 1.566ms 5.495us 72.49ms 254.3us <- listener_accept@src/listener.c:1099 tasklet_wakeup
accept_queue_process 17 56.34us 3.314us 7.505ms 441.5us <- accept_queue_process@src/listener.c:165 tasklet_wakeup
sc_conn_io_cb 10357182 6.297s 607.0ns 27.93m 161.8us <- sc_app_chk_rcv_conn@src/stconn.c:762 tasklet_wakeup
sc_conn_io_cb 9347863 16.59s 1.774us 6.143m 39.43us <- h1_wake_stream_for_recv@src/mux_h1.c:2600 tasklet_wakeup
sc_conn_io_cb 239717 492.3ms 2.053us 3.213m 804.3us <- qcs_notify_send@src/mux_quic.c:529 tasklet_wakeup
sc_conn_io_cb 2804 33.44ms 11.92us 2.597s 926.2us <- h1_wake_stream_for_send@src/mux_h1.c:2610 tasklet_wakeup
sc_conn_io_cb 123 80.05us 650.0ns 50.35ms 409.4us <- qcs_notify_recv@src/mux_quic.c:519 tasklet_wakeup
sc_conn_io_cb 9 20.55us 2.283us 2.475ms 275.0us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup
process_resolvers 159 145.9us 917.0ns 7.823ms 49.20us <- wake_expired_tasks@src/task.c:429 task_drop_running
srv_cleanup_idle_conns 16 74.57us 4.660us 2.797ms 174.8us <- wake_expired_tasks@src/task.c:429 task_drop_running
srv_cleanup_toremove_conns 16 1.133ms 70.81us 5.685ms 355.3us <- srv_cleanup_idle_conns@src/server.c:5948 task_wakeup
process_stream 9891130 1.809m 10.97us 53.61m 325.2us <- sc_notify@src/stconn.c:1209 task_wakeup
process_stream 9823933 1.887m 11.52us 48.31m 295.1us <- stream_new@src/stream.c:563 task_wakeup
h1_io_cb 17357952 40.91s 2.357us 4.849m 16.76us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup
h1_io_cb 501344 1.848s 3.686us 6.544m 783.2us <- conn_subscribe@src/connection.c:732 tasklet_wakeup
h1_io_cb 44427 155.0ms 3.489us 21.50s 484.0us <- h1_takeover@src/mux_h1.c:4085 tasklet_wakeup
h1_io_cb 2804 7.974ms 2.843us 1.980s 706.0us <- sock_conn_iocb@src/sock.c:849 tasklet_wakeup
h1_timeout_task 3236 1.172ms 362.0ns 1.119s 345.9us <- h1_release@src/mux_h1.c:1087 task_wakeup
h2_timeout_task 32 30.69us 958.0ns 9.038ms 282.4us <- h2_release@src/mux_h2.c:1191 task_wakeup
h2_io_cb 173019 4.204s 24.30us 40.95s 236.7us <- h2_snd_buf@src/mux_h2.c:6712 tasklet_wakeup
h2_io_cb 149487 424.3ms 2.838us 14.63s 97.87us <- h2c_restart_reading@src/mux_h2.c:856 tasklet_wakeup
h2_io_cb 8 34.12us 4.265us 1.784ms 223.0us <- h2_do_shutw@src/mux_h2.c:4656 tasklet_wakeup
qc_io_cb 50355 19.01s 377.5us 10.65s 211.4us <- qc_treat_acked_tx_frm@src/xprt_quic.c:1695 tasklet_wakeup
qc_io_cb 9018 4.924s 546.0us 3.084s 342.0us <- qc_stream_desc_ack@src/quic_stream.c:128 tasklet_wakeup
qc_io_cb 2623 2.669s 1.017ms 1.347s 513.5us <- h3_snd_buf@src/h3.c:1084 tasklet_wakeup
qc_io_cb 167 10.71ms 64.12us 32.47ms 194.4us <- qc_process_timer@src/xprt_quic.c:4602 tasklet_wakeup
qc_io_cb 2 20.81us 10.40us 4.943us 2.471us <- qc_init@src/mux_quic.c:2057 tasklet_wakeup
qc_io_cb 1 867.1us 867.1us 812.8us 812.8us <- qcs_consume@src/mux_quic.c:800 tasklet_wakeup
qc_idle_timer_task 2 295.0us 147.5us 8.761us 4.380us <- wake_expired_tasks@src/task.c:344 task_wakeup
quic_conn_io_cb 4 4.278ms 1.069ms 6.469us 1.617us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after
quic_conn_app_io_cb 92205 3.735s 40.51us 390.9ms 4.239us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after
quic_conn_app_io_cb 648 12.62ms 19.47us 225.7ms 348.2us <- qc_process_timer@src/xprt_quic.c:4635 tasklet_wakeup
quic_conn_app_io_cb 12 786.9us 65.58us 2.042ms 170.1us <- qc_process_timer@src/xprt_quic.c:4589 tasklet_wakeup
quic_conn_app_io_cb 2 752.9us 376.4us 63.97us 31.99us <- qc_xprt_start@src/xprt_quic.c:7122 tasklet_wakeup
quic_lstnr_dghdlr 94389 614.0ms 6.504us 30.54s 323.6us <- quic_lstnr_dgram_dispatch@src/quic_sock.c:255 tasklet_wakeup
qc_process_timer 662 526.4us 795.0ns 1.081s 1.633ms <- wake_expired_tasks@src/task.c:344 task_wakeup
quic_accept_run 2 13.84us 6.920us 172.8us 86.42us <- quic_accept_push_qc@src/quic_sock.c:458 tasklet_wakeup
other 101892 4.626s 45.40us 14.84s 145.7us
It already becomes visible that some tasks have different very costs
depending where they're called (e.g. process_stream). The method used
to wake them up is also shown. Applets are handled specially and shown
as appctx_wakeup.
diff --git a/include/haproxy/activity-t.h b/include/haproxy/activity-t.h
index 90be2c1..309195f 100644
--- a/include/haproxy/activity-t.h
+++ b/include/haproxy/activity-t.h
@@ -126,6 +126,7 @@
*/
struct sched_activity {
const void *func;
+ const struct ha_caller *caller;
uint64_t calls;
uint64_t cpu_time;
uint64_t lat_time;
diff --git a/include/haproxy/activity.h b/include/haproxy/activity.h
index 9b18882..dbc8ec3 100644
--- a/include/haproxy/activity.h
+++ b/include/haproxy/activity.h
@@ -31,7 +31,7 @@
void report_stolen_time(uint64_t stolen);
void activity_count_runtime(uint32_t run_time);
-struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func);
+struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func, const void *caller);
#ifdef USE_MEMORY_PROFILING
struct memprof_stats *memprof_get_bin(const void *ra, enum memprof_method meth);
diff --git a/src/activity.c b/src/activity.c
index e90a7c6..42e0c54 100644
--- a/src/activity.c
+++ b/src/activity.c
@@ -445,6 +445,7 @@
HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0);
HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0);
HA_ATOMIC_STORE(&sched_activity[i].func, NULL);
+ HA_ATOMIC_STORE(&sched_activity[i].caller, NULL);
}
}
else if (strcmp(args[3], "auto") == 0) {
@@ -482,6 +483,7 @@
return 0;
}
+/* sort by address first, then by call count */
static int cmp_sched_activity_addr(const void *a, const void *b)
{
const struct sched_activity *l = (const struct sched_activity *)a;
@@ -491,6 +493,10 @@
return -1;
else if (l->func < r->func)
return 1;
+ else if (l->calls > r->calls)
+ return -1;
+ else if (l->calls < r->calls)
+ return 1;
else
return 0;
}
@@ -524,23 +530,25 @@
}
#endif // USE_MEMORY_PROFILING
-/* Computes the index of function pointer <func> for use with sched_activity[]
- * or any other similar array passed in <array>, and returns a pointer to the
- * entry after having atomically assigned it to this function pointer. Note
- * that in case of collision, the first entry is returned instead ("other").
+/* Computes the index of function pointer <func> and caller <caller> for use
+ * with sched_activity[] or any other similar array passed in <array>, and
+ * returns a pointer to the entry after having atomically assigned it to this
+ * function pointer and caller combination. Note that in case of collision,
+ * the first entry is returned instead ("other").
*/
-struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func)
+struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func, const void *caller)
{
- uint32_t hash = ptr_hash(func, SCHED_ACT_HASH_BITS);
+ uint32_t hash = ptr2_hash(func, caller, SCHED_ACT_HASH_BITS);
struct sched_activity *ret;
const void *old = NULL;
ret = &array[hash];
- if (likely(ret->func == func))
+ if (likely(ret->func == func && ret->caller == caller))
return ret;
- if (HA_ATOMIC_CAS(&ret->func, &old, func))
+ if (HA_ATOMIC_CAS(&ret->func, &old, func) &&
+ HA_ATOMIC_CAS(&ret->caller, &old, caller))
return ret;
return array;
@@ -572,6 +580,7 @@
#endif
struct stconn *sc = appctx_sc(appctx);
struct buffer *name_buffer = get_trash_chunk();
+ const struct ha_caller *caller;
const char *str;
int max_lines;
int i, max;
@@ -626,6 +635,7 @@
for (i = ctx->linenum; i < max_lines && tmp_activity[i].calls; i++) {
ctx->linenum = i;
chunk_reset(name_buffer);
+ caller = HA_ATOMIC_LOAD(&tmp_activity[i].caller);
if (!tmp_activity[i].func)
chunk_printf(name_buffer, "other");
@@ -643,7 +653,14 @@
print_time_short(&trash, " ", tmp_activity[i].cpu_time, "");
print_time_short(&trash, " ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
- print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
+ print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "");
+
+ if (caller && caller->what <= WAKEUP_TYPE_APPCTX_WAKEUP)
+ chunk_appendf(&trash, " <- %s@%s:%d %s",
+ caller->func, caller->file, caller->line,
+ task_wakeup_type_str(caller->what));
+
+ b_putchr(&trash, '\n');
if (applet_putchk(appctx, &trash) == -1) {
/* failed, try again */
@@ -834,7 +851,7 @@
rqnode = eb32_first(&ha_thread_ctx[thr].rqueue_shared);
while (rqnode) {
t = eb32_entry(rqnode, struct task, rq);
- entry = sched_activity_entry(tmp_activity, t->process);
+ entry = sched_activity_entry(tmp_activity, t->process, NULL);
if (t->wake_date) {
lat = now_ns - t->wake_date;
if ((int64_t)lat > 0)
@@ -851,7 +868,7 @@
rqnode = eb32_first(&ha_thread_ctx[thr].rqueue);
while (rqnode) {
t = eb32_entry(rqnode, struct task, rq);
- entry = sched_activity_entry(tmp_activity, t->process);
+ entry = sched_activity_entry(tmp_activity, t->process, NULL);
if (t->wake_date) {
lat = now_ns - t->wake_date;
if ((int64_t)lat > 0)
@@ -864,7 +881,7 @@
/* shared tasklet list */
list_for_each_entry(tl, mt_list_to_list(&ha_thread_ctx[thr].shared_tasklet_list), list) {
t = (const struct task *)tl;
- entry = sched_activity_entry(tmp_activity, t->process);
+ entry = sched_activity_entry(tmp_activity, t->process, NULL);
if (!TASK_IS_TASKLET(t) && t->wake_date) {
lat = now_ns - t->wake_date;
if ((int64_t)lat > 0)
@@ -877,7 +894,7 @@
for (queue = 0; queue < TL_CLASSES; queue++) {
list_for_each_entry(tl, &ha_thread_ctx[thr].tasklets[queue], list) {
t = (const struct task *)tl;
- entry = sched_activity_entry(tmp_activity, t->process);
+ entry = sched_activity_entry(tmp_activity, t->process, NULL);
if (!TASK_IS_TASKLET(t) && t->wake_date) {
lat = now_ns - t->wake_date;
if ((int64_t)lat > 0)
diff --git a/src/task.c b/src/task.c
index e851c63..b7a3f01 100644
--- a/src/task.c
+++ b/src/task.c
@@ -567,7 +567,7 @@
t->wake_date = 0;
th_ctx->sched_call_date = now_ns;
- profile_entry = sched_activity_entry(sched_activity, t->process);
+ profile_entry = sched_activity_entry(sched_activity, t->process, t->caller);
th_ctx->sched_profile_entry = profile_entry;
HA_ATOMIC_ADD(&profile_entry->lat_time, lat);
HA_ATOMIC_INC(&profile_entry->calls);