Willy Tarreau | 609aad9 | 2018-11-22 08:31:09 +0100 | [diff] [blame] | 1 | /* |
| 2 | * activity measurement functions. |
| 3 | * |
| 4 | * Copyright 2000-2018 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
Willy Tarreau | b255105 | 2020-06-09 09:07:15 +0200 | [diff] [blame] | 13 | #include <haproxy/activity-t.h> |
Willy Tarreau | 4c7e4b7 | 2020-05-27 12:58:42 +0200 | [diff] [blame] | 14 | #include <haproxy/api.h> |
Willy Tarreau | 6be7849 | 2020-06-05 00:00:29 +0200 | [diff] [blame] | 15 | #include <haproxy/cfgparse.h> |
Willy Tarreau | f1d32c4 | 2020-06-04 21:07:02 +0200 | [diff] [blame] | 16 | #include <haproxy/channel.h> |
Willy Tarreau | 83487a8 | 2020-06-04 20:19:54 +0200 | [diff] [blame] | 17 | #include <haproxy/cli.h> |
Willy Tarreau | b255105 | 2020-06-09 09:07:15 +0200 | [diff] [blame] | 18 | #include <haproxy/freq_ctr.h> |
Willy Tarreau | 5e539c9 | 2020-06-04 20:45:39 +0200 | [diff] [blame] | 19 | #include <haproxy/stream_interface.h> |
Willy Tarreau | 48fbcae | 2020-06-03 18:09:46 +0200 | [diff] [blame] | 20 | #include <haproxy/tools.h> |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 21 | |
| 22 | |
| 23 | /* bit field of profiling options. Beware, may be modified at runtime! */ |
Willy Tarreau | aa622b8 | 2021-01-28 21:44:22 +0100 | [diff] [blame] | 24 | unsigned int profiling = HA_PROF_TASKS_AOFF; |
Willy Tarreau | d9add3a | 2019-04-25 08:57:41 +0200 | [diff] [blame] | 25 | unsigned long task_profiling_mask = 0; |
Willy Tarreau | 609aad9 | 2018-11-22 08:31:09 +0100 | [diff] [blame] | 26 | |
| 27 | /* One struct per thread containing all collected measurements */ |
| 28 | struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { }; |
| 29 | |
Willy Tarreau | 3fb6a7b | 2021-01-28 19:19:26 +0100 | [diff] [blame] | 30 | /* One struct per function pointer hash entry (256 values, 0=collision) */ |
| 31 | struct sched_activity sched_activity[256] __attribute__((aligned(64))) = { }; |
Willy Tarreau | 609aad9 | 2018-11-22 08:31:09 +0100 | [diff] [blame] | 32 | |
| 33 | /* Updates the current thread's statistics about stolen CPU time. The unit for |
| 34 | * <stolen> is half-milliseconds. |
| 35 | */ |
| 36 | void report_stolen_time(uint64_t stolen) |
| 37 | { |
| 38 | activity[tid].cpust_total += stolen; |
| 39 | update_freq_ctr(&activity[tid].cpust_1s, stolen); |
| 40 | update_freq_ctr_period(&activity[tid].cpust_15s, 15000, stolen); |
| 41 | } |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 42 | |
| 43 | /* config parser for global "profiling.tasks", accepts "on" or "off" */ |
| 44 | static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *curpx, |
| 45 | struct proxy *defpx, const char *file, int line, |
| 46 | char **err) |
| 47 | { |
| 48 | if (too_many_args(1, args, err, NULL)) |
| 49 | return -1; |
| 50 | |
| 51 | if (strcmp(args[1], "on") == 0) |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 52 | profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON; |
| 53 | else if (strcmp(args[1], "auto") == 0) |
Willy Tarreau | aa622b8 | 2021-01-28 21:44:22 +0100 | [diff] [blame] | 54 | profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF; |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 55 | else if (strcmp(args[1], "off") == 0) |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 56 | profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF; |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 57 | else { |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 58 | memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]); |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 59 | return -1; |
| 60 | } |
| 61 | return 0; |
| 62 | } |
| 63 | |
| 64 | /* parse a "set profiling" command. It always returns 1. */ |
| 65 | static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private) |
| 66 | { |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 67 | if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) |
| 68 | return 1; |
| 69 | |
Willy Tarreau | 9d00869 | 2019-08-09 11:21:01 +0200 | [diff] [blame] | 70 | if (strcmp(args[2], "tasks") != 0) |
| 71 | return cli_err(appctx, "Expects 'tasks'.\n"); |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 72 | |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 73 | if (strcmp(args[3], "on") == 0) { |
| 74 | unsigned int old = profiling; |
Willy Tarreau | cfa7101 | 2021-01-29 11:56:21 +0100 | [diff] [blame] | 75 | int i; |
| 76 | |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 77 | while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON)) |
| 78 | ; |
Willy Tarreau | cfa7101 | 2021-01-29 11:56:21 +0100 | [diff] [blame] | 79 | /* also flush current profiling stats */ |
| 80 | for (i = 0; i < 256; i++) { |
| 81 | HA_ATOMIC_STORE(&sched_activity[i].calls, 0); |
| 82 | HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0); |
| 83 | HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0); |
| 84 | HA_ATOMIC_STORE(&sched_activity[i].func, NULL); |
| 85 | } |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 86 | } |
| 87 | else if (strcmp(args[3], "auto") == 0) { |
| 88 | unsigned int old = profiling; |
Willy Tarreau | aa622b8 | 2021-01-28 21:44:22 +0100 | [diff] [blame] | 89 | unsigned int new; |
| 90 | |
| 91 | do { |
| 92 | if ((old & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AON) |
| 93 | new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AON; |
| 94 | else |
| 95 | new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF; |
| 96 | } while (!_HA_ATOMIC_CAS(&profiling, &old, new)); |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 97 | } |
| 98 | else if (strcmp(args[3], "off") == 0) { |
| 99 | unsigned int old = profiling; |
| 100 | while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF)) |
| 101 | ; |
| 102 | } |
Willy Tarreau | 9d00869 | 2019-08-09 11:21:01 +0200 | [diff] [blame] | 103 | else |
| 104 | return cli_err(appctx, "Expects 'on', 'auto', or 'off'.\n"); |
| 105 | |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 106 | return 1; |
| 107 | } |
| 108 | |
Willy Tarreau | 1bd67e9 | 2021-01-29 00:07:40 +0100 | [diff] [blame] | 109 | static int cmp_sched_activity(const void *a, const void *b) |
| 110 | { |
| 111 | const struct sched_activity *l = (const struct sched_activity *)a; |
| 112 | const struct sched_activity *r = (const struct sched_activity *)b; |
| 113 | |
| 114 | if (l->calls > r->calls) |
| 115 | return -1; |
| 116 | else if (l->calls < r->calls) |
| 117 | return 1; |
| 118 | else |
| 119 | return 0; |
| 120 | } |
| 121 | |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 122 | /* This function dumps all profiling settings. It returns 0 if the output |
| 123 | * buffer is full and it needs to be called again, otherwise non-zero. |
| 124 | */ |
| 125 | static int cli_io_handler_show_profiling(struct appctx *appctx) |
| 126 | { |
Willy Tarreau | 1bd67e9 | 2021-01-29 00:07:40 +0100 | [diff] [blame] | 127 | struct sched_activity tmp_activity[256] __attribute__((aligned(64))); |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 128 | struct stream_interface *si = appctx->owner; |
Willy Tarreau | 1bd67e9 | 2021-01-29 00:07:40 +0100 | [diff] [blame] | 129 | struct buffer *name_buffer = get_trash_chunk(); |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 130 | const char *str; |
Willy Tarreau | 1bd67e9 | 2021-01-29 00:07:40 +0100 | [diff] [blame] | 131 | int i, max; |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 132 | |
| 133 | if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW))) |
| 134 | return 1; |
| 135 | |
| 136 | chunk_reset(&trash); |
| 137 | |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 138 | switch (profiling & HA_PROF_TASKS_MASK) { |
Willy Tarreau | aa622b8 | 2021-01-28 21:44:22 +0100 | [diff] [blame] | 139 | case HA_PROF_TASKS_AOFF: str="auto-off"; break; |
| 140 | case HA_PROF_TASKS_AON: str="auto-on"; break; |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 141 | case HA_PROF_TASKS_ON: str="on"; break; |
| 142 | default: str="off"; break; |
| 143 | } |
| 144 | |
Willy Tarreau | 1bd67e9 | 2021-01-29 00:07:40 +0100 | [diff] [blame] | 145 | memcpy(tmp_activity, sched_activity, sizeof(tmp_activity)); |
| 146 | qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity); |
| 147 | |
Willy Tarreau | d2d3348 | 2019-04-25 17:09:07 +0200 | [diff] [blame] | 148 | chunk_printf(&trash, |
| 149 | "Per-task CPU profiling : %s # set profiling tasks {on|auto|off}\n", |
| 150 | str); |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 151 | |
Willy Tarreau | 1bd67e9 | 2021-01-29 00:07:40 +0100 | [diff] [blame] | 152 | chunk_appendf(&trash, "Tasks activity:\n" |
| 153 | " function calls cpu_tot cpu_avg lat_tot lat_avg\n"); |
| 154 | |
| 155 | for (i = 0; i < 256 && tmp_activity[i].calls; i++) { |
| 156 | chunk_reset(name_buffer); |
| 157 | |
| 158 | if (!tmp_activity[i].func) |
| 159 | chunk_printf(name_buffer, "other"); |
| 160 | else |
| 161 | resolve_sym_name(name_buffer, "", tmp_activity[i].func); |
| 162 | |
| 163 | /* reserve 35 chars for name+' '+#calls, knowing that longer names |
| 164 | * are often used for less often called functions. |
| 165 | */ |
| 166 | max = 35 - name_buffer->data; |
| 167 | if (max < 1) |
| 168 | max = 1; |
| 169 | chunk_appendf(&trash, " %s%*llu", name_buffer->area, max, (unsigned long long)tmp_activity[i].calls); |
| 170 | |
| 171 | print_time_short(&trash, " ", tmp_activity[i].cpu_time, ""); |
| 172 | print_time_short(&trash, " ", tmp_activity[i].cpu_time / tmp_activity[i].calls, ""); |
| 173 | print_time_short(&trash, " ", tmp_activity[i].lat_time, ""); |
| 174 | print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n"); |
| 175 | } |
| 176 | |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 177 | if (ci_putchk(si_ic(si), &trash) == -1) { |
| 178 | /* failed, try again */ |
| 179 | si_rx_room_blk(si); |
| 180 | return 0; |
| 181 | } |
| 182 | return 1; |
| 183 | } |
| 184 | |
Willy Tarreau | 7eff06e | 2021-01-29 11:32:55 +0100 | [diff] [blame] | 185 | /* This function scans all threads' run queues and collects statistics about |
| 186 | * running tasks. It returns 0 if the output buffer is full and it needs to be |
| 187 | * called again, otherwise non-zero. |
| 188 | */ |
| 189 | static int cli_io_handler_show_tasks(struct appctx *appctx) |
| 190 | { |
| 191 | struct sched_activity tmp_activity[256] __attribute__((aligned(64))); |
| 192 | struct stream_interface *si = appctx->owner; |
| 193 | struct buffer *name_buffer = get_trash_chunk(); |
| 194 | struct sched_activity *entry; |
| 195 | const struct tasklet *tl; |
| 196 | const struct task *t; |
| 197 | uint64_t now_ns, lat; |
| 198 | struct eb32sc_node *rqnode; |
| 199 | uint64_t tot_calls; |
| 200 | int thr, queue; |
| 201 | int i, max; |
| 202 | |
| 203 | if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW))) |
| 204 | return 1; |
| 205 | |
| 206 | /* It's not possible to scan queues in small chunks and yield in the |
| 207 | * middle of the dump and come back again. So what we're doing instead |
| 208 | * is to freeze all threads and inspect their queues at once as fast as |
| 209 | * possible, using a sched_activity array to collect metrics with |
| 210 | * limited collision, then we'll report statistics only. The tasks' |
| 211 | * #calls will reflect the number of occurrences, and the lat_time will |
Willy Tarreau | 75f7233 | 2021-01-29 15:04:16 +0100 | [diff] [blame] | 212 | * reflect the latency when set. We prefer to take the time before |
| 213 | * calling thread_isolate() so that the wait time doesn't impact the |
| 214 | * measurement accuracy. However this requires to take care of negative |
| 215 | * times since tasks might be queued after we retrieve it. |
Willy Tarreau | 7eff06e | 2021-01-29 11:32:55 +0100 | [diff] [blame] | 216 | */ |
| 217 | |
| 218 | now_ns = now_mono_time(); |
| 219 | memset(tmp_activity, 0, sizeof(tmp_activity)); |
| 220 | |
| 221 | thread_isolate(); |
| 222 | |
| 223 | /* 1. global run queue */ |
| 224 | |
| 225 | #ifdef USE_THREAD |
| 226 | rqnode = eb32sc_first(&rqueue, ~0UL); |
| 227 | while (rqnode) { |
| 228 | t = eb32sc_entry(rqnode, struct task, rq); |
| 229 | entry = sched_activity_entry(tmp_activity, t->process); |
| 230 | if (t->call_date) { |
| 231 | lat = now_ns - t->call_date; |
Willy Tarreau | 75f7233 | 2021-01-29 15:04:16 +0100 | [diff] [blame] | 232 | if ((int64_t)lat > 0) |
| 233 | entry->lat_time += lat; |
Willy Tarreau | 7eff06e | 2021-01-29 11:32:55 +0100 | [diff] [blame] | 234 | } |
| 235 | entry->calls++; |
| 236 | rqnode = eb32sc_next(rqnode, ~0UL); |
| 237 | } |
| 238 | #endif |
| 239 | /* 2. all threads's local run queues */ |
| 240 | for (thr = 0; thr < global.nbthread; thr++) { |
| 241 | /* task run queue */ |
| 242 | rqnode = eb32sc_first(&task_per_thread[thr].rqueue, ~0UL); |
| 243 | while (rqnode) { |
| 244 | t = eb32sc_entry(rqnode, struct task, rq); |
| 245 | entry = sched_activity_entry(tmp_activity, t->process); |
| 246 | if (t->call_date) { |
| 247 | lat = now_ns - t->call_date; |
Willy Tarreau | 75f7233 | 2021-01-29 15:04:16 +0100 | [diff] [blame] | 248 | if ((int64_t)lat > 0) |
| 249 | entry->lat_time += lat; |
Willy Tarreau | 7eff06e | 2021-01-29 11:32:55 +0100 | [diff] [blame] | 250 | } |
| 251 | entry->calls++; |
| 252 | rqnode = eb32sc_next(rqnode, ~0UL); |
| 253 | } |
| 254 | |
| 255 | /* shared tasklet list */ |
| 256 | list_for_each_entry(tl, mt_list_to_list(&task_per_thread[thr].shared_tasklet_list), list) { |
| 257 | t = (const struct task *)tl; |
| 258 | entry = sched_activity_entry(tmp_activity, t->process); |
| 259 | if (!TASK_IS_TASKLET(t) && t->call_date) { |
| 260 | lat = now_ns - t->call_date; |
Willy Tarreau | 75f7233 | 2021-01-29 15:04:16 +0100 | [diff] [blame] | 261 | if ((int64_t)lat > 0) |
| 262 | entry->lat_time += lat; |
Willy Tarreau | 7eff06e | 2021-01-29 11:32:55 +0100 | [diff] [blame] | 263 | } |
| 264 | entry->calls++; |
| 265 | } |
| 266 | |
| 267 | /* classful tasklets */ |
| 268 | for (queue = 0; queue < TL_CLASSES; queue++) { |
| 269 | list_for_each_entry(tl, &task_per_thread[thr].tasklets[queue], list) { |
| 270 | t = (const struct task *)tl; |
| 271 | entry = sched_activity_entry(tmp_activity, t->process); |
| 272 | if (!TASK_IS_TASKLET(t) && t->call_date) { |
| 273 | lat = now_ns - t->call_date; |
Willy Tarreau | 75f7233 | 2021-01-29 15:04:16 +0100 | [diff] [blame] | 274 | if ((int64_t)lat > 0) |
| 275 | entry->lat_time += lat; |
Willy Tarreau | 7eff06e | 2021-01-29 11:32:55 +0100 | [diff] [blame] | 276 | } |
| 277 | entry->calls++; |
| 278 | } |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | /* hopefully we're done */ |
| 283 | thread_release(); |
| 284 | |
| 285 | chunk_reset(&trash); |
| 286 | |
| 287 | tot_calls = 0; |
| 288 | for (i = 0; i < 256; i++) |
| 289 | tot_calls += tmp_activity[i].calls; |
| 290 | |
| 291 | qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity); |
| 292 | |
| 293 | chunk_appendf(&trash, "Running tasks: %d (%d threads)\n" |
| 294 | " function places %% lat_tot lat_avg\n", |
| 295 | (int)tot_calls, global.nbthread); |
| 296 | |
| 297 | for (i = 0; i < 256 && tmp_activity[i].calls; i++) { |
| 298 | chunk_reset(name_buffer); |
| 299 | |
| 300 | if (!tmp_activity[i].func) |
| 301 | chunk_printf(name_buffer, "other"); |
| 302 | else |
| 303 | resolve_sym_name(name_buffer, "", tmp_activity[i].func); |
| 304 | |
| 305 | /* reserve 35 chars for name+' '+#calls, knowing that longer names |
| 306 | * are often used for less often called functions. |
| 307 | */ |
| 308 | max = 35 - name_buffer->data; |
| 309 | if (max < 1) |
| 310 | max = 1; |
| 311 | chunk_appendf(&trash, " %s%*llu %3d.%1d", |
| 312 | name_buffer->area, max, (unsigned long long)tmp_activity[i].calls, |
| 313 | (int)(100ULL * tmp_activity[i].calls / tot_calls), |
| 314 | (int)((1000ULL * tmp_activity[i].calls / tot_calls)%10)); |
| 315 | print_time_short(&trash, " ", tmp_activity[i].lat_time, ""); |
| 316 | print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n"); |
| 317 | } |
| 318 | |
| 319 | if (ci_putchk(si_ic(si), &trash) == -1) { |
| 320 | /* failed, try again */ |
| 321 | si_rx_room_blk(si); |
| 322 | return 0; |
| 323 | } |
| 324 | return 1; |
| 325 | } |
| 326 | |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 327 | /* config keyword parsers */ |
| 328 | static struct cfg_kw_list cfg_kws = {ILH, { |
| 329 | { CFG_GLOBAL, "profiling.tasks", cfg_parse_prof_tasks }, |
| 330 | { 0, NULL, NULL } |
| 331 | }}; |
| 332 | |
Willy Tarreau | 0108d90 | 2018-11-25 19:14:37 +0100 | [diff] [blame] | 333 | INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); |
| 334 | |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 335 | /* register cli keywords */ |
| 336 | static struct cli_kw_list cli_kws = {{ },{ |
| 337 | { { "show", "profiling", NULL }, "show profiling : show CPU profiling options", NULL, cli_io_handler_show_profiling, NULL }, |
Willy Tarreau | 7eff06e | 2021-01-29 11:32:55 +0100 | [diff] [blame] | 338 | { { "show", "tasks", NULL }, "show tasks : show running tasks", NULL, cli_io_handler_show_tasks, NULL }, |
Willy Tarreau | 75c62c2 | 2018-11-22 11:02:09 +0100 | [diff] [blame] | 339 | { { "set", "profiling", NULL }, "set profiling : enable/disable CPU profiling", cli_parse_set_profiling, NULL }, |
| 340 | {{},} |
| 341 | }}; |
| 342 | |
Willy Tarreau | 0108d90 | 2018-11-25 19:14:37 +0100 | [diff] [blame] | 343 | INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); |