/*
 * activity measurement functions.
 *
 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <malloc.h>
#include <haproxy/activity-t.h>
#include <haproxy/api.h>
#include <haproxy/cfgparse.h>
#include <haproxy/channel.h>
#include <haproxy/cli.h>
#include <haproxy/freq_ctr.h>
#include <haproxy/stream_interface.h>
#include <haproxy/tools.h>


/* bit field of profiling options. Beware, may be modified at runtime! */
unsigned int profiling __read_mostly = HA_PROF_TASKS_AOFF;
unsigned long task_profiling_mask __read_mostly = 0;

/* One struct per thread containing all collected measurements */
struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { };

/* One struct per function pointer hash entry (256 values, 0=collision) */
struct sched_activity sched_activity[256] __attribute__((aligned(64))) = { };
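/* Note: entries are expected to be located via a hash of the task's function
 * pointer (see sched_activity_entry(), used by the dump handlers below);
 * colliding or unresolved pointers share slot 0 and are reported as "other".
 */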


#ifdef USE_MEMORY_PROFILING
/* determine the number of buckets to store stats */
#define MEMPROF_HASH_BITS 10
#define MEMPROF_HASH_BUCKETS (1U << MEMPROF_HASH_BITS)
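/* A sketch of the intended bucket mapping (an assumption: the actual hash
 * lives with the allocation wrappers, outside this section): a caller's
 * return address is reduced to an index, e.g.:
 *     bucket = hash(caller) & (MEMPROF_HASH_BUCKETS - 1);
 * and colliding callers are accounted in the extra bucket declared below.
 */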

/* stats:
 * - malloc increases alloc_calls and alloc_tot
 * - free increases free_calls and free_tot (only for non-NULL pointers)
 * - realloc increases either the alloc or the free counters depending on
 *   the size change.
 * when the real size is known (malloc_usable_size()), it's used in free_tot
 * and alloc_tot, otherwise the requested size is reported in alloc_tot and
 * zero in free_tot.
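 * For example, under these rules, a malloc(100) accounts for one alloc_call
 * with the usable size (or 100 when unknown) added to alloc_tot, and the
 * matching free() accounts for one free_call with the same amount added to
 * free_tot.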
 */
struct memprof_stats {
	const void *caller;
	unsigned long long alloc_calls;
	unsigned long long free_calls;
	unsigned long long alloc_tot;
	unsigned long long free_tot;
};

/* last one is for hash collisions ("others") and has no caller address */
struct memprof_stats memprof_stats[MEMPROF_HASH_BUCKETS + 1] = { };

#endif // USE_MEMORY_PROFILING

/* Updates the current thread's statistics about stolen CPU time. The unit for
 * <stolen> is half-milliseconds.
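 * For example, a caller that measured a 10ms scheduling delay is expected
 * to pass <stolen>=20.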
 */
void report_stolen_time(uint64_t stolen)
{
	activity[tid].cpust_total += stolen;
	update_freq_ctr(&activity[tid].cpust_1s, stolen);
	update_freq_ctr_period(&activity[tid].cpust_15s, 15000, stolen);
}

/* config parser for global "profiling.tasks", accepts "on", "auto" or "off" */
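/* Example ("global" section of the configuration; "auto" matches the
 * built-in default HA_PROF_TASKS_AOFF above):
 *     profiling.tasks auto
 */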
static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *curpx,
                                const struct proxy *defpx, const char *file, int line,
                                char **err)
{
	if (too_many_args(1, args, err, NULL))
		return -1;

	if (strcmp(args[1], "on") == 0)
		profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON;
	else if (strcmp(args[1], "auto") == 0)
		profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
	else if (strcmp(args[1], "off") == 0)
		profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF;
	else {
		memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]);
		return -1;
	}
	return 0;
}

/* parse a "set profiling" command. It always returns 1. */
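/* CLI usage, as also advertised by the "show profiling" output below:
 *     set profiling tasks {on|auto|off}
 *     set profiling memory {on|off}
 */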
static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private)
{
	if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
		return 1;

	if (strcmp(args[2], "memory") == 0) {
#ifdef USE_MEMORY_PROFILING
		if (strcmp(args[3], "on") == 0) {
			unsigned int old = profiling;
			int i;

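			/* atomically set HA_PROF_MEMORY; on failure, _HA_ATOMIC_CAS
			 * refreshes <old> with the current value so the loop retries
			 * against up-to-date flags
			 */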
			while (!_HA_ATOMIC_CAS(&profiling, &old, old | HA_PROF_MEMORY))
				;

			/* also flush current profiling stats */
			for (i = 0; i < sizeof(memprof_stats) / sizeof(memprof_stats[0]); i++) {
				HA_ATOMIC_STORE(&memprof_stats[i].alloc_calls, 0);
				HA_ATOMIC_STORE(&memprof_stats[i].free_calls, 0);
				HA_ATOMIC_STORE(&memprof_stats[i].alloc_tot, 0);
				HA_ATOMIC_STORE(&memprof_stats[i].free_tot, 0);
				HA_ATOMIC_STORE(&memprof_stats[i].caller, NULL);
			}
		}
		else if (strcmp(args[3], "off") == 0) {
			unsigned int old = profiling;

			while (!_HA_ATOMIC_CAS(&profiling, &old, old & ~HA_PROF_MEMORY))
				;
		}
		else
			return cli_err(appctx, "Expects either 'on' or 'off'.\n");
		return 1;
#else
		return cli_err(appctx, "Memory profiling not compiled in.\n");
#endif
	}

	if (strcmp(args[2], "tasks") != 0)
		return cli_err(appctx, "Expects either 'tasks' or 'memory'.\n");

	if (strcmp(args[3], "on") == 0) {
		unsigned int old = profiling;
		int i;

		while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON))
			;
		/* also flush current profiling stats */
		for (i = 0; i < 256; i++) {
			HA_ATOMIC_STORE(&sched_activity[i].calls, 0);
			HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0);
			HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0);
			HA_ATOMIC_STORE(&sched_activity[i].func, NULL);
		}
	}
	else if (strcmp(args[3], "auto") == 0) {
		unsigned int old = profiling;
		unsigned int new;

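		/* switch to automatic mode while preserving its current state:
		 * "on" and "auto-on" map to auto-on, anything else to auto-off
		 */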
		do {
			if ((old & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AON)
				new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AON;
			else
				new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
		} while (!_HA_ATOMIC_CAS(&profiling, &old, new));
	}
	else if (strcmp(args[3], "off") == 0) {
		unsigned int old = profiling;
		while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF))
			;
	}
	else
		return cli_err(appctx, "Expects 'on', 'auto', or 'off'.\n");

	return 1;
}

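/* qsort() comparator sorting sched_activity entries by decreasing call count */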
static int cmp_sched_activity(const void *a, const void *b)
{
	const struct sched_activity *l = (const struct sched_activity *)a;
	const struct sched_activity *r = (const struct sched_activity *)b;

	if (l->calls > r->calls)
		return -1;
	else if (l->calls < r->calls)
		return 1;
	else
		return 0;
}

/* This function dumps all profiling settings. It returns 0 if the output
 * buffer is full and it needs to be called again, otherwise non-zero.
 */
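/* Illustrative output (values are made up for the example):
 *   Per-task CPU profiling : auto-off # set profiling tasks {on|auto|off}
 *   Memory usage profiling : off      # set profiling memory {on|off}
 *   Tasks activity:
 *     function                      calls   cpu_tot   cpu_avg   lat_tot   lat_avg
 *     process_stream                 1234   12.34ms   10.00us   45.67ms   37.01us
 */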
static int cli_io_handler_show_profiling(struct appctx *appctx)
{
	struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
	struct stream_interface *si = appctx->owner;
	struct buffer *name_buffer = get_trash_chunk();
	const char *str;
	int i, max;

	if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
		return 1;

	chunk_reset(&trash);

	switch (profiling & HA_PROF_TASKS_MASK) {
	case HA_PROF_TASKS_AOFF: str = "auto-off"; break;
	case HA_PROF_TASKS_AON:  str = "auto-on";  break;
	case HA_PROF_TASKS_ON:   str = "on";       break;
	default:                 str = "off";      break;
	}

	memcpy(tmp_activity, sched_activity, sizeof(tmp_activity));
	qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity);

	chunk_printf(&trash,
	             "Per-task CPU profiling : %-8s # set profiling tasks {on|auto|off}\n"
	             "Memory usage profiling : %-8s # set profiling memory {on|off}\n",
	             str, (profiling & HA_PROF_MEMORY) ? "on" : "off");

	chunk_appendf(&trash, "Tasks activity:\n"
	              "  function                      calls   cpu_tot   cpu_avg   lat_tot   lat_avg\n");

	for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
		chunk_reset(name_buffer);

		if (!tmp_activity[i].func)
			chunk_printf(name_buffer, "other");
		else
			resolve_sym_name(name_buffer, "", tmp_activity[i].func);

		/* reserve 35 chars for name+' '+#calls, knowing that longer names
		 * are often used for less often called functions.
		 */
		max = 35 - name_buffer->data;
		if (max < 1)
			max = 1;
		chunk_appendf(&trash, "  %s%*llu", name_buffer->area, max, (unsigned long long)tmp_activity[i].calls);

		print_time_short(&trash, "   ", tmp_activity[i].cpu_time, "");
		print_time_short(&trash, "   ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
		print_time_short(&trash, "   ", tmp_activity[i].lat_time, "");
		print_time_short(&trash, "   ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
	}

	if (ci_putchk(si_ic(si), &trash) == -1) {
		/* failed, try again */
		si_rx_room_blk(si);
		return 0;
	}
	return 1;
}

/* This function scans all threads' run queues and collects statistics about
 * running tasks. It returns 0 if the output buffer is full and it needs to be
 * called again, otherwise non-zero.
 */
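/* Illustrative output (values are made up for the example):
 *   Running tasks: 3 (4 threads)
 *     function                     places     %    lat_tot   lat_avg
 *     process_stream                    2   66.6   1.234ms   617.0us
 *     other                             1   33.3   10.00us   10.00us
 */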
static int cli_io_handler_show_tasks(struct appctx *appctx)
{
	struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
	struct stream_interface *si = appctx->owner;
	struct buffer *name_buffer = get_trash_chunk();
	struct sched_activity *entry;
	const struct tasklet *tl;
	const struct task *t;
	uint64_t now_ns, lat;
	struct eb32sc_node *rqnode;
	uint64_t tot_calls;
	int thr, queue;
	int i, max;

	if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
		return 1;

	/* It's not possible to scan queues in small chunks and yield in the
	 * middle of the dump and come back again. So what we're doing instead
	 * is to freeze all threads and inspect their queues at once as fast as
	 * possible, using a sched_activity array to collect metrics with
	 * limited collision, then we'll report statistics only. The tasks'
	 * #calls will reflect the number of occurrences, and the lat_time will
	 * reflect the latency when set. We prefer to take the time before
	 * calling thread_isolate() so that the wait time doesn't impact the
	 * measurement accuracy. However this requires taking care of negative
	 * times since tasks might be queued after we retrieve the time.
	 */

	now_ns = now_mono_time();
	memset(tmp_activity, 0, sizeof(tmp_activity));

	thread_isolate();

	/* 1. global run queue */

#ifdef USE_THREAD
	rqnode = eb32sc_first(&rqueue, ~0UL);
	while (rqnode) {
		t = eb32sc_entry(rqnode, struct task, rq);
		entry = sched_activity_entry(tmp_activity, t->process);
		if (t->call_date) {
			lat = now_ns - t->call_date;
			if ((int64_t)lat > 0)
				entry->lat_time += lat;
		}
		entry->calls++;
		rqnode = eb32sc_next(rqnode, ~0UL);
	}
#endif
	/* 2. all threads' local run queues */
	for (thr = 0; thr < global.nbthread; thr++) {
		/* task run queue */
		rqnode = eb32sc_first(&task_per_thread[thr].rqueue, ~0UL);
		while (rqnode) {
			t = eb32sc_entry(rqnode, struct task, rq);
			entry = sched_activity_entry(tmp_activity, t->process);
			if (t->call_date) {
				lat = now_ns - t->call_date;
				if ((int64_t)lat > 0)
					entry->lat_time += lat;
			}
			entry->calls++;
			rqnode = eb32sc_next(rqnode, ~0UL);
		}

		/* shared tasklet list */
		list_for_each_entry(tl, mt_list_to_list(&task_per_thread[thr].shared_tasklet_list), list) {
			t = (const struct task *)tl;
			entry = sched_activity_entry(tmp_activity, t->process);
			if (!TASK_IS_TASKLET(t) && t->call_date) {
				lat = now_ns - t->call_date;
				if ((int64_t)lat > 0)
					entry->lat_time += lat;
			}
			entry->calls++;
		}

		/* classful tasklets */
		for (queue = 0; queue < TL_CLASSES; queue++) {
			list_for_each_entry(tl, &task_per_thread[thr].tasklets[queue], list) {
				t = (const struct task *)tl;
				entry = sched_activity_entry(tmp_activity, t->process);
				if (!TASK_IS_TASKLET(t) && t->call_date) {
					lat = now_ns - t->call_date;
					if ((int64_t)lat > 0)
						entry->lat_time += lat;
				}
				entry->calls++;
			}
		}
	}

	/* hopefully we're done */
	thread_release();

	chunk_reset(&trash);

	tot_calls = 0;
	for (i = 0; i < 256; i++)
		tot_calls += tmp_activity[i].calls;

	qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity);

	chunk_appendf(&trash, "Running tasks: %d (%d threads)\n"
	              "  function                     places     %%    lat_tot   lat_avg\n",
	              (int)tot_calls, global.nbthread);

	for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
		chunk_reset(name_buffer);

		if (!tmp_activity[i].func)
			chunk_printf(name_buffer, "other");
		else
			resolve_sym_name(name_buffer, "", tmp_activity[i].func);

		/* reserve 35 chars for name+' '+#calls, knowing that longer names
		 * are often used for less often called functions.
		 */
		max = 35 - name_buffer->data;
		if (max < 1)
			max = 1;
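		/* one decimal of precision: "%3d.%1d" prints the integer percent
		 * then (calls * 1000 / total) modulo 10 as the decimal digit
		 */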
		chunk_appendf(&trash, "  %s%*llu  %3d.%1d",
		              name_buffer->area, max, (unsigned long long)tmp_activity[i].calls,
		              (int)(100ULL * tmp_activity[i].calls / tot_calls),
		              (int)((1000ULL * tmp_activity[i].calls / tot_calls) % 10));
		print_time_short(&trash, "   ", tmp_activity[i].lat_time, "");
		print_time_short(&trash, "   ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
	}

	if (ci_putchk(si_ic(si), &trash) == -1) {
		/* failed, try again */
		si_rx_room_blk(si);
		return 0;
	}
	return 1;
}

/* config keyword parsers */
static struct cfg_kw_list cfg_kws = {ILH, {
	{ CFG_GLOBAL, "profiling.tasks", cfg_parse_prof_tasks },
	{ 0, NULL, NULL }
}};

INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);

/* register cli keywords */
static struct cli_kw_list cli_kws = {{ },{
	{ { "show", "profiling", NULL }, "show profiling : show CPU profiling options", NULL, cli_io_handler_show_profiling, NULL },
	{ { "show", "tasks", NULL },     "show tasks     : show running tasks", NULL, cli_io_handler_show_tasks, NULL },
	{ { "set", "profiling", NULL },  "set profiling  : enable/disable resource profiling", cli_parse_set_profiling, NULL },
	{{},}
}};

INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);