/*
 * activity measurement functions.
 *
 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <haproxy/activity-t.h>
#include <haproxy/api.h>
#include <haproxy/cfgparse.h>
#include <haproxy/channel.h>
#include <haproxy/cli.h>
#include <haproxy/freq_ctr.h>
#include <haproxy/stream_interface.h>
#include <haproxy/tools.h>


/* bit field of profiling options. Beware, may be modified at runtime! */
unsigned int profiling __read_mostly = HA_PROF_TASKS_AOFF;
unsigned long task_profiling_mask __read_mostly = 0;

/* One struct per thread containing all collected measurements */
struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { };

/* One struct per function pointer hash entry (256 values, 0=collision) */
struct sched_activity sched_activity[256] __attribute__((aligned(64))) = { };
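
/* Note (illustrative): entries in sched_activity[] are looked up by hashing a
 * task's ->process function pointer into the 0..255 range with
 * sched_activity_entry() (declared alongside these types; its exact hash is
 * not reproduced here). As the comment above says, collisions fall back to
 * entry 0, whose func pointer stays NULL and which the dump handlers below
 * report as "other". Typical lookup pattern, as used further down in this
 * file:
 *
 *     struct sched_activity *entry;
 *
 *     entry = sched_activity_entry(tmp_activity, t->process);
 *     entry->calls++;
 */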

/* Updates the current thread's statistics about stolen CPU time. The unit for
 * <stolen> is half-milliseconds.
 */
void report_stolen_time(uint64_t stolen)
{
	activity[tid].cpust_total += stolen;
	update_freq_ctr(&activity[tid].cpust_1s, stolen);
	update_freq_ctr_period(&activity[tid].cpust_15s, 15000, stolen);
}
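
/* Usage sketch (illustrative only; the real caller lives outside this file):
 * a caller that expected to sleep <expected_ms> milliseconds but observed
 * <observed_ms> of wall-clock time could report the excess, converted to the
 * half-millisecond unit documented above:
 *
 *     uint64_t diff_ms = observed_ms - expected_ms;
 *
 *     if (diff_ms)
 *         report_stolen_time(diff_ms * 2);   // 1 ms = 2 half-milliseconds
 */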

/* config parser for global "profiling.tasks", accepts "on", "auto" or "off" */
static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *curpx,
                                const struct proxy *defpx, const char *file, int line,
                                char **err)
{
	if (too_many_args(1, args, err, NULL))
		return -1;

	if (strcmp(args[1], "on") == 0)
		profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON;
	else if (strcmp(args[1], "auto") == 0)
		profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
	else if (strcmp(args[1], "off") == 0)
		profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF;
	else {
		memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]);
		return -1;
	}
	return 0;
}
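
/* Example (illustrative) configuration for the keyword parsed above, placed
 * in the "global" section:
 *
 *     global
 *         profiling.tasks auto
 *
 * "on" forces per-task CPU profiling, "off" disables it, and "auto" starts in
 * the automatic-off state (HA_PROF_TASKS_AOFF), from which the runtime may
 * move between the "auto-on" and "auto-off" states reported by
 * "show profiling".
 */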

/* parse a "set profiling" command. It always returns 1. */
static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private)
{
	if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
		return 1;

	if (strcmp(args[2], "memory") == 0) {
		return cli_err(appctx, "Memory profiling not compiled in.\n");
	}

	if (strcmp(args[2], "tasks") != 0)
		return cli_err(appctx, "Expects either 'tasks' or 'memory'.\n");

	if (strcmp(args[3], "on") == 0) {
		unsigned int old = profiling;
		int i;

		while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON))
			;
		/* also flush current profiling stats */
		for (i = 0; i < 256; i++) {
			HA_ATOMIC_STORE(&sched_activity[i].calls, 0);
			HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0);
			HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0);
			HA_ATOMIC_STORE(&sched_activity[i].func, NULL);
		}
	}
	else if (strcmp(args[3], "auto") == 0) {
		unsigned int old = profiling;
		unsigned int new;

		do {
			if ((old & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AON)
				new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AON;
			else
				new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
		} while (!_HA_ATOMIC_CAS(&profiling, &old, new));
	}
	else if (strcmp(args[3], "off") == 0) {
		unsigned int old = profiling;
		while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF))
			;
	}
	else
		return cli_err(appctx, "Expects 'on', 'auto', or 'off'.\n");

	return 1;
}
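
/* Example (illustrative) runtime usage on the CLI / stats socket; the socket
 * path below is hypothetical and depends on the local configuration:
 *
 *     $ echo "set profiling tasks on"   | socat stdio /var/run/haproxy.sock
 *     $ echo "set profiling tasks auto" | socat stdio /var/run/haproxy.sock
 *     $ echo "show profiling"           | socat stdio /var/run/haproxy.sock
 *
 * Note that switching to "on" also flushes the accumulated sched_activity
 * counters (see the loop above), so the next "show profiling" starts from a
 * clean slate.
 */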

/* qsort() comparator for sched_activity entries: sorts by number of calls,
 * highest first, so that the dump handlers below list the most frequently
 * called functions at the top.
 */
static int cmp_sched_activity(const void *a, const void *b)
{
	const struct sched_activity *l = (const struct sched_activity *)a;
	const struct sched_activity *r = (const struct sched_activity *)b;

	if (l->calls > r->calls)
		return -1;
	else if (l->calls < r->calls)
		return 1;
	else
		return 0;
}

/* This function dumps all profiling settings. It returns 0 if the output
 * buffer is full and it needs to be called again, otherwise non-zero.
 */
static int cli_io_handler_show_profiling(struct appctx *appctx)
{
	struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
	struct stream_interface *si = appctx->owner;
	struct buffer *name_buffer = get_trash_chunk();
	const char *str;
	int i, max;

	if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
		return 1;

	chunk_reset(&trash);

	switch (profiling & HA_PROF_TASKS_MASK) {
	case HA_PROF_TASKS_AOFF: str="auto-off"; break;
	case HA_PROF_TASKS_AON:  str="auto-on"; break;
	case HA_PROF_TASKS_ON:   str="on"; break;
	default:                 str="off"; break;
	}

	memcpy(tmp_activity, sched_activity, sizeof(tmp_activity));
	qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity);

	chunk_printf(&trash,
	             "Per-task CPU profiling : %-8s # set profiling tasks {on|auto|off}\n"
	             "Memory usage profiling : %-8s # set profiling memory {on|off}\n",
	             str, (profiling & HA_PROF_MEMORY) ? "on" : "off");

	chunk_appendf(&trash, "Tasks activity:\n"
	              " function calls cpu_tot cpu_avg lat_tot lat_avg\n");

	for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
		chunk_reset(name_buffer);

		if (!tmp_activity[i].func)
			chunk_printf(name_buffer, "other");
		else
			resolve_sym_name(name_buffer, "", tmp_activity[i].func);

		/* reserve 35 chars for name+' '+#calls, knowing that longer names
		 * are often used for less often called functions.
		 */
		max = 35 - name_buffer->data;
		if (max < 1)
			max = 1;
		chunk_appendf(&trash, " %s%*llu", name_buffer->area, max, (unsigned long long)tmp_activity[i].calls);

		print_time_short(&trash, " ", tmp_activity[i].cpu_time, "");
		print_time_short(&trash, " ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
		print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
		print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
	}

	if (ci_putchk(si_ic(si), &trash) == -1) {
		/* failed, try again */
		si_rx_room_blk(si);
		return 0;
	}
	return 1;
}
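
/* For reference (illustrative, values omitted), the dump produced above has
 * this shape, with one line per non-empty sched_activity entry, busiest
 * first:
 *
 *     Per-task CPU profiling : auto-off # set profiling tasks {on|auto|off}
 *     Memory usage profiling : off      # set profiling memory {on|off}
 *     Tasks activity:
 *      function calls cpu_tot cpu_avg lat_tot lat_avg
 *      <resolved symbol or "other"> <calls> <times printed by print_time_short>
 */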

/* This function scans all threads' run queues and collects statistics about
 * running tasks. It returns 0 if the output buffer is full and it needs to be
 * called again, otherwise non-zero.
 */
static int cli_io_handler_show_tasks(struct appctx *appctx)
{
	struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
	struct stream_interface *si = appctx->owner;
	struct buffer *name_buffer = get_trash_chunk();
	struct sched_activity *entry;
	const struct tasklet *tl;
	const struct task *t;
	uint64_t now_ns, lat;
	struct eb32sc_node *rqnode;
	uint64_t tot_calls;
	int thr, queue;
	int i, max;

	if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
		return 1;

	/* It's not possible to scan queues in small chunks and yield in the
	 * middle of the dump and come back again. So instead we freeze all
	 * threads and inspect their queues at once as fast as possible, using
	 * a sched_activity array to collect metrics with limited collision,
	 * then we report statistics only. The tasks' #calls will reflect the
	 * number of occurrences, and the lat_time will reflect the latency
	 * when set. We prefer to take the time before calling thread_isolate()
	 * so that the wait time doesn't impact the measurement accuracy.
	 * However this requires taking care of negative times since tasks
	 * might be queued after we retrieve the time.
	 */

	now_ns = now_mono_time();
	memset(tmp_activity, 0, sizeof(tmp_activity));

	thread_isolate();

	/* 1. global run queue */

#ifdef USE_THREAD
	rqnode = eb32sc_first(&rqueue, ~0UL);
	while (rqnode) {
		t = eb32sc_entry(rqnode, struct task, rq);
		entry = sched_activity_entry(tmp_activity, t->process);
		if (t->call_date) {
			lat = now_ns - t->call_date;
			if ((int64_t)lat > 0)
				entry->lat_time += lat;
		}
		entry->calls++;
		rqnode = eb32sc_next(rqnode, ~0UL);
	}
#endif
	/* 2. all threads' local run queues */
	for (thr = 0; thr < global.nbthread; thr++) {
		/* task run queue */
		rqnode = eb32sc_first(&task_per_thread[thr].rqueue, ~0UL);
		while (rqnode) {
			t = eb32sc_entry(rqnode, struct task, rq);
			entry = sched_activity_entry(tmp_activity, t->process);
			if (t->call_date) {
				lat = now_ns - t->call_date;
				if ((int64_t)lat > 0)
					entry->lat_time += lat;
			}
			entry->calls++;
			rqnode = eb32sc_next(rqnode, ~0UL);
		}

		/* shared tasklet list */
		list_for_each_entry(tl, mt_list_to_list(&task_per_thread[thr].shared_tasklet_list), list) {
			t = (const struct task *)tl;
			entry = sched_activity_entry(tmp_activity, t->process);
			if (!TASK_IS_TASKLET(t) && t->call_date) {
				lat = now_ns - t->call_date;
				if ((int64_t)lat > 0)
					entry->lat_time += lat;
			}
			entry->calls++;
		}

		/* classful tasklets */
		for (queue = 0; queue < TL_CLASSES; queue++) {
			list_for_each_entry(tl, &task_per_thread[thr].tasklets[queue], list) {
				t = (const struct task *)tl;
				entry = sched_activity_entry(tmp_activity, t->process);
				if (!TASK_IS_TASKLET(t) && t->call_date) {
					lat = now_ns - t->call_date;
					if ((int64_t)lat > 0)
						entry->lat_time += lat;
				}
				entry->calls++;
			}
		}
	}

	/* hopefully we're done */
	thread_release();

	chunk_reset(&trash);

	tot_calls = 0;
	for (i = 0; i < 256; i++)
		tot_calls += tmp_activity[i].calls;

	qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity);

	chunk_appendf(&trash, "Running tasks: %d (%d threads)\n"
	              " function places %% lat_tot lat_avg\n",
	              (int)tot_calls, global.nbthread);

	for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
		chunk_reset(name_buffer);

		if (!tmp_activity[i].func)
			chunk_printf(name_buffer, "other");
		else
			resolve_sym_name(name_buffer, "", tmp_activity[i].func);

		/* reserve 35 chars for name+' '+#calls, knowing that longer names
		 * are often used for less often called functions.
		 */
		max = 35 - name_buffer->data;
		if (max < 1)
			max = 1;
		chunk_appendf(&trash, " %s%*llu %3d.%1d",
		              name_buffer->area, max, (unsigned long long)tmp_activity[i].calls,
		              (int)(100ULL * tmp_activity[i].calls / tot_calls),
		              (int)((1000ULL * tmp_activity[i].calls / tot_calls)%10));
		print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
		print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
	}

	if (ci_putchk(si_ic(si), &trash) == -1) {
		/* failed, try again */
		si_rx_room_blk(si);
		return 0;
	}
	return 1;
}
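
/* Example (illustrative) invocation, again on the CLI / stats socket (path is
 * hypothetical):
 *
 *     $ echo "show tasks" | socat stdio /var/run/haproxy.sock
 *
 * Since the handler freezes every thread with thread_isolate() for the whole
 * scan, this is a debugging aid rather than something to poll at high
 * frequency.
 */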

/* config keyword parsers */
static struct cfg_kw_list cfg_kws = {ILH, {
	{ CFG_GLOBAL, "profiling.tasks", cfg_parse_prof_tasks },
	{ 0, NULL, NULL }
}};

INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);

/* register cli keywords */
static struct cli_kw_list cli_kws = {{ },{
	{ { "show", "profiling", NULL }, "show profiling : show CPU profiling options", NULL, cli_io_handler_show_profiling, NULL },
	{ { "show", "tasks", NULL }, "show tasks : show running tasks", NULL, cli_io_handler_show_tasks, NULL },
	{ { "set", "profiling", NULL }, "set profiling : enable/disable resource profiling", cli_parse_set_profiling, NULL },
	{{},}
}};

INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
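
/* With the registrations above, the CLI "help" output is expected to include
 * the usage strings declared in cli_kws:
 *
 *     show profiling : show CPU profiling options
 *     show tasks : show running tasks
 *     set profiling : enable/disable resource profiling
 */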