blob: 764613ffc1709be0c0234e6a5aae005a86df8710 [file] [log] [blame]
Willy Tarreau609aad92018-11-22 08:31:09 +01001/*
2 * activity measurement functions.
3 *
4 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreaub2551052020-06-09 09:07:15 +020013#include <haproxy/activity-t.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020014#include <haproxy/api.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020015#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020016#include <haproxy/channel.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020017#include <haproxy/cli.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020018#include <haproxy/freq_ctr.h>
Willy Tarreau5e539c92020-06-04 20:45:39 +020019#include <haproxy/stream_interface.h>
Willy Tarreau48fbcae2020-06-03 18:09:46 +020020#include <haproxy/tools.h>
Willy Tarreau75c62c22018-11-22 11:02:09 +010021
22
23/* bit field of profiling options. Beware, may be modified at runtime! */
Willy Tarreauaa622b82021-01-28 21:44:22 +010024unsigned int profiling = HA_PROF_TASKS_AOFF;
Willy Tarreaud9add3a2019-04-25 08:57:41 +020025unsigned long task_profiling_mask = 0;
Willy Tarreau609aad92018-11-22 08:31:09 +010026
27/* One struct per thread containing all collected measurements */
28struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { };
29
Willy Tarreau3fb6a7b2021-01-28 19:19:26 +010030/* One struct per function pointer hash entry (256 values, 0=collision) */
31struct sched_activity sched_activity[256] __attribute__((aligned(64))) = { };
Willy Tarreau609aad92018-11-22 08:31:09 +010032
33/* Updates the current thread's statistics about stolen CPU time. The unit for
34 * <stolen> is half-milliseconds.
35 */
36void report_stolen_time(uint64_t stolen)
37{
38 activity[tid].cpust_total += stolen;
39 update_freq_ctr(&activity[tid].cpust_1s, stolen);
40 update_freq_ctr_period(&activity[tid].cpust_15s, 15000, stolen);
41}
Willy Tarreau75c62c22018-11-22 11:02:09 +010042
43/* config parser for global "profiling.tasks", accepts "on" or "off" */
44static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *curpx,
Willy Tarreau01825162021-03-09 09:53:46 +010045 const struct proxy *defpx, const char *file, int line,
Willy Tarreau75c62c22018-11-22 11:02:09 +010046 char **err)
47{
48 if (too_many_args(1, args, err, NULL))
49 return -1;
50
51 if (strcmp(args[1], "on") == 0)
Willy Tarreaud2d33482019-04-25 17:09:07 +020052 profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON;
53 else if (strcmp(args[1], "auto") == 0)
Willy Tarreauaa622b82021-01-28 21:44:22 +010054 profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
Willy Tarreau75c62c22018-11-22 11:02:09 +010055 else if (strcmp(args[1], "off") == 0)
Willy Tarreaud2d33482019-04-25 17:09:07 +020056 profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF;
Willy Tarreau75c62c22018-11-22 11:02:09 +010057 else {
Willy Tarreaud2d33482019-04-25 17:09:07 +020058 memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]);
Willy Tarreau75c62c22018-11-22 11:02:09 +010059 return -1;
60 }
61 return 0;
62}
63
64/* parse a "set profiling" command. It always returns 1. */
65static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private)
66{
Willy Tarreau75c62c22018-11-22 11:02:09 +010067 if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
68 return 1;
69
Willy Tarreau9d008692019-08-09 11:21:01 +020070 if (strcmp(args[2], "tasks") != 0)
71 return cli_err(appctx, "Expects 'tasks'.\n");
Willy Tarreau75c62c22018-11-22 11:02:09 +010072
Willy Tarreaud2d33482019-04-25 17:09:07 +020073 if (strcmp(args[3], "on") == 0) {
74 unsigned int old = profiling;
Willy Tarreaucfa71012021-01-29 11:56:21 +010075 int i;
76
Willy Tarreaud2d33482019-04-25 17:09:07 +020077 while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON))
78 ;
Willy Tarreaucfa71012021-01-29 11:56:21 +010079 /* also flush current profiling stats */
80 for (i = 0; i < 256; i++) {
81 HA_ATOMIC_STORE(&sched_activity[i].calls, 0);
82 HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0);
83 HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0);
84 HA_ATOMIC_STORE(&sched_activity[i].func, NULL);
85 }
Willy Tarreaud2d33482019-04-25 17:09:07 +020086 }
87 else if (strcmp(args[3], "auto") == 0) {
88 unsigned int old = profiling;
Willy Tarreauaa622b82021-01-28 21:44:22 +010089 unsigned int new;
90
91 do {
92 if ((old & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AON)
93 new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AON;
94 else
95 new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
96 } while (!_HA_ATOMIC_CAS(&profiling, &old, new));
Willy Tarreaud2d33482019-04-25 17:09:07 +020097 }
98 else if (strcmp(args[3], "off") == 0) {
99 unsigned int old = profiling;
100 while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF))
101 ;
102 }
Willy Tarreau9d008692019-08-09 11:21:01 +0200103 else
104 return cli_err(appctx, "Expects 'on', 'auto', or 'off'.\n");
105
Willy Tarreau75c62c22018-11-22 11:02:09 +0100106 return 1;
107}
108
Willy Tarreau1bd67e92021-01-29 00:07:40 +0100109static int cmp_sched_activity(const void *a, const void *b)
110{
111 const struct sched_activity *l = (const struct sched_activity *)a;
112 const struct sched_activity *r = (const struct sched_activity *)b;
113
114 if (l->calls > r->calls)
115 return -1;
116 else if (l->calls < r->calls)
117 return 1;
118 else
119 return 0;
120}
121
Willy Tarreau75c62c22018-11-22 11:02:09 +0100122/* This function dumps all profiling settings. It returns 0 if the output
123 * buffer is full and it needs to be called again, otherwise non-zero.
124 */
125static int cli_io_handler_show_profiling(struct appctx *appctx)
126{
Willy Tarreau1bd67e92021-01-29 00:07:40 +0100127 struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
Willy Tarreau75c62c22018-11-22 11:02:09 +0100128 struct stream_interface *si = appctx->owner;
Willy Tarreau1bd67e92021-01-29 00:07:40 +0100129 struct buffer *name_buffer = get_trash_chunk();
Willy Tarreaud2d33482019-04-25 17:09:07 +0200130 const char *str;
Willy Tarreau1bd67e92021-01-29 00:07:40 +0100131 int i, max;
Willy Tarreau75c62c22018-11-22 11:02:09 +0100132
133 if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
134 return 1;
135
136 chunk_reset(&trash);
137
Willy Tarreaud2d33482019-04-25 17:09:07 +0200138 switch (profiling & HA_PROF_TASKS_MASK) {
Willy Tarreauaa622b82021-01-28 21:44:22 +0100139 case HA_PROF_TASKS_AOFF: str="auto-off"; break;
140 case HA_PROF_TASKS_AON: str="auto-on"; break;
Willy Tarreaud2d33482019-04-25 17:09:07 +0200141 case HA_PROF_TASKS_ON: str="on"; break;
142 default: str="off"; break;
143 }
144
Willy Tarreau1bd67e92021-01-29 00:07:40 +0100145 memcpy(tmp_activity, sched_activity, sizeof(tmp_activity));
146 qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity);
147
Willy Tarreaud2d33482019-04-25 17:09:07 +0200148 chunk_printf(&trash,
149 "Per-task CPU profiling : %s # set profiling tasks {on|auto|off}\n",
150 str);
Willy Tarreau75c62c22018-11-22 11:02:09 +0100151
Willy Tarreau1bd67e92021-01-29 00:07:40 +0100152 chunk_appendf(&trash, "Tasks activity:\n"
153 " function calls cpu_tot cpu_avg lat_tot lat_avg\n");
154
155 for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
156 chunk_reset(name_buffer);
157
158 if (!tmp_activity[i].func)
159 chunk_printf(name_buffer, "other");
160 else
161 resolve_sym_name(name_buffer, "", tmp_activity[i].func);
162
163 /* reserve 35 chars for name+' '+#calls, knowing that longer names
164 * are often used for less often called functions.
165 */
166 max = 35 - name_buffer->data;
167 if (max < 1)
168 max = 1;
169 chunk_appendf(&trash, " %s%*llu", name_buffer->area, max, (unsigned long long)tmp_activity[i].calls);
170
171 print_time_short(&trash, " ", tmp_activity[i].cpu_time, "");
172 print_time_short(&trash, " ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
173 print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
174 print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
175 }
176
Willy Tarreau75c62c22018-11-22 11:02:09 +0100177 if (ci_putchk(si_ic(si), &trash) == -1) {
178 /* failed, try again */
179 si_rx_room_blk(si);
180 return 0;
181 }
182 return 1;
183}
184
Willy Tarreau7eff06e2021-01-29 11:32:55 +0100185/* This function scans all threads' run queues and collects statistics about
186 * running tasks. It returns 0 if the output buffer is full and it needs to be
187 * called again, otherwise non-zero.
188 */
189static int cli_io_handler_show_tasks(struct appctx *appctx)
190{
191 struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
192 struct stream_interface *si = appctx->owner;
193 struct buffer *name_buffer = get_trash_chunk();
194 struct sched_activity *entry;
195 const struct tasklet *tl;
196 const struct task *t;
197 uint64_t now_ns, lat;
198 struct eb32sc_node *rqnode;
199 uint64_t tot_calls;
200 int thr, queue;
201 int i, max;
202
203 if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
204 return 1;
205
206 /* It's not possible to scan queues in small chunks and yield in the
207 * middle of the dump and come back again. So what we're doing instead
208 * is to freeze all threads and inspect their queues at once as fast as
209 * possible, using a sched_activity array to collect metrics with
210 * limited collision, then we'll report statistics only. The tasks'
211 * #calls will reflect the number of occurrences, and the lat_time will
Willy Tarreau75f72332021-01-29 15:04:16 +0100212 * reflect the latency when set. We prefer to take the time before
213 * calling thread_isolate() so that the wait time doesn't impact the
214 * measurement accuracy. However this requires to take care of negative
215 * times since tasks might be queued after we retrieve it.
Willy Tarreau7eff06e2021-01-29 11:32:55 +0100216 */
217
218 now_ns = now_mono_time();
219 memset(tmp_activity, 0, sizeof(tmp_activity));
220
221 thread_isolate();
222
223 /* 1. global run queue */
224
225#ifdef USE_THREAD
226 rqnode = eb32sc_first(&rqueue, ~0UL);
227 while (rqnode) {
228 t = eb32sc_entry(rqnode, struct task, rq);
229 entry = sched_activity_entry(tmp_activity, t->process);
230 if (t->call_date) {
231 lat = now_ns - t->call_date;
Willy Tarreau75f72332021-01-29 15:04:16 +0100232 if ((int64_t)lat > 0)
233 entry->lat_time += lat;
Willy Tarreau7eff06e2021-01-29 11:32:55 +0100234 }
235 entry->calls++;
236 rqnode = eb32sc_next(rqnode, ~0UL);
237 }
238#endif
239 /* 2. all threads's local run queues */
240 for (thr = 0; thr < global.nbthread; thr++) {
241 /* task run queue */
242 rqnode = eb32sc_first(&task_per_thread[thr].rqueue, ~0UL);
243 while (rqnode) {
244 t = eb32sc_entry(rqnode, struct task, rq);
245 entry = sched_activity_entry(tmp_activity, t->process);
246 if (t->call_date) {
247 lat = now_ns - t->call_date;
Willy Tarreau75f72332021-01-29 15:04:16 +0100248 if ((int64_t)lat > 0)
249 entry->lat_time += lat;
Willy Tarreau7eff06e2021-01-29 11:32:55 +0100250 }
251 entry->calls++;
252 rqnode = eb32sc_next(rqnode, ~0UL);
253 }
254
255 /* shared tasklet list */
256 list_for_each_entry(tl, mt_list_to_list(&task_per_thread[thr].shared_tasklet_list), list) {
257 t = (const struct task *)tl;
258 entry = sched_activity_entry(tmp_activity, t->process);
259 if (!TASK_IS_TASKLET(t) && t->call_date) {
260 lat = now_ns - t->call_date;
Willy Tarreau75f72332021-01-29 15:04:16 +0100261 if ((int64_t)lat > 0)
262 entry->lat_time += lat;
Willy Tarreau7eff06e2021-01-29 11:32:55 +0100263 }
264 entry->calls++;
265 }
266
267 /* classful tasklets */
268 for (queue = 0; queue < TL_CLASSES; queue++) {
269 list_for_each_entry(tl, &task_per_thread[thr].tasklets[queue], list) {
270 t = (const struct task *)tl;
271 entry = sched_activity_entry(tmp_activity, t->process);
272 if (!TASK_IS_TASKLET(t) && t->call_date) {
273 lat = now_ns - t->call_date;
Willy Tarreau75f72332021-01-29 15:04:16 +0100274 if ((int64_t)lat > 0)
275 entry->lat_time += lat;
Willy Tarreau7eff06e2021-01-29 11:32:55 +0100276 }
277 entry->calls++;
278 }
279 }
280 }
281
282 /* hopefully we're done */
283 thread_release();
284
285 chunk_reset(&trash);
286
287 tot_calls = 0;
288 for (i = 0; i < 256; i++)
289 tot_calls += tmp_activity[i].calls;
290
291 qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity);
292
293 chunk_appendf(&trash, "Running tasks: %d (%d threads)\n"
294 " function places %% lat_tot lat_avg\n",
295 (int)tot_calls, global.nbthread);
296
297 for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
298 chunk_reset(name_buffer);
299
300 if (!tmp_activity[i].func)
301 chunk_printf(name_buffer, "other");
302 else
303 resolve_sym_name(name_buffer, "", tmp_activity[i].func);
304
305 /* reserve 35 chars for name+' '+#calls, knowing that longer names
306 * are often used for less often called functions.
307 */
308 max = 35 - name_buffer->data;
309 if (max < 1)
310 max = 1;
311 chunk_appendf(&trash, " %s%*llu %3d.%1d",
312 name_buffer->area, max, (unsigned long long)tmp_activity[i].calls,
313 (int)(100ULL * tmp_activity[i].calls / tot_calls),
314 (int)((1000ULL * tmp_activity[i].calls / tot_calls)%10));
315 print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
316 print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
317 }
318
319 if (ci_putchk(si_ic(si), &trash) == -1) {
320 /* failed, try again */
321 si_rx_room_blk(si);
322 return 0;
323 }
324 return 1;
325}
326
Willy Tarreau75c62c22018-11-22 11:02:09 +0100327/* config keyword parsers */
328static struct cfg_kw_list cfg_kws = {ILH, {
329 { CFG_GLOBAL, "profiling.tasks", cfg_parse_prof_tasks },
330 { 0, NULL, NULL }
331}};
332
Willy Tarreau0108d902018-11-25 19:14:37 +0100333INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
334
Willy Tarreau75c62c22018-11-22 11:02:09 +0100335/* register cli keywords */
336static struct cli_kw_list cli_kws = {{ },{
337 { { "show", "profiling", NULL }, "show profiling : show CPU profiling options", NULL, cli_io_handler_show_profiling, NULL },
Willy Tarreau7eff06e2021-01-29 11:32:55 +0100338 { { "show", "tasks", NULL }, "show tasks : show running tasks", NULL, cli_io_handler_show_tasks, NULL },
Willy Tarreau75c62c22018-11-22 11:02:09 +0100339 { { "set", "profiling", NULL }, "set profiling : enable/disable CPU profiling", cli_parse_set_profiling, NULL },
340 {{},}
341}};
342
Willy Tarreau0108d902018-11-25 19:14:37 +0100343INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);