/*
 * activity measurement functions.
 *
 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <haproxy/activity-t.h>
#include <haproxy/api.h>
#include <haproxy/cfgparse.h>
#include <haproxy/channel.h>
#include <haproxy/cli.h>
#include <haproxy/freq_ctr.h>
#include <haproxy/stream_interface.h>
#include <haproxy/tools.h>


/* bit field of profiling options. Beware, may be modified at runtime! The
 * HA_PROF_TASKS_AON/AOFF values are the "auto" mode, and record whether
 * profiling is currently engaged (AON) or not (AOFF).
 */
unsigned int profiling = HA_PROF_TASKS_AOFF;
unsigned long task_profiling_mask = 0;

/* One struct per thread containing all collected measurements */
struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { };

/* One struct per function pointer hash entry (256 values, 0=collision) */
struct sched_activity sched_activity[256] __attribute__((aligned(64))) = { };
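
/* Illustration only: sched_activity_entry(), used further down and defined
 * elsewhere in the haproxy/activity* headers, maps a task's function pointer
 * to one of the 256 buckets above. Below is a minimal, hypothetical sketch of
 * such a keyed lookup; the function name, the hash constant and the
 * non-atomic slot claiming are assumptions for illustration (the real lookup
 * must claim slots atomically), with entry 0 absorbing collisions as the
 * comment on sched_activity[] describes.
 */
static inline struct sched_activity *sched_activity_entry_sketch(struct sched_activity *array,
                                                                 const void *func)
{
        /* hypothetical multiplicative pointer hash, truncated to 8 bits */
        uint8_t bucket = (((size_t)func >> 4) * 2654435761UL) >> 24;

        if (bucket && (!array[bucket].func || array[bucket].func == func)) {
                array[bucket].func = func; /* claim a free slot or reuse a matching one */
                return &array[bucket];
        }
        return &array[0]; /* bucket 0 is the shared collision entry */
}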

/* Updates the current thread's statistics about stolen CPU time. The unit for
 * <stolen> is half-milliseconds.
 */
void report_stolen_time(uint64_t stolen)
{
        activity[tid].cpust_total += stolen;
        update_freq_ctr(&activity[tid].cpust_1s, stolen);
        update_freq_ctr_period(&activity[tid].cpust_15s, 15000, stolen);
}
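
/* Illustration only: a hypothetical caller timing a section that should have
 * taken no measurable time, then reporting the excess in the half-millisecond
 * unit expected by report_stolen_time() above. Both the function name and the
 * nanosecond timestamps are assumptions for illustration.
 */
static inline void report_stolen_time_ns_sketch(uint64_t before_ns, uint64_t after_ns)
{
        uint64_t stolen_ns = after_ns - before_ns;

        if (stolen_ns >= 500000ULL) /* at least one half-millisecond elapsed */
                report_stolen_time(stolen_ns / 500000ULL);
}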

/* config parser for global "profiling.tasks", accepts "on", "auto" or "off" */
static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *curpx,
                                struct proxy *defpx, const char *file, int line,
                                char **err)
{
        if (too_many_args(1, args, err, NULL))
                return -1;

        if (strcmp(args[1], "on") == 0)
                profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON;
        else if (strcmp(args[1], "auto") == 0)
                profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
        else if (strcmp(args[1], "off") == 0)
                profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF;
        else {
                memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]);
                return -1;
        }
        return 0;
}
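
/* Example (illustrative, not from the original sources): the keyword parsed
 * above is set in the "global" section of the configuration file:
 *
 *      global
 *          profiling.tasks auto
 */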

/* parse a "set profiling" command. It always returns 1. */
static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private)
{
        if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
                return 1;

        if (strcmp(args[2], "tasks") != 0)
                return cli_err(appctx, "Expects 'tasks'.\n");

        if (strcmp(args[3], "on") == 0) {
                unsigned int old = profiling;
                int i;

                while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON))
                        ;
                /* also flush current profiling stats */
                for (i = 0; i < 256; i++) {
                        HA_ATOMIC_STORE(&sched_activity[i].calls, 0);
                        HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0);
                        HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0);
                        HA_ATOMIC_STORE(&sched_activity[i].func, NULL);
                }
        }
        else if (strcmp(args[3], "auto") == 0) {
                unsigned int old = profiling;
                unsigned int new;

                do {
                        if ((old & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AON)
                                new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AON;
                        else
                                new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
                } while (!_HA_ATOMIC_CAS(&profiling, &old, new));
        }
        else if (strcmp(args[3], "off") == 0) {
                unsigned int old = profiling;
                while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF))
                        ;
        }
        else
                return cli_err(appctx, "Expects 'on', 'auto', or 'off'.\n");

        return 1;
}
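
/* Example (illustrative, not from the original sources): driving this parser
 * from the CLI over the stats socket, assuming a hypothetical socket path of
 * /var/run/haproxy.sock:
 *
 *      $ echo "set profiling tasks on"  | socat stdio /var/run/haproxy.sock
 *      $ echo "set profiling tasks off" | socat stdio /var/run/haproxy.sock
 */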

/* qsort() comparator: sorts sched_activity entries by decreasing number of
 * calls so that the busiest functions are dumped first.
 */
static int cmp_sched_activity(const void *a, const void *b)
{
        const struct sched_activity *l = (const struct sched_activity *)a;
        const struct sched_activity *r = (const struct sched_activity *)b;

        if (l->calls > r->calls)
                return -1;
        else if (l->calls < r->calls)
                return 1;
        else
                return 0;
}

/* This function dumps all profiling settings. It returns 0 if the output
 * buffer is full and it needs to be called again, otherwise non-zero.
 */
static int cli_io_handler_show_profiling(struct appctx *appctx)
{
        struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
        struct stream_interface *si = appctx->owner;
        struct buffer *name_buffer = get_trash_chunk();
        const char *str;
        int i, max;

        if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
                return 1;

        chunk_reset(&trash);

        switch (profiling & HA_PROF_TASKS_MASK) {
        case HA_PROF_TASKS_AOFF: str = "auto-off"; break;
        case HA_PROF_TASKS_AON:  str = "auto-on";  break;
        case HA_PROF_TASKS_ON:   str = "on";       break;
        default:                 str = "off";      break;
        }

        memcpy(tmp_activity, sched_activity, sizeof(tmp_activity));
        qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity);

        chunk_printf(&trash,
                     "Per-task CPU profiling              : %s      # set profiling tasks {on|auto|off}\n",
                     str);

        chunk_appendf(&trash, "Tasks activity:\n"
                      "  function                      calls   cpu_tot   cpu_avg   lat_tot   lat_avg\n");

        for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
                chunk_reset(name_buffer);

                if (!tmp_activity[i].func)
                        chunk_printf(name_buffer, "other");
                else
                        resolve_sym_name(name_buffer, "", tmp_activity[i].func);

                /* reserve 35 chars for name+' '+#calls, knowing that longer names
                 * are often used for less often called functions.
                 */
                max = 35 - name_buffer->data;
                if (max < 1)
                        max = 1;
                chunk_appendf(&trash, "  %s%*llu", name_buffer->area, max, (unsigned long long)tmp_activity[i].calls);

                print_time_short(&trash, "   ", tmp_activity[i].cpu_time, "");
                print_time_short(&trash, "   ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
                print_time_short(&trash, "   ", tmp_activity[i].lat_time, "");
                print_time_short(&trash, "   ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
        }

        if (ci_putchk(si_ic(si), &trash) == -1) {
                /* failed, try again */
                si_rx_room_blk(si);
                return 0;
        }
        return 1;
}
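
/* Example "show profiling" output as produced by the handler above
 * (illustrative only; the function names and figures are made up):
 *
 *      Per-task CPU profiling              : auto-off      # set profiling tasks {on|auto|off}
 *      Tasks activity:
 *        function                      calls   cpu_tot   cpu_avg   lat_tot   lat_avg
 *        process_stream                12345   1.200s    97.20us   2.300s    186.3us
 *        h1_io_cb                       6789   456.7ms   67.27us   890.1ms   131.1us
 *        other                            12   1.200ms   100.0us   3.400ms   283.3us
 */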

/* This function scans all threads' run queues and collects statistics about
 * running tasks. It returns 0 if the output buffer is full and it needs to be
 * called again, otherwise non-zero.
 */
static int cli_io_handler_show_tasks(struct appctx *appctx)
{
        struct sched_activity tmp_activity[256] __attribute__((aligned(64)));
        struct stream_interface *si = appctx->owner;
        struct buffer *name_buffer = get_trash_chunk();
        struct sched_activity *entry;
        const struct tasklet *tl;
        const struct task *t;
        uint64_t now_ns, lat;
        struct eb32sc_node *rqnode;
        uint64_t tot_calls;
        int thr, queue;
        int i, max;

        if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
                return 1;

        /* It's not possible to scan queues in small chunks and yield in the
         * middle of the dump and come back again. So what we're doing instead
         * is to freeze all threads and inspect their queues at once as fast as
         * possible, using a sched_activity array to collect metrics with
         * limited collision, then we'll report statistics only. The tasks'
         * #calls will reflect the number of occurrences, and the lat_time will
         * reflect the latency when set.
         */

        now_ns = now_mono_time();
        memset(tmp_activity, 0, sizeof(tmp_activity));

        thread_isolate();

        /* 1. global run queue */

#ifdef USE_THREAD
        rqnode = eb32sc_first(&rqueue, ~0UL);
        while (rqnode) {
                t = eb32sc_entry(rqnode, struct task, rq);
                entry = sched_activity_entry(tmp_activity, t->process);
                if (t->call_date) {
                        lat = now_ns - t->call_date;
                        entry->lat_time += lat;
                }
                entry->calls++;
                rqnode = eb32sc_next(rqnode, ~0UL);
        }
#endif
        /* 2. all threads' local run queues */
        for (thr = 0; thr < global.nbthread; thr++) {
                /* task run queue */
                rqnode = eb32sc_first(&task_per_thread[thr].rqueue, ~0UL);
                while (rqnode) {
                        t = eb32sc_entry(rqnode, struct task, rq);
                        entry = sched_activity_entry(tmp_activity, t->process);
                        if (t->call_date) {
                                lat = now_ns - t->call_date;
                                entry->lat_time += lat;
                        }
                        entry->calls++;
                        rqnode = eb32sc_next(rqnode, ~0UL);
                }

                /* shared tasklet list */
                list_for_each_entry(tl, mt_list_to_list(&task_per_thread[thr].shared_tasklet_list), list) {
                        t = (const struct task *)tl;
                        entry = sched_activity_entry(tmp_activity, t->process);
                        if (!TASK_IS_TASKLET(t) && t->call_date) {
                                lat = now_ns - t->call_date;
                                entry->lat_time += lat;
                        }
                        entry->calls++;
                }

                /* classful tasklets */
                for (queue = 0; queue < TL_CLASSES; queue++) {
                        list_for_each_entry(tl, &task_per_thread[thr].tasklets[queue], list) {
                                t = (const struct task *)tl;
                                entry = sched_activity_entry(tmp_activity, t->process);
                                if (!TASK_IS_TASKLET(t) && t->call_date) {
                                        lat = now_ns - t->call_date;
                                        entry->lat_time += lat;
                                }
                                entry->calls++;
                        }
                }
        }

        /* hopefully we're done */
        thread_release();

        chunk_reset(&trash);

        tot_calls = 0;
        for (i = 0; i < 256; i++)
                tot_calls += tmp_activity[i].calls;

        qsort(tmp_activity, 256, sizeof(tmp_activity[0]), cmp_sched_activity);

        chunk_appendf(&trash, "Running tasks: %d (%d threads)\n"
                      "  function                      places     %%    lat_tot   lat_avg\n",
                      (int)tot_calls, global.nbthread);

        for (i = 0; i < 256 && tmp_activity[i].calls; i++) {
                chunk_reset(name_buffer);

                if (!tmp_activity[i].func)
                        chunk_printf(name_buffer, "other");
                else
                        resolve_sym_name(name_buffer, "", tmp_activity[i].func);

                /* reserve 35 chars for name+' '+#calls, knowing that longer names
                 * are often used for less often called functions.
                 */
                max = 35 - name_buffer->data;
                if (max < 1)
                        max = 1;
                chunk_appendf(&trash, "  %s%*llu  %3d.%1d",
                              name_buffer->area, max, (unsigned long long)tmp_activity[i].calls,
                              (int)(100ULL * tmp_activity[i].calls / tot_calls),
                              (int)((1000ULL * tmp_activity[i].calls / tot_calls) % 10));
                print_time_short(&trash, "   ", tmp_activity[i].lat_time, "");
                print_time_short(&trash, "   ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
        }

        if (ci_putchk(si_ic(si), &trash) == -1) {
                /* failed, try again */
                si_rx_room_blk(si);
                return 0;
        }
        return 1;
}
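
/* Example "show tasks" output as produced by the handler above (illustrative
 * only; the function names and figures are made up):
 *
 *      Running tasks: 3 (4 threads)
 *        function                      places     %    lat_tot   lat_avg
 *        process_stream                     2   66.6   812.3us   406.1us
 *        h1_io_cb                           1   33.3   153.0us   153.0us
 */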

/* config keyword parsers */
static struct cfg_kw_list cfg_kws = {ILH, {
        { CFG_GLOBAL, "profiling.tasks", cfg_parse_prof_tasks },
        { 0, NULL, NULL }
}};

INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);

/* register cli keywords */
static struct cli_kw_list cli_kws = {{ },{
        { { "show", "profiling", NULL }, "show profiling : show CPU profiling options",   NULL, cli_io_handler_show_profiling, NULL },
        { { "show", "tasks", NULL },     "show tasks     : show running tasks",           NULL, cli_io_handler_show_tasks, NULL },
        { { "set",  "profiling", NULL }, "set profiling  : enable/disable CPU profiling", cli_parse_set_profiling, NULL },
        {{},}
}};

INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);