MINOR: activity: enable automatic profiling turn on/off Instead of having to manually turn task profiling on/off in the configuration, by default it will work in "auto" mode, which automatically turns on on any thread experiencing sustained loop latencies over one millisecond averaged over the last 1024 samples. This may happen with configs using lots of regex (thing map_reg for example, which is the lazy way to convert Apache's rewrite rules but must not be abused), and such high latencies affect all the process and the problem is most often intermittent (e.g. hitting a map which is only used for certain host names). Thus now by default, with profiling set to "auto", it remains off all the time until something bad happens. This also helps better focus on the issues when looking at the logs as well as in "show sess" output. It automatically turns off when the average loop latency over the last 1024 calls goes below 990 microseconds (which typically takes a while when in idle). This patch could be backported to stable versions after a bit more exposure, as it definitely improves observability and the ability to quickly spot the culprit. In this case, previous patch ("MINOR: activity: make the profiling status per thread and not global") must also be taken.

commit: d2d3348acbaca96a2e5254eed3daa2feb7f724c8 [log] [tgz]
author: Willy Tarreau <w@1wt.eu> Thu Apr 25 17:09:07 2019 +0200
committer: Willy Tarreau <w@1wt.eu> Thu Apr 25 17:26:46 2019 +0200
tree: 2757ed25f6ee811881c937cf5bd46357126a4c75
parent: d9add3acc815f56408a4c8ba0c45baf42745f718 [diff]
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 8323737..205f5de 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt

@@ -1462,11 +1462,22 @@
   Disables the use of SO_REUSEPORT - see socket(7). It is equivalent to the
   command line argument "-dR".
 
-profiling.tasks { on | off }
-  Enables ('on') or disables ('off') per-task CPU profiling. CPU profiling per
-  task can be very convenient to report where the time is spent and which
-  requests have what effect on which other request. It is not enabled by
-  default as it may consume a little bit extra CPU. This requires a system
+profiling.tasks { auto | on | off }
+  Enables ('on') or disables ('off') per-task CPU profiling. When set to 'auto'
+  the profiling automatically turns on a thread when it starts to suffer from
+  an average latency of 1000 microseconds or higher as reported in the
+  "avg_loop_us" activity field, and automatically turns off when the latency
+  reutnrs below 990 microseconds (this value is an average over the last 1024
+  loops so it does not vary quickly and tends to significantly smooth short
+  spikes). It may also spontaneously trigger from time to time on overloaded
+  systems, containers, or virtual machines, or when the system swaps (which
+  must absolutely never happen on a load balancer).
+
+  CPU profiling per task can be very convenient to report where the time is
+  spent and which requests have what effect on which other request. Enabling
+  it will typically affect the overall's performance by less than 1%, thus it
+  is recommended to leave it to the default 'auto' value so that it only
+  operates when a problem is identified. This feature requires a system
   supporting the clock_gettime(2) syscall with clock identifiers
   CLOCK_MONOTONIC and CLOCK_THREAD_CPUTIME_ID, otherwise the reported time will
   be zero. This option may be changed at run time using "set profiling" on the

diff --git a/doc/management.txt b/doc/management.txt
index 952a25f..0d3944a 100644
--- a/doc/management.txt
+++ b/doc/management.txt

@@ -1667,7 +1667,7 @@
   delayed until the threshold is reached. A value of zero restores the initial
   setting.
 
-set profiling { tasks } { on | off }
+set profiling { tasks } { auto | on | off }
   Enables or disables CPU profiling for the indicated subsystem. This is
   equivalent to setting or clearing the "profiling" settings in the "global"
   section of the configuration file. Please also see "show profiling".

diff --git a/include/proto/activity.h b/include/proto/activity.h
index 922f493..dd78344 100644
--- a/include/proto/activity.h
+++ b/include/proto/activity.h

@@ -29,7 +29,10 @@
 #include <proto/freq_ctr.h>
 
 /* bit fields for "profiling" */
-#define HA_PROF_TASKS       0x00000001     /* enable per-task CPU profiling */
+#define HA_PROF_TASKS_OFF   0x00000000     /* per-task CPU profiling forced disabled */
+#define HA_PROF_TASKS_AUTO  0x00000001     /* per-task CPU profiling automatic */
+#define HA_PROF_TASKS_ON    0x00000002     /* per-task CPU profiling forced enabled */
+#define HA_PROF_TASKS_MASK  0x00000003     /* per-task CPU profiling mask */
 
 extern unsigned int profiling;
 extern unsigned long task_profiling_mask;
@@ -49,6 +52,13 @@
 	uint64_t new_cpu_time;
 	int64_t stolen;
 	uint32_t run_time;
+	uint32_t up, down;
+
+	/* 1 millisecond per loop on average over last 1024 iterations is
+	 * enough to turn on profiling.
+	 */
+	up = 1000;
+	down = up * 99 / 100;
 
 	new_cpu_time   = now_cpu_time();
 	new_mono_time  = now_mono_time();
@@ -69,12 +79,22 @@
 	run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec);
 	swrate_add(&activity[tid].avg_loop_us, TIME_STATS_SAMPLES, run_time);
 
+	/* reaching the "up" threshold on average switches profiling to "on"
+	 * when automatic, and going back below the "down" threshold switches
+	 * to off.
+	 */
 	if (!(task_profiling_mask & tid_bit)) {
-		if (unlikely(profiling & HA_PROF_TASKS))
-			_HA_ATOMIC_OR(&task_profiling_mask, tid_bit);
+		if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_ON ||
+			     ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AUTO && run_time >= up))) {
+			if (swrate_avg(activity[tid].avg_loop_us, TIME_STATS_SAMPLES) >= up)
+				_HA_ATOMIC_OR(&task_profiling_mask, tid_bit);
+		}
 	} else {
-		if (unlikely(!(profiling & HA_PROF_TASKS)))
-			_HA_ATOMIC_AND(&task_profiling_mask, ~tid_bit);
+		if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_OFF ||
+			     ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AUTO && run_time <= down))) {
+			if (swrate_avg(activity[tid].avg_loop_us, TIME_STATS_SAMPLES) <= down)
+				_HA_ATOMIC_AND(&task_profiling_mask, ~tid_bit);
+		}
 	}
 }
 

diff --git a/src/activity.c b/src/activity.c
index eac2c10..5ffa662 100644
--- a/src/activity.c
+++ b/src/activity.c

@@ -23,7 +23,7 @@
 
 
 /* bit field of profiling options. Beware, may be modified at runtime! */
-unsigned int profiling = 0;
+unsigned int profiling = HA_PROF_TASKS_AUTO;
 unsigned long task_profiling_mask = 0;
 
 /* One struct per thread containing all collected measurements */
@@ -49,11 +49,13 @@
 		return -1;
 
 	if (strcmp(args[1], "on") == 0)
-		profiling |=  HA_PROF_TASKS;
+		profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON;
+	else if (strcmp(args[1], "auto") == 0)
+		profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AUTO;
 	else if (strcmp(args[1], "off") == 0)
-		profiling &= ~HA_PROF_TASKS;
+		profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF;
 	else {
-		memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+		memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]);
 		return -1;
 	}
 	return 0;
@@ -62,27 +64,34 @@
 /* parse a "set profiling" command. It always returns 1. */
 static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private)
 {
-	unsigned int bit = 0;
-
 	if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
 		return 1;
 
-	if (strcmp(args[2], "tasks") == 0)
-		bit = HA_PROF_TASKS;
-	else {
+	if (strcmp(args[2], "tasks") != 0) {
 		appctx->ctx.cli.severity = LOG_ERR;
 		appctx->ctx.cli.msg = "Expects 'tasks'.\n";
 		appctx->st0 = CLI_ST_PRINT;
 		return 1;
 	}
 
-	if (strcmp(args[3], "on") == 0)
-		_HA_ATOMIC_OR(&profiling, bit);
-	else if (strcmp(args[3], "off") == 0)
-		_HA_ATOMIC_AND(&profiling, ~bit);
+	if (strcmp(args[3], "on") == 0) {
+		unsigned int old = profiling;
+		while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON))
+			;
+	}
+	else if (strcmp(args[3], "auto") == 0) {
+		unsigned int old = profiling;
+		while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AUTO))
+			;
+	}
+	else if (strcmp(args[3], "off") == 0) {
+		unsigned int old = profiling;
+		while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF))
+			;
+	}
 	else {
 		appctx->ctx.cli.severity = LOG_ERR;
-		appctx->ctx.cli.msg = "Expects either 'on' or 'off'.\n";
+		appctx->ctx.cli.msg = "Expects 'on', 'auto', or 'off'.\n";
 		appctx->st0 = CLI_ST_PRINT;
 		return 1;
 	}
@@ -95,14 +104,22 @@
 static int cli_io_handler_show_profiling(struct appctx *appctx)
 {
 	struct stream_interface *si = appctx->owner;
+	const char *str;
 
 	if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW)))
 		return 1;
 
 	chunk_reset(&trash);
 
+	switch (profiling & HA_PROF_TASKS_MASK) {
+	case HA_PROF_TASKS_AUTO: str="auto"; break;
+	case HA_PROF_TASKS_ON:   str="on"; break;
+	default:                 str="off"; break;
+	}
+
-	chunk_printf(&trash, "Per-task CPU profiling              : %s      # set profiling tasks {on|off}\n",
-		     (profiling & HA_PROF_TASKS) ? "on" : "off");
+	chunk_printf(&trash,
+	             "Per-task CPU profiling              : %s      # set profiling tasks {on|auto|off}\n",
+	             str);
 
 	if (ci_putchk(si_ic(si), &trash) == -1) {
 		/* failed, try again */

diff --git a/src/proto_http.c b/src/proto_http.c
index 8329324..82292c1 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c

@@ -3449,7 +3449,7 @@
 	s->task->calls     = 0;
 	s->task->cpu_time  = 0;
 	s->task->lat_time  = 0;
-	s->task->call_date = (profiling & HA_PROF_TASKS) ? now_mono_time() : 0;
+	s->task->call_date = ((profiling & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AUTO) ? now_mono_time() : 0;
 
 	s->logs.accept_date = date; /* user-visible date for logging */
 	s->logs.tv_accept = now;  /* corrected date for internal use */
commit	d2d3348acbaca96a2e5254eed3daa2feb7f724c8	[log] [tgz]
author	Willy Tarreau <w@1wt.eu>	Thu Apr 25 17:09:07 2019 +0200
committer	Willy Tarreau <w@1wt.eu>	Thu Apr 25 17:26:46 2019 +0200
tree	2757ed25f6ee811881c937cf5bd46357126a4c75
parent	d9add3acc815f56408a4c8ba0c45baf42745f718 [diff]