MINOIR: checks/trace: Register a new trace source with its events

Add the trace support for the checks. Only tcp-check based health-checks are
supported, including the agent-check.

In traces, the first argument is always a check object. So it is easy to get
all info related to the check. The tcp-check ruleset, the conn-stream and
the connection, the server state...
diff --git a/src/check.c b/src/check.c
index f4653ac..4c625ef 100644
--- a/src/check.c
+++ b/src/check.c
@@ -65,8 +65,82 @@
 #include <haproxy/thread.h>
 #include <haproxy/time.h>
 #include <haproxy/tools.h>
+#include <haproxy/trace.h>
 #include <haproxy/vars.h>
 
+/* trace source and events */
+static void check_trace(enum trace_level level, uint64_t mask,
+			const struct trace_source *src,
+			const struct ist where, const struct ist func,
+			const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ *   check  - check
+ *
+ * CHECK_EV_* macros are defined in <haproxy/check.h>
+ */
+static const struct trace_event check_trace_events[] = {
+	{ .mask = CHK_EV_TASK_WAKE,   .name = "task_wake",        .desc = "Check task woken up" },
+	{ .mask = CHK_EV_HCHK_START,  .name = "hchck_start",      .desc = "Health-check started" },
+	{ .mask = CHK_EV_HCHK_WAKE,   .name = "hchck_wake",       .desc = "Health-check woken up" },
+	{ .mask = CHK_EV_HCHK_RUN,    .name = "hchck_run",        .desc = "Health-check running" },
+	{ .mask = CHK_EV_HCHK_END,    .name = "hchck_end",        .desc = "Health-check terminated" },
+	{ .mask = CHK_EV_HCHK_SUCC,   .name = "hchck_succ",       .desc = "Health-check success" },
+	{ .mask = CHK_EV_HCHK_ERR,    .name = "hchck_err",        .desc = "Health-check failure" },
+
+	{ .mask = CHK_EV_TCPCHK_EVAL, .name = "tcp_check_eval",   .desc = "tcp-check rules evaluation" },
+	{ .mask = CHK_EV_TCPCHK_ERR,  .name = "tcp_check_err",    .desc = "tcp-check evaluation error" },
+	{ .mask = CHK_EV_TCPCHK_CONN, .name = "tcp_check_conn",   .desc = "tcp-check connection rule" },
+	{ .mask = CHK_EV_TCPCHK_SND,  .name = "tcp_check_send",   .desc = "tcp-check send rule" },
+	{ .mask = CHK_EV_TCPCHK_EXP,  .name = "tcp_check_expect", .desc = "tcp-check expect rule" },
+	{ .mask = CHK_EV_TCPCHK_ACT,  .name = "tcp_check_action", .desc = "tcp-check action rule" },
+
+	{ .mask = CHK_EV_RX_DATA,     .name = "rx_data",          .desc = "receipt of data" },
+	{ .mask = CHK_EV_RX_BLK,      .name = "rx_blk",           .desc = "receipt blocked" },
+	{ .mask = CHK_EV_RX_ERR,      .name = "rx_err",           .desc = "receipt error" },
+
+	{ .mask = CHK_EV_TX_DATA,     .name = "tx_data",          .desc = "transmission of data" },
+	{ .mask = CHK_EV_TX_BLK,      .name = "tx_blk",           .desc = "transmission blocked" },
+	{ .mask = CHK_EV_TX_ERR,      .name = "tx_err",           .desc = "transmission error" },
+
+	{}
+};
+
+static const struct name_desc check_trace_lockon_args[4] = {
+	/* arg1 */ { /* already used by the check */ },
+	/* arg2 */ { },
+	/* arg3 */ { },
+	/* arg4 */ { }
+};
+
+static const struct name_desc check_trace_decoding[] = {
+#define CHK_VERB_CLEAN    1
+	{ .name="clean",    .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define CHK_VERB_MINIMAL  2
+	{ .name="minimal",  .desc="report info on stream and stream-interfaces" },
+#define CHK_VERB_SIMPLE   3
+	{ .name="simple",   .desc="add info on request and response channels" },
+#define CHK_VERB_ADVANCED 4
+	{ .name="advanced", .desc="add info on channel's buffer for data and developer levels only" },
+#define CHK_VERB_COMPLETE 5
+	{ .name="complete", .desc="add info on channel's buffer" },
+	{ /* end */ }
+};
+
+struct trace_source trace_check = {
+	.name = IST("check"),
+	.desc = "Health-check",
+	.arg_def = TRC_ARG1_CHK,  // TRACE()'s first argument is always a stream
+	.default_cb = check_trace,
+	.known_events = check_trace_events,
+	.lockon_args = check_trace_lockon_args,
+	.decoding = check_trace_decoding,
+	.report_events = ~0,  // report everything by default
+};
+
+#define TRACE_SOURCE &trace_check
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
 
 static int wake_srv_chk(struct conn_stream *cs);
 struct data_cb check_conn_cb = {
@@ -78,6 +152,154 @@
 /* Dummy frontend used to create all checks sessions. */
 struct proxy checks_fe;
 
+
+static inline void check_trace_buf(const struct buffer *buf, size_t ofs, size_t len)
+{
+	size_t block1, block2;
+	int line, ptr, newptr;
+
+	block1 = b_contig_data(buf, ofs);
+	block2 = 0;
+	if (block1 > len)
+		block1 = len;
+	block2 = len - block1;
+
+	ofs = b_peek_ofs(buf, ofs);
+
+	line = 0;
+	ptr = ofs;
+	while (ptr < ofs + block1) {
+		newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), ofs + block1, &line, ptr);
+		if (newptr == ptr)
+			break;
+		ptr = newptr;
+	}
+
+	line = ptr = 0;
+	while (ptr < block2) {
+		newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), block2, &line, ptr);
+		if (newptr == ptr)
+			break;
+		ptr = newptr;
+	}
+}
+
+/* trace source and events */
+static void check_trace(enum trace_level level, uint64_t mask,
+			const struct trace_source *src,
+			const struct ist where, const struct ist func,
+			const void *a1, const void *a2, const void *a3, const void *a4)
+{
+	const struct check *check = a1;
+	const struct server *srv = (check ? check->server : NULL);
+	const size_t        *val = a4;
+	const char *res;
+
+	if (!check || src->verbosity < CHK_VERB_CLEAN)
+		return;
+
+	chunk_appendf(&trace_buf, " : [%c] SRV=%s",
+		      ((check->type == PR_O2_EXT_CHK) ? 'E' : (check->state & CHK_ST_AGENT ? 'A' : 'H')),
+		      srv->id);
+
+	chunk_appendf(&trace_buf, " status=%d/%d %s",
+		      (check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
+		      (check->health >= check->rise) ? check->fall : check->rise,
+		      (check->health >= check->rise) ? (srv->uweight ? "UP" : "DRAIN") : "DOWN");
+
+	switch (check->result) {
+	case CHK_RES_NEUTRAL: res = "-";     break;
+	case CHK_RES_FAILED:   res = "FAIL"; break;
+	case CHK_RES_PASSED:   res = "PASS"; break;
+	case CHK_RES_CONDPASS: res = "COND"; break;
+	default:               res = "UNK";  break;
+	}
+
+	if (src->verbosity == CHK_VERB_CLEAN)
+		return;
+
+	chunk_appendf(&trace_buf, " - last=%s(%d)/%s(%d)",
+		      get_check_status_info(check->status), check->status,
+		      res, check->result);
+
+	/* Display the value to the 4th argument (level > STATE) */
+	if (src->level > TRACE_LEVEL_STATE && val)
+		chunk_appendf(&trace_buf, " - VAL=%lu", (long)*val);
+
+	chunk_appendf(&trace_buf, " check=%p(0x%08x)", check, check->state);
+
+	if (src->verbosity == CHK_VERB_MINIMAL)
+		return;
+
+
+	if (check->cs) {
+		chunk_appendf(&trace_buf, " - conn=%p(0x%08x)", check->cs->conn, check->cs->conn->flags);
+		chunk_appendf(&trace_buf, " cs=%p(0x%08x)", check->cs, check->cs->flags);
+	}
+
+	if (mask & CHK_EV_TCPCHK) {
+		const char *type;
+
+		switch (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) {
+			case TCPCHK_RULES_PGSQL_CHK: type = "PGSQL"; break;
+			case TCPCHK_RULES_REDIS_CHK: type = "REDIS"; break;
+			case TCPCHK_RULES_SMTP_CHK:  type = "SMTP";  break;
+			case TCPCHK_RULES_HTTP_CHK:  type = "HTTP";  break;
+			case TCPCHK_RULES_MYSQL_CHK: type = "MYSQL"; break;
+			case TCPCHK_RULES_LDAP_CHK:  type = "LDAP";  break;
+			case TCPCHK_RULES_SSL3_CHK:  type = "SSL3";  break;
+			case TCPCHK_RULES_AGENT_CHK: type = "AGENT"; break;
+			case TCPCHK_RULES_SPOP_CHK:  type = "SPOP";  break;
+			case TCPCHK_RULES_TCP_CHK:   type = "TCP";   break;
+			default:                     type = "???"; break;
+		}
+		if (check->current_step)
+			chunk_appendf(&trace_buf, " - tcp-check=(%s,%d)", type, tcpcheck_get_step_id(check, NULL));
+		else
+			chunk_appendf(&trace_buf, " - tcp-check=(%s,-)", type);
+	}
+
+	/* Display bi and bo buffer info (level > USER & verbosity > SIMPLE) */
+	if (src->level > TRACE_LEVEL_USER) {
+		const struct buffer *buf = NULL;
+
+		chunk_appendf(&trace_buf, " bi=%u@%p+%u/%u",
+			      (unsigned int)b_data(&check->bi), b_orig(&check->bi),
+			      (unsigned int)b_head_ofs(&check->bi), (unsigned int)b_size(&check->bi));
+		chunk_appendf(&trace_buf, " bo=%u@%p+%u/%u",
+			      (unsigned int)b_data(&check->bo), b_orig(&check->bo),
+			      (unsigned int)b_head_ofs(&check->bo), (unsigned int)b_size(&check->bo));
+
+		if (src->verbosity >= CHK_VERB_ADVANCED && (mask & (CHK_EV_RX)))
+			buf = (b_is_null(&check->bi) ? NULL : &check->bi);
+		else if (src->verbosity >= CHK_VERB_ADVANCED && (mask & (CHK_EV_TX)))
+			buf = (b_is_null(&check->bo) ? NULL : &check->bo);
+
+		if (buf) {
+			if ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK) {
+				int full = (src->verbosity == CHK_VERB_COMPLETE);
+
+				chunk_memcat(&trace_buf, "\n\t", 2);
+				htx_dump(&trace_buf, htxbuf(buf), full);
+			}
+			else {
+				int max = ((src->verbosity == CHK_VERB_COMPLETE) ? 1024 : 256);
+
+				chunk_memcat(&trace_buf, "\n", 1);
+				if (b_data(buf) > max) {
+					check_trace_buf(buf, 0, max);
+					chunk_memcat(&trace_buf, "  ...\n", 6);
+				}
+				else
+					check_trace_buf(buf, 0, b_data(buf));
+			}
+
+		}
+	}
+
+}
+
+
 /**************************************************************************/
 /************************ Handle check results ****************************/
 /**************************************************************************/
@@ -222,6 +444,8 @@
 	short prev_status = check->status;
 	int report = 0;
 
+	TRACE_POINT(CHK_EV_HCHK_RUN, check);
+
 	if (status == HCHK_STATUS_START) {
 		check->result = CHK_RES_UNKNOWN;	/* no result yet */
 		check->desc[0] = '\0';
@@ -261,6 +485,7 @@
 	    return;
 	report = 0;
 
+
 	switch (check->result) {
 	case CHK_RES_FAILED:
 		/* Failure to connect to the agent as a secondary check should not
@@ -338,6 +563,7 @@
 	if (check->health > 0)
 		return;
 
+	TRACE_STATE("health-check failed, set server DOWN", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
 	/* We only report a reason for the check if we did not do so previously */
 	srv_set_stopped(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
 }
@@ -372,6 +598,7 @@
 	if ((check->state & CHK_ST_AGENT) && s->next_state == SRV_ST_STOPPING)
 		return;
 
+	TRACE_STATE("health-check succeeded, set server RUNNING", CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
 	srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
 }
 
@@ -400,6 +627,7 @@
 	if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
 		return;
 
+	TRACE_STATE("health-check condionnaly succeeded, set server STOPPING", CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
 	srv_set_stopping(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
 }
 
@@ -554,8 +782,9 @@
 	struct buffer *chk;
 	int step;
 
-	if (check->result != CHK_RES_UNKNOWN)
+	if (check->result != CHK_RES_UNKNOWN) {
 		return;
+	}
 
 	errno = unclean_errno(errno_bck);
 	if (conn && errno)
@@ -565,6 +794,8 @@
 	    !(cs->flags & CS_FL_ERROR) && !expired)
 		return;
 
+	TRACE_ENTER(CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check, 0, 0, (size_t[]){expired});
+
 	/* we'll try to build a meaningful error message depending on the
 	 * context of the error possibly present in conn->err_code, and the
 	 * socket error possibly collected above. This is useful to know the
@@ -575,8 +806,10 @@
 	if (check->type == PR_O2_TCPCHK_CHK &&
 	    (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK) {
 		step = tcpcheck_get_step_id(check, NULL);
-		if (!step)
+		if (!step) {
+			TRACE_DEVEL("initial connection failure", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
 			chunk_printf(chk, " at initial connection step of tcp-check");
+		}
 		else {
 			chunk_printf(chk, " at step %d of tcp-check", step);
 			/* we were looking for a string */
@@ -585,6 +818,7 @@
 					chunk_appendf(chk, " (connect port %d)" ,check->current_step->connect.port);
 				else
 					chunk_appendf(chk, " (connect)");
+				TRACE_DEVEL("connection failure", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
 			}
 			else if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT) {
 				struct tcpcheck_expect *expect = &check->current_step->expect;
@@ -633,9 +867,11 @@
 					chunk_appendf(chk, " (undefined expect!)");
 					break;
 				}
+				TRACE_DEVEL("expect rule failed", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
 			}
 			else if (check->current_step && check->current_step->action == TCPCHK_ACT_SEND) {
 				chunk_appendf(chk, " (send)");
+				TRACE_DEVEL("send rule failed", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
 			}
 
 			if (check->current_step && check->current_step->comment)
@@ -665,7 +901,6 @@
 
 	if (check->state & CHK_ST_PORT_MISS) {
 		/* NOTE: this is reported after <fall> tries */
-		chunk_printf(chk, "No port available for the TCP connection");
 		set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
 	}
 
@@ -709,6 +944,7 @@
 		set_server_check_status(check, tout, err_msg);
 	}
 
+	TRACE_LEAVE(CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
 	return;
 }
 
@@ -785,6 +1021,7 @@
 	struct email_alertq *q = container_of(check, typeof(*q), check);
 	int ret = 0;
 
+	TRACE_ENTER(CHK_EV_HCHK_WAKE, check);
 	if (check->server)
 		HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
 	else
@@ -803,6 +1040,7 @@
 		 * main processing task so let's simply wake it up. If we get here,
 		 * we expect errno to still be valid.
 		 */
+		TRACE_ERROR("report connection error", CHK_EV_HCHK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
 		chk_report_conn_err(check, errno, 0);
 		task_wakeup(check->task, TASK_WOKEN_IO);
 	}
@@ -830,6 +1068,7 @@
 	else
 		HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
 
+	TRACE_LEAVE(CHK_EV_HCHK_WAKE, check);
 	return ret;
 }
 
@@ -858,25 +1097,32 @@
 	int rv;
 	int expired = tick_is_expired(t->expire, now_ms);
 
+	TRACE_ENTER(CHK_EV_TASK_WAKE, check);
+
 	if (check->server)
 		HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
 	if (!(check->state & CHK_ST_INPROGRESS)) {
 		/* no check currently running */
-		if (!expired) /* woke up too early */
+		if (!expired) /* woke up too early */ {
+			TRACE_STATE("health-check wake up too early", CHK_EV_TASK_WAKE, check);
 			goto out_unlock;
+		}
 
 		/* we don't send any health-checks when the proxy is
 		 * stopped, the server should not be checked or the check
 		 * is disabled.
 		 */
 		if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
-		    proxy->disabled)
+		    proxy->disabled) {
+			TRACE_STATE("health-check paused or disabled", CHK_EV_TASK_WAKE, check);
 			goto reschedule;
+		}
 
 		/* we'll initiate a new check */
 		set_server_check_status(check, HCHK_STATUS_START, NULL);
 
 		check->state |= CHK_ST_INPROGRESS;
+		TRACE_STATE("init new health-check", CHK_EV_TASK_WAKE|CHK_EV_HCHK_START, check);
 
 		task_set_affinity(t, tid_bit);
 
@@ -894,10 +1140,12 @@
 			 * error would have already been detected
 			 */
 			if ((conn->flags & CO_FL_ERROR) || cs->flags & CS_FL_ERROR || expired) {
+				TRACE_ERROR("report connection error", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
 				chk_report_conn_err(check, 0, expired);
 			}
 			else {
 				if (check->state & CHK_ST_CLOSE_CONN) {
+					TRACE_DEVEL("closing current connection", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
 					cs_destroy(cs);
 					cs = NULL;
 					conn = NULL;
@@ -905,12 +1153,15 @@
 					check->state &= ~CHK_ST_CLOSE_CONN;
 					tcpcheck_main(check);
 				}
-				if (check->result == CHK_RES_UNKNOWN)
+				if (check->result == CHK_RES_UNKNOWN) {
+					TRACE_DEVEL("health-check not expired", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
 					goto out_unlock; /* timeout not reached, wait again */
+				}
 			}
 		}
 
 		/* check complete or aborted */
+		TRACE_STATE("health-check complete or aborted", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END, check);
 
 		check->current_step = NULL;
 
@@ -945,14 +1196,17 @@
 		if (check->server) {
 			if (check->result == CHK_RES_FAILED) {
 				/* a failure or timeout detected */
+				TRACE_DEVEL("report failure", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
 				check_notify_failure(check);
 			}
 			else if (check->result == CHK_RES_CONDPASS) {
 				/* check is OK but asks for stopping mode */
+				TRACE_DEVEL("report conditionnal success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
 				check_notify_stopping(check);
 			}
 			else if (check->result == CHK_RES_PASSED) {
 				/* a success was detected */
+				TRACE_DEVEL("report success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
 				check_notify_success(check);
 			}
 		}
@@ -977,6 +1231,8 @@
  out_unlock:
 	if (check->server)
 		HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
+
+	TRACE_LEAVE(CHK_EV_TASK_WAKE, check);
 	return t;
 }
 
@@ -995,11 +1251,13 @@
 	struct check *check = target;
 
 	if ((check->state & CHK_ST_IN_ALLOC) && b_alloc(&check->bi)) {
+		TRACE_STATE("unblocking check, input buffer allocated", CHK_EV_TCPCHK_EXP|CHK_EV_RX_BLK, check);
 		check->state &= ~CHK_ST_IN_ALLOC;
 		tasklet_wakeup(check->wait_list.tasklet);
 		return 1;
 	}
 	if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc(&check->bo)) {
+		TRACE_STATE("unblocking check, output buffer allocated", CHK_EV_TCPCHK_SND|CHK_EV_TX_BLK, check);
 		check->state &= ~CHK_ST_OUT_ALLOC;
 		tasklet_wakeup(check->wait_list.tasklet);
 		return 1;