[MINOR] term_trace: add better instrumentations to trace the code
A new member has been added to the struct session. It keeps a trace
of what block of code performs a close or a shutdown on a socket, and
in what sequence. This is extremely convenient for post-mortem analysis
where flag combinations and states seem impossible. A new ABORT_NOW()
macro has also been added to make the code immediately segfault where
called.
diff --git a/include/common/debug.h b/include/common/debug.h
index cbd752e..fe05a44 100644
--- a/include/common/debug.h
+++ b/include/common/debug.h
@@ -36,6 +36,11 @@
#define FSM_PRINTF(x...)
#endif
+/* This abort is more efficient than abort() because it does not mangle the
+ * stack and stops at the exact location we need.
+ */
+#define ABORT_NOW() (*(int*)0=0)
+
/* this one is provided for easy code tracing.
* Usage: TRACE(sess||0, fmt, args...);
* TRACE(sess, "");
diff --git a/include/proto/session.h b/include/proto/session.h
index 348e650..cdd01bf 100644
--- a/include/proto/session.h
+++ b/include/proto/session.h
@@ -36,6 +36,12 @@
void session_process_counters(struct session *s);
void sess_change_server(struct session *sess, struct server *newsrv);
+static void inline trace_term(struct session *s, unsigned int code)
+{
+ s->term_trace <<= TT_BIT_SHIFT;
+ s->term_trace |= code;
+}
+
#endif /* _PROTO_SESSION_H */
/*
diff --git a/include/types/session.h b/include/types/session.h
index c94401a..436f652 100644
--- a/include/types/session.h
+++ b/include/types/session.h
@@ -94,6 +94,62 @@
#define AN_RTR_ANY (AN_RTR_INSPECT|AN_RTR_HTTP_HDR|AN_RTR_HTTP_BODY)
+/* Termination sequence tracing.
+ *
+ * These values have to be set into the field term_trace of a session when
+ * closing a session (half or full). They are only meant for post-mortem
+ * analysis. The value must be assigned this way :
+ * trace_term(s, TT_XXX);
+ *
+ * One TT_XXX value is assigned to each location in the code which may be
+ * involved in a connection closing. Since a full session close generally
+ * involves 4 steps, we will be able to read these steps afterwards by simply
+ * checking the code. Value TT_NONE is zero and must never be set, as it means
+ * the connection was not closed. Value TT_ANON must be used when no value was
+ * assigned to a specific code part. Never ever reuse an already assigned code
+ * as it will defeat the purpose of this trace. It is wise to use a per-file
+ * anonymous value though.
+ */
+#define TT_BIT_SHIFT 8
+enum {
+ TT_NONE = 0,
+ TT_ANON = 1,
+ TT_CLIENT = 0x10,
+ TT_CLIENT_1,
+ TT_CLIENT_2,
+ TT_HTTP_CLI = 0x20,
+ TT_HTTP_CLI_1,
+ TT_HTTP_CLI_2,
+ TT_HTTP_CLI_3,
+ TT_HTTP_CLI_4,
+ TT_HTTP_CLI_5,
+ TT_HTTP_CLI_6,
+ TT_HTTP_CLI_7,
+ TT_HTTP_CLI_8,
+ TT_HTTP_CLI_9,
+ TT_HTTP_CLI_10,
+ TT_HTTP_SRV = 0x30,
+ TT_HTTP_SRV_1,
+ TT_HTTP_SRV_2,
+ TT_HTTP_SRV_3,
+ TT_HTTP_SRV_4,
+ TT_HTTP_SRV_5,
+ TT_HTTP_SRV_6,
+ TT_HTTP_SRV_7,
+ TT_HTTP_SRV_8,
+ TT_HTTP_SRV_9,
+ TT_HTTP_SRV_10,
+ TT_HTTP_SRV_11,
+ TT_HTTP_SRV_12,
+ TT_HTTP_SRV_13,
+ TT_HTTP_SRV_14,
+ TT_HTTP_CNT = 0x40,
+ TT_HTTP_CNT_1,
+ TT_HTTP_URI = 0x50,
+ TT_HTTP_URI_1,
+};
+
+
/*
* Note: some session flags have dependencies :
* - SN_DIRECT cannot exist without SN_ASSIGNED, because a server is
@@ -117,6 +173,7 @@
int srv_state; /* state of the server side */
int conn_retries; /* number of connect retries left */
int flags; /* some flags describing the session */
+ unsigned term_trace; /* term trace: 4*8 bits indicating which part of the code closed */
unsigned int analysis; /* bit field indicating remaining analysis to perform on data */
struct buffer *req; /* request buffer */
struct buffer *rep; /* response buffer */
diff --git a/src/client.c b/src/client.c
index f5d6bb9..77b1d96 100644
--- a/src/client.c
+++ b/src/client.c
@@ -108,6 +108,7 @@
s->flags = 0;
s->analysis = 0;
+ s->term_trace = 0;
/* if this session comes from a known monitoring system, we want to ignore
* it as soon as possible, which means closing it immediately for TCP.
@@ -380,11 +381,13 @@
*/
struct chunk msg = { .str = "HTTP/1.0 200 OK\r\n\r\n", .len = 19 };
client_retnclose(s, &msg); /* forge a 200 response */
+ trace_term(s, TT_CLIENT_1);
t->expire = s->rep->wex;
}
else if (p->mode == PR_MODE_HEALTH) { /* health check mode, no client reading */
struct chunk msg = { .str = "OK\n", .len = 3 };
client_retnclose(s, &msg); /* forge an "OK" response */
+ trace_term(s, TT_CLIENT_2);
t->expire = s->rep->wex;
}
else {
diff --git a/src/proto_http.c b/src/proto_http.c
index 07bdf40..6121a93 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -3016,6 +3016,7 @@
buffer_shutw_done(rep);
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
+ trace_term(t, TT_HTTP_CLI_1);
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_CLICL;
if (!(t->flags & SN_FINST_MASK)) {
@@ -3036,10 +3037,12 @@
buffer_shutr_done(req);
if (!(rep->flags & BF_SHUTW_STATUS)) {
EV_FD_CLR(t->cli_fd, DIR_RD);
+ trace_term(t, TT_HTTP_CLI_2);
} else {
/* output was already closed */
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
+ trace_term(t, TT_HTTP_CLI_3);
}
return 1;
}
@@ -3057,9 +3060,11 @@
/* FIXME: is this still true ? */
EV_FD_SET(t->cli_fd, DIR_RD);
req->rex = tick_add_ifset(now_ms, t->fe->timeout.client);
+ trace_term(t, TT_HTTP_CLI_4);
} else {
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
+ trace_term(t, TT_HTTP_CLI_5);
}
return 1;
}
@@ -3069,10 +3074,12 @@
req->flags |= BF_READ_TIMEOUT;
if (!(rep->flags & BF_SHUTW_STATUS)) {
EV_FD_CLR(t->cli_fd, DIR_RD);
+ trace_term(t, TT_HTTP_CLI_6);
} else {
/* output was already closed */
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
+ trace_term(t, TT_HTTP_CLI_7);
}
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_CLITO;
@@ -3099,9 +3106,11 @@
/* FIXME: is this still true ? */
EV_FD_SET(t->cli_fd, DIR_RD);
req->rex = tick_add_ifset(now_ms, t->fe->timeout.client);
+ trace_term(t, TT_HTTP_CLI_8);
} else {
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
+ trace_term(t, TT_HTTP_CLI_9);
}
if (!(t->flags & SN_ERR_MASK))
@@ -3144,6 +3153,7 @@
buffer_shutw_done(rep);
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
+ trace_term(t, TT_HTTP_CLI_10);
return 1;
}
}
@@ -3226,6 +3236,7 @@
else
srv_close_with_err(t, SN_ERR_CLICL, t->pend_pos ? SN_FINST_Q : SN_FINST_C, 0, NULL);
+ trace_term(t, TT_HTTP_SRV_1);
return 1;
}
else if (req->flags & BF_MAY_CONNECT) {
@@ -3248,6 +3259,7 @@
t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now);
srv_close_with_err(t, SN_ERR_PRXCOND, SN_FINST_T,
500, error_message(t, HTTP_ERR_500));
+ trace_term(t, TT_HTTP_SRV_2);
return 1;
}
@@ -3265,6 +3277,7 @@
t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now);
srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_Q,
503, error_message(t, HTTP_ERR_503));
+ trace_term(t, TT_HTTP_SRV_3);
if (t->srv)
t->srv->failed_conns++;
t->be->failed_conns++;
@@ -3315,6 +3328,8 @@
rdr.len += 4;
srv_close_with_err(t, SN_ERR_PRXCOND, SN_FINST_C, 302, &rdr);
+ trace_term(t, TT_HTTP_SRV_3);
+
/* FIXME: we should increase a counter of redirects per server and per backend. */
if (t->srv)
t->srv->cum_sess++;
@@ -3324,6 +3339,7 @@
t->fe->failed_req++;
srv_close_with_err(t, SN_ERR_PRXCOND, SN_FINST_C,
400, error_message(t, HTTP_ERR_400));
+ trace_term(t, TT_HTTP_SRV_4);
return 1;
}
@@ -3356,6 +3372,7 @@
* overwrite the client_retnclose() output.
*/
srv_close_with_err(t, SN_ERR_CLICL, SN_FINST_C, 0, NULL);
+ trace_term(t, TT_HTTP_SRV_5);
return 1;
}
if (!(req->flags & BF_WRITE_STATUS) && !tick_is_expired(req->cex, now_ms)) {
@@ -3498,6 +3515,7 @@
}
t->be->failed_resp++;
t->srv_state = SV_STCLOSE;
+ trace_term(t, TT_HTTP_SRV_6);
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_SRVCL;
if (!(t->flags & SN_FINST_MASK))
@@ -3514,6 +3532,7 @@
buffer_shutr_done(rep);
if (!(req->flags & BF_SHUTW_STATUS)) {
EV_FD_CLR(t->srv_fd, DIR_RD);
+ trace_term(t, TT_HTTP_SRV_7);
} else {
/* output was already closed */
fd_delete(t->srv_fd);
@@ -3522,6 +3541,7 @@
sess_change_server(t, NULL);
}
t->srv_state = SV_STCLOSE;
+ trace_term(t, TT_HTTP_SRV_8);
if (may_dequeue_tasks(t->srv, t->be))
process_srv_queue(t->srv);
@@ -3542,6 +3562,7 @@
if (!(rep->flags & BF_SHUTR_STATUS)) {
EV_FD_CLR(t->srv_fd, DIR_WR);
shutdown(t->srv_fd, SHUT_WR);
+ trace_term(t, TT_HTTP_SRV_9);
/* We must ensure that the read part is still alive when switching to shutw */
/* FIXME: is this still true ? */
EV_FD_SET(t->srv_fd, DIR_RD);
@@ -3553,6 +3574,7 @@
sess_change_server(t, NULL);
}
t->srv_state = SV_STCLOSE;
+ trace_term(t, TT_HTTP_SRV_10);
if (may_dequeue_tasks(t->srv, t->be))
process_srv_queue(t->srv);
@@ -3565,6 +3587,7 @@
rep->flags |= BF_READ_TIMEOUT;
if (!(req->flags & BF_SHUTW_STATUS)) {
EV_FD_CLR(t->srv_fd, DIR_RD);
+ trace_term(t, TT_HTTP_SRV_11);
} else {
fd_delete(t->srv_fd);
if (t->srv) {
@@ -3572,6 +3595,7 @@
sess_change_server(t, NULL);
}
t->srv_state = SV_STCLOSE;
+ trace_term(t, TT_HTTP_SRV_12);
if (may_dequeue_tasks(t->srv, t->be))
process_srv_queue(t->srv);
@@ -3589,6 +3613,7 @@
if (!(rep->flags & BF_SHUTR_STATUS)) {
EV_FD_CLR(t->srv_fd, DIR_WR);
shutdown(t->srv_fd, SHUT_WR);
+ trace_term(t, TT_HTTP_SRV_13);
/* We must ensure that the read part is still alive when switching to shutw */
/* FIXME: is this still needed ? */
EV_FD_SET(t->srv_fd, DIR_RD);
@@ -3600,6 +3625,7 @@
sess_change_server(t, NULL);
}
t->srv_state = SV_STCLOSE;
+ trace_term(t, TT_HTTP_SRV_14);
if (may_dequeue_tasks(t->srv, t->be))
process_srv_queue(t->srv);
@@ -3689,6 +3715,7 @@
/* unknown data source or internal error */
s->txn.status = 500;
client_retnclose(s, error_message(s, HTTP_ERR_500));
+ trace_term(s, TT_HTTP_CNT_1);
if (!(s->flags & SN_ERR_MASK))
s->flags |= SN_ERR_PRXCOND;
if (!(s->flags & SN_FINST_MASK))
@@ -5056,6 +5083,7 @@
msg.len = sprintf(trash, HTTP_401_fmt, uri_auth->auth_realm);
txn->status = 401;
client_retnclose(t, &msg);
+ trace_term(t, TT_HTTP_URI_1);
t->analysis &= ~AN_REQ_ANY;
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_PRXCOND;
diff --git a/src/proto_uxst.c b/src/proto_uxst.c
index 9053805..f8026ce 100644
--- a/src/proto_uxst.c
+++ b/src/proto_uxst.c
@@ -413,6 +413,7 @@
s->flags = 0;
s->analysis = 0;
+ s->term_trace = 0;
if ((t = pool_alloc2(pool2_task)) == NULL) {
Alert("out of memory in uxst_event_accept().\n");