[MINOR] Capture & display more data from health checks, v2
Capture & display more data from health checks, like
strerror(errno) for L4 failed checks or a first line
from a response for L7 successes/failed checks.
Non ascii or control characters are masked with
chunk_htmlencode() (html stats) or chunk_asciiencode() (logs).
diff --git a/include/common/defaults.h b/include/common/defaults.h
index c09f5a4..b0aee86 100644
--- a/include/common/defaults.h
+++ b/include/common/defaults.h
@@ -174,4 +174,9 @@
#define MAX_HOSTNAME_LEN 32
#endif
+/* Maximum health check description length */
+#ifndef HCHK_DESC_LEN
+#define HCHK_DESC_LEN 128
+#endif
+
#endif /* _COMMON_DEFAULTS_H */
diff --git a/include/types/server.h b/include/types/server.h
index f6a7171..b3fe83d 100644
--- a/include/types/server.h
+++ b/include/types/server.h
@@ -139,6 +139,7 @@
struct timeval check_start; /* last health check start time */
unsigned long check_duration; /* time in ms took to finish last health check */
short check_status, check_code; /* check result, check code */
+ char check_desc[HCHK_DESC_LEN]; /* healt check descritpion */
struct freq_ctr sess_per_sec; /* sessions per second on this server */
int puid; /* proxy-unique server ID, used for SNMP */
diff --git a/src/checks.c b/src/checks.c
index a21386f..0e9aca5 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -125,6 +125,17 @@
if (s->check_status >= HCHK_STATUS_L57DATA)
chunk_printf(msg, ", code: %d", s->check_code);
+ if (*s->check_desc) {
+ struct chunk src;
+
+ chunk_printf(msg, ", info: \"");
+
+ chunk_initlen(&src, s->check_desc, 0, strlen(s->check_desc));
+ chunk_asciiencode(msg, &src, '"');
+
+ chunk_printf(msg, "\"");
+ }
+
chunk_printf(msg, ", check duration: %lums", s->check_duration);
}
@@ -151,12 +162,13 @@
* Show information in logs about failed health check if server is UP
* or succeeded health checks if server is DOWN.
*/
-static void set_server_check_status(struct server *s, short status) {
+static void set_server_check_status(struct server *s, short status, char *desc) {
struct chunk msg;
if (status == HCHK_STATUS_START) {
s->result = SRV_CHK_UNKNOWN; /* no result yet */
+ s->check_desc[0] = '\0';
s->check_start = now;
return;
}
@@ -164,6 +176,12 @@
if (!s->check_status)
return;
+ if (desc && *desc) {
+ strncpy(s->check_desc, desc, HCHK_DESC_LEN-1);
+ s->check_desc[HCHK_DESC_LEN-1] = '\0';
+ } else
+ s->check_desc[0] = '\0';
+
s->check_status = status;
if (check_statuses[status].result)
s->result |= check_statuses[status].result;
@@ -503,7 +521,7 @@
//fprintf(stderr, "event_srv_chk_w, state=%ld\n", unlikely(fdtab[fd].state));
if (unlikely(fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))) {
- set_server_check_status(s, HCHK_STATUS_L4CON);
+ set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
goto out_error;
}
@@ -539,11 +557,11 @@
switch (errno) {
case ECONNREFUSED:
case ENETUNREACH:
- set_server_check_status(s, HCHK_STATUS_L4CON);
+ set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
break;
default:
- set_server_check_status(s, HCHK_STATUS_SOCKERR);
+ set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno));
}
goto out_error;
@@ -572,12 +590,12 @@
goto out_poll;
if (errno && errno != EISCONN) {
- set_server_check_status(s, HCHK_STATUS_L4CON);
+ set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
goto out_error;
}
/* good TCP connection is enough */
- set_server_check_status(s, HCHK_STATUS_L4OK);
+ set_server_check_status(s, HCHK_STATUS_L4OK, NULL);
goto out_wakeup;
}
}
@@ -621,6 +639,7 @@
struct task *t = fdtab[fd].owner;
struct server *s = t->context;
int skerr;
+ char *desc;
socklen_t lskerr = sizeof(skerr);
len = -1;
@@ -633,7 +652,7 @@
/* in case of TCP only, this tells us if the connection failed */
if (!(s->result & SRV_CHK_ERROR))
- set_server_check_status(s, HCHK_STATUS_SOCKERR);
+ set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
goto out_wakeup;
}
@@ -649,6 +668,11 @@
return 0;
}
+ if (len < sizeof(trash))
+ trash[len] = '\0';
+ else
+ trash[len-1] = '\0';
+
/* Note: the response will only be accepted if read at once */
if (s->proxy->options & PR_O_HTTP_CHK) {
/* Check if the server speaks HTTP 1.X */
@@ -656,50 +680,62 @@
(memcmp(trash, "HTTP/1.", 7) != 0 ||
(trash[12] != ' ' && trash[12] != '\r')) ||
!isdigit(trash[9]) || !isdigit(trash[10]) || !isdigit(trash[11])) {
- set_server_check_status(s, HCHK_STATUS_L7RSP);
+
+ cut_crlf(trash);
+ set_server_check_status(s, HCHK_STATUS_L7RSP, trash);
+
goto out_wakeup;
}
s->check_code = str2uic(&trash[9]);
+ desc = ltrim(&trash[12], ' ');
+ cut_crlf(desc);
+
/* check the reply : HTTP/1.X 2xx and 3xx are OK */
if (trash[9] == '2' || trash[9] == '3')
- set_server_check_status(s, HCHK_STATUS_L7OKD);
+ set_server_check_status(s, HCHK_STATUS_L7OKD, desc);
else if ((s->proxy->options & PR_O_DISABLE404) &&
(s->state & SRV_RUNNING) &&
(s->check_code == 404))
/* 404 may be accepted as "stopping" only if the server was up */
- set_server_check_status(s, HCHK_STATUS_L7OKCD);
+ set_server_check_status(s, HCHK_STATUS_L7OKCD, desc);
else
- set_server_check_status(s, HCHK_STATUS_L7STS);
+ set_server_check_status(s, HCHK_STATUS_L7STS, desc);
}
else if (s->proxy->options & PR_O_SSL3_CHK) {
/* Check for SSLv3 alert or handshake */
if ((len >= 5) && (trash[0] == 0x15 || trash[0] == 0x16))
- set_server_check_status(s, HCHK_STATUS_L6OK);
+ set_server_check_status(s, HCHK_STATUS_L6OK, NULL);
else
- set_server_check_status(s, HCHK_STATUS_L6RSP);
+ set_server_check_status(s, HCHK_STATUS_L6RSP, NULL);
}
else if (s->proxy->options & PR_O_SMTP_CHK) {
/* Check if the server speaks SMTP */
if ((len < strlen("000\r")) ||
(trash[3] != ' ' && trash[3] != '\r') ||
!isdigit(trash[0]) || !isdigit(trash[1]) || !isdigit(trash[2])) {
- set_server_check_status(s, HCHK_STATUS_L7RSP);
+
+ cut_crlf(trash);
+ set_server_check_status(s, HCHK_STATUS_L7RSP, trash);
+
goto out_wakeup;
}
s->check_code = str2uic(&trash[0]);
+ desc = ltrim(&trash[3], ' ');
+ cut_crlf(desc);
+
/* Check for SMTP code 2xx (should be 250) */
if (trash[0] == '2')
- set_server_check_status(s, HCHK_STATUS_L7OKD);
+ set_server_check_status(s, HCHK_STATUS_L7OKD, desc);
else
- set_server_check_status(s, HCHK_STATUS_L7STS);
+ set_server_check_status(s, HCHK_STATUS_L7STS, desc);
}
else {
/* other checks are valid if the connection succeeded anyway */
- set_server_check_status(s, HCHK_STATUS_L4OK);
+ set_server_check_status(s, HCHK_STATUS_L4OK, NULL);
}
out_wakeup:
@@ -749,7 +785,7 @@
}
/* we'll initiate a new check */
- set_server_check_status(s, HCHK_STATUS_START);
+ set_server_check_status(s, HCHK_STATUS_START, NULL);
if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) {
if ((fd < global.maxsock) &&
(fcntl(fd, F_SETFL, O_NONBLOCK) != -1) &&
@@ -824,7 +860,7 @@
}
if (ret) {
- set_server_check_status(s, HCHK_STATUS_SOCKERR);
+ set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
switch (ret) {
case 1:
Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
@@ -855,7 +891,7 @@
#endif
ret = tcpv4_bind_socket(fd, flags, &s->proxy->source_addr, remote);
if (ret) {
- set_server_check_status(s, HCHK_STATUS_SOCKERR);
+ set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL);
switch (ret) {
case 1:
Alert("Cannot bind to source address before connect() for %s '%s'. Aborting.\n",
@@ -918,11 +954,11 @@
/* FIXME: is it possible to get ECONNREFUSED/ENETUNREACH with O_NONBLOCK? */
case ECONNREFUSED:
case ENETUNREACH:
- set_server_check_status(s, HCHK_STATUS_L4CON);
+ set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno));
break;
default:
- set_server_check_status(s, HCHK_STATUS_SOCKERR);
+ set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno));
}
}
}
@@ -1020,12 +1056,12 @@
else if ((s->result & SRV_CHK_ERROR) || tick_is_expired(t->expire, now_ms)) {
if (!(s->result & SRV_CHK_ERROR)) {
if (!EV_FD_ISSET(fd, DIR_RD)) {
- set_server_check_status(s, HCHK_STATUS_L4TOUT);
+ set_server_check_status(s, HCHK_STATUS_L4TOUT, NULL);
} else {
if (s->proxy->options & PR_O_SSL3_CHK)
- set_server_check_status(s, HCHK_STATUS_L6TOUT);
+ set_server_check_status(s, HCHK_STATUS_L6TOUT, NULL);
else /* HTTP, SMTP */
- set_server_check_status(s, HCHK_STATUS_L7TOUT);
+ set_server_check_status(s, HCHK_STATUS_L7TOUT, NULL);
}
}
diff --git a/src/dumpstats.c b/src/dumpstats.c
index 5275dae..510d0d2 100644
--- a/src/dumpstats.c
+++ b/src/dumpstats.c
@@ -1500,9 +1500,20 @@
srv_hlt_st[sv_state],
(svs->state & SRV_RUNNING) ? (svs->health - svs->rise + 1) : (svs->health),
(svs->state & SRV_RUNNING) ? (svs->fall) : (svs->rise));
+
+ chunk_printf(&msg, "</td><td title=\"%s",
+ get_check_status_description(sv->check_status));
+
+ if (*sv->check_desc) {
+ struct chunk src;
+
+ chunk_printf(&msg, ": ");
+
+ chunk_initlen(&src, sv->check_desc, 0, strlen(sv->check_desc));
+ chunk_htmlencode(&msg, &src);
+ }
- chunk_printf(&msg, "</td><td title=\"%s\" nowrap> %s%s",
- get_check_status_description(sv->check_status),
+ chunk_printf(&msg, "\" nowrap> %s%s",
tv_iszero(&sv->check_start)?"":"* ",
get_check_status_info(sv->check_status));