BUG/MEDIUM: log: check result details truncated.
Fix regression introduced by commit:
'MAJOR: servers: propagate server status changes asynchronously.'
The building of the log line was re-worked to be done at the
postponed point without lack of data.
[wt: this only affects 1.8-dev, no backport needed]
diff --git a/include/proto/server.h b/include/proto/server.h
index 1a649f5..b521995 100644
--- a/include/proto/server.h
+++ b/include/proto/server.h
@@ -152,45 +152,11 @@
*/
void srv_shutdown_backup_streams(struct proxy *px, int why);
-/* Appends some information to a message string related to a server going UP or
- * DOWN. If both <forced> and <reason> are null and the server tracks another
- * one, a "via" information will be provided to know where the status came from.
- * If <reason> is non-null, the entire string will be appended after a comma and
- * a space (eg: to report some information from the check that changed the state).
- * If <xferred> is non-negative, some information about requeued sessions are
- * provided.
- */
-void srv_append_status(struct chunk *msg, struct server *s, const char *reason, int xferred, int forced);
-
-/* Marks server <s> down, regardless of its checks' statuses, notifies by all
- * available means, recounts the remaining servers on the proxy and transfers
- * queued sessions whenever possible to other servers. It automatically
- * recomputes the number of servers, but not the map. Maintenance servers are
- * ignored. It reports <reason> if non-null as the reason for going down. Note
- * that it makes use of the trash to build the log strings, so <reason> must
- * not be placed there.
- */
-void srv_set_stopped(struct server *s, const char *reason);
-
-/* Marks server <s> up regardless of its checks' statuses and provided it isn't
- * in maintenance. Notifies by all available means, recounts the remaining
- * servers on the proxy and tries to grab requests from the proxy. It
- * automatically recomputes the number of servers, but not the map. Maintenance
- * servers are ignored. It reports <reason> if non-null as the reason for going
- * up. Note that it makes use of the trash to build the log strings, so <reason>
- * must not be placed there.
- */
-void srv_set_running(struct server *s, const char *reason);
+void srv_append_status(struct chunk *msg, struct server *s, struct check *, int xferred, int forced);
-/* Marks server <s> stopping regardless of its checks' statuses and provided it
- * isn't in maintenance. Notifies by all available means, recounts the remaining
- * servers on the proxy and tries to grab requests from the proxy. It
- * automatically recomputes the number of servers, but not the map. Maintenance
- * servers are ignored. It reports <reason> if non-null as the reason for going
- * up. Note that it makes use of the trash to build the log strings, so <reason>
- * must not be placed there.
- */
-void srv_set_stopping(struct server *s, const char *reason);
+void srv_set_stopped(struct server *s, const char *reason, struct check *check);
+void srv_set_running(struct server *s, const char *reason, struct check *check);
+void srv_set_stopping(struct server *s, const char *reason, struct check *check);
/* Enables admin flag <mode> (among SRV_ADMF_*) on server <s>. This is used to
* enforce either maint mode or drain mode. It is not allowed to set more than
diff --git a/include/types/server.h b/include/types/server.h
index 75054a0..cdc3359 100644
--- a/include/types/server.h
+++ b/include/types/server.h
@@ -300,7 +300,11 @@
} tmpl_info;
struct dns_srvrq *srvrq; /* Pointer representing the DNS SRV requeest, if any */
struct list update_status; /* to attach to list of servers chnaging status */
- char op_st_chg_reason[48]; /* operational status change's reason */
+ struct {
+ char reason[128];
+ short status, code;
+ long duration;
+ } op_st_chg; /* operational status change's reason */
char adm_st_chg_cause[48]; /* adminstrative status change's cause */
};
diff --git a/src/checks.c b/src/checks.c
index 413365b..48e2bb9 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -183,41 +183,6 @@
return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
}
-/* Builds a string containing some information about the health check's result.
- * The output string is allocated from the trash chunks. If the check is NULL,
- * NULL is returned. This is designed to be used when emitting logs about health
- * checks.
- */
-static const char *check_reason_string(struct check *check)
-{
- struct chunk *msg;
-
- if (!check)
- return NULL;
-
- msg = get_trash_chunk();
- chunk_printf(msg, "reason: %s", get_check_status_description(check->status));
-
- if (check->status >= HCHK_STATUS_L57DATA)
- chunk_appendf(msg, ", code: %d", check->code);
-
- if (*check->desc) {
- struct chunk src;
-
- chunk_appendf(msg, ", info: \"");
-
- chunk_initlen(&src, check->desc, 0, strlen(check->desc));
- chunk_asciiencode(msg, &src, '"');
-
- chunk_appendf(msg, "\"");
- }
-
- if (check->duration >= 0)
- chunk_appendf(msg, ", check duration: %ldms", check->duration);
-
- return msg->str;
-}
-
/*
* Set check->status, update check->duration and fill check->result with
* an adequate CHK_RES_* value. The new check->health is computed based
@@ -312,7 +277,7 @@
(check->result == CHK_RES_CONDPASS) ? "conditionally ":"",
(check->result >= CHK_RES_PASSED) ? "succeeded" : "failed");
- srv_append_status(&trash, s, check_reason_string(check), -1, 0);
+ srv_append_status(&trash, s, check, -1, 0);
chunk_appendf(&trash, ", status: %d/%d %s",
(check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
@@ -345,7 +310,7 @@
return;
/* We only report a reason for the check if we did not do so previously */
- srv_set_stopped(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL);
+ srv_set_stopped(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
}
/* Marks the check <check> as valid and tries to set its server up, provided
@@ -378,7 +343,7 @@
if ((check->state & CHK_ST_AGENT) && s->next_state == SRV_ST_STOPPING)
return;
- srv_set_running(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL);
+ srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
}
/* Marks the check <check> as valid and tries to set its server into stopping mode
@@ -406,7 +371,7 @@
if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
return;
- srv_set_stopping(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL);
+ srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
}
/* note: use health_adjust() only, which first checks that the observe mode is
diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c
index bc6bc91..2ae1bbb 100644
--- a/src/hlua_fcn.c
+++ b/src/hlua_fcn.c
@@ -685,7 +685,7 @@
sv = hlua_check_server(L, 1);
if (!(sv->track)) {
sv->check.health = sv->check.rise + sv->check.fall - 1;
- srv_set_running(sv, "changed from Lua script");
+ srv_set_running(sv, "changed from Lua script", NULL);
}
return 0;
}
@@ -697,7 +697,7 @@
sv = hlua_check_server(L, 1);
if (!(sv->track)) {
sv->check.health = sv->check.rise + sv->check.fall - 1;
- srv_set_stopping(sv, "changed from Lua script");
+ srv_set_stopping(sv, "changed from Lua script", NULL);
}
return 0;
}
@@ -709,7 +709,7 @@
sv = hlua_check_server(L, 1);
if (!(sv->track)) {
sv->check.health = 0;
- srv_set_stopped(sv, "changed from Lua script");
+ srv_set_stopped(sv, "changed from Lua script", NULL);
}
return 0;
}
@@ -743,7 +743,7 @@
sv = hlua_check_server(L, 1);
if (sv->agent.state & CHK_ST_ENABLED) {
sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
- srv_set_running(sv, "changed from Lua script");
+ srv_set_running(sv, "changed from Lua script", NULL);
}
return 0;
}
@@ -755,7 +755,7 @@
sv = hlua_check_server(L, 1);
if (sv->agent.state & CHK_ST_ENABLED) {
sv->agent.health = 0;
- srv_set_stopped(sv, "changed from Lua script");
+ srv_set_stopped(sv, "changed from Lua script", NULL);
}
return 0;
}
diff --git a/src/server.c b/src/server.c
index b937c19..f57434a 100644
--- a/src/server.c
+++ b/src/server.c
@@ -782,17 +782,53 @@
/* Appends some information to a message string related to a server going UP or
* DOWN. If both <forced> and <reason> are null and the server tracks another
* one, a "via" information will be provided to know where the status came from.
- * If <reason> is non-null, the entire string will be appended after a comma and
- * a space (eg: to report some information from the check that changed the state).
- * If <xferred> is non-negative, some information about requeued streams are
+ * If <check> is non-null, an entire string describing the check result will be
+ * appended after a comma and a space (eg: to report some information from the
+ * check that changed the state). In the other case, the string will be built
+ * using the check results stored into the struct server if present.
+ * If <xferred> is non-negative, some information about requeued sessions are
* provided.
*/
-void srv_append_status(struct chunk *msg, struct server *s, const char *reason, int xferred, int forced)
+void srv_append_status(struct chunk *msg, struct server *s, struct check *check, int xferred, int forced)
{
- if (reason)
- chunk_appendf(msg, ", %s", reason);
- else if (!forced && s->track)
+ short status = s->op_st_chg.status;
+ short code = s->op_st_chg.code;
+ long duration = s->op_st_chg.duration;
+ char *desc = s->op_st_chg.reason;
+
+ if (check) {
+ status = check->status;
+ code = check->code;
+ duration = check->duration;
+ desc = check->desc;
+ }
+
+ if (status != -1) {
+ chunk_appendf(msg, ", reason: %s", get_check_status_description(status));
+
+ if (status >= HCHK_STATUS_L57DATA)
+ chunk_appendf(msg, ", code: %d", code);
+
+ if (desc && *desc) {
+ struct chunk src;
+
+ chunk_appendf(msg, ", info: \"");
+
+ chunk_initlen(&src, desc, 0, strlen(desc));
+ chunk_asciiencode(msg, &src, '"');
+
+ chunk_appendf(msg, "\"");
+ }
+
+ if (duration >= 0)
+ chunk_appendf(msg, ", check duration: %ldms", duration);
+ }
+ else if (desc && *desc) {
+ chunk_appendf(msg, ", %s", desc);
+ }
+ else if (!forced && s->track) {
chunk_appendf(msg, " via %s/%s", s->track->proxy->id, s->track->id);
+ }
if (xferred >= 0) {
if (s->next_state == SRV_ST_STOPPED)
@@ -810,15 +846,14 @@
}
}
-/* Marks server <s> down, regardless of its checks' statuses, notifies by all
- * available means, recounts the remaining servers on the proxy and transfers
- * queued streams whenever possible to other servers. It automatically
- * recomputes the number of servers, but not the map. Maintenance servers are
- * ignored. It reports <reason> if non-null as the reason for going down. Note
- * that it makes use of the trash to build the log strings, so <reason> must
- * not be placed there.
+/* Marks server <s> down, regardless of its checks' statuses. The server is
+ * registered in a list to postpone the counting of the remaining servers on
+ * the proxy and transfers queued streams whenever possible to other servers at
+ * a sync point. Maintenance servers are ignored. It stores the <reason> if
+ * non-null as the reason for going down or the available data from the check
+ * struct to recompute this reason later.
*/
-void srv_set_stopped(struct server *s, const char *reason)
+void srv_set_stopped(struct server *s, const char *reason, struct check *check)
{
struct server *srv;
@@ -826,26 +861,35 @@
return;
s->next_state = SRV_ST_STOPPED;
- if (reason)
- strlcpy2(s->op_st_chg_reason, reason, sizeof(s->op_st_chg_reason));
+ *s->op_st_chg.reason = 0;
+ s->op_st_chg.status = -1;
+ if (reason) {
+ strlcpy2(s->op_st_chg.reason, reason, sizeof(s->op_st_chg.reason));
+ }
+ else if (check) {
+ if (check->desc)
+ strlcpy2(s->op_st_chg.reason, check->desc, sizeof(s->op_st_chg.reason));
+ s->op_st_chg.code = check->code;
+ s->op_st_chg.status = check->status;
+ s->op_st_chg.duration = check->duration;
+ }
/* Register changes to be applied asynchronously */
if (LIST_ISEMPTY(&s->update_status))
LIST_ADDQ(&updated_servers, &s->update_status);
for (srv = s->trackers; srv; srv = srv->tracknext)
- srv_set_stopped(srv, NULL);
+ srv_set_stopped(srv, NULL, NULL);
}
/* Marks server <s> up regardless of its checks' statuses and provided it isn't
- * in maintenance. Notifies by all available means, recounts the remaining
- * servers on the proxy and tries to grab requests from the proxy. It
- * automatically recomputes the number of servers, but not the map. Maintenance
- * servers are ignored. It reports <reason> if non-null as the reason for going
- * up. Note that it makes use of the trash to build the log strings, so <reason>
- * must not be placed there.
+ * in maintenance. The server is registered in a list to postpone the counting
+ * of the remaining servers on the proxy and tries to grab requests from the
+ * proxy at a sync point. Maintenance servers are ignored. It stores the
+ * <reason> if non-null as the reason for going down or the available data
+ * from the check struct to recompute this reason later.
*/
-void srv_set_running(struct server *s, const char *reason)
+void srv_set_running(struct server *s, const char *reason, struct check *check)
{
struct server *srv;
@@ -856,8 +900,18 @@
return;
s->next_state = SRV_ST_STARTING;
- if (reason)
- strlcpy2(s->op_st_chg_reason, reason, sizeof(s->op_st_chg_reason));
+ *s->op_st_chg.reason = 0;
+ s->op_st_chg.status = -1;
+ if (reason) {
+ strlcpy2(s->op_st_chg.reason, reason, sizeof(s->op_st_chg.reason));
+ }
+ else if (check) {
+ if (check->desc)
+ strlcpy2(s->op_st_chg.reason, check->desc, sizeof(s->op_st_chg.reason));
+ s->op_st_chg.code = check->code;
+ s->op_st_chg.status = check->status;
+ s->op_st_chg.duration = check->duration;
+ }
if (s->slowstart <= 0)
s->next_state = SRV_ST_RUNNING;
@@ -867,18 +921,19 @@
LIST_ADDQ(&updated_servers, &s->update_status);
for (srv = s->trackers; srv; srv = srv->tracknext)
- srv_set_running(srv, NULL);
+ srv_set_running(srv, NULL, NULL);
}
/* Marks server <s> stopping regardless of its checks' statuses and provided it
- * isn't in maintenance. Notifies by all available means, recounts the remaining
- * servers on the proxy and tries to grab requests from the proxy. It
- * automatically recomputes the number of servers, but not the map. Maintenance
- * servers are ignored. It reports <reason> if non-null as the reason for going
+ * isn't in maintenance. The server is registered in a list to postpone the
+ * counting of the remaining servers on the proxy and tries to grab requests
+ * from the proxy. Maintenance servers are ignored. It stores the
+ * <reason> if non-null as the reason for going down or the available data
+ * from the check struct to recompute this reason later.
* up. Note that it makes use of the trash to build the log strings, so <reason>
* must not be placed there.
*/
-void srv_set_stopping(struct server *s, const char *reason)
+void srv_set_stopping(struct server *s, const char *reason, struct check *check)
{
struct server *srv;
@@ -889,15 +944,25 @@
return;
s->next_state = SRV_ST_STOPPING;
- if (reason)
- strlcpy2(s->op_st_chg_reason, reason, sizeof(s->op_st_chg_reason));
+ *s->op_st_chg.reason = 0;
+ s->op_st_chg.status = -1;
+ if (reason) {
+ strlcpy2(s->op_st_chg.reason, reason, sizeof(s->op_st_chg.reason));
+ }
+ else if (check) {
+ if (check->desc)
+ strlcpy2(s->op_st_chg.reason, check->desc, sizeof(s->op_st_chg.reason));
+ s->op_st_chg.code = check->code;
+ s->op_st_chg.status = check->status;
+ s->op_st_chg.duration = check->duration;
+ }
/* Register changes to be applied asynchronously */
if (LIST_ISEMPTY(&s->update_status))
LIST_ADDQ(&updated_servers, &s->update_status);
for (srv = s->trackers; srv; srv = srv->tracknext)
- srv_set_stopping(srv, NULL);
+ srv_set_stopping(srv, NULL, NULL);
}
/* Enables admin flag <mode> (among SRV_ADMF_*) on server <s>. This is used to
@@ -2766,18 +2831,18 @@
switch (srv_op_state) {
case SRV_ST_STOPPED:
srv->check.health = 0;
- srv_set_stopped(srv, "changed from server-state after a reload");
+ srv_set_stopped(srv, "changed from server-state after a reload", NULL);
break;
case SRV_ST_STARTING:
srv->next_state = srv_op_state;
break;
case SRV_ST_STOPPING:
srv->check.health = srv->check.rise + srv->check.fall - 1;
- srv_set_stopping(srv, "changed from server-state after a reload");
+ srv_set_stopping(srv, "changed from server-state after a reload", NULL);
break;
case SRV_ST_RUNNING:
srv->check.health = srv->check.rise + srv->check.fall - 1;
- srv_set_running(srv, "");
+ srv_set_running(srv, "", NULL);
break;
}
@@ -4066,15 +4131,15 @@
}
else if (strcmp(args[4], "up") == 0) {
sv->check.health = sv->check.rise + sv->check.fall - 1;
- srv_set_running(sv, "changed from CLI");
+ srv_set_running(sv, "changed from CLI", NULL);
}
else if (strcmp(args[4], "stopping") == 0) {
sv->check.health = sv->check.rise + sv->check.fall - 1;
- srv_set_stopping(sv, "changed from CLI");
+ srv_set_stopping(sv, "changed from CLI", NULL);
}
else if (strcmp(args[4], "down") == 0) {
sv->check.health = 0;
- srv_set_stopped(sv, "changed from CLI");
+ srv_set_stopped(sv, "changed from CLI", NULL);
}
else {
appctx->ctx.cli.severity = LOG_ERR;
@@ -4090,11 +4155,11 @@
}
else if (strcmp(args[4], "up") == 0) {
sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
- srv_set_running(sv, "changed from CLI");
+ srv_set_running(sv, "changed from CLI", NULL);
}
else if (strcmp(args[4], "down") == 0) {
sv->agent.health = 0;
- srv_set_stopped(sv, "changed from CLI");
+ srv_set_stopped(sv, "changed from CLI", NULL);
}
else {
appctx->ctx.cli.severity = LOG_ERR;
@@ -4407,7 +4472,6 @@
cli_register_kw(&cli_kws);
}
-
/*
* This function applies server's status changes, it is
* is designed to be called asynchronously.
@@ -4454,7 +4518,7 @@
"%sServer %s/%s is DOWN", s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
- srv_append_status(tmptrash, s, *s->op_st_chg_reason ? s->op_st_chg_reason : NULL, xferred, 0);
+ srv_append_status(tmptrash, s, NULL, xferred, 0);
Warning("%s.\n", tmptrash->str);
/* we don't send an alert if the server was previously paused */
@@ -4486,7 +4550,7 @@
"%sServer %s/%s is stopping", s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
- srv_append_status(tmptrash, s, *s->op_st_chg_reason ? s->op_st_chg_reason : NULL, xferred, 0);
+ srv_append_status(tmptrash, s, NULL, xferred, 0);
Warning("%s.\n", tmptrash->str);
send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
@@ -4545,7 +4609,7 @@
"%sServer %s/%s is UP", s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
- srv_append_status(tmptrash, s, *s->op_st_chg_reason ? s->op_st_chg_reason : NULL, xferred, 0);
+ srv_append_status(tmptrash, s, NULL, xferred, 0);
Warning("%s.\n", tmptrash->str);
send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str);
send_email_alert(s, LOG_NOTICE, "%s", tmptrash->str);
@@ -4576,6 +4640,10 @@
s->next_admin = next_admin;
}
+ /* reset operational state change */
+ *s->op_st_chg.reason = 0;
+ s->op_st_chg.status = s->op_st_chg.code = -1;
+ s->op_st_chg.duration = 0;
/* Now we try to apply pending admin changes */
@@ -4900,7 +4968,6 @@
}
/* Re-set log strings to empty */
- *s->op_st_chg_reason = 0;
*s->adm_st_chg_cause = 0;
}
/*
diff --git a/src/stats.c b/src/stats.c
index 614350d..66e9be4 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -2810,7 +2810,7 @@
case ST_ADM_ACTION_HRUNN:
if (!(sv->track)) {
sv->check.health = sv->check.rise + sv->check.fall - 1;
- srv_set_running(sv, "changed from Web interface");
+ srv_set_running(sv, "changed from Web interface", NULL);
altered_servers++;
total_servers++;
}
@@ -2818,7 +2818,7 @@
case ST_ADM_ACTION_HNOLB:
if (!(sv->track)) {
sv->check.health = sv->check.rise + sv->check.fall - 1;
- srv_set_stopping(sv, "changed from Web interface");
+ srv_set_stopping(sv, "changed from Web interface", NULL);
altered_servers++;
total_servers++;
}
@@ -2826,7 +2826,7 @@
case ST_ADM_ACTION_HDOWN:
if (!(sv->track)) {
sv->check.health = 0;
- srv_set_stopped(sv, "changed from Web interface");
+ srv_set_stopped(sv, "changed from Web interface", NULL);
altered_servers++;
total_servers++;
}
@@ -2848,7 +2848,7 @@
case ST_ADM_ACTION_ARUNN:
if (sv->agent.state & CHK_ST_ENABLED) {
sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
- srv_set_running(sv, "changed from Web interface");
+ srv_set_running(sv, "changed from Web interface", NULL);
altered_servers++;
total_servers++;
}
@@ -2856,7 +2856,7 @@
case ST_ADM_ACTION_ADOWN:
if (sv->agent.state & CHK_ST_ENABLED) {
sv->agent.health = 0;
- srv_set_stopped(sv, "changed from Web interface");
+ srv_set_stopped(sv, "changed from Web interface", NULL);
altered_servers++;
total_servers++;
}