MEDIUM: Set rise and fall of agent checks to 1
This is achieved by moving rise and fall from struct server to struct check.
After this move, the behaviour of the primary check, server->check, is
unchanged. However, the secondary agent check, server->agent, now has
independent rise and fall values, each of which is set to 1.
The result is that receiving "fail", "stopped" or "down" just once from the
agent will mark the server as down. And receiving a weight just once will
allow the server to be marked up if its primary check is in good health.
This opens up the scope to allow the rise and fall values of the agent
check to be configurable; however, this has not been implemented at this
stage.
Signed-off-by: Simon Horman <horms@verge.net.au>
diff --git a/include/common/defaults.h b/include/common/defaults.h
index 30ab148..13fb1e3 100644
--- a/include/common/defaults.h
+++ b/include/common/defaults.h
@@ -127,6 +127,8 @@
#define DEF_CHKINTR 2000
#define DEF_FALLTIME 3
#define DEF_RISETIME 2
+#define DEF_AGENT_FALLTIME 1
+#define DEF_AGENT_RISETIME 1
#define DEF_CHECK_REQ "OPTIONS / HTTP/1.0\r\n"
#define DEF_SMTP_CHECK_REQ "HELO localhost\r\n"
#define DEF_LDAP_CHECK_REQ "\x30\x0c\x02\x01\x01\x60\x07\x02\x01\x03\x04\x00\x80\x00"
diff --git a/include/types/server.h b/include/types/server.h
index 73d426d..51f70de 100644
--- a/include/types/server.h
+++ b/include/types/server.h
@@ -123,8 +123,9 @@
int inter, fastinter, downinter; /* checks: time in milliseconds */
int result; /* health-check result : SRV_CHK_* */
int state; /* health-check result : CHK_* */
- int health; /* 0 to server->rise-1 = bad;
- * rise to server->rise+server->fall-1 = good */
+ int health; /* 0 to rise-1 = bad;
+ * rise to rise+fall-1 = good */
+ int rise, fall; /* time in iterations */
int type; /* Check type, one of PR_O2_*_CHK */
struct server *server; /* back-pointer to server */
};
@@ -157,7 +158,6 @@
struct server *tracknext, *track; /* next server in a tracking list, tracked server */
char *trackit; /* temporary variable to make assignment deferrable */
int consecutive_errors; /* current number of consecutive errors */
- int rise, fall; /* time in iterations */
int consecutive_errors_limit; /* number of consecutive errors that triggers an event */
short observe, onerror; /* observing mode: one of HANA_OBS_*; what to do on error: on of ANA_ONERR_* */
short onmarkeddown; /* what to do when marked down: one of HANA_ONMARKEDDOWN_* */
diff --git a/src/cfgparse.c b/src/cfgparse.c
index 7df7de0..8c289f1 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -1328,8 +1328,10 @@
defproxy.defsrv.agent.inter = DEF_CHKINTR;
defproxy.defsrv.agent.fastinter = 0;
defproxy.defsrv.agent.downinter = 0;
- defproxy.defsrv.rise = DEF_RISETIME;
- defproxy.defsrv.fall = DEF_FALLTIME;
+ defproxy.defsrv.check.rise = DEF_RISETIME;
+ defproxy.defsrv.check.fall = DEF_FALLTIME;
+ defproxy.defsrv.agent.rise = DEF_AGENT_RISETIME;
+ defproxy.defsrv.agent.fall = DEF_AGENT_FALLTIME;
defproxy.defsrv.check.port = 0;
defproxy.defsrv.agent.port = 0;
defproxy.defsrv.maxqueue = 0;
@@ -4287,8 +4289,6 @@
newsrv->agent.inter = curproxy->defsrv.agent.inter;
newsrv->agent.fastinter = curproxy->defsrv.agent.fastinter;
newsrv->agent.downinter = curproxy->defsrv.agent.downinter;
- newsrv->rise = curproxy->defsrv.rise;
- newsrv->fall = curproxy->defsrv.fall;
newsrv->maxqueue = curproxy->defsrv.maxqueue;
newsrv->minconn = curproxy->defsrv.minconn;
newsrv->maxconn = curproxy->defsrv.maxconn;
@@ -4303,11 +4303,15 @@
= curproxy->defsrv.iweight;
newsrv->check.status = HCHK_STATUS_INI;
- newsrv->check.health = newsrv->rise; /* up, but will fall down at first failure */
+ newsrv->check.rise = curproxy->defsrv.check.rise;
+ newsrv->check.fall = curproxy->defsrv.check.fall;
+ newsrv->check.health = newsrv->check.rise; /* up, but will fall down at first failure */
newsrv->check.server = newsrv;
newsrv->agent.status = HCHK_STATUS_INI;
- newsrv->agent.health = newsrv->rise; /* up, but will fall down at first failure */
+ newsrv->agent.rise = curproxy->defsrv.agent.rise;
+ newsrv->agent.fall = curproxy->defsrv.agent.fall;
+ newsrv->agent.health = newsrv->agent.rise; /* up, but will fall down at first failure */
newsrv->agent.server = newsrv;
cur_arg = 3;
@@ -4361,8 +4365,8 @@
goto out;
}
- newsrv->rise = atol(args[cur_arg + 1]);
- if (newsrv->rise <= 0) {
+ newsrv->check.rise = atol(args[cur_arg + 1]);
+ if (newsrv->check.rise <= 0) {
Alert("parsing [%s:%d]: '%s' has to be > 0.\n",
file, linenum, args[cur_arg]);
err_code |= ERR_ALERT | ERR_FATAL;
@@ -4370,13 +4374,11 @@
}
if (newsrv->check.health)
- newsrv->check.health = newsrv->rise;
- if (newsrv->agent.health)
- newsrv->agent.health = newsrv->rise;
+ newsrv->check.health = newsrv->check.rise;
cur_arg += 2;
}
else if (!strcmp(args[cur_arg], "fall")) {
- newsrv->fall = atol(args[cur_arg + 1]);
+ newsrv->check.fall = atol(args[cur_arg + 1]);
if (!*args[cur_arg + 1]) {
Alert("parsing [%s:%d]: '%s' expects an integer argument.\n",
@@ -4385,7 +4387,7 @@
goto out;
}
- if (newsrv->fall <= 0) {
+ if (newsrv->check.fall <= 0) {
Alert("parsing [%s:%d]: '%s' has to be > 0.\n",
file, linenum, args[cur_arg]);
err_code |= ERR_ALERT | ERR_FATAL;
diff --git a/src/checks.c b/src/checks.c
index 4ab29e5..2113c8c 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -236,7 +236,7 @@
if (s->proxy->options2 & PR_O2_LOGHCHKS &&
(((check->health != 0) && (check->result & SRV_CHK_FAILED)) ||
- ((check->health != s->rise + s->fall - 1) && (check->result & SRV_CHK_PASSED)) ||
+ ((check->health != check->rise + check->fall - 1) && (check->result & SRV_CHK_PASSED)) ||
((s->state & SRV_GOINGDOWN) && !(check->result & SRV_CHK_DISABLE)) ||
(!(s->state & SRV_GOINGDOWN) && (check->result & SRV_CHK_DISABLE)))) {
@@ -246,8 +246,8 @@
/* FIXME begin: calculate local version of the health/rise/fall/state */
health = check->health;
- rise = s->rise;
- fall = s->fall;
+ rise = check->rise;
+ fall = check->fall;
state = s->state;
if (check->result & SRV_CHK_FAILED) {
@@ -401,10 +401,10 @@
int xferred;
if (s->state & SRV_MAINTAIN) {
- check->health = s->rise;
+ check->health = check->rise;
}
- if ((s->state & SRV_RUNNING && check->health == s->rise) || s->track) {
+ if ((s->state & SRV_RUNNING && check->health == check->rise) || s->track) {
int srv_was_paused = s->state & SRV_GOINGDOWN;
int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
@@ -468,11 +468,11 @@
unsigned int old_state = s->state;
if (s->state & SRV_MAINTAIN) {
- check->health = s->rise;
+ check->health = check->rise;
}
- if ((s->check.health >= s->rise && s->agent.health >= s->rise &&
- check->health == s->rise) || s->track) {
+ if ((s->check.health >= s->check.rise && s->agent.health >= s->agent.rise &&
+ check->health == check->rise) || s->track) {
if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
if (s->proxy->last_change < now.tv_sec) // ignore negative times
s->proxy->down_time += now.tv_sec - s->proxy->last_change;
@@ -533,8 +533,8 @@
set_server_up(check);
}
- if (check->health >= s->rise)
- check->health = s->rise + s->fall - 1; /* OK now */
+ if (check->health >= check->rise)
+ check->health = check->rise + check->fall - 1; /* OK now */
}
@@ -623,7 +623,7 @@
if (check == &s->agent && check->status != HCHK_STATUS_L7STS)
return;
- if (check->health > s->rise) {
+ if (check->health > check->rise) {
check->health--; /* still good */
s->counters.failed_checks++;
}
@@ -680,8 +680,8 @@
case HANA_ONERR_SUDDTH:
/* simulate a pre-fatal failed health check */
- if (s->check.health > s->rise)
- s->check.health = s->rise + 1;
+ if (s->check.health > s->check.rise)
+ s->check.health = s->check.rise + 1;
/* no break - fall through */
@@ -694,7 +694,7 @@
case HANA_ONERR_MARKDWN:
/* mark server down */
- s->check.health = s->rise;
+ s->check.health = s->check.rise;
set_server_check_status(&s->check, HCHK_STATUS_HANA, trash.str);
set_server_down(&s->check);
@@ -734,7 +734,7 @@
if (!(s->state & SRV_CHECKED))
sv_state = 6; /* should obviously never happen */
else if (s->state & SRV_RUNNING) {
- if (s->check.health == s->rise + s->fall - 1)
+ if (s->check.health == s->check.rise + s->check.fall - 1)
sv_state = 3; /* UP */
else
sv_state = 2; /* going down */
@@ -750,8 +750,8 @@
hlen += sprintf(buffer + hlen,
srv_hlt_st[sv_state],
- (s->state & SRV_RUNNING) ? (s->check.health - s->rise + 1) : (s->check.health),
- (s->state & SRV_RUNNING) ? (s->fall) : (s->rise));
+ (s->state & SRV_RUNNING) ? (s->check.health - s->check.rise + 1) : (s->check.health),
+ (s->state & SRV_RUNNING) ? (s->check.fall) : (s->check.rise));
hlen += sprintf(buffer + hlen, "; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d",
s->proxy->id, s->id,
@@ -1498,7 +1498,7 @@
set_server_disabled(check);
}
- if (check->health < s->rise + s->fall - 1) {
+ if (check->health < check->rise + check->fall - 1) {
check->health++; /* was bad, stays for a while */
set_server_up(check);
}
diff --git a/src/dumpstats.c b/src/dumpstats.c
index 99d16d7..3d60b09 100644
--- a/src/dumpstats.c
+++ b/src/dumpstats.c
@@ -1309,14 +1309,14 @@
*/
if (sv->track->state & SRV_RUNNING) {
set_server_up(&sv->check);
- sv->check.health = sv->rise; /* up, but will fall down at first failure */
+ sv->check.health = sv->check.rise; /* up, but will fall down at first failure */
} else {
sv->state &= ~SRV_MAINTAIN;
set_server_down(&sv->check);
}
} else {
set_server_up(&sv->check);
- sv->check.health = sv->rise; /* up, but will fall down at first failure */
+ sv->check.health = sv->check.rise; /* up, but will fall down at first failure */
}
}
@@ -2266,8 +2266,8 @@
chunk_appendf(&trash, "%s ", human_time(now.tv_sec - ref->last_change, 1));
chunk_appendf(&trash,
srv_hlt_st[state],
- (ref->state & SRV_RUNNING) ? (ref->check.health - ref->rise + 1) : (ref->check.health),
- (ref->state & SRV_RUNNING) ? (ref->fall) : (ref->rise));
+ (ref->state & SRV_RUNNING) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health),
+ (ref->state & SRV_RUNNING) ? (ref->check.fall) : (ref->check.rise));
}
if (sv->state & SRV_CHECKED) {
@@ -2374,8 +2374,8 @@
else
chunk_appendf(&trash,
srv_hlt_st[state],
- (ref->state & SRV_RUNNING) ? (ref->check.health - ref->rise + 1) : (ref->check.health),
- (ref->state & SRV_RUNNING) ? (ref->fall) : (ref->rise));
+ (ref->state & SRV_RUNNING) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health),
+ (ref->state & SRV_RUNNING) ? (ref->check.fall) : (ref->check.rise));
chunk_appendf(&trash,
/* weight, active, backup */
@@ -2944,7 +2944,7 @@
if (!(svs->state & SRV_CHECKED))
sv_state = 6;
else if (svs->state & SRV_RUNNING) {
- if (svs->check.health == svs->rise + svs->fall - 1)
+ if (svs->check.health == svs->check.rise + svs->check.fall - 1)
sv_state = 3; /* UP */
else
sv_state = 2; /* going down */
diff --git a/src/proto_http.c b/src/proto_http.c
index 5ad865b..1921b93 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -2920,7 +2920,7 @@
if ((px->state != PR_STSTOPPED) && (sv->state & SRV_MAINTAIN)) {
/* Already in maintenance, we can change the server state */
set_server_up(&sv->check);
- sv->check.health = sv->rise; /* up, but will fall down at first failure */
+ sv->check.health = sv->check.rise; /* up, but will fall down at first failure */
altered_servers++;
total_servers++;
}
diff --git a/src/server.c b/src/server.c
index efba257..a316daa 100644
--- a/src/server.c
+++ b/src/server.c
@@ -34,7 +34,7 @@
{
const struct server *s = check->server;
- if ((s->state & SRV_CHECKED) && (check->health == s->rise + s->fall - 1))
+ if ((s->state & SRV_CHECKED) && (check->health == check->rise + check->fall - 1))
return check->inter;
if (!(s->state & SRV_RUNNING) && check->health == 0)