/*
 * Health-checks functions.
 *
 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
 * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
13
Willy Tarreaub8816082008-01-18 12:18:15 +010014#include <assert.h>
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020015#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020016#include <errno.h>
17#include <fcntl.h>
18#include <stdio.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020019#include <stdlib.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020020#include <string.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020021#include <time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020022#include <unistd.h>
23#include <sys/socket.h>
Dmitry Sivachenkocaf58982009-08-24 15:11:06 +040024#include <sys/types.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020025#include <netinet/in.h>
Willy Tarreau1274bc42009-07-15 07:16:31 +020026#include <netinet/tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020027#include <arpa/inet.h>
28
Willy Tarreauc7e42382012-08-24 19:22:53 +020029#include <common/chunk.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020030#include <common/compat.h>
31#include <common/config.h>
32#include <common/mini-clist.h>
Willy Tarreau83749182007-04-15 20:56:27 +020033#include <common/standard.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020034#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020035
36#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020037
38#include <proto/backend.h>
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020039#include <proto/checks.h>
Simon Hormana2b9dad2013-02-12 10:45:54 +090040#include <proto/dumpstats.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041#include <proto/fd.h>
42#include <proto/log.h>
43#include <proto/queue.h>
Willy Tarreauc6f4ce82009-06-10 11:09:37 +020044#include <proto/port_range.h>
Willy Tarreau3d300592007-03-18 18:34:41 +010045#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010046#include <proto/proto_tcp.h>
Willy Tarreau2b5652f2006-12-31 17:46:05 +010047#include <proto/proxy.h>
Willy Tarreaufb56aab2012-09-28 14:40:02 +020048#include <proto/raw_sock.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020049#include <proto/server.h>
Simon Hormane0d1bfb2011-06-21 14:34:58 +090050#include <proto/session.h>
Willy Tarreau9e000c62011-03-10 14:03:36 +010051#include <proto/stream_interface.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020052#include <proto/task.h>
53
/* Prototypes of static helpers whose definitions are not part of this
 * section of the file.
 */
static int httpchk_expect(struct server *s, int done);
static int tcpcheck_get_step_id(struct server *s);
static void tcpcheck_main(struct connection *conn);
Willy Tarreaubd741542010-03-16 18:46:54 +010057
Simon Horman63a4a822012-03-19 07:24:41 +090058static const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010059 [HCHK_STATUS_UNKNOWN] = { CHK_RES_UNKNOWN, "UNK", "Unknown" },
60 [HCHK_STATUS_INI] = { CHK_RES_UNKNOWN, "INI", "Initializing" },
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +020061 [HCHK_STATUS_START] = { /* SPECIAL STATUS*/ },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020062
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010063 [HCHK_STATUS_HANA] = { CHK_RES_FAILED, "HANA", "Health analyze" },
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +010064
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010065 [HCHK_STATUS_SOCKERR] = { CHK_RES_FAILED, "SOCKERR", "Socket error" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020066
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010067 [HCHK_STATUS_L4OK] = { CHK_RES_PASSED, "L4OK", "Layer4 check passed" },
68 [HCHK_STATUS_L4TOUT] = { CHK_RES_FAILED, "L4TOUT", "Layer4 timeout" },
69 [HCHK_STATUS_L4CON] = { CHK_RES_FAILED, "L4CON", "Layer4 connection problem" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020070
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010071 [HCHK_STATUS_L6OK] = { CHK_RES_PASSED, "L6OK", "Layer6 check passed" },
72 [HCHK_STATUS_L6TOUT] = { CHK_RES_FAILED, "L6TOUT", "Layer6 timeout" },
73 [HCHK_STATUS_L6RSP] = { CHK_RES_FAILED, "L6RSP", "Layer6 invalid response" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020074
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010075 [HCHK_STATUS_L7TOUT] = { CHK_RES_FAILED, "L7TOUT", "Layer7 timeout" },
76 [HCHK_STATUS_L7RSP] = { CHK_RES_FAILED, "L7RSP", "Layer7 invalid response" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020077
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +020078 [HCHK_STATUS_L57DATA] = { /* DUMMY STATUS */ },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020079
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010080 [HCHK_STATUS_L7OKD] = { CHK_RES_PASSED, "L7OK", "Layer7 check passed" },
81 [HCHK_STATUS_L7OKCD] = { CHK_RES_CONDPASS, "L7OKC", "Layer7 check conditionally passed" },
82 [HCHK_STATUS_L7STS] = { CHK_RES_FAILED, "L7STS", "Layer7 wrong status" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020083};
84
Simon Horman63a4a822012-03-19 07:24:41 +090085static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +010086 [HANA_STATUS_UNKNOWN] = { "Unknown", { 0, 0 }},
87
88 [HANA_STATUS_L4_OK] = { "L4 successful connection", { 2, 0 }},
89 [HANA_STATUS_L4_ERR] = { "L4 unsuccessful connection", { 1, 1 }},
90
91 [HANA_STATUS_HTTP_OK] = { "Correct http response", { 0, 2 }},
92 [HANA_STATUS_HTTP_STS] = { "Wrong http response", { 0, 1 }},
93 [HANA_STATUS_HTTP_HDRRSP] = { "Invalid http response (headers)", { 0, 1 }},
94 [HANA_STATUS_HTTP_RSP] = { "Invalid http response", { 0, 1 }},
95
96 [HANA_STATUS_HTTP_READ_ERROR] = { "Read error (http)", { 0, 1 }},
97 [HANA_STATUS_HTTP_READ_TIMEOUT] = { "Read timeout (http)", { 0, 1 }},
98 [HANA_STATUS_HTTP_BROKEN_PIPE] = { "Close from server (http)", { 0, 1 }},
99};
100
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200101/*
102 * Convert check_status code to description
103 */
104const char *get_check_status_description(short check_status) {
105
106 const char *desc;
107
108 if (check_status < HCHK_STATUS_SIZE)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200109 desc = check_statuses[check_status].desc;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200110 else
111 desc = NULL;
112
113 if (desc && *desc)
114 return desc;
115 else
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200116 return check_statuses[HCHK_STATUS_UNKNOWN].desc;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200117}
118
119/*
120 * Convert check_status code to short info
121 */
122const char *get_check_status_info(short check_status) {
123
124 const char *info;
125
126 if (check_status < HCHK_STATUS_SIZE)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200127 info = check_statuses[check_status].info;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200128 else
129 info = NULL;
130
131 if (info && *info)
132 return info;
133 else
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200134 return check_statuses[HCHK_STATUS_UNKNOWN].info;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200135}
136
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100137const char *get_analyze_status(short analyze_status) {
138
139 const char *desc;
140
141 if (analyze_status < HANA_STATUS_SIZE)
142 desc = analyze_statuses[analyze_status].desc;
143 else
144 desc = NULL;
145
146 if (desc && *desc)
147 return desc;
148 else
149 return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
150}
151
Simon Horman4a741432013-02-23 15:35:38 +0900152static void server_status_printf(struct chunk *msg, struct server *s, struct check *check, int xferred) {
Willy Tarreau44267702011-10-28 15:35:33 +0200153 if (s->track)
Willy Tarreau77804732012-10-29 16:14:26 +0100154 chunk_appendf(msg, " via %s/%s",
Willy Tarreau44267702011-10-28 15:35:33 +0200155 s->track->proxy->id, s->track->id);
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200156
Simon Horman4a741432013-02-23 15:35:38 +0900157 if (check) {
158 chunk_appendf(msg, ", reason: %s", get_check_status_description(check->status));
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200159
Simon Horman4a741432013-02-23 15:35:38 +0900160 if (check->status >= HCHK_STATUS_L57DATA)
161 chunk_appendf(msg, ", code: %d", check->code);
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200162
Simon Horman4a741432013-02-23 15:35:38 +0900163 if (*check->desc) {
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200164 struct chunk src;
165
Willy Tarreau77804732012-10-29 16:14:26 +0100166 chunk_appendf(msg, ", info: \"");
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200167
Simon Horman4a741432013-02-23 15:35:38 +0900168 chunk_initlen(&src, check->desc, 0, strlen(check->desc));
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200169 chunk_asciiencode(msg, &src, '"');
170
Willy Tarreau77804732012-10-29 16:14:26 +0100171 chunk_appendf(msg, "\"");
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200172 }
173
Simon Horman4a741432013-02-23 15:35:38 +0900174 if (check->duration >= 0)
175 chunk_appendf(msg, ", check duration: %ldms", check->duration);
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200176 }
177
Krzysztof Piotr Oledzki3bb05712010-09-27 13:10:50 +0200178 if (xferred >= 0) {
Krzysztof Piotr Oledzkib16a6072010-01-10 21:12:58 +0100179 if (!(s->state & SRV_RUNNING))
Willy Tarreau77804732012-10-29 16:14:26 +0100180 chunk_appendf(msg, ". %d active and %d backup servers left.%s"
Krzysztof Piotr Oledzki9f2b9d52010-01-11 13:16:27 +0100181 " %d sessions active, %d requeued, %d remaining in queue",
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200182 s->proxy->srv_act, s->proxy->srv_bck,
183 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
184 s->cur_sess, xferred, s->nbpend);
185 else
Willy Tarreau77804732012-10-29 16:14:26 +0100186 chunk_appendf(msg, ". %d active and %d backup servers online.%s"
Krzysztof Piotr Oledzki9f2b9d52010-01-11 13:16:27 +0100187 " %d sessions requeued, %d total in queue",
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200188 s->proxy->srv_act, s->proxy->srv_bck,
189 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
190 xferred, s->nbpend);
191 }
192}
193
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200194/*
Simon Horman4a741432013-02-23 15:35:38 +0900195 * Set check->status, update check->duration and fill check->result with
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100196 * an adequate CHK_RES_* value.
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200197 *
198 * Show information in logs about failed health check if server is UP
199 * or succeeded health checks if server is DOWN.
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200200 */
Simon Horman4a741432013-02-23 15:35:38 +0900201static void set_server_check_status(struct check *check, short status, const char *desc)
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100202{
Simon Horman4a741432013-02-23 15:35:38 +0900203 struct server *s = check->server;
204
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200205 if (status == HCHK_STATUS_START) {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100206 check->result = CHK_RES_UNKNOWN; /* no result yet */
Simon Horman4a741432013-02-23 15:35:38 +0900207 check->desc[0] = '\0';
208 check->start = now;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200209 return;
210 }
211
Simon Horman4a741432013-02-23 15:35:38 +0900212 if (!check->status)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200213 return;
214
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200215 if (desc && *desc) {
Simon Horman4a741432013-02-23 15:35:38 +0900216 strncpy(check->desc, desc, HCHK_DESC_LEN-1);
217 check->desc[HCHK_DESC_LEN-1] = '\0';
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200218 } else
Simon Horman4a741432013-02-23 15:35:38 +0900219 check->desc[0] = '\0';
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200220
Simon Horman4a741432013-02-23 15:35:38 +0900221 check->status = status;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200222 if (check_statuses[status].result)
Simon Horman4a741432013-02-23 15:35:38 +0900223 check->result = check_statuses[status].result;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200224
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100225 if (status == HCHK_STATUS_HANA)
Simon Horman4a741432013-02-23 15:35:38 +0900226 check->duration = -1;
227 else if (!tv_iszero(&check->start)) {
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200228 /* set_server_check_status() may be called more than once */
Simon Horman4a741432013-02-23 15:35:38 +0900229 check->duration = tv_ms_elapsed(&check->start, &now);
230 tv_zero(&check->start);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200231 }
232
Simon Horman2f1f9552013-11-25 10:46:37 +0900233 /* Failure to connect to the agent as a secondary check should not
234 * cause the server to be marked down. So only log status changes
235 * for HCHK_STATUS_* statuses */
Willy Tarreau33434322013-12-11 21:15:19 +0100236 if ((check->state & CHK_ST_AGENT) && check->status < HCHK_STATUS_L7TOUT)
Simon Horman2f1f9552013-11-25 10:46:37 +0900237 return;
238
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200239 if (s->proxy->options2 & PR_O2_LOGHCHKS &&
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100240 (((check->health != 0) && (check->result == CHK_RES_FAILED)) ||
Simon Horman8c3d0be2013-11-25 10:46:40 +0900241 (((check->health != check->rise + check->fall - 1) ||
242 (!s->uweight && !(s->state & SRV_DRAIN)) ||
243 (s->uweight && (s->state & SRV_DRAIN))) &&
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100244 (check->result >= CHK_RES_PASSED)) ||
245 ((s->state & SRV_GOINGDOWN) && (check->result != CHK_RES_CONDPASS)) ||
246 (!(s->state & SRV_GOINGDOWN) && (check->result == CHK_RES_CONDPASS)))) {
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200247
248 int health, rise, fall, state;
249
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100250 chunk_reset(&trash);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200251
252 /* FIXME begin: calculate local version of the health/rise/fall/state */
Simon Horman125d0992013-02-24 17:23:38 +0900253 health = check->health;
Simon Horman58c32972013-11-25 10:46:38 +0900254 rise = check->rise;
255 fall = check->fall;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200256 state = s->state;
257
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100258 switch (check->result) {
259 case CHK_RES_FAILED:
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200260 if (health > rise) {
261 health--; /* still good */
262 } else {
263 if (health == rise)
264 state &= ~(SRV_RUNNING | SRV_GOINGDOWN);
265
266 health = 0;
267 }
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100268 break;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200269
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100270 case CHK_RES_PASSED:
271 case CHK_RES_CONDPASS:
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200272 if (health < rise + fall - 1) {
273 health++; /* was bad, stays for a while */
274
275 if (health == rise)
276 state |= SRV_RUNNING;
277
278 if (health >= rise)
279 health = rise + fall - 1; /* OK now */
280 }
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100281
282 /* clear consecutive_errors if observing is enabled */
283 if (s->onerror)
284 s->consecutive_errors = 0;
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100285 break;
286 default:
287 break;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200288 }
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200289
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100290 chunk_appendf(&trash,
291 "Health check for %sserver %s/%s %s%s",
292 s->state & SRV_BACKUP ? "backup " : "",
293 s->proxy->id, s->id,
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100294 (check->result == CHK_RES_CONDPASS) ? "conditionally ":"",
295 (check->result >= CHK_RES_PASSED) ? "succeeded":"failed");
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200296
Simon Horman4a741432013-02-23 15:35:38 +0900297 server_status_printf(&trash, s, check, -1);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200298
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100299 chunk_appendf(&trash, ", status: %d/%d %s",
300 (state & SRV_RUNNING) ? (health - rise + 1) : (health),
301 (state & SRV_RUNNING) ? (fall) : (rise),
Simon Horman8c3d0be2013-11-25 10:46:40 +0900302 (state & SRV_RUNNING)?(s->eweight?"UP":"DRAIN"):"DOWN");
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200303
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100304 Warning("%s.\n", trash.str);
305 send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200306 }
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200307}
308
Willy Tarreau48494c02007-11-30 10:41:39 +0100309/* sends a log message when a backend goes down, and also sets last
310 * change date.
311 */
312static void set_backend_down(struct proxy *be)
313{
314 be->last_change = now.tv_sec;
315 be->down_trans++;
316
317 Alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
318 send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
319}
320
321/* Redistribute pending connections when a server goes down. The number of
322 * connections redistributed is returned.
323 */
324static int redistribute_pending(struct server *s)
325{
326 struct pendconn *pc, *pc_bck, *pc_end;
327 int xferred = 0;
328
329 FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) {
330 struct session *sess = pc->sess;
Willy Tarreau4de91492010-01-22 19:10:05 +0100331 if ((sess->be->options & (PR_O_REDISP|PR_O_PERSIST)) == PR_O_REDISP &&
332 !(sess->flags & SN_FORCE_PRST)) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100333 /* The REDISP option was specified. We will ignore
334 * cookie and force to balance or use the dispatcher.
335 */
Krzysztof Piotr Oledzki25b501a2008-01-06 16:36:16 +0100336
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100337 /* it's left to the dispatcher to choose a server */
Willy Tarreau48494c02007-11-30 10:41:39 +0100338 sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Krzysztof Piotr Oledzki25b501a2008-01-06 16:36:16 +0100339
Willy Tarreau48494c02007-11-30 10:41:39 +0100340 pendconn_free(pc);
Willy Tarreaufdccded2008-08-29 18:19:04 +0200341 task_wakeup(sess->task, TASK_WOKEN_RES);
Willy Tarreau48494c02007-11-30 10:41:39 +0100342 xferred++;
343 }
344 }
345 return xferred;
346}
347
348/* Check for pending connections at the backend, and assign some of them to
349 * the server coming up. The server's weight is checked before being assigned
350 * connections it may not be able to handle. The total number of transferred
351 * connections is returned.
352 */
353static int check_for_pending(struct server *s)
354{
355 int xferred;
356
357 if (!s->eweight)
358 return 0;
359
360 for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) {
361 struct session *sess;
362 struct pendconn *p;
363
364 p = pendconn_from_px(s->proxy);
365 if (!p)
366 break;
Willy Tarreau3fdb3662012-11-12 00:42:33 +0100367 p->sess->target = &s->obj_type;
Willy Tarreau48494c02007-11-30 10:41:39 +0100368 sess = p->sess;
369 pendconn_free(p);
Willy Tarreaufdccded2008-08-29 18:19:04 +0200370 task_wakeup(sess->task, TASK_WOKEN_RES);
Willy Tarreau48494c02007-11-30 10:41:39 +0100371 }
372 return xferred;
373}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200374
Justin Karnegeseb2c24a2012-05-24 15:28:52 -0700375/* Shutdown all connections of a server. The caller must pass a termination
376 * code in <why>, which must be one of SN_ERR_* indicating the reason for the
377 * shutdown.
Simon Hormane0d1bfb2011-06-21 14:34:58 +0900378 */
Justin Karnegeseb2c24a2012-05-24 15:28:52 -0700379static void shutdown_sessions(struct server *srv, int why)
Simon Hormane0d1bfb2011-06-21 14:34:58 +0900380{
381 struct session *session, *session_bck;
382
Willy Tarreaua2a64e92011-09-07 23:01:56 +0200383 list_for_each_entry_safe(session, session_bck, &srv->actconns, by_srv)
384 if (session->srv_conn == srv)
Justin Karnegeseb2c24a2012-05-24 15:28:52 -0700385 session_shutdown(session, why);
Simon Hormane0d1bfb2011-06-21 14:34:58 +0900386}
387
Justin Karnegeseb2c24a2012-05-24 15:28:52 -0700388/* Shutdown all connections of all backup servers of a proxy. The caller must
389 * pass a termination code in <why>, which must be one of SN_ERR_* indicating
390 * the reason for the shutdown.
391 */
392static void shutdown_backup_sessions(struct proxy *px, int why)
393{
394 struct server *srv;
395
396 for (srv = px->srv; srv != NULL; srv = srv->next)
397 if (srv->state & SRV_BACKUP)
398 shutdown_sessions(srv, why);
399}
400
Willy Tarreaubaaee002006-06-26 02:48:02 +0200401/* Sets server <s> down, notifies by all available means, recounts the
402 * remaining servers on the proxy and transfers queued sessions whenever
Willy Tarreau5af3a692007-07-24 23:32:33 +0200403 * possible to other servers. It automatically recomputes the number of
404 * servers, but not the map.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200405 */
Simon Horman4a741432013-02-23 15:35:38 +0900406void set_server_down(struct check *check)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200407{
Simon Horman4a741432013-02-23 15:35:38 +0900408 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100409 struct server *srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200410 int xferred;
411
Cyril Bontécd19e512010-01-31 22:34:03 +0100412 if (s->state & SRV_MAINTAIN) {
Simon Horman58c32972013-11-25 10:46:38 +0900413 check->health = check->rise;
Cyril Bontécd19e512010-01-31 22:34:03 +0100414 }
415
Simon Horman58c32972013-11-25 10:46:38 +0900416 if ((s->state & SRV_RUNNING && check->health == check->rise) || s->track) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100417 int srv_was_paused = s->state & SRV_GOINGDOWN;
Willy Tarreaud64d2252010-10-17 17:16:42 +0200418 int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200419
420 s->last_change = now.tv_sec;
Willy Tarreau48494c02007-11-30 10:41:39 +0100421 s->state &= ~(SRV_RUNNING | SRV_GOINGDOWN);
Willy Tarreau9580d162012-05-19 19:07:40 +0200422 if (s->proxy->lbprm.set_server_status_down)
423 s->proxy->lbprm.set_server_status_down(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200424
Simon Hormane0d1bfb2011-06-21 14:34:58 +0900425 if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
Justin Karnegeseb2c24a2012-05-24 15:28:52 -0700426 shutdown_sessions(s, SN_ERR_DOWN);
Simon Hormane0d1bfb2011-06-21 14:34:58 +0900427
Willy Tarreaubaaee002006-06-26 02:48:02 +0200428 /* we might have sessions queued on this server and waiting for
429 * a connection. Those which are redispatchable will be queued
430 * to another server or to the proxy itself.
431 */
Willy Tarreau48494c02007-11-30 10:41:39 +0100432 xferred = redistribute_pending(s);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100433
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100434 chunk_reset(&trash);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100435
Cyril Bontécd19e512010-01-31 22:34:03 +0100436 if (s->state & SRV_MAINTAIN) {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100437 chunk_appendf(&trash,
438 "%sServer %s/%s is DOWN for maintenance", s->state & SRV_BACKUP ? "Backup " : "",
439 s->proxy->id, s->id);
Cyril Bontécd19e512010-01-31 22:34:03 +0100440 } else {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100441 chunk_appendf(&trash,
442 "%sServer %s/%s is DOWN", s->state & SRV_BACKUP ? "Backup " : "",
443 s->proxy->id, s->id);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100444
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100445 server_status_printf(&trash, s,
Simon Horman4a741432013-02-23 15:35:38 +0900446 ((!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : 0),
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100447 xferred);
Cyril Bontécd19e512010-01-31 22:34:03 +0100448 }
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100449 Warning("%s.\n", trash.str);
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200450
Willy Tarreau48494c02007-11-30 10:41:39 +0100451 /* we don't send an alert if the server was previously paused */
452 if (srv_was_paused)
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100453 send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
Willy Tarreau48494c02007-11-30 10:41:39 +0100454 else
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100455 send_log(s->proxy, LOG_ALERT, "%s.\n", trash.str);
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200456
Willy Tarreaud64d2252010-10-17 17:16:42 +0200457 if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
Willy Tarreau48494c02007-11-30 10:41:39 +0100458 set_backend_down(s->proxy);
459
Krzysztof Piotr Oledzki052d4fd2009-10-04 14:52:57 +0200460 s->counters.down_trans++;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100461
Willy Tarreau1a53a3a2013-12-11 15:27:05 +0100462 for (srv = s->trackers; srv; srv = srv->tracknext)
463 if (!(srv->state & SRV_MAINTAIN))
464 /* Only notify tracking servers that are not already in maintenance. */
465 set_server_down(&srv->check);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200466 }
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100467
Simon Horman125d0992013-02-24 17:23:38 +0900468 check->health = 0; /* failure */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200469}
470
Simon Horman4a741432013-02-23 15:35:38 +0900471void set_server_up(struct check *check) {
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100472
Simon Horman4a741432013-02-23 15:35:38 +0900473 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100474 struct server *srv;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100475 int xferred;
Willy Tarreau45446782012-03-09 17:16:09 +0100476 unsigned int old_state = s->state;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100477
Cyril Bontécd19e512010-01-31 22:34:03 +0100478 if (s->state & SRV_MAINTAIN) {
Simon Horman58c32972013-11-25 10:46:38 +0900479 check->health = check->rise;
Cyril Bontécd19e512010-01-31 22:34:03 +0100480 }
481
Willy Tarreaubb9665e2013-12-14 16:14:15 +0100482 if (s->track ||
483 (s->check.health == s->check.rise && (s->agent.health >= s->agent.rise || !(s->agent.state & CHK_ST_ENABLED))) ||
484 (s->agent.health == s->agent.rise && (s->check.health >= s->check.rise || !(s->check.state & CHK_ST_ENABLED)))) {
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100485 if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
486 if (s->proxy->last_change < now.tv_sec) // ignore negative times
487 s->proxy->down_time += now.tv_sec - s->proxy->last_change;
488 s->proxy->last_change = now.tv_sec;
489 }
490
491 if (s->last_change < now.tv_sec) // ignore negative times
492 s->down_time += now.tv_sec - s->last_change;
493
494 s->last_change = now.tv_sec;
495 s->state |= SRV_RUNNING;
Willy Tarreau45446782012-03-09 17:16:09 +0100496 s->state &= ~SRV_MAINTAIN;
Willy Tarreau33a08db2013-12-11 21:03:31 +0100497 s->check.state &= ~CHK_ST_PAUSED;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100498
499 if (s->slowstart > 0) {
500 s->state |= SRV_WARMINGUP;
Willy Tarreau2e993902011-10-31 11:53:20 +0100501 task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100502 }
Willy Tarreau004e0452013-11-21 11:22:01 +0100503
504 server_recalc_eweight(s);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100505
Justin Karnegeseb2c24a2012-05-24 15:28:52 -0700506 /* If the server is set with "on-marked-up shutdown-backup-sessions",
507 * and it's not a backup server and its effective weight is > 0,
508 * then it can accept new connections, so we shut down all sessions
509 * on all backup servers.
510 */
511 if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
512 !(s->state & SRV_BACKUP) && s->eweight)
513 shutdown_backup_sessions(s->proxy, SN_ERR_UP);
514
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100515 /* check if we can handle some connections queued at the proxy. We
516 * will take as many as we can handle.
517 */
518 xferred = check_for_pending(s);
519
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100520 chunk_reset(&trash);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100521
Willy Tarreau45446782012-03-09 17:16:09 +0100522 if (old_state & SRV_MAINTAIN) {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100523 chunk_appendf(&trash,
524 "%sServer %s/%s is UP (leaving maintenance)", s->state & SRV_BACKUP ? "Backup " : "",
525 s->proxy->id, s->id);
Cyril Bontécd19e512010-01-31 22:34:03 +0100526 } else {
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100527 chunk_appendf(&trash,
528 "%sServer %s/%s is UP", s->state & SRV_BACKUP ? "Backup " : "",
529 s->proxy->id, s->id);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100530
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100531 server_status_printf(&trash, s,
Simon Horman4a741432013-02-23 15:35:38 +0900532 ((!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL),
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100533 xferred);
Cyril Bontécd19e512010-01-31 22:34:03 +0100534 }
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100535
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100536 Warning("%s.\n", trash.str);
537 send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100538
Willy Tarreau1a53a3a2013-12-11 15:27:05 +0100539 for (srv = s->trackers; srv; srv = srv->tracknext)
540 if (!(srv->state & SRV_MAINTAIN))
541 /* Only notify tracking servers if they're not in maintenance. */
542 set_server_up(&srv->check);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100543 }
544
Simon Horman58c32972013-11-25 10:46:38 +0900545 if (check->health >= check->rise)
546 check->health = check->rise + check->fall - 1; /* OK now */
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100547
548}
549
Simon Horman4a741432013-02-23 15:35:38 +0900550static void set_server_disabled(struct check *check) {
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100551
Simon Horman4a741432013-02-23 15:35:38 +0900552 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100553 struct server *srv;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100554 int xferred;
555
556 s->state |= SRV_GOINGDOWN;
Willy Tarreau9580d162012-05-19 19:07:40 +0200557 if (s->proxy->lbprm.set_server_status_down)
558 s->proxy->lbprm.set_server_status_down(s);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100559
560 /* we might have sessions queued on this server and waiting for
561 * a connection. Those which are redispatchable will be queued
562 * to another server or to the proxy itself.
563 */
564 xferred = redistribute_pending(s);
565
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100566 chunk_reset(&trash);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100567
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100568 chunk_appendf(&trash,
569 "Load-balancing on %sServer %s/%s is disabled",
570 s->state & SRV_BACKUP ? "Backup " : "",
571 s->proxy->id, s->id);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100572
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100573 server_status_printf(&trash, s,
Simon Horman4a741432013-02-23 15:35:38 +0900574 ((!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL),
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100575 xferred);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100576
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100577 Warning("%s.\n", trash.str);
578 send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100579
580 if (!s->proxy->srv_bck && !s->proxy->srv_act)
581 set_backend_down(s->proxy);
582
Willy Tarreau1a53a3a2013-12-11 15:27:05 +0100583 for (srv = s->trackers; srv; srv = srv->tracknext)
584 set_server_disabled(&srv->check);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100585}
586
Simon Horman4a741432013-02-23 15:35:38 +0900587static void set_server_enabled(struct check *check) {
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100588
Simon Horman4a741432013-02-23 15:35:38 +0900589 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100590 struct server *srv;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100591 int xferred;
592
593 s->state &= ~SRV_GOINGDOWN;
Willy Tarreau9580d162012-05-19 19:07:40 +0200594 if (s->proxy->lbprm.set_server_status_up)
595 s->proxy->lbprm.set_server_status_up(s);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100596
597 /* check if we can handle some connections queued at the proxy. We
598 * will take as many as we can handle.
599 */
600 xferred = check_for_pending(s);
601
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100602 chunk_reset(&trash);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100603
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100604 chunk_appendf(&trash,
605 "Load-balancing on %sServer %s/%s is enabled again",
606 s->state & SRV_BACKUP ? "Backup " : "",
607 s->proxy->id, s->id);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100608
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100609 server_status_printf(&trash, s,
Simon Horman4a741432013-02-23 15:35:38 +0900610 ((!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL),
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100611 xferred);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100612
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100613 Warning("%s.\n", trash.str);
614 send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100615
Willy Tarreau1a53a3a2013-12-11 15:27:05 +0100616 for (srv = s->trackers; srv; srv = srv->tracknext)
617 set_server_enabled(&srv->check);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100618}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200619
Simon Hormand8583062013-11-25 10:46:33 +0900620static void check_failed(struct check *check)
621{
622 struct server *s = check->server;
623
Simon Horman2f1f9552013-11-25 10:46:37 +0900624 /* The agent secondary check should only cause a server to be marked
625 * as down if check->status is HCHK_STATUS_L7STS, which indicates
626 * that the agent returned "fail", "stopped" or "down".
627 * The implication here is that failure to connect to the agent
628 * as a secondary check should not cause the server to be marked
629 * down. */
Willy Tarreau33434322013-12-11 21:15:19 +0100630 if ((check->state & CHK_ST_AGENT) && check->status != HCHK_STATUS_L7STS)
Simon Horman2f1f9552013-11-25 10:46:37 +0900631 return;
632
Simon Horman58c32972013-11-25 10:46:38 +0900633 if (check->health > check->rise) {
Simon Hormand8583062013-11-25 10:46:33 +0900634 check->health--; /* still good */
635 s->counters.failed_checks++;
636 }
637 else
638 set_server_down(check);
639}
640
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100641void health_adjust(struct server *s, short status)
642{
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100643 int failed;
644 int expire;
645
646 /* return now if observing nor health check is not enabled */
Willy Tarreau5b3a2022012-09-28 15:01:02 +0200647 if (!s->observe || !s->check.task)
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100648 return;
649
650 if (s->observe >= HANA_OBS_SIZE)
651 return;
652
Willy Tarreaubb956662013-01-24 00:37:39 +0100653 if (status >= HANA_STATUS_SIZE || !analyze_statuses[status].desc)
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100654 return;
655
656 switch (analyze_statuses[status].lr[s->observe - 1]) {
657 case 1:
658 failed = 1;
659 break;
660
661 case 2:
662 failed = 0;
663 break;
664
665 default:
666 return;
667 }
668
669 if (!failed) {
670 /* good: clear consecutive_errors */
671 s->consecutive_errors = 0;
672 return;
673 }
674
675 s->consecutive_errors++;
676
677 if (s->consecutive_errors < s->consecutive_errors_limit)
678 return;
679
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100680 chunk_printf(&trash, "Detected %d consecutive errors, last one was: %s",
681 s->consecutive_errors, get_analyze_status(status));
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100682
683 switch (s->onerror) {
684 case HANA_ONERR_FASTINTER:
685 /* force fastinter - nothing to do here as all modes force it */
686 break;
687
688 case HANA_ONERR_SUDDTH:
689 /* simulate a pre-fatal failed health check */
Simon Horman58c32972013-11-25 10:46:38 +0900690 if (s->check.health > s->check.rise)
691 s->check.health = s->check.rise + 1;
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100692
693 /* no break - fall through */
694
695 case HANA_ONERR_FAILCHK:
696 /* simulate a failed health check */
Simon Horman4a741432013-02-23 15:35:38 +0900697 set_server_check_status(&s->check, HCHK_STATUS_HANA, trash.str);
Simon Hormand8583062013-11-25 10:46:33 +0900698 check_failed(&s->check);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100699
700 break;
701
702 case HANA_ONERR_MARKDWN:
703 /* mark server down */
Simon Horman58c32972013-11-25 10:46:38 +0900704 s->check.health = s->check.rise;
Simon Horman4a741432013-02-23 15:35:38 +0900705 set_server_check_status(&s->check, HCHK_STATUS_HANA, trash.str);
706 set_server_down(&s->check);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100707
708 break;
709
710 default:
711 /* write a warning? */
712 break;
713 }
714
715 s->consecutive_errors = 0;
716 s->counters.failed_hana++;
717
Simon Horman66183002013-02-23 10:16:43 +0900718 if (s->check.fastinter) {
719 expire = tick_add(now_ms, MS_TO_TICKS(s->check.fastinter));
Sergiy Prykhodko1d57e502013-09-21 12:05:00 +0300720 if (s->check.task->expire > expire) {
Willy Tarreau5b3a2022012-09-28 15:01:02 +0200721 s->check.task->expire = expire;
Sergiy Prykhodko1d57e502013-09-21 12:05:00 +0300722 /* requeue check task with new expire */
723 task_queue(s->check.task);
724 }
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100725 }
726}
727
Willy Tarreauef781042010-01-27 11:53:01 +0100728static int httpchk_build_status_header(struct server *s, char *buffer)
729{
730 int sv_state;
731 int ratio;
732 int hlen = 0;
733 const char *srv_hlt_st[7] = { "DOWN", "DOWN %d/%d",
734 "UP %d/%d", "UP",
735 "NOLB %d/%d", "NOLB",
736 "no check" };
737
738 memcpy(buffer + hlen, "X-Haproxy-Server-State: ", 24);
739 hlen += 24;
740
Willy Tarreauff5ae352013-12-11 20:36:34 +0100741 if (!(s->check.state & CHK_ST_ENABLED))
742 sv_state = 6;
Willy Tarreauef781042010-01-27 11:53:01 +0100743 else if (s->state & SRV_RUNNING) {
Simon Horman58c32972013-11-25 10:46:38 +0900744 if (s->check.health == s->check.rise + s->check.fall - 1)
Willy Tarreauef781042010-01-27 11:53:01 +0100745 sv_state = 3; /* UP */
746 else
747 sv_state = 2; /* going down */
748
749 if (s->state & SRV_GOINGDOWN)
750 sv_state += 2;
751 } else {
Simon Horman125d0992013-02-24 17:23:38 +0900752 if (s->check.health)
Willy Tarreauef781042010-01-27 11:53:01 +0100753 sv_state = 1; /* going up */
754 else
755 sv_state = 0; /* DOWN */
756 }
757
758 hlen += sprintf(buffer + hlen,
759 srv_hlt_st[sv_state],
Simon Horman58c32972013-11-25 10:46:38 +0900760 (s->state & SRV_RUNNING) ? (s->check.health - s->check.rise + 1) : (s->check.health),
761 (s->state & SRV_RUNNING) ? (s->check.fall) : (s->check.rise));
Willy Tarreauef781042010-01-27 11:53:01 +0100762
763 hlen += sprintf(buffer + hlen, "; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d",
764 s->proxy->id, s->id,
765 global.node,
766 (s->eweight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
767 (s->proxy->lbprm.tot_weight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
768 s->cur_sess, s->proxy->beconn - s->proxy->nbpend,
769 s->nbpend);
770
771 if ((s->state & SRV_WARMINGUP) &&
772 now.tv_sec < s->last_change + s->slowstart &&
773 now.tv_sec >= s->last_change) {
774 ratio = MAX(1, 100 * (now.tv_sec - s->last_change) / s->slowstart);
775 hlen += sprintf(buffer + hlen, "; throttle=%d%%", ratio);
776 }
777
778 buffer[hlen++] = '\r';
779 buffer[hlen++] = '\n';
780
781 return hlen;
782}
783
Willy Tarreau20a18342013-12-05 00:31:46 +0100784/* Check the connection. If an error has already been reported or the socket is
785 * closed, keep errno intact as it is supposed to contain the valid error code.
786 * If no error is reported, check the socket's error queue using getsockopt().
787 * Warning, this must be done only once when returning from poll, and never
788 * after an I/O error was attempted, otherwise the error queue might contain
789 * inconsistent errors. If an error is detected, the CO_FL_ERROR is set on the
790 * socket. Returns non-zero if an error was reported, zero if everything is
791 * clean (including a properly closed socket).
792 */
793static int retrieve_errno_from_socket(struct connection *conn)
794{
795 int skerr;
796 socklen_t lskerr = sizeof(skerr);
797
798 if (conn->flags & CO_FL_ERROR && ((errno && errno != EAGAIN) || !conn->ctrl))
799 return 1;
800
Willy Tarreauf79c8172013-10-21 16:30:56 +0200801 if (!(conn->flags & CO_FL_CTRL_READY) || !conn->ctrl)
Willy Tarreau20a18342013-12-05 00:31:46 +0100802 return 0;
803
804 if (getsockopt(conn->t.sock.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0)
805 errno = skerr;
806
807 if (errno == EAGAIN)
808 errno = 0;
809
810 if (!errno) {
811 /* we could not retrieve an error, that does not mean there is
812 * none. Just don't change anything and only report the prior
813 * error if any.
814 */
815 if (conn->flags & CO_FL_ERROR)
816 return 1;
817 else
818 return 0;
819 }
820
821 conn->flags |= CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_SOCK_RD_SH;
822 return 1;
823}
824
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100825/* Try to collect as much information as possible on the connection status,
826 * and adjust the server status accordingly. It may make use of <errno_bck>
827 * if non-null when the caller is absolutely certain of its validity (eg:
828 * checked just after a syscall). If the caller doesn't have a valid errno,
829 * it can pass zero, and retrieve_errno_from_socket() will be called to try
830 * to extract errno from the socket. If no error is reported, it will consider
831 * the <expired> flag. This is intended to be used when a connection error was
832 * reported in conn->flags or when a timeout was reported in <expired>. The
833 * function takes care of not updating a server status which was already set.
834 * All situations where at least one of <expired> or CO_FL_ERROR are set
835 * produce a status.
836 */
837static void chk_report_conn_err(struct connection *conn, int errno_bck, int expired)
838{
839 struct check *check = conn->owner;
840 const char *err_msg;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200841 struct chunk *chk;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100842
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100843 if (check->result != CHK_RES_UNKNOWN)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100844 return;
845
846 errno = errno_bck;
847 if (!errno || errno == EAGAIN)
848 retrieve_errno_from_socket(conn);
849
850 if (!(conn->flags & CO_FL_ERROR) && !expired)
851 return;
852
853 /* we'll try to build a meaningful error message depending on the
854 * context of the error possibly present in conn->err_code, and the
855 * socket error possibly collected above. This is useful to know the
856 * exact step of the L6 layer (eg: SSL handshake).
857 */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200858 chk = get_trash_chunk();
859
860 if (check->type == PR_O2_TCPCHK_CHK) {
861 chunk_printf(chk, " at step %d of tcp-check", tcpcheck_get_step_id(check->server));
862 /* we were looking for a string */
863 if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT) {
864 if (check->current_step->string)
865 chunk_appendf(chk, " (string '%s')", check->current_step->string);
866 else if (check->current_step->expect_regex)
867 chunk_appendf(chk, " (expect regex)");
868 }
869 else if (check->current_step && check->current_step->action == TCPCHK_ACT_SEND) {
870 chunk_appendf(chk, " (send)");
871 }
872 }
873
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100874 if (conn->err_code) {
875 if (errno && errno != EAGAIN)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200876 chunk_printf(&trash, "%s (%s)%s", conn_err_code_str(conn), strerror(errno), chk->str);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100877 else
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200878 chunk_printf(&trash, "%s%s", conn_err_code_str(conn), chk->str);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100879 err_msg = trash.str;
880 }
881 else {
882 if (errno && errno != EAGAIN) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200883 chunk_printf(&trash, "%s%s", strerror(errno), chk->str);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100884 err_msg = trash.str;
885 }
886 else {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200887 err_msg = chk->str;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100888 }
889 }
890
891 if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L4_CONN)) == CO_FL_WAIT_L4_CONN) {
892 /* L4 not established (yet) */
893 if (conn->flags & CO_FL_ERROR)
894 set_server_check_status(check, HCHK_STATUS_L4CON, err_msg);
895 else if (expired)
896 set_server_check_status(check, HCHK_STATUS_L4TOUT, err_msg);
897 }
898 else if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L6_CONN)) == CO_FL_WAIT_L6_CONN) {
899 /* L6 not established (yet) */
900 if (conn->flags & CO_FL_ERROR)
901 set_server_check_status(check, HCHK_STATUS_L6RSP, err_msg);
902 else if (expired)
903 set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
904 }
905 else if (conn->flags & CO_FL_ERROR) {
906 /* I/O error after connection was established and before we could diagnose */
907 set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
908 }
909 else if (expired) {
910 /* connection established but expired check */
911 if (check->type == PR_O2_SSL3_CHK)
912 set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
913 else /* HTTP, SMTP, ... */
914 set_server_check_status(check, HCHK_STATUS_L7TOUT, err_msg);
915 }
916
917 return;
918}
919
Willy Tarreaubaaee002006-06-26 02:48:02 +0200920/*
921 * This function is used only for server health-checks. It handles
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200922 * the connection acknowledgement. If the proxy requires L7 health-checks,
923 * it sends the request. In other cases, it calls set_server_check_status()
Simon Horman4a741432013-02-23 15:35:38 +0900924 * to set check->status, check->duration and check->result.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200925 */
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200926static void event_srv_chk_w(struct connection *conn)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200927{
Simon Horman4a741432013-02-23 15:35:38 +0900928 struct check *check = conn->owner;
929 struct server *s = check->server;
Simon Horman4a741432013-02-23 15:35:38 +0900930 struct task *t = check->task;
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200931
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100932 if (unlikely(check->result == CHK_RES_FAILED))
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100933 goto out_wakeup;
934
935 if (conn->flags & (CO_FL_HANDSHAKE | CO_FL_WAIT_WR))
936 return;
937
Willy Tarreau20a18342013-12-05 00:31:46 +0100938 if (retrieve_errno_from_socket(conn)) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100939 chk_report_conn_err(conn, errno, 0);
Willy Tarreau20a18342013-12-05 00:31:46 +0100940 __conn_data_stop_both(conn);
941 goto out_wakeup;
942 }
Krzysztof Piotr Oledzki6492db52010-01-02 22:03:01 +0100943
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100944 if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH)) {
Willy Tarreau20a18342013-12-05 00:31:46 +0100945 /* if the output is closed, we can't do anything */
946 conn->flags |= CO_FL_ERROR;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100947 chk_report_conn_err(conn, 0, 0);
Willy Tarreau20a18342013-12-05 00:31:46 +0100948 goto out_wakeup;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200949 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200950
Willy Tarreau06559ac2013-12-05 01:53:08 +0100951 /* here, we know that the connection is established. That's enough for
952 * a pure TCP check.
953 */
954 if (!check->type)
955 goto out_wakeup;
956
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200957 if (check->type == PR_O2_TCPCHK_CHK) {
958 tcpcheck_main(conn);
959 return;
960 }
961
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100962 if (check->bo->o) {
963 conn->xprt->snd_buf(conn, check->bo, MSG_DONTWAIT | MSG_NOSIGNAL);
964 if (conn->flags & CO_FL_ERROR) {
965 chk_report_conn_err(conn, errno, 0);
966 __conn_data_stop_both(conn);
967 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200968 }
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100969 if (check->bo->o)
970 return;
971 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200972
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100973 /* full request sent, we allow up to <timeout.check> if nonzero for a response */
974 if (s->proxy->timeout.check) {
975 t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
976 task_queue(t);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200977 }
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100978 goto out_nowake;
979
Willy Tarreau83749182007-04-15 20:56:27 +0200980 out_wakeup:
Willy Tarreaufdccded2008-08-29 18:19:04 +0200981 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreau83749182007-04-15 20:56:27 +0200982 out_nowake:
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200983 __conn_data_stop_send(conn); /* nothing more to write */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200984}
985
Willy Tarreaubaaee002006-06-26 02:48:02 +0200986/*
Willy Tarreauf3c69202006-07-09 16:42:34 +0200987 * This function is used only for server health-checks. It handles the server's
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +0200988 * reply to an HTTP request, SSL HELLO or MySQL client Auth. It calls
Simon Horman4a741432013-02-23 15:35:38 +0900989 * set_server_check_status() to update check->status, check->duration
990 * and check->result.
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200991
992 * The set_server_check_status function is called with HCHK_STATUS_L7OKD if
993 * an HTTP server replies HTTP 2xx or 3xx (valid responses), if an SMTP server
994 * returns 2xx, HCHK_STATUS_L6OK if an SSL server returns at least 5 bytes in
995 * response to an SSL HELLO (the principle is that this is enough to
996 * distinguish between an SSL server and a pure TCP relay). All other cases will
997 * call it with a proper error status like HCHK_STATUS_L7STS, HCHK_STATUS_L6RSP,
998 * etc.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200999 */
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001000static void event_srv_chk_r(struct connection *conn)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001001{
Simon Horman4a741432013-02-23 15:35:38 +09001002 struct check *check = conn->owner;
1003 struct server *s = check->server;
1004 struct task *t = check->task;
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +02001005 char *desc;
Willy Tarreau03938182010-03-17 21:52:07 +01001006 int done;
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001007 unsigned short msglen;
Willy Tarreau83749182007-04-15 20:56:27 +02001008
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001009 if (unlikely(check->result == CHK_RES_FAILED))
Willy Tarreau83749182007-04-15 20:56:27 +02001010 goto out_wakeup;
Willy Tarreau83749182007-04-15 20:56:27 +02001011
Willy Tarreau5a78f362012-11-23 12:47:05 +01001012 if (conn->flags & (CO_FL_HANDSHAKE | CO_FL_WAIT_RD))
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001013 return;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02001014
1015 if (check->type == PR_O2_TCPCHK_CHK) {
1016 tcpcheck_main(conn);
1017 return;
1018 }
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001019
Willy Tarreau83749182007-04-15 20:56:27 +02001020 /* Warning! Linux returns EAGAIN on SO_ERROR if data are still available
1021 * but the connection was closed on the remote end. Fortunately, recv still
1022 * works correctly and we don't need to do the getsockopt() on linux.
1023 */
Nick Chalk57b1bf72010-03-16 15:50:46 +00001024
1025 /* Set buffer to point to the end of the data already read, and check
1026 * that there is free space remaining. If the buffer is full, proceed
1027 * with running the checks without attempting another socket read.
1028 */
Nick Chalk57b1bf72010-03-16 15:50:46 +00001029
Willy Tarreau03938182010-03-17 21:52:07 +01001030 done = 0;
Nick Chalk57b1bf72010-03-16 15:50:46 +00001031
Simon Horman4a741432013-02-23 15:35:38 +09001032 conn->xprt->rcv_buf(conn, check->bi, check->bi->size);
Willy Tarreauf1503172012-09-28 19:39:36 +02001033 if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH)) {
Willy Tarreau03938182010-03-17 21:52:07 +01001034 done = 1;
Simon Horman4a741432013-02-23 15:35:38 +09001035 if ((conn->flags & CO_FL_ERROR) && !check->bi->i) {
Willy Tarreauf1503172012-09-28 19:39:36 +02001036 /* Report network errors only if we got no other data. Otherwise
1037 * we'll let the upper layers decide whether the response is OK
1038 * or not. It is very common that an RST sent by the server is
1039 * reported as an error just after the last data chunk.
1040 */
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001041 chk_report_conn_err(conn, errno, 0);
Willy Tarreauc1a07962010-03-16 20:55:43 +01001042 goto out_wakeup;
1043 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001044 }
1045
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001046
Willy Tarreau03938182010-03-17 21:52:07 +01001047 /* Intermediate or complete response received.
Simon Horman4a741432013-02-23 15:35:38 +09001048 * Terminate string in check->bi->data buffer.
Willy Tarreau03938182010-03-17 21:52:07 +01001049 */
Simon Horman4a741432013-02-23 15:35:38 +09001050 if (check->bi->i < check->bi->size)
1051 check->bi->data[check->bi->i] = '\0';
Willy Tarreau03938182010-03-17 21:52:07 +01001052 else {
Simon Horman4a741432013-02-23 15:35:38 +09001053 check->bi->data[check->bi->i - 1] = '\0';
Willy Tarreau03938182010-03-17 21:52:07 +01001054 done = 1; /* buffer full, don't wait for more data */
1055 }
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +02001056
Nick Chalk57b1bf72010-03-16 15:50:46 +00001057 /* Run the checks... */
Simon Horman4a741432013-02-23 15:35:38 +09001058 switch (check->type) {
Willy Tarreau1620ec32011-08-06 17:05:02 +02001059 case PR_O2_HTTP_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001060 if (!done && check->bi->i < strlen("HTTP/1.0 000\r"))
Willy Tarreau03938182010-03-17 21:52:07 +01001061 goto wait_more_data;
1062
Willy Tarreauc7dd71a2007-11-30 08:33:21 +01001063 /* Check if the server speaks HTTP 1.X */
Simon Horman4a741432013-02-23 15:35:38 +09001064 if ((check->bi->i < strlen("HTTP/1.0 000\r")) ||
1065 (memcmp(check->bi->data, "HTTP/1.", 7) != 0 ||
1066 (*(check->bi->data + 12) != ' ' && *(check->bi->data + 12) != '\r')) ||
1067 !isdigit((unsigned char) *(check->bi->data + 9)) || !isdigit((unsigned char) *(check->bi->data + 10)) ||
1068 !isdigit((unsigned char) *(check->bi->data + 11))) {
1069 cut_crlf(check->bi->data);
1070 set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +02001071
Willy Tarreauc7dd71a2007-11-30 08:33:21 +01001072 goto out_wakeup;
1073 }
1074
Simon Horman4a741432013-02-23 15:35:38 +09001075 check->code = str2uic(check->bi->data + 9);
1076 desc = ltrim(check->bi->data + 12, ' ');
Nick Chalk57b1bf72010-03-16 15:50:46 +00001077
Willy Tarreaubd741542010-03-16 18:46:54 +01001078 if ((s->proxy->options & PR_O_DISABLE404) &&
Simon Horman4a741432013-02-23 15:35:38 +09001079 (s->state & SRV_RUNNING) && (check->code == 404)) {
Nick Chalk57b1bf72010-03-16 15:50:46 +00001080 /* 404 may be accepted as "stopping" only if the server was up */
1081 cut_crlf(desc);
Simon Horman4a741432013-02-23 15:35:38 +09001082 set_server_check_status(check, HCHK_STATUS_L7OKCD, desc);
Nick Chalk57b1bf72010-03-16 15:50:46 +00001083 }
Willy Tarreaubd741542010-03-16 18:46:54 +01001084 else if (s->proxy->options2 & PR_O2_EXP_TYPE) {
1085 /* Run content verification check... We know we have at least 13 chars */
1086 if (!httpchk_expect(s, done))
1087 goto wait_more_data;
1088 }
1089 /* check the reply : HTTP/1.X 2xx and 3xx are OK */
Simon Horman4a741432013-02-23 15:35:38 +09001090 else if (*(check->bi->data + 9) == '2' || *(check->bi->data + 9) == '3') {
Willy Tarreaubd741542010-03-16 18:46:54 +01001091 cut_crlf(desc);
Simon Horman4a741432013-02-23 15:35:38 +09001092 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Willy Tarreaubd741542010-03-16 18:46:54 +01001093 }
Nick Chalk57b1bf72010-03-16 15:50:46 +00001094 else {
1095 cut_crlf(desc);
Simon Horman4a741432013-02-23 15:35:38 +09001096 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Nick Chalk57b1bf72010-03-16 15:50:46 +00001097 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001098 break;
1099
1100 case PR_O2_SSL3_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001101 if (!done && check->bi->i < 5)
Willy Tarreau03938182010-03-17 21:52:07 +01001102 goto wait_more_data;
1103
Willy Tarreauc7dd71a2007-11-30 08:33:21 +01001104 /* Check for SSLv3 alert or handshake */
Simon Horman4a741432013-02-23 15:35:38 +09001105 if ((check->bi->i >= 5) && (*check->bi->data == 0x15 || *check->bi->data == 0x16))
1106 set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +02001107 else
Simon Horman4a741432013-02-23 15:35:38 +09001108 set_server_check_status(check, HCHK_STATUS_L6RSP, NULL);
Willy Tarreau1620ec32011-08-06 17:05:02 +02001109 break;
1110
1111 case PR_O2_SMTP_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001112 if (!done && check->bi->i < strlen("000\r"))
Willy Tarreau03938182010-03-17 21:52:07 +01001113 goto wait_more_data;
1114
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +02001115 /* Check if the server speaks SMTP */
Simon Horman4a741432013-02-23 15:35:38 +09001116 if ((check->bi->i < strlen("000\r")) ||
1117 (*(check->bi->data + 3) != ' ' && *(check->bi->data + 3) != '\r') ||
1118 !isdigit((unsigned char) *check->bi->data) || !isdigit((unsigned char) *(check->bi->data + 1)) ||
1119 !isdigit((unsigned char) *(check->bi->data + 2))) {
1120 cut_crlf(check->bi->data);
1121 set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +02001122
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +02001123 goto out_wakeup;
1124 }
1125
Simon Horman4a741432013-02-23 15:35:38 +09001126 check->code = str2uic(check->bi->data);
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +02001127
Simon Horman4a741432013-02-23 15:35:38 +09001128 desc = ltrim(check->bi->data + 3, ' ');
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +02001129 cut_crlf(desc);
1130
Willy Tarreauc7dd71a2007-11-30 08:33:21 +01001131 /* Check for SMTP code 2xx (should be 250) */
Simon Horman4a741432013-02-23 15:35:38 +09001132 if (*check->bi->data == '2')
1133 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +02001134 else
Simon Horman4a741432013-02-23 15:35:38 +09001135 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Willy Tarreau1620ec32011-08-06 17:05:02 +02001136 break;
1137
Simon Hormana2b9dad2013-02-12 10:45:54 +09001138 case PR_O2_LB_AGENT_CHK: {
1139 short status = HCHK_STATUS_L7RSP;
1140 const char *desc = "Unknown feedback string";
1141 const char *down_cmd = NULL;
Simon Horman671b6f02013-11-25 10:46:39 +09001142 int disabled;
Willy Tarreau9809b782013-12-11 21:40:11 +01001143 char *p;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001144
Willy Tarreau9809b782013-12-11 21:40:11 +01001145 /* get a complete line first */
1146 p = check->bi->data;
1147 while (*p && *p != '\n' && *p != '\r')
1148 p++;
1149
1150 if (!*p) {
1151 if (!done)
1152 goto wait_more_data;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001153
Willy Tarreau9809b782013-12-11 21:40:11 +01001154 /* at least inform the admin that the agent is mis-behaving */
1155 set_server_check_status(check, check->status, "Ignoring incomplete line from agent");
1156 break;
1157 }
1158 *p = 0;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001159
Simon Horman671b6f02013-11-25 10:46:39 +09001160 /*
1161 * The agent may have been disabled after a check was
1162 * initialised. If so, ignore weight changes and drain
1163 * settings from the agent. Note that the setting is
1164 * always present in the state of the agent the server,
1165 * regardless of if the agent is being run as a primary or
1166 * secondary check. That is, regardless of if the check
1167 * parameter of this function is the agent or check field
1168 * of the server.
1169 */
Willy Tarreau2e10f5a2013-12-11 20:11:55 +01001170 disabled = !(check->server->agent.state & CHK_ST_ENABLED);
Simon Horman671b6f02013-11-25 10:46:39 +09001171
Simon Horman4a741432013-02-23 15:35:38 +09001172 if (strchr(check->bi->data, '%')) {
Simon Horman671b6f02013-11-25 10:46:39 +09001173 if (disabled)
1174 break;
Simon Horman4a741432013-02-23 15:35:38 +09001175 desc = server_parse_weight_change_request(s, check->bi->data);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001176 if (!desc) {
1177 status = HCHK_STATUS_L7OKD;
Simon Horman4a741432013-02-23 15:35:38 +09001178 desc = check->bi->data;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001179 }
Simon Horman4a741432013-02-23 15:35:38 +09001180 } else if (!strcasecmp(check->bi->data, "drain")) {
Simon Horman671b6f02013-11-25 10:46:39 +09001181 if (disabled)
1182 break;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001183 desc = server_parse_weight_change_request(s, "0%");
1184 if (!desc) {
1185 desc = "drain";
1186 status = HCHK_STATUS_L7OKD;
1187 }
Simon Horman4a741432013-02-23 15:35:38 +09001188 } else if (!strncasecmp(check->bi->data, "down", strlen("down"))) {
Simon Hormana2b9dad2013-02-12 10:45:54 +09001189 down_cmd = "down";
Simon Horman4a741432013-02-23 15:35:38 +09001190 } else if (!strncasecmp(check->bi->data, "stopped", strlen("stopped"))) {
Simon Hormana2b9dad2013-02-12 10:45:54 +09001191 down_cmd = "stopped";
Simon Horman4a741432013-02-23 15:35:38 +09001192 } else if (!strncasecmp(check->bi->data, "fail", strlen("fail"))) {
Simon Hormana2b9dad2013-02-12 10:45:54 +09001193 down_cmd = "fail";
1194 }
1195
1196 if (down_cmd) {
Simon Horman4a741432013-02-23 15:35:38 +09001197 const char *end = check->bi->data + strlen(down_cmd);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001198 /*
1199 * The command keyword must terminated the string or
1200 * be followed by a blank.
1201 */
Willy Tarreau8b4c3762013-02-13 12:47:12 +01001202 if (end[0] == '\0' || end[0] == ' ' || end[0] == '\t') {
Simon Hormana2b9dad2013-02-12 10:45:54 +09001203 status = HCHK_STATUS_L7STS;
Simon Horman80fefae2013-11-25 10:46:34 +09001204 desc = check->bi->data;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001205 }
1206 }
1207
Simon Horman4a741432013-02-23 15:35:38 +09001208 set_server_check_status(check, status, desc);
Simon Horman8c3d0be2013-11-25 10:46:40 +09001209 set_server_drain_state(check->server);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001210 break;
1211 }
1212
Willy Tarreau1620ec32011-08-06 17:05:02 +02001213 case PR_O2_PGSQL_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001214 if (!done && check->bi->i < 9)
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001215 goto wait_more_data;
1216
Simon Horman4a741432013-02-23 15:35:38 +09001217 if (check->bi->data[0] == 'R') {
1218 set_server_check_status(check, HCHK_STATUS_L7OKD, "PostgreSQL server is ok");
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001219 }
1220 else {
Simon Horman4a741432013-02-23 15:35:38 +09001221 if ((check->bi->data[0] == 'E') && (check->bi->data[5]!=0) && (check->bi->data[6]!=0))
1222 desc = &check->bi->data[6];
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001223 else
1224 desc = "PostgreSQL unknown error";
1225
Simon Horman4a741432013-02-23 15:35:38 +09001226 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001227 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001228 break;
1229
1230 case PR_O2_REDIS_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001231 if (!done && check->bi->i < 7)
Hervé COMMOWICKec032d62011-08-05 16:23:48 +02001232 goto wait_more_data;
1233
Simon Horman4a741432013-02-23 15:35:38 +09001234 if (strcmp(check->bi->data, "+PONG\r\n") == 0) {
1235 set_server_check_status(check, HCHK_STATUS_L7OKD, "Redis server is ok");
Hervé COMMOWICKec032d62011-08-05 16:23:48 +02001236 }
1237 else {
Simon Horman4a741432013-02-23 15:35:38 +09001238 set_server_check_status(check, HCHK_STATUS_L7STS, check->bi->data);
Hervé COMMOWICKec032d62011-08-05 16:23:48 +02001239 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001240 break;
1241
1242 case PR_O2_MYSQL_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001243 if (!done && check->bi->i < 5)
Willy Tarreau03938182010-03-17 21:52:07 +01001244 goto wait_more_data;
1245
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001246 if (s->proxy->check_len == 0) { // old mode
Simon Horman4a741432013-02-23 15:35:38 +09001247 if (*(check->bi->data + 4) != '\xff') {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001248 /* We set the MySQL Version in description for information purpose
1249 * FIXME : it can be cool to use MySQL Version for other purpose,
1250 * like mark as down old MySQL server.
1251 */
Simon Horman4a741432013-02-23 15:35:38 +09001252 if (check->bi->i > 51) {
1253 desc = ltrim(check->bi->data + 5, ' ');
1254 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001255 }
1256 else {
1257 if (!done)
1258 goto wait_more_data;
1259 /* it seems we have a OK packet but without a valid length,
1260 * it must be a protocol error
1261 */
Simon Horman4a741432013-02-23 15:35:38 +09001262 set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001263 }
Hervé COMMOWICK698ae002010-01-12 09:25:13 +01001264 }
1265 else {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001266 /* An error message is attached in the Error packet */
Simon Horman4a741432013-02-23 15:35:38 +09001267 desc = ltrim(check->bi->data + 7, ' ');
1268 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001269 }
1270 } else {
Simon Horman4a741432013-02-23 15:35:38 +09001271 unsigned int first_packet_len = ((unsigned int) *check->bi->data) +
1272 (((unsigned int) *(check->bi->data + 1)) << 8) +
1273 (((unsigned int) *(check->bi->data + 2)) << 16);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001274
Simon Horman4a741432013-02-23 15:35:38 +09001275 if (check->bi->i == first_packet_len + 4) {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001276 /* MySQL Error packet always begin with field_count = 0xff */
Simon Horman4a741432013-02-23 15:35:38 +09001277 if (*(check->bi->data + 4) != '\xff') {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001278 /* We have only one MySQL packet and it is a Handshake Initialization packet
1279 * but we need to have a second packet to know if it is alright
1280 */
Simon Horman4a741432013-02-23 15:35:38 +09001281 if (!done && check->bi->i < first_packet_len + 5)
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001282 goto wait_more_data;
1283 }
1284 else {
1285 /* We have only one packet and it is an Error packet,
1286 * an error message is attached, so we can display it
1287 */
Simon Horman4a741432013-02-23 15:35:38 +09001288 desc = &check->bi->data[7];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001289 //Warning("onlyoneERR: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001290 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001291 }
Simon Horman4a741432013-02-23 15:35:38 +09001292 } else if (check->bi->i > first_packet_len + 4) {
1293 unsigned int second_packet_len = ((unsigned int) *(check->bi->data + first_packet_len + 4)) +
1294 (((unsigned int) *(check->bi->data + first_packet_len + 5)) << 8) +
1295 (((unsigned int) *(check->bi->data + first_packet_len + 6)) << 16);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001296
Simon Horman4a741432013-02-23 15:35:38 +09001297 if (check->bi->i == first_packet_len + 4 + second_packet_len + 4 ) {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001298 /* We have 2 packets and that's good */
1299 /* Check if the second packet is a MySQL Error packet or not */
Simon Horman4a741432013-02-23 15:35:38 +09001300 if (*(check->bi->data + first_packet_len + 8) != '\xff') {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001301 /* No error packet */
1302 /* We set the MySQL Version in description for information purpose */
Simon Horman4a741432013-02-23 15:35:38 +09001303 desc = &check->bi->data[5];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001304 //Warning("2packetOK: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001305 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001306 }
1307 else {
1308 /* An error message is attached in the Error packet
1309 * so we can display it ! :)
1310 */
Simon Horman4a741432013-02-23 15:35:38 +09001311 desc = &check->bi->data[first_packet_len+11];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001312 //Warning("2packetERR: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001313 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001314 }
1315 }
1316 }
1317 else {
Willy Tarreau03938182010-03-17 21:52:07 +01001318 if (!done)
1319 goto wait_more_data;
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001320 /* it seems we have a Handshake Initialization packet but without a valid length,
Hervé COMMOWICK698ae002010-01-12 09:25:13 +01001321 * it must be a protocol error
1322 */
Simon Horman4a741432013-02-23 15:35:38 +09001323 desc = &check->bi->data[5];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001324 //Warning("protoerr: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001325 set_server_check_status(check, HCHK_STATUS_L7RSP, desc);
Hervé COMMOWICK698ae002010-01-12 09:25:13 +01001326 }
1327 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001328 break;
1329
1330 case PR_O2_LDAP_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001331 if (!done && check->bi->i < 14)
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001332 goto wait_more_data;
1333
1334 /* Check if the server speaks LDAP (ASN.1/BER)
1335 * http://en.wikipedia.org/wiki/Basic_Encoding_Rules
1336 * http://tools.ietf.org/html/rfc4511
1337 */
1338
1339 /* http://tools.ietf.org/html/rfc4511#section-4.1.1
1340 * LDAPMessage: 0x30: SEQUENCE
1341 */
Simon Horman4a741432013-02-23 15:35:38 +09001342 if ((check->bi->i < 14) || (*(check->bi->data) != '\x30')) {
1343 set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001344 }
1345 else {
1346 /* size of LDAPMessage */
Simon Horman4a741432013-02-23 15:35:38 +09001347 msglen = (*(check->bi->data + 1) & 0x80) ? (*(check->bi->data + 1) & 0x7f) : 0;
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001348
1349 /* http://tools.ietf.org/html/rfc4511#section-4.2.2
1350 * messageID: 0x02 0x01 0x01: INTEGER 1
1351 * protocolOp: 0x61: bindResponse
1352 */
1353 if ((msglen > 2) ||
Simon Horman4a741432013-02-23 15:35:38 +09001354 (memcmp(check->bi->data + 2 + msglen, "\x02\x01\x01\x61", 4) != 0)) {
1355 set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001356
1357 goto out_wakeup;
1358 }
1359
1360 /* size of bindResponse */
Simon Horman4a741432013-02-23 15:35:38 +09001361 msglen += (*(check->bi->data + msglen + 6) & 0x80) ? (*(check->bi->data + msglen + 6) & 0x7f) : 0;
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001362
1363 /* http://tools.ietf.org/html/rfc4511#section-4.1.9
1364 * ldapResult: 0x0a 0x01: ENUMERATION
1365 */
1366 if ((msglen > 4) ||
Simon Horman4a741432013-02-23 15:35:38 +09001367 (memcmp(check->bi->data + 7 + msglen, "\x0a\x01", 2) != 0)) {
1368 set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001369
1370 goto out_wakeup;
1371 }
1372
1373 /* http://tools.ietf.org/html/rfc4511#section-4.1.9
1374 * resultCode
1375 */
Simon Horman4a741432013-02-23 15:35:38 +09001376 check->code = *(check->bi->data + msglen + 9);
1377 if (check->code) {
1378 set_server_check_status(check, HCHK_STATUS_L7STS, "See RFC: http://tools.ietf.org/html/rfc4511#section-4.1.9");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001379 } else {
Simon Horman4a741432013-02-23 15:35:38 +09001380 set_server_check_status(check, HCHK_STATUS_L7OKD, "Success");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001381 }
1382 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001383 break;
1384
1385 default:
Willy Tarreau06559ac2013-12-05 01:53:08 +01001386 /* for other checks (eg: pure TCP), delegate to the main task */
Willy Tarreau1620ec32011-08-06 17:05:02 +02001387 break;
1388 } /* switch */
Willy Tarreau83749182007-04-15 20:56:27 +02001389
Willy Tarreauc7dd71a2007-11-30 08:33:21 +01001390 out_wakeup:
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001391 /* collect possible new errors */
1392 if (conn->flags & CO_FL_ERROR)
1393 chk_report_conn_err(conn, 0, 0);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001394
Nick Chalk57b1bf72010-03-16 15:50:46 +00001395 /* Reset the check buffer... */
Simon Horman4a741432013-02-23 15:35:38 +09001396 *check->bi->data = '\0';
1397 check->bi->i = 0;
Nick Chalk57b1bf72010-03-16 15:50:46 +00001398
Willy Tarreaufd29cc52012-11-23 09:18:20 +01001399 /* Close the connection... We absolutely want to perform a hard close
1400 * and reset the connection if some data are pending, otherwise we end
1401 * up with many TIME_WAITs and eat all the source port range quickly.
1402 * To avoid sending RSTs all the time, we first try to drain pending
1403 * data.
1404 */
Willy Tarreauf1503172012-09-28 19:39:36 +02001405 if (conn->xprt && conn->xprt->shutw)
1406 conn->xprt->shutw(conn, 0);
Willy Tarreau2b57cb82013-06-10 19:56:38 +02001407
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001408 /* OK, let's not stay here forever */
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001409 if (check->result == CHK_RES_FAILED)
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001410 conn->flags |= CO_FL_ERROR;
1411
Willy Tarreaua522f802012-11-23 08:56:35 +01001412 __conn_data_stop_both(conn);
Willy Tarreaufdccded2008-08-29 18:19:04 +02001413 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreau3267d362012-08-17 23:53:56 +02001414 return;
Willy Tarreau03938182010-03-17 21:52:07 +01001415
1416 wait_more_data:
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001417 __conn_data_poll_recv(conn);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001418}
1419
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001420/*
1421 * This function is used only for server health-checks. It handles connection
1422 * status updates including errors. If necessary, it wakes the check task up.
1423 * It always returns 0.
1424 */
1425static int wake_srv_chk(struct connection *conn)
Willy Tarreau20bea422012-07-06 12:00:49 +02001426{
Simon Horman4a741432013-02-23 15:35:38 +09001427 struct check *check = conn->owner;
Willy Tarreau20bea422012-07-06 12:00:49 +02001428
Willy Tarreau6c560da2012-11-24 11:14:45 +01001429 if (unlikely(conn->flags & CO_FL_ERROR)) {
Willy Tarreau02b0f582013-12-03 15:42:33 +01001430 /* We may get error reports bypassing the I/O handlers, typically
1431 * the case when sending a pure TCP check which fails, then the I/O
1432 * handlers above are not called. This is completely handled by the
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001433 * main processing task so let's simply wake it up. If we get here,
1434 * we expect errno to still be valid.
1435 */
1436 chk_report_conn_err(conn, errno, 0);
1437
Willy Tarreau2d351b62013-12-05 02:36:25 +01001438 __conn_data_stop_both(conn);
1439 task_wakeup(check->task, TASK_WOKEN_IO);
1440 }
1441
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001442 if (check->result != CHK_RES_UNKNOWN) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001443 /* We're here because nobody wants to handle the error, so we
1444 * sure want to abort the hard way.
Willy Tarreau02b0f582013-12-03 15:42:33 +01001445 */
Willy Tarreauad38ace2013-12-15 14:19:38 +01001446 if (conn_ctrl_ready(conn) && !(conn->flags & CO_FL_SOCK_RD_SH)) {
1447 if (!(conn->flags & CO_FL_WAIT_RD) && conn->ctrl->drain && conn->ctrl->drain(conn->t.sock.fd))
1448 fdtab[conn->t.sock.fd].linger_risk = 0;
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001449 }
Willy Tarreauf79c8172013-10-21 16:30:56 +02001450 conn_force_close(conn);
Willy Tarreau2d351b62013-12-05 02:36:25 +01001451 }
Willy Tarreau3267d362012-08-17 23:53:56 +02001452 return 0;
Willy Tarreau20bea422012-07-06 12:00:49 +02001453}
1454
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001455struct data_cb check_conn_cb = {
1456 .recv = event_srv_chk_r,
1457 .send = event_srv_chk_w,
1458 .wake = wake_srv_chk,
1459};
1460
Willy Tarreaubaaee002006-06-26 02:48:02 +02001461/*
Willy Tarreau2e993902011-10-31 11:53:20 +01001462 * updates the server's weight during a warmup stage. Once the final weight is
1463 * reached, the task automatically stops. Note that any server status change
1464 * must have updated s->last_change accordingly.
1465 */
1466static struct task *server_warmup(struct task *t)
1467{
1468 struct server *s = t->context;
1469
1470 /* by default, plan on stopping the task */
1471 t->expire = TICK_ETERNITY;
1472 if ((s->state & (SRV_RUNNING|SRV_WARMINGUP|SRV_MAINTAIN)) != (SRV_RUNNING|SRV_WARMINGUP))
1473 return t;
1474
Willy Tarreau004e0452013-11-21 11:22:01 +01001475 server_recalc_eweight(s);
Willy Tarreau2e993902011-10-31 11:53:20 +01001476
1477 /* probably that we can refill this server with a bit more connections */
1478 check_for_pending(s);
1479
1480 /* get back there in 1 second or 1/20th of the slowstart interval,
1481 * whichever is greater, resulting in small 5% steps.
1482 */
1483 if (s->state & SRV_WARMINGUP)
1484 t->expire = tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)));
1485 return t;
1486}
1487
1488/*
Willy Tarreaubaaee002006-06-26 02:48:02 +02001489 * manages a server health-check. Returns
1490 * the time the task accepts to wait, or TIME_ETERNITY for infinity.
1491 */
Simon Horman63a4a822012-03-19 07:24:41 +09001492static struct task *process_chk(struct task *t)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001493{
Simon Horman4a741432013-02-23 15:35:38 +09001494 struct check *check = t->context;
1495 struct server *s = check->server;
1496 struct connection *conn = check->conn;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001497 int rv;
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001498 int ret;
Willy Tarreauacbdc7a2012-11-23 14:02:10 +01001499 int expired = tick_is_expired(t->expire, now_ms);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001500
Willy Tarreau2c115e52013-12-11 19:41:16 +01001501 if (!(check->state & CHK_ST_INPROGRESS)) {
Willy Tarreau5a78f362012-11-23 12:47:05 +01001502 /* no check currently running */
Willy Tarreauacbdc7a2012-11-23 14:02:10 +01001503 if (!expired) /* woke up too early */
Willy Tarreau26c25062009-03-08 09:38:41 +01001504 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001505
Simon Horman671b6f02013-11-25 10:46:39 +09001506 /* we don't send any health-checks when the proxy is
1507 * stopped, the server should not be checked or the check
1508 * is disabled.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001509 */
Willy Tarreau0d924cc2013-12-11 21:26:24 +01001510 if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
Willy Tarreau33a08db2013-12-11 21:03:31 +01001511 s->proxy->state == PR_STSTOPPED)
Willy Tarreau5a78f362012-11-23 12:47:05 +01001512 goto reschedule;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001513
1514 /* we'll initiate a new check */
Simon Horman4a741432013-02-23 15:35:38 +09001515 set_server_check_status(check, HCHK_STATUS_START, NULL);
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001516
Willy Tarreau2c115e52013-12-11 19:41:16 +01001517 check->state |= CHK_ST_INPROGRESS;
Simon Horman4a741432013-02-23 15:35:38 +09001518 check->bi->p = check->bi->data;
1519 check->bi->i = 0;
1520 check->bo->p = check->bo->data;
1521 check->bo->o = 0;
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001522
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02001523 /* tcpcheck send/expect initialisation */
1524 if (check->type == PR_O2_TCPCHK_CHK)
1525 check->current_step = NULL;
1526
1527 /* prepare the check buffer.
1528 * This should not be used if check is the secondary agent check
1529 * of a server as s->proxy->check_req will relate to the
1530 * configuration of the primary check. Similarly, tcp-check uses
1531 * its own strings.
1532 */
Willy Tarreau33434322013-12-11 21:15:19 +01001533 if (check->type && check->type != PR_O2_TCPCHK_CHK && !(check->state & CHK_ST_AGENT)) {
Simon Horman4a741432013-02-23 15:35:38 +09001534 bo_putblk(check->bo, s->proxy->check_req, s->proxy->check_len);
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001535
1536 /* we want to check if this host replies to HTTP or SSLv3 requests
1537 * so we'll send the request, and won't wake the checker up now.
1538 */
Simon Horman4a741432013-02-23 15:35:38 +09001539 if ((check->type) == PR_O2_SSL3_CHK) {
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001540 /* SSL requires that we put Unix time in the request */
1541 int gmt_time = htonl(date.tv_sec);
Simon Horman4a741432013-02-23 15:35:38 +09001542 memcpy(check->bo->data + 11, &gmt_time, 4);
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001543 }
Simon Horman4a741432013-02-23 15:35:38 +09001544 else if ((check->type) == PR_O2_HTTP_CHK) {
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001545 if (s->proxy->options2 & PR_O2_CHK_SNDST)
Simon Horman4a741432013-02-23 15:35:38 +09001546 bo_putblk(check->bo, trash.str, httpchk_build_status_header(s, trash.str));
1547 bo_putstr(check->bo, "\r\n");
1548 *check->bo->p = '\0'; /* to make gdb output easier to read */
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001549 }
1550 }
1551
1552 /* prepare a new connection */
Willy Tarreau4bdae8a2013-10-14 17:29:15 +02001553 conn_init(conn);
Willy Tarreau910c6aa2013-10-24 15:08:37 +02001554 conn_prepare(conn, s->check_common.proto, s->check_common.xprt);
Willy Tarreau7abddb52013-10-24 15:31:04 +02001555 conn_attach(conn, check, &check_conn_cb);
Willy Tarreau4bdae8a2013-10-14 17:29:15 +02001556 conn->target = &s->obj_type;
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001557
Willy Tarreau5f2877a2012-10-26 19:57:58 +02001558 /* no client address */
1559 clear_addr(&conn->addr.from);
1560
Simon Horman66183002013-02-23 10:16:43 +09001561 if (is_addr(&s->check_common.addr))
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001562 /* we'll connect to the check addr specified on the server */
Simon Horman66183002013-02-23 10:16:43 +09001563 conn->addr.to = s->check_common.addr;
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001564 else
1565 /* we'll connect to the addr on the server */
1566 conn->addr.to = s->addr;
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001567
Simon Horman4a741432013-02-23 15:35:38 +09001568 set_host_port(&conn->addr.to, check->port);
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001569
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001570 /* It can return one of :
1571 * - SN_ERR_NONE if everything's OK
1572 * - SN_ERR_SRVTO if there are no more servers
1573 * - SN_ERR_SRVCL if the connection was refused by the server
1574 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1575 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1576 * - SN_ERR_INTERNAL for any other purely internal errors
1577 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
Willy Tarreau24db47e2012-11-23 14:16:39 +01001578 * Note that we try to prevent the network stack from sending the ACK during the
Willy Tarreauf0837b22012-11-24 10:24:27 +01001579 * connect() when a pure TCP check is used (without PROXY protocol).
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001580 */
Willy Tarreau8f46cca2013-03-04 20:07:44 +01001581 ret = SN_ERR_INTERNAL;
Simon Horman66183002013-02-23 10:16:43 +09001582 if (s->check_common.proto->connect)
Willy Tarreau57cd3e42013-10-24 22:01:26 +02001583 ret = s->check_common.proto->connect(conn, check->type, (check->type) ? 0 : 2);
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001584 conn->flags |= CO_FL_WAKE_DATA;
Willy Tarreau57cd3e42013-10-24 22:01:26 +02001585 if (s->check.send_proxy) {
1586 conn->send_proxy_ofs = 1;
1587 conn->flags |= CO_FL_SEND_PROXY;
1588 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001589
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001590 switch (ret) {
1591 case SN_ERR_NONE:
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001592 /* we allow up to min(inter, timeout.connect) for a connection
1593 * to establish but only when timeout.check is set
1594 * as it may be to short for a full check otherwise
1595 */
Simon Horman4a741432013-02-23 15:35:38 +09001596 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +02001597
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001598 if (s->proxy->timeout.check && s->proxy->timeout.connect) {
1599 int t_con = tick_add(now_ms, s->proxy->timeout.connect);
1600 t->expire = tick_first(t->expire, t_con);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001601 }
Willy Tarreau06559ac2013-12-05 01:53:08 +01001602
1603 if (check->type)
1604 conn_data_want_recv(conn); /* prepare for reading a possible reply */
1605
Willy Tarreau5a78f362012-11-23 12:47:05 +01001606 goto reschedule;
1607
1608 case SN_ERR_SRVTO: /* ETIMEDOUT */
1609 case SN_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
Simon Horman4a741432013-02-23 15:35:38 +09001610 set_server_check_status(check, HCHK_STATUS_L4CON, strerror(errno));
Willy Tarreau5a78f362012-11-23 12:47:05 +01001611 break;
1612 case SN_ERR_PRXCOND:
1613 case SN_ERR_RESOURCE:
1614 case SN_ERR_INTERNAL:
Simon Horman4a741432013-02-23 15:35:38 +09001615 set_server_check_status(check, HCHK_STATUS_SOCKERR, NULL);
Willy Tarreau5a78f362012-11-23 12:47:05 +01001616 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001617 }
1618
Willy Tarreau5a78f362012-11-23 12:47:05 +01001619 /* here, we have seen a synchronous error, no fd was allocated */
Willy Tarreau6b0a8502012-11-23 08:51:32 +01001620
Willy Tarreau2c115e52013-12-11 19:41:16 +01001621 check->state &= ~CHK_ST_INPROGRESS;
Simon Hormand8583062013-11-25 10:46:33 +09001622 check_failed(check);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001623
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +01001624 /* we allow up to min(inter, timeout.connect) for a connection
1625 * to establish but only when timeout.check is set
1626 * as it may be to short for a full check otherwise
1627 */
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001628 while (tick_is_expired(t->expire, now_ms)) {
1629 int t_con;
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +01001630
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001631 t_con = tick_add(t->expire, s->proxy->timeout.connect);
Simon Horman4a741432013-02-23 15:35:38 +09001632 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +01001633
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001634 if (s->proxy->timeout.check)
1635 t->expire = tick_first(t->expire, t_con);
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +01001636 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001637 }
1638 else {
Willy Tarreauf1503172012-09-28 19:39:36 +02001639 /* there was a test running.
1640 * First, let's check whether there was an uncaught error,
1641 * which can happen on connect timeout or error.
1642 */
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001643 if (s->check.result == CHK_RES_UNKNOWN) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001644 /* good connection is enough for pure TCP check */
1645 if ((conn->flags & CO_FL_CONNECTED) && !check->type) {
Simon Horman4a741432013-02-23 15:35:38 +09001646 if (check->use_ssl)
1647 set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
Willy Tarreauf1503172012-09-28 19:39:36 +02001648 else
Simon Horman4a741432013-02-23 15:35:38 +09001649 set_server_check_status(check, HCHK_STATUS_L4OK, NULL);
Willy Tarreauacbdc7a2012-11-23 14:02:10 +01001650 }
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001651 else if ((conn->flags & CO_FL_ERROR) || expired) {
1652 chk_report_conn_err(conn, 0, expired);
Willy Tarreauf1503172012-09-28 19:39:36 +02001653 }
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01001654 else
1655 goto out_wait; /* timeout not reached, wait again */
Willy Tarreauf1503172012-09-28 19:39:36 +02001656 }
1657
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01001658 /* check complete or aborted */
Willy Tarreau5ba04f62013-02-12 15:23:12 +01001659 if (conn->xprt) {
1660 /* The check was aborted and the connection was not yet closed.
1661 * This can happen upon timeout, or when an external event such
1662 * as a failed response coupled with "observe layer7" caused the
1663 * server state to be suddenly changed.
1664 */
Willy Tarreauad38ace2013-12-15 14:19:38 +01001665 if (conn_ctrl_ready(conn) && !(conn->flags & CO_FL_SOCK_RD_SH)) {
1666 if (!(conn->flags & CO_FL_WAIT_RD) && conn->ctrl->drain && conn->ctrl->drain(conn->t.sock.fd))
1667 fdtab[conn->t.sock.fd].linger_risk = 0;
Willy Tarreau2d351b62013-12-05 02:36:25 +01001668 }
Willy Tarreauf79c8172013-10-21 16:30:56 +02001669 conn_force_close(conn);
Willy Tarreau5ba04f62013-02-12 15:23:12 +01001670 }
1671
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001672 if (check->result == CHK_RES_FAILED) /* a failure or timeout detected */
Simon Hormand8583062013-11-25 10:46:33 +09001673 check_failed(check);
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01001674 else { /* check was OK */
Willy Tarreau48494c02007-11-30 10:41:39 +01001675 /* we may have to add/remove this server from the LB group */
1676 if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001677 if ((s->state & SRV_GOINGDOWN) && (check->result != CHK_RES_CONDPASS))
Simon Horman4a741432013-02-23 15:35:38 +09001678 set_server_enabled(check);
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001679 else if (!(s->state & SRV_GOINGDOWN) && (check->result == CHK_RES_CONDPASS))
Simon Horman4a741432013-02-23 15:35:38 +09001680 set_server_disabled(check);
Willy Tarreau48494c02007-11-30 10:41:39 +01001681 }
1682
Simon Horman58c32972013-11-25 10:46:38 +09001683 if (check->health < check->rise + check->fall - 1) {
Simon Horman125d0992013-02-24 17:23:38 +09001684 check->health++; /* was bad, stays for a while */
Simon Horman4a741432013-02-23 15:35:38 +09001685 set_server_up(check);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001686 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001687 }
Willy Tarreau2c115e52013-12-11 19:41:16 +01001688 check->state &= ~CHK_ST_INPROGRESS;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001689
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01001690 rv = 0;
1691 if (global.spread_checks > 0) {
Simon Horman4a741432013-02-23 15:35:38 +09001692 rv = srv_getinter(check) * global.spread_checks / 100;
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01001693 rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001694 }
Simon Horman4a741432013-02-23 15:35:38 +09001695 t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001696 }
Willy Tarreau5a78f362012-11-23 12:47:05 +01001697
1698 reschedule:
1699 while (tick_is_expired(t->expire, now_ms))
Simon Horman4a741432013-02-23 15:35:38 +09001700 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01001701 out_wait:
Willy Tarreau26c25062009-03-08 09:38:41 +01001702 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001703}
1704
Simon Horman5c942422013-11-25 10:46:32 +09001705static int start_check_task(struct check *check, int mininter,
1706 int nbcheck, int srvpos)
1707{
1708 struct task *t;
1709 /* task for the check */
1710 if ((t = task_new()) == NULL) {
1711 Alert("Starting [%s:%s] check: out of memory.\n",
1712 check->server->proxy->id, check->server->id);
1713 return 0;
1714 }
1715
1716 check->task = t;
1717 t->process = process_chk;
1718 t->context = check;
1719
1720 /* check this every ms */
1721 t->expire = tick_add(now_ms,
1722 MS_TO_TICKS(((mininter &&
1723 mininter >= srv_getinter(check)) ?
1724 mininter : srv_getinter(check)) * srvpos / nbcheck));
1725 check->start = now;
1726 task_queue(t);
1727
1728 return 1;
1729}
1730
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001731/*
1732 * Start health-check.
1733 * Returns 0 if OK, -1 if error, and prints the error in this case.
1734 */
1735int start_checks() {
1736
1737 struct proxy *px;
1738 struct server *s;
1739 struct task *t;
Simon Horman4a741432013-02-23 15:35:38 +09001740 int nbcheck=0, mininter=0, srvpos=0;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001741
Willy Tarreau2c43a1e2007-10-14 23:05:39 +02001742 /* 1- count the checkers to run simultaneously.
1743 * We also determine the minimum interval among all of those which
1744 * have an interval larger than SRV_CHK_INTER_THRES. This interval
1745 * will be used to spread their start-up date. Those which have
Jamie Gloudon801a0a32012-08-25 00:18:33 -04001746 * a shorter interval will start independently and will not dictate
Willy Tarreau2c43a1e2007-10-14 23:05:39 +02001747 * too short an interval for all others.
1748 */
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001749 for (px = proxy; px; px = px->next) {
1750 for (s = px->srv; s; s = s->next) {
Willy Tarreaue7b73482013-11-21 11:50:50 +01001751 if (s->slowstart) {
1752 if ((t = task_new()) == NULL) {
1753 Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
1754 return -1;
1755 }
1756 /* We need a warmup task that will be called when the server
1757 * state switches from down to up.
1758 */
1759 s->warmup = t;
1760 t->process = server_warmup;
1761 t->context = s;
1762 t->expire = TICK_ETERNITY;
1763 }
1764
Willy Tarreaud8514a22013-12-11 21:10:14 +01001765 if (s->check.state & CHK_ST_CONFIGURED) {
1766 nbcheck++;
1767 if ((srv_getinter(&s->check) >= SRV_CHK_INTER_THRES) &&
1768 (!mininter || mininter > srv_getinter(&s->check)))
1769 mininter = srv_getinter(&s->check);
1770 }
Willy Tarreau15f39102013-12-11 20:41:18 +01001771
Willy Tarreaud8514a22013-12-11 21:10:14 +01001772 if (s->agent.state & CHK_ST_CONFIGURED) {
1773 nbcheck++;
1774 if ((srv_getinter(&s->agent) >= SRV_CHK_INTER_THRES) &&
1775 (!mininter || mininter > srv_getinter(&s->agent)))
1776 mininter = srv_getinter(&s->agent);
1777 }
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001778 }
1779 }
1780
Simon Horman4a741432013-02-23 15:35:38 +09001781 if (!nbcheck)
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001782 return 0;
1783
1784 srand((unsigned)time(NULL));
1785
1786 /*
1787 * 2- start them as far as possible from each others. For this, we will
1788 * start them after their interval set to the min interval divided by
1789 * the number of servers, weighted by the server's position in the list.
1790 */
1791 for (px = proxy; px; px = px->next) {
1792 for (s = px->srv; s; s = s->next) {
Simon Hormand60d6912013-11-25 10:46:36 +09001793 /* A task for the main check */
Willy Tarreauff5ae352013-12-11 20:36:34 +01001794 if (s->check.state & CHK_ST_CONFIGURED) {
Simon Hormand60d6912013-11-25 10:46:36 +09001795 if (!start_check_task(&s->check, mininter, nbcheck, srvpos))
1796 return -1;
1797 srvpos++;
1798 }
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001799
Simon Hormand60d6912013-11-25 10:46:36 +09001800 /* A task for a auxiliary agent check */
Willy Tarreauff5ae352013-12-11 20:36:34 +01001801 if (s->agent.state & CHK_ST_CONFIGURED) {
Simon Hormand60d6912013-11-25 10:46:36 +09001802 if (!start_check_task(&s->agent, mininter, nbcheck, srvpos)) {
1803 return -1;
1804 }
1805 srvpos++;
1806 }
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001807 }
1808 }
1809 return 0;
1810}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001811
1812/*
Willy Tarreau5b3a2022012-09-28 15:01:02 +02001813 * Perform content verification check on data in s->check.buffer buffer.
Willy Tarreaubd741542010-03-16 18:46:54 +01001814 * The buffer MUST be terminated by a null byte before calling this function.
1815 * Sets server status appropriately. The caller is responsible for ensuring
1816 * that the buffer contains at least 13 characters. If <done> is zero, we may
1817 * return 0 to indicate that data is required to decide of a match.
1818 */
1819static int httpchk_expect(struct server *s, int done)
1820{
1821 static char status_msg[] = "HTTP status check returned code <000>";
1822 char status_code[] = "000";
1823 char *contentptr;
1824 int crlf;
1825 int ret;
1826
1827 switch (s->proxy->options2 & PR_O2_EXP_TYPE) {
1828 case PR_O2_EXP_STS:
1829 case PR_O2_EXP_RSTS:
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001830 memcpy(status_code, s->check.bi->data + 9, 3);
1831 memcpy(status_msg + strlen(status_msg) - 4, s->check.bi->data + 9, 3);
Willy Tarreaubd741542010-03-16 18:46:54 +01001832
1833 if ((s->proxy->options2 & PR_O2_EXP_TYPE) == PR_O2_EXP_STS)
1834 ret = strncmp(s->proxy->expect_str, status_code, 3) == 0;
1835 else
1836 ret = regexec(s->proxy->expect_regex, status_code, MAX_MATCH, pmatch, 0) == 0;
1837
1838 /* we necessarily have the response, so there are no partial failures */
1839 if (s->proxy->options2 & PR_O2_EXP_INV)
1840 ret = !ret;
1841
Simon Horman4a741432013-02-23 15:35:38 +09001842 set_server_check_status(&s->check, ret ? HCHK_STATUS_L7OKD : HCHK_STATUS_L7STS, status_msg);
Willy Tarreaubd741542010-03-16 18:46:54 +01001843 break;
1844
1845 case PR_O2_EXP_STR:
1846 case PR_O2_EXP_RSTR:
1847 /* very simple response parser: ignore CR and only count consecutive LFs,
1848 * stop with contentptr pointing to first char after the double CRLF or
1849 * to '\0' if crlf < 2.
1850 */
1851 crlf = 0;
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001852 for (contentptr = s->check.bi->data; *contentptr; contentptr++) {
Willy Tarreaubd741542010-03-16 18:46:54 +01001853 if (crlf >= 2)
1854 break;
1855 if (*contentptr == '\r')
1856 continue;
1857 else if (*contentptr == '\n')
1858 crlf++;
1859 else
1860 crlf = 0;
1861 }
1862
1863 /* Check that response contains a body... */
1864 if (crlf < 2) {
1865 if (!done)
1866 return 0;
1867
Simon Horman4a741432013-02-23 15:35:38 +09001868 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01001869 "HTTP content check could not find a response body");
1870 return 1;
1871 }
1872
1873 /* Check that response body is not empty... */
1874 if (*contentptr == '\0') {
Willy Tarreaua164fb52011-04-13 09:32:41 +02001875 if (!done)
1876 return 0;
1877
Simon Horman4a741432013-02-23 15:35:38 +09001878 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01001879 "HTTP content check found empty response body");
1880 return 1;
1881 }
1882
1883 /* Check the response content against the supplied string
1884 * or regex... */
1885 if ((s->proxy->options2 & PR_O2_EXP_TYPE) == PR_O2_EXP_STR)
1886 ret = strstr(contentptr, s->proxy->expect_str) != NULL;
1887 else
1888 ret = regexec(s->proxy->expect_regex, contentptr, MAX_MATCH, pmatch, 0) == 0;
1889
1890 /* if we don't match, we may need to wait more */
1891 if (!ret && !done)
1892 return 0;
1893
1894 if (ret) {
1895 /* content matched */
1896 if (s->proxy->options2 & PR_O2_EXP_INV)
Simon Horman4a741432013-02-23 15:35:38 +09001897 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01001898 "HTTP check matched unwanted content");
1899 else
Simon Horman4a741432013-02-23 15:35:38 +09001900 set_server_check_status(&s->check, HCHK_STATUS_L7OKD,
Willy Tarreaubd741542010-03-16 18:46:54 +01001901 "HTTP content check matched");
1902 }
1903 else {
1904 if (s->proxy->options2 & PR_O2_EXP_INV)
Simon Horman4a741432013-02-23 15:35:38 +09001905 set_server_check_status(&s->check, HCHK_STATUS_L7OKD,
Willy Tarreaubd741542010-03-16 18:46:54 +01001906 "HTTP check did not match unwanted content");
1907 else
Simon Horman4a741432013-02-23 15:35:38 +09001908 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01001909 "HTTP content check did not match");
1910 }
1911 break;
1912 }
1913 return 1;
1914}
1915
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02001916/*
1917 * return the id of a step in a send/expect session
1918 */
1919static int tcpcheck_get_step_id(struct server *s)
1920{
1921 struct tcpcheck_rule *cur = NULL, *next = NULL;
1922 int i = 0;
1923
1924 cur = s->check.current_step;
1925
1926 /* no step => first step */
1927 if (cur == NULL)
1928 return 1;
1929
1930 /* increment i until current step */
1931 list_for_each_entry(next, &s->proxy->tcpcheck_rules, list) {
1932 if (next->list.p == &cur->list)
1933 break;
1934 ++i;
1935 }
1936
1937 return i;
1938}
1939
1940static void tcpcheck_main(struct connection *conn)
1941{
1942 char *contentptr;
1943 unsigned int contentlen;
1944 struct list *head = NULL;
1945 struct tcpcheck_rule *cur = NULL;
1946 int done = 0, ret = 0;
1947
1948 struct check *check = conn->owner;
1949 struct server *s = check->server;
1950 struct task *t = check->task;
1951
1952 /* don't do anything until the connection is established */
1953 if (!(conn->flags & CO_FL_CONNECTED)) {
1954 /* update expire time, should be done by process_chk */
1955 /* we allow up to min(inter, timeout.connect) for a connection
1956 * to establish but only when timeout.check is set
1957 * as it may be to short for a full check otherwise
1958 */
1959 while (tick_is_expired(t->expire, now_ms)) {
1960 int t_con;
1961
1962 t_con = tick_add(t->expire, s->proxy->timeout.connect);
1963 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
1964
1965 if (s->proxy->timeout.check)
1966 t->expire = tick_first(t->expire, t_con);
1967 }
1968 return;
1969 }
1970
1971 /* here, we know that the connection is established */
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001972 if (check->result != CHK_RES_UNKNOWN)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02001973 goto out_end_tcpcheck;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02001974
1975 /* head is be the first element of the double chained list */
1976 head = &s->proxy->tcpcheck_rules;
1977
1978 /* no step means first step
1979 * initialisation */
1980 if (check->current_step == NULL) {
1981 check->bo->p = check->bo->data;
1982 check->bo->o = 0;
1983 check->bi->p = check->bi->data;
1984 check->bi->i = 0;
1985 cur = check->current_step = LIST_ELEM(head->n, struct tcpcheck_rule *, list);
1986 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
1987 if (s->proxy->timeout.check)
1988 t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
1989 }
1990 /* keep on processing step */
1991 else {
1992 cur = check->current_step;
1993 }
1994
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01001995 if (conn->flags & CO_FL_HANDSHAKE)
1996 return;
1997
1998 /* It's only the rules which will enable send/recv */
1999 __conn_data_stop_both(conn);
2000
Willy Tarreauabca5b62013-12-06 14:19:25 +01002001 while (1) {
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002002 /* we have to try to flush the output buffer before reading, at the end,
2003 * or if we're about to send a string that does not fit in the remaining space.
2004 */
2005 if (check->bo->o &&
2006 (&cur->list == head ||
2007 check->current_step->action != TCPCHK_ACT_SEND ||
2008 check->current_step->string_len >= buffer_total_space(check->bo))) {
2009
2010 if ((conn->flags & CO_FL_WAIT_WR) ||
2011 conn->xprt->snd_buf(conn, check->bo, MSG_DONTWAIT | MSG_NOSIGNAL) <= 0) {
2012 if (conn->flags & CO_FL_ERROR) {
2013 chk_report_conn_err(conn, errno, 0);
2014 __conn_data_stop_both(conn);
2015 goto out_end_tcpcheck;
2016 }
2017 goto out_need_io;
Willy Tarreauabca5b62013-12-06 14:19:25 +01002018 }
Willy Tarreauabca5b62013-12-06 14:19:25 +01002019 }
2020
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002021 /* did we reach the end ? If so, let's check that everything was sent */
2022 if (&cur->list == head) {
2023 if (check->bo->o)
2024 goto out_need_io;
Willy Tarreauabca5b62013-12-06 14:19:25 +01002025 break;
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002026 }
Willy Tarreauabca5b62013-12-06 14:19:25 +01002027
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002028 if (check->current_step->action == TCPCHK_ACT_SEND) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002029 /* reset the read buffer */
2030 if (*check->bi->data != '\0') {
2031 *check->bi->data = '\0';
2032 check->bi->i = 0;
2033 }
2034
2035 if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH)) {
2036 conn->flags |= CO_FL_ERROR;
2037 chk_report_conn_err(conn, 0, 0);
2038 goto out_end_tcpcheck;
2039 }
2040
Willy Tarreauabca5b62013-12-06 14:19:25 +01002041 if (check->current_step->string_len >= check->bo->size) {
2042 chunk_printf(&trash, "tcp-check send : string too large (%d) for buffer size (%d) at step %d",
2043 check->current_step->string_len, check->bo->size,
2044 tcpcheck_get_step_id(s));
2045 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2046 goto out_end_tcpcheck;
2047 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002048
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002049 /* do not try to send if there is no space */
2050 if (check->current_step->string_len >= buffer_total_space(check->bo))
2051 continue;
2052
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002053 bo_putblk(check->bo, check->current_step->string, check->current_step->string_len);
2054 *check->bo->p = '\0'; /* to make gdb output easier to read */
2055
Willy Tarreauabca5b62013-12-06 14:19:25 +01002056 /* go to next rule and try to send */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002057 cur = (struct tcpcheck_rule *)cur->list.n;
2058 check->current_step = cur;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002059 } /* end 'send' */
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002060 else if (check->current_step->action == TCPCHK_ACT_EXPECT) {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01002061 if (unlikely(check->result == CHK_RES_FAILED))
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002062 goto out_end_tcpcheck;
2063
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002064 if ((conn->flags & CO_FL_WAIT_RD) ||
2065 conn->xprt->rcv_buf(conn, check->bi, buffer_total_space(check->bi)) <= 0) {
2066 if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH)) {
2067 done = 1;
2068 if ((conn->flags & CO_FL_ERROR) && !check->bi->i) {
2069 /* Report network errors only if we got no other data. Otherwise
2070 * we'll let the upper layers decide whether the response is OK
2071 * or not. It is very common that an RST sent by the server is
2072 * reported as an error just after the last data chunk.
2073 */
2074 chk_report_conn_err(conn, errno, 0);
2075 goto out_end_tcpcheck;
2076 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002077 }
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002078 else
2079 goto out_need_io;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002080 }
2081
2082 /* Intermediate or complete response received.
2083 * Terminate string in check->bi->data buffer.
2084 */
2085 if (check->bi->i < check->bi->size) {
2086 check->bi->data[check->bi->i] = '\0';
2087 }
2088 else {
2089 check->bi->data[check->bi->i - 1] = '\0';
2090 done = 1; /* buffer full, don't wait for more data */
2091 }
2092
2093 contentptr = check->bi->data;
2094 contentlen = check->bi->i;
2095
2096 /* Check that response body is not empty... */
2097 if (*contentptr == '\0') {
2098 if (!done)
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002099 continue;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002100
2101 /* empty response */
2102 chunk_printf(&trash, "TCPCHK got an empty response at step %d",
2103 tcpcheck_get_step_id(s));
2104 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2105
2106 goto out_end_tcpcheck;
2107 }
2108
2109 if (!done && (cur->string != NULL) && (check->bi->i < cur->string_len) )
Willy Tarreaua970c282013-12-06 12:47:19 +01002110 continue; /* try to read more */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002111
Willy Tarreaua970c282013-12-06 12:47:19 +01002112 tcpcheck_expect:
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002113 if (cur->string != NULL)
2114 ret = my_memmem(contentptr, contentlen, cur->string, cur->string_len) != NULL;
2115 else if (cur->expect_regex != NULL)
2116 ret = regexec(cur->expect_regex, contentptr, MAX_MATCH, pmatch, 0) == 0;
2117
2118 if (!ret && !done)
Willy Tarreaua970c282013-12-06 12:47:19 +01002119 continue; /* try to read more */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002120
2121 /* matched */
2122 if (ret) {
2123 /* matched but we did not want to => ERROR */
2124 if (cur->inverse) {
2125 /* we were looking for a string */
2126 if (cur->string != NULL) {
2127 chunk_printf(&trash, "TCPCHK matched unwanted content '%s' at step %d",
2128 cur->string, tcpcheck_get_step_id(s));
2129 }
2130 else {
2131 /* we were looking for a regex */
2132 chunk_printf(&trash, "TCPCHK matched unwanted content (regex) at step %d",
2133 tcpcheck_get_step_id(s));
2134 }
2135 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2136 goto out_end_tcpcheck;
2137 }
2138 /* matched and was supposed to => OK, next step */
2139 else {
2140 cur = (struct tcpcheck_rule*)cur->list.n;
2141 check->current_step = cur;
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002142 if (check->current_step->action == TCPCHK_ACT_EXPECT)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002143 goto tcpcheck_expect;
2144 __conn_data_stop_recv(conn);
2145 }
2146 }
2147 else {
2148 /* not matched */
2149 /* not matched and was not supposed to => OK, next step */
2150 if (cur->inverse) {
2151 cur = (struct tcpcheck_rule*)cur->list.n;
2152 check->current_step = cur;
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002153 if (check->current_step->action == TCPCHK_ACT_EXPECT)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002154 goto tcpcheck_expect;
2155 __conn_data_stop_recv(conn);
2156 }
2157 /* not matched but was supposed to => ERROR */
2158 else {
2159 /* we were looking for a string */
2160 if (cur->string != NULL) {
2161 chunk_printf(&trash, "TCPCHK did not match content '%s' at step %d",
2162 cur->string, tcpcheck_get_step_id(s));
2163 }
2164 else {
2165 /* we were looking for a regex */
2166 chunk_printf(&trash, "TCPCHK did not match content (regex) at step %d",
2167 tcpcheck_get_step_id(s));
2168 }
2169 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2170 goto out_end_tcpcheck;
2171 }
2172 }
2173 } /* end expect */
2174 } /* end loop over double chained step list */
2175
2176 set_server_check_status(check, HCHK_STATUS_L7OKD, "(tcp-check)");
2177 goto out_end_tcpcheck;
2178
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002179 out_need_io:
2180 if (check->bo->o)
2181 __conn_data_want_send(conn);
2182
2183 if (check->current_step->action == TCPCHK_ACT_EXPECT)
2184 __conn_data_want_recv(conn);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002185 return;
2186
2187 out_end_tcpcheck:
2188 /* collect possible new errors */
2189 if (conn->flags & CO_FL_ERROR)
2190 chk_report_conn_err(conn, 0, 0);
2191
2192 /* Close the connection... We absolutely want to perform a hard close
2193 * and reset the connection if some data are pending, otherwise we end
2194 * up with many TIME_WAITs and eat all the source port range quickly.
2195 * To avoid sending RSTs all the time, we first try to drain pending
2196 * data.
2197 */
2198 if (conn->xprt && conn->xprt->shutw)
2199 conn->xprt->shutw(conn, 0);
2200
2201 check->current_step = NULL;
2202
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01002203 if (check->result == CHK_RES_FAILED)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002204 conn->flags |= CO_FL_ERROR;
2205
2206 __conn_data_stop_both(conn);
2207 task_wakeup(t, TASK_WOKEN_IO);
2208
2209 return;
2210}
2211
2212
Willy Tarreaubd741542010-03-16 18:46:54 +01002213/*
Willy Tarreaubaaee002006-06-26 02:48:02 +02002214 * Local variables:
2215 * c-indent-level: 8
2216 * c-basic-offset: 8
2217 * End:
2218 */