blob: 3702d9a4b0fe9fb54e21c79eea0fbb897bed6e40 [file] [log] [blame]
/*
 * Health-checks functions.
 *
 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
 * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
13
Willy Tarreaub8816082008-01-18 12:18:15 +010014#include <assert.h>
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020015#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020016#include <errno.h>
17#include <fcntl.h>
Willy Tarreau9b39dc52014-07-08 00:54:10 +020018#include <signal.h>
Simon Horman0ba0e4a2015-01-30 11:23:00 +090019#include <stdarg.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020#include <stdio.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020021#include <stdlib.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020022#include <string.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020023#include <time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020024#include <unistd.h>
25#include <sys/socket.h>
Dmitry Sivachenkocaf58982009-08-24 15:11:06 +040026#include <sys/types.h>
Simon Horman98637e52014-06-20 12:30:16 +090027#include <sys/wait.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020028#include <netinet/in.h>
Willy Tarreau1274bc42009-07-15 07:16:31 +020029#include <netinet/tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020030#include <arpa/inet.h>
31
Willy Tarreauc7e42382012-08-24 19:22:53 +020032#include <common/chunk.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020033#include <common/compat.h>
34#include <common/config.h>
35#include <common/mini-clist.h>
Willy Tarreau83749182007-04-15 20:56:27 +020036#include <common/standard.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020037#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020038
39#include <types/global.h>
Simon Horman0ba0e4a2015-01-30 11:23:00 +090040#include <types/mailers.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041
Baptiste Assmann69e273f2013-12-11 00:52:19 +010042#ifdef USE_OPENSSL
43#include <types/ssl_sock.h>
44#include <proto/ssl_sock.h>
45#endif /* USE_OPENSSL */
46
Willy Tarreaubaaee002006-06-26 02:48:02 +020047#include <proto/backend.h>
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020048#include <proto/checks.h>
Simon Hormana2b9dad2013-02-12 10:45:54 +090049#include <proto/dumpstats.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020050#include <proto/fd.h>
51#include <proto/log.h>
52#include <proto/queue.h>
Willy Tarreauc6f4ce82009-06-10 11:09:37 +020053#include <proto/port_range.h>
Willy Tarreau3d300592007-03-18 18:34:41 +010054#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010055#include <proto/proto_tcp.h>
Baptiste Assmann69e273f2013-12-11 00:52:19 +010056#include <proto/protocol.h>
Willy Tarreau2b5652f2006-12-31 17:46:05 +010057#include <proto/proxy.h>
Willy Tarreaufb56aab2012-09-28 14:40:02 +020058#include <proto/raw_sock.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020059#include <proto/server.h>
Willy Tarreau9e000c62011-03-10 14:03:36 +010060#include <proto/stream_interface.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020061#include <proto/task.h>
62
/* Forward declarations of helpers which are used before being defined. */
static int httpchk_expect(struct server *s, int done);
static int tcpcheck_get_step_id(struct check *check);
static void tcpcheck_main(struct connection *conn);
Willy Tarreaubd741542010-03-16 18:46:54 +010066
Simon Horman63a4a822012-03-19 07:24:41 +090067static const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010068 [HCHK_STATUS_UNKNOWN] = { CHK_RES_UNKNOWN, "UNK", "Unknown" },
69 [HCHK_STATUS_INI] = { CHK_RES_UNKNOWN, "INI", "Initializing" },
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +020070 [HCHK_STATUS_START] = { /* SPECIAL STATUS*/ },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020071
Willy Tarreau23964182014-05-20 20:56:30 +020072 /* Below we have finished checks */
73 [HCHK_STATUS_CHECKED] = { CHK_RES_NEUTRAL, "CHECKED", "No status change" },
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010074 [HCHK_STATUS_HANA] = { CHK_RES_FAILED, "HANA", "Health analyze" },
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +010075
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010076 [HCHK_STATUS_SOCKERR] = { CHK_RES_FAILED, "SOCKERR", "Socket error" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020077
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010078 [HCHK_STATUS_L4OK] = { CHK_RES_PASSED, "L4OK", "Layer4 check passed" },
79 [HCHK_STATUS_L4TOUT] = { CHK_RES_FAILED, "L4TOUT", "Layer4 timeout" },
80 [HCHK_STATUS_L4CON] = { CHK_RES_FAILED, "L4CON", "Layer4 connection problem" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020081
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010082 [HCHK_STATUS_L6OK] = { CHK_RES_PASSED, "L6OK", "Layer6 check passed" },
83 [HCHK_STATUS_L6TOUT] = { CHK_RES_FAILED, "L6TOUT", "Layer6 timeout" },
84 [HCHK_STATUS_L6RSP] = { CHK_RES_FAILED, "L6RSP", "Layer6 invalid response" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020085
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010086 [HCHK_STATUS_L7TOUT] = { CHK_RES_FAILED, "L7TOUT", "Layer7 timeout" },
87 [HCHK_STATUS_L7RSP] = { CHK_RES_FAILED, "L7RSP", "Layer7 invalid response" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020088
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +020089 [HCHK_STATUS_L57DATA] = { /* DUMMY STATUS */ },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020090
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010091 [HCHK_STATUS_L7OKD] = { CHK_RES_PASSED, "L7OK", "Layer7 check passed" },
92 [HCHK_STATUS_L7OKCD] = { CHK_RES_CONDPASS, "L7OKC", "Layer7 check conditionally passed" },
93 [HCHK_STATUS_L7STS] = { CHK_RES_FAILED, "L7STS", "Layer7 wrong status" },
Simon Horman98637e52014-06-20 12:30:16 +090094
95 [HCHK_STATUS_PROCERR] = { CHK_RES_FAILED, "PROCERR", "External check error" },
96 [HCHK_STATUS_PROCTOUT] = { CHK_RES_FAILED, "PROCTOUT", "External check timeout" },
Cyril Bonté77010d82014-08-07 01:55:37 +020097 [HCHK_STATUS_PROCOK] = { CHK_RES_PASSED, "PROCOK", "External check passed" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020098};
99
Cyril Bontéac92a062014-12-27 22:28:38 +0100100const struct extcheck_env extcheck_envs[EXTCHK_SIZE] = {
101 [EXTCHK_PATH] = { "PATH", EXTCHK_SIZE_EVAL_INIT },
102 [EXTCHK_HAPROXY_PROXY_NAME] = { "HAPROXY_PROXY_NAME", EXTCHK_SIZE_EVAL_INIT },
103 [EXTCHK_HAPROXY_PROXY_ID] = { "HAPROXY_PROXY_ID", EXTCHK_SIZE_EVAL_INIT },
104 [EXTCHK_HAPROXY_PROXY_ADDR] = { "HAPROXY_PROXY_ADDR", EXTCHK_SIZE_EVAL_INIT },
105 [EXTCHK_HAPROXY_PROXY_PORT] = { "HAPROXY_PROXY_PORT", EXTCHK_SIZE_EVAL_INIT },
106 [EXTCHK_HAPROXY_SERVER_NAME] = { "HAPROXY_SERVER_NAME", EXTCHK_SIZE_EVAL_INIT },
107 [EXTCHK_HAPROXY_SERVER_ID] = { "HAPROXY_SERVER_ID", EXTCHK_SIZE_EVAL_INIT },
108 [EXTCHK_HAPROXY_SERVER_ADDR] = { "HAPROXY_SERVER_ADDR", EXTCHK_SIZE_EVAL_INIT },
109 [EXTCHK_HAPROXY_SERVER_PORT] = { "HAPROXY_SERVER_PORT", EXTCHK_SIZE_EVAL_INIT },
110 [EXTCHK_HAPROXY_SERVER_MAXCONN] = { "HAPROXY_SERVER_MAXCONN", EXTCHK_SIZE_EVAL_INIT },
111 [EXTCHK_HAPROXY_SERVER_CURCONN] = { "HAPROXY_SERVER_CURCONN", EXTCHK_SIZE_ULONG },
112};
113
Simon Horman63a4a822012-03-19 07:24:41 +0900114static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100115 [HANA_STATUS_UNKNOWN] = { "Unknown", { 0, 0 }},
116
117 [HANA_STATUS_L4_OK] = { "L4 successful connection", { 2, 0 }},
118 [HANA_STATUS_L4_ERR] = { "L4 unsuccessful connection", { 1, 1 }},
119
120 [HANA_STATUS_HTTP_OK] = { "Correct http response", { 0, 2 }},
121 [HANA_STATUS_HTTP_STS] = { "Wrong http response", { 0, 1 }},
122 [HANA_STATUS_HTTP_HDRRSP] = { "Invalid http response (headers)", { 0, 1 }},
123 [HANA_STATUS_HTTP_RSP] = { "Invalid http response", { 0, 1 }},
124
125 [HANA_STATUS_HTTP_READ_ERROR] = { "Read error (http)", { 0, 1 }},
126 [HANA_STATUS_HTTP_READ_TIMEOUT] = { "Read timeout (http)", { 0, 1 }},
127 [HANA_STATUS_HTTP_BROKEN_PIPE] = { "Close from server (http)", { 0, 1 }},
128};
129
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200130/*
131 * Convert check_status code to description
132 */
133const char *get_check_status_description(short check_status) {
134
135 const char *desc;
136
137 if (check_status < HCHK_STATUS_SIZE)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200138 desc = check_statuses[check_status].desc;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200139 else
140 desc = NULL;
141
142 if (desc && *desc)
143 return desc;
144 else
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200145 return check_statuses[HCHK_STATUS_UNKNOWN].desc;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200146}
147
148/*
149 * Convert check_status code to short info
150 */
151const char *get_check_status_info(short check_status) {
152
153 const char *info;
154
155 if (check_status < HCHK_STATUS_SIZE)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200156 info = check_statuses[check_status].info;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200157 else
158 info = NULL;
159
160 if (info && *info)
161 return info;
162 else
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200163 return check_statuses[HCHK_STATUS_UNKNOWN].info;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200164}
165
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100166const char *get_analyze_status(short analyze_status) {
167
168 const char *desc;
169
170 if (analyze_status < HANA_STATUS_SIZE)
171 desc = analyze_statuses[analyze_status].desc;
172 else
173 desc = NULL;
174
175 if (desc && *desc)
176 return desc;
177 else
178 return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
179}
180
Willy Tarreaua150cf12014-05-20 21:57:23 +0200181/* Builds a string containing some information about the health check's result.
182 * The output string is allocated from the trash chunks. If the check is NULL,
183 * NULL is returned. This is designed to be used when emitting logs about health
184 * checks.
Willy Tarreauddd329c2014-05-16 16:46:12 +0200185 */
Willy Tarreaua150cf12014-05-20 21:57:23 +0200186static const char *check_reason_string(struct check *check)
Willy Tarreauddd329c2014-05-16 16:46:12 +0200187{
Willy Tarreaua150cf12014-05-20 21:57:23 +0200188 struct chunk *msg;
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200189
Willy Tarreaua150cf12014-05-20 21:57:23 +0200190 if (!check)
191 return NULL;
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200192
Willy Tarreaua150cf12014-05-20 21:57:23 +0200193 msg = get_trash_chunk();
194 chunk_printf(msg, "reason: %s", get_check_status_description(check->status));
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200195
Willy Tarreaua150cf12014-05-20 21:57:23 +0200196 if (check->status >= HCHK_STATUS_L57DATA)
197 chunk_appendf(msg, ", code: %d", check->code);
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200198
Willy Tarreaua150cf12014-05-20 21:57:23 +0200199 if (*check->desc) {
200 struct chunk src;
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200201
Willy Tarreaua150cf12014-05-20 21:57:23 +0200202 chunk_appendf(msg, ", info: \"");
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200203
Willy Tarreaua150cf12014-05-20 21:57:23 +0200204 chunk_initlen(&src, check->desc, 0, strlen(check->desc));
205 chunk_asciiencode(msg, &src, '"');
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200206
Willy Tarreaua150cf12014-05-20 21:57:23 +0200207 chunk_appendf(msg, "\"");
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200208 }
209
Willy Tarreaua150cf12014-05-20 21:57:23 +0200210 if (check->duration >= 0)
211 chunk_appendf(msg, ", check duration: %ldms", check->duration);
212
213 return msg->str;
Krzysztof Piotr Oledzki99ab5f82009-09-27 17:28:21 +0200214}
215
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200216/*
Simon Horman4a741432013-02-23 15:35:38 +0900217 * Set check->status, update check->duration and fill check->result with
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200218 * an adequate CHK_RES_* value. The new check->health is computed based
219 * on the result.
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200220 *
221 * Show information in logs about failed health check if server is UP
222 * or succeeded health checks if server is DOWN.
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200223 */
Simon Horman4a741432013-02-23 15:35:38 +0900224static void set_server_check_status(struct check *check, short status, const char *desc)
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100225{
Simon Horman4a741432013-02-23 15:35:38 +0900226 struct server *s = check->server;
Willy Tarreaubef1b322014-05-13 21:01:39 +0200227 short prev_status = check->status;
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200228 int report = 0;
Simon Horman4a741432013-02-23 15:35:38 +0900229
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200230 if (status == HCHK_STATUS_START) {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100231 check->result = CHK_RES_UNKNOWN; /* no result yet */
Simon Horman4a741432013-02-23 15:35:38 +0900232 check->desc[0] = '\0';
233 check->start = now;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200234 return;
235 }
236
Simon Horman4a741432013-02-23 15:35:38 +0900237 if (!check->status)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200238 return;
239
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200240 if (desc && *desc) {
Simon Horman4a741432013-02-23 15:35:38 +0900241 strncpy(check->desc, desc, HCHK_DESC_LEN-1);
242 check->desc[HCHK_DESC_LEN-1] = '\0';
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200243 } else
Simon Horman4a741432013-02-23 15:35:38 +0900244 check->desc[0] = '\0';
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200245
Simon Horman4a741432013-02-23 15:35:38 +0900246 check->status = status;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200247 if (check_statuses[status].result)
Simon Horman4a741432013-02-23 15:35:38 +0900248 check->result = check_statuses[status].result;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200249
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100250 if (status == HCHK_STATUS_HANA)
Simon Horman4a741432013-02-23 15:35:38 +0900251 check->duration = -1;
252 else if (!tv_iszero(&check->start)) {
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200253 /* set_server_check_status() may be called more than once */
Simon Horman4a741432013-02-23 15:35:38 +0900254 check->duration = tv_ms_elapsed(&check->start, &now);
255 tv_zero(&check->start);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200256 }
257
Willy Tarreau23964182014-05-20 20:56:30 +0200258 /* no change is expected if no state change occurred */
259 if (check->result == CHK_RES_NEUTRAL)
260 return;
261
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200262 report = 0;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200263
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200264 switch (check->result) {
265 case CHK_RES_FAILED:
Willy Tarreau12634e12014-05-23 11:32:36 +0200266 /* Failure to connect to the agent as a secondary check should not
267 * cause the server to be marked down.
268 */
269 if ((!(check->state & CHK_ST_AGENT) ||
Simon Hormaneaabd522015-02-26 11:26:17 +0900270 (check->status >= HCHK_STATUS_L57DATA)) &&
Willy Tarreau12634e12014-05-23 11:32:36 +0200271 (check->health >= check->rise)) {
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200272 s->counters.failed_checks++;
273 report = 1;
274 check->health--;
275 if (check->health < check->rise)
276 check->health = 0;
277 }
278 break;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200279
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200280 case CHK_RES_PASSED:
281 case CHK_RES_CONDPASS: /* "condpass" cannot make the first step but it OK after a "passed" */
282 if ((check->health < check->rise + check->fall - 1) &&
283 (check->result == CHK_RES_PASSED || check->health > 0)) {
284 report = 1;
285 check->health++;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200286
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200287 if (check->health >= check->rise)
288 check->health = check->rise + check->fall - 1; /* OK now */
289 }
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200290
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200291 /* clear consecutive_errors if observing is enabled */
292 if (s->onerror)
293 s->consecutive_errors = 0;
294 break;
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100295
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200296 default:
297 break;
298 }
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200299
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200300 if (s->proxy->options2 & PR_O2_LOGHCHKS &&
301 (status != prev_status || report)) {
302 chunk_printf(&trash,
Willy Tarreau12634e12014-05-23 11:32:36 +0200303 "%s check for %sserver %s/%s %s%s",
304 (check->state & CHK_ST_AGENT) ? "Agent" : "Health",
Willy Tarreauc93cd162014-05-13 15:54:22 +0200305 s->flags & SRV_F_BACKUP ? "backup " : "",
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100306 s->proxy->id, s->id,
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100307 (check->result == CHK_RES_CONDPASS) ? "conditionally ":"",
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200308 (check->result >= CHK_RES_PASSED) ? "succeeded" : "failed");
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200309
Willy Tarreaua150cf12014-05-20 21:57:23 +0200310 srv_append_status(&trash, s, check_reason_string(check), -1, 0);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200311
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100312 chunk_appendf(&trash, ", status: %d/%d %s",
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200313 (check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
314 (check->health >= check->rise) ? check->fall : check->rise,
315 (check->health >= check->rise) ? (s->uweight ? "UP" : "DRAIN") : "DOWN");
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200316
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100317 Warning("%s.\n", trash.str);
318 send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
Simon Horman64e34162015-02-06 11:11:57 +0900319 send_email_alert(s, LOG_NOTICE, "%s", trash.str);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200320 }
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200321}
322
Willy Tarreau4eec5472014-05-20 22:32:27 +0200323/* Marks the check <check>'s server down if the current check is already failed
324 * and the server is not down yet nor in maintenance.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200325 */
Willy Tarreau4eec5472014-05-20 22:32:27 +0200326static void check_notify_failure(struct check *check)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200327{
Simon Horman4a741432013-02-23 15:35:38 +0900328 struct server *s = check->server;
Simon Hormane0d1bfb2011-06-21 14:34:58 +0900329
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200330 /* The agent secondary check should only cause a server to be marked
331 * as down if check->status is HCHK_STATUS_L7STS, which indicates
332 * that the agent returned "fail", "stopped" or "down".
333 * The implication here is that failure to connect to the agent
334 * as a secondary check should not cause the server to be marked
335 * down. */
336 if ((check->state & CHK_ST_AGENT) && check->status != HCHK_STATUS_L7STS)
337 return;
338
Willy Tarreau4eec5472014-05-20 22:32:27 +0200339 if (check->health > 0)
340 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100341
Willy Tarreau4eec5472014-05-20 22:32:27 +0200342 /* We only report a reason for the check if we did not do so previously */
343 srv_set_stopped(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200344}
345
Willy Tarreauaf549582014-05-16 17:37:50 +0200346/* Marks the check <check> as valid and tries to set its server up, provided
Willy Tarreau3e048382014-05-21 10:30:54 +0200347 * it isn't in maintenance, it is not tracking a down server and other checks
348 * comply. The rule is simple : by default, a server is up, unless any of the
349 * following conditions is true :
350 * - health check failed (check->health < rise)
351 * - agent check failed (agent->health < rise)
352 * - the server tracks a down server (track && track->state == STOPPED)
353 * Note that if the server has a slowstart, it will switch to STARTING instead
354 * of RUNNING. Also, only the health checks support the nolb mode, so the
355 * agent's success may not take the server out of this mode.
Willy Tarreauaf549582014-05-16 17:37:50 +0200356 */
Willy Tarreau3e048382014-05-21 10:30:54 +0200357static void check_notify_success(struct check *check)
Willy Tarreauaf549582014-05-16 17:37:50 +0200358{
Simon Horman4a741432013-02-23 15:35:38 +0900359 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100360
Willy Tarreauaf549582014-05-16 17:37:50 +0200361 if (s->admin & SRV_ADMF_MAINT)
362 return;
Cyril Bontécd19e512010-01-31 22:34:03 +0100363
Willy Tarreau3e048382014-05-21 10:30:54 +0200364 if (s->track && s->track->state == SRV_ST_STOPPED)
Willy Tarreauaf549582014-05-16 17:37:50 +0200365 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100366
Willy Tarreau3e048382014-05-21 10:30:54 +0200367 if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
368 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100369
Willy Tarreau3e048382014-05-21 10:30:54 +0200370 if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
371 return;
Willy Tarreauaf549582014-05-16 17:37:50 +0200372
Willy Tarreau3e048382014-05-21 10:30:54 +0200373 if ((check->state & CHK_ST_AGENT) && s->state == SRV_ST_STOPPING)
374 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100375
Willy Tarreau3e048382014-05-21 10:30:54 +0200376 srv_set_running(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100377}
378
Willy Tarreaudb58b792014-05-21 13:57:23 +0200379/* Marks the check <check> as valid and tries to set its server into stopping mode
380 * if it was running or starting, and provided it isn't in maintenance and other
381 * checks comply. The conditions for the server to be marked in stopping mode are
382 * the same as for it to be turned up. Also, only the health checks support the
383 * nolb mode.
Willy Tarreauaf549582014-05-16 17:37:50 +0200384 */
Willy Tarreaudb58b792014-05-21 13:57:23 +0200385static void check_notify_stopping(struct check *check)
Willy Tarreauaf549582014-05-16 17:37:50 +0200386{
Simon Horman4a741432013-02-23 15:35:38 +0900387 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100388
Willy Tarreauaf549582014-05-16 17:37:50 +0200389 if (s->admin & SRV_ADMF_MAINT)
390 return;
391
Willy Tarreaudb58b792014-05-21 13:57:23 +0200392 if (check->state & CHK_ST_AGENT)
393 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100394
Willy Tarreaudb58b792014-05-21 13:57:23 +0200395 if (s->track && s->track->state == SRV_ST_STOPPED)
396 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100397
Willy Tarreaudb58b792014-05-21 13:57:23 +0200398 if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
399 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100400
Willy Tarreaudb58b792014-05-21 13:57:23 +0200401 if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
402 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100403
Willy Tarreaudb58b792014-05-21 13:57:23 +0200404 srv_set_stopping(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100405}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200406
Willy Tarreau9fe7aae2013-12-31 23:47:37 +0100407/* note: use health_adjust() only, which first checks that the observe mode is
408 * enabled.
409 */
410void __health_adjust(struct server *s, short status)
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100411{
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100412 int failed;
413 int expire;
414
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100415 if (s->observe >= HANA_OBS_SIZE)
416 return;
417
Willy Tarreaubb956662013-01-24 00:37:39 +0100418 if (status >= HANA_STATUS_SIZE || !analyze_statuses[status].desc)
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100419 return;
420
421 switch (analyze_statuses[status].lr[s->observe - 1]) {
422 case 1:
423 failed = 1;
424 break;
425
426 case 2:
427 failed = 0;
428 break;
429
430 default:
431 return;
432 }
433
434 if (!failed) {
435 /* good: clear consecutive_errors */
436 s->consecutive_errors = 0;
437 return;
438 }
439
440 s->consecutive_errors++;
441
442 if (s->consecutive_errors < s->consecutive_errors_limit)
443 return;
444
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100445 chunk_printf(&trash, "Detected %d consecutive errors, last one was: %s",
446 s->consecutive_errors, get_analyze_status(status));
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100447
448 switch (s->onerror) {
449 case HANA_ONERR_FASTINTER:
450 /* force fastinter - nothing to do here as all modes force it */
451 break;
452
453 case HANA_ONERR_SUDDTH:
454 /* simulate a pre-fatal failed health check */
Simon Horman58c32972013-11-25 10:46:38 +0900455 if (s->check.health > s->check.rise)
456 s->check.health = s->check.rise + 1;
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100457
458 /* no break - fall through */
459
460 case HANA_ONERR_FAILCHK:
461 /* simulate a failed health check */
Simon Horman4a741432013-02-23 15:35:38 +0900462 set_server_check_status(&s->check, HCHK_STATUS_HANA, trash.str);
Willy Tarreau4eec5472014-05-20 22:32:27 +0200463 check_notify_failure(&s->check);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100464 break;
465
466 case HANA_ONERR_MARKDWN:
467 /* mark server down */
Simon Horman58c32972013-11-25 10:46:38 +0900468 s->check.health = s->check.rise;
Simon Horman4a741432013-02-23 15:35:38 +0900469 set_server_check_status(&s->check, HCHK_STATUS_HANA, trash.str);
Willy Tarreau4eec5472014-05-20 22:32:27 +0200470 check_notify_failure(&s->check);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100471 break;
472
473 default:
474 /* write a warning? */
475 break;
476 }
477
478 s->consecutive_errors = 0;
479 s->counters.failed_hana++;
480
Simon Horman66183002013-02-23 10:16:43 +0900481 if (s->check.fastinter) {
482 expire = tick_add(now_ms, MS_TO_TICKS(s->check.fastinter));
Sergiy Prykhodko1d57e502013-09-21 12:05:00 +0300483 if (s->check.task->expire > expire) {
Willy Tarreau5b3a2022012-09-28 15:01:02 +0200484 s->check.task->expire = expire;
Sergiy Prykhodko1d57e502013-09-21 12:05:00 +0300485 /* requeue check task with new expire */
486 task_queue(s->check.task);
487 }
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100488 }
489}
490
Willy Tarreaua1dab552014-04-14 15:04:54 +0200491static int httpchk_build_status_header(struct server *s, char *buffer, int size)
Willy Tarreauef781042010-01-27 11:53:01 +0100492{
493 int sv_state;
494 int ratio;
495 int hlen = 0;
Joseph Lynch514061c2015-01-15 17:52:59 -0800496 char addr[46];
497 char port[6];
Willy Tarreauef781042010-01-27 11:53:01 +0100498 const char *srv_hlt_st[7] = { "DOWN", "DOWN %d/%d",
499 "UP %d/%d", "UP",
500 "NOLB %d/%d", "NOLB",
501 "no check" };
502
503 memcpy(buffer + hlen, "X-Haproxy-Server-State: ", 24);
504 hlen += 24;
505
Willy Tarreauff5ae352013-12-11 20:36:34 +0100506 if (!(s->check.state & CHK_ST_ENABLED))
507 sv_state = 6;
Willy Tarreau892337c2014-05-13 23:41:20 +0200508 else if (s->state != SRV_ST_STOPPED) {
Simon Horman58c32972013-11-25 10:46:38 +0900509 if (s->check.health == s->check.rise + s->check.fall - 1)
Willy Tarreauef781042010-01-27 11:53:01 +0100510 sv_state = 3; /* UP */
511 else
512 sv_state = 2; /* going down */
513
Willy Tarreau892337c2014-05-13 23:41:20 +0200514 if (s->state == SRV_ST_STOPPING)
Willy Tarreauef781042010-01-27 11:53:01 +0100515 sv_state += 2;
516 } else {
Simon Horman125d0992013-02-24 17:23:38 +0900517 if (s->check.health)
Willy Tarreauef781042010-01-27 11:53:01 +0100518 sv_state = 1; /* going up */
519 else
520 sv_state = 0; /* DOWN */
521 }
522
Willy Tarreaua1dab552014-04-14 15:04:54 +0200523 hlen += snprintf(buffer + hlen, size - hlen,
Willy Tarreauef781042010-01-27 11:53:01 +0100524 srv_hlt_st[sv_state],
Willy Tarreau892337c2014-05-13 23:41:20 +0200525 (s->state != SRV_ST_STOPPED) ? (s->check.health - s->check.rise + 1) : (s->check.health),
526 (s->state != SRV_ST_STOPPED) ? (s->check.fall) : (s->check.rise));
Willy Tarreauef781042010-01-27 11:53:01 +0100527
Joseph Lynch514061c2015-01-15 17:52:59 -0800528 addr_to_str(&s->addr, addr, sizeof(addr));
529 port_to_str(&s->addr, port, sizeof(port));
530
531 hlen += snprintf(buffer + hlen, size - hlen, "; address=%s; port=%s; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d",
532 addr, port, s->proxy->id, s->id,
Willy Tarreauef781042010-01-27 11:53:01 +0100533 global.node,
534 (s->eweight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
535 (s->proxy->lbprm.tot_weight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
536 s->cur_sess, s->proxy->beconn - s->proxy->nbpend,
537 s->nbpend);
538
Willy Tarreau892337c2014-05-13 23:41:20 +0200539 if ((s->state == SRV_ST_STARTING) &&
Willy Tarreauef781042010-01-27 11:53:01 +0100540 now.tv_sec < s->last_change + s->slowstart &&
541 now.tv_sec >= s->last_change) {
542 ratio = MAX(1, 100 * (now.tv_sec - s->last_change) / s->slowstart);
Willy Tarreaua1dab552014-04-14 15:04:54 +0200543 hlen += snprintf(buffer + hlen, size - hlen, "; throttle=%d%%", ratio);
Willy Tarreauef781042010-01-27 11:53:01 +0100544 }
545
546 buffer[hlen++] = '\r';
547 buffer[hlen++] = '\n';
548
549 return hlen;
550}
551
Willy Tarreau20a18342013-12-05 00:31:46 +0100552/* Check the connection. If an error has already been reported or the socket is
553 * closed, keep errno intact as it is supposed to contain the valid error code.
554 * If no error is reported, check the socket's error queue using getsockopt().
555 * Warning, this must be done only once when returning from poll, and never
556 * after an I/O error was attempted, otherwise the error queue might contain
557 * inconsistent errors. If an error is detected, the CO_FL_ERROR is set on the
558 * socket. Returns non-zero if an error was reported, zero if everything is
559 * clean (including a properly closed socket).
560 */
561static int retrieve_errno_from_socket(struct connection *conn)
562{
563 int skerr;
564 socklen_t lskerr = sizeof(skerr);
565
566 if (conn->flags & CO_FL_ERROR && ((errno && errno != EAGAIN) || !conn->ctrl))
567 return 1;
568
Willy Tarreau3c728722014-01-23 13:50:42 +0100569 if (!conn_ctrl_ready(conn))
Willy Tarreau20a18342013-12-05 00:31:46 +0100570 return 0;
571
572 if (getsockopt(conn->t.sock.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0)
573 errno = skerr;
574
575 if (errno == EAGAIN)
576 errno = 0;
577
578 if (!errno) {
579 /* we could not retrieve an error, that does not mean there is
580 * none. Just don't change anything and only report the prior
581 * error if any.
582 */
583 if (conn->flags & CO_FL_ERROR)
584 return 1;
585 else
586 return 0;
587 }
588
589 conn->flags |= CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_SOCK_RD_SH;
590 return 1;
591}
592
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100593/* Try to collect as much information as possible on the connection status,
594 * and adjust the server status accordingly. It may make use of <errno_bck>
595 * if non-null when the caller is absolutely certain of its validity (eg:
596 * checked just after a syscall). If the caller doesn't have a valid errno,
597 * it can pass zero, and retrieve_errno_from_socket() will be called to try
598 * to extract errno from the socket. If no error is reported, it will consider
599 * the <expired> flag. This is intended to be used when a connection error was
600 * reported in conn->flags or when a timeout was reported in <expired>. The
601 * function takes care of not updating a server status which was already set.
602 * All situations where at least one of <expired> or CO_FL_ERROR are set
603 * produce a status.
604 */
605static void chk_report_conn_err(struct connection *conn, int errno_bck, int expired)
606{
607 struct check *check = conn->owner;
608 const char *err_msg;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200609 struct chunk *chk;
Willy Tarreau213c6782014-10-02 14:51:02 +0200610 int step;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100611
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100612 if (check->result != CHK_RES_UNKNOWN)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100613 return;
614
615 errno = errno_bck;
616 if (!errno || errno == EAGAIN)
617 retrieve_errno_from_socket(conn);
618
619 if (!(conn->flags & CO_FL_ERROR) && !expired)
620 return;
621
622 /* we'll try to build a meaningful error message depending on the
623 * context of the error possibly present in conn->err_code, and the
624 * socket error possibly collected above. This is useful to know the
625 * exact step of the L6 layer (eg: SSL handshake).
626 */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200627 chk = get_trash_chunk();
628
629 if (check->type == PR_O2_TCPCHK_CHK) {
Simon Hormane16c1b32015-01-30 11:22:57 +0900630 step = tcpcheck_get_step_id(check);
Willy Tarreau213c6782014-10-02 14:51:02 +0200631 if (!step)
632 chunk_printf(chk, " at initial connection step of tcp-check");
633 else {
634 chunk_printf(chk, " at step %d of tcp-check", step);
635 /* we were looking for a string */
636 if (check->last_started_step && check->last_started_step->action == TCPCHK_ACT_CONNECT) {
637 if (check->last_started_step->port)
638 chunk_appendf(chk, " (connect port %d)" ,check->last_started_step->port);
639 else
640 chunk_appendf(chk, " (connect)");
641 }
642 else if (check->last_started_step && check->last_started_step->action == TCPCHK_ACT_EXPECT) {
643 if (check->last_started_step->string)
644 chunk_appendf(chk, " (string '%s')", check->last_started_step->string);
645 else if (check->last_started_step->expect_regex)
646 chunk_appendf(chk, " (expect regex)");
647 }
648 else if (check->last_started_step && check->last_started_step->action == TCPCHK_ACT_SEND) {
649 chunk_appendf(chk, " (send)");
650 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200651 }
652 }
653
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100654 if (conn->err_code) {
655 if (errno && errno != EAGAIN)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200656 chunk_printf(&trash, "%s (%s)%s", conn_err_code_str(conn), strerror(errno), chk->str);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100657 else
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200658 chunk_printf(&trash, "%s%s", conn_err_code_str(conn), chk->str);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100659 err_msg = trash.str;
660 }
661 else {
662 if (errno && errno != EAGAIN) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200663 chunk_printf(&trash, "%s%s", strerror(errno), chk->str);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100664 err_msg = trash.str;
665 }
666 else {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200667 err_msg = chk->str;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100668 }
669 }
670
671 if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L4_CONN)) == CO_FL_WAIT_L4_CONN) {
672 /* L4 not established (yet) */
673 if (conn->flags & CO_FL_ERROR)
674 set_server_check_status(check, HCHK_STATUS_L4CON, err_msg);
675 else if (expired)
676 set_server_check_status(check, HCHK_STATUS_L4TOUT, err_msg);
677 }
678 else if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L6_CONN)) == CO_FL_WAIT_L6_CONN) {
679 /* L6 not established (yet) */
680 if (conn->flags & CO_FL_ERROR)
681 set_server_check_status(check, HCHK_STATUS_L6RSP, err_msg);
682 else if (expired)
683 set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
684 }
685 else if (conn->flags & CO_FL_ERROR) {
686 /* I/O error after connection was established and before we could diagnose */
687 set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
688 }
689 else if (expired) {
690 /* connection established but expired check */
691 if (check->type == PR_O2_SSL3_CHK)
692 set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
693 else /* HTTP, SMTP, ... */
694 set_server_check_status(check, HCHK_STATUS_L7TOUT, err_msg);
695 }
696
697 return;
698}
699
Willy Tarreaubaaee002006-06-26 02:48:02 +0200700/*
701 * This function is used only for server health-checks. It handles
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200702 * the connection acknowledgement. If the proxy requires L7 health-checks,
703 * it sends the request. In other cases, it calls set_server_check_status()
Simon Horman4a741432013-02-23 15:35:38 +0900704 * to set check->status, check->duration and check->result.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200705 */
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200706static void event_srv_chk_w(struct connection *conn)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200707{
Simon Horman4a741432013-02-23 15:35:38 +0900708 struct check *check = conn->owner;
709 struct server *s = check->server;
Simon Horman4a741432013-02-23 15:35:38 +0900710 struct task *t = check->task;
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200711
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100712 if (unlikely(check->result == CHK_RES_FAILED))
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100713 goto out_wakeup;
714
Willy Tarreau310987a2014-01-22 19:46:33 +0100715 if (conn->flags & CO_FL_HANDSHAKE)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100716 return;
717
Willy Tarreau20a18342013-12-05 00:31:46 +0100718 if (retrieve_errno_from_socket(conn)) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100719 chk_report_conn_err(conn, errno, 0);
Willy Tarreau20a18342013-12-05 00:31:46 +0100720 __conn_data_stop_both(conn);
721 goto out_wakeup;
722 }
Krzysztof Piotr Oledzki6492db52010-01-02 22:03:01 +0100723
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100724 if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH)) {
Willy Tarreau20a18342013-12-05 00:31:46 +0100725 /* if the output is closed, we can't do anything */
726 conn->flags |= CO_FL_ERROR;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100727 chk_report_conn_err(conn, 0, 0);
Willy Tarreau20a18342013-12-05 00:31:46 +0100728 goto out_wakeup;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200729 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200730
Willy Tarreau06559ac2013-12-05 01:53:08 +0100731 /* here, we know that the connection is established. That's enough for
732 * a pure TCP check.
733 */
734 if (!check->type)
735 goto out_wakeup;
736
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200737 if (check->type == PR_O2_TCPCHK_CHK) {
738 tcpcheck_main(conn);
739 return;
740 }
741
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100742 if (check->bo->o) {
Willy Tarreau1049b1f2014-02-02 01:51:17 +0100743 conn->xprt->snd_buf(conn, check->bo, 0);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100744 if (conn->flags & CO_FL_ERROR) {
745 chk_report_conn_err(conn, errno, 0);
746 __conn_data_stop_both(conn);
747 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200748 }
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100749 if (check->bo->o)
750 return;
751 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200752
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100753 /* full request sent, we allow up to <timeout.check> if nonzero for a response */
754 if (s->proxy->timeout.check) {
755 t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
756 task_queue(t);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200757 }
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100758 goto out_nowake;
759
Willy Tarreau83749182007-04-15 20:56:27 +0200760 out_wakeup:
Willy Tarreaufdccded2008-08-29 18:19:04 +0200761 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreau83749182007-04-15 20:56:27 +0200762 out_nowake:
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200763 __conn_data_stop_send(conn); /* nothing more to write */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200764}
765
Willy Tarreaubaaee002006-06-26 02:48:02 +0200766/*
Willy Tarreauf3c69202006-07-09 16:42:34 +0200767 * This function is used only for server health-checks. It handles the server's
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +0200768 * reply to an HTTP request, SSL HELLO or MySQL client Auth. It calls
Simon Horman4a741432013-02-23 15:35:38 +0900769 * set_server_check_status() to update check->status, check->duration
770 * and check->result.
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200771
772 * The set_server_check_status function is called with HCHK_STATUS_L7OKD if
773 * an HTTP server replies HTTP 2xx or 3xx (valid responses), if an SMTP server
774 * returns 2xx, HCHK_STATUS_L6OK if an SSL server returns at least 5 bytes in
775 * response to an SSL HELLO (the principle is that this is enough to
776 * distinguish between an SSL server and a pure TCP relay). All other cases will
777 * call it with a proper error status like HCHK_STATUS_L7STS, HCHK_STATUS_L6RSP,
778 * etc.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200779 */
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200780static void event_srv_chk_r(struct connection *conn)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200781{
Simon Horman4a741432013-02-23 15:35:38 +0900782 struct check *check = conn->owner;
783 struct server *s = check->server;
784 struct task *t = check->task;
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200785 char *desc;
Willy Tarreau03938182010-03-17 21:52:07 +0100786 int done;
Gabor Lekenyb4c81e42010-09-29 18:17:05 +0200787 unsigned short msglen;
Willy Tarreau83749182007-04-15 20:56:27 +0200788
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100789 if (unlikely(check->result == CHK_RES_FAILED))
Willy Tarreau83749182007-04-15 20:56:27 +0200790 goto out_wakeup;
Willy Tarreau83749182007-04-15 20:56:27 +0200791
Willy Tarreau310987a2014-01-22 19:46:33 +0100792 if (conn->flags & CO_FL_HANDSHAKE)
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200793 return;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200794
795 if (check->type == PR_O2_TCPCHK_CHK) {
796 tcpcheck_main(conn);
797 return;
798 }
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200799
Willy Tarreau83749182007-04-15 20:56:27 +0200800 /* Warning! Linux returns EAGAIN on SO_ERROR if data are still available
801 * but the connection was closed on the remote end. Fortunately, recv still
802 * works correctly and we don't need to do the getsockopt() on linux.
803 */
Nick Chalk57b1bf72010-03-16 15:50:46 +0000804
805 /* Set buffer to point to the end of the data already read, and check
806 * that there is free space remaining. If the buffer is full, proceed
807 * with running the checks without attempting another socket read.
808 */
Nick Chalk57b1bf72010-03-16 15:50:46 +0000809
Willy Tarreau03938182010-03-17 21:52:07 +0100810 done = 0;
Nick Chalk57b1bf72010-03-16 15:50:46 +0000811
Simon Horman4a741432013-02-23 15:35:38 +0900812 conn->xprt->rcv_buf(conn, check->bi, check->bi->size);
Willy Tarreauf1503172012-09-28 19:39:36 +0200813 if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH)) {
Willy Tarreau03938182010-03-17 21:52:07 +0100814 done = 1;
Simon Horman4a741432013-02-23 15:35:38 +0900815 if ((conn->flags & CO_FL_ERROR) && !check->bi->i) {
Willy Tarreauf1503172012-09-28 19:39:36 +0200816 /* Report network errors only if we got no other data. Otherwise
817 * we'll let the upper layers decide whether the response is OK
818 * or not. It is very common that an RST sent by the server is
819 * reported as an error just after the last data chunk.
820 */
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100821 chk_report_conn_err(conn, errno, 0);
Willy Tarreauc1a07962010-03-16 20:55:43 +0100822 goto out_wakeup;
823 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200824 }
825
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100826
Willy Tarreau03938182010-03-17 21:52:07 +0100827 /* Intermediate or complete response received.
Simon Horman4a741432013-02-23 15:35:38 +0900828 * Terminate string in check->bi->data buffer.
Willy Tarreau03938182010-03-17 21:52:07 +0100829 */
Simon Horman4a741432013-02-23 15:35:38 +0900830 if (check->bi->i < check->bi->size)
831 check->bi->data[check->bi->i] = '\0';
Willy Tarreau03938182010-03-17 21:52:07 +0100832 else {
Simon Horman4a741432013-02-23 15:35:38 +0900833 check->bi->data[check->bi->i - 1] = '\0';
Willy Tarreau03938182010-03-17 21:52:07 +0100834 done = 1; /* buffer full, don't wait for more data */
835 }
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200836
Nick Chalk57b1bf72010-03-16 15:50:46 +0000837 /* Run the checks... */
Simon Horman4a741432013-02-23 15:35:38 +0900838 switch (check->type) {
Willy Tarreau1620ec32011-08-06 17:05:02 +0200839 case PR_O2_HTTP_CHK:
Simon Horman4a741432013-02-23 15:35:38 +0900840 if (!done && check->bi->i < strlen("HTTP/1.0 000\r"))
Willy Tarreau03938182010-03-17 21:52:07 +0100841 goto wait_more_data;
842
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100843 /* Check if the server speaks HTTP 1.X */
Simon Horman4a741432013-02-23 15:35:38 +0900844 if ((check->bi->i < strlen("HTTP/1.0 000\r")) ||
845 (memcmp(check->bi->data, "HTTP/1.", 7) != 0 ||
846 (*(check->bi->data + 12) != ' ' && *(check->bi->data + 12) != '\r')) ||
847 !isdigit((unsigned char) *(check->bi->data + 9)) || !isdigit((unsigned char) *(check->bi->data + 10)) ||
848 !isdigit((unsigned char) *(check->bi->data + 11))) {
849 cut_crlf(check->bi->data);
850 set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200851
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100852 goto out_wakeup;
853 }
854
Simon Horman4a741432013-02-23 15:35:38 +0900855 check->code = str2uic(check->bi->data + 9);
856 desc = ltrim(check->bi->data + 12, ' ');
Nick Chalk57b1bf72010-03-16 15:50:46 +0000857
Willy Tarreaubd741542010-03-16 18:46:54 +0100858 if ((s->proxy->options & PR_O_DISABLE404) &&
Willy Tarreau892337c2014-05-13 23:41:20 +0200859 (s->state != SRV_ST_STOPPED) && (check->code == 404)) {
Nick Chalk57b1bf72010-03-16 15:50:46 +0000860 /* 404 may be accepted as "stopping" only if the server was up */
861 cut_crlf(desc);
Simon Horman4a741432013-02-23 15:35:38 +0900862 set_server_check_status(check, HCHK_STATUS_L7OKCD, desc);
Nick Chalk57b1bf72010-03-16 15:50:46 +0000863 }
Willy Tarreaubd741542010-03-16 18:46:54 +0100864 else if (s->proxy->options2 & PR_O2_EXP_TYPE) {
865 /* Run content verification check... We know we have at least 13 chars */
866 if (!httpchk_expect(s, done))
867 goto wait_more_data;
868 }
869 /* check the reply : HTTP/1.X 2xx and 3xx are OK */
Simon Horman4a741432013-02-23 15:35:38 +0900870 else if (*(check->bi->data + 9) == '2' || *(check->bi->data + 9) == '3') {
Willy Tarreaubd741542010-03-16 18:46:54 +0100871 cut_crlf(desc);
Simon Horman4a741432013-02-23 15:35:38 +0900872 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Willy Tarreaubd741542010-03-16 18:46:54 +0100873 }
Nick Chalk57b1bf72010-03-16 15:50:46 +0000874 else {
875 cut_crlf(desc);
Simon Horman4a741432013-02-23 15:35:38 +0900876 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Nick Chalk57b1bf72010-03-16 15:50:46 +0000877 }
Willy Tarreau1620ec32011-08-06 17:05:02 +0200878 break;
879
880 case PR_O2_SSL3_CHK:
Simon Horman4a741432013-02-23 15:35:38 +0900881 if (!done && check->bi->i < 5)
Willy Tarreau03938182010-03-17 21:52:07 +0100882 goto wait_more_data;
883
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100884 /* Check for SSLv3 alert or handshake */
Simon Horman4a741432013-02-23 15:35:38 +0900885 if ((check->bi->i >= 5) && (*check->bi->data == 0x15 || *check->bi->data == 0x16))
886 set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200887 else
Simon Horman4a741432013-02-23 15:35:38 +0900888 set_server_check_status(check, HCHK_STATUS_L6RSP, NULL);
Willy Tarreau1620ec32011-08-06 17:05:02 +0200889 break;
890
891 case PR_O2_SMTP_CHK:
Simon Horman4a741432013-02-23 15:35:38 +0900892 if (!done && check->bi->i < strlen("000\r"))
Willy Tarreau03938182010-03-17 21:52:07 +0100893 goto wait_more_data;
894
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200895 /* Check if the server speaks SMTP */
Simon Horman4a741432013-02-23 15:35:38 +0900896 if ((check->bi->i < strlen("000\r")) ||
897 (*(check->bi->data + 3) != ' ' && *(check->bi->data + 3) != '\r') ||
898 !isdigit((unsigned char) *check->bi->data) || !isdigit((unsigned char) *(check->bi->data + 1)) ||
899 !isdigit((unsigned char) *(check->bi->data + 2))) {
900 cut_crlf(check->bi->data);
901 set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200902
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200903 goto out_wakeup;
904 }
905
Simon Horman4a741432013-02-23 15:35:38 +0900906 check->code = str2uic(check->bi->data);
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200907
Simon Horman4a741432013-02-23 15:35:38 +0900908 desc = ltrim(check->bi->data + 3, ' ');
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200909 cut_crlf(desc);
910
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100911 /* Check for SMTP code 2xx (should be 250) */
Simon Horman4a741432013-02-23 15:35:38 +0900912 if (*check->bi->data == '2')
913 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200914 else
Simon Horman4a741432013-02-23 15:35:38 +0900915 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Willy Tarreau1620ec32011-08-06 17:05:02 +0200916 break;
917
Simon Hormana2b9dad2013-02-12 10:45:54 +0900918 case PR_O2_LB_AGENT_CHK: {
Willy Tarreau81f5d942013-12-09 20:51:51 +0100919 int status = HCHK_STATUS_CHECKED;
920 const char *hs = NULL; /* health status */
921 const char *as = NULL; /* admin status */
922 const char *ps = NULL; /* performance status */
923 const char *err = NULL; /* first error to report */
924 const char *wrn = NULL; /* first warning to report */
925 char *cmd, *p;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900926
Willy Tarreau81f5d942013-12-09 20:51:51 +0100927 /* We're getting an agent check response. The agent could
928 * have been disabled in the mean time with a long check
929 * still pending. It is important that we ignore the whole
930 * response.
931 */
932 if (!(check->server->agent.state & CHK_ST_ENABLED))
933 break;
934
935 /* The agent supports strings made of a single line ended by the
936 * first CR ('\r') or LF ('\n'). This line is composed of words
937 * delimited by spaces (' '), tabs ('\t'), or commas (','). The
938 * line may optionally contained a description of a state change
939 * after a sharp ('#'), which is only considered if a health state
940 * is announced.
941 *
942 * Words may be composed of :
943 * - a numeric weight suffixed by the percent character ('%').
944 * - a health status among "up", "down", "stopped", and "fail".
945 * - an admin status among "ready", "drain", "maint".
946 *
947 * These words may appear in any order. If multiple words of the
948 * same category appear, the last one wins.
949 */
950
Willy Tarreau9809b782013-12-11 21:40:11 +0100951 p = check->bi->data;
952 while (*p && *p != '\n' && *p != '\r')
953 p++;
954
955 if (!*p) {
956 if (!done)
957 goto wait_more_data;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900958
Willy Tarreau9809b782013-12-11 21:40:11 +0100959 /* at least inform the admin that the agent is mis-behaving */
960 set_server_check_status(check, check->status, "Ignoring incomplete line from agent");
961 break;
962 }
Willy Tarreau81f5d942013-12-09 20:51:51 +0100963
Willy Tarreau9809b782013-12-11 21:40:11 +0100964 *p = 0;
Willy Tarreau81f5d942013-12-09 20:51:51 +0100965 cmd = check->bi->data;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900966
Willy Tarreau81f5d942013-12-09 20:51:51 +0100967 while (*cmd) {
968 /* look for next word */
969 if (*cmd == ' ' || *cmd == '\t' || *cmd == ',') {
970 cmd++;
971 continue;
972 }
Simon Horman671b6f02013-11-25 10:46:39 +0900973
Willy Tarreau81f5d942013-12-09 20:51:51 +0100974 if (*cmd == '#') {
975 /* this is the beginning of a health status description,
976 * skip the sharp and blanks.
977 */
978 cmd++;
979 while (*cmd == '\t' || *cmd == ' ')
980 cmd++;
Simon Horman671b6f02013-11-25 10:46:39 +0900981 break;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900982 }
Willy Tarreau81f5d942013-12-09 20:51:51 +0100983
984 /* find the end of the word so that we have a null-terminated
985 * word between <cmd> and <p>.
986 */
987 p = cmd + 1;
988 while (*p && *p != '\t' && *p != ' ' && *p != '\n' && *p != ',')
989 p++;
990 if (*p)
991 *p++ = 0;
992
993 /* first, health statuses */
994 if (strcasecmp(cmd, "up") == 0) {
995 check->health = check->rise + check->fall - 1;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900996 status = HCHK_STATUS_L7OKD;
Willy Tarreau81f5d942013-12-09 20:51:51 +0100997 hs = cmd;
998 }
999 else if (strcasecmp(cmd, "down") == 0) {
1000 check->health = 0;
1001 status = HCHK_STATUS_L7STS;
1002 hs = cmd;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001003 }
Willy Tarreau81f5d942013-12-09 20:51:51 +01001004 else if (strcasecmp(cmd, "stopped") == 0) {
1005 check->health = 0;
1006 status = HCHK_STATUS_L7STS;
1007 hs = cmd;
1008 }
1009 else if (strcasecmp(cmd, "fail") == 0) {
1010 check->health = 0;
1011 status = HCHK_STATUS_L7STS;
1012 hs = cmd;
1013 }
1014 /* admin statuses */
1015 else if (strcasecmp(cmd, "ready") == 0) {
1016 as = cmd;
1017 }
1018 else if (strcasecmp(cmd, "drain") == 0) {
1019 as = cmd;
1020 }
1021 else if (strcasecmp(cmd, "maint") == 0) {
1022 as = cmd;
1023 }
1024 /* else try to parse a weight here and keep the last one */
1025 else if (isdigit((unsigned char)*cmd) && strchr(cmd, '%') != NULL) {
1026 ps = cmd;
1027 }
1028 else {
1029 /* keep a copy of the first error */
1030 if (!err)
1031 err = cmd;
1032 }
1033 /* skip to next word */
1034 cmd = p;
1035 }
1036 /* here, cmd points either to \0 or to the beginning of a
1037 * description. Skip possible leading spaces.
1038 */
1039 while (*cmd == ' ' || *cmd == '\n')
1040 cmd++;
1041
1042 /* First, update the admin status so that we avoid sending other
1043 * possibly useless warnings and can also update the health if
1044 * present after going back up.
1045 */
1046 if (as) {
1047 if (strcasecmp(as, "drain") == 0)
1048 srv_adm_set_drain(check->server);
1049 else if (strcasecmp(as, "maint") == 0)
1050 srv_adm_set_maint(check->server);
1051 else
1052 srv_adm_set_ready(check->server);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001053 }
1054
Willy Tarreau81f5d942013-12-09 20:51:51 +01001055 /* now change weights */
1056 if (ps) {
1057 const char *msg;
1058
1059 msg = server_parse_weight_change_request(s, ps);
1060 if (!wrn || !*wrn)
1061 wrn = msg;
1062 }
1063
1064 /* and finally health status */
1065 if (hs) {
1066 /* We'll report some of the warnings and errors we have
1067 * here. Down reports are critical, we leave them untouched.
1068 * Lack of report, or report of 'UP' leaves the room for
1069 * ERR first, then WARN.
Simon Hormana2b9dad2013-02-12 10:45:54 +09001070 */
Willy Tarreau81f5d942013-12-09 20:51:51 +01001071 const char *msg = cmd;
1072 struct chunk *t;
1073
1074 if (!*msg || status == HCHK_STATUS_L7OKD) {
1075 if (err && *err)
1076 msg = err;
1077 else if (wrn && *wrn)
1078 msg = wrn;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001079 }
Willy Tarreau81f5d942013-12-09 20:51:51 +01001080
1081 t = get_trash_chunk();
1082 chunk_printf(t, "via agent : %s%s%s%s",
1083 hs, *msg ? " (" : "",
1084 msg, *msg ? ")" : "");
1085
1086 set_server_check_status(check, status, t->str);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001087 }
Willy Tarreau81f5d942013-12-09 20:51:51 +01001088 else if (err && *err) {
1089 /* No status change but we'd like to report something odd.
1090 * Just report the current state and copy the message.
1091 */
1092 chunk_printf(&trash, "agent reports an error : %s", err);
1093 set_server_check_status(check, status/*check->status*/, trash.str);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001094
Willy Tarreau81f5d942013-12-09 20:51:51 +01001095 }
1096 else if (wrn && *wrn) {
1097 /* No status change but we'd like to report something odd.
1098 * Just report the current state and copy the message.
1099 */
1100 chunk_printf(&trash, "agent warns : %s", wrn);
1101 set_server_check_status(check, status/*check->status*/, trash.str);
1102 }
1103 else
1104 set_server_check_status(check, status, NULL);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001105 break;
1106 }
1107
Willy Tarreau1620ec32011-08-06 17:05:02 +02001108 case PR_O2_PGSQL_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001109 if (!done && check->bi->i < 9)
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001110 goto wait_more_data;
1111
Simon Horman4a741432013-02-23 15:35:38 +09001112 if (check->bi->data[0] == 'R') {
1113 set_server_check_status(check, HCHK_STATUS_L7OKD, "PostgreSQL server is ok");
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001114 }
1115 else {
Simon Horman4a741432013-02-23 15:35:38 +09001116 if ((check->bi->data[0] == 'E') && (check->bi->data[5]!=0) && (check->bi->data[6]!=0))
1117 desc = &check->bi->data[6];
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001118 else
1119 desc = "PostgreSQL unknown error";
1120
Simon Horman4a741432013-02-23 15:35:38 +09001121 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001122 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001123 break;
1124
1125 case PR_O2_REDIS_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001126 if (!done && check->bi->i < 7)
Hervé COMMOWICKec032d62011-08-05 16:23:48 +02001127 goto wait_more_data;
1128
Simon Horman4a741432013-02-23 15:35:38 +09001129 if (strcmp(check->bi->data, "+PONG\r\n") == 0) {
1130 set_server_check_status(check, HCHK_STATUS_L7OKD, "Redis server is ok");
Hervé COMMOWICKec032d62011-08-05 16:23:48 +02001131 }
1132 else {
Simon Horman4a741432013-02-23 15:35:38 +09001133 set_server_check_status(check, HCHK_STATUS_L7STS, check->bi->data);
Hervé COMMOWICKec032d62011-08-05 16:23:48 +02001134 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001135 break;
1136
1137 case PR_O2_MYSQL_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001138 if (!done && check->bi->i < 5)
Willy Tarreau03938182010-03-17 21:52:07 +01001139 goto wait_more_data;
1140
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001141 if (s->proxy->check_len == 0) { // old mode
Simon Horman4a741432013-02-23 15:35:38 +09001142 if (*(check->bi->data + 4) != '\xff') {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001143 /* We set the MySQL Version in description for information purpose
1144 * FIXME : it can be cool to use MySQL Version for other purpose,
1145 * like mark as down old MySQL server.
1146 */
Simon Horman4a741432013-02-23 15:35:38 +09001147 if (check->bi->i > 51) {
1148 desc = ltrim(check->bi->data + 5, ' ');
1149 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001150 }
1151 else {
1152 if (!done)
1153 goto wait_more_data;
1154 /* it seems we have a OK packet but without a valid length,
1155 * it must be a protocol error
1156 */
Simon Horman4a741432013-02-23 15:35:38 +09001157 set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001158 }
Hervé COMMOWICK698ae002010-01-12 09:25:13 +01001159 }
1160 else {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001161 /* An error message is attached in the Error packet */
Simon Horman4a741432013-02-23 15:35:38 +09001162 desc = ltrim(check->bi->data + 7, ' ');
1163 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001164 }
1165 } else {
Simon Horman4a741432013-02-23 15:35:38 +09001166 unsigned int first_packet_len = ((unsigned int) *check->bi->data) +
1167 (((unsigned int) *(check->bi->data + 1)) << 8) +
1168 (((unsigned int) *(check->bi->data + 2)) << 16);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001169
Simon Horman4a741432013-02-23 15:35:38 +09001170 if (check->bi->i == first_packet_len + 4) {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001171 /* MySQL Error packet always begin with field_count = 0xff */
Simon Horman4a741432013-02-23 15:35:38 +09001172 if (*(check->bi->data + 4) != '\xff') {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001173 /* We have only one MySQL packet and it is a Handshake Initialization packet
1174 * but we need to have a second packet to know if it is alright
1175 */
Simon Horman4a741432013-02-23 15:35:38 +09001176 if (!done && check->bi->i < first_packet_len + 5)
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001177 goto wait_more_data;
1178 }
1179 else {
1180 /* We have only one packet and it is an Error packet,
1181 * an error message is attached, so we can display it
1182 */
Simon Horman4a741432013-02-23 15:35:38 +09001183 desc = &check->bi->data[7];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001184 //Warning("onlyoneERR: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001185 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001186 }
Simon Horman4a741432013-02-23 15:35:38 +09001187 } else if (check->bi->i > first_packet_len + 4) {
1188 unsigned int second_packet_len = ((unsigned int) *(check->bi->data + first_packet_len + 4)) +
1189 (((unsigned int) *(check->bi->data + first_packet_len + 5)) << 8) +
1190 (((unsigned int) *(check->bi->data + first_packet_len + 6)) << 16);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001191
Simon Horman4a741432013-02-23 15:35:38 +09001192 if (check->bi->i == first_packet_len + 4 + second_packet_len + 4 ) {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001193 /* We have 2 packets and that's good */
1194 /* Check if the second packet is a MySQL Error packet or not */
Simon Horman4a741432013-02-23 15:35:38 +09001195 if (*(check->bi->data + first_packet_len + 8) != '\xff') {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001196 /* No error packet */
1197 /* We set the MySQL Version in description for information purpose */
Simon Horman4a741432013-02-23 15:35:38 +09001198 desc = &check->bi->data[5];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001199 //Warning("2packetOK: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001200 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001201 }
1202 else {
1203 /* An error message is attached in the Error packet
1204 * so we can display it ! :)
1205 */
Simon Horman4a741432013-02-23 15:35:38 +09001206 desc = &check->bi->data[first_packet_len+11];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001207 //Warning("2packetERR: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001208 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001209 }
1210 }
1211 }
1212 else {
Willy Tarreau03938182010-03-17 21:52:07 +01001213 if (!done)
1214 goto wait_more_data;
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001215 /* it seems we have a Handshake Initialization packet but without a valid length,
Hervé COMMOWICK698ae002010-01-12 09:25:13 +01001216 * it must be a protocol error
1217 */
Simon Horman4a741432013-02-23 15:35:38 +09001218 desc = &check->bi->data[5];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001219 //Warning("protoerr: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001220 set_server_check_status(check, HCHK_STATUS_L7RSP, desc);
Hervé COMMOWICK698ae002010-01-12 09:25:13 +01001221 }
1222 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001223 break;
1224
1225 case PR_O2_LDAP_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001226 if (!done && check->bi->i < 14)
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001227 goto wait_more_data;
1228
1229 /* Check if the server speaks LDAP (ASN.1/BER)
1230 * http://en.wikipedia.org/wiki/Basic_Encoding_Rules
1231 * http://tools.ietf.org/html/rfc4511
1232 */
1233
1234 /* http://tools.ietf.org/html/rfc4511#section-4.1.1
1235 * LDAPMessage: 0x30: SEQUENCE
1236 */
Simon Horman4a741432013-02-23 15:35:38 +09001237 if ((check->bi->i < 14) || (*(check->bi->data) != '\x30')) {
1238 set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001239 }
1240 else {
1241 /* size of LDAPMessage */
Simon Horman4a741432013-02-23 15:35:38 +09001242 msglen = (*(check->bi->data + 1) & 0x80) ? (*(check->bi->data + 1) & 0x7f) : 0;
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001243
1244 /* http://tools.ietf.org/html/rfc4511#section-4.2.2
1245 * messageID: 0x02 0x01 0x01: INTEGER 1
1246 * protocolOp: 0x61: bindResponse
1247 */
1248 if ((msglen > 2) ||
Simon Horman4a741432013-02-23 15:35:38 +09001249 (memcmp(check->bi->data + 2 + msglen, "\x02\x01\x01\x61", 4) != 0)) {
1250 set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001251
1252 goto out_wakeup;
1253 }
1254
1255 /* size of bindResponse */
Simon Horman4a741432013-02-23 15:35:38 +09001256 msglen += (*(check->bi->data + msglen + 6) & 0x80) ? (*(check->bi->data + msglen + 6) & 0x7f) : 0;
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001257
1258 /* http://tools.ietf.org/html/rfc4511#section-4.1.9
1259 * ldapResult: 0x0a 0x01: ENUMERATION
1260 */
1261 if ((msglen > 4) ||
Simon Horman4a741432013-02-23 15:35:38 +09001262 (memcmp(check->bi->data + 7 + msglen, "\x0a\x01", 2) != 0)) {
1263 set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001264
1265 goto out_wakeup;
1266 }
1267
1268 /* http://tools.ietf.org/html/rfc4511#section-4.1.9
1269 * resultCode
1270 */
Simon Horman4a741432013-02-23 15:35:38 +09001271 check->code = *(check->bi->data + msglen + 9);
1272 if (check->code) {
1273 set_server_check_status(check, HCHK_STATUS_L7STS, "See RFC: http://tools.ietf.org/html/rfc4511#section-4.1.9");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001274 } else {
Simon Horman4a741432013-02-23 15:35:38 +09001275 set_server_check_status(check, HCHK_STATUS_L7OKD, "Success");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001276 }
1277 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001278 break;
1279
1280 default:
Willy Tarreau06559ac2013-12-05 01:53:08 +01001281 /* for other checks (eg: pure TCP), delegate to the main task */
Willy Tarreau1620ec32011-08-06 17:05:02 +02001282 break;
1283 } /* switch */
Willy Tarreau83749182007-04-15 20:56:27 +02001284
Willy Tarreauc7dd71a2007-11-30 08:33:21 +01001285 out_wakeup:
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001286 /* collect possible new errors */
1287 if (conn->flags & CO_FL_ERROR)
1288 chk_report_conn_err(conn, 0, 0);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001289
Nick Chalk57b1bf72010-03-16 15:50:46 +00001290 /* Reset the check buffer... */
Simon Horman4a741432013-02-23 15:35:38 +09001291 *check->bi->data = '\0';
1292 check->bi->i = 0;
Nick Chalk57b1bf72010-03-16 15:50:46 +00001293
Willy Tarreaufd29cc52012-11-23 09:18:20 +01001294 /* Close the connection... We absolutely want to perform a hard close
1295 * and reset the connection if some data are pending, otherwise we end
1296 * up with many TIME_WAITs and eat all the source port range quickly.
1297 * To avoid sending RSTs all the time, we first try to drain pending
1298 * data.
1299 */
Willy Tarreaub4017d02015-03-12 23:11:26 +01001300 __conn_data_stop_both(conn);
1301 conn_data_shutw_hard(conn);
Willy Tarreau2b57cb82013-06-10 19:56:38 +02001302
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001303 /* OK, let's not stay here forever */
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001304 if (check->result == CHK_RES_FAILED)
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001305 conn->flags |= CO_FL_ERROR;
1306
Willy Tarreaufdccded2008-08-29 18:19:04 +02001307 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreau3267d362012-08-17 23:53:56 +02001308 return;
Willy Tarreau03938182010-03-17 21:52:07 +01001309
1310 wait_more_data:
Willy Tarreauf817e9f2014-01-10 16:58:45 +01001311 __conn_data_want_recv(conn);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001312}
1313
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001314/*
1315 * This function is used only for server health-checks. It handles connection
1316 * status updates including errors. If necessary, it wakes the check task up.
1317 * It always returns 0.
1318 */
1319static int wake_srv_chk(struct connection *conn)
Willy Tarreau20bea422012-07-06 12:00:49 +02001320{
Simon Horman4a741432013-02-23 15:35:38 +09001321 struct check *check = conn->owner;
Willy Tarreau20bea422012-07-06 12:00:49 +02001322
Willy Tarreau6c560da2012-11-24 11:14:45 +01001323 if (unlikely(conn->flags & CO_FL_ERROR)) {
Willy Tarreau02b0f582013-12-03 15:42:33 +01001324 /* We may get error reports bypassing the I/O handlers, typically
1325 * the case when sending a pure TCP check which fails, then the I/O
1326 * handlers above are not called. This is completely handled by the
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001327 * main processing task so let's simply wake it up. If we get here,
1328 * we expect errno to still be valid.
1329 */
1330 chk_report_conn_err(conn, errno, 0);
1331
Willy Tarreau2d351b62013-12-05 02:36:25 +01001332 __conn_data_stop_both(conn);
1333 task_wakeup(check->task, TASK_WOKEN_IO);
1334 }
Willy Tarreau3be293f2014-02-05 18:31:24 +01001335 else if (!(conn->flags & (CO_FL_DATA_RD_ENA|CO_FL_DATA_WR_ENA|CO_FL_HANDSHAKE))) {
1336 /* we may get here if only a connection probe was required : we
1337 * don't have any data to send nor anything expected in response,
1338 * so the completion of the connection establishment is enough.
1339 */
1340 task_wakeup(check->task, TASK_WOKEN_IO);
1341 }
Willy Tarreau2d351b62013-12-05 02:36:25 +01001342
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001343 if (check->result != CHK_RES_UNKNOWN) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001344 /* We're here because nobody wants to handle the error, so we
1345 * sure want to abort the hard way.
Willy Tarreau02b0f582013-12-03 15:42:33 +01001346 */
Willy Tarreaud85c4852015-03-13 00:40:28 +01001347 conn_sock_drain(conn);
Willy Tarreauf79c8172013-10-21 16:30:56 +02001348 conn_force_close(conn);
Willy Tarreau2d351b62013-12-05 02:36:25 +01001349 }
Willy Tarreau3267d362012-08-17 23:53:56 +02001350 return 0;
Willy Tarreau20bea422012-07-06 12:00:49 +02001351}
1352
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001353struct data_cb check_conn_cb = {
1354 .recv = event_srv_chk_r,
1355 .send = event_srv_chk_w,
1356 .wake = wake_srv_chk,
1357};
1358
Willy Tarreaubaaee002006-06-26 02:48:02 +02001359/*
Willy Tarreau2e993902011-10-31 11:53:20 +01001360 * updates the server's weight during a warmup stage. Once the final weight is
1361 * reached, the task automatically stops. Note that any server status change
1362 * must have updated s->last_change accordingly.
1363 */
1364static struct task *server_warmup(struct task *t)
1365{
1366 struct server *s = t->context;
1367
1368 /* by default, plan on stopping the task */
1369 t->expire = TICK_ETERNITY;
Willy Tarreau20125212014-05-13 19:44:56 +02001370 if ((s->admin & SRV_ADMF_MAINT) ||
Willy Tarreau892337c2014-05-13 23:41:20 +02001371 (s->state != SRV_ST_STARTING))
Willy Tarreau2e993902011-10-31 11:53:20 +01001372 return t;
1373
Willy Tarreau892337c2014-05-13 23:41:20 +02001374 /* recalculate the weights and update the state */
Willy Tarreau004e0452013-11-21 11:22:01 +01001375 server_recalc_eweight(s);
Willy Tarreau2e993902011-10-31 11:53:20 +01001376
1377 /* probably that we can refill this server with a bit more connections */
Willy Tarreau4aac7db2014-05-16 11:48:10 +02001378 pendconn_grab_from_px(s);
Willy Tarreau2e993902011-10-31 11:53:20 +01001379
1380 /* get back there in 1 second or 1/20th of the slowstart interval,
1381 * whichever is greater, resulting in small 5% steps.
1382 */
Willy Tarreau892337c2014-05-13 23:41:20 +02001383 if (s->state == SRV_ST_STARTING)
Willy Tarreau2e993902011-10-31 11:53:20 +01001384 t->expire = tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)));
1385 return t;
1386}
1387
1388/*
Simon Horman98637e52014-06-20 12:30:16 +09001389 * establish a server health-check that makes use of a connection.
Simon Hormanb00d17a2014-06-13 16:18:16 +09001390 *
1391 * It can return one of :
Willy Tarreaue7dff022015-04-03 01:14:29 +02001392 * - SF_ERR_NONE if everything's OK and tcpcheck_main() was not called
1393 * - SF_ERR_UP if if everything's OK and tcpcheck_main() was called
1394 * - SF_ERR_SRVTO if there are no more servers
1395 * - SF_ERR_SRVCL if the connection was refused by the server
1396 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1397 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1398 * - SF_ERR_INTERNAL for any other purely internal errors
1399 * Additionnally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
Simon Hormanb00d17a2014-06-13 16:18:16 +09001400 * Note that we try to prevent the network stack from sending the ACK during the
1401 * connect() when a pure TCP check is used (without PROXY protocol).
1402 */
Simon Horman98637e52014-06-20 12:30:16 +09001403static int connect_conn_chk(struct task *t)
Simon Hormanb00d17a2014-06-13 16:18:16 +09001404{
1405 struct check *check = t->context;
1406 struct server *s = check->server;
1407 struct connection *conn = check->conn;
1408 struct protocol *proto;
1409 int ret;
Willy Tarreauf3d34822014-12-08 12:11:28 +01001410 int quickack;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001411
1412 /* tcpcheck send/expect initialisation */
1413 if (check->type == PR_O2_TCPCHK_CHK)
1414 check->current_step = NULL;
1415
1416 /* prepare the check buffer.
1417 * This should not be used if check is the secondary agent check
1418 * of a server as s->proxy->check_req will relate to the
1419 * configuration of the primary check. Similarly, tcp-check uses
1420 * its own strings.
1421 */
1422 if (check->type && check->type != PR_O2_TCPCHK_CHK && !(check->state & CHK_ST_AGENT)) {
1423 bo_putblk(check->bo, s->proxy->check_req, s->proxy->check_len);
1424
1425 /* we want to check if this host replies to HTTP or SSLv3 requests
1426 * so we'll send the request, and won't wake the checker up now.
1427 */
1428 if ((check->type) == PR_O2_SSL3_CHK) {
1429 /* SSL requires that we put Unix time in the request */
1430 int gmt_time = htonl(date.tv_sec);
1431 memcpy(check->bo->data + 11, &gmt_time, 4);
1432 }
1433 else if ((check->type) == PR_O2_HTTP_CHK) {
1434 if (s->proxy->options2 & PR_O2_CHK_SNDST)
1435 bo_putblk(check->bo, trash.str, httpchk_build_status_header(s, trash.str, trash.size));
Cyril Bonté32602d22015-01-30 00:07:07 +01001436 /* prevent HTTP keep-alive when "http-check expect" is used */
1437 if (s->proxy->options2 & PR_O2_EXP_TYPE)
1438 bo_putstr(check->bo, "Connection: close\r\n");
Simon Hormanb00d17a2014-06-13 16:18:16 +09001439 bo_putstr(check->bo, "\r\n");
1440 *check->bo->p = '\0'; /* to make gdb output easier to read */
1441 }
1442 }
1443
1444 /* prepare a new connection */
1445 conn_init(conn);
Simon Hormanb00d17a2014-06-13 16:18:16 +09001446
Simon Horman41f58762015-01-30 11:22:56 +09001447 if (is_addr(&check->addr)) {
Simon Hormanb00d17a2014-06-13 16:18:16 +09001448 /* we'll connect to the check addr specified on the server */
Simon Horman41f58762015-01-30 11:22:56 +09001449 conn->addr.to = check->addr;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001450 }
1451 else {
1452 /* we'll connect to the addr on the server */
1453 conn->addr.to = s->addr;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001454 }
1455
1456 if (check->port) {
1457 set_host_port(&conn->addr.to, check->port);
1458 }
1459
Thierry FOURNIERbb2ae642015-01-14 11:31:49 +01001460 proto = protocol_by_family(conn->addr.to.ss_family);
1461
1462 conn_prepare(conn, proto, check->xprt);
1463 conn_attach(conn, check, &check_conn_cb);
1464 conn->target = &s->obj_type;
1465
1466 /* no client address */
1467 clear_addr(&conn->addr.from);
1468
Willy Tarreauf3d34822014-12-08 12:11:28 +01001469 /* only plain tcp-check supports quick ACK */
1470 quickack = check->type == 0 || check->type == PR_O2_TCPCHK_CHK;
1471
Simon Hormane16c1b32015-01-30 11:22:57 +09001472 if (check->type == PR_O2_TCPCHK_CHK && !LIST_ISEMPTY(check->tcpcheck_rules)) {
1473 struct tcpcheck_rule *r = (struct tcpcheck_rule *) check->tcpcheck_rules->n;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001474 /* if first step is a 'connect', then tcpcheck_main must run it */
1475 if (r->action == TCPCHK_ACT_CONNECT) {
1476 tcpcheck_main(conn);
Willy Tarreaue7dff022015-04-03 01:14:29 +02001477 return SF_ERR_UP;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001478 }
Willy Tarreauf3d34822014-12-08 12:11:28 +01001479 if (r->action == TCPCHK_ACT_EXPECT)
1480 quickack = 0;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001481 }
1482
Willy Tarreaue7dff022015-04-03 01:14:29 +02001483 ret = SF_ERR_INTERNAL;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001484 if (proto->connect)
Willy Tarreauf3d34822014-12-08 12:11:28 +01001485 ret = proto->connect(conn, check->type, quickack ? 2 : 0);
Simon Hormanb00d17a2014-06-13 16:18:16 +09001486 conn->flags |= CO_FL_WAKE_DATA;
1487 if (s->check.send_proxy) {
1488 conn->send_proxy_ofs = 1;
1489 conn->flags |= CO_FL_SEND_PROXY;
1490 }
1491
1492 return ret;
1493}
1494
Simon Horman98637e52014-06-20 12:30:16 +09001495static struct list pid_list = LIST_HEAD_INIT(pid_list);
1496static struct pool_head *pool2_pid_list;
1497
1498void block_sigchld(void)
1499{
1500 sigset_t set;
1501 sigemptyset(&set);
1502 sigaddset(&set, SIGCHLD);
1503 assert(sigprocmask(SIG_SETMASK, &set, NULL) == 0);
1504}
1505
1506void unblock_sigchld(void)
1507{
1508 sigset_t set;
1509 sigemptyset(&set);
1510 assert(sigprocmask(SIG_SETMASK, &set, NULL) == 0);
1511}
1512
1513/* Call with SIGCHLD blocked */
1514static struct pid_list *pid_list_add(pid_t pid, struct task *t)
1515{
1516 struct pid_list *elem;
1517 struct check *check = t->context;
1518
1519 elem = pool_alloc2(pool2_pid_list);
1520 if (!elem)
1521 return NULL;
1522 elem->pid = pid;
1523 elem->t = t;
1524 elem->exited = 0;
1525 check->curpid = elem;
1526 LIST_INIT(&elem->list);
1527 LIST_ADD(&pid_list, &elem->list);
1528 return elem;
1529}
1530
1531/* Blocks blocks and then unblocks SIGCHLD */
1532static void pid_list_del(struct pid_list *elem)
1533{
1534 struct check *check;
1535
1536 if (!elem)
1537 return;
1538
1539 block_sigchld();
1540 LIST_DEL(&elem->list);
1541 unblock_sigchld();
1542 if (!elem->exited)
1543 kill(elem->pid, SIGTERM);
1544
1545 check = elem->t->context;
1546 check->curpid = NULL;
1547 pool_free2(pool2_pid_list, elem);
1548}
1549
1550/* Called from inside SIGCHLD handler, SIGCHLD is blocked */
1551static void pid_list_expire(pid_t pid, int status)
1552{
1553 struct pid_list *elem;
1554
1555 list_for_each_entry(elem, &pid_list, list) {
1556 if (elem->pid == pid) {
1557 elem->t->expire = now_ms;
1558 elem->status = status;
1559 elem->exited = 1;
Cyril Bonté9dbcfab2014-08-07 01:55:39 +02001560 task_wakeup(elem->t, TASK_WOKEN_IO);
Simon Horman98637e52014-06-20 12:30:16 +09001561 return;
1562 }
1563 }
1564}
1565
/*
 * SIGCHLD handler : collects, without blocking, the status of every
 * terminated child that waitpid(0, ...) reports and hands each of them to
 * pid_list_expire(). errno is saved on entry and restored on exit so that the
 * interrupted code never sees a value set by waitpid().
 */
static void sigchld_handler(int signal)
{
	int saved_errno = errno;
	pid_t pid;
	int status;

	while ((pid = waitpid(0, &status, WNOHANG)) > 0)
		pid_list_expire(pid, status);

	errno = saved_errno;
}
1573
1574static int init_pid_list(void) {
1575 struct sigaction action = {
1576 .sa_handler = sigchld_handler,
1577 .sa_flags = SA_NOCLDSTOP
1578 };
1579
1580 if (pool2_pid_list != NULL)
1581 /* Nothing to do */
1582 return 0;
1583
1584 if (sigaction(SIGCHLD, &action, NULL)) {
1585 Alert("Failed to set signal handler for external health checks: %s. Aborting.\n",
1586 strerror(errno));
1587 return 1;
1588 }
1589
1590 pool2_pid_list = create_pool("pid_list", sizeof(struct pid_list), MEM_F_SHARED);
1591 if (pool2_pid_list == NULL) {
1592 Alert("Failed to allocate memory pool for external health checks: %s. Aborting.\n",
1593 strerror(errno));
1594 return 1;
1595 }
1596
1597 return 0;
1598}
1599
/* Helper macro to set an environment variable and jump to label <fail> on
 * failure. It is wrapped in do { } while (0) so that it behaves as a single
 * statement wherever it is used, including before an "else".
 */
#define EXTCHK_SETENV(check, envidx, value, fail) do { if (extchk_setenv(check, envidx, value)) goto fail; } while (0)
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001602
1603/*
Cyril Bontéac92a062014-12-27 22:28:38 +01001604 * helper function to allocate enough memory to store an environment variable.
1605 * It will also check that the environment variable is updatable, and silently
1606 * fail if not.
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001607 */
Cyril Bontéac92a062014-12-27 22:28:38 +01001608static int extchk_setenv(struct check *check, int idx, const char *value)
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001609{
1610 int len, ret;
Cyril Bontéac92a062014-12-27 22:28:38 +01001611 char *envname;
1612 int vmaxlen;
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001613
Cyril Bontéac92a062014-12-27 22:28:38 +01001614 if (idx < 0 || idx >= EXTCHK_SIZE) {
1615 Alert("Illegal environment variable index %d. Aborting.\n", idx);
1616 return 1;
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001617 }
Cyril Bontéac92a062014-12-27 22:28:38 +01001618
1619 envname = extcheck_envs[idx].name;
1620 vmaxlen = extcheck_envs[idx].vmaxlen;
1621
1622 /* Check if the environment variable is already set, and silently reject
1623 * the update if this one is not updatable. */
1624 if ((vmaxlen == EXTCHK_SIZE_EVAL_INIT) && (check->envp[idx]))
1625 return 0;
1626
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001627 /* Instead of sending NOT_USED, sending an empty value is preferable */
1628 if (strcmp(value, "NOT_USED") == 0) {
1629 value = "";
1630 }
Cyril Bontéac92a062014-12-27 22:28:38 +01001631
1632 len = strlen(envname) + 1;
1633 if (vmaxlen == EXTCHK_SIZE_EVAL_INIT)
1634 len += strlen(value);
1635 else
1636 len += vmaxlen;
1637
1638 if (!check->envp[idx])
1639 check->envp[idx] = malloc(len + 1);
1640
1641 if (!check->envp[idx]) {
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001642 Alert("Failed to allocate memory for the environment variable '%s'. Aborting.\n", envname);
1643 return 1;
1644 }
Cyril Bontéac92a062014-12-27 22:28:38 +01001645 ret = snprintf(check->envp[idx], len + 1, "%s=%s", envname, value);
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001646 if (ret < 0) {
1647 Alert("Failed to store the environment variable '%s'. Reason : %s. Aborting.\n", envname, strerror(errno));
1648 return 1;
1649 }
Cyril Bontéac92a062014-12-27 22:28:38 +01001650 else if (ret > len) {
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001651 Alert("Environment variable '%s' was truncated. Aborting.\n", envname);
1652 return 1;
1653 }
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001654 return 0;
1655}
Simon Horman98637e52014-06-20 12:30:16 +09001656
1657static int prepare_external_check(struct check *check)
1658{
1659 struct server *s = check->server;
1660 struct proxy *px = s->proxy;
1661 struct listener *listener = NULL, *l;
1662 int i;
Simon Horman98637e52014-06-20 12:30:16 +09001663 const char *path = px->check_path ? px->check_path : DEF_CHECK_PATH;
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001664 char buf[256];
Simon Horman98637e52014-06-20 12:30:16 +09001665
1666 list_for_each_entry(l, &px->conf.listeners, by_fe)
1667 /* Use the first INET, INET6 or UNIX listener */
1668 if (l->addr.ss_family == AF_INET ||
1669 l->addr.ss_family == AF_INET6 ||
1670 l->addr.ss_family == AF_UNIX) {
1671 listener = l;
1672 break;
1673 }
1674
Simon Horman98637e52014-06-20 12:30:16 +09001675 check->curpid = NULL;
Cyril Bontéac92a062014-12-27 22:28:38 +01001676 check->envp = calloc((EXTCHK_SIZE + 1), sizeof(char *));
1677 if (!check->envp) {
1678 Alert("Failed to allocate memory for environment variables. Aborting\n");
1679 goto err;
1680 }
Simon Horman98637e52014-06-20 12:30:16 +09001681
Cyril Bontéac92a062014-12-27 22:28:38 +01001682 check->argv = calloc(6, sizeof(char *));
1683 if (!check->argv) {
1684 Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
Simon Horman98637e52014-06-20 12:30:16 +09001685 goto err;
Cyril Bontéac92a062014-12-27 22:28:38 +01001686 }
Simon Horman98637e52014-06-20 12:30:16 +09001687
1688 check->argv[0] = px->check_command;
1689
Cyril Bonté777be862014-12-02 21:21:35 +01001690 if (!listener) {
1691 check->argv[1] = strdup("NOT_USED");
1692 check->argv[2] = strdup("NOT_USED");
1693 }
1694 else if (listener->addr.ss_family == AF_INET ||
Simon Horman98637e52014-06-20 12:30:16 +09001695 listener->addr.ss_family == AF_INET6) {
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001696 addr_to_str(&listener->addr, buf, sizeof(buf));
1697 check->argv[1] = strdup(buf);
1698 port_to_str(&listener->addr, buf, sizeof(buf));
1699 check->argv[2] = strdup(buf);
Cyril Bonté777be862014-12-02 21:21:35 +01001700 }
1701 else if (listener->addr.ss_family == AF_UNIX) {
Simon Horman98637e52014-06-20 12:30:16 +09001702 const struct sockaddr_un *un;
1703
1704 un = (struct sockaddr_un *)&listener->addr;
1705 check->argv[1] = strdup(un->sun_path);
1706 check->argv[2] = strdup("NOT_USED");
Cyril Bonté777be862014-12-02 21:21:35 +01001707 }
1708 else {
Cyril Bontéac92a062014-12-27 22:28:38 +01001709 Alert("Starting [%s:%s] check: unsupported address family.\n", px->id, s->id);
Simon Horman98637e52014-06-20 12:30:16 +09001710 goto err;
1711 }
1712
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001713 addr_to_str(&s->addr, buf, sizeof(buf));
1714 check->argv[3] = strdup(buf);
1715 port_to_str(&s->addr, buf, sizeof(buf));
1716 check->argv[4] = strdup(buf);
Simon Horman98637e52014-06-20 12:30:16 +09001717
Cyril Bontéac92a062014-12-27 22:28:38 +01001718 for (i = 0; i < 5; i++) {
1719 if (!check->argv[i]) {
1720 Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
Simon Horman98637e52014-06-20 12:30:16 +09001721 goto err;
Cyril Bontéac92a062014-12-27 22:28:38 +01001722 }
1723 }
Simon Horman98637e52014-06-20 12:30:16 +09001724
Cyril Bontéac92a062014-12-27 22:28:38 +01001725 EXTCHK_SETENV(check, EXTCHK_PATH, path, err);
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001726 /* Add proxy environment variables */
Cyril Bontéac92a062014-12-27 22:28:38 +01001727 EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_NAME, px->id, err);
1728 EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ID, ultoa_r(px->uuid, buf, sizeof(buf)), err);
1729 EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ADDR, check->argv[1], err);
1730 EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_PORT, check->argv[2], err);
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001731 /* Add server environment variables */
Cyril Bontéac92a062014-12-27 22:28:38 +01001732 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_NAME, s->id, err);
1733 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ID, ultoa_r(s->puid, buf, sizeof(buf)), err);
1734 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ADDR, check->argv[3], err);
1735 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_PORT, check->argv[4], err);
1736 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_MAXCONN, ultoa_r(s->maxconn, buf, sizeof(buf)), err);
1737 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)), err);
1738
1739 /* Ensure that we don't leave any hole in check->envp */
1740 for (i = 0; i < EXTCHK_SIZE; i++)
1741 if (!check->envp[i])
1742 EXTCHK_SETENV(check, i, "", err);
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001743
Cyril Bonté99c5bf52014-08-07 01:55:38 +02001744 return 1;
Simon Horman98637e52014-06-20 12:30:16 +09001745err:
1746 if (check->envp) {
Cyril Bontéac92a062014-12-27 22:28:38 +01001747 for (i = 0; i < EXTCHK_SIZE; i++)
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001748 free(check->envp[i]);
Simon Horman98637e52014-06-20 12:30:16 +09001749 free(check->envp);
1750 check->envp = NULL;
1751 }
1752
1753 if (check->argv) {
1754 for (i = 1; i < 5; i++)
1755 free(check->argv[i]);
1756 free(check->argv);
1757 check->argv = NULL;
1758 }
Cyril Bonté99c5bf52014-08-07 01:55:38 +02001759 return 0;
Simon Horman98637e52014-06-20 12:30:16 +09001760}
1761
Simon Hormanb00d17a2014-06-13 16:18:16 +09001762/*
Simon Horman98637e52014-06-20 12:30:16 +09001763 * establish a server health-check that makes use of a process.
1764 *
1765 * It can return one of :
Willy Tarreaue7dff022015-04-03 01:14:29 +02001766 * - SF_ERR_NONE if everything's OK
1767 * - SF_ERR_SRVTO if there are no more servers
1768 * - SF_ERR_SRVCL if the connection was refused by the server
1769 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1770 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1771 * - SF_ERR_INTERNAL for any other purely internal errors
1772 * Additionnally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
Simon Horman98637e52014-06-20 12:30:16 +09001773 *
1774 * Blocks and then unblocks SIGCHLD
1775 */
1776static int connect_proc_chk(struct task *t)
1777{
Cyril Bontéac92a062014-12-27 22:28:38 +01001778 char buf[256];
Simon Horman98637e52014-06-20 12:30:16 +09001779 struct check *check = t->context;
1780 struct server *s = check->server;
1781 struct proxy *px = s->proxy;
1782 int status;
1783 pid_t pid;
1784
Willy Tarreaue7dff022015-04-03 01:14:29 +02001785 status = SF_ERR_RESOURCE;
Simon Horman98637e52014-06-20 12:30:16 +09001786
1787 block_sigchld();
1788
1789 pid = fork();
1790 if (pid < 0) {
1791 Alert("Failed to fork process for external health check: %s. Aborting.\n",
1792 strerror(errno));
1793 set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
1794 goto out;
1795 }
1796 if (pid == 0) {
1797 /* Child */
1798 extern char **environ;
1799 environ = check->envp;
Cyril Bontéac92a062014-12-27 22:28:38 +01001800 extchk_setenv(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)));
Simon Horman98637e52014-06-20 12:30:16 +09001801 execvp(px->check_command, check->argv);
1802 Alert("Failed to exec process for external health check: %s. Aborting.\n",
1803 strerror(errno));
1804 exit(-1);
1805 }
1806
1807 /* Parent */
1808 if (check->result == CHK_RES_UNKNOWN) {
1809 if (pid_list_add(pid, t) != NULL) {
1810 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
1811
1812 if (px->timeout.check && px->timeout.connect) {
1813 int t_con = tick_add(now_ms, px->timeout.connect);
1814 t->expire = tick_first(t->expire, t_con);
1815 }
Willy Tarreaue7dff022015-04-03 01:14:29 +02001816 status = SF_ERR_NONE;
Simon Horman98637e52014-06-20 12:30:16 +09001817 goto out;
1818 }
1819 else {
1820 set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
1821 }
1822 kill(pid, SIGTERM); /* process creation error */
1823 }
1824 else
1825 set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
1826
1827out:
1828 unblock_sigchld();
1829 return status;
1830}
1831
1832/*
Simon Horman98637e52014-06-20 12:30:16 +09001833 * manages a server health-check that uses a process. Returns
Willy Tarreaubaaee002006-06-26 02:48:02 +02001834 * the time the task accepts to wait, or TIME_ETERNITY for infinity.
1835 */
Simon Horman98637e52014-06-20 12:30:16 +09001836static struct task *process_chk_proc(struct task *t)
1837{
1838 struct check *check = t->context;
1839 struct server *s = check->server;
1840 struct connection *conn = check->conn;
1841 int rv;
1842 int ret;
1843 int expired = tick_is_expired(t->expire, now_ms);
1844
1845 if (!(check->state & CHK_ST_INPROGRESS)) {
1846 /* no check currently running */
1847 if (!expired) /* woke up too early */
1848 return t;
1849
1850 /* we don't send any health-checks when the proxy is
1851 * stopped, the server should not be checked or the check
1852 * is disabled.
1853 */
1854 if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
1855 s->proxy->state == PR_STSTOPPED)
1856 goto reschedule;
1857
1858 /* we'll initiate a new check */
1859 set_server_check_status(check, HCHK_STATUS_START, NULL);
1860
1861 check->state |= CHK_ST_INPROGRESS;
1862
Simon Hormandbf70192015-01-30 11:22:53 +09001863 ret = connect_proc_chk(t);
Simon Horman98637e52014-06-20 12:30:16 +09001864 switch (ret) {
Willy Tarreaue7dff022015-04-03 01:14:29 +02001865 case SF_ERR_UP:
Simon Horman98637e52014-06-20 12:30:16 +09001866 return t;
Willy Tarreaue7dff022015-04-03 01:14:29 +02001867 case SF_ERR_NONE:
Simon Horman98637e52014-06-20 12:30:16 +09001868 /* we allow up to min(inter, timeout.connect) for a connection
1869 * to establish but only when timeout.check is set
1870 * as it may be to short for a full check otherwise
1871 */
1872 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
1873
1874 if (s->proxy->timeout.check && s->proxy->timeout.connect) {
1875 int t_con = tick_add(now_ms, s->proxy->timeout.connect);
1876 t->expire = tick_first(t->expire, t_con);
1877 }
1878
1879 goto reschedule;
1880
Willy Tarreaue7dff022015-04-03 01:14:29 +02001881 case SF_ERR_SRVTO: /* ETIMEDOUT */
1882 case SF_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
Simon Horman98637e52014-06-20 12:30:16 +09001883 conn->flags |= CO_FL_ERROR;
1884 chk_report_conn_err(conn, errno, 0);
1885 break;
Willy Tarreaue7dff022015-04-03 01:14:29 +02001886 case SF_ERR_PRXCOND:
1887 case SF_ERR_RESOURCE:
1888 case SF_ERR_INTERNAL:
Simon Horman98637e52014-06-20 12:30:16 +09001889 conn->flags |= CO_FL_ERROR;
1890 chk_report_conn_err(conn, 0, 0);
1891 break;
1892 }
1893
1894 /* here, we have seen a synchronous error, no fd was allocated */
1895
1896 check->state &= ~CHK_ST_INPROGRESS;
1897 check_notify_failure(check);
1898
1899 /* we allow up to min(inter, timeout.connect) for a connection
1900 * to establish but only when timeout.check is set
1901 * as it may be to short for a full check otherwise
1902 */
1903 while (tick_is_expired(t->expire, now_ms)) {
1904 int t_con;
1905
1906 t_con = tick_add(t->expire, s->proxy->timeout.connect);
1907 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
1908
1909 if (s->proxy->timeout.check)
1910 t->expire = tick_first(t->expire, t_con);
1911 }
1912 }
1913 else {
1914 /* there was a test running.
1915 * First, let's check whether there was an uncaught error,
1916 * which can happen on connect timeout or error.
1917 */
1918 if (check->result == CHK_RES_UNKNOWN) {
1919 /* good connection is enough for pure TCP check */
1920 struct pid_list *elem = check->curpid;
1921 int status = HCHK_STATUS_UNKNOWN;
1922
1923 if (elem->exited) {
1924 status = elem->status; /* Save in case the process exits between use below */
1925 if (!WIFEXITED(status))
1926 check->code = -1;
1927 else
1928 check->code = WEXITSTATUS(status);
1929 if (!WIFEXITED(status) || WEXITSTATUS(status))
1930 status = HCHK_STATUS_PROCERR;
1931 else
1932 status = HCHK_STATUS_PROCOK;
1933 } else if (expired) {
1934 status = HCHK_STATUS_PROCTOUT;
Willy Tarreaudc3d1902014-07-08 00:56:27 +02001935 Warning("kill %d\n", (int)elem->pid);
Simon Horman98637e52014-06-20 12:30:16 +09001936 kill(elem->pid, SIGTERM);
1937 }
1938 set_server_check_status(check, status, NULL);
1939 }
1940
1941 if (check->result == CHK_RES_FAILED) {
1942 /* a failure or timeout detected */
1943 check_notify_failure(check);
1944 }
1945 else if (check->result == CHK_RES_CONDPASS) {
1946 /* check is OK but asks for stopping mode */
1947 check_notify_stopping(check);
1948 }
1949 else if (check->result == CHK_RES_PASSED) {
1950 /* a success was detected */
1951 check_notify_success(check);
1952 }
1953 check->state &= ~CHK_ST_INPROGRESS;
1954
1955 pid_list_del(check->curpid);
1956
1957 rv = 0;
1958 if (global.spread_checks > 0) {
1959 rv = srv_getinter(check) * global.spread_checks / 100;
1960 rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
1961 }
1962 t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
1963 }
1964
1965 reschedule:
1966 while (tick_is_expired(t->expire, now_ms))
1967 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
1968 return t;
1969}
1970
1971/*
1972 * manages a server health-check that uses a connection. Returns
1973 * the time the task accepts to wait, or TIME_ETERNITY for infinity.
1974 */
1975static struct task *process_chk_conn(struct task *t)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001976{
Simon Horman4a741432013-02-23 15:35:38 +09001977 struct check *check = t->context;
1978 struct server *s = check->server;
1979 struct connection *conn = check->conn;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02001980 int rv;
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001981 int ret;
Willy Tarreauacbdc7a2012-11-23 14:02:10 +01001982 int expired = tick_is_expired(t->expire, now_ms);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001983
Willy Tarreau2c115e52013-12-11 19:41:16 +01001984 if (!(check->state & CHK_ST_INPROGRESS)) {
Willy Tarreau5a78f362012-11-23 12:47:05 +01001985 /* no check currently running */
Willy Tarreauacbdc7a2012-11-23 14:02:10 +01001986 if (!expired) /* woke up too early */
Willy Tarreau26c25062009-03-08 09:38:41 +01001987 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001988
Simon Horman671b6f02013-11-25 10:46:39 +09001989 /* we don't send any health-checks when the proxy is
1990 * stopped, the server should not be checked or the check
1991 * is disabled.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001992 */
Willy Tarreau0d924cc2013-12-11 21:26:24 +01001993 if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
Willy Tarreau33a08db2013-12-11 21:03:31 +01001994 s->proxy->state == PR_STSTOPPED)
Willy Tarreau5a78f362012-11-23 12:47:05 +01001995 goto reschedule;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001996
1997 /* we'll initiate a new check */
Simon Horman4a741432013-02-23 15:35:38 +09001998 set_server_check_status(check, HCHK_STATUS_START, NULL);
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02001999
Willy Tarreau2c115e52013-12-11 19:41:16 +01002000 check->state |= CHK_ST_INPROGRESS;
Simon Horman4a741432013-02-23 15:35:38 +09002001 check->bi->p = check->bi->data;
2002 check->bi->i = 0;
2003 check->bo->p = check->bo->data;
2004 check->bo->o = 0;
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02002005
Simon Hormandbf70192015-01-30 11:22:53 +09002006 ret = connect_conn_chk(t);
Willy Tarreaufb56aab2012-09-28 14:40:02 +02002007 switch (ret) {
Willy Tarreaue7dff022015-04-03 01:14:29 +02002008 case SF_ERR_UP:
Simon Hormanb00d17a2014-06-13 16:18:16 +09002009 return t;
Willy Tarreaue7dff022015-04-03 01:14:29 +02002010 case SF_ERR_NONE:
Willy Tarreaufb56aab2012-09-28 14:40:02 +02002011 /* we allow up to min(inter, timeout.connect) for a connection
2012 * to establish but only when timeout.check is set
2013 * as it may be to short for a full check otherwise
2014 */
Simon Horman4a741432013-02-23 15:35:38 +09002015 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +02002016
Willy Tarreaufb56aab2012-09-28 14:40:02 +02002017 if (s->proxy->timeout.check && s->proxy->timeout.connect) {
2018 int t_con = tick_add(now_ms, s->proxy->timeout.connect);
2019 t->expire = tick_first(t->expire, t_con);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002020 }
Willy Tarreau06559ac2013-12-05 01:53:08 +01002021
2022 if (check->type)
2023 conn_data_want_recv(conn); /* prepare for reading a possible reply */
2024
Willy Tarreau5a78f362012-11-23 12:47:05 +01002025 goto reschedule;
2026
Willy Tarreaue7dff022015-04-03 01:14:29 +02002027 case SF_ERR_SRVTO: /* ETIMEDOUT */
2028 case SF_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
Willy Tarreau4bd07de2014-01-24 16:10:57 +01002029 conn->flags |= CO_FL_ERROR;
2030 chk_report_conn_err(conn, errno, 0);
Willy Tarreau5a78f362012-11-23 12:47:05 +01002031 break;
Willy Tarreaue7dff022015-04-03 01:14:29 +02002032 case SF_ERR_PRXCOND:
2033 case SF_ERR_RESOURCE:
2034 case SF_ERR_INTERNAL:
Willy Tarreau4bd07de2014-01-24 16:10:57 +01002035 conn->flags |= CO_FL_ERROR;
2036 chk_report_conn_err(conn, 0, 0);
Willy Tarreau5a78f362012-11-23 12:47:05 +01002037 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002038 }
2039
Willy Tarreau5a78f362012-11-23 12:47:05 +01002040 /* here, we have seen a synchronous error, no fd was allocated */
Willy Tarreau6b0a8502012-11-23 08:51:32 +01002041
Willy Tarreau2c115e52013-12-11 19:41:16 +01002042 check->state &= ~CHK_ST_INPROGRESS;
Willy Tarreau4eec5472014-05-20 22:32:27 +02002043 check_notify_failure(check);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002044
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +01002045 /* we allow up to min(inter, timeout.connect) for a connection
2046 * to establish but only when timeout.check is set
2047 * as it may be to short for a full check otherwise
2048 */
Willy Tarreau0c303ee2008-07-07 00:09:58 +02002049 while (tick_is_expired(t->expire, now_ms)) {
2050 int t_con;
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +01002051
Willy Tarreau0c303ee2008-07-07 00:09:58 +02002052 t_con = tick_add(t->expire, s->proxy->timeout.connect);
Simon Horman4a741432013-02-23 15:35:38 +09002053 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +01002054
Willy Tarreau0c303ee2008-07-07 00:09:58 +02002055 if (s->proxy->timeout.check)
2056 t->expire = tick_first(t->expire, t_con);
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +01002057 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02002058 }
2059 else {
Willy Tarreauf1503172012-09-28 19:39:36 +02002060 /* there was a test running.
2061 * First, let's check whether there was an uncaught error,
2062 * which can happen on connect timeout or error.
2063 */
Simon Hormanccaabcd2014-06-20 12:29:47 +09002064 if (check->result == CHK_RES_UNKNOWN) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +01002065 /* good connection is enough for pure TCP check */
2066 if ((conn->flags & CO_FL_CONNECTED) && !check->type) {
Simon Horman4a741432013-02-23 15:35:38 +09002067 if (check->use_ssl)
2068 set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
Willy Tarreauf1503172012-09-28 19:39:36 +02002069 else
Simon Horman4a741432013-02-23 15:35:38 +09002070 set_server_check_status(check, HCHK_STATUS_L4OK, NULL);
Willy Tarreauacbdc7a2012-11-23 14:02:10 +01002071 }
Willy Tarreau25e2ab52013-12-04 11:17:05 +01002072 else if ((conn->flags & CO_FL_ERROR) || expired) {
2073 chk_report_conn_err(conn, 0, expired);
Willy Tarreauf1503172012-09-28 19:39:36 +02002074 }
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01002075 else
2076 goto out_wait; /* timeout not reached, wait again */
Willy Tarreauf1503172012-09-28 19:39:36 +02002077 }
2078
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01002079 /* check complete or aborted */
Willy Tarreau5ba04f62013-02-12 15:23:12 +01002080 if (conn->xprt) {
2081 /* The check was aborted and the connection was not yet closed.
2082 * This can happen upon timeout, or when an external event such
2083 * as a failed response coupled with "observe layer7" caused the
2084 * server state to be suddenly changed.
2085 */
Willy Tarreaud85c4852015-03-13 00:40:28 +01002086 conn_sock_drain(conn);
Willy Tarreauf79c8172013-10-21 16:30:56 +02002087 conn_force_close(conn);
Willy Tarreau5ba04f62013-02-12 15:23:12 +01002088 }
2089
Willy Tarreauaf549582014-05-16 17:37:50 +02002090 if (check->result == CHK_RES_FAILED) {
2091 /* a failure or timeout detected */
Willy Tarreau4eec5472014-05-20 22:32:27 +02002092 check_notify_failure(check);
Willy Tarreauaf549582014-05-16 17:37:50 +02002093 }
Willy Tarreaudb58b792014-05-21 13:57:23 +02002094 else if (check->result == CHK_RES_CONDPASS) {
2095 /* check is OK but asks for stopping mode */
2096 check_notify_stopping(check);
Willy Tarreauaf549582014-05-16 17:37:50 +02002097 }
Willy Tarreau3e048382014-05-21 10:30:54 +02002098 else if (check->result == CHK_RES_PASSED) {
2099 /* a success was detected */
2100 check_notify_success(check);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002101 }
Willy Tarreau2c115e52013-12-11 19:41:16 +01002102 check->state &= ~CHK_ST_INPROGRESS;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002103
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01002104 rv = 0;
2105 if (global.spread_checks > 0) {
Simon Horman4a741432013-02-23 15:35:38 +09002106 rv = srv_getinter(check) * global.spread_checks / 100;
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01002107 rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
Willy Tarreaubaaee002006-06-26 02:48:02 +02002108 }
Simon Horman4a741432013-02-23 15:35:38 +09002109 t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
Willy Tarreaubaaee002006-06-26 02:48:02 +02002110 }
Willy Tarreau5a78f362012-11-23 12:47:05 +01002111
2112 reschedule:
2113 while (tick_is_expired(t->expire, now_ms))
Simon Horman4a741432013-02-23 15:35:38 +09002114 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
Willy Tarreau74fa7fb2012-11-23 14:43:49 +01002115 out_wait:
Willy Tarreau26c25062009-03-08 09:38:41 +01002116 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002117}
2118
Simon Horman98637e52014-06-20 12:30:16 +09002119/*
2120 * manages a server health-check. Returns
2121 * the time the task accepts to wait, or TIME_ETERNITY for infinity.
2122 */
2123static struct task *process_chk(struct task *t)
2124{
2125 struct check *check = t->context;
2126
2127 if (check->type == PR_O2_EXT_CHK)
2128 return process_chk_proc(t);
2129 return process_chk_conn(t);
2130}
2131
Simon Horman5c942422013-11-25 10:46:32 +09002132static int start_check_task(struct check *check, int mininter,
2133 int nbcheck, int srvpos)
2134{
2135 struct task *t;
2136 /* task for the check */
2137 if ((t = task_new()) == NULL) {
2138 Alert("Starting [%s:%s] check: out of memory.\n",
2139 check->server->proxy->id, check->server->id);
2140 return 0;
2141 }
2142
2143 check->task = t;
2144 t->process = process_chk;
2145 t->context = check;
2146
Willy Tarreau1746eec2014-04-25 10:46:47 +02002147 if (mininter < srv_getinter(check))
2148 mininter = srv_getinter(check);
2149
2150 if (global.max_spread_checks && mininter > global.max_spread_checks)
2151 mininter = global.max_spread_checks;
2152
Simon Horman5c942422013-11-25 10:46:32 +09002153 /* check this every ms */
Willy Tarreau1746eec2014-04-25 10:46:47 +02002154 t->expire = tick_add(now_ms, MS_TO_TICKS(mininter * srvpos / nbcheck));
Simon Horman5c942422013-11-25 10:46:32 +09002155 check->start = now;
2156 task_queue(t);
2157
2158 return 1;
2159}
2160
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002161/*
2162 * Start health-check.
2163 * Returns 0 if OK, -1 if error, and prints the error in this case.
2164 */
2165int start_checks() {
2166
2167 struct proxy *px;
2168 struct server *s;
2169 struct task *t;
Simon Horman4a741432013-02-23 15:35:38 +09002170 int nbcheck=0, mininter=0, srvpos=0;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002171
Willy Tarreau2c43a1e2007-10-14 23:05:39 +02002172 /* 1- count the checkers to run simultaneously.
2173 * We also determine the minimum interval among all of those which
2174 * have an interval larger than SRV_CHK_INTER_THRES. This interval
2175 * will be used to spread their start-up date. Those which have
Jamie Gloudon801a0a32012-08-25 00:18:33 -04002176 * a shorter interval will start independently and will not dictate
Willy Tarreau2c43a1e2007-10-14 23:05:39 +02002177 * too short an interval for all others.
2178 */
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002179 for (px = proxy; px; px = px->next) {
2180 for (s = px->srv; s; s = s->next) {
Willy Tarreaue7b73482013-11-21 11:50:50 +01002181 if (s->slowstart) {
2182 if ((t = task_new()) == NULL) {
2183 Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
2184 return -1;
2185 }
2186 /* We need a warmup task that will be called when the server
2187 * state switches from down to up.
2188 */
2189 s->warmup = t;
2190 t->process = server_warmup;
2191 t->context = s;
2192 t->expire = TICK_ETERNITY;
2193 }
2194
Willy Tarreaud8514a22013-12-11 21:10:14 +01002195 if (s->check.state & CHK_ST_CONFIGURED) {
2196 nbcheck++;
2197 if ((srv_getinter(&s->check) >= SRV_CHK_INTER_THRES) &&
2198 (!mininter || mininter > srv_getinter(&s->check)))
2199 mininter = srv_getinter(&s->check);
2200 }
Willy Tarreau15f39102013-12-11 20:41:18 +01002201
Willy Tarreaud8514a22013-12-11 21:10:14 +01002202 if (s->agent.state & CHK_ST_CONFIGURED) {
2203 nbcheck++;
2204 if ((srv_getinter(&s->agent) >= SRV_CHK_INTER_THRES) &&
2205 (!mininter || mininter > srv_getinter(&s->agent)))
2206 mininter = srv_getinter(&s->agent);
2207 }
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002208 }
2209 }
2210
Simon Horman4a741432013-02-23 15:35:38 +09002211 if (!nbcheck)
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002212 return 0;
2213
2214 srand((unsigned)time(NULL));
2215
2216 /*
2217 * 2- start them as far as possible from each others. For this, we will
2218 * start them after their interval set to the min interval divided by
2219 * the number of servers, weighted by the server's position in the list.
2220 */
2221 for (px = proxy; px; px = px->next) {
Simon Horman98637e52014-06-20 12:30:16 +09002222 if ((px->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
2223 if (init_pid_list()) {
2224 Alert("Starting [%s] check: out of memory.\n", px->id);
2225 return -1;
2226 }
2227 }
2228
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002229 for (s = px->srv; s; s = s->next) {
Simon Hormand60d6912013-11-25 10:46:36 +09002230 /* A task for the main check */
Willy Tarreauff5ae352013-12-11 20:36:34 +01002231 if (s->check.state & CHK_ST_CONFIGURED) {
Cyril Bonté99c5bf52014-08-07 01:55:38 +02002232 if (s->check.type == PR_O2_EXT_CHK) {
2233 if (!prepare_external_check(&s->check))
2234 return -1;
2235 }
Simon Hormand60d6912013-11-25 10:46:36 +09002236 if (!start_check_task(&s->check, mininter, nbcheck, srvpos))
2237 return -1;
2238 srvpos++;
2239 }
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002240
Simon Hormand60d6912013-11-25 10:46:36 +09002241 /* A task for a auxiliary agent check */
Willy Tarreauff5ae352013-12-11 20:36:34 +01002242 if (s->agent.state & CHK_ST_CONFIGURED) {
Simon Hormand60d6912013-11-25 10:46:36 +09002243 if (!start_check_task(&s->agent, mininter, nbcheck, srvpos)) {
2244 return -1;
2245 }
2246 srvpos++;
2247 }
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002248 }
2249 }
2250 return 0;
2251}
Willy Tarreaubaaee002006-06-26 02:48:02 +02002252
2253/*
Willy Tarreau5b3a2022012-09-28 15:01:02 +02002254 * Perform content verification check on data in s->check.buffer buffer.
Willy Tarreaubd741542010-03-16 18:46:54 +01002255 * The buffer MUST be terminated by a null byte before calling this function.
2256 * Sets server status appropriately. The caller is responsible for ensuring
2257 * that the buffer contains at least 13 characters. If <done> is zero, we may
2258 * return 0 to indicate that data is required to decide of a match.
2259 */
2260static int httpchk_expect(struct server *s, int done)
2261{
2262 static char status_msg[] = "HTTP status check returned code <000>";
2263 char status_code[] = "000";
2264 char *contentptr;
2265 int crlf;
2266 int ret;
2267
2268 switch (s->proxy->options2 & PR_O2_EXP_TYPE) {
2269 case PR_O2_EXP_STS:
2270 case PR_O2_EXP_RSTS:
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02002271 memcpy(status_code, s->check.bi->data + 9, 3);
2272 memcpy(status_msg + strlen(status_msg) - 4, s->check.bi->data + 9, 3);
Willy Tarreaubd741542010-03-16 18:46:54 +01002273
2274 if ((s->proxy->options2 & PR_O2_EXP_TYPE) == PR_O2_EXP_STS)
2275 ret = strncmp(s->proxy->expect_str, status_code, 3) == 0;
2276 else
Thierry FOURNIER09af0d62014-06-18 11:35:54 +02002277 ret = regex_exec(s->proxy->expect_regex, status_code);
Willy Tarreaubd741542010-03-16 18:46:54 +01002278
2279 /* we necessarily have the response, so there are no partial failures */
2280 if (s->proxy->options2 & PR_O2_EXP_INV)
2281 ret = !ret;
2282
Simon Horman4a741432013-02-23 15:35:38 +09002283 set_server_check_status(&s->check, ret ? HCHK_STATUS_L7OKD : HCHK_STATUS_L7STS, status_msg);
Willy Tarreaubd741542010-03-16 18:46:54 +01002284 break;
2285
2286 case PR_O2_EXP_STR:
2287 case PR_O2_EXP_RSTR:
2288 /* very simple response parser: ignore CR and only count consecutive LFs,
2289 * stop with contentptr pointing to first char after the double CRLF or
2290 * to '\0' if crlf < 2.
2291 */
2292 crlf = 0;
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02002293 for (contentptr = s->check.bi->data; *contentptr; contentptr++) {
Willy Tarreaubd741542010-03-16 18:46:54 +01002294 if (crlf >= 2)
2295 break;
2296 if (*contentptr == '\r')
2297 continue;
2298 else if (*contentptr == '\n')
2299 crlf++;
2300 else
2301 crlf = 0;
2302 }
2303
2304 /* Check that response contains a body... */
2305 if (crlf < 2) {
2306 if (!done)
2307 return 0;
2308
Simon Horman4a741432013-02-23 15:35:38 +09002309 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01002310 "HTTP content check could not find a response body");
2311 return 1;
2312 }
2313
2314 /* Check that response body is not empty... */
2315 if (*contentptr == '\0') {
Willy Tarreaua164fb52011-04-13 09:32:41 +02002316 if (!done)
2317 return 0;
2318
Simon Horman4a741432013-02-23 15:35:38 +09002319 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01002320 "HTTP content check found empty response body");
2321 return 1;
2322 }
2323
2324 /* Check the response content against the supplied string
2325 * or regex... */
2326 if ((s->proxy->options2 & PR_O2_EXP_TYPE) == PR_O2_EXP_STR)
2327 ret = strstr(contentptr, s->proxy->expect_str) != NULL;
2328 else
Thierry FOURNIER09af0d62014-06-18 11:35:54 +02002329 ret = regex_exec(s->proxy->expect_regex, contentptr);
Willy Tarreaubd741542010-03-16 18:46:54 +01002330
2331 /* if we don't match, we may need to wait more */
2332 if (!ret && !done)
2333 return 0;
2334
2335 if (ret) {
2336 /* content matched */
2337 if (s->proxy->options2 & PR_O2_EXP_INV)
Simon Horman4a741432013-02-23 15:35:38 +09002338 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01002339 "HTTP check matched unwanted content");
2340 else
Simon Horman4a741432013-02-23 15:35:38 +09002341 set_server_check_status(&s->check, HCHK_STATUS_L7OKD,
Willy Tarreaubd741542010-03-16 18:46:54 +01002342 "HTTP content check matched");
2343 }
2344 else {
2345 if (s->proxy->options2 & PR_O2_EXP_INV)
Simon Horman4a741432013-02-23 15:35:38 +09002346 set_server_check_status(&s->check, HCHK_STATUS_L7OKD,
Willy Tarreaubd741542010-03-16 18:46:54 +01002347 "HTTP check did not match unwanted content");
2348 else
Simon Horman4a741432013-02-23 15:35:38 +09002349 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01002350 "HTTP content check did not match");
2351 }
2352 break;
2353 }
2354 return 1;
2355}
2356
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002357/*
2358 * return the id of a step in a send/expect session
2359 */
Simon Hormane16c1b32015-01-30 11:22:57 +09002360static int tcpcheck_get_step_id(struct check *check)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002361{
2362 struct tcpcheck_rule *cur = NULL, *next = NULL;
2363 int i = 0;
2364
Willy Tarreau213c6782014-10-02 14:51:02 +02002365 /* not even started anything yet => step 0 = initial connect */
Simon Hormane16c1b32015-01-30 11:22:57 +09002366 if (check->current_step)
Willy Tarreau213c6782014-10-02 14:51:02 +02002367 return 0;
2368
Simon Hormane16c1b32015-01-30 11:22:57 +09002369 cur = check->last_started_step;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002370
2371 /* no step => first step */
2372 if (cur == NULL)
2373 return 1;
2374
2375 /* increment i until current step */
Simon Hormane16c1b32015-01-30 11:22:57 +09002376 list_for_each_entry(next, check->tcpcheck_rules, list) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002377 if (next->list.p == &cur->list)
2378 break;
2379 ++i;
2380 }
2381
2382 return i;
2383}
2384
2385static void tcpcheck_main(struct connection *conn)
2386{
2387 char *contentptr;
Willy Tarreauf3d34822014-12-08 12:11:28 +01002388 struct tcpcheck_rule *cur, *next;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002389 int done = 0, ret = 0;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002390 struct check *check = conn->owner;
2391 struct server *s = check->server;
2392 struct task *t = check->task;
Simon Hormane16c1b32015-01-30 11:22:57 +09002393 struct list *head = check->tcpcheck_rules;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002394
Willy Tarreauef953952014-10-02 14:30:14 +02002395 /* here, we know that the check is complete or that it failed */
2396 if (check->result != CHK_RES_UNKNOWN)
2397 goto out_end_tcpcheck;
2398
2399 /* We have 4 possibilities here :
2400 * 1. we've not yet attempted step 1, and step 1 is a connect, so no
2401 * connection attempt was made yet ;
2402 * 2. we've not yet attempted step 1, and step 1 is a not connect or
2403 * does not exist (no rule), so a connection attempt was made
2404 * before coming here.
2405 * 3. we're coming back after having started with step 1, so we may
2406 * be waiting for a connection attempt to complete.
2407 * 4. the connection + handshake are complete
2408 *
2409 * #2 and #3 are quite similar, we want both the connection and the
2410 * handshake to complete before going any further. Thus we must always
2411 * wait for a connection to complete unless we're before and existing
2412 * step 1.
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002413 */
Willy Tarreauef953952014-10-02 14:30:14 +02002414 if ((!(conn->flags & CO_FL_CONNECTED) || (conn->flags & CO_FL_HANDSHAKE)) &&
2415 (check->current_step || LIST_ISEMPTY(head))) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002416 /* we allow up to min(inter, timeout.connect) for a connection
2417 * to establish but only when timeout.check is set
2418 * as it may be to short for a full check otherwise
2419 */
2420 while (tick_is_expired(t->expire, now_ms)) {
2421 int t_con;
2422
2423 t_con = tick_add(t->expire, s->proxy->timeout.connect);
2424 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
2425
2426 if (s->proxy->timeout.check)
2427 t->expire = tick_first(t->expire, t_con);
2428 }
2429 return;
2430 }
2431
Willy Tarreauef953952014-10-02 14:30:14 +02002432 /* special case: option tcp-check with no rule, a connect is enough */
2433 if (LIST_ISEMPTY(head)) {
2434 set_server_check_status(check, HCHK_STATUS_L4OK, NULL);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002435 goto out_end_tcpcheck;
Willy Tarreauef953952014-10-02 14:30:14 +02002436 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002437
Willy Tarreau213c6782014-10-02 14:51:02 +02002438 /* no step means first step initialisation */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002439 if (check->current_step == NULL) {
Willy Tarreau213c6782014-10-02 14:51:02 +02002440 check->last_started_step = NULL;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002441 check->bo->p = check->bo->data;
2442 check->bo->o = 0;
2443 check->bi->p = check->bi->data;
2444 check->bi->i = 0;
2445 cur = check->current_step = LIST_ELEM(head->n, struct tcpcheck_rule *, list);
2446 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
2447 if (s->proxy->timeout.check)
2448 t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
2449 }
2450 /* keep on processing step */
2451 else {
2452 cur = check->current_step;
2453 }
2454
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002455 /* It's only the rules which will enable send/recv */
2456 __conn_data_stop_both(conn);
2457
Willy Tarreauabca5b62013-12-06 14:19:25 +01002458 while (1) {
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002459 /* we have to try to flush the output buffer before reading, at the end,
2460 * or if we're about to send a string that does not fit in the remaining space.
2461 */
2462 if (check->bo->o &&
2463 (&cur->list == head ||
2464 check->current_step->action != TCPCHK_ACT_SEND ||
2465 check->current_step->string_len >= buffer_total_space(check->bo))) {
2466
Willy Tarreau1049b1f2014-02-02 01:51:17 +01002467 if (conn->xprt->snd_buf(conn, check->bo, 0) <= 0) {
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002468 if (conn->flags & CO_FL_ERROR) {
2469 chk_report_conn_err(conn, errno, 0);
2470 __conn_data_stop_both(conn);
2471 goto out_end_tcpcheck;
2472 }
2473 goto out_need_io;
Willy Tarreauabca5b62013-12-06 14:19:25 +01002474 }
Willy Tarreauabca5b62013-12-06 14:19:25 +01002475 }
2476
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002477 /* did we reach the end ? If so, let's check that everything was sent */
2478 if (&cur->list == head) {
2479 if (check->bo->o)
2480 goto out_need_io;
Willy Tarreauabca5b62013-12-06 14:19:25 +01002481 break;
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002482 }
Willy Tarreauabca5b62013-12-06 14:19:25 +01002483
Willy Tarreauf3d34822014-12-08 12:11:28 +01002484 /* have 'next' point to the next rule or NULL if we're on the last one */
2485 next = (struct tcpcheck_rule *)cur->list.n;
2486 if (&next->list == head)
2487 next = NULL;
2488
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002489 if (check->current_step->action == TCPCHK_ACT_CONNECT) {
2490 struct protocol *proto;
2491 struct xprt_ops *xprt;
2492
2493 /* mark the step as started */
2494 check->last_started_step = check->current_step;
2495 /* first, shut existing connection */
2496 conn_force_close(conn);
2497
2498 /* prepare new connection */
2499 /* initialization */
2500 conn_init(conn);
2501 conn_attach(conn, check, &check_conn_cb);
2502 conn->target = &s->obj_type;
2503
2504 /* no client address */
2505 clear_addr(&conn->addr.from);
2506
Simon Horman41f58762015-01-30 11:22:56 +09002507 if (is_addr(&check->addr)) {
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002508 /* we'll connect to the check addr specified on the server */
Simon Horman41f58762015-01-30 11:22:56 +09002509 conn->addr.to = check->addr;
Willy Tarreau640556c2014-05-09 23:38:15 +02002510 }
2511 else {
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002512 /* we'll connect to the addr on the server */
2513 conn->addr.to = s->addr;
Willy Tarreau640556c2014-05-09 23:38:15 +02002514 }
Thierry FOURNIERbb2ae642015-01-14 11:31:49 +01002515 proto = protocol_by_family(conn->addr.to.ss_family);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002516
2517 /* port */
2518 if (check->current_step->port)
2519 set_host_port(&conn->addr.to, check->current_step->port);
2520 else if (check->port)
2521 set_host_port(&conn->addr.to, check->port);
2522
2523#ifdef USE_OPENSSL
2524 if (check->current_step->conn_opts & TCPCHK_OPT_SSL) {
2525 xprt = &ssl_sock;
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002526 }
2527 else {
2528 xprt = &raw_sock;
2529 }
2530#else /* USE_OPENSSL */
2531 xprt = &raw_sock;
2532#endif /* USE_OPENSSL */
2533 conn_prepare(conn, proto, xprt);
2534
Willy Tarreaue7dff022015-04-03 01:14:29 +02002535 ret = SF_ERR_INTERNAL;
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002536 if (proto->connect)
Willy Tarreauf3d34822014-12-08 12:11:28 +01002537 ret = proto->connect(conn,
2538 1 /* I/O polling is always needed */,
2539 (next && next->action == TCPCHK_ACT_EXPECT) ? 0 : 2);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002540 conn->flags |= CO_FL_WAKE_DATA;
2541 if (check->current_step->conn_opts & TCPCHK_OPT_SEND_PROXY) {
2542 conn->send_proxy_ofs = 1;
2543 conn->flags |= CO_FL_SEND_PROXY;
2544 }
2545
2546 /* It can return one of :
Willy Tarreaue7dff022015-04-03 01:14:29 +02002547 * - SF_ERR_NONE if everything's OK
2548 * - SF_ERR_SRVTO if there are no more servers
2549 * - SF_ERR_SRVCL if the connection was refused by the server
2550 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
2551 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
2552 * - SF_ERR_INTERNAL for any other purely internal errors
2553 * Additionnally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002554 * Note that we try to prevent the network stack from sending the ACK during the
2555 * connect() when a pure TCP check is used (without PROXY protocol).
2556 */
2557 switch (ret) {
Willy Tarreaue7dff022015-04-03 01:14:29 +02002558 case SF_ERR_NONE:
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002559 /* we allow up to min(inter, timeout.connect) for a connection
2560 * to establish but only when timeout.check is set
2561 * as it may be to short for a full check otherwise
2562 */
2563 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
2564
2565 if (s->proxy->timeout.check && s->proxy->timeout.connect) {
2566 int t_con = tick_add(now_ms, s->proxy->timeout.connect);
2567 t->expire = tick_first(t->expire, t_con);
2568 }
2569 break;
Willy Tarreaue7dff022015-04-03 01:14:29 +02002570 case SF_ERR_SRVTO: /* ETIMEDOUT */
2571 case SF_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002572 chunk_printf(&trash, "TCPCHK error establishing connection at step %d: %s",
Simon Hormane16c1b32015-01-30 11:22:57 +09002573 tcpcheck_get_step_id(check), strerror(errno));
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002574 set_server_check_status(check, HCHK_STATUS_L4CON, trash.str);
2575 goto out_end_tcpcheck;
Willy Tarreaue7dff022015-04-03 01:14:29 +02002576 case SF_ERR_PRXCOND:
2577 case SF_ERR_RESOURCE:
2578 case SF_ERR_INTERNAL:
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002579 chunk_printf(&trash, "TCPCHK error establishing connection at step %d",
Simon Hormane16c1b32015-01-30 11:22:57 +09002580 tcpcheck_get_step_id(check));
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002581 set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.str);
2582 goto out_end_tcpcheck;
2583 }
2584
2585 /* allow next rule */
2586 cur = (struct tcpcheck_rule *)cur->list.n;
2587 check->current_step = cur;
2588
2589 /* don't do anything until the connection is established */
2590 if (!(conn->flags & CO_FL_CONNECTED)) {
2591 /* update expire time, should be done by process_chk */
2592 /* we allow up to min(inter, timeout.connect) for a connection
2593 * to establish but only when timeout.check is set
2594 * as it may be to short for a full check otherwise
2595 */
2596 while (tick_is_expired(t->expire, now_ms)) {
2597 int t_con;
2598
2599 t_con = tick_add(t->expire, s->proxy->timeout.connect);
2600 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
2601
2602 if (s->proxy->timeout.check)
2603 t->expire = tick_first(t->expire, t_con);
2604 }
2605 return;
2606 }
2607
2608 } /* end 'connect' */
2609 else if (check->current_step->action == TCPCHK_ACT_SEND) {
2610 /* mark the step as started */
2611 check->last_started_step = check->current_step;
2612
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002613 /* reset the read buffer */
2614 if (*check->bi->data != '\0') {
2615 *check->bi->data = '\0';
2616 check->bi->i = 0;
2617 }
2618
2619 if (conn->flags & (CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH)) {
2620 conn->flags |= CO_FL_ERROR;
2621 chk_report_conn_err(conn, 0, 0);
2622 goto out_end_tcpcheck;
2623 }
2624
Willy Tarreauabca5b62013-12-06 14:19:25 +01002625 if (check->current_step->string_len >= check->bo->size) {
2626 chunk_printf(&trash, "tcp-check send : string too large (%d) for buffer size (%d) at step %d",
2627 check->current_step->string_len, check->bo->size,
Simon Hormane16c1b32015-01-30 11:22:57 +09002628 tcpcheck_get_step_id(check));
Willy Tarreauabca5b62013-12-06 14:19:25 +01002629 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2630 goto out_end_tcpcheck;
2631 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002632
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002633 /* do not try to send if there is no space */
2634 if (check->current_step->string_len >= buffer_total_space(check->bo))
2635 continue;
2636
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002637 bo_putblk(check->bo, check->current_step->string, check->current_step->string_len);
2638 *check->bo->p = '\0'; /* to make gdb output easier to read */
2639
Willy Tarreauabca5b62013-12-06 14:19:25 +01002640 /* go to next rule and try to send */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002641 cur = (struct tcpcheck_rule *)cur->list.n;
2642 check->current_step = cur;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002643 } /* end 'send' */
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002644 else if (check->current_step->action == TCPCHK_ACT_EXPECT) {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01002645 if (unlikely(check->result == CHK_RES_FAILED))
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002646 goto out_end_tcpcheck;
2647
Willy Tarreau310987a2014-01-22 19:46:33 +01002648 if (conn->xprt->rcv_buf(conn, check->bi, check->bi->size) <= 0) {
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002649 if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH)) {
2650 done = 1;
2651 if ((conn->flags & CO_FL_ERROR) && !check->bi->i) {
2652 /* Report network errors only if we got no other data. Otherwise
2653 * we'll let the upper layers decide whether the response is OK
2654 * or not. It is very common that an RST sent by the server is
2655 * reported as an error just after the last data chunk.
2656 */
2657 chk_report_conn_err(conn, errno, 0);
2658 goto out_end_tcpcheck;
2659 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002660 }
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002661 else
2662 goto out_need_io;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002663 }
2664
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002665 /* mark the step as started */
2666 check->last_started_step = check->current_step;
2667
2668
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002669 /* Intermediate or complete response received.
2670 * Terminate string in check->bi->data buffer.
2671 */
2672 if (check->bi->i < check->bi->size) {
2673 check->bi->data[check->bi->i] = '\0';
2674 }
2675 else {
2676 check->bi->data[check->bi->i - 1] = '\0';
2677 done = 1; /* buffer full, don't wait for more data */
2678 }
2679
2680 contentptr = check->bi->data;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002681
2682 /* Check that response body is not empty... */
Willy Tarreauec6b0122014-05-13 17:57:29 +02002683 if (!check->bi->i) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002684 if (!done)
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002685 continue;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002686
2687 /* empty response */
2688 chunk_printf(&trash, "TCPCHK got an empty response at step %d",
Simon Hormane16c1b32015-01-30 11:22:57 +09002689 tcpcheck_get_step_id(check));
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002690 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2691
2692 goto out_end_tcpcheck;
2693 }
2694
2695 if (!done && (cur->string != NULL) && (check->bi->i < cur->string_len) )
Willy Tarreaua970c282013-12-06 12:47:19 +01002696 continue; /* try to read more */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002697
Willy Tarreaua970c282013-12-06 12:47:19 +01002698 tcpcheck_expect:
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002699 if (cur->string != NULL)
Willy Tarreauec6b0122014-05-13 17:57:29 +02002700 ret = my_memmem(contentptr, check->bi->i, cur->string, cur->string_len) != NULL;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002701 else if (cur->expect_regex != NULL)
Thierry FOURNIER09af0d62014-06-18 11:35:54 +02002702 ret = regex_exec(cur->expect_regex, contentptr);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002703
2704 if (!ret && !done)
Willy Tarreaua970c282013-12-06 12:47:19 +01002705 continue; /* try to read more */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002706
2707 /* matched */
2708 if (ret) {
2709 /* matched but we did not want to => ERROR */
2710 if (cur->inverse) {
2711 /* we were looking for a string */
2712 if (cur->string != NULL) {
2713 chunk_printf(&trash, "TCPCHK matched unwanted content '%s' at step %d",
Simon Hormane16c1b32015-01-30 11:22:57 +09002714 cur->string, tcpcheck_get_step_id(check));
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002715 }
2716 else {
2717 /* we were looking for a regex */
2718 chunk_printf(&trash, "TCPCHK matched unwanted content (regex) at step %d",
Simon Hormane16c1b32015-01-30 11:22:57 +09002719 tcpcheck_get_step_id(check));
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002720 }
2721 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2722 goto out_end_tcpcheck;
2723 }
2724 /* matched and was supposed to => OK, next step */
2725 else {
2726 cur = (struct tcpcheck_rule*)cur->list.n;
2727 check->current_step = cur;
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002728 if (check->current_step->action == TCPCHK_ACT_EXPECT)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002729 goto tcpcheck_expect;
2730 __conn_data_stop_recv(conn);
2731 }
2732 }
2733 else {
2734 /* not matched */
2735 /* not matched and was not supposed to => OK, next step */
2736 if (cur->inverse) {
2737 cur = (struct tcpcheck_rule*)cur->list.n;
2738 check->current_step = cur;
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002739 if (check->current_step->action == TCPCHK_ACT_EXPECT)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002740 goto tcpcheck_expect;
2741 __conn_data_stop_recv(conn);
2742 }
2743 /* not matched but was supposed to => ERROR */
2744 else {
2745 /* we were looking for a string */
2746 if (cur->string != NULL) {
2747 chunk_printf(&trash, "TCPCHK did not match content '%s' at step %d",
Simon Hormane16c1b32015-01-30 11:22:57 +09002748 cur->string, tcpcheck_get_step_id(check));
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002749 }
2750 else {
2751 /* we were looking for a regex */
2752 chunk_printf(&trash, "TCPCHK did not match content (regex) at step %d",
Simon Hormane16c1b32015-01-30 11:22:57 +09002753 tcpcheck_get_step_id(check));
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002754 }
2755 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2756 goto out_end_tcpcheck;
2757 }
2758 }
2759 } /* end expect */
2760 } /* end loop over double chained step list */
2761
2762 set_server_check_status(check, HCHK_STATUS_L7OKD, "(tcp-check)");
2763 goto out_end_tcpcheck;
2764
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002765 out_need_io:
2766 if (check->bo->o)
2767 __conn_data_want_send(conn);
2768
2769 if (check->current_step->action == TCPCHK_ACT_EXPECT)
2770 __conn_data_want_recv(conn);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002771 return;
2772
2773 out_end_tcpcheck:
2774 /* collect possible new errors */
2775 if (conn->flags & CO_FL_ERROR)
2776 chk_report_conn_err(conn, 0, 0);
2777
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002778 /* cleanup before leaving */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002779 check->current_step = NULL;
2780
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01002781 if (check->result == CHK_RES_FAILED)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002782 conn->flags |= CO_FL_ERROR;
2783
2784 __conn_data_stop_both(conn);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002785
2786 return;
2787}
2788
Simon Hormanb1900d52015-01-30 11:22:54 +09002789const char *init_check(struct check *check, int type)
2790{
2791 check->type = type;
2792
2793 /* Allocate buffer for requests... */
2794 if ((check->bi = calloc(sizeof(struct buffer) + global.tune.chksize, sizeof(char))) == NULL) {
2795 return "out of memory while allocating check buffer";
2796 }
2797 check->bi->size = global.tune.chksize;
2798
2799 /* Allocate buffer for responses... */
2800 if ((check->bo = calloc(sizeof(struct buffer) + global.tune.chksize, sizeof(char))) == NULL) {
2801 return "out of memory while allocating check buffer";
2802 }
2803 check->bo->size = global.tune.chksize;
2804
2805 /* Allocate buffer for partial results... */
2806 if ((check->conn = calloc(1, sizeof(struct connection))) == NULL) {
2807 return "out of memory while allocating check connection";
2808 }
2809
2810 check->conn->t.sock.fd = -1; /* no agent in progress yet */
2811
2812 return NULL;
2813}
2814
Simon Hormanbfb5d332015-01-30 11:22:55 +09002815void free_check(struct check *check)
2816{
2817 free(check->bi);
2818 free(check->bo);
2819 free(check->conn);
2820}
2821
Simon Horman0ba0e4a2015-01-30 11:23:00 +09002822void email_alert_free(struct email_alert *alert)
2823{
2824 struct tcpcheck_rule *rule, *back;
2825
2826 if (!alert)
2827 return;
2828
2829 list_for_each_entry_safe(rule, back, &alert->tcpcheck_rules, list)
2830 free(rule);
2831 free(alert);
2832}
2833
2834static struct task *process_email_alert(struct task *t)
2835{
2836 struct check *check = t->context;
2837 struct email_alertq *q;
2838
2839 q = container_of(check, typeof(*q), check);
2840
2841 if (!(check->state & CHK_ST_ENABLED)) {
2842 if (LIST_ISEMPTY(&q->email_alerts)) {
2843 /* All alerts processed, delete check */
2844 task_delete(t);
2845 task_free(t);
2846 check->task = NULL;
2847 return NULL;
2848 } else {
2849 struct email_alert *alert;
2850
2851 alert = LIST_NEXT(&q->email_alerts, typeof(alert), list);
2852 check->tcpcheck_rules = &alert->tcpcheck_rules;
2853 LIST_DEL(&alert->list);
2854
2855 check->state |= CHK_ST_ENABLED;
2856 }
2857
2858 }
2859
2860 process_chk(t);
2861
2862 if (!(check->state & CHK_ST_INPROGRESS) && check->tcpcheck_rules) {
2863 struct email_alert *alert;
2864
2865 alert = container_of(check->tcpcheck_rules, typeof(*alert), tcpcheck_rules);
2866 email_alert_free(alert);
2867
2868 check->tcpcheck_rules = NULL;
2869 check->state &= ~CHK_ST_ENABLED;
2870 }
2871 return t;
2872}
2873
2874static int init_email_alert_checks(struct server *s)
2875{
2876 int i;
2877 struct mailer *mailer;
2878 const char *err_str;
2879 struct proxy *p = s->proxy;
2880
2881 if (p->email_alert.queues)
2882 /* Already initialised, nothing to do */
2883 return 1;
2884
2885 p->email_alert.queues = calloc(p->email_alert.mailers.m->count, sizeof *p->email_alert.queues);
2886 if (!p->email_alert.queues) {
2887 err_str = "out of memory while allocating checks array";
2888 goto error_alert;
2889 }
2890
2891 for (i = 0, mailer = p->email_alert.mailers.m->mailer_list;
2892 i < p->email_alert.mailers.m->count; i++, mailer = mailer->next) {
2893 struct email_alertq *q = &p->email_alert.queues[i];
2894 struct check *check = &q->check;
2895
2896
2897 LIST_INIT(&q->email_alerts);
2898
2899 check->inter = DEF_CHKINTR; /* XXX: Would like to Skip to the next alert, if any, ASAP.
2900 * But need enough time so that timeouts don't occur
2901 * during tcp check procssing. For now just us an arbitrary default. */
2902 check->rise = DEF_AGENT_RISETIME;
2903 check->fall = DEF_AGENT_FALLTIME;
2904 err_str = init_check(check, PR_O2_TCPCHK_CHK);
2905 if (err_str) {
2906 goto error_free;
2907 }
2908
2909 check->xprt = mailer->xprt;
2910 if (!get_host_port(&mailer->addr))
2911 /* Default to submission port */
2912 check->port = 587;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09002913 check->addr = mailer->addr;
2914 check->server = s;
2915 }
2916
2917 return 1;
2918
2919error_free:
2920 while (i-- > 1)
2921 task_free(p->email_alert.queues[i].check.task);
2922 free(p->email_alert.queues);
2923 p->email_alert.queues = NULL;
2924error_alert:
2925 Alert("Email alert [%s] could not be initialised: %s\n", p->id, err_str);
2926 return 0;
2927}
2928
2929
2930static int add_tcpcheck_expect_str(struct list *list, const char *str)
2931{
2932 struct tcpcheck_rule *tcpcheck;
2933
2934 tcpcheck = calloc(1, sizeof *tcpcheck);
2935 if (!tcpcheck)
2936 return 0;
2937
2938 tcpcheck->action = TCPCHK_ACT_EXPECT;
2939 tcpcheck->string = strdup(str);
2940 if (!tcpcheck->string) {
2941 free(tcpcheck);
2942 return 0;
2943 }
2944
2945 LIST_ADDQ(list, &tcpcheck->list);
2946 return 1;
2947}
2948
2949static int add_tcpcheck_send_strs(struct list *list, const char * const *strs)
2950{
2951 struct tcpcheck_rule *tcpcheck;
2952 int i;
2953
2954 tcpcheck = calloc(1, sizeof *tcpcheck);
2955 if (!tcpcheck)
2956 return 0;
2957
2958 tcpcheck->action = TCPCHK_ACT_SEND;
2959
2960 tcpcheck->string_len = 0;
2961 for (i = 0; strs[i]; i++)
2962 tcpcheck->string_len += strlen(strs[i]);
2963
2964 tcpcheck->string = malloc(tcpcheck->string_len + 1);
2965 if (!tcpcheck->string) {
2966 free(tcpcheck);
2967 return 0;
2968 }
2969 tcpcheck->string[0] = '\0';
2970
2971 for (i = 0; strs[i]; i++)
2972 strcat(tcpcheck->string, strs[i]);
2973
2974 LIST_ADDQ(list, &tcpcheck->list);
2975 return 1;
2976}
2977
2978static int enqueue_one_email_alert(struct email_alertq *q, const char *msg)
2979{
2980 struct email_alert *alert = NULL;
2981 struct tcpcheck_rule *tcpcheck;
2982 struct check *check = &q->check;
2983 struct proxy *p = check->server->proxy;
2984
2985 alert = calloc(1, sizeof *alert);
2986 if (!alert) {
2987 goto error;
2988 }
2989 LIST_INIT(&alert->tcpcheck_rules);
2990
2991 tcpcheck = calloc(1, sizeof *tcpcheck);
2992 if (!tcpcheck)
2993 goto error;
2994 tcpcheck->action = TCPCHK_ACT_CONNECT;
2995 LIST_ADDQ(&alert->tcpcheck_rules, &tcpcheck->list);
2996
2997 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "220 "))
2998 goto error;
2999
3000 {
3001 const char * const strs[4] = { "EHLO ", p->email_alert.myhostname, "\r\n" };
3002 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3003 goto error;
3004 }
3005
3006 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "250 "))
3007 goto error;
3008
3009 {
3010 const char * const strs[4] = { "MAIL FROM:<", p->email_alert.from, ">\r\n" };
3011 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3012 goto error;
3013 }
3014
3015 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "250 "))
3016 goto error;
3017
3018 {
3019 const char * const strs[4] = { "RCPT TO:<", p->email_alert.to, ">\r\n" };
3020 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3021 goto error;
3022 }
3023
3024 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "250 "))
3025 goto error;
3026
3027 {
3028 const char * const strs[2] = { "DATA\r\n" };
3029 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3030 goto error;
3031 }
3032
3033 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "354 "))
3034 goto error;
3035
3036 {
3037 struct tm tm;
3038 char datestr[48];
3039 const char * const strs[18] = {
3040 "From: ", p->email_alert.from, "\n",
3041 "To: ", p->email_alert.to, "\n",
3042 "Date: ", datestr, "\n",
3043 "Subject: [HAproxy Alert] ", msg, "\n",
3044 "\n",
3045 msg, "\n",
3046 ".\r\n",
3047 "\r\n",
3048 NULL
3049 };
3050
3051 get_localtime(date.tv_sec, &tm);
3052
3053 if (strftime(datestr, sizeof(datestr), "%a, %d %b %Y %T %z (%Z)", &tm) == 0) {
3054 goto error;
3055 }
3056
3057 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3058 goto error;
3059 }
3060
3061 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "250 "))
3062 goto error;
3063
3064 {
3065 const char * const strs[2] = { "QUIT\r\n" };
3066 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3067 goto error;
3068 }
3069
3070 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "221 "))
3071 goto error;
3072
3073 if (!check->task) {
3074 struct task *t;
3075
3076 if ((t = task_new()) == NULL)
3077 goto error;
3078
3079 check->task = t;
3080 t->process = process_email_alert;
3081 t->context = check;
3082
3083 /* check this in one ms */
3084 t->expire = tick_add(now_ms, MS_TO_TICKS(1));
3085 check->start = now;
3086 task_queue(t);
3087 }
3088
3089 LIST_ADDQ(&q->email_alerts, &alert->list);
3090
3091 return 1;
3092
3093error:
3094 email_alert_free(alert);
3095 return 0;
3096}
3097
3098static void enqueue_email_alert(struct proxy *p, const char *msg)
3099{
3100 int i;
3101 struct mailer *mailer;
3102
3103 for (i = 0, mailer = p->email_alert.mailers.m->mailer_list;
3104 i < p->email_alert.mailers.m->count; i++, mailer = mailer->next) {
3105 if (!enqueue_one_email_alert(&p->email_alert.queues[i], msg)) {
3106 Alert("Email alert [%s] could not be enqueued: out of memory\n", p->id);
3107 return;
3108 }
3109 }
3110
3111 return;
3112}
3113
3114/*
3115 * Send email alert if configured.
3116 */
Simon Horman64e34162015-02-06 11:11:57 +09003117void send_email_alert(struct server *s, int level, const char *format, ...)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003118{
3119 va_list argp;
3120 char buf[1024];
3121 int len;
3122 struct proxy *p = s->proxy;
3123
Simon Horman64e34162015-02-06 11:11:57 +09003124 if (!p->email_alert.mailers.m || level > p->email_alert.level ||
3125 format == NULL || !init_email_alert_checks(s))
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003126 return;
3127
3128 va_start(argp, format);
3129 len = vsnprintf(buf, sizeof(buf), format, argp);
3130 va_end(argp);
3131
3132 if (len < 0) {
3133 Alert("Email alert [%s] could format message\n", p->id);
3134 return;
3135 }
3136
3137 enqueue_email_alert(p, buf);
3138}
3139
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02003140
Willy Tarreaubd741542010-03-16 18:46:54 +01003141/*
Willy Tarreaubaaee002006-06-26 02:48:02 +02003142 * Local variables:
3143 * c-indent-level: 8
3144 * c-basic-offset: 8
3145 * End:
3146 */