blob: ed99bb56ac124208a98b6274df5ef533cd96d2bc [file] [log] [blame]
/*
 * Health-checks functions.
 *
 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
 * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
13
Willy Tarreaub8816082008-01-18 12:18:15 +010014#include <assert.h>
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020015#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020016#include <errno.h>
17#include <fcntl.h>
Willy Tarreau9b39dc52014-07-08 00:54:10 +020018#include <signal.h>
Simon Horman0ba0e4a2015-01-30 11:23:00 +090019#include <stdarg.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020#include <stdio.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020021#include <stdlib.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020022#include <string.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020023#include <time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020024#include <unistd.h>
25#include <sys/socket.h>
Dmitry Sivachenkocaf58982009-08-24 15:11:06 +040026#include <sys/types.h>
Simon Horman98637e52014-06-20 12:30:16 +090027#include <sys/wait.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020028#include <netinet/in.h>
Willy Tarreau1274bc42009-07-15 07:16:31 +020029#include <netinet/tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020030#include <arpa/inet.h>
31
Willy Tarreauc7e42382012-08-24 19:22:53 +020032#include <common/chunk.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020033#include <common/compat.h>
34#include <common/config.h>
35#include <common/mini-clist.h>
Willy Tarreau83749182007-04-15 20:56:27 +020036#include <common/standard.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020037#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020038
39#include <types/global.h>
Baptiste Assmanna68ca962015-04-14 01:15:08 +020040#include <types/dns.h>
William Lallemand9ed62032016-11-21 17:49:11 +010041#include <types/stats.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020042
43#include <proto/backend.h>
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020044#include <proto/checks.h>
William Lallemand9ed62032016-11-21 17:49:11 +010045#include <proto/stats.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020046#include <proto/fd.h>
47#include <proto/log.h>
48#include <proto/queue.h>
Willy Tarreauc6f4ce82009-06-10 11:09:37 +020049#include <proto/port_range.h>
Willy Tarreau3d300592007-03-18 18:34:41 +010050#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010051#include <proto/proto_tcp.h>
Baptiste Assmann69e273f2013-12-11 00:52:19 +010052#include <proto/protocol.h>
Willy Tarreau2b5652f2006-12-31 17:46:05 +010053#include <proto/proxy.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020054#include <proto/server.h>
Willy Tarreau48d6bf22016-06-21 16:27:34 +020055#include <proto/signal.h>
Willy Tarreau9e000c62011-03-10 14:03:36 +010056#include <proto/stream_interface.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020057#include <proto/task.h>
Baptiste Assmanna68ca962015-04-14 01:15:08 +020058#include <proto/log.h>
59#include <proto/dns.h>
60#include <proto/proto_udp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020061
Olivier Houchard9130a962017-10-17 17:33:43 +020062#ifdef USE_OPENSSL
63#include <proto/ssl_sock.h>
64#endif /* USE_OPENSSL */
65
/* Prototypes for file-local helpers that are referenced before their
 * definitions.
 */
static int httpchk_expect(struct server *s, int done);
static int tcpcheck_get_step_id(struct check *check);
static char *tcpcheck_get_step_comment(struct check *check, int stepid);
static int tcpcheck_main(struct check *check);

/* File-local memory pool handles; both start out as NULL. */
static struct pool_head *pool2_email_alert = NULL;
static struct pool_head *pool2_tcpcheck_rule = NULL;
73
74
Simon Horman63a4a822012-03-19 07:24:41 +090075static const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010076 [HCHK_STATUS_UNKNOWN] = { CHK_RES_UNKNOWN, "UNK", "Unknown" },
77 [HCHK_STATUS_INI] = { CHK_RES_UNKNOWN, "INI", "Initializing" },
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +020078 [HCHK_STATUS_START] = { /* SPECIAL STATUS*/ },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020079
Willy Tarreau23964182014-05-20 20:56:30 +020080 /* Below we have finished checks */
81 [HCHK_STATUS_CHECKED] = { CHK_RES_NEUTRAL, "CHECKED", "No status change" },
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010082 [HCHK_STATUS_HANA] = { CHK_RES_FAILED, "HANA", "Health analyze" },
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +010083
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010084 [HCHK_STATUS_SOCKERR] = { CHK_RES_FAILED, "SOCKERR", "Socket error" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020085
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010086 [HCHK_STATUS_L4OK] = { CHK_RES_PASSED, "L4OK", "Layer4 check passed" },
87 [HCHK_STATUS_L4TOUT] = { CHK_RES_FAILED, "L4TOUT", "Layer4 timeout" },
88 [HCHK_STATUS_L4CON] = { CHK_RES_FAILED, "L4CON", "Layer4 connection problem" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020089
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010090 [HCHK_STATUS_L6OK] = { CHK_RES_PASSED, "L6OK", "Layer6 check passed" },
91 [HCHK_STATUS_L6TOUT] = { CHK_RES_FAILED, "L6TOUT", "Layer6 timeout" },
92 [HCHK_STATUS_L6RSP] = { CHK_RES_FAILED, "L6RSP", "Layer6 invalid response" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020093
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010094 [HCHK_STATUS_L7TOUT] = { CHK_RES_FAILED, "L7TOUT", "Layer7 timeout" },
95 [HCHK_STATUS_L7RSP] = { CHK_RES_FAILED, "L7RSP", "Layer7 invalid response" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020096
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +020097 [HCHK_STATUS_L57DATA] = { /* DUMMY STATUS */ },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020098
Willy Tarreau6aaa1b82013-12-11 17:09:34 +010099 [HCHK_STATUS_L7OKD] = { CHK_RES_PASSED, "L7OK", "Layer7 check passed" },
100 [HCHK_STATUS_L7OKCD] = { CHK_RES_CONDPASS, "L7OKC", "Layer7 check conditionally passed" },
101 [HCHK_STATUS_L7STS] = { CHK_RES_FAILED, "L7STS", "Layer7 wrong status" },
Simon Horman98637e52014-06-20 12:30:16 +0900102
103 [HCHK_STATUS_PROCERR] = { CHK_RES_FAILED, "PROCERR", "External check error" },
104 [HCHK_STATUS_PROCTOUT] = { CHK_RES_FAILED, "PROCTOUT", "External check timeout" },
Cyril Bonté77010d82014-08-07 01:55:37 +0200105 [HCHK_STATUS_PROCOK] = { CHK_RES_PASSED, "PROCOK", "External check passed" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200106};
107
Cyril Bontéac92a062014-12-27 22:28:38 +0100108const struct extcheck_env extcheck_envs[EXTCHK_SIZE] = {
109 [EXTCHK_PATH] = { "PATH", EXTCHK_SIZE_EVAL_INIT },
110 [EXTCHK_HAPROXY_PROXY_NAME] = { "HAPROXY_PROXY_NAME", EXTCHK_SIZE_EVAL_INIT },
111 [EXTCHK_HAPROXY_PROXY_ID] = { "HAPROXY_PROXY_ID", EXTCHK_SIZE_EVAL_INIT },
112 [EXTCHK_HAPROXY_PROXY_ADDR] = { "HAPROXY_PROXY_ADDR", EXTCHK_SIZE_EVAL_INIT },
113 [EXTCHK_HAPROXY_PROXY_PORT] = { "HAPROXY_PROXY_PORT", EXTCHK_SIZE_EVAL_INIT },
114 [EXTCHK_HAPROXY_SERVER_NAME] = { "HAPROXY_SERVER_NAME", EXTCHK_SIZE_EVAL_INIT },
115 [EXTCHK_HAPROXY_SERVER_ID] = { "HAPROXY_SERVER_ID", EXTCHK_SIZE_EVAL_INIT },
116 [EXTCHK_HAPROXY_SERVER_ADDR] = { "HAPROXY_SERVER_ADDR", EXTCHK_SIZE_EVAL_INIT },
117 [EXTCHK_HAPROXY_SERVER_PORT] = { "HAPROXY_SERVER_PORT", EXTCHK_SIZE_EVAL_INIT },
118 [EXTCHK_HAPROXY_SERVER_MAXCONN] = { "HAPROXY_SERVER_MAXCONN", EXTCHK_SIZE_EVAL_INIT },
119 [EXTCHK_HAPROXY_SERVER_CURCONN] = { "HAPROXY_SERVER_CURCONN", EXTCHK_SIZE_ULONG },
120};
121
Simon Horman63a4a822012-03-19 07:24:41 +0900122static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100123 [HANA_STATUS_UNKNOWN] = { "Unknown", { 0, 0 }},
124
125 [HANA_STATUS_L4_OK] = { "L4 successful connection", { 2, 0 }},
126 [HANA_STATUS_L4_ERR] = { "L4 unsuccessful connection", { 1, 1 }},
127
128 [HANA_STATUS_HTTP_OK] = { "Correct http response", { 0, 2 }},
129 [HANA_STATUS_HTTP_STS] = { "Wrong http response", { 0, 1 }},
130 [HANA_STATUS_HTTP_HDRRSP] = { "Invalid http response (headers)", { 0, 1 }},
131 [HANA_STATUS_HTTP_RSP] = { "Invalid http response", { 0, 1 }},
132
133 [HANA_STATUS_HTTP_READ_ERROR] = { "Read error (http)", { 0, 1 }},
134 [HANA_STATUS_HTTP_READ_TIMEOUT] = { "Read timeout (http)", { 0, 1 }},
135 [HANA_STATUS_HTTP_BROKEN_PIPE] = { "Close from server (http)", { 0, 1 }},
136};
137
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200138/*
139 * Convert check_status code to description
140 */
141const char *get_check_status_description(short check_status) {
142
143 const char *desc;
144
145 if (check_status < HCHK_STATUS_SIZE)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200146 desc = check_statuses[check_status].desc;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200147 else
148 desc = NULL;
149
150 if (desc && *desc)
151 return desc;
152 else
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200153 return check_statuses[HCHK_STATUS_UNKNOWN].desc;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200154}
155
156/*
157 * Convert check_status code to short info
158 */
159const char *get_check_status_info(short check_status) {
160
161 const char *info;
162
163 if (check_status < HCHK_STATUS_SIZE)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200164 info = check_statuses[check_status].info;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200165 else
166 info = NULL;
167
168 if (info && *info)
169 return info;
170 else
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200171 return check_statuses[HCHK_STATUS_UNKNOWN].info;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200172}
173
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100174const char *get_analyze_status(short analyze_status) {
175
176 const char *desc;
177
178 if (analyze_status < HANA_STATUS_SIZE)
179 desc = analyze_statuses[analyze_status].desc;
180 else
181 desc = NULL;
182
183 if (desc && *desc)
184 return desc;
185 else
186 return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
187}
188
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200189/*
Simon Horman4a741432013-02-23 15:35:38 +0900190 * Set check->status, update check->duration and fill check->result with
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200191 * an adequate CHK_RES_* value. The new check->health is computed based
192 * on the result.
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200193 *
194 * Show information in logs about failed health check if server is UP
195 * or succeeded health checks if server is DOWN.
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200196 */
Simon Horman4a741432013-02-23 15:35:38 +0900197static void set_server_check_status(struct check *check, short status, const char *desc)
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100198{
Simon Horman4a741432013-02-23 15:35:38 +0900199 struct server *s = check->server;
Willy Tarreaubef1b322014-05-13 21:01:39 +0200200 short prev_status = check->status;
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200201 int report = 0;
Simon Horman4a741432013-02-23 15:35:38 +0900202
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200203 if (status == HCHK_STATUS_START) {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100204 check->result = CHK_RES_UNKNOWN; /* no result yet */
Simon Horman4a741432013-02-23 15:35:38 +0900205 check->desc[0] = '\0';
206 check->start = now;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200207 return;
208 }
209
Simon Horman4a741432013-02-23 15:35:38 +0900210 if (!check->status)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200211 return;
212
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200213 if (desc && *desc) {
Simon Horman4a741432013-02-23 15:35:38 +0900214 strncpy(check->desc, desc, HCHK_DESC_LEN-1);
215 check->desc[HCHK_DESC_LEN-1] = '\0';
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200216 } else
Simon Horman4a741432013-02-23 15:35:38 +0900217 check->desc[0] = '\0';
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200218
Simon Horman4a741432013-02-23 15:35:38 +0900219 check->status = status;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200220 if (check_statuses[status].result)
Simon Horman4a741432013-02-23 15:35:38 +0900221 check->result = check_statuses[status].result;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200222
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100223 if (status == HCHK_STATUS_HANA)
Simon Horman4a741432013-02-23 15:35:38 +0900224 check->duration = -1;
225 else if (!tv_iszero(&check->start)) {
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200226 /* set_server_check_status() may be called more than once */
Simon Horman4a741432013-02-23 15:35:38 +0900227 check->duration = tv_ms_elapsed(&check->start, &now);
228 tv_zero(&check->start);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200229 }
230
Willy Tarreau23964182014-05-20 20:56:30 +0200231 /* no change is expected if no state change occurred */
232 if (check->result == CHK_RES_NEUTRAL)
233 return;
234
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200235 report = 0;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200236
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200237 switch (check->result) {
238 case CHK_RES_FAILED:
Willy Tarreau12634e12014-05-23 11:32:36 +0200239 /* Failure to connect to the agent as a secondary check should not
240 * cause the server to be marked down.
241 */
242 if ((!(check->state & CHK_ST_AGENT) ||
Simon Hormaneaabd522015-02-26 11:26:17 +0900243 (check->status >= HCHK_STATUS_L57DATA)) &&
Willy Tarreau12634e12014-05-23 11:32:36 +0200244 (check->health >= check->rise)) {
Christopher Faulet29f77e82017-06-08 14:04:45 +0200245 HA_ATOMIC_ADD(&s->counters.failed_checks, 1);
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200246 report = 1;
247 check->health--;
248 if (check->health < check->rise)
249 check->health = 0;
250 }
251 break;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200252
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200253 case CHK_RES_PASSED:
254 case CHK_RES_CONDPASS: /* "condpass" cannot make the first step but it OK after a "passed" */
255 if ((check->health < check->rise + check->fall - 1) &&
256 (check->result == CHK_RES_PASSED || check->health > 0)) {
257 report = 1;
258 check->health++;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200259
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200260 if (check->health >= check->rise)
261 check->health = check->rise + check->fall - 1; /* OK now */
262 }
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200263
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200264 /* clear consecutive_errors if observing is enabled */
265 if (s->onerror)
266 s->consecutive_errors = 0;
267 break;
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100268
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200269 default:
270 break;
271 }
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200272
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200273 if (s->proxy->options2 & PR_O2_LOGHCHKS &&
274 (status != prev_status || report)) {
275 chunk_printf(&trash,
Willy Tarreau12634e12014-05-23 11:32:36 +0200276 "%s check for %sserver %s/%s %s%s",
277 (check->state & CHK_ST_AGENT) ? "Agent" : "Health",
Willy Tarreauc93cd162014-05-13 15:54:22 +0200278 s->flags & SRV_F_BACKUP ? "backup " : "",
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100279 s->proxy->id, s->id,
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100280 (check->result == CHK_RES_CONDPASS) ? "conditionally ":"",
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200281 (check->result >= CHK_RES_PASSED) ? "succeeded" : "failed");
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200282
Emeric Brun5a133512017-10-19 14:42:30 +0200283 srv_append_status(&trash, s, check, -1, 0);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200284
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100285 chunk_appendf(&trash, ", status: %d/%d %s",
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200286 (check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
287 (check->health >= check->rise) ? check->fall : check->rise,
288 (check->health >= check->rise) ? (s->uweight ? "UP" : "DRAIN") : "DOWN");
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200289
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100290 Warning("%s.\n", trash.str);
291 send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.str);
Simon Horman7ea9be02015-04-30 13:10:33 +0900292 send_email_alert(s, LOG_INFO, "%s", trash.str);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200293 }
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200294}
295
Willy Tarreau4eec5472014-05-20 22:32:27 +0200296/* Marks the check <check>'s server down if the current check is already failed
297 * and the server is not down yet nor in maintenance.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200298 */
Willy Tarreau4eec5472014-05-20 22:32:27 +0200299static void check_notify_failure(struct check *check)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200300{
Simon Horman4a741432013-02-23 15:35:38 +0900301 struct server *s = check->server;
Simon Hormane0d1bfb2011-06-21 14:34:58 +0900302
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200303 /* The agent secondary check should only cause a server to be marked
304 * as down if check->status is HCHK_STATUS_L7STS, which indicates
305 * that the agent returned "fail", "stopped" or "down".
306 * The implication here is that failure to connect to the agent
307 * as a secondary check should not cause the server to be marked
308 * down. */
309 if ((check->state & CHK_ST_AGENT) && check->status != HCHK_STATUS_L7STS)
310 return;
311
Willy Tarreau4eec5472014-05-20 22:32:27 +0200312 if (check->health > 0)
313 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100314
Willy Tarreau4eec5472014-05-20 22:32:27 +0200315 /* We only report a reason for the check if we did not do so previously */
Emeric Brun5a133512017-10-19 14:42:30 +0200316 srv_set_stopped(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200317}
318
Willy Tarreauaf549582014-05-16 17:37:50 +0200319/* Marks the check <check> as valid and tries to set its server up, provided
Willy Tarreau3e048382014-05-21 10:30:54 +0200320 * it isn't in maintenance, it is not tracking a down server and other checks
321 * comply. The rule is simple : by default, a server is up, unless any of the
322 * following conditions is true :
323 * - health check failed (check->health < rise)
324 * - agent check failed (agent->health < rise)
325 * - the server tracks a down server (track && track->state == STOPPED)
326 * Note that if the server has a slowstart, it will switch to STARTING instead
327 * of RUNNING. Also, only the health checks support the nolb mode, so the
328 * agent's success may not take the server out of this mode.
Willy Tarreauaf549582014-05-16 17:37:50 +0200329 */
Willy Tarreau3e048382014-05-21 10:30:54 +0200330static void check_notify_success(struct check *check)
Willy Tarreauaf549582014-05-16 17:37:50 +0200331{
Simon Horman4a741432013-02-23 15:35:38 +0900332 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100333
Emeric Brun52a91d32017-08-31 14:41:55 +0200334 if (s->next_admin & SRV_ADMF_MAINT)
Willy Tarreauaf549582014-05-16 17:37:50 +0200335 return;
Cyril Bontécd19e512010-01-31 22:34:03 +0100336
Emeric Brun52a91d32017-08-31 14:41:55 +0200337 if (s->track && s->track->next_state == SRV_ST_STOPPED)
Willy Tarreauaf549582014-05-16 17:37:50 +0200338 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100339
Willy Tarreau3e048382014-05-21 10:30:54 +0200340 if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
341 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100342
Willy Tarreau3e048382014-05-21 10:30:54 +0200343 if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
344 return;
Willy Tarreauaf549582014-05-16 17:37:50 +0200345
Emeric Brun52a91d32017-08-31 14:41:55 +0200346 if ((check->state & CHK_ST_AGENT) && s->next_state == SRV_ST_STOPPING)
Willy Tarreau3e048382014-05-21 10:30:54 +0200347 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100348
Emeric Brun5a133512017-10-19 14:42:30 +0200349 srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100350}
351
Willy Tarreaudb58b792014-05-21 13:57:23 +0200352/* Marks the check <check> as valid and tries to set its server into stopping mode
353 * if it was running or starting, and provided it isn't in maintenance and other
354 * checks comply. The conditions for the server to be marked in stopping mode are
355 * the same as for it to be turned up. Also, only the health checks support the
356 * nolb mode.
Willy Tarreauaf549582014-05-16 17:37:50 +0200357 */
Willy Tarreaudb58b792014-05-21 13:57:23 +0200358static void check_notify_stopping(struct check *check)
Willy Tarreauaf549582014-05-16 17:37:50 +0200359{
Simon Horman4a741432013-02-23 15:35:38 +0900360 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100361
Emeric Brun52a91d32017-08-31 14:41:55 +0200362 if (s->next_admin & SRV_ADMF_MAINT)
Willy Tarreauaf549582014-05-16 17:37:50 +0200363 return;
364
Willy Tarreaudb58b792014-05-21 13:57:23 +0200365 if (check->state & CHK_ST_AGENT)
366 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100367
Emeric Brun52a91d32017-08-31 14:41:55 +0200368 if (s->track && s->track->next_state == SRV_ST_STOPPED)
Willy Tarreaudb58b792014-05-21 13:57:23 +0200369 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100370
Willy Tarreaudb58b792014-05-21 13:57:23 +0200371 if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
372 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100373
Willy Tarreaudb58b792014-05-21 13:57:23 +0200374 if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
375 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100376
Emeric Brun5a133512017-10-19 14:42:30 +0200377 srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100378}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200379
Willy Tarreau9fe7aae2013-12-31 23:47:37 +0100380/* note: use health_adjust() only, which first checks that the observe mode is
381 * enabled.
382 */
383void __health_adjust(struct server *s, short status)
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100384{
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100385 int failed;
386 int expire;
387
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100388 if (s->observe >= HANA_OBS_SIZE)
389 return;
390
Willy Tarreaubb956662013-01-24 00:37:39 +0100391 if (status >= HANA_STATUS_SIZE || !analyze_statuses[status].desc)
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100392 return;
393
394 switch (analyze_statuses[status].lr[s->observe - 1]) {
395 case 1:
396 failed = 1;
397 break;
398
399 case 2:
400 failed = 0;
401 break;
402
403 default:
404 return;
405 }
406
407 if (!failed) {
408 /* good: clear consecutive_errors */
409 s->consecutive_errors = 0;
410 return;
411 }
412
Christopher Faulet29f77e82017-06-08 14:04:45 +0200413 HA_ATOMIC_ADD(&s->consecutive_errors, 1);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100414
415 if (s->consecutive_errors < s->consecutive_errors_limit)
416 return;
417
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100418 chunk_printf(&trash, "Detected %d consecutive errors, last one was: %s",
419 s->consecutive_errors, get_analyze_status(status));
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100420
421 switch (s->onerror) {
422 case HANA_ONERR_FASTINTER:
423 /* force fastinter - nothing to do here as all modes force it */
424 break;
425
426 case HANA_ONERR_SUDDTH:
427 /* simulate a pre-fatal failed health check */
Simon Horman58c32972013-11-25 10:46:38 +0900428 if (s->check.health > s->check.rise)
429 s->check.health = s->check.rise + 1;
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100430
431 /* no break - fall through */
432
433 case HANA_ONERR_FAILCHK:
434 /* simulate a failed health check */
Simon Horman4a741432013-02-23 15:35:38 +0900435 set_server_check_status(&s->check, HCHK_STATUS_HANA, trash.str);
Willy Tarreau4eec5472014-05-20 22:32:27 +0200436 check_notify_failure(&s->check);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100437 break;
438
439 case HANA_ONERR_MARKDWN:
440 /* mark server down */
Simon Horman58c32972013-11-25 10:46:38 +0900441 s->check.health = s->check.rise;
Simon Horman4a741432013-02-23 15:35:38 +0900442 set_server_check_status(&s->check, HCHK_STATUS_HANA, trash.str);
Willy Tarreau4eec5472014-05-20 22:32:27 +0200443 check_notify_failure(&s->check);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100444 break;
445
446 default:
447 /* write a warning? */
448 break;
449 }
450
451 s->consecutive_errors = 0;
Christopher Faulet29f77e82017-06-08 14:04:45 +0200452 HA_ATOMIC_ADD(&s->counters.failed_hana, 1);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100453
Simon Horman66183002013-02-23 10:16:43 +0900454 if (s->check.fastinter) {
455 expire = tick_add(now_ms, MS_TO_TICKS(s->check.fastinter));
Sergiy Prykhodko1d57e502013-09-21 12:05:00 +0300456 if (s->check.task->expire > expire) {
Willy Tarreau5b3a2022012-09-28 15:01:02 +0200457 s->check.task->expire = expire;
Sergiy Prykhodko1d57e502013-09-21 12:05:00 +0300458 /* requeue check task with new expire */
459 task_queue(s->check.task);
460 }
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100461 }
462}
463
Willy Tarreaua1dab552014-04-14 15:04:54 +0200464static int httpchk_build_status_header(struct server *s, char *buffer, int size)
Willy Tarreauef781042010-01-27 11:53:01 +0100465{
466 int sv_state;
467 int ratio;
468 int hlen = 0;
Joseph Lynch514061c2015-01-15 17:52:59 -0800469 char addr[46];
470 char port[6];
Willy Tarreauef781042010-01-27 11:53:01 +0100471 const char *srv_hlt_st[7] = { "DOWN", "DOWN %d/%d",
472 "UP %d/%d", "UP",
473 "NOLB %d/%d", "NOLB",
474 "no check" };
475
476 memcpy(buffer + hlen, "X-Haproxy-Server-State: ", 24);
477 hlen += 24;
478
Willy Tarreauff5ae352013-12-11 20:36:34 +0100479 if (!(s->check.state & CHK_ST_ENABLED))
480 sv_state = 6;
Emeric Brun52a91d32017-08-31 14:41:55 +0200481 else if (s->cur_state != SRV_ST_STOPPED) {
Simon Horman58c32972013-11-25 10:46:38 +0900482 if (s->check.health == s->check.rise + s->check.fall - 1)
Willy Tarreauef781042010-01-27 11:53:01 +0100483 sv_state = 3; /* UP */
484 else
485 sv_state = 2; /* going down */
486
Emeric Brun52a91d32017-08-31 14:41:55 +0200487 if (s->cur_state == SRV_ST_STOPPING)
Willy Tarreauef781042010-01-27 11:53:01 +0100488 sv_state += 2;
489 } else {
Simon Horman125d0992013-02-24 17:23:38 +0900490 if (s->check.health)
Willy Tarreauef781042010-01-27 11:53:01 +0100491 sv_state = 1; /* going up */
492 else
493 sv_state = 0; /* DOWN */
494 }
495
Willy Tarreaua1dab552014-04-14 15:04:54 +0200496 hlen += snprintf(buffer + hlen, size - hlen,
Willy Tarreauef781042010-01-27 11:53:01 +0100497 srv_hlt_st[sv_state],
Emeric Brun52a91d32017-08-31 14:41:55 +0200498 (s->cur_state != SRV_ST_STOPPED) ? (s->check.health - s->check.rise + 1) : (s->check.health),
499 (s->cur_state != SRV_ST_STOPPED) ? (s->check.fall) : (s->check.rise));
Willy Tarreauef781042010-01-27 11:53:01 +0100500
Joseph Lynch514061c2015-01-15 17:52:59 -0800501 addr_to_str(&s->addr, addr, sizeof(addr));
Willy Tarreau04276f32017-01-06 17:41:29 +0100502 if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
503 snprintf(port, sizeof(port), "%u", s->svc_port);
504 else
505 *port = 0;
Joseph Lynch514061c2015-01-15 17:52:59 -0800506
507 hlen += snprintf(buffer + hlen, size - hlen, "; address=%s; port=%s; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d",
508 addr, port, s->proxy->id, s->id,
Willy Tarreauef781042010-01-27 11:53:01 +0100509 global.node,
Emeric Brun52a91d32017-08-31 14:41:55 +0200510 (s->cur_eweight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
Willy Tarreauef781042010-01-27 11:53:01 +0100511 (s->proxy->lbprm.tot_weight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
512 s->cur_sess, s->proxy->beconn - s->proxy->nbpend,
513 s->nbpend);
514
Emeric Brun52a91d32017-08-31 14:41:55 +0200515 if ((s->cur_state == SRV_ST_STARTING) &&
Willy Tarreauef781042010-01-27 11:53:01 +0100516 now.tv_sec < s->last_change + s->slowstart &&
517 now.tv_sec >= s->last_change) {
518 ratio = MAX(1, 100 * (now.tv_sec - s->last_change) / s->slowstart);
Willy Tarreaua1dab552014-04-14 15:04:54 +0200519 hlen += snprintf(buffer + hlen, size - hlen, "; throttle=%d%%", ratio);
Willy Tarreauef781042010-01-27 11:53:01 +0100520 }
521
522 buffer[hlen++] = '\r';
523 buffer[hlen++] = '\n';
524
525 return hlen;
526}
527
Willy Tarreau20a18342013-12-05 00:31:46 +0100528/* Check the connection. If an error has already been reported or the socket is
529 * closed, keep errno intact as it is supposed to contain the valid error code.
530 * If no error is reported, check the socket's error queue using getsockopt().
531 * Warning, this must be done only once when returning from poll, and never
532 * after an I/O error was attempted, otherwise the error queue might contain
533 * inconsistent errors. If an error is detected, the CO_FL_ERROR is set on the
534 * socket. Returns non-zero if an error was reported, zero if everything is
535 * clean (including a properly closed socket).
536 */
537static int retrieve_errno_from_socket(struct connection *conn)
538{
539 int skerr;
540 socklen_t lskerr = sizeof(skerr);
541
542 if (conn->flags & CO_FL_ERROR && ((errno && errno != EAGAIN) || !conn->ctrl))
543 return 1;
544
Willy Tarreau3c728722014-01-23 13:50:42 +0100545 if (!conn_ctrl_ready(conn))
Willy Tarreau20a18342013-12-05 00:31:46 +0100546 return 0;
547
Willy Tarreau585744b2017-08-24 14:31:19 +0200548 if (getsockopt(conn->handle.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0)
Willy Tarreau20a18342013-12-05 00:31:46 +0100549 errno = skerr;
550
551 if (errno == EAGAIN)
552 errno = 0;
553
554 if (!errno) {
555 /* we could not retrieve an error, that does not mean there is
556 * none. Just don't change anything and only report the prior
557 * error if any.
558 */
559 if (conn->flags & CO_FL_ERROR)
560 return 1;
561 else
562 return 0;
563 }
564
565 conn->flags |= CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_SOCK_RD_SH;
566 return 1;
567}
568
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100569/* Try to collect as much information as possible on the connection status,
570 * and adjust the server status accordingly. It may make use of <errno_bck>
571 * if non-null when the caller is absolutely certain of its validity (eg:
572 * checked just after a syscall). If the caller doesn't have a valid errno,
573 * it can pass zero, and retrieve_errno_from_socket() will be called to try
574 * to extract errno from the socket. If no error is reported, it will consider
575 * the <expired> flag. This is intended to be used when a connection error was
576 * reported in conn->flags or when a timeout was reported in <expired>. The
577 * function takes care of not updating a server status which was already set.
578 * All situations where at least one of <expired> or CO_FL_ERROR are set
579 * produce a status.
580 */
Willy Tarreaub5259bf2017-10-04 14:47:29 +0200581static void chk_report_conn_err(struct check *check, int errno_bck, int expired)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100582{
Willy Tarreaub5259bf2017-10-04 14:47:29 +0200583 struct connection *conn = check->conn;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100584 const char *err_msg;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200585 struct chunk *chk;
Willy Tarreau213c6782014-10-02 14:51:02 +0200586 int step;
Baptiste Assmann22b09d22015-05-01 08:03:04 +0200587 char *comment;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100588
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100589 if (check->result != CHK_RES_UNKNOWN)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100590 return;
591
592 errno = errno_bck;
Willy Tarreau00149122017-10-04 18:05:01 +0200593 if (conn && (!errno || errno == EAGAIN))
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100594 retrieve_errno_from_socket(conn);
595
Willy Tarreau00149122017-10-04 18:05:01 +0200596 if (conn && !(conn->flags & CO_FL_ERROR) && !expired)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100597 return;
598
599 /* we'll try to build a meaningful error message depending on the
600 * context of the error possibly present in conn->err_code, and the
601 * socket error possibly collected above. This is useful to know the
602 * exact step of the L6 layer (eg: SSL handshake).
603 */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200604 chk = get_trash_chunk();
605
606 if (check->type == PR_O2_TCPCHK_CHK) {
Simon Hormane16c1b32015-01-30 11:22:57 +0900607 step = tcpcheck_get_step_id(check);
Willy Tarreau213c6782014-10-02 14:51:02 +0200608 if (!step)
609 chunk_printf(chk, " at initial connection step of tcp-check");
610 else {
611 chunk_printf(chk, " at step %d of tcp-check", step);
612 /* we were looking for a string */
613 if (check->last_started_step && check->last_started_step->action == TCPCHK_ACT_CONNECT) {
614 if (check->last_started_step->port)
615 chunk_appendf(chk, " (connect port %d)" ,check->last_started_step->port);
616 else
617 chunk_appendf(chk, " (connect)");
618 }
619 else if (check->last_started_step && check->last_started_step->action == TCPCHK_ACT_EXPECT) {
620 if (check->last_started_step->string)
Baptiste Assmann96a5c9b2015-05-01 08:09:29 +0200621 chunk_appendf(chk, " (expect string '%s')", check->last_started_step->string);
Willy Tarreau213c6782014-10-02 14:51:02 +0200622 else if (check->last_started_step->expect_regex)
623 chunk_appendf(chk, " (expect regex)");
624 }
625 else if (check->last_started_step && check->last_started_step->action == TCPCHK_ACT_SEND) {
626 chunk_appendf(chk, " (send)");
627 }
Baptiste Assmann22b09d22015-05-01 08:03:04 +0200628
629 comment = tcpcheck_get_step_comment(check, step);
630 if (comment)
631 chunk_appendf(chk, " comment: '%s'", comment);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200632 }
633 }
634
Willy Tarreau00149122017-10-04 18:05:01 +0200635 if (conn && conn->err_code) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100636 if (errno && errno != EAGAIN)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200637 chunk_printf(&trash, "%s (%s)%s", conn_err_code_str(conn), strerror(errno), chk->str);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100638 else
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200639 chunk_printf(&trash, "%s%s", conn_err_code_str(conn), chk->str);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100640 err_msg = trash.str;
641 }
642 else {
643 if (errno && errno != EAGAIN) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200644 chunk_printf(&trash, "%s%s", strerror(errno), chk->str);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100645 err_msg = trash.str;
646 }
647 else {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200648 err_msg = chk->str;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100649 }
650 }
651
Willy Tarreau00149122017-10-04 18:05:01 +0200652 if (check->state & CHK_ST_PORT_MISS) {
Baptiste Assmann95db2bc2016-06-13 14:15:41 +0200653 /* NOTE: this is reported after <fall> tries */
654 chunk_printf(chk, "No port available for the TCP connection");
655 set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
656 }
657
Willy Tarreau00149122017-10-04 18:05:01 +0200658 if (!conn) {
659 /* connection allocation error before the connection was established */
660 set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
661 }
662 else if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L4_CONN)) == CO_FL_WAIT_L4_CONN) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100663 /* L4 not established (yet) */
664 if (conn->flags & CO_FL_ERROR)
665 set_server_check_status(check, HCHK_STATUS_L4CON, err_msg);
666 else if (expired)
667 set_server_check_status(check, HCHK_STATUS_L4TOUT, err_msg);
Baptiste Assmanna68ca962015-04-14 01:15:08 +0200668
669 /*
670 * might be due to a server IP change.
671 * Let's trigger a DNS resolution if none are currently running.
672 */
Christopher Faulet67957bd2017-09-27 11:00:59 +0200673 dns_trigger_resolution(check->server->dns_requester);
Baptiste Assmanna68ca962015-04-14 01:15:08 +0200674
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100675 }
676 else if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L6_CONN)) == CO_FL_WAIT_L6_CONN) {
677 /* L6 not established (yet) */
678 if (conn->flags & CO_FL_ERROR)
679 set_server_check_status(check, HCHK_STATUS_L6RSP, err_msg);
680 else if (expired)
681 set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
682 }
683 else if (conn->flags & CO_FL_ERROR) {
684 /* I/O error after connection was established and before we could diagnose */
685 set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
686 }
687 else if (expired) {
688 /* connection established but expired check */
689 if (check->type == PR_O2_SSL3_CHK)
690 set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
691 else /* HTTP, SMTP, ... */
692 set_server_check_status(check, HCHK_STATUS_L7TOUT, err_msg);
693 }
694
695 return;
696}
697
Willy Tarreaubaaee002006-06-26 02:48:02 +0200698/*
699 * This function is used only for server health-checks. It handles
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200700 * the connection acknowledgement. If the proxy requires L7 health-checks,
701 * it sends the request. In other cases, it calls set_server_check_status()
Simon Horman4a741432013-02-23 15:35:38 +0900702 * to set check->status, check->duration and check->result.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200703 */
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200704static void event_srv_chk_w(struct connection *conn)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200705{
Simon Horman4a741432013-02-23 15:35:38 +0900706 struct check *check = conn->owner;
707 struct server *s = check->server;
Simon Horman4a741432013-02-23 15:35:38 +0900708 struct task *t = check->task;
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200709
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100710 if (unlikely(check->result == CHK_RES_FAILED))
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100711 goto out_wakeup;
712
Willy Tarreau310987a2014-01-22 19:46:33 +0100713 if (conn->flags & CO_FL_HANDSHAKE)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100714 return;
715
Willy Tarreau20a18342013-12-05 00:31:46 +0100716 if (retrieve_errno_from_socket(conn)) {
Willy Tarreaub5259bf2017-10-04 14:47:29 +0200717 chk_report_conn_err(check, errno, 0);
Olivier Houchard1a0545f2017-09-13 18:30:23 +0200718 __conn_xprt_stop_both(conn);
Willy Tarreau20a18342013-12-05 00:31:46 +0100719 goto out_wakeup;
720 }
Krzysztof Piotr Oledzki6492db52010-01-02 22:03:01 +0100721
Willy Tarreaubbae3f02017-08-30 09:59:52 +0200722 if (conn->flags & CO_FL_SOCK_WR_SH) {
Willy Tarreau20a18342013-12-05 00:31:46 +0100723 /* if the output is closed, we can't do anything */
724 conn->flags |= CO_FL_ERROR;
Willy Tarreaub5259bf2017-10-04 14:47:29 +0200725 chk_report_conn_err(check, 0, 0);
Willy Tarreau20a18342013-12-05 00:31:46 +0100726 goto out_wakeup;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200727 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200728
Willy Tarreau06559ac2013-12-05 01:53:08 +0100729 /* here, we know that the connection is established. That's enough for
730 * a pure TCP check.
731 */
732 if (!check->type)
733 goto out_wakeup;
734
Willy Tarreauc09572f2017-10-04 11:58:22 +0200735 /* wake() will take care of calling tcpcheck_main() */
736 if (check->type == PR_O2_TCPCHK_CHK)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200737 return;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200738
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100739 if (check->bo->o) {
Willy Tarreau1049b1f2014-02-02 01:51:17 +0100740 conn->xprt->snd_buf(conn, check->bo, 0);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100741 if (conn->flags & CO_FL_ERROR) {
Willy Tarreaub5259bf2017-10-04 14:47:29 +0200742 chk_report_conn_err(check, errno, 0);
Olivier Houchard1a0545f2017-09-13 18:30:23 +0200743 __conn_xprt_stop_both(conn);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100744 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200745 }
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100746 if (check->bo->o)
747 return;
748 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200749
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100750 /* full request sent, we allow up to <timeout.check> if nonzero for a response */
751 if (s->proxy->timeout.check) {
752 t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
753 task_queue(t);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200754 }
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100755 goto out_nowake;
756
Willy Tarreau83749182007-04-15 20:56:27 +0200757 out_wakeup:
Willy Tarreaufdccded2008-08-29 18:19:04 +0200758 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreau83749182007-04-15 20:56:27 +0200759 out_nowake:
Olivier Houchard1a0545f2017-09-13 18:30:23 +0200760 __conn_xprt_stop_send(conn); /* nothing more to write */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200761}
762
Willy Tarreaubaaee002006-06-26 02:48:02 +0200763/*
Willy Tarreauf3c69202006-07-09 16:42:34 +0200764 * This function is used only for server health-checks. It handles the server's
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +0200765 * reply to an HTTP request, SSL HELLO or MySQL client Auth. It calls
Simon Horman4a741432013-02-23 15:35:38 +0900766 * set_server_check_status() to update check->status, check->duration
767 * and check->result.
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200768
769 * The set_server_check_status function is called with HCHK_STATUS_L7OKD if
770 * an HTTP server replies HTTP 2xx or 3xx (valid responses), if an SMTP server
771 * returns 2xx, HCHK_STATUS_L6OK if an SSL server returns at least 5 bytes in
772 * response to an SSL HELLO (the principle is that this is enough to
773 * distinguish between an SSL server and a pure TCP relay). All other cases will
774 * call it with a proper error status like HCHK_STATUS_L7STS, HCHK_STATUS_L6RSP,
775 * etc.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200776 */
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200777static void event_srv_chk_r(struct connection *conn)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200778{
Simon Horman4a741432013-02-23 15:35:38 +0900779 struct check *check = conn->owner;
780 struct server *s = check->server;
781 struct task *t = check->task;
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200782 char *desc;
Willy Tarreau03938182010-03-17 21:52:07 +0100783 int done;
Gabor Lekenyb4c81e42010-09-29 18:17:05 +0200784 unsigned short msglen;
Willy Tarreau83749182007-04-15 20:56:27 +0200785
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100786 if (unlikely(check->result == CHK_RES_FAILED))
Willy Tarreau83749182007-04-15 20:56:27 +0200787 goto out_wakeup;
Willy Tarreau83749182007-04-15 20:56:27 +0200788
Willy Tarreau310987a2014-01-22 19:46:33 +0100789 if (conn->flags & CO_FL_HANDSHAKE)
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200790 return;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200791
Willy Tarreauc09572f2017-10-04 11:58:22 +0200792 /* wake() will take care of calling tcpcheck_main() */
793 if (check->type == PR_O2_TCPCHK_CHK)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200794 return;
Willy Tarreaufb56aab2012-09-28 14:40:02 +0200795
Willy Tarreau83749182007-04-15 20:56:27 +0200796 /* Warning! Linux returns EAGAIN on SO_ERROR if data are still available
797 * but the connection was closed on the remote end. Fortunately, recv still
798 * works correctly and we don't need to do the getsockopt() on linux.
799 */
Nick Chalk57b1bf72010-03-16 15:50:46 +0000800
801 /* Set buffer to point to the end of the data already read, and check
802 * that there is free space remaining. If the buffer is full, proceed
803 * with running the checks without attempting another socket read.
804 */
Nick Chalk57b1bf72010-03-16 15:50:46 +0000805
Willy Tarreau03938182010-03-17 21:52:07 +0100806 done = 0;
Nick Chalk57b1bf72010-03-16 15:50:46 +0000807
Simon Horman4a741432013-02-23 15:35:38 +0900808 conn->xprt->rcv_buf(conn, check->bi, check->bi->size);
Willy Tarreau54e917c2017-08-30 07:35:35 +0200809 if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH)) {
Willy Tarreau03938182010-03-17 21:52:07 +0100810 done = 1;
Simon Horman4a741432013-02-23 15:35:38 +0900811 if ((conn->flags & CO_FL_ERROR) && !check->bi->i) {
Willy Tarreauf1503172012-09-28 19:39:36 +0200812 /* Report network errors only if we got no other data. Otherwise
813 * we'll let the upper layers decide whether the response is OK
814 * or not. It is very common that an RST sent by the server is
815 * reported as an error just after the last data chunk.
816 */
Willy Tarreaub5259bf2017-10-04 14:47:29 +0200817 chk_report_conn_err(check, errno, 0);
Willy Tarreauc1a07962010-03-16 20:55:43 +0100818 goto out_wakeup;
819 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200820 }
821
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100822
Willy Tarreau03938182010-03-17 21:52:07 +0100823 /* Intermediate or complete response received.
Simon Horman4a741432013-02-23 15:35:38 +0900824 * Terminate string in check->bi->data buffer.
Willy Tarreau03938182010-03-17 21:52:07 +0100825 */
Simon Horman4a741432013-02-23 15:35:38 +0900826 if (check->bi->i < check->bi->size)
827 check->bi->data[check->bi->i] = '\0';
Willy Tarreau03938182010-03-17 21:52:07 +0100828 else {
Simon Horman4a741432013-02-23 15:35:38 +0900829 check->bi->data[check->bi->i - 1] = '\0';
Willy Tarreau03938182010-03-17 21:52:07 +0100830 done = 1; /* buffer full, don't wait for more data */
831 }
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200832
Nick Chalk57b1bf72010-03-16 15:50:46 +0000833 /* Run the checks... */
Simon Horman4a741432013-02-23 15:35:38 +0900834 switch (check->type) {
Willy Tarreau1620ec32011-08-06 17:05:02 +0200835 case PR_O2_HTTP_CHK:
Simon Horman4a741432013-02-23 15:35:38 +0900836 if (!done && check->bi->i < strlen("HTTP/1.0 000\r"))
Willy Tarreau03938182010-03-17 21:52:07 +0100837 goto wait_more_data;
838
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100839 /* Check if the server speaks HTTP 1.X */
Simon Horman4a741432013-02-23 15:35:38 +0900840 if ((check->bi->i < strlen("HTTP/1.0 000\r")) ||
841 (memcmp(check->bi->data, "HTTP/1.", 7) != 0 ||
842 (*(check->bi->data + 12) != ' ' && *(check->bi->data + 12) != '\r')) ||
843 !isdigit((unsigned char) *(check->bi->data + 9)) || !isdigit((unsigned char) *(check->bi->data + 10)) ||
844 !isdigit((unsigned char) *(check->bi->data + 11))) {
845 cut_crlf(check->bi->data);
846 set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200847
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100848 goto out_wakeup;
849 }
850
Simon Horman4a741432013-02-23 15:35:38 +0900851 check->code = str2uic(check->bi->data + 9);
852 desc = ltrim(check->bi->data + 12, ' ');
Nick Chalk57b1bf72010-03-16 15:50:46 +0000853
Willy Tarreaubd741542010-03-16 18:46:54 +0100854 if ((s->proxy->options & PR_O_DISABLE404) &&
Emeric Brun52a91d32017-08-31 14:41:55 +0200855 (s->next_state != SRV_ST_STOPPED) && (check->code == 404)) {
Nick Chalk57b1bf72010-03-16 15:50:46 +0000856 /* 404 may be accepted as "stopping" only if the server was up */
857 cut_crlf(desc);
Simon Horman4a741432013-02-23 15:35:38 +0900858 set_server_check_status(check, HCHK_STATUS_L7OKCD, desc);
Nick Chalk57b1bf72010-03-16 15:50:46 +0000859 }
Willy Tarreaubd741542010-03-16 18:46:54 +0100860 else if (s->proxy->options2 & PR_O2_EXP_TYPE) {
861 /* Run content verification check... We know we have at least 13 chars */
862 if (!httpchk_expect(s, done))
863 goto wait_more_data;
864 }
865 /* check the reply : HTTP/1.X 2xx and 3xx are OK */
Simon Horman4a741432013-02-23 15:35:38 +0900866 else if (*(check->bi->data + 9) == '2' || *(check->bi->data + 9) == '3') {
Willy Tarreaubd741542010-03-16 18:46:54 +0100867 cut_crlf(desc);
Simon Horman4a741432013-02-23 15:35:38 +0900868 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Willy Tarreaubd741542010-03-16 18:46:54 +0100869 }
Nick Chalk57b1bf72010-03-16 15:50:46 +0000870 else {
871 cut_crlf(desc);
Simon Horman4a741432013-02-23 15:35:38 +0900872 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Nick Chalk57b1bf72010-03-16 15:50:46 +0000873 }
Willy Tarreau1620ec32011-08-06 17:05:02 +0200874 break;
875
876 case PR_O2_SSL3_CHK:
Simon Horman4a741432013-02-23 15:35:38 +0900877 if (!done && check->bi->i < 5)
Willy Tarreau03938182010-03-17 21:52:07 +0100878 goto wait_more_data;
879
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100880 /* Check for SSLv3 alert or handshake */
Simon Horman4a741432013-02-23 15:35:38 +0900881 if ((check->bi->i >= 5) && (*check->bi->data == 0x15 || *check->bi->data == 0x16))
882 set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200883 else
Simon Horman4a741432013-02-23 15:35:38 +0900884 set_server_check_status(check, HCHK_STATUS_L6RSP, NULL);
Willy Tarreau1620ec32011-08-06 17:05:02 +0200885 break;
886
887 case PR_O2_SMTP_CHK:
Simon Horman4a741432013-02-23 15:35:38 +0900888 if (!done && check->bi->i < strlen("000\r"))
Willy Tarreau03938182010-03-17 21:52:07 +0100889 goto wait_more_data;
890
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200891 /* Check if the server speaks SMTP */
Simon Horman4a741432013-02-23 15:35:38 +0900892 if ((check->bi->i < strlen("000\r")) ||
893 (*(check->bi->data + 3) != ' ' && *(check->bi->data + 3) != '\r') ||
894 !isdigit((unsigned char) *check->bi->data) || !isdigit((unsigned char) *(check->bi->data + 1)) ||
895 !isdigit((unsigned char) *(check->bi->data + 2))) {
896 cut_crlf(check->bi->data);
897 set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200898
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200899 goto out_wakeup;
900 }
901
Simon Horman4a741432013-02-23 15:35:38 +0900902 check->code = str2uic(check->bi->data);
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200903
Simon Horman4a741432013-02-23 15:35:38 +0900904 desc = ltrim(check->bi->data + 3, ' ');
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200905 cut_crlf(desc);
906
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100907 /* Check for SMTP code 2xx (should be 250) */
Simon Horman4a741432013-02-23 15:35:38 +0900908 if (*check->bi->data == '2')
909 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200910 else
Simon Horman4a741432013-02-23 15:35:38 +0900911 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Willy Tarreau1620ec32011-08-06 17:05:02 +0200912 break;
913
Simon Hormana2b9dad2013-02-12 10:45:54 +0900914 case PR_O2_LB_AGENT_CHK: {
Willy Tarreau81f5d942013-12-09 20:51:51 +0100915 int status = HCHK_STATUS_CHECKED;
916 const char *hs = NULL; /* health status */
917 const char *as = NULL; /* admin status */
918 const char *ps = NULL; /* performance status */
Nenad Merdanovic174dd372016-04-24 23:10:06 +0200919 const char *cs = NULL; /* maxconn */
Willy Tarreau81f5d942013-12-09 20:51:51 +0100920 const char *err = NULL; /* first error to report */
921 const char *wrn = NULL; /* first warning to report */
922 char *cmd, *p;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900923
Willy Tarreau81f5d942013-12-09 20:51:51 +0100924 /* We're getting an agent check response. The agent could
925 * have been disabled in the mean time with a long check
926 * still pending. It is important that we ignore the whole
927 * response.
928 */
929 if (!(check->server->agent.state & CHK_ST_ENABLED))
930 break;
931
932 /* The agent supports strings made of a single line ended by the
933 * first CR ('\r') or LF ('\n'). This line is composed of words
934 * delimited by spaces (' '), tabs ('\t'), or commas (','). The
935 * line may optionally contained a description of a state change
936 * after a sharp ('#'), which is only considered if a health state
937 * is announced.
938 *
939 * Words may be composed of :
940 * - a numeric weight suffixed by the percent character ('%').
941 * - a health status among "up", "down", "stopped", and "fail".
942 * - an admin status among "ready", "drain", "maint".
943 *
944 * These words may appear in any order. If multiple words of the
945 * same category appear, the last one wins.
946 */
947
Willy Tarreau9809b782013-12-11 21:40:11 +0100948 p = check->bi->data;
949 while (*p && *p != '\n' && *p != '\r')
950 p++;
951
952 if (!*p) {
953 if (!done)
954 goto wait_more_data;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900955
Willy Tarreau9809b782013-12-11 21:40:11 +0100956 /* at least inform the admin that the agent is mis-behaving */
957 set_server_check_status(check, check->status, "Ignoring incomplete line from agent");
958 break;
959 }
Willy Tarreau81f5d942013-12-09 20:51:51 +0100960
Willy Tarreau9809b782013-12-11 21:40:11 +0100961 *p = 0;
Willy Tarreau81f5d942013-12-09 20:51:51 +0100962 cmd = check->bi->data;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900963
Willy Tarreau81f5d942013-12-09 20:51:51 +0100964 while (*cmd) {
965 /* look for next word */
966 if (*cmd == ' ' || *cmd == '\t' || *cmd == ',') {
967 cmd++;
968 continue;
969 }
Simon Horman671b6f02013-11-25 10:46:39 +0900970
Willy Tarreau81f5d942013-12-09 20:51:51 +0100971 if (*cmd == '#') {
972 /* this is the beginning of a health status description,
973 * skip the sharp and blanks.
974 */
975 cmd++;
976 while (*cmd == '\t' || *cmd == ' ')
977 cmd++;
Simon Horman671b6f02013-11-25 10:46:39 +0900978 break;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900979 }
Willy Tarreau81f5d942013-12-09 20:51:51 +0100980
981 /* find the end of the word so that we have a null-terminated
982 * word between <cmd> and <p>.
983 */
984 p = cmd + 1;
985 while (*p && *p != '\t' && *p != ' ' && *p != '\n' && *p != ',')
986 p++;
987 if (*p)
988 *p++ = 0;
989
990 /* first, health statuses */
991 if (strcasecmp(cmd, "up") == 0) {
992 check->health = check->rise + check->fall - 1;
Simon Hormana2b9dad2013-02-12 10:45:54 +0900993 status = HCHK_STATUS_L7OKD;
Willy Tarreau81f5d942013-12-09 20:51:51 +0100994 hs = cmd;
995 }
996 else if (strcasecmp(cmd, "down") == 0) {
997 check->health = 0;
998 status = HCHK_STATUS_L7STS;
999 hs = cmd;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001000 }
Willy Tarreau81f5d942013-12-09 20:51:51 +01001001 else if (strcasecmp(cmd, "stopped") == 0) {
1002 check->health = 0;
1003 status = HCHK_STATUS_L7STS;
1004 hs = cmd;
1005 }
1006 else if (strcasecmp(cmd, "fail") == 0) {
1007 check->health = 0;
1008 status = HCHK_STATUS_L7STS;
1009 hs = cmd;
1010 }
1011 /* admin statuses */
1012 else if (strcasecmp(cmd, "ready") == 0) {
1013 as = cmd;
1014 }
1015 else if (strcasecmp(cmd, "drain") == 0) {
1016 as = cmd;
1017 }
1018 else if (strcasecmp(cmd, "maint") == 0) {
1019 as = cmd;
1020 }
Nenad Merdanovic174dd372016-04-24 23:10:06 +02001021 /* try to parse a weight here and keep the last one */
Willy Tarreau81f5d942013-12-09 20:51:51 +01001022 else if (isdigit((unsigned char)*cmd) && strchr(cmd, '%') != NULL) {
1023 ps = cmd;
1024 }
Nenad Merdanovic174dd372016-04-24 23:10:06 +02001025 /* try to parse a maxconn here */
1026 else if (strncasecmp(cmd, "maxconn:", strlen("maxconn:")) == 0) {
1027 cs = cmd;
1028 }
Willy Tarreau81f5d942013-12-09 20:51:51 +01001029 else {
1030 /* keep a copy of the first error */
1031 if (!err)
1032 err = cmd;
1033 }
1034 /* skip to next word */
1035 cmd = p;
1036 }
1037 /* here, cmd points either to \0 or to the beginning of a
1038 * description. Skip possible leading spaces.
1039 */
1040 while (*cmd == ' ' || *cmd == '\n')
1041 cmd++;
1042
1043 /* First, update the admin status so that we avoid sending other
1044 * possibly useless warnings and can also update the health if
1045 * present after going back up.
1046 */
1047 if (as) {
1048 if (strcasecmp(as, "drain") == 0)
1049 srv_adm_set_drain(check->server);
1050 else if (strcasecmp(as, "maint") == 0)
1051 srv_adm_set_maint(check->server);
1052 else
1053 srv_adm_set_ready(check->server);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001054 }
1055
Willy Tarreau81f5d942013-12-09 20:51:51 +01001056 /* now change weights */
1057 if (ps) {
1058 const char *msg;
1059
1060 msg = server_parse_weight_change_request(s, ps);
1061 if (!wrn || !*wrn)
1062 wrn = msg;
1063 }
1064
Nenad Merdanovic174dd372016-04-24 23:10:06 +02001065 if (cs) {
1066 const char *msg;
1067
1068 cs += strlen("maxconn:");
1069
1070 msg = server_parse_maxconn_change_request(s, cs);
1071 if (!wrn || !*wrn)
1072 wrn = msg;
1073 }
1074
Willy Tarreau81f5d942013-12-09 20:51:51 +01001075 /* and finally health status */
1076 if (hs) {
1077 /* We'll report some of the warnings and errors we have
1078 * here. Down reports are critical, we leave them untouched.
1079 * Lack of report, or report of 'UP' leaves the room for
1080 * ERR first, then WARN.
Simon Hormana2b9dad2013-02-12 10:45:54 +09001081 */
Willy Tarreau81f5d942013-12-09 20:51:51 +01001082 const char *msg = cmd;
1083 struct chunk *t;
1084
1085 if (!*msg || status == HCHK_STATUS_L7OKD) {
1086 if (err && *err)
1087 msg = err;
1088 else if (wrn && *wrn)
1089 msg = wrn;
Simon Hormana2b9dad2013-02-12 10:45:54 +09001090 }
Willy Tarreau81f5d942013-12-09 20:51:51 +01001091
1092 t = get_trash_chunk();
1093 chunk_printf(t, "via agent : %s%s%s%s",
1094 hs, *msg ? " (" : "",
1095 msg, *msg ? ")" : "");
1096
1097 set_server_check_status(check, status, t->str);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001098 }
Willy Tarreau81f5d942013-12-09 20:51:51 +01001099 else if (err && *err) {
1100 /* No status change but we'd like to report something odd.
1101 * Just report the current state and copy the message.
1102 */
1103 chunk_printf(&trash, "agent reports an error : %s", err);
1104 set_server_check_status(check, status/*check->status*/, trash.str);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001105
Willy Tarreau81f5d942013-12-09 20:51:51 +01001106 }
1107 else if (wrn && *wrn) {
1108 /* No status change but we'd like to report something odd.
1109 * Just report the current state and copy the message.
1110 */
1111 chunk_printf(&trash, "agent warns : %s", wrn);
1112 set_server_check_status(check, status/*check->status*/, trash.str);
1113 }
1114 else
1115 set_server_check_status(check, status, NULL);
Simon Hormana2b9dad2013-02-12 10:45:54 +09001116 break;
1117 }
1118
Willy Tarreau1620ec32011-08-06 17:05:02 +02001119 case PR_O2_PGSQL_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001120 if (!done && check->bi->i < 9)
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001121 goto wait_more_data;
1122
Simon Horman4a741432013-02-23 15:35:38 +09001123 if (check->bi->data[0] == 'R') {
1124 set_server_check_status(check, HCHK_STATUS_L7OKD, "PostgreSQL server is ok");
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001125 }
1126 else {
Simon Horman4a741432013-02-23 15:35:38 +09001127 if ((check->bi->data[0] == 'E') && (check->bi->data[5]!=0) && (check->bi->data[6]!=0))
1128 desc = &check->bi->data[6];
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001129 else
1130 desc = "PostgreSQL unknown error";
1131
Simon Horman4a741432013-02-23 15:35:38 +09001132 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Rauf Kuliyev38b41562011-01-04 15:14:13 +01001133 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001134 break;
1135
1136 case PR_O2_REDIS_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001137 if (!done && check->bi->i < 7)
Hervé COMMOWICKec032d62011-08-05 16:23:48 +02001138 goto wait_more_data;
1139
Simon Horman4a741432013-02-23 15:35:38 +09001140 if (strcmp(check->bi->data, "+PONG\r\n") == 0) {
1141 set_server_check_status(check, HCHK_STATUS_L7OKD, "Redis server is ok");
Hervé COMMOWICKec032d62011-08-05 16:23:48 +02001142 }
1143 else {
Simon Horman4a741432013-02-23 15:35:38 +09001144 set_server_check_status(check, HCHK_STATUS_L7STS, check->bi->data);
Hervé COMMOWICKec032d62011-08-05 16:23:48 +02001145 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001146 break;
1147
1148 case PR_O2_MYSQL_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001149 if (!done && check->bi->i < 5)
Willy Tarreau03938182010-03-17 21:52:07 +01001150 goto wait_more_data;
1151
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001152 if (s->proxy->check_len == 0) { // old mode
Simon Horman4a741432013-02-23 15:35:38 +09001153 if (*(check->bi->data + 4) != '\xff') {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001154 /* We set the MySQL Version in description for information purpose
1155 * FIXME : it can be cool to use MySQL Version for other purpose,
1156 * like mark as down old MySQL server.
1157 */
Simon Horman4a741432013-02-23 15:35:38 +09001158 if (check->bi->i > 51) {
1159 desc = ltrim(check->bi->data + 5, ' ');
1160 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001161 }
1162 else {
1163 if (!done)
1164 goto wait_more_data;
1165 /* it seems we have a OK packet but without a valid length,
1166 * it must be a protocol error
1167 */
Simon Horman4a741432013-02-23 15:35:38 +09001168 set_server_check_status(check, HCHK_STATUS_L7RSP, check->bi->data);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001169 }
Hervé COMMOWICK698ae002010-01-12 09:25:13 +01001170 }
1171 else {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001172 /* An error message is attached in the Error packet */
Simon Horman4a741432013-02-23 15:35:38 +09001173 desc = ltrim(check->bi->data + 7, ' ');
1174 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001175 }
1176 } else {
Simon Horman4a741432013-02-23 15:35:38 +09001177 unsigned int first_packet_len = ((unsigned int) *check->bi->data) +
1178 (((unsigned int) *(check->bi->data + 1)) << 8) +
1179 (((unsigned int) *(check->bi->data + 2)) << 16);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001180
Simon Horman4a741432013-02-23 15:35:38 +09001181 if (check->bi->i == first_packet_len + 4) {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001182 /* MySQL Error packet always begin with field_count = 0xff */
Simon Horman4a741432013-02-23 15:35:38 +09001183 if (*(check->bi->data + 4) != '\xff') {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001184 /* We have only one MySQL packet and it is a Handshake Initialization packet
1185 * but we need to have a second packet to know if it is alright
1186 */
Simon Horman4a741432013-02-23 15:35:38 +09001187 if (!done && check->bi->i < first_packet_len + 5)
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001188 goto wait_more_data;
1189 }
1190 else {
1191 /* We have only one packet and it is an Error packet,
1192 * an error message is attached, so we can display it
1193 */
Simon Horman4a741432013-02-23 15:35:38 +09001194 desc = &check->bi->data[7];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001195 //Warning("onlyoneERR: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001196 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001197 }
Simon Horman4a741432013-02-23 15:35:38 +09001198 } else if (check->bi->i > first_packet_len + 4) {
1199 unsigned int second_packet_len = ((unsigned int) *(check->bi->data + first_packet_len + 4)) +
1200 (((unsigned int) *(check->bi->data + first_packet_len + 5)) << 8) +
1201 (((unsigned int) *(check->bi->data + first_packet_len + 6)) << 16);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001202
Simon Horman4a741432013-02-23 15:35:38 +09001203 if (check->bi->i == first_packet_len + 4 + second_packet_len + 4 ) {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001204 /* We have 2 packets and that's good */
1205 /* Check if the second packet is a MySQL Error packet or not */
Simon Horman4a741432013-02-23 15:35:38 +09001206 if (*(check->bi->data + first_packet_len + 8) != '\xff') {
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001207 /* No error packet */
1208 /* We set the MySQL Version in description for information purpose */
Simon Horman4a741432013-02-23 15:35:38 +09001209 desc = &check->bi->data[5];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001210 //Warning("2packetOK: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001211 set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001212 }
1213 else {
1214 /* An error message is attached in the Error packet
1215 * so we can display it ! :)
1216 */
Simon Horman4a741432013-02-23 15:35:38 +09001217 desc = &check->bi->data[first_packet_len+11];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001218 //Warning("2packetERR: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001219 set_server_check_status(check, HCHK_STATUS_L7STS, desc);
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001220 }
1221 }
1222 }
1223 else {
Willy Tarreau03938182010-03-17 21:52:07 +01001224 if (!done)
1225 goto wait_more_data;
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001226 /* it seems we have a Handshake Initialization packet but without a valid length,
Hervé COMMOWICK698ae002010-01-12 09:25:13 +01001227 * it must be a protocol error
1228 */
Simon Horman4a741432013-02-23 15:35:38 +09001229 desc = &check->bi->data[5];
Hervé COMMOWICK8776f1b2010-10-18 15:58:36 +02001230 //Warning("protoerr: %s\n", desc);
Simon Horman4a741432013-02-23 15:35:38 +09001231 set_server_check_status(check, HCHK_STATUS_L7RSP, desc);
Hervé COMMOWICK698ae002010-01-12 09:25:13 +01001232 }
1233 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001234 break;
1235
1236 case PR_O2_LDAP_CHK:
Simon Horman4a741432013-02-23 15:35:38 +09001237 if (!done && check->bi->i < 14)
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001238 goto wait_more_data;
1239
1240 /* Check if the server speaks LDAP (ASN.1/BER)
1241 * http://en.wikipedia.org/wiki/Basic_Encoding_Rules
1242 * http://tools.ietf.org/html/rfc4511
1243 */
1244
1245 /* http://tools.ietf.org/html/rfc4511#section-4.1.1
1246 * LDAPMessage: 0x30: SEQUENCE
1247 */
Simon Horman4a741432013-02-23 15:35:38 +09001248 if ((check->bi->i < 14) || (*(check->bi->data) != '\x30')) {
1249 set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001250 }
1251 else {
1252 /* size of LDAPMessage */
Simon Horman4a741432013-02-23 15:35:38 +09001253 msglen = (*(check->bi->data + 1) & 0x80) ? (*(check->bi->data + 1) & 0x7f) : 0;
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001254
1255 /* http://tools.ietf.org/html/rfc4511#section-4.2.2
1256 * messageID: 0x02 0x01 0x01: INTEGER 1
1257 * protocolOp: 0x61: bindResponse
1258 */
1259 if ((msglen > 2) ||
Simon Horman4a741432013-02-23 15:35:38 +09001260 (memcmp(check->bi->data + 2 + msglen, "\x02\x01\x01\x61", 4) != 0)) {
1261 set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001262
1263 goto out_wakeup;
1264 }
1265
1266 /* size of bindResponse */
Simon Horman4a741432013-02-23 15:35:38 +09001267 msglen += (*(check->bi->data + msglen + 6) & 0x80) ? (*(check->bi->data + msglen + 6) & 0x7f) : 0;
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001268
1269 /* http://tools.ietf.org/html/rfc4511#section-4.1.9
1270 * ldapResult: 0x0a 0x01: ENUMERATION
1271 */
1272 if ((msglen > 4) ||
Simon Horman4a741432013-02-23 15:35:38 +09001273 (memcmp(check->bi->data + 7 + msglen, "\x0a\x01", 2) != 0)) {
1274 set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001275
1276 goto out_wakeup;
1277 }
1278
1279 /* http://tools.ietf.org/html/rfc4511#section-4.1.9
1280 * resultCode
1281 */
Simon Horman4a741432013-02-23 15:35:38 +09001282 check->code = *(check->bi->data + msglen + 9);
1283 if (check->code) {
1284 set_server_check_status(check, HCHK_STATUS_L7STS, "See RFC: http://tools.ietf.org/html/rfc4511#section-4.1.9");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001285 } else {
Simon Horman4a741432013-02-23 15:35:38 +09001286 set_server_check_status(check, HCHK_STATUS_L7OKD, "Success");
Gabor Lekenyb4c81e42010-09-29 18:17:05 +02001287 }
1288 }
Willy Tarreau1620ec32011-08-06 17:05:02 +02001289 break;
1290
Christopher Fauletba7bc162016-11-07 21:07:38 +01001291 case PR_O2_SPOP_CHK: {
1292 unsigned int framesz;
1293 char err[HCHK_DESC_LEN];
1294
1295 if (!done && check->bi->i < 4)
1296 goto wait_more_data;
1297
1298 memcpy(&framesz, check->bi->data, 4);
1299 framesz = ntohl(framesz);
1300
1301 if (!done && check->bi->i < (4+framesz))
1302 goto wait_more_data;
1303
Christopher Faulet8ef75252017-02-20 22:56:03 +01001304 if (!spoe_handle_healthcheck_response(check->bi->data+4, framesz, err, HCHK_DESC_LEN-1))
Christopher Fauletba7bc162016-11-07 21:07:38 +01001305 set_server_check_status(check, HCHK_STATUS_L7OKD, "SPOA server is ok");
1306 else
1307 set_server_check_status(check, HCHK_STATUS_L7STS, err);
1308 break;
1309 }
1310
Willy Tarreau1620ec32011-08-06 17:05:02 +02001311 default:
Willy Tarreau06559ac2013-12-05 01:53:08 +01001312 /* for other checks (eg: pure TCP), delegate to the main task */
Willy Tarreau1620ec32011-08-06 17:05:02 +02001313 break;
1314 } /* switch */
Willy Tarreau83749182007-04-15 20:56:27 +02001315
Willy Tarreauc7dd71a2007-11-30 08:33:21 +01001316 out_wakeup:
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001317 /* collect possible new errors */
1318 if (conn->flags & CO_FL_ERROR)
Willy Tarreaub5259bf2017-10-04 14:47:29 +02001319 chk_report_conn_err(check, 0, 0);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001320
Nick Chalk57b1bf72010-03-16 15:50:46 +00001321 /* Reset the check buffer... */
Simon Horman4a741432013-02-23 15:35:38 +09001322 *check->bi->data = '\0';
1323 check->bi->i = 0;
Nick Chalk57b1bf72010-03-16 15:50:46 +00001324
Steven Davidovitz544d4812017-03-08 11:06:20 -08001325 /* Close the connection... We still attempt to nicely close if,
1326 * for instance, SSL needs to send a "close notify." Later, we perform
1327 * a hard close and reset the connection if some data are pending,
1328 * otherwise we end up with many TIME_WAITs and eat all the source port
1329 * range quickly. To avoid sending RSTs all the time, we first try to
1330 * drain pending data.
Willy Tarreaufd29cc52012-11-23 09:18:20 +01001331 */
Olivier Houchard1a0545f2017-09-13 18:30:23 +02001332 __conn_xprt_stop_both(conn);
1333 conn_xprt_shutw(conn);
Willy Tarreau2b57cb82013-06-10 19:56:38 +02001334
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001335 /* OK, let's not stay here forever */
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001336 if (check->result == CHK_RES_FAILED)
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001337 conn->flags |= CO_FL_ERROR;
1338
Willy Tarreaufdccded2008-08-29 18:19:04 +02001339 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreau3267d362012-08-17 23:53:56 +02001340 return;
Willy Tarreau03938182010-03-17 21:52:07 +01001341
1342 wait_more_data:
Olivier Houchard1a0545f2017-09-13 18:30:23 +02001343 __conn_xprt_want_recv(conn);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001344}
1345
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001346/*
1347 * This function is used only for server health-checks. It handles connection
1348 * status updates including errors. If necessary, it wakes the check task up.
Willy Tarreau6bdcab02017-10-04 18:41:00 +02001349 * It returns 0 on normal cases, <0 if at least one close() has happened on the
1350 * connection (eg: reconnect).
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001351 */
1352static int wake_srv_chk(struct connection *conn)
Willy Tarreau20bea422012-07-06 12:00:49 +02001353{
Simon Horman4a741432013-02-23 15:35:38 +09001354 struct check *check = conn->owner;
Willy Tarreau6bdcab02017-10-04 18:41:00 +02001355 int ret = 0;
Willy Tarreau20bea422012-07-06 12:00:49 +02001356
Willy Tarreauc09572f2017-10-04 11:58:22 +02001357 /* we may have to make progress on the TCP checks */
Willy Tarreau6bdcab02017-10-04 18:41:00 +02001358 if (check->type == PR_O2_TCPCHK_CHK) {
1359 ret = tcpcheck_main(check);
Willy Tarreau00149122017-10-04 18:05:01 +02001360 conn = check->conn;
Willy Tarreau6bdcab02017-10-04 18:41:00 +02001361 }
Willy Tarreauc09572f2017-10-04 11:58:22 +02001362
Willy Tarreau6c560da2012-11-24 11:14:45 +01001363 if (unlikely(conn->flags & CO_FL_ERROR)) {
Willy Tarreau02b0f582013-12-03 15:42:33 +01001364 /* We may get error reports bypassing the I/O handlers, typically
1365 * the case when sending a pure TCP check which fails, then the I/O
1366 * handlers above are not called. This is completely handled by the
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001367 * main processing task so let's simply wake it up. If we get here,
1368 * we expect errno to still be valid.
1369 */
Willy Tarreaub5259bf2017-10-04 14:47:29 +02001370 chk_report_conn_err(check, errno, 0);
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001371
Olivier Houchard1a0545f2017-09-13 18:30:23 +02001372 __conn_xprt_stop_both(conn);
Willy Tarreau2d351b62013-12-05 02:36:25 +01001373 task_wakeup(check->task, TASK_WOKEN_IO);
1374 }
Olivier Houchard1a0545f2017-09-13 18:30:23 +02001375 else if (!(conn->flags & (CO_FL_XPRT_RD_ENA|CO_FL_XPRT_WR_ENA|CO_FL_HANDSHAKE))) {
Willy Tarreau3be293f2014-02-05 18:31:24 +01001376 /* we may get here if only a connection probe was required : we
1377 * don't have any data to send nor anything expected in response,
1378 * so the completion of the connection establishment is enough.
1379 */
1380 task_wakeup(check->task, TASK_WOKEN_IO);
1381 }
Willy Tarreau2d351b62013-12-05 02:36:25 +01001382
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01001383 if (check->result != CHK_RES_UNKNOWN) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +01001384 /* We're here because nobody wants to handle the error, so we
1385 * sure want to abort the hard way.
Willy Tarreau02b0f582013-12-03 15:42:33 +01001386 */
Willy Tarreaud85c4852015-03-13 00:40:28 +01001387 conn_sock_drain(conn);
Willy Tarreau402dbc12017-10-05 17:53:13 +02001388 conn_full_close(conn);
Willy Tarreau6bdcab02017-10-04 18:41:00 +02001389 ret = -1;
Willy Tarreau2d351b62013-12-05 02:36:25 +01001390 }
Willy Tarreau6bdcab02017-10-04 18:41:00 +02001391
1392 /* if a connection got replaced, we must absolutely prevent the connection
1393 * handler from touching its fd, and perform the FD polling updates ourselves
1394 */
1395 if (ret < 0)
1396 conn_cond_update_polling(conn);
1397
1398 return ret;
Willy Tarreau20bea422012-07-06 12:00:49 +02001399}
1400
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001401struct data_cb check_conn_cb = {
1402 .recv = event_srv_chk_r,
1403 .send = event_srv_chk_w,
1404 .wake = wake_srv_chk,
Willy Tarreau8e0bb0a2016-11-24 16:58:12 +01001405 .name = "CHCK",
Willy Tarreaufb56aab2012-09-28 14:40:02 +02001406};
1407
Willy Tarreaubaaee002006-06-26 02:48:02 +02001408/*
Willy Tarreau2e993902011-10-31 11:53:20 +01001409 * updates the server's weight during a warmup stage. Once the final weight is
1410 * reached, the task automatically stops. Note that any server status change
1411 * must have updated s->last_change accordingly.
1412 */
1413static struct task *server_warmup(struct task *t)
1414{
1415 struct server *s = t->context;
1416
1417 /* by default, plan on stopping the task */
1418 t->expire = TICK_ETERNITY;
Emeric Brun52a91d32017-08-31 14:41:55 +02001419 if ((s->next_admin & SRV_ADMF_MAINT) ||
1420 (s->next_state != SRV_ST_STARTING))
Willy Tarreau2e993902011-10-31 11:53:20 +01001421 return t;
1422
Willy Tarreau892337c2014-05-13 23:41:20 +02001423 /* recalculate the weights and update the state */
Willy Tarreau004e0452013-11-21 11:22:01 +01001424 server_recalc_eweight(s);
Willy Tarreau2e993902011-10-31 11:53:20 +01001425
1426 /* probably that we can refill this server with a bit more connections */
Willy Tarreau4aac7db2014-05-16 11:48:10 +02001427 pendconn_grab_from_px(s);
Willy Tarreau2e993902011-10-31 11:53:20 +01001428
1429 /* get back there in 1 second or 1/20th of the slowstart interval,
1430 * whichever is greater, resulting in small 5% steps.
1431 */
Emeric Brun52a91d32017-08-31 14:41:55 +02001432 if (s->next_state == SRV_ST_STARTING)
Willy Tarreau2e993902011-10-31 11:53:20 +01001433 t->expire = tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)));
1434 return t;
1435}
1436
Willy Tarreau894c6422017-10-04 15:58:52 +02001437/* returns the first NON-COMMENT tcp-check rule from list <list> or NULL if
1438 * none was found.
1439 */
1440static struct tcpcheck_rule *get_first_tcpcheck_rule(struct list *list)
1441{
1442 struct tcpcheck_rule *r;
1443
1444 list_for_each_entry(r, list, list) {
1445 if (r->action != TCPCHK_ACT_COMMENT)
1446 return r;
1447 }
1448 return NULL;
1449}
1450
Willy Tarreau2e993902011-10-31 11:53:20 +01001451/*
Simon Horman98637e52014-06-20 12:30:16 +09001452 * establish a server health-check that makes use of a connection.
Simon Hormanb00d17a2014-06-13 16:18:16 +09001453 *
1454 * It can return one of :
Willy Tarreaue7dff022015-04-03 01:14:29 +02001455 * - SF_ERR_NONE if everything's OK and tcpcheck_main() was not called
1456 * - SF_ERR_UP if if everything's OK and tcpcheck_main() was called
1457 * - SF_ERR_SRVTO if there are no more servers
1458 * - SF_ERR_SRVCL if the connection was refused by the server
1459 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1460 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1461 * - SF_ERR_INTERNAL for any other purely internal errors
Baptiste Assmann95db2bc2016-06-13 14:15:41 +02001462 * - SF_ERR_CHK_PORT if no port could be found to run a health check on an AF_INET* socket
Tim Düsterhus4896c442016-11-29 02:15:19 +01001463 * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
Simon Hormanb00d17a2014-06-13 16:18:16 +09001464 * Note that we try to prevent the network stack from sending the ACK during the
1465 * connect() when a pure TCP check is used (without PROXY protocol).
1466 */
Simon Horman98637e52014-06-20 12:30:16 +09001467static int connect_conn_chk(struct task *t)
Simon Hormanb00d17a2014-06-13 16:18:16 +09001468{
1469 struct check *check = t->context;
1470 struct server *s = check->server;
1471 struct connection *conn = check->conn;
1472 struct protocol *proto;
Willy Tarreauf411cce2017-10-04 16:21:19 +02001473 struct tcpcheck_rule *tcp_rule = NULL;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001474 int ret;
Willy Tarreauf3d34822014-12-08 12:11:28 +01001475 int quickack;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001476
Willy Tarreau00149122017-10-04 18:05:01 +02001477 /* we cannot have a connection here */
1478 if (conn)
1479 return SF_ERR_INTERNAL;
1480
Simon Hormanb00d17a2014-06-13 16:18:16 +09001481 /* tcpcheck send/expect initialisation */
Willy Tarreauf411cce2017-10-04 16:21:19 +02001482 if (check->type == PR_O2_TCPCHK_CHK) {
Simon Hormanb00d17a2014-06-13 16:18:16 +09001483 check->current_step = NULL;
Willy Tarreauf411cce2017-10-04 16:21:19 +02001484 tcp_rule = get_first_tcpcheck_rule(check->tcpcheck_rules);
1485 }
Simon Hormanb00d17a2014-06-13 16:18:16 +09001486
1487 /* prepare the check buffer.
1488 * This should not be used if check is the secondary agent check
1489 * of a server as s->proxy->check_req will relate to the
1490 * configuration of the primary check. Similarly, tcp-check uses
1491 * its own strings.
1492 */
1493 if (check->type && check->type != PR_O2_TCPCHK_CHK && !(check->state & CHK_ST_AGENT)) {
1494 bo_putblk(check->bo, s->proxy->check_req, s->proxy->check_len);
1495
1496 /* we want to check if this host replies to HTTP or SSLv3 requests
1497 * so we'll send the request, and won't wake the checker up now.
1498 */
1499 if ((check->type) == PR_O2_SSL3_CHK) {
1500 /* SSL requires that we put Unix time in the request */
1501 int gmt_time = htonl(date.tv_sec);
1502 memcpy(check->bo->data + 11, &gmt_time, 4);
1503 }
1504 else if ((check->type) == PR_O2_HTTP_CHK) {
1505 if (s->proxy->options2 & PR_O2_CHK_SNDST)
1506 bo_putblk(check->bo, trash.str, httpchk_build_status_header(s, trash.str, trash.size));
Cyril Bonté32602d22015-01-30 00:07:07 +01001507 /* prevent HTTP keep-alive when "http-check expect" is used */
1508 if (s->proxy->options2 & PR_O2_EXP_TYPE)
1509 bo_putstr(check->bo, "Connection: close\r\n");
Simon Hormanb00d17a2014-06-13 16:18:16 +09001510 bo_putstr(check->bo, "\r\n");
1511 *check->bo->p = '\0'; /* to make gdb output easier to read */
1512 }
1513 }
1514
James Brown55f9ff12015-10-21 18:19:05 -07001515 if ((check->type & PR_O2_LB_AGENT_CHK) && check->send_string_len) {
1516 bo_putblk(check->bo, check->send_string, check->send_string_len);
1517 }
1518
Willy Tarreauf411cce2017-10-04 16:21:19 +02001519 /* for tcp-checks, the initial connection setup is handled separately as
1520 * it may be sent to a specific port and not to the server's.
1521 */
1522 if (tcp_rule && tcp_rule->action == TCPCHK_ACT_CONNECT) {
1523 tcpcheck_main(check);
1524 return SF_ERR_UP;
1525 }
1526
Simon Hormanb00d17a2014-06-13 16:18:16 +09001527 /* prepare a new connection */
Willy Tarreau00149122017-10-04 18:05:01 +02001528 conn = check->conn = conn_new();
1529 if (!check->conn)
1530 return SF_ERR_RESOURCE;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001531
Simon Horman41f58762015-01-30 11:22:56 +09001532 if (is_addr(&check->addr)) {
Simon Hormanb00d17a2014-06-13 16:18:16 +09001533 /* we'll connect to the check addr specified on the server */
Simon Horman41f58762015-01-30 11:22:56 +09001534 conn->addr.to = check->addr;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001535 }
1536 else {
1537 /* we'll connect to the addr on the server */
1538 conn->addr.to = s->addr;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001539 }
1540
Baptiste Assmann95db2bc2016-06-13 14:15:41 +02001541 if ((conn->addr.to.ss_family == AF_INET) || (conn->addr.to.ss_family == AF_INET6)) {
1542 int i = 0;
1543
1544 i = srv_check_healthcheck_port(check);
1545 if (i == 0) {
1546 conn->owner = check;
1547 return SF_ERR_CHK_PORT;
1548 }
1549
1550 set_host_port(&conn->addr.to, i);
Simon Hormanb00d17a2014-06-13 16:18:16 +09001551 }
1552
Thierry FOURNIERbb2ae642015-01-14 11:31:49 +01001553 proto = protocol_by_family(conn->addr.to.ss_family);
1554
1555 conn_prepare(conn, proto, check->xprt);
1556 conn_attach(conn, check, &check_conn_cb);
1557 conn->target = &s->obj_type;
1558
1559 /* no client address */
1560 clear_addr(&conn->addr.from);
1561
Willy Tarreauf3d34822014-12-08 12:11:28 +01001562 /* only plain tcp-check supports quick ACK */
1563 quickack = check->type == 0 || check->type == PR_O2_TCPCHK_CHK;
1564
Willy Tarreauf411cce2017-10-04 16:21:19 +02001565 if (tcp_rule && tcp_rule->action == TCPCHK_ACT_EXPECT)
1566 quickack = 0;
Simon Hormanb00d17a2014-06-13 16:18:16 +09001567
Willy Tarreaue7dff022015-04-03 01:14:29 +02001568 ret = SF_ERR_INTERNAL;
Olivier Houchardb68fda42017-08-04 18:39:01 +02001569 if (proto && proto->connect)
Willy Tarreauf3d34822014-12-08 12:11:28 +01001570 ret = proto->connect(conn, check->type, quickack ? 2 : 0);
Olivier Houchard9130a962017-10-17 17:33:43 +02001571#ifdef USE_OPENSSL
1572 if (s->check.sni)
1573 ssl_sock_set_servername(conn, s->check.sni);
1574#endif
Willy Tarreauf4949772017-05-06 08:45:28 +02001575 if (s->check.send_proxy && !(check->state & CHK_ST_AGENT)) {
Simon Hormanb00d17a2014-06-13 16:18:16 +09001576 conn->send_proxy_ofs = 1;
1577 conn->flags |= CO_FL_SEND_PROXY;
1578 }
1579
1580 return ret;
1581}
1582
Simon Horman98637e52014-06-20 12:30:16 +09001583static struct list pid_list = LIST_HEAD_INIT(pid_list);
1584static struct pool_head *pool2_pid_list;
1585
1586void block_sigchld(void)
1587{
1588 sigset_t set;
1589 sigemptyset(&set);
1590 sigaddset(&set, SIGCHLD);
Willy Tarreauebc92442016-06-21 17:29:46 +02001591 assert(sigprocmask(SIG_BLOCK, &set, NULL) == 0);
Simon Horman98637e52014-06-20 12:30:16 +09001592}
1593
1594void unblock_sigchld(void)
1595{
1596 sigset_t set;
1597 sigemptyset(&set);
Willy Tarreauebc92442016-06-21 17:29:46 +02001598 sigaddset(&set, SIGCHLD);
1599 assert(sigprocmask(SIG_UNBLOCK, &set, NULL) == 0);
Simon Horman98637e52014-06-20 12:30:16 +09001600}
1601
Simon Horman98637e52014-06-20 12:30:16 +09001602static struct pid_list *pid_list_add(pid_t pid, struct task *t)
1603{
1604 struct pid_list *elem;
1605 struct check *check = t->context;
1606
1607 elem = pool_alloc2(pool2_pid_list);
1608 if (!elem)
1609 return NULL;
1610 elem->pid = pid;
1611 elem->t = t;
1612 elem->exited = 0;
1613 check->curpid = elem;
1614 LIST_INIT(&elem->list);
1615 LIST_ADD(&pid_list, &elem->list);
1616 return elem;
1617}
1618
Simon Horman98637e52014-06-20 12:30:16 +09001619static void pid_list_del(struct pid_list *elem)
1620{
1621 struct check *check;
1622
1623 if (!elem)
1624 return;
1625
Simon Horman98637e52014-06-20 12:30:16 +09001626 LIST_DEL(&elem->list);
Simon Horman98637e52014-06-20 12:30:16 +09001627 if (!elem->exited)
1628 kill(elem->pid, SIGTERM);
1629
1630 check = elem->t->context;
1631 check->curpid = NULL;
1632 pool_free2(pool2_pid_list, elem);
1633}
1634
1635/* Called from inside SIGCHLD handler, SIGCHLD is blocked */
1636static void pid_list_expire(pid_t pid, int status)
1637{
1638 struct pid_list *elem;
1639
1640 list_for_each_entry(elem, &pid_list, list) {
1641 if (elem->pid == pid) {
1642 elem->t->expire = now_ms;
1643 elem->status = status;
1644 elem->exited = 1;
Cyril Bonté9dbcfab2014-08-07 01:55:39 +02001645 task_wakeup(elem->t, TASK_WOKEN_IO);
Simon Horman98637e52014-06-20 12:30:16 +09001646 return;
1647 }
1648 }
1649}
1650
/* SIGCHLD handler: collects, without blocking, every terminated child that
 * waitpid(0, ...) reports and passes its pid and status to pid_list_expire().
 * The <sh> argument is not used.
 */
static void sigchld_handler(struct sig_handler *sh)
{
	int status;
	pid_t pid;

	for (;;) {
		pid = waitpid(0, &status, WNOHANG);
		if (pid <= 0)
			break;
		pid_list_expire(pid, status);
	}
}
1659
Willy Tarreau48d6bf22016-06-21 16:27:34 +02001660static int init_pid_list(void)
1661{
Simon Horman98637e52014-06-20 12:30:16 +09001662 if (pool2_pid_list != NULL)
1663 /* Nothing to do */
1664 return 0;
1665
Willy Tarreau48d6bf22016-06-21 16:27:34 +02001666 if (!signal_register_fct(SIGCHLD, sigchld_handler, SIGCHLD)) {
Simon Horman98637e52014-06-20 12:30:16 +09001667 Alert("Failed to set signal handler for external health checks: %s. Aborting.\n",
1668 strerror(errno));
1669 return 1;
1670 }
1671
1672 pool2_pid_list = create_pool("pid_list", sizeof(struct pid_list), MEM_F_SHARED);
1673 if (pool2_pid_list == NULL) {
1674 Alert("Failed to allocate memory pool for external health checks: %s. Aborting.\n",
1675 strerror(errno));
1676 return 1;
1677 }
1678
1679 return 0;
1680}
1681
/* helper macro to set an environment variable and jump to a specific label on
 * failure. It is wrapped in do { } while (0) so that it expands to exactly one
 * statement and stays safe when used as the body of an unbraced if/else.
 */
#define EXTCHK_SETENV(check, envidx, value, fail) \
	do { if (extchk_setenv((check), (envidx), (value))) goto fail; } while (0)
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001684
1685/*
Cyril Bontéac92a062014-12-27 22:28:38 +01001686 * helper function to allocate enough memory to store an environment variable.
1687 * It will also check that the environment variable is updatable, and silently
1688 * fail if not.
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001689 */
Cyril Bontéac92a062014-12-27 22:28:38 +01001690static int extchk_setenv(struct check *check, int idx, const char *value)
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001691{
1692 int len, ret;
Cyril Bontéac92a062014-12-27 22:28:38 +01001693 char *envname;
1694 int vmaxlen;
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001695
Cyril Bontéac92a062014-12-27 22:28:38 +01001696 if (idx < 0 || idx >= EXTCHK_SIZE) {
1697 Alert("Illegal environment variable index %d. Aborting.\n", idx);
1698 return 1;
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001699 }
Cyril Bontéac92a062014-12-27 22:28:38 +01001700
1701 envname = extcheck_envs[idx].name;
1702 vmaxlen = extcheck_envs[idx].vmaxlen;
1703
1704 /* Check if the environment variable is already set, and silently reject
1705 * the update if this one is not updatable. */
1706 if ((vmaxlen == EXTCHK_SIZE_EVAL_INIT) && (check->envp[idx]))
1707 return 0;
1708
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001709 /* Instead of sending NOT_USED, sending an empty value is preferable */
1710 if (strcmp(value, "NOT_USED") == 0) {
1711 value = "";
1712 }
Cyril Bontéac92a062014-12-27 22:28:38 +01001713
1714 len = strlen(envname) + 1;
1715 if (vmaxlen == EXTCHK_SIZE_EVAL_INIT)
1716 len += strlen(value);
1717 else
1718 len += vmaxlen;
1719
1720 if (!check->envp[idx])
1721 check->envp[idx] = malloc(len + 1);
1722
1723 if (!check->envp[idx]) {
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001724 Alert("Failed to allocate memory for the environment variable '%s'. Aborting.\n", envname);
1725 return 1;
1726 }
Cyril Bontéac92a062014-12-27 22:28:38 +01001727 ret = snprintf(check->envp[idx], len + 1, "%s=%s", envname, value);
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001728 if (ret < 0) {
1729 Alert("Failed to store the environment variable '%s'. Reason : %s. Aborting.\n", envname, strerror(errno));
1730 return 1;
1731 }
Cyril Bontéac92a062014-12-27 22:28:38 +01001732 else if (ret > len) {
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001733 Alert("Environment variable '%s' was truncated. Aborting.\n", envname);
1734 return 1;
1735 }
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001736 return 0;
1737}
Simon Horman98637e52014-06-20 12:30:16 +09001738
1739static int prepare_external_check(struct check *check)
1740{
1741 struct server *s = check->server;
1742 struct proxy *px = s->proxy;
1743 struct listener *listener = NULL, *l;
1744 int i;
Simon Horman98637e52014-06-20 12:30:16 +09001745 const char *path = px->check_path ? px->check_path : DEF_CHECK_PATH;
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001746 char buf[256];
Simon Horman98637e52014-06-20 12:30:16 +09001747
1748 list_for_each_entry(l, &px->conf.listeners, by_fe)
1749 /* Use the first INET, INET6 or UNIX listener */
1750 if (l->addr.ss_family == AF_INET ||
1751 l->addr.ss_family == AF_INET6 ||
1752 l->addr.ss_family == AF_UNIX) {
1753 listener = l;
1754 break;
1755 }
1756
Simon Horman98637e52014-06-20 12:30:16 +09001757 check->curpid = NULL;
Cyril Bontéac92a062014-12-27 22:28:38 +01001758 check->envp = calloc((EXTCHK_SIZE + 1), sizeof(char *));
1759 if (!check->envp) {
1760 Alert("Failed to allocate memory for environment variables. Aborting\n");
1761 goto err;
1762 }
Simon Horman98637e52014-06-20 12:30:16 +09001763
Cyril Bontéac92a062014-12-27 22:28:38 +01001764 check->argv = calloc(6, sizeof(char *));
1765 if (!check->argv) {
1766 Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
Simon Horman98637e52014-06-20 12:30:16 +09001767 goto err;
Cyril Bontéac92a062014-12-27 22:28:38 +01001768 }
Simon Horman98637e52014-06-20 12:30:16 +09001769
1770 check->argv[0] = px->check_command;
1771
Cyril Bonté777be862014-12-02 21:21:35 +01001772 if (!listener) {
1773 check->argv[1] = strdup("NOT_USED");
1774 check->argv[2] = strdup("NOT_USED");
1775 }
1776 else if (listener->addr.ss_family == AF_INET ||
Simon Horman98637e52014-06-20 12:30:16 +09001777 listener->addr.ss_family == AF_INET6) {
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001778 addr_to_str(&listener->addr, buf, sizeof(buf));
1779 check->argv[1] = strdup(buf);
1780 port_to_str(&listener->addr, buf, sizeof(buf));
1781 check->argv[2] = strdup(buf);
Cyril Bonté777be862014-12-02 21:21:35 +01001782 }
1783 else if (listener->addr.ss_family == AF_UNIX) {
Simon Horman98637e52014-06-20 12:30:16 +09001784 const struct sockaddr_un *un;
1785
1786 un = (struct sockaddr_un *)&listener->addr;
1787 check->argv[1] = strdup(un->sun_path);
1788 check->argv[2] = strdup("NOT_USED");
Cyril Bonté777be862014-12-02 21:21:35 +01001789 }
1790 else {
Cyril Bontéac92a062014-12-27 22:28:38 +01001791 Alert("Starting [%s:%s] check: unsupported address family.\n", px->id, s->id);
Simon Horman98637e52014-06-20 12:30:16 +09001792 goto err;
1793 }
1794
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001795 addr_to_str(&s->addr, buf, sizeof(buf));
1796 check->argv[3] = strdup(buf);
Willy Tarreau04276f32017-01-06 17:41:29 +01001797
1798 if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
1799 snprintf(buf, sizeof(buf), "%u", s->svc_port);
1800 else
1801 *buf = 0;
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001802 check->argv[4] = strdup(buf);
Simon Horman98637e52014-06-20 12:30:16 +09001803
Cyril Bontéac92a062014-12-27 22:28:38 +01001804 for (i = 0; i < 5; i++) {
1805 if (!check->argv[i]) {
1806 Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
Simon Horman98637e52014-06-20 12:30:16 +09001807 goto err;
Cyril Bontéac92a062014-12-27 22:28:38 +01001808 }
1809 }
Simon Horman98637e52014-06-20 12:30:16 +09001810
Cyril Bontéac92a062014-12-27 22:28:38 +01001811 EXTCHK_SETENV(check, EXTCHK_PATH, path, err);
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001812 /* Add proxy environment variables */
Cyril Bontéac92a062014-12-27 22:28:38 +01001813 EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_NAME, px->id, err);
1814 EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ID, ultoa_r(px->uuid, buf, sizeof(buf)), err);
1815 EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ADDR, check->argv[1], err);
1816 EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_PORT, check->argv[2], err);
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001817 /* Add server environment variables */
Cyril Bontéac92a062014-12-27 22:28:38 +01001818 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_NAME, s->id, err);
1819 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ID, ultoa_r(s->puid, buf, sizeof(buf)), err);
1820 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ADDR, check->argv[3], err);
1821 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_PORT, check->argv[4], err);
1822 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_MAXCONN, ultoa_r(s->maxconn, buf, sizeof(buf)), err);
1823 EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)), err);
1824
1825 /* Ensure that we don't leave any hole in check->envp */
1826 for (i = 0; i < EXTCHK_SIZE; i++)
1827 if (!check->envp[i])
1828 EXTCHK_SETENV(check, i, "", err);
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001829
Cyril Bonté99c5bf52014-08-07 01:55:38 +02001830 return 1;
Simon Horman98637e52014-06-20 12:30:16 +09001831err:
1832 if (check->envp) {
Cyril Bontéac92a062014-12-27 22:28:38 +01001833 for (i = 0; i < EXTCHK_SIZE; i++)
Cyril Bonté9ede66b2014-12-02 21:21:36 +01001834 free(check->envp[i]);
Simon Horman98637e52014-06-20 12:30:16 +09001835 free(check->envp);
1836 check->envp = NULL;
1837 }
1838
1839 if (check->argv) {
1840 for (i = 1; i < 5; i++)
1841 free(check->argv[i]);
1842 free(check->argv);
1843 check->argv = NULL;
1844 }
Cyril Bonté99c5bf52014-08-07 01:55:38 +02001845 return 0;
Simon Horman98637e52014-06-20 12:30:16 +09001846}
1847
Simon Hormanb00d17a2014-06-13 16:18:16 +09001848/*
Simon Horman98637e52014-06-20 12:30:16 +09001849 * establish a server health-check that makes use of a process.
1850 *
1851 * It can return one of :
Willy Tarreaue7dff022015-04-03 01:14:29 +02001852 * - SF_ERR_NONE if everything's OK
Willy Tarreaue7dff022015-04-03 01:14:29 +02001853 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
Tim Düsterhus4896c442016-11-29 02:15:19 +01001854 * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
Simon Horman98637e52014-06-20 12:30:16 +09001855 *
1856 * Blocks and then unblocks SIGCHLD
1857 */
1858static int connect_proc_chk(struct task *t)
1859{
Cyril Bontéac92a062014-12-27 22:28:38 +01001860 char buf[256];
Simon Horman98637e52014-06-20 12:30:16 +09001861 struct check *check = t->context;
1862 struct server *s = check->server;
1863 struct proxy *px = s->proxy;
1864 int status;
1865 pid_t pid;
1866
Willy Tarreaue7dff022015-04-03 01:14:29 +02001867 status = SF_ERR_RESOURCE;
Simon Horman98637e52014-06-20 12:30:16 +09001868
1869 block_sigchld();
1870
1871 pid = fork();
1872 if (pid < 0) {
1873 Alert("Failed to fork process for external health check: %s. Aborting.\n",
1874 strerror(errno));
1875 set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
1876 goto out;
1877 }
1878 if (pid == 0) {
1879 /* Child */
1880 extern char **environ;
Willy Tarreaub7b24782016-06-21 15:32:29 +02001881 int fd;
1882
1883 /* close all FDs. Keep stdin/stdout/stderr in verbose mode */
1884 fd = (global.mode & (MODE_QUIET|MODE_VERBOSE)) == MODE_QUIET ? 0 : 3;
1885
1886 while (fd < global.rlimit_nofile)
1887 close(fd++);
1888
Simon Horman98637e52014-06-20 12:30:16 +09001889 environ = check->envp;
Cyril Bontéac92a062014-12-27 22:28:38 +01001890 extchk_setenv(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)));
Simon Horman98637e52014-06-20 12:30:16 +09001891 execvp(px->check_command, check->argv);
1892 Alert("Failed to exec process for external health check: %s. Aborting.\n",
1893 strerror(errno));
1894 exit(-1);
1895 }
1896
1897 /* Parent */
1898 if (check->result == CHK_RES_UNKNOWN) {
1899 if (pid_list_add(pid, t) != NULL) {
1900 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
1901
1902 if (px->timeout.check && px->timeout.connect) {
1903 int t_con = tick_add(now_ms, px->timeout.connect);
1904 t->expire = tick_first(t->expire, t_con);
1905 }
Willy Tarreaue7dff022015-04-03 01:14:29 +02001906 status = SF_ERR_NONE;
Simon Horman98637e52014-06-20 12:30:16 +09001907 goto out;
1908 }
1909 else {
1910 set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
1911 }
1912 kill(pid, SIGTERM); /* process creation error */
1913 }
1914 else
1915 set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
1916
1917out:
1918 unblock_sigchld();
1919 return status;
1920}
1921
1922/*
Willy Tarreau1e62e2a2017-10-04 15:07:02 +02001923 * manages a server health-check that uses an external process. Returns
Willy Tarreaubaaee002006-06-26 02:48:02 +02001924 * the time the task accepts to wait, or TIME_ETERNITY for infinity.
1925 */
Simon Horman98637e52014-06-20 12:30:16 +09001926static struct task *process_chk_proc(struct task *t)
1927{
1928 struct check *check = t->context;
1929 struct server *s = check->server;
Simon Horman98637e52014-06-20 12:30:16 +09001930 int rv;
1931 int ret;
1932 int expired = tick_is_expired(t->expire, now_ms);
1933
1934 if (!(check->state & CHK_ST_INPROGRESS)) {
1935 /* no check currently running */
1936 if (!expired) /* woke up too early */
1937 return t;
1938
1939 /* we don't send any health-checks when the proxy is
1940 * stopped, the server should not be checked or the check
1941 * is disabled.
1942 */
1943 if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
1944 s->proxy->state == PR_STSTOPPED)
1945 goto reschedule;
1946
1947 /* we'll initiate a new check */
1948 set_server_check_status(check, HCHK_STATUS_START, NULL);
1949
1950 check->state |= CHK_ST_INPROGRESS;
1951
Simon Hormandbf70192015-01-30 11:22:53 +09001952 ret = connect_proc_chk(t);
Willy Tarreaud7c3fbd2017-10-04 15:19:26 +02001953 if (ret == SF_ERR_NONE) {
Willy Tarreau1e62e2a2017-10-04 15:07:02 +02001954 /* the process was forked, we allow up to min(inter,
1955 * timeout.connect) for it to report its status, but
1956 * only when timeout.check is set as it may be to short
1957 * for a full check otherwise.
Simon Horman98637e52014-06-20 12:30:16 +09001958 */
1959 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
1960
1961 if (s->proxy->timeout.check && s->proxy->timeout.connect) {
1962 int t_con = tick_add(now_ms, s->proxy->timeout.connect);
1963 t->expire = tick_first(t->expire, t_con);
1964 }
1965
1966 goto reschedule;
Simon Horman98637e52014-06-20 12:30:16 +09001967 }
1968
Willy Tarreau1e62e2a2017-10-04 15:07:02 +02001969 /* here, we failed to start the check */
Simon Horman98637e52014-06-20 12:30:16 +09001970
1971 check->state &= ~CHK_ST_INPROGRESS;
1972 check_notify_failure(check);
1973
1974 /* we allow up to min(inter, timeout.connect) for a connection
1975 * to establish but only when timeout.check is set
1976 * as it may be to short for a full check otherwise
1977 */
1978 while (tick_is_expired(t->expire, now_ms)) {
1979 int t_con;
1980
1981 t_con = tick_add(t->expire, s->proxy->timeout.connect);
1982 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
1983
1984 if (s->proxy->timeout.check)
1985 t->expire = tick_first(t->expire, t_con);
1986 }
1987 }
1988 else {
1989 /* there was a test running.
1990 * First, let's check whether there was an uncaught error,
1991 * which can happen on connect timeout or error.
1992 */
1993 if (check->result == CHK_RES_UNKNOWN) {
1994 /* good connection is enough for pure TCP check */
1995 struct pid_list *elem = check->curpid;
1996 int status = HCHK_STATUS_UNKNOWN;
1997
1998 if (elem->exited) {
1999 status = elem->status; /* Save in case the process exits between use below */
2000 if (!WIFEXITED(status))
2001 check->code = -1;
2002 else
2003 check->code = WEXITSTATUS(status);
2004 if (!WIFEXITED(status) || WEXITSTATUS(status))
2005 status = HCHK_STATUS_PROCERR;
2006 else
2007 status = HCHK_STATUS_PROCOK;
2008 } else if (expired) {
2009 status = HCHK_STATUS_PROCTOUT;
Willy Tarreaudc3d1902014-07-08 00:56:27 +02002010 Warning("kill %d\n", (int)elem->pid);
Simon Horman98637e52014-06-20 12:30:16 +09002011 kill(elem->pid, SIGTERM);
2012 }
2013 set_server_check_status(check, status, NULL);
2014 }
2015
2016 if (check->result == CHK_RES_FAILED) {
2017 /* a failure or timeout detected */
2018 check_notify_failure(check);
2019 }
2020 else if (check->result == CHK_RES_CONDPASS) {
2021 /* check is OK but asks for stopping mode */
2022 check_notify_stopping(check);
2023 }
2024 else if (check->result == CHK_RES_PASSED) {
2025 /* a success was detected */
2026 check_notify_success(check);
2027 }
2028 check->state &= ~CHK_ST_INPROGRESS;
2029
2030 pid_list_del(check->curpid);
2031
2032 rv = 0;
2033 if (global.spread_checks > 0) {
2034 rv = srv_getinter(check) * global.spread_checks / 100;
2035 rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
2036 }
2037 t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
2038 }
2039
2040 reschedule:
2041 while (tick_is_expired(t->expire, now_ms))
2042 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
2043 return t;
2044}
2045
/*
 * Manages a server health-check that uses a connection. This is a task
 * handler: it always returns the task <t> itself, after having updated
 * t->expire where appropriate.
 *
 * Two situations are handled, depending on CHK_ST_INPROGRESS :
 *  - no check is running : a new one is started via connect_conn_chk() if the
 *    check is enabled, not paused, and the proxy is not stopped. Synchronous
 *    connection errors are reported immediately ;
 *  - a check is running  : its outcome is evaluated (success, error, timeout
 *    or "wait more"), the connection is released, the server is notified and
 *    the next check is scheduled.
 */
static struct task *process_chk_conn(struct task *t)
{
	struct check *check = t->context;
	struct server *s = check->server;
	struct connection *conn = check->conn;
	int rv;
	int ret;
	int expired = tick_is_expired(t->expire, now_ms);

	if (!(check->state & CHK_ST_INPROGRESS)) {
		/* no check currently running */
		if (!expired) /* woke up too early */
			return t;

		/* we don't send any health-checks when the proxy is
		 * stopped, the server should not be checked or the check
		 * is disabled.
		 */
		if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
		    s->proxy->state == PR_STSTOPPED)
			goto reschedule;

		/* we'll initiate a new check */
		set_server_check_status(check, HCHK_STATUS_START, NULL);

		/* mark the check as running and reset both buffers */
		check->state |= CHK_ST_INPROGRESS;
		check->bi->p = check->bi->data;
		check->bi->i = 0;
		check->bo->p = check->bo->data;
		check->bo->o = 0;

		ret = connect_conn_chk(t);
		/* check->conn is re-read after the call since the local copy
		 * was taken before it.
		 */
		conn = check->conn;

		switch (ret) {
		case SF_ERR_UP:
			return t;
		case SF_ERR_NONE:
			/* we allow up to min(inter, timeout.connect) for a connection
			 * to establish but only when timeout.check is set
			 * as it may be too short for a full check otherwise
			 */
			t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));

			if (s->proxy->timeout.check && s->proxy->timeout.connect) {
				int t_con = tick_add(now_ms, s->proxy->timeout.connect);
				t->expire = tick_first(t->expire, t_con);
			}

			if (check->type)
				conn_xprt_want_recv(conn); /* prepare for reading a possible reply */

			goto reschedule;

		case SF_ERR_SRVTO: /* ETIMEDOUT */
		case SF_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
			if (conn)
				conn->flags |= CO_FL_ERROR;
			chk_report_conn_err(check, errno, 0);
			break;
		/* should share same code than cases below */
		case SF_ERR_CHK_PORT:
			check->state |= CHK_ST_PORT_MISS;
			/* fall through */
		case SF_ERR_PRXCOND:
		case SF_ERR_RESOURCE:
		case SF_ERR_INTERNAL:
			if (conn)
				conn->flags |= CO_FL_ERROR;
			chk_report_conn_err(check, conn ? 0 : ENOMEM, 0);
			break;
		}

		/* here, we have seen a synchronous error, no fd was allocated */
		if (conn) {
			conn_free(conn);
			check->conn = conn = NULL;
		}

		check->state &= ~CHK_ST_INPROGRESS;
		check_notify_failure(check);

		/* Push the expiry date into the future by steps of
		 * min(inter, timeout.connect). The connect timeout is only
		 * taken into account when both timeout.check and
		 * timeout.connect are set, exactly like in the SF_ERR_NONE
		 * case above. NOTE(review): the tick helpers are defined
		 * elsewhere; with an unset (zero) timeout.connect the former
		 * code presumably computed t_con == the already expired date
		 * and never made progress in this loop - confirm against
		 * ticks.h.
		 */
		while (tick_is_expired(t->expire, now_ms)) {
			int next_exp = tick_add(t->expire, MS_TO_TICKS(check->inter));

			if (s->proxy->timeout.check && s->proxy->timeout.connect) {
				int t_con = tick_add(t->expire, s->proxy->timeout.connect);

				next_exp = tick_first(next_exp, t_con);
			}
			t->expire = next_exp;
		}
	}
	else {
		/* there was a test running.
		 * First, let's check whether there was an uncaught error,
		 * which can happen on connect timeout or error.
		 */
		if (check->result == CHK_RES_UNKNOWN) {
			/* good connection is enough for pure TCP check */
			if ((conn->flags & CO_FL_CONNECTED) && !check->type) {
				if (check->use_ssl)
					set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
				else
					set_server_check_status(check, HCHK_STATUS_L4OK, NULL);
			}
			else if ((conn->flags & CO_FL_ERROR) || expired) {
				chk_report_conn_err(check, 0, expired);
			}
			else
				goto out_wait; /* timeout not reached, wait again */
		}

		/* check complete or aborted */
		if (conn && conn->xprt) {
			/* The check was aborted and the connection was not yet closed.
			 * This can happen upon timeout, or when an external event such
			 * as a failed response coupled with "observe layer7" caused the
			 * server state to be suddenly changed.
			 */
			conn_sock_drain(conn);
			conn_full_close(conn);
		}

		if (conn) {
			conn_free(conn);
			check->conn = conn = NULL;
		}

		if (check->result == CHK_RES_FAILED) {
			/* a failure or timeout detected */
			check_notify_failure(check);
		}
		else if (check->result == CHK_RES_CONDPASS) {
			/* check is OK but asks for stopping mode */
			check_notify_stopping(check);
		}
		else if (check->result == CHK_RES_PASSED) {
			/* a success was detected */
			check_notify_success(check);
		}
		check->state &= ~CHK_ST_INPROGRESS;

		/* randomly shift the next check by up to +/- spread_checks
		 * percent of the interval.
		 */
		rv = 0;
		if (global.spread_checks > 0) {
			rv = srv_getinter(check) * global.spread_checks / 100;
			rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
		}
		t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
	}

 reschedule:
	/* never return with an already expired date */
	while (tick_is_expired(t->expire, now_ms))
		t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
 out_wait:
	return t;
}
2209
Simon Horman98637e52014-06-20 12:30:16 +09002210/*
2211 * manages a server health-check. Returns
2212 * the time the task accepts to wait, or TIME_ETERNITY for infinity.
2213 */
2214static struct task *process_chk(struct task *t)
2215{
2216 struct check *check = t->context;
2217
2218 if (check->type == PR_O2_EXT_CHK)
2219 return process_chk_proc(t);
2220 return process_chk_conn(t);
Baptiste Assmanna68ca962015-04-14 01:15:08 +02002221
Simon Horman98637e52014-06-20 12:30:16 +09002222}
2223
Simon Horman5c942422013-11-25 10:46:32 +09002224static int start_check_task(struct check *check, int mininter,
2225 int nbcheck, int srvpos)
2226{
2227 struct task *t;
2228 /* task for the check */
Emeric Brunc60def82017-09-27 14:59:38 +02002229 if ((t = task_new(MAX_THREADS_MASK)) == NULL) {
Simon Horman5c942422013-11-25 10:46:32 +09002230 Alert("Starting [%s:%s] check: out of memory.\n",
2231 check->server->proxy->id, check->server->id);
2232 return 0;
2233 }
2234
2235 check->task = t;
2236 t->process = process_chk;
2237 t->context = check;
2238
Willy Tarreau1746eec2014-04-25 10:46:47 +02002239 if (mininter < srv_getinter(check))
2240 mininter = srv_getinter(check);
2241
2242 if (global.max_spread_checks && mininter > global.max_spread_checks)
2243 mininter = global.max_spread_checks;
2244
Simon Horman5c942422013-11-25 10:46:32 +09002245 /* check this every ms */
Willy Tarreau1746eec2014-04-25 10:46:47 +02002246 t->expire = tick_add(now_ms, MS_TO_TICKS(mininter * srvpos / nbcheck));
Simon Horman5c942422013-11-25 10:46:32 +09002247 check->start = now;
2248 task_queue(t);
2249
2250 return 1;
2251}
2252
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002253/*
2254 * Start health-check.
Willy Tarreau865c5142016-12-21 20:04:48 +01002255 * Returns 0 if OK, ERR_FATAL on error, and prints the error in this case.
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002256 */
Willy Tarreau865c5142016-12-21 20:04:48 +01002257static int start_checks()
2258{
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002259
2260 struct proxy *px;
2261 struct server *s;
2262 struct task *t;
Simon Horman4a741432013-02-23 15:35:38 +09002263 int nbcheck=0, mininter=0, srvpos=0;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002264
Willy Tarreau2c43a1e2007-10-14 23:05:39 +02002265 /* 1- count the checkers to run simultaneously.
2266 * We also determine the minimum interval among all of those which
2267 * have an interval larger than SRV_CHK_INTER_THRES. This interval
2268 * will be used to spread their start-up date. Those which have
Jamie Gloudon801a0a32012-08-25 00:18:33 -04002269 * a shorter interval will start independently and will not dictate
Willy Tarreau2c43a1e2007-10-14 23:05:39 +02002270 * too short an interval for all others.
2271 */
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002272 for (px = proxy; px; px = px->next) {
2273 for (s = px->srv; s; s = s->next) {
Willy Tarreaue7b73482013-11-21 11:50:50 +01002274 if (s->slowstart) {
Emeric Brunc60def82017-09-27 14:59:38 +02002275 if ((t = task_new(MAX_THREADS_MASK)) == NULL) {
Willy Tarreaue7b73482013-11-21 11:50:50 +01002276 Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
Willy Tarreau865c5142016-12-21 20:04:48 +01002277 return ERR_ALERT | ERR_FATAL;
Willy Tarreaue7b73482013-11-21 11:50:50 +01002278 }
2279 /* We need a warmup task that will be called when the server
2280 * state switches from down to up.
2281 */
2282 s->warmup = t;
2283 t->process = server_warmup;
2284 t->context = s;
Baptiste Assmann6076d1c2015-09-17 22:53:59 +02002285 /* server can be in this state only because of */
Emeric Brun52a91d32017-08-31 14:41:55 +02002286 if (s->next_state == SRV_ST_STARTING)
Baptiste Assmann6076d1c2015-09-17 22:53:59 +02002287 task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, (now.tv_sec - s->last_change)) / 20)));
Willy Tarreaue7b73482013-11-21 11:50:50 +01002288 }
2289
Willy Tarreaud8514a22013-12-11 21:10:14 +01002290 if (s->check.state & CHK_ST_CONFIGURED) {
2291 nbcheck++;
2292 if ((srv_getinter(&s->check) >= SRV_CHK_INTER_THRES) &&
2293 (!mininter || mininter > srv_getinter(&s->check)))
2294 mininter = srv_getinter(&s->check);
2295 }
Willy Tarreau15f39102013-12-11 20:41:18 +01002296
Willy Tarreaud8514a22013-12-11 21:10:14 +01002297 if (s->agent.state & CHK_ST_CONFIGURED) {
2298 nbcheck++;
2299 if ((srv_getinter(&s->agent) >= SRV_CHK_INTER_THRES) &&
2300 (!mininter || mininter > srv_getinter(&s->agent)))
2301 mininter = srv_getinter(&s->agent);
2302 }
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002303 }
2304 }
2305
Simon Horman4a741432013-02-23 15:35:38 +09002306 if (!nbcheck)
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002307 return 0;
2308
2309 srand((unsigned)time(NULL));
2310
2311 /*
2312 * 2- start them as far as possible from each others. For this, we will
2313 * start them after their interval set to the min interval divided by
2314 * the number of servers, weighted by the server's position in the list.
2315 */
2316 for (px = proxy; px; px = px->next) {
Simon Horman98637e52014-06-20 12:30:16 +09002317 if ((px->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
2318 if (init_pid_list()) {
2319 Alert("Starting [%s] check: out of memory.\n", px->id);
Willy Tarreau865c5142016-12-21 20:04:48 +01002320 return ERR_ALERT | ERR_FATAL;
Simon Horman98637e52014-06-20 12:30:16 +09002321 }
2322 }
2323
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002324 for (s = px->srv; s; s = s->next) {
Simon Hormand60d6912013-11-25 10:46:36 +09002325 /* A task for the main check */
Willy Tarreauff5ae352013-12-11 20:36:34 +01002326 if (s->check.state & CHK_ST_CONFIGURED) {
Cyril Bonté99c5bf52014-08-07 01:55:38 +02002327 if (s->check.type == PR_O2_EXT_CHK) {
2328 if (!prepare_external_check(&s->check))
Willy Tarreau865c5142016-12-21 20:04:48 +01002329 return ERR_ALERT | ERR_FATAL;
Cyril Bonté99c5bf52014-08-07 01:55:38 +02002330 }
Simon Hormand60d6912013-11-25 10:46:36 +09002331 if (!start_check_task(&s->check, mininter, nbcheck, srvpos))
Willy Tarreau865c5142016-12-21 20:04:48 +01002332 return ERR_ALERT | ERR_FATAL;
Simon Hormand60d6912013-11-25 10:46:36 +09002333 srvpos++;
2334 }
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002335
Simon Hormand60d6912013-11-25 10:46:36 +09002336 /* A task for a auxiliary agent check */
Willy Tarreauff5ae352013-12-11 20:36:34 +01002337 if (s->agent.state & CHK_ST_CONFIGURED) {
Simon Hormand60d6912013-11-25 10:46:36 +09002338 if (!start_check_task(&s->agent, mininter, nbcheck, srvpos)) {
Willy Tarreau865c5142016-12-21 20:04:48 +01002339 return ERR_ALERT | ERR_FATAL;
Simon Hormand60d6912013-11-25 10:46:36 +09002340 }
2341 srvpos++;
2342 }
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +02002343 }
2344 }
2345 return 0;
2346}
Willy Tarreaubaaee002006-06-26 02:48:02 +02002347
2348/*
Willy Tarreau5b3a2022012-09-28 15:01:02 +02002349 * Perform content verification check on data in s->check.buffer buffer.
Willy Tarreaubd741542010-03-16 18:46:54 +01002350 * The buffer MUST be terminated by a null byte before calling this function.
2351 * Sets server status appropriately. The caller is responsible for ensuring
2352 * that the buffer contains at least 13 characters. If <done> is zero, we may
2353 * return 0 to indicate that data is required to decide of a match.
2354 */
2355static int httpchk_expect(struct server *s, int done)
2356{
2357 static char status_msg[] = "HTTP status check returned code <000>";
2358 char status_code[] = "000";
2359 char *contentptr;
2360 int crlf;
2361 int ret;
2362
2363 switch (s->proxy->options2 & PR_O2_EXP_TYPE) {
2364 case PR_O2_EXP_STS:
2365 case PR_O2_EXP_RSTS:
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02002366 memcpy(status_code, s->check.bi->data + 9, 3);
2367 memcpy(status_msg + strlen(status_msg) - 4, s->check.bi->data + 9, 3);
Willy Tarreaubd741542010-03-16 18:46:54 +01002368
2369 if ((s->proxy->options2 & PR_O2_EXP_TYPE) == PR_O2_EXP_STS)
2370 ret = strncmp(s->proxy->expect_str, status_code, 3) == 0;
2371 else
Thierry FOURNIER09af0d62014-06-18 11:35:54 +02002372 ret = regex_exec(s->proxy->expect_regex, status_code);
Willy Tarreaubd741542010-03-16 18:46:54 +01002373
2374 /* we necessarily have the response, so there are no partial failures */
2375 if (s->proxy->options2 & PR_O2_EXP_INV)
2376 ret = !ret;
2377
Simon Horman4a741432013-02-23 15:35:38 +09002378 set_server_check_status(&s->check, ret ? HCHK_STATUS_L7OKD : HCHK_STATUS_L7STS, status_msg);
Willy Tarreaubd741542010-03-16 18:46:54 +01002379 break;
2380
2381 case PR_O2_EXP_STR:
2382 case PR_O2_EXP_RSTR:
2383 /* very simple response parser: ignore CR and only count consecutive LFs,
2384 * stop with contentptr pointing to first char after the double CRLF or
2385 * to '\0' if crlf < 2.
2386 */
2387 crlf = 0;
Willy Tarreau1ae1b7b2012-09-28 15:28:30 +02002388 for (contentptr = s->check.bi->data; *contentptr; contentptr++) {
Willy Tarreaubd741542010-03-16 18:46:54 +01002389 if (crlf >= 2)
2390 break;
2391 if (*contentptr == '\r')
2392 continue;
2393 else if (*contentptr == '\n')
2394 crlf++;
2395 else
2396 crlf = 0;
2397 }
2398
2399 /* Check that response contains a body... */
2400 if (crlf < 2) {
2401 if (!done)
2402 return 0;
2403
Simon Horman4a741432013-02-23 15:35:38 +09002404 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01002405 "HTTP content check could not find a response body");
2406 return 1;
2407 }
2408
2409 /* Check that response body is not empty... */
2410 if (*contentptr == '\0') {
Willy Tarreaua164fb52011-04-13 09:32:41 +02002411 if (!done)
2412 return 0;
2413
Simon Horman4a741432013-02-23 15:35:38 +09002414 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01002415 "HTTP content check found empty response body");
2416 return 1;
2417 }
2418
2419 /* Check the response content against the supplied string
2420 * or regex... */
2421 if ((s->proxy->options2 & PR_O2_EXP_TYPE) == PR_O2_EXP_STR)
2422 ret = strstr(contentptr, s->proxy->expect_str) != NULL;
2423 else
Thierry FOURNIER09af0d62014-06-18 11:35:54 +02002424 ret = regex_exec(s->proxy->expect_regex, contentptr);
Willy Tarreaubd741542010-03-16 18:46:54 +01002425
2426 /* if we don't match, we may need to wait more */
2427 if (!ret && !done)
2428 return 0;
2429
2430 if (ret) {
2431 /* content matched */
2432 if (s->proxy->options2 & PR_O2_EXP_INV)
Simon Horman4a741432013-02-23 15:35:38 +09002433 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01002434 "HTTP check matched unwanted content");
2435 else
Simon Horman4a741432013-02-23 15:35:38 +09002436 set_server_check_status(&s->check, HCHK_STATUS_L7OKD,
Willy Tarreaubd741542010-03-16 18:46:54 +01002437 "HTTP content check matched");
2438 }
2439 else {
2440 if (s->proxy->options2 & PR_O2_EXP_INV)
Simon Horman4a741432013-02-23 15:35:38 +09002441 set_server_check_status(&s->check, HCHK_STATUS_L7OKD,
Willy Tarreaubd741542010-03-16 18:46:54 +01002442 "HTTP check did not match unwanted content");
2443 else
Simon Horman4a741432013-02-23 15:35:38 +09002444 set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
Willy Tarreaubd741542010-03-16 18:46:54 +01002445 "HTTP content check did not match");
2446 }
2447 break;
2448 }
2449 return 1;
2450}
2451
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002452/*
2453 * return the id of a step in a send/expect session
2454 */
Simon Hormane16c1b32015-01-30 11:22:57 +09002455static int tcpcheck_get_step_id(struct check *check)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002456{
2457 struct tcpcheck_rule *cur = NULL, *next = NULL;
2458 int i = 0;
2459
Willy Tarreau213c6782014-10-02 14:51:02 +02002460 /* not even started anything yet => step 0 = initial connect */
Baptiste Assmannf95bc8e2015-04-25 16:03:06 +02002461 if (!check->current_step)
Willy Tarreau213c6782014-10-02 14:51:02 +02002462 return 0;
2463
Simon Hormane16c1b32015-01-30 11:22:57 +09002464 cur = check->last_started_step;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002465
2466 /* no step => first step */
2467 if (cur == NULL)
2468 return 1;
2469
2470 /* increment i until current step */
Simon Hormane16c1b32015-01-30 11:22:57 +09002471 list_for_each_entry(next, check->tcpcheck_rules, list) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002472 if (next->list.p == &cur->list)
2473 break;
2474 ++i;
2475 }
2476
2477 return i;
2478}
2479
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002480/*
2481 * return the latest known comment before (including) the given stepid
2482 * returns NULL if no comment found
2483 */
2484static char * tcpcheck_get_step_comment(struct check *check, int stepid)
2485{
2486 struct tcpcheck_rule *cur = NULL;
2487 char *ret = NULL;
2488 int i = 0;
2489
2490 /* not even started anything yet, return latest comment found before any action */
2491 if (!check->current_step) {
2492 list_for_each_entry(cur, check->tcpcheck_rules, list) {
2493 if (cur->action == TCPCHK_ACT_COMMENT)
2494 ret = cur->comment;
2495 else
2496 goto return_comment;
2497 }
2498 }
2499
2500 i = 1;
2501 list_for_each_entry(cur, check->tcpcheck_rules, list) {
2502 if (cur->comment)
2503 ret = cur->comment;
2504
2505 if (i >= stepid)
2506 goto return_comment;
2507
2508 ++i;
2509 }
2510
2511 return_comment:
2512 return ret;
2513}
2514
Willy Tarreaube74b882017-10-04 16:22:49 +02002515/* proceed with next steps for the TCP checks <check>. Note that this is called
2516 * both from the connection's wake() callback and from the check scheduling task.
Willy Tarreau6bdcab02017-10-04 18:41:00 +02002517 * It returns 0 on normal cases, or <0 if a close() has happened on an existing
2518 * connection, presenting the risk of an fd replacement.
Willy Tarreaube74b882017-10-04 16:22:49 +02002519 */
Willy Tarreau6bdcab02017-10-04 18:41:00 +02002520static int tcpcheck_main(struct check *check)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002521{
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002522 char *contentptr, *comment;
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002523 struct tcpcheck_rule *next;
Baptiste Assmanncfbd1b82015-05-02 09:00:23 +02002524 int done = 0, ret = 0, step = 0;
Willy Tarreaube74b882017-10-04 16:22:49 +02002525 struct connection *conn = check->conn;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002526 struct server *s = check->server;
2527 struct task *t = check->task;
Simon Hormane16c1b32015-01-30 11:22:57 +09002528 struct list *head = check->tcpcheck_rules;
Willy Tarreau6bdcab02017-10-04 18:41:00 +02002529 int retcode = 0;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002530
Willy Tarreauef953952014-10-02 14:30:14 +02002531 /* here, we know that the check is complete or that it failed */
2532 if (check->result != CHK_RES_UNKNOWN)
2533 goto out_end_tcpcheck;
2534
2535 /* We have 4 possibilities here :
2536 * 1. we've not yet attempted step 1, and step 1 is a connect, so no
Willy Tarreau00149122017-10-04 18:05:01 +02002537 * connection attempt was made yet ; conn==NULL;current_step==NULL.
Willy Tarreauef953952014-10-02 14:30:14 +02002538 * 2. we've not yet attempted step 1, and step 1 is a not connect or
2539 * does not exist (no rule), so a connection attempt was made
Willy Tarreau00149122017-10-04 18:05:01 +02002540 * before coming here, conn!=NULL.
Willy Tarreauef953952014-10-02 14:30:14 +02002541 * 3. we're coming back after having started with step 1, so we may
Willy Tarreau00149122017-10-04 18:05:01 +02002542 * be waiting for a connection attempt to complete. conn!=NULL.
2543 * 4. the connection + handshake are complete. conn!=NULL.
Willy Tarreauef953952014-10-02 14:30:14 +02002544 *
2545 * #2 and #3 are quite similar, we want both the connection and the
2546 * handshake to complete before going any further. Thus we must always
2547 * wait for a connection to complete unless we're before and existing
2548 * step 1.
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002549 */
Willy Tarreau449f9522015-05-13 15:39:48 +02002550
2551 /* find first rule and skip comments */
2552 next = LIST_NEXT(head, struct tcpcheck_rule *, list);
2553 while (&next->list != head && next->action == TCPCHK_ACT_COMMENT)
2554 next = LIST_NEXT(&next->list, struct tcpcheck_rule *, list);
2555
Willy Tarreau00149122017-10-04 18:05:01 +02002556 if ((check->current_step || &next->list == head) &&
2557 (!(conn->flags & CO_FL_CONNECTED) || (conn->flags & CO_FL_HANDSHAKE))) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002558 /* we allow up to min(inter, timeout.connect) for a connection
2559 * to establish but only when timeout.check is set
2560 * as it may be to short for a full check otherwise
2561 */
2562 while (tick_is_expired(t->expire, now_ms)) {
2563 int t_con;
2564
2565 t_con = tick_add(t->expire, s->proxy->timeout.connect);
2566 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
2567
2568 if (s->proxy->timeout.check)
2569 t->expire = tick_first(t->expire, t_con);
2570 }
Willy Tarreau6bdcab02017-10-04 18:41:00 +02002571 return retcode;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002572 }
2573
Willy Tarreauef953952014-10-02 14:30:14 +02002574 /* special case: option tcp-check with no rule, a connect is enough */
Willy Tarreau449f9522015-05-13 15:39:48 +02002575 if (&next->list == head) {
Willy Tarreauef953952014-10-02 14:30:14 +02002576 set_server_check_status(check, HCHK_STATUS_L4OK, NULL);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002577 goto out_end_tcpcheck;
Willy Tarreauef953952014-10-02 14:30:14 +02002578 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002579
Willy Tarreau213c6782014-10-02 14:51:02 +02002580 /* no step means first step initialisation */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002581 if (check->current_step == NULL) {
Willy Tarreau213c6782014-10-02 14:51:02 +02002582 check->last_started_step = NULL;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002583 check->bo->p = check->bo->data;
2584 check->bo->o = 0;
2585 check->bi->p = check->bi->data;
2586 check->bi->i = 0;
Willy Tarreau449f9522015-05-13 15:39:48 +02002587 check->current_step = next;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002588 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
2589 if (s->proxy->timeout.check)
2590 t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
2591 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002592
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002593 /* It's only the rules which will enable send/recv */
Willy Tarreau00149122017-10-04 18:05:01 +02002594 if (conn)
Olivier Houchard1a0545f2017-09-13 18:30:23 +02002595 __conn_xprt_stop_both(conn);
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002596
Willy Tarreauabca5b62013-12-06 14:19:25 +01002597 while (1) {
Willy Tarreau263013d2015-05-13 11:59:14 +02002598 /* We have to try to flush the output buffer before reading, at
2599 * the end, or if we're about to send a string that does not fit
2600 * in the remaining space. That explains why we break out of the
Willy Tarreau00149122017-10-04 18:05:01 +02002601 * loop after this control. If we have data, conn is valid.
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002602 */
2603 if (check->bo->o &&
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002604 (&check->current_step->list == head ||
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002605 check->current_step->action != TCPCHK_ACT_SEND ||
2606 check->current_step->string_len >= buffer_total_space(check->bo))) {
2607
Olivier Houchard1a0545f2017-09-13 18:30:23 +02002608 __conn_xprt_want_send(conn);
Willy Tarreau1049b1f2014-02-02 01:51:17 +01002609 if (conn->xprt->snd_buf(conn, check->bo, 0) <= 0) {
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002610 if (conn->flags & CO_FL_ERROR) {
Willy Tarreaub5259bf2017-10-04 14:47:29 +02002611 chk_report_conn_err(check, errno, 0);
Olivier Houchard1a0545f2017-09-13 18:30:23 +02002612 __conn_xprt_stop_both(conn);
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002613 goto out_end_tcpcheck;
2614 }
Willy Tarreau263013d2015-05-13 11:59:14 +02002615 break;
Willy Tarreauabca5b62013-12-06 14:19:25 +01002616 }
Willy Tarreauabca5b62013-12-06 14:19:25 +01002617 }
2618
Willy Tarreau263013d2015-05-13 11:59:14 +02002619 if (&check->current_step->list == head)
Willy Tarreauabca5b62013-12-06 14:19:25 +01002620 break;
2621
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002622 /* have 'next' point to the next rule or NULL if we're on the
2623 * last one, connect() needs this.
2624 */
Willy Tarreau5581c272015-05-13 12:24:53 +02002625 next = LIST_NEXT(&check->current_step->list, struct tcpcheck_rule *, list);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002626
2627 /* bypass all comment rules */
Willy Tarreauf2c87352015-05-13 12:08:21 +02002628 while (&next->list != head && next->action == TCPCHK_ACT_COMMENT)
Willy Tarreau5581c272015-05-13 12:24:53 +02002629 next = LIST_NEXT(&next->list, struct tcpcheck_rule *, list);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002630
2631 /* NULL if we're on the last rule */
Willy Tarreauf3d34822014-12-08 12:11:28 +01002632 if (&next->list == head)
2633 next = NULL;
2634
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002635 if (check->current_step->action == TCPCHK_ACT_CONNECT) {
2636 struct protocol *proto;
2637 struct xprt_ops *xprt;
2638
Willy Tarreau00149122017-10-04 18:05:01 +02002639 /* For a connect action we'll create a new connection.
2640 * We may also have to kill a previous one. But we don't
2641 * want to leave *without* a connection if we came here
2642 * from the connection layer, hence with a connection.
2643 * Thus we'll proceed in the following order :
2644 * 1: close but not release previous connection
2645 * 2: try to get a new connection
2646 * 3: release and replace the old one on success
2647 */
2648 if (check->conn) {
Willy Tarreau402dbc12017-10-05 17:53:13 +02002649 conn_full_close(check->conn);
Willy Tarreau00149122017-10-04 18:05:01 +02002650 retcode = -1; /* do not reuse the fd! */
2651 }
2652
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002653 /* mark the step as started */
2654 check->last_started_step = check->current_step;
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002655
2656 /* prepare new connection */
Willy Tarreau00149122017-10-04 18:05:01 +02002657 conn = conn_new();
2658 if (!conn) {
2659 step = tcpcheck_get_step_id(check);
2660 chunk_printf(&trash, "TCPCHK error allocating connection at step %d", step);
2661 comment = tcpcheck_get_step_comment(check, step);
2662 if (comment)
2663 chunk_appendf(&trash, " comment: '%s'", comment);
2664 set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.str);
2665 check->current_step = NULL;
2666 return retcode;
2667 }
2668
2669 if (check->conn)
2670 conn_free(check->conn);
2671 check->conn = conn;
2672
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002673 conn_attach(conn, check, &check_conn_cb);
2674 conn->target = &s->obj_type;
2675
2676 /* no client address */
2677 clear_addr(&conn->addr.from);
2678
Simon Horman41f58762015-01-30 11:22:56 +09002679 if (is_addr(&check->addr)) {
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002680 /* we'll connect to the check addr specified on the server */
Simon Horman41f58762015-01-30 11:22:56 +09002681 conn->addr.to = check->addr;
Willy Tarreau640556c2014-05-09 23:38:15 +02002682 }
2683 else {
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002684 /* we'll connect to the addr on the server */
2685 conn->addr.to = s->addr;
Willy Tarreau640556c2014-05-09 23:38:15 +02002686 }
Thierry FOURNIERbb2ae642015-01-14 11:31:49 +01002687 proto = protocol_by_family(conn->addr.to.ss_family);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002688
2689 /* port */
2690 if (check->current_step->port)
2691 set_host_port(&conn->addr.to, check->current_step->port);
2692 else if (check->port)
2693 set_host_port(&conn->addr.to, check->port);
2694
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002695 if (check->current_step->conn_opts & TCPCHK_OPT_SSL) {
Willy Tarreaua261e9b2016-12-22 20:44:00 +01002696 xprt = xprt_get(XPRT_SSL);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002697 }
2698 else {
Willy Tarreaua261e9b2016-12-22 20:44:00 +01002699 xprt = xprt_get(XPRT_RAW);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002700 }
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002701 conn_prepare(conn, proto, xprt);
2702
Willy Tarreaue7dff022015-04-03 01:14:29 +02002703 ret = SF_ERR_INTERNAL;
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002704 if (proto->connect)
Willy Tarreauf3d34822014-12-08 12:11:28 +01002705 ret = proto->connect(conn,
2706 1 /* I/O polling is always needed */,
2707 (next && next->action == TCPCHK_ACT_EXPECT) ? 0 : 2);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002708 if (check->current_step->conn_opts & TCPCHK_OPT_SEND_PROXY) {
2709 conn->send_proxy_ofs = 1;
2710 conn->flags |= CO_FL_SEND_PROXY;
2711 }
2712
2713 /* It can return one of :
Willy Tarreaue7dff022015-04-03 01:14:29 +02002714 * - SF_ERR_NONE if everything's OK
2715 * - SF_ERR_SRVTO if there are no more servers
2716 * - SF_ERR_SRVCL if the connection was refused by the server
2717 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
2718 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
2719 * - SF_ERR_INTERNAL for any other purely internal errors
Tim Düsterhus4896c442016-11-29 02:15:19 +01002720 * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002721 * Note that we try to prevent the network stack from sending the ACK during the
2722 * connect() when a pure TCP check is used (without PROXY protocol).
2723 */
2724 switch (ret) {
Willy Tarreaue7dff022015-04-03 01:14:29 +02002725 case SF_ERR_NONE:
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002726 /* we allow up to min(inter, timeout.connect) for a connection
2727 * to establish but only when timeout.check is set
2728 * as it may be to short for a full check otherwise
2729 */
2730 t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
2731
2732 if (s->proxy->timeout.check && s->proxy->timeout.connect) {
2733 int t_con = tick_add(now_ms, s->proxy->timeout.connect);
2734 t->expire = tick_first(t->expire, t_con);
2735 }
2736 break;
Willy Tarreaue7dff022015-04-03 01:14:29 +02002737 case SF_ERR_SRVTO: /* ETIMEDOUT */
2738 case SF_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
Baptiste Assmanncfbd1b82015-05-02 09:00:23 +02002739 step = tcpcheck_get_step_id(check);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002740 chunk_printf(&trash, "TCPCHK error establishing connection at step %d: %s",
Baptiste Assmanncfbd1b82015-05-02 09:00:23 +02002741 step, strerror(errno));
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002742 comment = tcpcheck_get_step_comment(check, step);
2743 if (comment)
2744 chunk_appendf(&trash, " comment: '%s'", comment);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002745 set_server_check_status(check, HCHK_STATUS_L4CON, trash.str);
2746 goto out_end_tcpcheck;
Willy Tarreaue7dff022015-04-03 01:14:29 +02002747 case SF_ERR_PRXCOND:
2748 case SF_ERR_RESOURCE:
2749 case SF_ERR_INTERNAL:
Baptiste Assmanncfbd1b82015-05-02 09:00:23 +02002750 step = tcpcheck_get_step_id(check);
2751 chunk_printf(&trash, "TCPCHK error establishing connection at step %d", step);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002752 comment = tcpcheck_get_step_comment(check, step);
2753 if (comment)
2754 chunk_appendf(&trash, " comment: '%s'", comment);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002755 set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.str);
2756 goto out_end_tcpcheck;
2757 }
2758
2759 /* allow next rule */
Willy Tarreau5581c272015-05-13 12:24:53 +02002760 check->current_step = LIST_NEXT(&check->current_step->list, struct tcpcheck_rule *, list);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002761
2762 /* bypass all comment rules */
Willy Tarreauf2c87352015-05-13 12:08:21 +02002763 while (&check->current_step->list != head &&
2764 check->current_step->action == TCPCHK_ACT_COMMENT)
Willy Tarreau5581c272015-05-13 12:24:53 +02002765 check->current_step = LIST_NEXT(&check->current_step->list, struct tcpcheck_rule *, list);
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002766
Willy Tarreauf2c87352015-05-13 12:08:21 +02002767 if (&check->current_step->list == head)
2768 break;
2769
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002770 /* don't do anything until the connection is established */
2771 if (!(conn->flags & CO_FL_CONNECTED)) {
2772 /* update expire time, should be done by process_chk */
2773 /* we allow up to min(inter, timeout.connect) for a connection
2774 * to establish but only when timeout.check is set
2775 * as it may be to short for a full check otherwise
2776 */
2777 while (tick_is_expired(t->expire, now_ms)) {
2778 int t_con;
2779
2780 t_con = tick_add(t->expire, s->proxy->timeout.connect);
2781 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
2782
2783 if (s->proxy->timeout.check)
2784 t->expire = tick_first(t->expire, t_con);
2785 }
Willy Tarreau6bdcab02017-10-04 18:41:00 +02002786 return retcode;
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002787 }
2788
2789 } /* end 'connect' */
2790 else if (check->current_step->action == TCPCHK_ACT_SEND) {
2791 /* mark the step as started */
2792 check->last_started_step = check->current_step;
2793
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002794 /* reset the read buffer */
2795 if (*check->bi->data != '\0') {
2796 *check->bi->data = '\0';
2797 check->bi->i = 0;
2798 }
2799
Willy Tarreaubbae3f02017-08-30 09:59:52 +02002800 if (conn->flags & CO_FL_SOCK_WR_SH) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002801 conn->flags |= CO_FL_ERROR;
Willy Tarreaub5259bf2017-10-04 14:47:29 +02002802 chk_report_conn_err(check, 0, 0);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002803 goto out_end_tcpcheck;
2804 }
2805
Willy Tarreauabca5b62013-12-06 14:19:25 +01002806 if (check->current_step->string_len >= check->bo->size) {
2807 chunk_printf(&trash, "tcp-check send : string too large (%d) for buffer size (%d) at step %d",
2808 check->current_step->string_len, check->bo->size,
Simon Hormane16c1b32015-01-30 11:22:57 +09002809 tcpcheck_get_step_id(check));
Willy Tarreauabca5b62013-12-06 14:19:25 +01002810 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2811 goto out_end_tcpcheck;
2812 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002813
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002814 /* do not try to send if there is no space */
2815 if (check->current_step->string_len >= buffer_total_space(check->bo))
2816 continue;
2817
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002818 bo_putblk(check->bo, check->current_step->string, check->current_step->string_len);
2819 *check->bo->p = '\0'; /* to make gdb output easier to read */
2820
Willy Tarreauabca5b62013-12-06 14:19:25 +01002821 /* go to next rule and try to send */
Willy Tarreau5581c272015-05-13 12:24:53 +02002822 check->current_step = LIST_NEXT(&check->current_step->list, struct tcpcheck_rule *, list);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002823
2824 /* bypass all comment rules */
Willy Tarreauf2c87352015-05-13 12:08:21 +02002825 while (&check->current_step->list != head &&
2826 check->current_step->action == TCPCHK_ACT_COMMENT)
Willy Tarreau5581c272015-05-13 12:24:53 +02002827 check->current_step = LIST_NEXT(&check->current_step->list, struct tcpcheck_rule *, list);
Willy Tarreauf2c87352015-05-13 12:08:21 +02002828
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002829 } /* end 'send' */
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002830 else if (check->current_step->action == TCPCHK_ACT_EXPECT) {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01002831 if (unlikely(check->result == CHK_RES_FAILED))
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002832 goto out_end_tcpcheck;
2833
Olivier Houchard1a0545f2017-09-13 18:30:23 +02002834 __conn_xprt_want_recv(conn);
Willy Tarreau310987a2014-01-22 19:46:33 +01002835 if (conn->xprt->rcv_buf(conn, check->bi, check->bi->size) <= 0) {
Willy Tarreau54e917c2017-08-30 07:35:35 +02002836 if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH)) {
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002837 done = 1;
2838 if ((conn->flags & CO_FL_ERROR) && !check->bi->i) {
2839 /* Report network errors only if we got no other data. Otherwise
2840 * we'll let the upper layers decide whether the response is OK
2841 * or not. It is very common that an RST sent by the server is
2842 * reported as an error just after the last data chunk.
2843 */
Willy Tarreaub5259bf2017-10-04 14:47:29 +02002844 chk_report_conn_err(check, errno, 0);
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002845 goto out_end_tcpcheck;
2846 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002847 }
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002848 else
Willy Tarreau263013d2015-05-13 11:59:14 +02002849 break;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002850 }
2851
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002852 /* mark the step as started */
2853 check->last_started_step = check->current_step;
2854
2855
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002856 /* Intermediate or complete response received.
2857 * Terminate string in check->bi->data buffer.
2858 */
2859 if (check->bi->i < check->bi->size) {
2860 check->bi->data[check->bi->i] = '\0';
2861 }
2862 else {
2863 check->bi->data[check->bi->i - 1] = '\0';
2864 done = 1; /* buffer full, don't wait for more data */
2865 }
2866
2867 contentptr = check->bi->data;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002868
2869 /* Check that response body is not empty... */
Willy Tarreauec6b0122014-05-13 17:57:29 +02002870 if (!check->bi->i) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002871 if (!done)
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002872 continue;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002873
2874 /* empty response */
Baptiste Assmanncfbd1b82015-05-02 09:00:23 +02002875 step = tcpcheck_get_step_id(check);
2876 chunk_printf(&trash, "TCPCHK got an empty response at step %d", step);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002877 comment = tcpcheck_get_step_comment(check, step);
2878 if (comment)
2879 chunk_appendf(&trash, " comment: '%s'", comment);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002880 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2881
2882 goto out_end_tcpcheck;
2883 }
2884
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002885 if (!done && (check->current_step->string != NULL) && (check->bi->i < check->current_step->string_len) )
Willy Tarreaua970c282013-12-06 12:47:19 +01002886 continue; /* try to read more */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002887
Willy Tarreaua970c282013-12-06 12:47:19 +01002888 tcpcheck_expect:
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002889 if (check->current_step->string != NULL)
2890 ret = my_memmem(contentptr, check->bi->i, check->current_step->string, check->current_step->string_len) != NULL;
2891 else if (check->current_step->expect_regex != NULL)
2892 ret = regex_exec(check->current_step->expect_regex, contentptr);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002893
2894 if (!ret && !done)
Willy Tarreaua970c282013-12-06 12:47:19 +01002895 continue; /* try to read more */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002896
2897 /* matched */
Baptiste Assmanncfbd1b82015-05-02 09:00:23 +02002898 step = tcpcheck_get_step_id(check);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002899 if (ret) {
2900 /* matched but we did not want to => ERROR */
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002901 if (check->current_step->inverse) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002902 /* we were looking for a string */
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002903 if (check->current_step->string != NULL) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002904 chunk_printf(&trash, "TCPCHK matched unwanted content '%s' at step %d",
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002905 check->current_step->string, step);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002906 }
2907 else {
2908 /* we were looking for a regex */
Baptiste Assmanncfbd1b82015-05-02 09:00:23 +02002909 chunk_printf(&trash, "TCPCHK matched unwanted content (regex) at step %d", step);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002910 }
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002911 comment = tcpcheck_get_step_comment(check, step);
2912 if (comment)
2913 chunk_appendf(&trash, " comment: '%s'", comment);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002914 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2915 goto out_end_tcpcheck;
2916 }
2917 /* matched and was supposed to => OK, next step */
2918 else {
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002919 /* allow next rule */
Willy Tarreau5581c272015-05-13 12:24:53 +02002920 check->current_step = LIST_NEXT(&check->current_step->list, struct tcpcheck_rule *, list);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002921
2922 /* bypass all comment rules */
Willy Tarreauf2c87352015-05-13 12:08:21 +02002923 while (&check->current_step->list != head &&
2924 check->current_step->action == TCPCHK_ACT_COMMENT)
Willy Tarreau5581c272015-05-13 12:24:53 +02002925 check->current_step = LIST_NEXT(&check->current_step->list, struct tcpcheck_rule *, list);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002926
Willy Tarreauf2c87352015-05-13 12:08:21 +02002927 if (&check->current_step->list == head)
2928 break;
2929
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002930 if (check->current_step->action == TCPCHK_ACT_EXPECT)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002931 goto tcpcheck_expect;
Olivier Houchard1a0545f2017-09-13 18:30:23 +02002932 __conn_xprt_stop_recv(conn);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002933 }
2934 }
2935 else {
2936 /* not matched */
2937 /* not matched and was not supposed to => OK, next step */
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002938 if (check->current_step->inverse) {
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002939 /* allow next rule */
Willy Tarreau5581c272015-05-13 12:24:53 +02002940 check->current_step = LIST_NEXT(&check->current_step->list, struct tcpcheck_rule *, list);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002941
2942 /* bypass all comment rules */
Willy Tarreauf2c87352015-05-13 12:08:21 +02002943 while (&check->current_step->list != head &&
2944 check->current_step->action == TCPCHK_ACT_COMMENT)
Willy Tarreau5581c272015-05-13 12:24:53 +02002945 check->current_step = LIST_NEXT(&check->current_step->list, struct tcpcheck_rule *, list);
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002946
Willy Tarreauf2c87352015-05-13 12:08:21 +02002947 if (&check->current_step->list == head)
2948 break;
2949
Willy Tarreau98aec9f2013-12-06 16:16:41 +01002950 if (check->current_step->action == TCPCHK_ACT_EXPECT)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002951 goto tcpcheck_expect;
Olivier Houchard1a0545f2017-09-13 18:30:23 +02002952 __conn_xprt_stop_recv(conn);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002953 }
2954 /* not matched but was supposed to => ERROR */
2955 else {
2956 /* we were looking for a string */
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002957 if (check->current_step->string != NULL) {
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002958 chunk_printf(&trash, "TCPCHK did not match content '%s' at step %d",
Willy Tarreauce8c42a2015-05-13 11:23:01 +02002959 check->current_step->string, step);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002960 }
2961 else {
2962 /* we were looking for a regex */
2963 chunk_printf(&trash, "TCPCHK did not match content (regex) at step %d",
Baptiste Assmanncfbd1b82015-05-02 09:00:23 +02002964 step);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002965 }
Baptiste Assmann22b09d22015-05-01 08:03:04 +02002966 comment = tcpcheck_get_step_comment(check, step);
2967 if (comment)
2968 chunk_appendf(&trash, " comment: '%s'", comment);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002969 set_server_check_status(check, HCHK_STATUS_L7RSP, trash.str);
2970 goto out_end_tcpcheck;
2971 }
2972 }
2973 } /* end expect */
2974 } /* end loop over double chained step list */
2975
Willy Tarreau263013d2015-05-13 11:59:14 +02002976 /* We're waiting for some I/O to complete, we've reached the end of the
2977 * rules, or both. Do what we have to do, otherwise we're done.
2978 */
2979 if (&check->current_step->list == head && !check->bo->o) {
2980 set_server_check_status(check, HCHK_STATUS_L7OKD, "(tcp-check)");
2981 goto out_end_tcpcheck;
2982 }
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002983
Willy Tarreau53c5a042015-05-13 11:38:17 +02002984 /* warning, current_step may now point to the head */
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002985 if (check->bo->o)
Olivier Houchard1a0545f2017-09-13 18:30:23 +02002986 __conn_xprt_want_send(conn);
Willy Tarreaufbe0edf2013-12-06 16:54:31 +01002987
Willy Tarreau53c5a042015-05-13 11:38:17 +02002988 if (&check->current_step->list != head &&
2989 check->current_step->action == TCPCHK_ACT_EXPECT)
Olivier Houchard1a0545f2017-09-13 18:30:23 +02002990 __conn_xprt_want_recv(conn);
Willy Tarreau6bdcab02017-10-04 18:41:00 +02002991 return retcode;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002992
2993 out_end_tcpcheck:
2994 /* collect possible new errors */
2995 if (conn->flags & CO_FL_ERROR)
Willy Tarreaub5259bf2017-10-04 14:47:29 +02002996 chk_report_conn_err(check, 0, 0);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002997
Baptiste Assmann69e273f2013-12-11 00:52:19 +01002998 /* cleanup before leaving */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02002999 check->current_step = NULL;
3000
Willy Tarreau6aaa1b82013-12-11 17:09:34 +01003001 if (check->result == CHK_RES_FAILED)
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02003002 conn->flags |= CO_FL_ERROR;
3003
Olivier Houchard1a0545f2017-09-13 18:30:23 +02003004 __conn_xprt_stop_both(conn);
Willy Tarreau6bdcab02017-10-04 18:41:00 +02003005 return retcode;
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02003006}
3007
Simon Hormanb1900d52015-01-30 11:22:54 +09003008const char *init_check(struct check *check, int type)
3009{
3010 check->type = type;
3011
3012 /* Allocate buffer for requests... */
3013 if ((check->bi = calloc(sizeof(struct buffer) + global.tune.chksize, sizeof(char))) == NULL) {
3014 return "out of memory while allocating check buffer";
3015 }
3016 check->bi->size = global.tune.chksize;
3017
3018 /* Allocate buffer for responses... */
3019 if ((check->bo = calloc(sizeof(struct buffer) + global.tune.chksize, sizeof(char))) == NULL) {
3020 return "out of memory while allocating check buffer";
3021 }
3022 check->bo->size = global.tune.chksize;
3023
Simon Hormanb1900d52015-01-30 11:22:54 +09003024 return NULL;
3025}
3026
Simon Hormanbfb5d332015-01-30 11:22:55 +09003027void free_check(struct check *check)
3028{
3029 free(check->bi);
Frédéric Lécaille6e0843c2017-03-21 16:39:15 +01003030 check->bi = NULL;
Simon Hormanbfb5d332015-01-30 11:22:55 +09003031 free(check->bo);
Frédéric Lécaille6e0843c2017-03-21 16:39:15 +01003032 check->bo = NULL;
Simon Hormanbfb5d332015-01-30 11:22:55 +09003033 free(check->conn);
Frédéric Lécaille6e0843c2017-03-21 16:39:15 +01003034 check->conn = NULL;
Simon Hormanbfb5d332015-01-30 11:22:55 +09003035}
3036
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003037void email_alert_free(struct email_alert *alert)
3038{
3039 struct tcpcheck_rule *rule, *back;
3040
3041 if (!alert)
3042 return;
3043
Christopher Fauletde1a75b2017-10-23 15:38:19 +02003044 list_for_each_entry_safe(rule, back, &alert->tcpcheck_rules, list) {
3045 LIST_DEL(&rule->list);
3046 free(rule->comment);
3047 free(rule->string);
3048 if (rule->expect_regex)
3049 regex_free(rule->expect_regex);
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003050 pool_free2(pool2_tcpcheck_rule, rule);
Christopher Fauletde1a75b2017-10-23 15:38:19 +02003051 }
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003052 pool_free2(pool2_email_alert, alert);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003053}
3054
3055static struct task *process_email_alert(struct task *t)
3056{
Christopher Faulet0108bb32017-10-20 21:34:32 +02003057 struct check *check = t->context;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003058 struct email_alertq *q;
Christopher Faulet0108bb32017-10-20 21:34:32 +02003059 struct email_alert *alert;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003060
3061 q = container_of(check, typeof(*q), check);
3062
Christopher Faulet0108bb32017-10-20 21:34:32 +02003063 while (1) {
3064 if (!(check->state & CHK_ST_ENABLED)) {
3065 if (LIST_ISEMPTY(&q->email_alerts)) {
3066 /* All alerts processed, queue the task */
3067 t->expire = TICK_ETERNITY;
3068 task_queue(t);
3069 return t;
3070 }
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003071
3072 alert = LIST_NEXT(&q->email_alerts, typeof(alert), list);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003073 LIST_DEL(&alert->list);
Christopher Faulet0108bb32017-10-20 21:34:32 +02003074 t->expire = now_ms;
3075 check->server = alert->srv;
3076 check->tcpcheck_rules = &alert->tcpcheck_rules;
3077 check->status = HCHK_STATUS_INI;
3078 check->state |= CHK_ST_ENABLED;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003079 }
3080
Christopher Faulet0108bb32017-10-20 21:34:32 +02003081 process_chk(t);
3082 if (check->state & CHK_ST_INPROGRESS)
3083 break;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003084
3085 alert = container_of(check->tcpcheck_rules, typeof(*alert), tcpcheck_rules);
3086 email_alert_free(alert);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003087 check->tcpcheck_rules = NULL;
Christopher Faulet0108bb32017-10-20 21:34:32 +02003088 check->server = NULL;
3089 check->state &= ~CHK_ST_ENABLED;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003090 }
3091 return t;
3092}
3093
Christopher Faulet0108bb32017-10-20 21:34:32 +02003094/* Initializes mailer alerts for the proxy <p> using <mls> parameters.
3095 *
3096 * The function returns 1 in success case, otherwise, it returns 0 and err is
3097 * filled.
3098 */
3099int init_email_alert(struct mailers *mls, struct proxy *p, char **err)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003100{
Christopher Faulet0108bb32017-10-20 21:34:32 +02003101 struct mailer *mailer;
3102 struct email_alertq *queues;
3103 const char *err_str;
3104 int i = 0;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003105
Christopher Faulet0108bb32017-10-20 21:34:32 +02003106 if ((queues = calloc(mls->count, sizeof(*queues))) == NULL) {
3107 memprintf(err, "out of memory while allocating mailer alerts queues");
3108 goto error;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003109 }
3110
Christopher Faulet0108bb32017-10-20 21:34:32 +02003111 for (mailer = mls->mailer_list; mailer; i++, mailer = mailer->next) {
3112 struct email_alertq *q = &queues[i];
3113 struct check *check = &q->check;
3114 struct task *t;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003115
3116 LIST_INIT(&q->email_alerts);
3117
Christopher Faulet0108bb32017-10-20 21:34:32 +02003118 check->inter = mls->timeout.mail;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003119 check->rise = DEF_AGENT_RISETIME;
3120 check->fall = DEF_AGENT_FALLTIME;
Christopher Faulet0108bb32017-10-20 21:34:32 +02003121 if ((err_str = init_check(check, PR_O2_TCPCHK_CHK))) {
3122 memprintf(err, "%s", err_str);
3123 goto error;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003124 }
3125
3126 check->xprt = mailer->xprt;
Christopher Faulet0108bb32017-10-20 21:34:32 +02003127 check->addr = mailer->addr;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003128 if (!get_host_port(&mailer->addr))
3129 /* Default to submission port */
3130 check->port = 587;
Christopher Faulet0108bb32017-10-20 21:34:32 +02003131 //check->server = s;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003132
Emeric Brunc60def82017-09-27 14:59:38 +02003133 if ((t = task_new(MAX_THREADS_MASK)) == NULL) {
Christopher Faulet0108bb32017-10-20 21:34:32 +02003134 memprintf(err, "out of memory while allocating mailer alerts task");
3135 goto error;
3136 }
3137
3138 check->task = t;
3139 t->process = process_email_alert;
3140 t->context = check;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003141
Christopher Faulet0108bb32017-10-20 21:34:32 +02003142 /* check this in one ms */
3143 t->expire = TICK_ETERNITY;
3144 check->start = now;
3145 task_queue(t);
3146 }
3147
3148 mls->users++;
3149 free(p->email_alert.mailers.name);
3150 p->email_alert.mailers.m = mls;
3151 p->email_alert.queues = queues;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003152 return 0;
Christopher Faulet0108bb32017-10-20 21:34:32 +02003153
3154 error:
3155 for (i = 0; i < mls->count; i++) {
3156 struct email_alertq *q = &queues[i];
3157 struct check *check = &q->check;
3158
3159 if (check->task) {
3160 task_delete(check->task);
3161 task_free(check->task);
3162 check->task = NULL;
3163 }
3164 free_check(check);
3165 }
3166 free(queues);
3167 return 1;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003168}
3169
3170
3171static int add_tcpcheck_expect_str(struct list *list, const char *str)
3172{
3173 struct tcpcheck_rule *tcpcheck;
3174
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003175 if ((tcpcheck = pool_alloc2(pool2_tcpcheck_rule)) == NULL)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003176 return 0;
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003177 memset(tcpcheck, 0, sizeof(*tcpcheck));
3178 tcpcheck->action = TCPCHK_ACT_EXPECT;
3179 tcpcheck->string = strdup(str);
3180 tcpcheck->expect_regex = NULL;
3181 tcpcheck->comment = NULL;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003182 if (!tcpcheck->string) {
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003183 pool_free2(pool2_tcpcheck_rule, tcpcheck);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003184 return 0;
3185 }
3186
3187 LIST_ADDQ(list, &tcpcheck->list);
3188 return 1;
3189}
3190
3191static int add_tcpcheck_send_strs(struct list *list, const char * const *strs)
3192{
3193 struct tcpcheck_rule *tcpcheck;
Willy Tarreau64345aa2016-08-10 19:29:09 +02003194 const char *in;
3195 char *dst;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003196 int i;
3197
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003198 if ((tcpcheck = pool_alloc2(pool2_tcpcheck_rule)) == NULL)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003199 return 0;
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003200 memset(tcpcheck, 0, sizeof(*tcpcheck));
3201 tcpcheck->action = TCPCHK_ACT_SEND;
3202 tcpcheck->expect_regex = NULL;
3203 tcpcheck->comment = NULL;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003204 tcpcheck->string_len = 0;
3205 for (i = 0; strs[i]; i++)
3206 tcpcheck->string_len += strlen(strs[i]);
3207
3208 tcpcheck->string = malloc(tcpcheck->string_len + 1);
3209 if (!tcpcheck->string) {
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003210 pool_free2(pool2_tcpcheck_rule, tcpcheck);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003211 return 0;
3212 }
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003213
Willy Tarreau64345aa2016-08-10 19:29:09 +02003214 dst = tcpcheck->string;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003215 for (i = 0; strs[i]; i++)
Willy Tarreau64345aa2016-08-10 19:29:09 +02003216 for (in = strs[i]; (*dst = *in++); dst++);
3217 *dst = 0;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003218
3219 LIST_ADDQ(list, &tcpcheck->list);
3220 return 1;
3221}
3222
Christopher Faulet0108bb32017-10-20 21:34:32 +02003223static int enqueue_one_email_alert(struct proxy *p, struct server *s,
3224 struct email_alertq *q, const char *msg)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003225{
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003226 struct email_alert *alert;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003227 struct tcpcheck_rule *tcpcheck;
3228 struct check *check = &q->check;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003229
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003230 if ((alert = pool_alloc2(pool2_email_alert)) == NULL)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003231 goto error;
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003232 LIST_INIT(&alert->list);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003233 LIST_INIT(&alert->tcpcheck_rules);
Christopher Faulet0108bb32017-10-20 21:34:32 +02003234 alert->srv = s;
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003235
3236 if ((tcpcheck = pool_alloc2(pool2_tcpcheck_rule)) == NULL)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003237 goto error;
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003238 memset(tcpcheck, 0, sizeof(*tcpcheck));
3239 tcpcheck->action = TCPCHK_ACT_CONNECT;
3240 tcpcheck->comment = NULL;
3241 tcpcheck->string = NULL;
3242 tcpcheck->expect_regex = NULL;
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003243 LIST_ADDQ(&alert->tcpcheck_rules, &tcpcheck->list);
3244
3245 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "220 "))
3246 goto error;
3247
3248 {
3249 const char * const strs[4] = { "EHLO ", p->email_alert.myhostname, "\r\n" };
3250 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3251 goto error;
3252 }
3253
3254 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "250 "))
3255 goto error;
3256
3257 {
3258 const char * const strs[4] = { "MAIL FROM:<", p->email_alert.from, ">\r\n" };
3259 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3260 goto error;
3261 }
3262
3263 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "250 "))
3264 goto error;
3265
3266 {
3267 const char * const strs[4] = { "RCPT TO:<", p->email_alert.to, ">\r\n" };
3268 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3269 goto error;
3270 }
3271
3272 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "250 "))
3273 goto error;
3274
3275 {
3276 const char * const strs[2] = { "DATA\r\n" };
3277 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3278 goto error;
3279 }
3280
3281 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "354 "))
3282 goto error;
3283
3284 {
3285 struct tm tm;
3286 char datestr[48];
3287 const char * const strs[18] = {
Pieter Baauw5e0964e2016-02-13 16:27:35 +01003288 "From: ", p->email_alert.from, "\r\n",
3289 "To: ", p->email_alert.to, "\r\n",
3290 "Date: ", datestr, "\r\n",
3291 "Subject: [HAproxy Alert] ", msg, "\r\n",
3292 "\r\n",
3293 msg, "\r\n",
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003294 "\r\n",
Pieter Baauwed35c372015-07-22 19:51:54 +02003295 ".\r\n",
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003296 NULL
3297 };
3298
3299 get_localtime(date.tv_sec, &tm);
3300
3301 if (strftime(datestr, sizeof(datestr), "%a, %d %b %Y %T %z (%Z)", &tm) == 0) {
3302 goto error;
3303 }
3304
3305 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3306 goto error;
3307 }
3308
3309 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "250 "))
3310 goto error;
3311
3312 {
3313 const char * const strs[2] = { "QUIT\r\n" };
3314 if (!add_tcpcheck_send_strs(&alert->tcpcheck_rules, strs))
3315 goto error;
3316 }
3317
3318 if (!add_tcpcheck_expect_str(&alert->tcpcheck_rules, "221 "))
3319 goto error;
3320
Christopher Faulet0108bb32017-10-20 21:34:32 +02003321 task_wakeup(check->task, TASK_WOKEN_MSG);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003322 LIST_ADDQ(&q->email_alerts, &alert->list);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003323 return 1;
3324
3325error:
3326 email_alert_free(alert);
3327 return 0;
3328}
3329
Christopher Faulet0108bb32017-10-20 21:34:32 +02003330static void enqueue_email_alert(struct proxy *p, struct server *s, const char *msg)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003331{
3332 int i;
3333 struct mailer *mailer;
3334
3335 for (i = 0, mailer = p->email_alert.mailers.m->mailer_list;
3336 i < p->email_alert.mailers.m->count; i++, mailer = mailer->next) {
Christopher Faulet0108bb32017-10-20 21:34:32 +02003337 if (!enqueue_one_email_alert(p, s, &p->email_alert.queues[i], msg)) {
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003338 Alert("Email alert [%s] could not be enqueued: out of memory\n", p->id);
3339 return;
3340 }
3341 }
3342
3343 return;
3344}
3345
3346/*
3347 * Send email alert if configured.
3348 */
Simon Horman64e34162015-02-06 11:11:57 +09003349void send_email_alert(struct server *s, int level, const char *format, ...)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003350{
3351 va_list argp;
3352 char buf[1024];
3353 int len;
3354 struct proxy *p = s->proxy;
3355
Christopher Faulet0108bb32017-10-20 21:34:32 +02003356 if (!p->email_alert.mailers.m || level > p->email_alert.level || format == NULL)
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003357 return;
3358
3359 va_start(argp, format);
3360 len = vsnprintf(buf, sizeof(buf), format, argp);
3361 va_end(argp);
3362
Thierry FOURNIER62c8a212017-02-09 12:19:27 +01003363 if (len < 0 || len >= sizeof(buf)) {
Cyril Bontéb65e0332015-12-04 03:07:08 +01003364 Alert("Email alert [%s] could not format message\n", p->id);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003365 return;
3366 }
3367
Christopher Faulet0108bb32017-10-20 21:34:32 +02003368 enqueue_email_alert(p, s, buf);
Simon Horman0ba0e4a2015-01-30 11:23:00 +09003369}
3370
Baptiste Assmann95db2bc2016-06-13 14:15:41 +02003371/*
3372 * Return value:
3373 * the port to be used for the health check
3374 * 0 in case no port could be found for the check
3375 */
3376int srv_check_healthcheck_port(struct check *chk)
3377{
3378 int i = 0;
3379 struct server *srv = NULL;
3380
3381 srv = chk->server;
3382
3383 /* If neither a port nor an addr was specified and no check transport
3384 * layer is forced, then the transport layer used by the checks is the
3385 * same as for the production traffic. Otherwise we use raw_sock by
3386 * default, unless one is specified.
3387 */
3388 if (!chk->port && !is_addr(&chk->addr)) {
Baptiste Assmann95db2bc2016-06-13 14:15:41 +02003389 chk->use_ssl |= (srv->use_ssl || (srv->proxy->options & PR_O_TCPCHK_SSL));
Baptiste Assmann95db2bc2016-06-13 14:15:41 +02003390 chk->send_proxy |= (srv->pp_opts);
3391 }
3392
3393 /* by default, we use the health check port ocnfigured */
3394 if (chk->port > 0)
3395 return chk->port;
3396
3397 /* try to get the port from check_core.addr if check.port not set */
3398 i = get_host_port(&chk->addr);
3399 if (i > 0)
3400 return i;
3401
3402 /* try to get the port from server address */
3403 /* prevent MAPPORTS from working at this point, since checks could
3404 * not be performed in such case (MAPPORTS impose a relative ports
3405 * based on live traffic)
3406 */
3407 if (srv->flags & SRV_F_MAPPORTS)
3408 return 0;
Willy Tarreau04276f32017-01-06 17:41:29 +01003409
3410 i = srv->svc_port; /* by default */
Baptiste Assmann95db2bc2016-06-13 14:15:41 +02003411 if (i > 0)
3412 return i;
3413
3414 return 0;
3415}
3416
Willy Tarreau865c5142016-12-21 20:04:48 +01003417__attribute__((constructor))
3418static void __check_init(void)
3419{
3420 hap_register_post_check(start_checks);
Christopher Faulet31dff9b2017-10-23 15:45:20 +02003421
3422 pool2_email_alert = create_pool("email_alert", sizeof(struct email_alert), MEM_F_SHARED);
3423 pool2_tcpcheck_rule = create_pool("tcpcheck_rule", sizeof(struct tcpcheck_rule), MEM_F_SHARED);
Willy Tarreau865c5142016-12-21 20:04:48 +01003424}
3425
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +02003426
Willy Tarreaubd741542010-03-16 18:46:54 +01003427/*
Willy Tarreaubaaee002006-06-26 02:48:02 +02003428 * Local variables:
3429 * c-indent-level: 8
3430 * c-basic-offset: 8
3431 * End:
3432 */