blob: 34039626071e62321865714ba6973f36eb9417bd [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Health-checks functions.
3 *
Willy Tarreau26c25062009-03-08 09:38:41 +01004 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +02005 * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
Willy Tarreaubaaee002006-06-26 02:48:02 +02006 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 */
13
Willy Tarreaub8816082008-01-18 12:18:15 +010014#include <assert.h>
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +020015#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020016#include <errno.h>
17#include <fcntl.h>
Simon Horman0ba0e4a2015-01-30 11:23:00 +090018#include <stdarg.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020019#include <stdio.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020020#include <stdlib.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020021#include <string.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020022#include <time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020023#include <unistd.h>
Willy Tarreau9f6dc722019-03-01 11:15:10 +010024#include <sys/resource.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020025#include <sys/socket.h>
Dmitry Sivachenkocaf58982009-08-24 15:11:06 +040026#include <sys/types.h>
Simon Horman98637e52014-06-20 12:30:16 +090027#include <sys/wait.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020028#include <netinet/in.h>
Willy Tarreau1274bc42009-07-15 07:16:31 +020029#include <netinet/tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020030#include <arpa/inet.h>
31
Willy Tarreau122eba92020-06-04 10:15:32 +020032#include <haproxy/action.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020033#include <haproxy/api.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020034#include <haproxy/arg.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020035#include <haproxy/cfgparse.h>
Willy Tarreau4aa573d2020-06-04 18:21:56 +020036#include <haproxy/check.h>
Willy Tarreauc13ed532020-06-02 10:22:45 +020037#include <haproxy/chunk.h>
Willy Tarreau7c18b542020-06-11 09:23:02 +020038#include <haproxy/dgram.h>
Christopher Fauletb381a502020-11-25 13:47:00 +010039#include <haproxy/dynbuf-t.h>
Willy Tarreaubcc67332020-06-05 15:31:31 +020040#include <haproxy/extcheck.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020041#include <haproxy/fd.h>
42#include <haproxy/global.h>
43#include <haproxy/h1.h>
Willy Tarreaucd72d8c2020-06-02 19:11:26 +020044#include <haproxy/http.h>
Willy Tarreau87735332020-06-04 09:08:41 +020045#include <haproxy/http_htx.h>
Willy Tarreau16f958c2020-06-03 08:44:35 +020046#include <haproxy/htx.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020047#include <haproxy/istbuf.h>
48#include <haproxy/list.h>
Willy Tarreauaeed4a82020-06-04 22:01:04 +020049#include <haproxy/log.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020050#include <haproxy/mailers.h>
51#include <haproxy/port_range.h>
52#include <haproxy/proto_tcp.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020053#include <haproxy/protocol.h>
Willy Tarreaua264d962020-06-04 22:29:18 +020054#include <haproxy/proxy.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020055#include <haproxy/queue.h>
56#include <haproxy/regex.h>
Emeric Brunc9437992021-02-12 19:42:55 +010057#include <haproxy/resolvers.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020058#include <haproxy/sample.h>
Willy Tarreau1e56f922020-06-04 23:20:13 +020059#include <haproxy/server.h>
Willy Tarreau209108d2020-06-04 20:30:20 +020060#include <haproxy/ssl_sock.h>
Willy Tarreau2eec9b52020-06-04 19:58:55 +020061#include <haproxy/stats-t.h>
Willy Tarreau5e539c92020-06-04 20:45:39 +020062#include <haproxy/stream_interface.h>
Willy Tarreaucea0e1b2020-06-04 17:25:40 +020063#include <haproxy/task.h>
Willy Tarreau51cd5952020-06-05 12:25:38 +020064#include <haproxy/tcpcheck.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020065#include <haproxy/thread.h>
66#include <haproxy/time.h>
67#include <haproxy/tools.h>
Christopher Faulet147b8c92021-04-10 09:00:38 +020068#include <haproxy/trace.h>
Willy Tarreaua1718922020-06-04 16:25:31 +020069#include <haproxy/vars.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020070
/* Trace callback of the "check" trace source. It is only declared here, ahead
 * of its definition later in this file, so that the trace_check source
 * descriptor can reference it through its .default_cb member.
 */
static void check_trace(enum trace_level level, uint64_t mask,
			const struct trace_source *src,
			const struct ist where, const struct ist func,
			const void *a1, const void *a2, const void *a3, const void *a4);
76
/* The event representation is split like this :
 *   check  - check
 *
 * CHK_EV_* macros are defined in <haproxy/check.h>
 *
 * Each entry below associates one CHK_EV_* mask with the short name and the
 * description of the event. The list ends with an empty entry.
 */
static const struct trace_event check_trace_events[] = {
	{ .mask = CHK_EV_TASK_WAKE,    .name = "task_wake",        .desc = "Check task woken up" },
	{ .mask = CHK_EV_HCHK_START,   .name = "hchck_start",      .desc = "Health-check started" },
	{ .mask = CHK_EV_HCHK_WAKE,    .name = "hchck_wake",       .desc = "Health-check woken up" },
	{ .mask = CHK_EV_HCHK_RUN,     .name = "hchck_run",        .desc = "Health-check running" },
	{ .mask = CHK_EV_HCHK_END,     .name = "hchck_end",        .desc = "Health-check terminated" },
	{ .mask = CHK_EV_HCHK_SUCC,    .name = "hchck_succ",       .desc = "Health-check success" },
	{ .mask = CHK_EV_HCHK_ERR,     .name = "hchck_err",        .desc = "Health-check failure" },

	{ .mask = CHK_EV_TCPCHK_EVAL,  .name = "tcp_check_eval",   .desc = "tcp-check rules evaluation" },
	{ .mask = CHK_EV_TCPCHK_ERR,   .name = "tcp_check_err",    .desc = "tcp-check evaluation error" },
	{ .mask = CHK_EV_TCPCHK_CONN,  .name = "tcp_check_conn",   .desc = "tcp-check connection rule" },
	{ .mask = CHK_EV_TCPCHK_SND,   .name = "tcp_check_send",   .desc = "tcp-check send rule" },
	{ .mask = CHK_EV_TCPCHK_EXP,   .name = "tcp_check_expect", .desc = "tcp-check expect rule" },
	{ .mask = CHK_EV_TCPCHK_ACT,   .name = "tcp_check_action", .desc = "tcp-check action rule" },

	{ .mask = CHK_EV_RX_DATA,      .name = "rx_data",          .desc = "receipt of data" },
	{ .mask = CHK_EV_RX_BLK,       .name = "rx_blk",           .desc = "receipt blocked" },
	{ .mask = CHK_EV_RX_ERR,       .name = "rx_err",           .desc = "receipt error" },

	{ .mask = CHK_EV_TX_DATA,      .name = "tx_data",          .desc = "transmission of data" },
	{ .mask = CHK_EV_TX_BLK,       .name = "tx_blk",           .desc = "transmission blocked" },
	{ .mask = CHK_EV_TX_ERR,       .name = "tx_err",           .desc = "transmission error" },

	{}
};
108
/* Names/descriptions of the four trace arguments usable for "lock-on". None
 * is named here: the first argument is the check itself and the three other
 * slots are left empty.
 */
static const struct name_desc check_trace_lockon_args[4] = {
	/* arg1 */ { /* already used by the check */ },
	/* arg2 */ { },
	/* arg3 */ { },
	/* arg4 */ { }
};
115
/* Verbosity levels supported by the "check" trace source. The CHK_VERB_*
 * value defined right before each entry is the one compared against
 * src->verbosity in check_trace(). The list ends with an empty entry.
 *
 * NOTE(review): the descriptions mention streams, stream-interfaces and
 * channels while check_trace() reports checks, connections and the check's
 * bi/bo buffers; the wording looks inherited from another trace source and
 * should be confirmed.
 */
static const struct name_desc check_trace_decoding[] = {
#define CHK_VERB_CLEAN    1
	{ .name="clean",    .desc="only user-friendly stuff, generally suitable for level \"user\"" },
#define CHK_VERB_MINIMAL  2
	{ .name="minimal",  .desc="report info on stream and stream-interfaces" },
#define CHK_VERB_SIMPLE   3
	{ .name="simple",   .desc="add info on request and response channels" },
#define CHK_VERB_ADVANCED 4
	{ .name="advanced", .desc="add info on channel's buffer for data and developer levels only" },
#define CHK_VERB_COMPLETE 5
	{ .name="complete", .desc="add info on channel's buffer" },
	{ /* end */ }
};
129
/* Descriptor of the "check" trace source: it ties together the callback, the
 * known events, the lock-on arguments and the verbosity levels declared above.
 */
struct trace_source trace_check = {
	.name = IST("check"),
	.desc = "Health-check",
	.arg_def = TRC_ARG1_CHK,  // TRACE()'s first argument is always a check
	.default_cb = check_trace,
	.known_events = check_trace_events,
	.lockon_args = check_trace_lockon_args,
	.decoding = check_trace_decoding,
	.report_events = ~0,  // report everything by default
};
140
/* TRACE_SOURCE designates the trace source of this file (presumably picked up
 * by the TRACE_*() macros used below, which do not name a source themselves).
 * The source is registered with trace_register_source() at the STG_REGISTER
 * init stage.
 */
#define TRACE_SOURCE &trace_check
INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
143
Olivier Houchard9130a962017-10-17 17:33:43 +0200144
/* wake_srv_chk() is only declared here because it is needed to fill the
 * data-layer callbacks below, which are published under the name "CHCK".
 */
static int wake_srv_chk(struct conn_stream *cs);
struct data_cb check_conn_cb = {
	.wake = wake_srv_chk,
	.name = "CHCK",
};


/* Dummy frontend used to create all checks sessions. */
struct proxy checks_fe;
Christopher Faulet31dff9b2017-10-23 15:45:20 +0200154
Christopher Faulet147b8c92021-04-10 09:00:38 +0200155
156static inline void check_trace_buf(const struct buffer *buf, size_t ofs, size_t len)
157{
158 size_t block1, block2;
159 int line, ptr, newptr;
160
161 block1 = b_contig_data(buf, ofs);
162 block2 = 0;
163 if (block1 > len)
164 block1 = len;
165 block2 = len - block1;
166
167 ofs = b_peek_ofs(buf, ofs);
168
169 line = 0;
170 ptr = ofs;
171 while (ptr < ofs + block1) {
172 newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), ofs + block1, &line, ptr);
173 if (newptr == ptr)
174 break;
175 ptr = newptr;
176 }
177
178 line = ptr = 0;
179 while (ptr < block2) {
180 newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), block2, &line, ptr);
181 if (newptr == ptr)
182 break;
183 ptr = newptr;
184 }
185}
186
187/* trace source and events */
188static void check_trace(enum trace_level level, uint64_t mask,
189 const struct trace_source *src,
190 const struct ist where, const struct ist func,
191 const void *a1, const void *a2, const void *a3, const void *a4)
192{
193 const struct check *check = a1;
194 const struct server *srv = (check ? check->server : NULL);
195 const size_t *val = a4;
196 const char *res;
197
198 if (!check || src->verbosity < CHK_VERB_CLEAN)
199 return;
200
201 chunk_appendf(&trace_buf, " : [%c] SRV=%s",
202 ((check->type == PR_O2_EXT_CHK) ? 'E' : (check->state & CHK_ST_AGENT ? 'A' : 'H')),
203 srv->id);
204
205 chunk_appendf(&trace_buf, " status=%d/%d %s",
206 (check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
207 (check->health >= check->rise) ? check->fall : check->rise,
208 (check->health >= check->rise) ? (srv->uweight ? "UP" : "DRAIN") : "DOWN");
209
210 switch (check->result) {
211 case CHK_RES_NEUTRAL: res = "-"; break;
212 case CHK_RES_FAILED: res = "FAIL"; break;
213 case CHK_RES_PASSED: res = "PASS"; break;
214 case CHK_RES_CONDPASS: res = "COND"; break;
215 default: res = "UNK"; break;
216 }
217
218 if (src->verbosity == CHK_VERB_CLEAN)
219 return;
220
221 chunk_appendf(&trace_buf, " - last=%s(%d)/%s(%d)",
222 get_check_status_info(check->status), check->status,
223 res, check->result);
224
225 /* Display the value to the 4th argument (level > STATE) */
226 if (src->level > TRACE_LEVEL_STATE && val)
227 chunk_appendf(&trace_buf, " - VAL=%lu", (long)*val);
228
229 chunk_appendf(&trace_buf, " check=%p(0x%08x)", check, check->state);
230
231 if (src->verbosity == CHK_VERB_MINIMAL)
232 return;
233
234
235 if (check->cs) {
236 chunk_appendf(&trace_buf, " - conn=%p(0x%08x)", check->cs->conn, check->cs->conn->flags);
237 chunk_appendf(&trace_buf, " cs=%p(0x%08x)", check->cs, check->cs->flags);
238 }
239
240 if (mask & CHK_EV_TCPCHK) {
241 const char *type;
242
243 switch (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) {
244 case TCPCHK_RULES_PGSQL_CHK: type = "PGSQL"; break;
245 case TCPCHK_RULES_REDIS_CHK: type = "REDIS"; break;
246 case TCPCHK_RULES_SMTP_CHK: type = "SMTP"; break;
247 case TCPCHK_RULES_HTTP_CHK: type = "HTTP"; break;
248 case TCPCHK_RULES_MYSQL_CHK: type = "MYSQL"; break;
249 case TCPCHK_RULES_LDAP_CHK: type = "LDAP"; break;
250 case TCPCHK_RULES_SSL3_CHK: type = "SSL3"; break;
251 case TCPCHK_RULES_AGENT_CHK: type = "AGENT"; break;
252 case TCPCHK_RULES_SPOP_CHK: type = "SPOP"; break;
253 case TCPCHK_RULES_TCP_CHK: type = "TCP"; break;
254 default: type = "???"; break;
255 }
256 if (check->current_step)
257 chunk_appendf(&trace_buf, " - tcp-check=(%s,%d)", type, tcpcheck_get_step_id(check, NULL));
258 else
259 chunk_appendf(&trace_buf, " - tcp-check=(%s,-)", type);
260 }
261
262 /* Display bi and bo buffer info (level > USER & verbosity > SIMPLE) */
263 if (src->level > TRACE_LEVEL_USER) {
264 const struct buffer *buf = NULL;
265
266 chunk_appendf(&trace_buf, " bi=%u@%p+%u/%u",
267 (unsigned int)b_data(&check->bi), b_orig(&check->bi),
268 (unsigned int)b_head_ofs(&check->bi), (unsigned int)b_size(&check->bi));
269 chunk_appendf(&trace_buf, " bo=%u@%p+%u/%u",
270 (unsigned int)b_data(&check->bo), b_orig(&check->bo),
271 (unsigned int)b_head_ofs(&check->bo), (unsigned int)b_size(&check->bo));
272
273 if (src->verbosity >= CHK_VERB_ADVANCED && (mask & (CHK_EV_RX)))
274 buf = (b_is_null(&check->bi) ? NULL : &check->bi);
275 else if (src->verbosity >= CHK_VERB_ADVANCED && (mask & (CHK_EV_TX)))
276 buf = (b_is_null(&check->bo) ? NULL : &check->bo);
277
278 if (buf) {
279 if ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK) {
280 int full = (src->verbosity == CHK_VERB_COMPLETE);
281
282 chunk_memcat(&trace_buf, "\n\t", 2);
283 htx_dump(&trace_buf, htxbuf(buf), full);
284 }
285 else {
286 int max = ((src->verbosity == CHK_VERB_COMPLETE) ? 1024 : 256);
287
288 chunk_memcat(&trace_buf, "\n", 1);
289 if (b_data(buf) > max) {
290 check_trace_buf(buf, 0, max);
291 chunk_memcat(&trace_buf, " ...\n", 6);
292 }
293 else
294 check_trace_buf(buf, 0, b_data(buf));
295 }
296
297 }
298 }
299
300}
301
302
Christopher Faulet61cc8522020-04-20 14:54:42 +0200303/**************************************************************************/
304/************************ Handle check results ****************************/
305/**************************************************************************/
/* Static description of one health-check status code. The strings are meant to
 * reference string literals, hence the const qualifiers.
 */
struct check_status {
	short result;			/* one of CHK_RES_* */
	const char *info;		/* human readable short info */
	const char *desc;		/* long description */
};
311
312struct analyze_status {
313 char *desc; /* description */
314 unsigned char lr[HANA_OBS_SIZE]; /* result for l4/l7: 0 = ignore, 1 - error, 2 - OK */
315};
316
Simon Horman63a4a822012-03-19 07:24:41 +0900317static const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100318 [HCHK_STATUS_UNKNOWN] = { CHK_RES_UNKNOWN, "UNK", "Unknown" },
319 [HCHK_STATUS_INI] = { CHK_RES_UNKNOWN, "INI", "Initializing" },
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200320 [HCHK_STATUS_START] = { /* SPECIAL STATUS*/ },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200321
Willy Tarreau23964182014-05-20 20:56:30 +0200322 /* Below we have finished checks */
323 [HCHK_STATUS_CHECKED] = { CHK_RES_NEUTRAL, "CHECKED", "No status change" },
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100324 [HCHK_STATUS_HANA] = { CHK_RES_FAILED, "HANA", "Health analyze" },
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100325
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100326 [HCHK_STATUS_SOCKERR] = { CHK_RES_FAILED, "SOCKERR", "Socket error" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200327
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100328 [HCHK_STATUS_L4OK] = { CHK_RES_PASSED, "L4OK", "Layer4 check passed" },
329 [HCHK_STATUS_L4TOUT] = { CHK_RES_FAILED, "L4TOUT", "Layer4 timeout" },
330 [HCHK_STATUS_L4CON] = { CHK_RES_FAILED, "L4CON", "Layer4 connection problem" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200331
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100332 [HCHK_STATUS_L6OK] = { CHK_RES_PASSED, "L6OK", "Layer6 check passed" },
333 [HCHK_STATUS_L6TOUT] = { CHK_RES_FAILED, "L6TOUT", "Layer6 timeout" },
334 [HCHK_STATUS_L6RSP] = { CHK_RES_FAILED, "L6RSP", "Layer6 invalid response" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200335
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100336 [HCHK_STATUS_L7TOUT] = { CHK_RES_FAILED, "L7TOUT", "Layer7 timeout" },
337 [HCHK_STATUS_L7RSP] = { CHK_RES_FAILED, "L7RSP", "Layer7 invalid response" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200338
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200339 [HCHK_STATUS_L57DATA] = { /* DUMMY STATUS */ },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200340
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100341 [HCHK_STATUS_L7OKD] = { CHK_RES_PASSED, "L7OK", "Layer7 check passed" },
342 [HCHK_STATUS_L7OKCD] = { CHK_RES_CONDPASS, "L7OKC", "Layer7 check conditionally passed" },
343 [HCHK_STATUS_L7STS] = { CHK_RES_FAILED, "L7STS", "Layer7 wrong status" },
Simon Horman98637e52014-06-20 12:30:16 +0900344
345 [HCHK_STATUS_PROCERR] = { CHK_RES_FAILED, "PROCERR", "External check error" },
346 [HCHK_STATUS_PROCTOUT] = { CHK_RES_FAILED, "PROCTOUT", "External check timeout" },
Cyril Bonté77010d82014-08-07 01:55:37 +0200347 [HCHK_STATUS_PROCOK] = { CHK_RES_PASSED, "PROCOK", "External check passed" },
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200348};
349
Simon Horman63a4a822012-03-19 07:24:41 +0900350static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100351 [HANA_STATUS_UNKNOWN] = { "Unknown", { 0, 0 }},
352
353 [HANA_STATUS_L4_OK] = { "L4 successful connection", { 2, 0 }},
354 [HANA_STATUS_L4_ERR] = { "L4 unsuccessful connection", { 1, 1 }},
355
356 [HANA_STATUS_HTTP_OK] = { "Correct http response", { 0, 2 }},
357 [HANA_STATUS_HTTP_STS] = { "Wrong http response", { 0, 1 }},
358 [HANA_STATUS_HTTP_HDRRSP] = { "Invalid http response (headers)", { 0, 1 }},
359 [HANA_STATUS_HTTP_RSP] = { "Invalid http response", { 0, 1 }},
360
361 [HANA_STATUS_HTTP_READ_ERROR] = { "Read error (http)", { 0, 1 }},
362 [HANA_STATUS_HTTP_READ_TIMEOUT] = { "Read timeout (http)", { 0, 1 }},
363 [HANA_STATUS_HTTP_BROKEN_PIPE] = { "Close from server (http)", { 0, 1 }},
364};
365
/* Filters the errno value <err>: EAGAIN, EINPROGRESS, EISCONN and EALREADY are
 * not considered as real errors and are turned into 0, any other value is
 * returned unchanged.
 */
static inline int unclean_errno(int err)
{
	switch (err) {
	case EAGAIN:
	case EINPROGRESS:
	case EISCONN:
	case EALREADY:
		return 0;
	default:
		return err;
	}
}
376
Christopher Faulet7aa32712021-02-01 13:11:50 +0100377/* Converts check_status code to result code */
378short get_check_status_result(short check_status)
379{
380 if (check_status < HCHK_STATUS_SIZE)
381 return check_statuses[check_status].result;
382 else
383 return check_statuses[HCHK_STATUS_UNKNOWN].result;
384}
385
Christopher Faulet61cc8522020-04-20 14:54:42 +0200386/* Converts check_status code to description */
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200387const char *get_check_status_description(short check_status) {
388
389 const char *desc;
390
391 if (check_status < HCHK_STATUS_SIZE)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200392 desc = check_statuses[check_status].desc;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200393 else
394 desc = NULL;
395
396 if (desc && *desc)
397 return desc;
398 else
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200399 return check_statuses[HCHK_STATUS_UNKNOWN].desc;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200400}
401
Christopher Faulet61cc8522020-04-20 14:54:42 +0200402/* Converts check_status code to short info */
William Dauchyb26122b2021-02-14 22:26:23 +0100403const char *get_check_status_info(short check_status)
404{
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200405 const char *info;
406
407 if (check_status < HCHK_STATUS_SIZE)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200408 info = check_statuses[check_status].info;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200409 else
410 info = NULL;
411
412 if (info && *info)
413 return info;
414 else
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200415 return check_statuses[HCHK_STATUS_UNKNOWN].info;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200416}
417
Christopher Faulet61cc8522020-04-20 14:54:42 +0200418/* Convert analyze_status to description */
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100419const char *get_analyze_status(short analyze_status) {
420
421 const char *desc;
422
423 if (analyze_status < HANA_STATUS_SIZE)
424 desc = analyze_statuses[analyze_status].desc;
425 else
426 desc = NULL;
427
428 if (desc && *desc)
429 return desc;
430 else
431 return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
432}
433
Christopher Faulet61cc8522020-04-20 14:54:42 +0200434/* Sets check->status, update check->duration and fill check->result with an
435 * adequate CHK_RES_* value. The new check->health is computed based on the
436 * result.
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200437 *
Christopher Faulet61cc8522020-04-20 14:54:42 +0200438 * Shows information in logs about failed health check if server is UP or
439 * succeeded health checks if server is DOWN.
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200440 */
Willy Tarreau51cd5952020-06-05 12:25:38 +0200441void set_server_check_status(struct check *check, short status, const char *desc)
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100442{
Simon Horman4a741432013-02-23 15:35:38 +0900443 struct server *s = check->server;
Willy Tarreaubef1b322014-05-13 21:01:39 +0200444 short prev_status = check->status;
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200445 int report = 0;
Simon Horman4a741432013-02-23 15:35:38 +0900446
Christopher Faulet147b8c92021-04-10 09:00:38 +0200447 TRACE_POINT(CHK_EV_HCHK_RUN, check);
448
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200449 if (status == HCHK_STATUS_START) {
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100450 check->result = CHK_RES_UNKNOWN; /* no result yet */
Simon Horman4a741432013-02-23 15:35:38 +0900451 check->desc[0] = '\0';
452 check->start = now;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200453 return;
454 }
455
Simon Horman4a741432013-02-23 15:35:38 +0900456 if (!check->status)
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200457 return;
458
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200459 if (desc && *desc) {
Simon Horman4a741432013-02-23 15:35:38 +0900460 strncpy(check->desc, desc, HCHK_DESC_LEN-1);
461 check->desc[HCHK_DESC_LEN-1] = '\0';
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200462 } else
Simon Horman4a741432013-02-23 15:35:38 +0900463 check->desc[0] = '\0';
Krzysztof Piotr Oledzkif7089f52009-10-10 21:06:49 +0200464
Simon Horman4a741432013-02-23 15:35:38 +0900465 check->status = status;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200466 if (check_statuses[status].result)
Simon Horman4a741432013-02-23 15:35:38 +0900467 check->result = check_statuses[status].result;
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200468
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100469 if (status == HCHK_STATUS_HANA)
Simon Horman4a741432013-02-23 15:35:38 +0900470 check->duration = -1;
471 else if (!tv_iszero(&check->start)) {
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200472 /* set_server_check_status() may be called more than once */
Simon Horman4a741432013-02-23 15:35:38 +0900473 check->duration = tv_ms_elapsed(&check->start, &now);
474 tv_zero(&check->start);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200475 }
476
Willy Tarreau23964182014-05-20 20:56:30 +0200477 /* no change is expected if no state change occurred */
478 if (check->result == CHK_RES_NEUTRAL)
479 return;
480
Olivier Houchard0923fa42019-01-11 18:43:04 +0100481 /* If the check was really just sending a mail, it won't have an
482 * associated server, so we're done now.
483 */
484 if (!s)
485 return;
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200486 report = 0;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200487
Christopher Faulet147b8c92021-04-10 09:00:38 +0200488
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200489 switch (check->result) {
490 case CHK_RES_FAILED:
Willy Tarreau12634e12014-05-23 11:32:36 +0200491 /* Failure to connect to the agent as a secondary check should not
492 * cause the server to be marked down.
493 */
494 if ((!(check->state & CHK_ST_AGENT) ||
Simon Hormaneaabd522015-02-26 11:26:17 +0900495 (check->status >= HCHK_STATUS_L57DATA)) &&
Christopher Fauletb119a792018-05-02 12:12:45 +0200496 (check->health > 0)) {
Willy Tarreau4781b152021-04-06 13:53:36 +0200497 _HA_ATOMIC_INC(&s->counters.failed_checks);
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200498 report = 1;
499 check->health--;
500 if (check->health < check->rise)
501 check->health = 0;
502 }
503 break;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200504
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200505 case CHK_RES_PASSED:
Christopher Faulet1e527cb2020-11-20 18:13:02 +0100506 case CHK_RES_CONDPASS:
507 if (check->health < check->rise + check->fall - 1) {
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200508 report = 1;
509 check->health++;
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200510
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200511 if (check->health >= check->rise)
512 check->health = check->rise + check->fall - 1; /* OK now */
513 }
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200514
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200515 /* clear consecutive_errors if observing is enabled */
516 if (s->onerror)
517 s->consecutive_errors = 0;
518 break;
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100519
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200520 default:
521 break;
522 }
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200523
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200524 if (s->proxy->options2 & PR_O2_LOGHCHKS &&
525 (status != prev_status || report)) {
526 chunk_printf(&trash,
Willy Tarreau12634e12014-05-23 11:32:36 +0200527 "%s check for %sserver %s/%s %s%s",
528 (check->state & CHK_ST_AGENT) ? "Agent" : "Health",
Willy Tarreauc93cd162014-05-13 15:54:22 +0200529 s->flags & SRV_F_BACKUP ? "backup " : "",
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100530 s->proxy->id, s->id,
Willy Tarreau6aaa1b82013-12-11 17:09:34 +0100531 (check->result == CHK_RES_CONDPASS) ? "conditionally ":"",
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200532 (check->result >= CHK_RES_PASSED) ? "succeeded" : "failed");
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200533
Emeric Brun5a133512017-10-19 14:42:30 +0200534 srv_append_status(&trash, s, check, -1, 0);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200535
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100536 chunk_appendf(&trash, ", status: %d/%d %s",
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200537 (check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
538 (check->health >= check->rise) ? check->fall : check->rise,
539 (check->health >= check->rise) ? (s->uweight ? "UP" : "DRAIN") : "DOWN");
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200540
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200541 ha_warning("%s.\n", trash.area);
542 send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area);
543 send_email_alert(s, LOG_INFO, "%s", trash.area);
Krzysztof Piotr Oledzki213014e2009-09-27 15:50:02 +0200544 }
Krzysztof Piotr Oledzki09605412009-09-23 22:09:24 +0200545}
546
Willy Tarreau4eec5472014-05-20 22:32:27 +0200547/* Marks the check <check>'s server down if the current check is already failed
548 * and the server is not down yet nor in maintenance.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200549 */
Willy Tarreaubcc67332020-06-05 15:31:31 +0200550void check_notify_failure(struct check *check)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200551{
Simon Horman4a741432013-02-23 15:35:38 +0900552 struct server *s = check->server;
Simon Hormane0d1bfb2011-06-21 14:34:58 +0900553
Willy Tarreau7b1d47c2014-05-20 14:55:13 +0200554 /* The agent secondary check should only cause a server to be marked
555 * as down if check->status is HCHK_STATUS_L7STS, which indicates
556 * that the agent returned "fail", "stopped" or "down".
557 * The implication here is that failure to connect to the agent
558 * as a secondary check should not cause the server to be marked
559 * down. */
560 if ((check->state & CHK_ST_AGENT) && check->status != HCHK_STATUS_L7STS)
561 return;
562
Willy Tarreau4eec5472014-05-20 22:32:27 +0200563 if (check->health > 0)
564 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100565
Christopher Faulet147b8c92021-04-10 09:00:38 +0200566 TRACE_STATE("health-check failed, set server DOWN", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
Willy Tarreau4eec5472014-05-20 22:32:27 +0200567 /* We only report a reason for the check if we did not do so previously */
Emeric Brun5a133512017-10-19 14:42:30 +0200568 srv_set_stopped(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200569}
570
Willy Tarreauaf549582014-05-16 17:37:50 +0200571/* Marks the check <check> as valid and tries to set its server up, provided
Willy Tarreau3e048382014-05-21 10:30:54 +0200572 * it isn't in maintenance, it is not tracking a down server and other checks
573 * comply. The rule is simple : by default, a server is up, unless any of the
574 * following conditions is true :
575 * - health check failed (check->health < rise)
576 * - agent check failed (agent->health < rise)
577 * - the server tracks a down server (track && track->state == STOPPED)
578 * Note that if the server has a slowstart, it will switch to STARTING instead
579 * of RUNNING. Also, only the health checks support the nolb mode, so the
580 * agent's success may not take the server out of this mode.
Willy Tarreauaf549582014-05-16 17:37:50 +0200581 */
Willy Tarreaubcc67332020-06-05 15:31:31 +0200582void check_notify_success(struct check *check)
Willy Tarreauaf549582014-05-16 17:37:50 +0200583{
Simon Horman4a741432013-02-23 15:35:38 +0900584 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100585
Emeric Brun52a91d32017-08-31 14:41:55 +0200586 if (s->next_admin & SRV_ADMF_MAINT)
Willy Tarreauaf549582014-05-16 17:37:50 +0200587 return;
Cyril Bontécd19e512010-01-31 22:34:03 +0100588
Emeric Brun52a91d32017-08-31 14:41:55 +0200589 if (s->track && s->track->next_state == SRV_ST_STOPPED)
Willy Tarreauaf549582014-05-16 17:37:50 +0200590 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100591
Willy Tarreau3e048382014-05-21 10:30:54 +0200592 if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
593 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100594
Willy Tarreau3e048382014-05-21 10:30:54 +0200595 if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
596 return;
Willy Tarreauaf549582014-05-16 17:37:50 +0200597
Emeric Brun52a91d32017-08-31 14:41:55 +0200598 if ((check->state & CHK_ST_AGENT) && s->next_state == SRV_ST_STOPPING)
Willy Tarreau3e048382014-05-21 10:30:54 +0200599 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100600
Christopher Faulet147b8c92021-04-10 09:00:38 +0200601 TRACE_STATE("health-check succeeded, set server RUNNING", CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
Emeric Brun5a133512017-10-19 14:42:30 +0200602 srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100603}
604
Willy Tarreaudb58b792014-05-21 13:57:23 +0200605/* Marks the check <check> as valid and tries to set its server into stopping mode
606 * if it was running or starting, and provided it isn't in maintenance and other
607 * checks comply. The conditions for the server to be marked in stopping mode are
608 * the same as for it to be turned up. Also, only the health checks support the
609 * nolb mode.
Willy Tarreauaf549582014-05-16 17:37:50 +0200610 */
Willy Tarreaubcc67332020-06-05 15:31:31 +0200611void check_notify_stopping(struct check *check)
Willy Tarreauaf549582014-05-16 17:37:50 +0200612{
Simon Horman4a741432013-02-23 15:35:38 +0900613 struct server *s = check->server;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100614
Emeric Brun52a91d32017-08-31 14:41:55 +0200615 if (s->next_admin & SRV_ADMF_MAINT)
Willy Tarreauaf549582014-05-16 17:37:50 +0200616 return;
617
Willy Tarreaudb58b792014-05-21 13:57:23 +0200618 if (check->state & CHK_ST_AGENT)
619 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100620
Emeric Brun52a91d32017-08-31 14:41:55 +0200621 if (s->track && s->track->next_state == SRV_ST_STOPPED)
Willy Tarreaudb58b792014-05-21 13:57:23 +0200622 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100623
Willy Tarreaudb58b792014-05-21 13:57:23 +0200624 if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
625 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100626
Willy Tarreaudb58b792014-05-21 13:57:23 +0200627 if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
628 return;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100629
Christopher Faulet147b8c92021-04-10 09:00:38 +0200630 TRACE_STATE("health-check condionnaly succeeded, set server STOPPING", CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
Willy Tarreaub26881a2017-12-23 11:16:49 +0100631 srv_set_stopping(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100632}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200633
Willy Tarreau9fe7aae2013-12-31 23:47:37 +0100634/* note: use health_adjust() only, which first checks that the observe mode is
Willy Tarreau4e9df272021-02-17 15:20:19 +0100635 * enabled. This will take the server lock if needed.
Willy Tarreau9fe7aae2013-12-31 23:47:37 +0100636 */
637void __health_adjust(struct server *s, short status)
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100638{
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100639 int failed;
640 int expire;
641
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100642 if (s->observe >= HANA_OBS_SIZE)
643 return;
644
Willy Tarreaubb956662013-01-24 00:37:39 +0100645 if (status >= HANA_STATUS_SIZE || !analyze_statuses[status].desc)
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100646 return;
647
648 switch (analyze_statuses[status].lr[s->observe - 1]) {
649 case 1:
650 failed = 1;
651 break;
652
653 case 2:
654 failed = 0;
655 break;
656
657 default:
658 return;
659 }
660
661 if (!failed) {
662 /* good: clear consecutive_errors */
663 s->consecutive_errors = 0;
664 return;
665 }
666
Willy Tarreau4781b152021-04-06 13:53:36 +0200667 _HA_ATOMIC_INC(&s->consecutive_errors);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100668
669 if (s->consecutive_errors < s->consecutive_errors_limit)
670 return;
671
Willy Tarreau19d14ef2012-10-29 16:51:55 +0100672 chunk_printf(&trash, "Detected %d consecutive errors, last one was: %s",
673 s->consecutive_errors, get_analyze_status(status));
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100674
Willy Tarreau4e9df272021-02-17 15:20:19 +0100675 if (s->check.fastinter)
676 expire = tick_add(now_ms, MS_TO_TICKS(s->check.fastinter));
677 else
678 expire = TICK_ETERNITY;
679
680 HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
681
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100682 switch (s->onerror) {
683 case HANA_ONERR_FASTINTER:
684 /* force fastinter - nothing to do here as all modes force it */
685 break;
686
687 case HANA_ONERR_SUDDTH:
688 /* simulate a pre-fatal failed health check */
Simon Horman58c32972013-11-25 10:46:38 +0900689 if (s->check.health > s->check.rise)
690 s->check.health = s->check.rise + 1;
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100691
Tim Duesterhus588b3142020-05-29 14:35:51 +0200692 /* fall through */
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100693
694 case HANA_ONERR_FAILCHK:
695 /* simulate a failed health check */
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200696 set_server_check_status(&s->check, HCHK_STATUS_HANA,
697 trash.area);
Willy Tarreau4eec5472014-05-20 22:32:27 +0200698 check_notify_failure(&s->check);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100699 break;
700
701 case HANA_ONERR_MARKDWN:
702 /* mark server down */
Simon Horman58c32972013-11-25 10:46:38 +0900703 s->check.health = s->check.rise;
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200704 set_server_check_status(&s->check, HCHK_STATUS_HANA,
705 trash.area);
Willy Tarreau4eec5472014-05-20 22:32:27 +0200706 check_notify_failure(&s->check);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100707 break;
708
709 default:
710 /* write a warning? */
711 break;
712 }
713
Willy Tarreau4e9df272021-02-17 15:20:19 +0100714 HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
715
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100716 s->consecutive_errors = 0;
Willy Tarreau4781b152021-04-06 13:53:36 +0200717 _HA_ATOMIC_INC(&s->counters.failed_hana);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100718
Christopher Fauletea860832021-05-07 11:45:26 +0200719 if (tick_isset(expire) && tick_is_lt(expire, s->check.task->expire)) {
Willy Tarreau4e9df272021-02-17 15:20:19 +0100720 /* requeue check task with new expire */
721 task_schedule(s->check.task, expire);
Krzysztof Piotr Oledzki97f07b82009-12-15 22:31:24 +0100722 }
Willy Tarreauef781042010-01-27 11:53:01 +0100723}
724
Christopher Faulet61cc8522020-04-20 14:54:42 +0200725/* Checks the connection. If an error has already been reported or the socket is
Willy Tarreau20a18342013-12-05 00:31:46 +0100726 * closed, keep errno intact as it is supposed to contain the valid error code.
727 * If no error is reported, check the socket's error queue using getsockopt().
728 * Warning, this must be done only once when returning from poll, and never
729 * after an I/O error was attempted, otherwise the error queue might contain
730 * inconsistent errors. If an error is detected, the CO_FL_ERROR is set on the
731 * socket. Returns non-zero if an error was reported, zero if everything is
732 * clean (including a properly closed socket).
733 */
734static int retrieve_errno_from_socket(struct connection *conn)
735{
736 int skerr;
737 socklen_t lskerr = sizeof(skerr);
738
Willy Tarreauc8dc20a2019-12-27 12:03:27 +0100739 if (conn->flags & CO_FL_ERROR && (unclean_errno(errno) || !conn->ctrl))
Willy Tarreau20a18342013-12-05 00:31:46 +0100740 return 1;
741
Willy Tarreau3c728722014-01-23 13:50:42 +0100742 if (!conn_ctrl_ready(conn))
Willy Tarreau20a18342013-12-05 00:31:46 +0100743 return 0;
744
Willy Tarreau585744b2017-08-24 14:31:19 +0200745 if (getsockopt(conn->handle.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0)
Willy Tarreau20a18342013-12-05 00:31:46 +0100746 errno = skerr;
747
Willy Tarreauc8dc20a2019-12-27 12:03:27 +0100748 errno = unclean_errno(errno);
Willy Tarreau20a18342013-12-05 00:31:46 +0100749
750 if (!errno) {
751 /* we could not retrieve an error, that does not mean there is
752 * none. Just don't change anything and only report the prior
753 * error if any.
754 */
755 if (conn->flags & CO_FL_ERROR)
756 return 1;
757 else
758 return 0;
759 }
760
761 conn->flags |= CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_SOCK_RD_SH;
762 return 1;
763}
764
Christopher Faulet61cc8522020-04-20 14:54:42 +0200765/* Tries to collect as much information as possible on the connection status,
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100766 * and adjust the server status accordingly. It may make use of <errno_bck>
767 * if non-null when the caller is absolutely certain of its validity (eg:
768 * checked just after a syscall). If the caller doesn't have a valid errno,
769 * it can pass zero, and retrieve_errno_from_socket() will be called to try
770 * to extract errno from the socket. If no error is reported, it will consider
771 * the <expired> flag. This is intended to be used when a connection error was
772 * reported in conn->flags or when a timeout was reported in <expired>. The
773 * function takes care of not updating a server status which was already set.
774 * All situations where at least one of <expired> or CO_FL_ERROR are set
775 * produce a status.
776 */
Willy Tarreau51cd5952020-06-05 12:25:38 +0200777void chk_report_conn_err(struct check *check, int errno_bck, int expired)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100778{
Olivier Houchard9aaf7782017-09-13 18:30:23 +0200779 struct conn_stream *cs = check->cs;
780 struct connection *conn = cs_conn(cs);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100781 const char *err_msg;
Willy Tarreau83061a82018-07-13 11:56:34 +0200782 struct buffer *chk;
Willy Tarreau213c6782014-10-02 14:51:02 +0200783 int step;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100784
Christopher Faulet147b8c92021-04-10 09:00:38 +0200785 if (check->result != CHK_RES_UNKNOWN) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100786 return;
Christopher Faulet147b8c92021-04-10 09:00:38 +0200787 }
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100788
Willy Tarreauc8dc20a2019-12-27 12:03:27 +0100789 errno = unclean_errno(errno_bck);
790 if (conn && errno)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100791 retrieve_errno_from_socket(conn);
792
Willy Tarreau4ff3b892017-10-16 15:17:17 +0200793 if (conn && !(conn->flags & CO_FL_ERROR) &&
794 !(cs->flags & CS_FL_ERROR) && !expired)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100795 return;
796
Christopher Faulet147b8c92021-04-10 09:00:38 +0200797 TRACE_ENTER(CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check, 0, 0, (size_t[]){expired});
798
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100799 /* we'll try to build a meaningful error message depending on the
800 * context of the error possibly present in conn->err_code, and the
801 * socket error possibly collected above. This is useful to know the
802 * exact step of the L6 layer (eg: SSL handshake).
803 */
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200804 chk = get_trash_chunk();
805
Christopher Faulet799f3a42020-04-07 12:06:14 +0200806 if (check->type == PR_O2_TCPCHK_CHK &&
Christopher Fauletd7e63962020-04-17 20:15:59 +0200807 (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK) {
Christopher Fauletb2c2e0f2020-03-30 11:05:10 +0200808 step = tcpcheck_get_step_id(check, NULL);
Christopher Faulet147b8c92021-04-10 09:00:38 +0200809 if (!step) {
810 TRACE_DEVEL("initial connection failure", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
Willy Tarreau213c6782014-10-02 14:51:02 +0200811 chunk_printf(chk, " at initial connection step of tcp-check");
Christopher Faulet147b8c92021-04-10 09:00:38 +0200812 }
Willy Tarreau213c6782014-10-02 14:51:02 +0200813 else {
814 chunk_printf(chk, " at step %d of tcp-check", step);
815 /* we were looking for a string */
Christopher Fauletb2c2e0f2020-03-30 11:05:10 +0200816 if (check->current_step && check->current_step->action == TCPCHK_ACT_CONNECT) {
817 if (check->current_step->connect.port)
818 chunk_appendf(chk, " (connect port %d)" ,check->current_step->connect.port);
Willy Tarreau213c6782014-10-02 14:51:02 +0200819 else
820 chunk_appendf(chk, " (connect)");
Christopher Faulet147b8c92021-04-10 09:00:38 +0200821 TRACE_DEVEL("connection failure", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
Willy Tarreau213c6782014-10-02 14:51:02 +0200822 }
Christopher Fauletb2c2e0f2020-03-30 11:05:10 +0200823 else if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT) {
824 struct tcpcheck_expect *expect = &check->current_step->expect;
Gaetan Rivetb616add2020-02-07 15:37:17 +0100825
826 switch (expect->type) {
827 case TCPCHK_EXPECT_STRING:
Christopher Fauletb61caf42020-04-21 10:57:42 +0200828 chunk_appendf(chk, " (expect string '%.*s')", (unsigned int)istlen(expect->data), istptr(expect->data));
Gaetan Rivetb616add2020-02-07 15:37:17 +0100829 break;
830 case TCPCHK_EXPECT_BINARY:
Christopher Fauletb61caf42020-04-21 10:57:42 +0200831 chunk_appendf(chk, " (expect binary '%.*s')", (unsigned int)istlen(expect->data), istptr(expect->data));
Gaetan Rivetb616add2020-02-07 15:37:17 +0100832 break;
Christopher Faulet67a23452020-05-05 18:10:01 +0200833 case TCPCHK_EXPECT_STRING_REGEX:
Willy Tarreau213c6782014-10-02 14:51:02 +0200834 chunk_appendf(chk, " (expect regex)");
Gaetan Rivetb616add2020-02-07 15:37:17 +0100835 break;
Christopher Faulet67a23452020-05-05 18:10:01 +0200836 case TCPCHK_EXPECT_BINARY_REGEX:
Gaetan Rivetefab6c62020-02-07 15:37:17 +0100837 chunk_appendf(chk, " (expect binary regex)");
838 break;
Christopher Fauletaaab0832020-05-05 15:54:22 +0200839 case TCPCHK_EXPECT_STRING_LF:
840 chunk_appendf(chk, " (expect log-format string)");
841 break;
842 case TCPCHK_EXPECT_BINARY_LF:
843 chunk_appendf(chk, " (expect log-format binary)");
844 break;
Christopher Faulete5870d82020-04-15 11:32:03 +0200845 case TCPCHK_EXPECT_HTTP_STATUS:
Christopher Faulet8021a5f2020-04-24 13:53:12 +0200846 chunk_appendf(chk, " (expect HTTP status codes)");
Christopher Faulete5870d82020-04-15 11:32:03 +0200847 break;
Christopher Faulet67a23452020-05-05 18:10:01 +0200848 case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
Christopher Faulete5870d82020-04-15 11:32:03 +0200849 chunk_appendf(chk, " (expect HTTP status regex)");
850 break;
Christopher Faulet39708192020-05-05 10:47:36 +0200851 case TCPCHK_EXPECT_HTTP_HEADER:
852 chunk_appendf(chk, " (expect HTTP header pattern)");
853 break;
Christopher Faulete5870d82020-04-15 11:32:03 +0200854 case TCPCHK_EXPECT_HTTP_BODY:
Christopher Fauletb61caf42020-04-21 10:57:42 +0200855 chunk_appendf(chk, " (expect HTTP body content '%.*s')", (unsigned int)istlen(expect->data), istptr(expect->data));
Christopher Faulete5870d82020-04-15 11:32:03 +0200856 break;
Christopher Faulet67a23452020-05-05 18:10:01 +0200857 case TCPCHK_EXPECT_HTTP_BODY_REGEX:
Christopher Faulete5870d82020-04-15 11:32:03 +0200858 chunk_appendf(chk, " (expect HTTP body regex)");
859 break;
Christopher Fauletaaab0832020-05-05 15:54:22 +0200860 case TCPCHK_EXPECT_HTTP_BODY_LF:
861 chunk_appendf(chk, " (expect log-format HTTP body)");
862 break;
Christopher Faulet9e6ed152020-04-03 15:24:06 +0200863 case TCPCHK_EXPECT_CUSTOM:
864 chunk_appendf(chk, " (expect custom function)");
865 break;
Gaetan Rivetb616add2020-02-07 15:37:17 +0100866 case TCPCHK_EXPECT_UNDEF:
867 chunk_appendf(chk, " (undefined expect!)");
868 break;
869 }
Christopher Faulet147b8c92021-04-10 09:00:38 +0200870 TRACE_DEVEL("expect rule failed", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
Willy Tarreau213c6782014-10-02 14:51:02 +0200871 }
Christopher Fauletb2c2e0f2020-03-30 11:05:10 +0200872 else if (check->current_step && check->current_step->action == TCPCHK_ACT_SEND) {
Willy Tarreau213c6782014-10-02 14:51:02 +0200873 chunk_appendf(chk, " (send)");
Christopher Faulet147b8c92021-04-10 09:00:38 +0200874 TRACE_DEVEL("send rule failed", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
Willy Tarreau213c6782014-10-02 14:51:02 +0200875 }
Baptiste Assmann22b09d22015-05-01 08:03:04 +0200876
Christopher Faulet6f2a5e42020-04-01 13:11:41 +0200877 if (check->current_step && check->current_step->comment)
878 chunk_appendf(chk, " comment: '%s'", check->current_step->comment);
Baptiste Assmann5ecb77f2013-10-06 23:24:13 +0200879 }
880 }
881
Willy Tarreau00149122017-10-04 18:05:01 +0200882 if (conn && conn->err_code) {
Willy Tarreauc8dc20a2019-12-27 12:03:27 +0100883 if (unclean_errno(errno))
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200884 chunk_printf(&trash, "%s (%s)%s", conn_err_code_str(conn), strerror(errno),
885 chk->area);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100886 else
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200887 chunk_printf(&trash, "%s%s", conn_err_code_str(conn),
888 chk->area);
889 err_msg = trash.area;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100890 }
891 else {
Willy Tarreauc8dc20a2019-12-27 12:03:27 +0100892 if (unclean_errno(errno)) {
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200893 chunk_printf(&trash, "%s%s", strerror(errno),
894 chk->area);
895 err_msg = trash.area;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100896 }
897 else {
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200898 err_msg = chk->area;
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100899 }
900 }
901
Willy Tarreau00149122017-10-04 18:05:01 +0200902 if (check->state & CHK_ST_PORT_MISS) {
Baptiste Assmann95db2bc2016-06-13 14:15:41 +0200903 /* NOTE: this is reported after <fall> tries */
Baptiste Assmann95db2bc2016-06-13 14:15:41 +0200904 set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
905 }
906
Christopher Faulet5e293762020-10-26 11:10:49 +0100907 if (!conn || !conn->ctrl) {
908 /* error before any connection attempt (connection allocation error or no control layer) */
Willy Tarreau00149122017-10-04 18:05:01 +0200909 set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
910 }
Willy Tarreauc192b0a2020-01-23 09:11:58 +0100911 else if (conn->flags & CO_FL_WAIT_L4_CONN) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100912 /* L4 not established (yet) */
Willy Tarreau4ff3b892017-10-16 15:17:17 +0200913 if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100914 set_server_check_status(check, HCHK_STATUS_L4CON, err_msg);
915 else if (expired)
916 set_server_check_status(check, HCHK_STATUS_L4TOUT, err_msg);
Baptiste Assmanna68ca962015-04-14 01:15:08 +0200917
918 /*
919 * might be due to a server IP change.
920 * Let's trigger a DNS resolution if none are currently running.
921 */
Olivier Houchard0923fa42019-01-11 18:43:04 +0100922 if (check->server)
Emeric Brund30e9a12020-12-23 18:49:16 +0100923 resolv_trigger_resolution(check->server->resolv_requester);
Baptiste Assmanna68ca962015-04-14 01:15:08 +0200924
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100925 }
Willy Tarreauc192b0a2020-01-23 09:11:58 +0100926 else if (conn->flags & CO_FL_WAIT_L6_CONN) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100927 /* L6 not established (yet) */
Willy Tarreau4ff3b892017-10-16 15:17:17 +0200928 if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR)
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100929 set_server_check_status(check, HCHK_STATUS_L6RSP, err_msg);
930 else if (expired)
931 set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
932 }
Willy Tarreau4ff3b892017-10-16 15:17:17 +0200933 else if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR) {
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100934 /* I/O error after connection was established and before we could diagnose */
935 set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
936 }
937 else if (expired) {
Christopher Fauletcf80f2f2020-04-01 11:04:52 +0200938 enum healthcheck_status tout = HCHK_STATUS_L7TOUT;
939
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100940 /* connection established but expired check */
Christopher Faulet1941bab2020-05-05 07:55:50 +0200941 if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT &&
942 check->current_step->expect.tout_status != HCHK_STATUS_UNKNOWN)
Christopher Faulet811f78c2020-04-01 11:10:27 +0200943 tout = check->current_step->expect.tout_status;
944 set_server_check_status(check, tout, err_msg);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100945 }
946
Christopher Faulet147b8c92021-04-10 09:00:38 +0200947 TRACE_LEAVE(CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
Willy Tarreau25e2ab52013-12-04 11:17:05 +0100948 return;
949}
950
Simon Horman98637e52014-06-20 12:30:16 +0900951
Christopher Faulet61cc8522020-04-20 14:54:42 +0200952/* Builds the server state header used by HTTP health-checks */
Willy Tarreau51cd5952020-06-05 12:25:38 +0200953int httpchk_build_status_header(struct server *s, struct buffer *buf)
Simon Horman98637e52014-06-20 12:30:16 +0900954{
Christopher Faulet61cc8522020-04-20 14:54:42 +0200955 int sv_state;
956 int ratio;
957 char addr[46];
958 char port[6];
959 const char *srv_hlt_st[7] = { "DOWN", "DOWN %d/%d",
960 "UP %d/%d", "UP",
961 "NOLB %d/%d", "NOLB",
962 "no check" };
Simon Horman98637e52014-06-20 12:30:16 +0900963
Christopher Faulet61cc8522020-04-20 14:54:42 +0200964 if (!(s->check.state & CHK_ST_ENABLED))
965 sv_state = 6;
966 else if (s->cur_state != SRV_ST_STOPPED) {
967 if (s->check.health == s->check.rise + s->check.fall - 1)
968 sv_state = 3; /* UP */
969 else
970 sv_state = 2; /* going down */
Simon Horman98637e52014-06-20 12:30:16 +0900971
Christopher Faulet61cc8522020-04-20 14:54:42 +0200972 if (s->cur_state == SRV_ST_STOPPING)
973 sv_state += 2;
974 } else {
975 if (s->check.health)
976 sv_state = 1; /* going up */
977 else
978 sv_state = 0; /* DOWN */
Simon Horman98637e52014-06-20 12:30:16 +0900979 }
Willy Tarreaub7b24782016-06-21 15:32:29 +0200980
Christopher Faulet61cc8522020-04-20 14:54:42 +0200981 chunk_appendf(buf, srv_hlt_st[sv_state],
982 (s->cur_state != SRV_ST_STOPPED) ? (s->check.health - s->check.rise + 1) : (s->check.health),
983 (s->cur_state != SRV_ST_STOPPED) ? (s->check.fall) : (s->check.rise));
Willy Tarreaub7b24782016-06-21 15:32:29 +0200984
Christopher Faulet61cc8522020-04-20 14:54:42 +0200985 addr_to_str(&s->addr, addr, sizeof(addr));
986 if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
987 snprintf(port, sizeof(port), "%u", s->svc_port);
988 else
989 *port = 0;
Willy Tarreaub7b24782016-06-21 15:32:29 +0200990
Christopher Faulet61cc8522020-04-20 14:54:42 +0200991 chunk_appendf(buf, "; address=%s; port=%s; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d",
992 addr, port, s->proxy->id, s->id,
993 global.node,
994 (s->cur_eweight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
995 (s->proxy->lbprm.tot_weight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
996 s->cur_sess, s->proxy->beconn - s->proxy->nbpend,
997 s->nbpend);
Willy Tarreau9f6dc722019-03-01 11:15:10 +0100998
Christopher Faulet61cc8522020-04-20 14:54:42 +0200999 if ((s->cur_state == SRV_ST_STARTING) &&
1000 now.tv_sec < s->last_change + s->slowstart &&
1001 now.tv_sec >= s->last_change) {
1002 ratio = MAX(1, 100 * (now.tv_sec - s->last_change) / s->slowstart);
1003 chunk_appendf(buf, "; throttle=%d%%", ratio);
1004 }
Christopher Fauletaaae9a02020-04-26 09:50:31 +02001005
Christopher Faulet61cc8522020-04-20 14:54:42 +02001006 return b_data(buf);
1007}
Christopher Fauletaaae9a02020-04-26 09:50:31 +02001008
Willy Tarreau51cd5952020-06-05 12:25:38 +02001009/**************************************************************************/
Willy Tarreau51cd5952020-06-05 12:25:38 +02001010/***************** Health-checks based on connections *********************/
1011/**************************************************************************/
1012/* This function is used only for server health-checks. It handles connection
1013 * status updates including errors. If necessary, it wakes the check task up.
1014 * It returns 0 on normal cases, <0 if at least one close() has happened on the
1015 * connection (eg: reconnect). It relies on tcpcheck_main().
Christopher Faulet61cc8522020-04-20 14:54:42 +02001016 */
Willy Tarreau51cd5952020-06-05 12:25:38 +02001017static int wake_srv_chk(struct conn_stream *cs)
Christopher Faulet61cc8522020-04-20 14:54:42 +02001018{
Willy Tarreau51cd5952020-06-05 12:25:38 +02001019 struct connection *conn = cs->conn;
1020 struct check *check = cs->data;
1021 struct email_alertq *q = container_of(check, typeof(*q), check);
1022 int ret = 0;
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001023
Christopher Faulet147b8c92021-04-10 09:00:38 +02001024 TRACE_ENTER(CHK_EV_HCHK_WAKE, check);
Willy Tarreau51cd5952020-06-05 12:25:38 +02001025 if (check->server)
1026 HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
1027 else
1028 HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001029
Willy Tarreau51cd5952020-06-05 12:25:38 +02001030 /* we may have to make progress on the TCP checks */
1031 ret = tcpcheck_main(check);
Christopher Fauletaaab0832020-05-05 15:54:22 +02001032
Willy Tarreau51cd5952020-06-05 12:25:38 +02001033 cs = check->cs;
1034 conn = cs->conn;
Christopher Fauletaaab0832020-05-05 15:54:22 +02001035
Willy Tarreau51cd5952020-06-05 12:25:38 +02001036 if (unlikely(conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR)) {
1037 /* We may get error reports bypassing the I/O handlers, typically
1038 * the case when sending a pure TCP check which fails, then the I/O
1039 * handlers above are not called. This is completely handled by the
1040 * main processing task so let's simply wake it up. If we get here,
1041 * we expect errno to still be valid.
1042 */
Christopher Faulet147b8c92021-04-10 09:00:38 +02001043 TRACE_ERROR("report connection error", CHK_EV_HCHK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
Willy Tarreau51cd5952020-06-05 12:25:38 +02001044 chk_report_conn_err(check, errno, 0);
1045 task_wakeup(check->task, TASK_WOKEN_IO);
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001046 }
1047
Christopher Faulet8f100422021-01-18 15:47:03 +01001048 if (check->result != CHK_RES_UNKNOWN || ret == -1) {
Willy Tarreau51cd5952020-06-05 12:25:38 +02001049 /* Check complete or aborted. If connection not yet closed do it
1050 * now and wake the check task up to be sure the result is
1051 * handled ASAP. */
Willy Tarreau30bd4ef2020-12-11 11:09:29 +01001052 cs_drain_and_close(cs);
Willy Tarreau51cd5952020-06-05 12:25:38 +02001053 ret = -1;
Christopher Faulet8f100422021-01-18 15:47:03 +01001054
1055 if (check->wait_list.events)
1056 cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list);
1057
Willy Tarreau51cd5952020-06-05 12:25:38 +02001058 /* We may have been scheduled to run, and the
1059 * I/O handler expects to have a cs, so remove
1060 * the tasklet
1061 */
1062 tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
1063 task_wakeup(check->task, TASK_WOKEN_IO);
Christopher Faulet61cc8522020-04-20 14:54:42 +02001064 }
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001065
Willy Tarreau51cd5952020-06-05 12:25:38 +02001066 if (check->server)
1067 HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
1068 else
1069 HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001070
Christopher Faulet147b8c92021-04-10 09:00:38 +02001071 TRACE_LEAVE(CHK_EV_HCHK_WAKE, check);
Christopher Faulet61cc8522020-04-20 14:54:42 +02001072 return ret;
1073}
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001074
Willy Tarreau51cd5952020-06-05 12:25:38 +02001075/* This function checks if any I/O is wanted, and if so, attempts to do so */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001076struct task *event_srv_chk_io(struct task *t, void *ctx, unsigned int state)
Christopher Faulet61cc8522020-04-20 14:54:42 +02001077{
Willy Tarreau51cd5952020-06-05 12:25:38 +02001078 struct check *check = ctx;
1079 struct conn_stream *cs = check->cs;
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001080
Willy Tarreau51cd5952020-06-05 12:25:38 +02001081 wake_srv_chk(cs);
1082 return NULL;
Christopher Faulet61cc8522020-04-20 14:54:42 +02001083}
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001084
Willy Tarreau51cd5952020-06-05 12:25:38 +02001085/* manages a server health-check that uses a connection. Returns
1086 * the time the task accepts to wait, or TIME_ETERNITY for infinity.
Christopher Faulet61cc8522020-04-20 14:54:42 +02001087 *
1088 * Please do NOT place any return statement in this function and only leave
Willy Tarreau51cd5952020-06-05 12:25:38 +02001089 * via the out_unlock label.
Christopher Faulet61cc8522020-04-20 14:54:42 +02001090 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001091struct task *process_chk_conn(struct task *t, void *context, unsigned int state)
Christopher Faulet61cc8522020-04-20 14:54:42 +02001092{
Willy Tarreau51cd5952020-06-05 12:25:38 +02001093 struct check *check = context;
1094 struct proxy *proxy = check->proxy;
Christopher Faulet92017a32021-05-06 16:01:18 +02001095 struct conn_stream *cs;
1096 struct connection *conn;
Willy Tarreau51cd5952020-06-05 12:25:38 +02001097 int rv;
1098 int expired = tick_is_expired(t->expire, now_ms);
Willy Tarreaudeccd112018-06-14 18:38:55 +02001099
Christopher Faulet147b8c92021-04-10 09:00:38 +02001100 TRACE_ENTER(CHK_EV_TASK_WAKE, check);
1101
Willy Tarreau51cd5952020-06-05 12:25:38 +02001102 if (check->server)
1103 HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
Christopher Faulet92017a32021-05-06 16:01:18 +02001104
Willy Tarreau51cd5952020-06-05 12:25:38 +02001105 if (!(check->state & CHK_ST_INPROGRESS)) {
1106 /* no check currently running */
Christopher Faulet147b8c92021-04-10 09:00:38 +02001107 if (!expired) /* woke up too early */ {
1108 TRACE_STATE("health-check wake up too early", CHK_EV_TASK_WAKE, check);
Willy Tarreau51cd5952020-06-05 12:25:38 +02001109 goto out_unlock;
Christopher Faulet147b8c92021-04-10 09:00:38 +02001110 }
Willy Tarreauabca5b62013-12-06 14:19:25 +01001111
Willy Tarreau51cd5952020-06-05 12:25:38 +02001112 /* we don't send any health-checks when the proxy is
1113 * stopped, the server should not be checked or the check
1114 * is disabled.
1115 */
1116 if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
Christopher Faulet147b8c92021-04-10 09:00:38 +02001117 proxy->disabled) {
1118 TRACE_STATE("health-check paused or disabled", CHK_EV_TASK_WAKE, check);
Willy Tarreau51cd5952020-06-05 12:25:38 +02001119 goto reschedule;
Christopher Faulet147b8c92021-04-10 09:00:38 +02001120 }
Christopher Faulet404f9192020-04-09 23:13:54 +02001121
Willy Tarreau51cd5952020-06-05 12:25:38 +02001122 /* we'll initiate a new check */
1123 set_server_check_status(check, HCHK_STATUS_START, NULL);
Christopher Faulet404f9192020-04-09 23:13:54 +02001124
Willy Tarreau51cd5952020-06-05 12:25:38 +02001125 check->state |= CHK_ST_INPROGRESS;
Christopher Faulet147b8c92021-04-10 09:00:38 +02001126 TRACE_STATE("init new health-check", CHK_EV_TASK_WAKE|CHK_EV_HCHK_START, check);
Christopher Faulet61cc8522020-04-20 14:54:42 +02001127
Willy Tarreau51cd5952020-06-05 12:25:38 +02001128 task_set_affinity(t, tid_bit);
1129
1130 check->current_step = NULL;
1131 tcpcheck_main(check);
Christopher Faulet92017a32021-05-06 16:01:18 +02001132 expired = 0;
Christopher Faulet61cc8522020-04-20 14:54:42 +02001133 }
Christopher Faulet92017a32021-05-06 16:01:18 +02001134
1135 cs = check->cs;
1136 conn = cs_conn(cs);
1137
1138 /* there was a test running.
1139 * First, let's check whether there was an uncaught error,
1140 * which can happen on connect timeout or error.
1141 */
1142 if (check->result == CHK_RES_UNKNOWN) {
1143 /* Here the connection must be defined. Otherwise the
1144 * error would have already been detected
Willy Tarreau51cd5952020-06-05 12:25:38 +02001145 */
Christopher Faulet92017a32021-05-06 16:01:18 +02001146 if ((conn && ((conn->flags & CO_FL_ERROR) || (cs->flags & CS_FL_ERROR))) || expired) {
1147 TRACE_ERROR("report connection error", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
1148 chk_report_conn_err(check, 0, expired);
1149 }
1150 else {
1151 if (check->state & CHK_ST_CLOSE_CONN) {
1152 TRACE_DEVEL("closing current connection", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
1153 cs_destroy(cs);
1154 cs = NULL;
1155 conn = NULL;
1156 check->cs = NULL;
1157 check->state &= ~CHK_ST_CLOSE_CONN;
1158 tcpcheck_main(check);
Willy Tarreau51cd5952020-06-05 12:25:38 +02001159 }
Christopher Faulet92017a32021-05-06 16:01:18 +02001160 if (check->result == CHK_RES_UNKNOWN) {
1161 TRACE_DEVEL("health-check not expired", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
1162 goto out_unlock; /* timeout not reached, wait again */
Christopher Faulet8f100422021-01-18 15:47:03 +01001163 }
Christopher Faulet61cc8522020-04-20 14:54:42 +02001164 }
Christopher Faulet92017a32021-05-06 16:01:18 +02001165 }
Christopher Faulet404f9192020-04-09 23:13:54 +02001166
Christopher Faulet92017a32021-05-06 16:01:18 +02001167 /* check complete or aborted */
1168 TRACE_STATE("health-check complete or aborted", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END, check);
Christopher Fauletba3c68f2020-04-01 16:27:05 +02001169
Christopher Faulet92017a32021-05-06 16:01:18 +02001170 check->current_step = NULL;
Christopher Fauletba3c68f2020-04-01 16:27:05 +02001171
Christopher Faulet92017a32021-05-06 16:01:18 +02001172 if (conn && conn->xprt) {
1173 /* The check was aborted and the connection was not yet closed.
1174 * This can happen upon timeout, or when an external event such
1175 * as a failed response coupled with "observe layer7" caused the
1176 * server state to be suddenly changed.
1177 */
1178 cs_drain_and_close(cs);
1179 }
Christopher Fauletba3c68f2020-04-01 16:27:05 +02001180
Christopher Faulet92017a32021-05-06 16:01:18 +02001181 if (cs) {
1182 if (check->wait_list.events)
1183 cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list);
1184 /* We may have been scheduled to run, and the
1185 * I/O handler expects to have a cs, so remove
1186 * the tasklet
1187 */
1188 tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
1189 cs_destroy(cs);
1190 cs = check->cs = NULL;
1191 conn = NULL;
1192 }
Willy Tarreau51cd5952020-06-05 12:25:38 +02001193
Christopher Faulet92017a32021-05-06 16:01:18 +02001194 if (check->sess != NULL) {
1195 vars_prune(&check->vars, check->sess, NULL);
1196 session_free(check->sess);
1197 check->sess = NULL;
1198 }
Willy Tarreau51cd5952020-06-05 12:25:38 +02001199
Christopher Faulet92017a32021-05-06 16:01:18 +02001200 if (check->server) {
1201 if (check->result == CHK_RES_FAILED) {
1202 /* a failure or timeout detected */
1203 TRACE_DEVEL("report failure", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
1204 check_notify_failure(check);
1205 }
1206 else if (check->result == CHK_RES_CONDPASS) {
1207 /* check is OK but asks for stopping mode */
1208 TRACE_DEVEL("report conditional success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
1209 check_notify_stopping(check);
Christopher Faulet61cc8522020-04-20 14:54:42 +02001210 }
Christopher Faulet92017a32021-05-06 16:01:18 +02001211 else if (check->result == CHK_RES_PASSED) {
1212 /* a success was detected */
1213 TRACE_DEVEL("report success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
1214 check_notify_success(check);
1215 }
1216 }
1217 task_set_affinity(t, MAX_THREADS_MASK);
1218 check_release_buf(check, &check->bi);
1219 check_release_buf(check, &check->bo);
1220 check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC);
Christopher Fauletba3c68f2020-04-01 16:27:05 +02001221
Christopher Faulet92017a32021-05-06 16:01:18 +02001222 if (check->server) {
1223 rv = 0;
1224 if (global.spread_checks > 0) {
1225 rv = srv_getinter(check) * global.spread_checks / 100;
1226 rv -= (int) (2 * rv * (ha_random32() / 4294967295.0));
Christopher Faulet61cc8522020-04-20 14:54:42 +02001227 }
Christopher Faulet92017a32021-05-06 16:01:18 +02001228 t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
Christopher Faulet61cc8522020-04-20 14:54:42 +02001229 }
Willy Tarreau51cd5952020-06-05 12:25:38 +02001230
1231 reschedule:
1232 while (tick_is_expired(t->expire, now_ms))
1233 t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
1234 out_unlock:
1235 if (check->server)
1236 HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
Christopher Faulet147b8c92021-04-10 09:00:38 +02001237
1238 TRACE_LEAVE(CHK_EV_TASK_WAKE, check);
Willy Tarreau51cd5952020-06-05 12:25:38 +02001239 return t;
Christopher Fauletba3c68f2020-04-01 16:27:05 +02001240}
1241
Willy Tarreau51cd5952020-06-05 12:25:38 +02001242
Christopher Faulet61cc8522020-04-20 14:54:42 +02001243/**************************************************************************/
1244/************************** Init/deinit checks ****************************/
1245/**************************************************************************/
Christopher Fauletb381a502020-11-25 13:47:00 +01001246/*
1247 * Tries to grab a buffer and to re-enables processing on check <target>. The
1248 * check flags are used to figure what buffer was requested. It returns 1 if the
1249 * allocation succeeds, in which case the I/O tasklet is woken up, or 0 if it's
1250 * impossible to wake up and we prefer to be woken up later.
1251 */
1252int check_buf_available(void *target)
Christopher Faulet61cc8522020-04-20 14:54:42 +02001253{
Christopher Fauletb381a502020-11-25 13:47:00 +01001254 struct check *check = target;
1255
Willy Tarreaud68d4f12021-03-22 14:44:31 +01001256 if ((check->state & CHK_ST_IN_ALLOC) && b_alloc(&check->bi)) {
Christopher Faulet147b8c92021-04-10 09:00:38 +02001257 TRACE_STATE("unblocking check, input buffer allocated", CHK_EV_TCPCHK_EXP|CHK_EV_RX_BLK, check);
Christopher Fauletb381a502020-11-25 13:47:00 +01001258 check->state &= ~CHK_ST_IN_ALLOC;
1259 tasklet_wakeup(check->wait_list.tasklet);
1260 return 1;
1261 }
Willy Tarreaud68d4f12021-03-22 14:44:31 +01001262 if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc(&check->bo)) {
Christopher Faulet147b8c92021-04-10 09:00:38 +02001263 TRACE_STATE("unblocking check, output buffer allocated", CHK_EV_TCPCHK_SND|CHK_EV_TX_BLK, check);
Christopher Fauletb381a502020-11-25 13:47:00 +01001264 check->state &= ~CHK_ST_OUT_ALLOC;
1265 tasklet_wakeup(check->wait_list.tasklet);
1266 return 1;
1267 }
1268
1269 return 0;
1270}
Christopher Fauletba3c68f2020-04-01 16:27:05 +02001271
Christopher Fauletb381a502020-11-25 13:47:00 +01001272/*
William Dauchyf4300902021-02-06 20:47:50 +01001273 * Allocate a buffer. If it fails, it adds the check in buffer wait queue.
Christopher Fauletb381a502020-11-25 13:47:00 +01001274 */
1275struct buffer *check_get_buf(struct check *check, struct buffer *bptr)
1276{
1277 struct buffer *buf = NULL;
Christopher Fauletba3c68f2020-04-01 16:27:05 +02001278
Willy Tarreau2b718102021-04-21 07:32:39 +02001279 if (likely(!LIST_INLIST(&check->buf_wait.list)) &&
Willy Tarreaud68d4f12021-03-22 14:44:31 +01001280 unlikely((buf = b_alloc(bptr)) == NULL)) {
Christopher Fauletb381a502020-11-25 13:47:00 +01001281 check->buf_wait.target = check;
1282 check->buf_wait.wakeup_cb = check_buf_available;
Willy Tarreau2b718102021-04-21 07:32:39 +02001283 LIST_APPEND(&ti->buffer_wq, &check->buf_wait.list);
Christopher Fauletb381a502020-11-25 13:47:00 +01001284 }
1285 return buf;
1286}
1287
1288/*
1289 * Release a buffer, if any, and try to wake up entities waiting in the buffer
1290 * wait queue.
1291 */
1292void check_release_buf(struct check *check, struct buffer *bptr)
1293{
1294 if (bptr->size) {
1295 b_free(bptr);
Willy Tarreau4d77bbf2021-02-20 12:02:46 +01001296 offer_buffers(check->buf_wait.target, 1);
Christopher Fauletb381a502020-11-25 13:47:00 +01001297 }
1298}
1299
1300const char *init_check(struct check *check, int type)
1301{
1302 check->type = type;
Christopher Fauletba3c68f2020-04-01 16:27:05 +02001303
Christopher Fauletb381a502020-11-25 13:47:00 +01001304 check->bi = BUF_NULL;
1305 check->bo = BUF_NULL;
Willy Tarreau90f366b2021-02-20 11:49:49 +01001306 LIST_INIT(&check->buf_wait.list);
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001307
Christopher Faulet61cc8522020-04-20 14:54:42 +02001308 check->wait_list.tasklet = tasklet_new();
1309 if (!check->wait_list.tasklet)
1310 return "out of memory while allocating check tasklet";
1311 check->wait_list.events = 0;
1312 check->wait_list.tasklet->process = event_srv_chk_io;
1313 check->wait_list.tasklet->context = check;
1314 return NULL;
1315}
1316
1317void free_check(struct check *check)
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001318{
Christopher Faulet61cc8522020-04-20 14:54:42 +02001319 task_destroy(check->task);
1320 if (check->wait_list.tasklet)
1321 tasklet_free(check->wait_list.tasklet);
1322
Christopher Fauletb381a502020-11-25 13:47:00 +01001323 check_release_buf(check, &check->bi);
1324 check_release_buf(check, &check->bo);
Christopher Faulet61cc8522020-04-20 14:54:42 +02001325 if (check->cs) {
Willy Tarreau61cfdf42021-02-20 10:46:51 +01001326 ha_free(&check->cs->conn);
Christopher Faulet61cc8522020-04-20 14:54:42 +02001327 cs_free(check->cs);
1328 check->cs = NULL;
1329 }
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001330}
1331
Christopher Faulet61cc8522020-04-20 14:54:42 +02001332/* manages a server health-check. Returns the time the task accepts to wait, or
1333 * TIME_ETERNITY for infinity.
1334 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001335struct task *process_chk(struct task *t, void *context, unsigned int state)
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001336{
Christopher Faulet61cc8522020-04-20 14:54:42 +02001337 struct check *check = context;
1338
1339 if (check->type == PR_O2_EXT_CHK)
1340 return process_chk_proc(t, context, state);
1341 return process_chk_conn(t, context, state);
1342
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001343}
1344
Christopher Faulet61cc8522020-04-20 14:54:42 +02001345
1346static int start_check_task(struct check *check, int mininter,
1347 int nbcheck, int srvpos)
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001348{
Christopher Faulet61cc8522020-04-20 14:54:42 +02001349 struct task *t;
1350 unsigned long thread_mask = MAX_THREADS_MASK;
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001351
Christopher Faulet61cc8522020-04-20 14:54:42 +02001352 if (check->type == PR_O2_EXT_CHK)
1353 thread_mask = 1;
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001354
Christopher Faulet61cc8522020-04-20 14:54:42 +02001355 /* task for the check */
1356 if ((t = task_new(thread_mask)) == NULL) {
1357 ha_alert("Starting [%s:%s] check: out of memory.\n",
1358 check->server->proxy->id, check->server->id);
1359 return 0;
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001360 }
1361
Christopher Faulet61cc8522020-04-20 14:54:42 +02001362 check->task = t;
1363 t->process = process_chk;
1364 t->context = check;
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001365
Christopher Faulet61cc8522020-04-20 14:54:42 +02001366 if (mininter < srv_getinter(check))
1367 mininter = srv_getinter(check);
1368
1369 if (global.max_spread_checks && mininter > global.max_spread_checks)
1370 mininter = global.max_spread_checks;
1371
1372 /* check this every ms */
1373 t->expire = tick_add(now_ms, MS_TO_TICKS(mininter * srvpos / nbcheck));
1374 check->start = now;
1375 task_queue(t);
1376
1377 return 1;
Gaetan Rivet707b52f2020-02-21 18:14:59 +01001378}
1379
Christopher Faulet61cc8522020-04-20 14:54:42 +02001380/* updates the server's weight during a warmup stage. Once the final weight is
1381 * reached, the task automatically stops. Note that any server status change
1382 * must have updated s->last_change accordingly.
1383 */
Willy Tarreau144f84a2021-03-02 16:09:26 +01001384struct task *server_warmup(struct task *t, void *context, unsigned int state)
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001385{
Christopher Faulet61cc8522020-04-20 14:54:42 +02001386 struct server *s = context;
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001387
Christopher Faulet61cc8522020-04-20 14:54:42 +02001388 /* by default, plan on stopping the task */
1389 t->expire = TICK_ETERNITY;
1390 if ((s->next_admin & SRV_ADMF_MAINT) ||
1391 (s->next_state != SRV_ST_STARTING))
1392 return t;
Christopher Faulete5870d82020-04-15 11:32:03 +02001393
Christopher Faulet61cc8522020-04-20 14:54:42 +02001394 HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001395
Christopher Faulet61cc8522020-04-20 14:54:42 +02001396 /* recalculate the weights and update the state */
1397 server_recalc_eweight(s, 1);
Christopher Faulet5c288742020-03-31 08:15:58 +02001398
Christopher Faulet61cc8522020-04-20 14:54:42 +02001399 /* probably that we can refill this server with a bit more connections */
1400 pendconn_grab_from_px(s);
Christopher Faulet5c288742020-03-31 08:15:58 +02001401
Christopher Faulet61cc8522020-04-20 14:54:42 +02001402 HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
Christopher Faulet5c288742020-03-31 08:15:58 +02001403
Christopher Faulet61cc8522020-04-20 14:54:42 +02001404 /* get back there in 1 second or 1/20th of the slowstart interval,
1405 * whichever is greater, resulting in small 5% steps.
1406 */
1407 if (s->next_state == SRV_ST_STARTING)
1408 t->expire = tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)));
1409 return t;
1410}
1411
1412/*
1413 * Start health-check.
1414 * Returns 0 if OK, ERR_FATAL on error, and prints the error in this case.
1415 */
1416static int start_checks()
1417{
1418
1419 struct proxy *px;
1420 struct server *s;
1421 struct task *t;
1422 int nbcheck=0, mininter=0, srvpos=0;
1423
1424 /* 0- init the dummy frontend used to create all checks sessions */
1425 init_new_proxy(&checks_fe);
Christopher Faulet0f1fc232021-04-16 10:49:07 +02001426 checks_fe.id = strdup("CHECKS-FE");
Christopher Faulet61cc8522020-04-20 14:54:42 +02001427 checks_fe.cap = PR_CAP_FE | PR_CAP_BE;
1428 checks_fe.mode = PR_MODE_TCP;
1429 checks_fe.maxconn = 0;
1430 checks_fe.conn_retries = CONN_RETRIES;
1431 checks_fe.options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON | PR_O2_SMARTACC;
1432 checks_fe.timeout.client = TICK_ETERNITY;
1433
1434 /* 1- count the checkers to run simultaneously.
1435 * We also determine the minimum interval among all of those which
1436 * have an interval larger than SRV_CHK_INTER_THRES. This interval
1437 * will be used to spread their start-up date. Those which have
1438 * a shorter interval will start independently and will not dictate
1439 * too short an interval for all others.
1440 */
1441 for (px = proxies_list; px; px = px->next) {
1442 for (s = px->srv; s; s = s->next) {
1443 if (s->slowstart) {
1444 if ((t = task_new(MAX_THREADS_MASK)) == NULL) {
1445 ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
1446 return ERR_ALERT | ERR_FATAL;
1447 }
1448 /* We need a warmup task that will be called when the server
1449 * state switches from down to up.
1450 */
1451 s->warmup = t;
1452 t->process = server_warmup;
1453 t->context = s;
1454 /* server can be in this state only because of */
1455 if (s->next_state == SRV_ST_STARTING)
1456 task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, (now.tv_sec - s->last_change)) / 20)));
Christopher Faulet5c288742020-03-31 08:15:58 +02001457 }
1458
Christopher Faulet61cc8522020-04-20 14:54:42 +02001459 if (s->check.state & CHK_ST_CONFIGURED) {
1460 nbcheck++;
1461 if ((srv_getinter(&s->check) >= SRV_CHK_INTER_THRES) &&
1462 (!mininter || mininter > srv_getinter(&s->check)))
1463 mininter = srv_getinter(&s->check);
Christopher Faulet5c288742020-03-31 08:15:58 +02001464 }
1465
Christopher Faulet61cc8522020-04-20 14:54:42 +02001466 if (s->agent.state & CHK_ST_CONFIGURED) {
1467 nbcheck++;
1468 if ((srv_getinter(&s->agent) >= SRV_CHK_INTER_THRES) &&
1469 (!mininter || mininter > srv_getinter(&s->agent)))
1470 mininter = srv_getinter(&s->agent);
1471 }
Christopher Faulet5c288742020-03-31 08:15:58 +02001472 }
Christopher Faulet61cc8522020-04-20 14:54:42 +02001473 }
Christopher Fauletb7d30092020-03-30 15:19:03 +02001474
Christopher Faulet61cc8522020-04-20 14:54:42 +02001475 if (!nbcheck)
Christopher Fauletfc633b62020-11-06 15:24:23 +01001476 return ERR_NONE;
Christopher Fauletb7d30092020-03-30 15:19:03 +02001477
Christopher Faulet61cc8522020-04-20 14:54:42 +02001478 srand((unsigned)time(NULL));
Christopher Fauletb7d30092020-03-30 15:19:03 +02001479
William Dauchyf4300902021-02-06 20:47:50 +01001480 /* 2- start them as far as possible from each other. For this, we will
1481 * start them after their interval is set to the min interval divided
1482 * by the number of servers, weighted by the server's position in the
1483 * list.
Christopher Faulet61cc8522020-04-20 14:54:42 +02001484 */
1485 for (px = proxies_list; px; px = px->next) {
1486 if ((px->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
1487 if (init_pid_list()) {
1488 ha_alert("Starting [%s] check: out of memory.\n", px->id);
1489 return ERR_ALERT | ERR_FATAL;
1490 }
1491 }
Christopher Fauletb7d30092020-03-30 15:19:03 +02001492
Christopher Faulet61cc8522020-04-20 14:54:42 +02001493 for (s = px->srv; s; s = s->next) {
1494 /* A task for the main check */
1495 if (s->check.state & CHK_ST_CONFIGURED) {
1496 if (s->check.type == PR_O2_EXT_CHK) {
1497 if (!prepare_external_check(&s->check))
1498 return ERR_ALERT | ERR_FATAL;
Christopher Fauletb7d30092020-03-30 15:19:03 +02001499 }
Christopher Faulet61cc8522020-04-20 14:54:42 +02001500 if (!start_check_task(&s->check, mininter, nbcheck, srvpos))
1501 return ERR_ALERT | ERR_FATAL;
1502 srvpos++;
Christopher Faulet98572322020-03-30 13:16:44 +02001503 }
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001504
Christopher Faulet61cc8522020-04-20 14:54:42 +02001505 /* A task for a auxiliary agent check */
1506 if (s->agent.state & CHK_ST_CONFIGURED) {
1507 if (!start_check_task(&s->agent, mininter, nbcheck, srvpos)) {
1508 return ERR_ALERT | ERR_FATAL;
1509 }
1510 srvpos++;
1511 }
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001512 }
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001513 }
Christopher Fauletfc633b62020-11-06 15:24:23 +01001514 return ERR_NONE;
Christopher Faulet61cc8522020-04-20 14:54:42 +02001515}
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001516
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001517
Christopher Faulet61cc8522020-04-20 14:54:42 +02001518/*
1519 * Return value:
1520 * the port to be used for the health check
1521 * 0 in case no port could be found for the check
1522 */
1523static int srv_check_healthcheck_port(struct check *chk)
1524{
1525 int i = 0;
1526 struct server *srv = NULL;
1527
1528 srv = chk->server;
1529
William Dauchyf4300902021-02-06 20:47:50 +01001530 /* by default, we use the health check port configured */
Christopher Faulet61cc8522020-04-20 14:54:42 +02001531 if (chk->port > 0)
1532 return chk->port;
1533
1534 /* try to get the port from check_core.addr if check.port not set */
1535 i = get_host_port(&chk->addr);
1536 if (i > 0)
1537 return i;
1538
1539 /* try to get the port from server address */
1540 /* prevent MAPPORTS from working at this point, since checks could
1541 * not be performed in such case (MAPPORTS impose a relative ports
1542 * based on live traffic)
1543 */
1544 if (srv->flags & SRV_F_MAPPORTS)
1545 return 0;
1546
1547 i = srv->svc_port; /* by default */
1548 if (i > 0)
1549 return i;
1550
1551 return 0;
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001552}
1553
Christopher Faulet61cc8522020-04-20 14:54:42 +02001554/* Initializes an health-check attached to the server <srv>. Non-zero is returned
1555 * if an error occurred.
1556 */
1557static int init_srv_check(struct server *srv)
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001558{
Christopher Faulet61cc8522020-04-20 14:54:42 +02001559 const char *err;
1560 struct tcpcheck_rule *r;
Christopher Fauletfc633b62020-11-06 15:24:23 +01001561 int ret = ERR_NONE;
Amaury Denoyelle0519bd42020-11-13 12:34:56 +01001562 int check_type;
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001563
Christopher Faulet6ecd5932021-01-12 17:29:45 +01001564 if (!srv->do_check || !(srv->proxy->cap & PR_CAP_BE))
Christopher Faulet61cc8522020-04-20 14:54:42 +02001565 goto out;
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001566
Amaury Denoyelle0519bd42020-11-13 12:34:56 +01001567 check_type = srv->check.tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK;
Christopher Fauletf50f4e92020-03-30 19:52:29 +02001568
Christopher Faulet61cc8522020-04-20 14:54:42 +02001569 /* If neither a port nor an addr was specified and no check transport
1570 * layer is forced, then the transport layer used by the checks is the
1571 * same as for the production traffic. Otherwise we use raw_sock by
1572 * default, unless one is specified.
1573 */
1574 if (!srv->check.port && !is_addr(&srv->check.addr)) {
1575 if (!srv->check.use_ssl && srv->use_ssl != -1) {
1576 srv->check.use_ssl = srv->use_ssl;
1577 srv->check.xprt = srv->xprt;
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001578 }
Christopher Faulet61cc8522020-04-20 14:54:42 +02001579 else if (srv->check.use_ssl == 1)
1580 srv->check.xprt = xprt_get(XPRT_SSL);
1581 srv->check.send_proxy |= (srv->pp_opts);
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001582 }
Christopher Faulet66163ec2020-05-20 22:36:24 +02001583 else if (srv->check.use_ssl == 1)
1584 srv->check.xprt = xprt_get(XPRT_SSL);
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001585
Christopher Faulet12882cf2020-04-23 15:50:18 +02001586 /* Inherit the mux protocol from the server if not already defined for
1587 * the check
1588 */
Amaury Denoyelle0519bd42020-11-13 12:34:56 +01001589 if (srv->mux_proto && !srv->check.mux_proto &&
1590 ((srv->mux_proto->mode == PROTO_MODE_HTTP && check_type == TCPCHK_RULES_HTTP_CHK) ||
1591 (srv->mux_proto->mode == PROTO_MODE_TCP && check_type != TCPCHK_RULES_HTTP_CHK))) {
Christopher Faulet12882cf2020-04-23 15:50:18 +02001592 srv->check.mux_proto = srv->mux_proto;
Amaury Denoyelle0519bd42020-11-13 12:34:56 +01001593 }
Amaury Denoyelle7c148902020-11-13 12:34:57 +01001594 /* test that check proto is valid if explicitly defined */
1595 else if (srv->check.mux_proto &&
1596 ((srv->check.mux_proto->mode == PROTO_MODE_HTTP && check_type != TCPCHK_RULES_HTTP_CHK) ||
1597 (srv->check.mux_proto->mode == PROTO_MODE_TCP && check_type == TCPCHK_RULES_HTTP_CHK))) {
1598 ha_alert("config: %s '%s': server '%s' uses an incompatible MUX protocol for the selected check type\n",
1599 proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
1600 ret |= ERR_ALERT | ERR_FATAL;
1601 goto out;
1602 }
Christopher Faulet12882cf2020-04-23 15:50:18 +02001603
Christopher Faulet61cc8522020-04-20 14:54:42 +02001604 /* validate <srv> server health-check settings */
Christopher Fauletf50f4e92020-03-30 19:52:29 +02001605
Christopher Faulet61cc8522020-04-20 14:54:42 +02001606 /* We need at least a service port, a check port or the first tcp-check
1607 * rule must be a 'connect' one when checking an IPv4/IPv6 server.
1608 */
1609 if ((srv_check_healthcheck_port(&srv->check) != 0) ||
1610 (!is_inet_addr(&srv->check.addr) && (is_addr(&srv->check.addr) || !is_inet_addr(&srv->addr))))
1611 goto init;
Christopher Fauletf50f4e92020-03-30 19:52:29 +02001612
Christopher Faulet61cc8522020-04-20 14:54:42 +02001613 if (!srv->proxy->tcpcheck_rules.list || LIST_ISEMPTY(srv->proxy->tcpcheck_rules.list)) {
1614 ha_alert("config: %s '%s': server '%s' has neither service port nor check port.\n",
1615 proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
1616 ret |= ERR_ALERT | ERR_ABORT;
1617 goto out;
1618 }
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001619
Christopher Faulet61cc8522020-04-20 14:54:42 +02001620 /* search the first action (connect / send / expect) in the list */
1621 r = get_first_tcpcheck_rule(&srv->proxy->tcpcheck_rules);
1622 if (!r || (r->action != TCPCHK_ACT_CONNECT) || (!r->connect.port && !get_host_port(&r->connect.addr))) {
1623 ha_alert("config: %s '%s': server '%s' has neither service port nor check port "
1624 "nor tcp_check rule 'connect' with port information.\n",
1625 proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
1626 ret |= ERR_ALERT | ERR_ABORT;
1627 goto out;
1628 }
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001629
Christopher Faulet61cc8522020-04-20 14:54:42 +02001630 /* scan the tcp-check ruleset to ensure a port has been configured */
1631 list_for_each_entry(r, srv->proxy->tcpcheck_rules.list, list) {
1632 if ((r->action == TCPCHK_ACT_CONNECT) && (!r->connect.port || !get_host_port(&r->connect.addr))) {
1633 ha_alert("config: %s '%s': server '%s' has neither service port nor check port, "
1634 "and a tcp_check rule 'connect' with no port information.\n",
1635 proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
1636 ret |= ERR_ALERT | ERR_ABORT;
1637 goto out;
Christopher Faulete5870d82020-04-15 11:32:03 +02001638 }
Christopher Faulete5870d82020-04-15 11:32:03 +02001639 }
1640
Christopher Faulet61cc8522020-04-20 14:54:42 +02001641 init:
1642 if (!(srv->proxy->options2 & PR_O2_CHK_ANY)) {
1643 struct tcpcheck_ruleset *rs = NULL;
1644 struct tcpcheck_rules *rules = &srv->proxy->tcpcheck_rules;
1645 //char *errmsg = NULL;
Christopher Faulete5870d82020-04-15 11:32:03 +02001646
Christopher Faulet61cc8522020-04-20 14:54:42 +02001647 srv->proxy->options2 &= ~PR_O2_CHK_ANY;
1648 srv->proxy->options2 |= PR_O2_TCPCHK_CHK;
Christopher Faulete5870d82020-04-15 11:32:03 +02001649
Christopher Faulet61cc8522020-04-20 14:54:42 +02001650 rs = find_tcpcheck_ruleset("*tcp-check");
1651 if (!rs) {
1652 rs = create_tcpcheck_ruleset("*tcp-check");
1653 if (rs == NULL) {
1654 ha_alert("config: %s '%s': out of memory.\n",
1655 proxy_type_str(srv->proxy), srv->proxy->id);
1656 ret |= ERR_ALERT | ERR_FATAL;
1657 goto out;
1658 }
Christopher Faulete5870d82020-04-15 11:32:03 +02001659 }
1660
Christopher Faulet61cc8522020-04-20 14:54:42 +02001661 free_tcpcheck_vars(&rules->preset_vars);
1662 rules->list = &rs->rules;
1663 rules->flags = 0;
Christopher Faulete5870d82020-04-15 11:32:03 +02001664 }
1665
Christopher Faulet61cc8522020-04-20 14:54:42 +02001666 err = init_check(&srv->check, srv->proxy->options2 & PR_O2_CHK_ANY);
1667 if (err) {
1668 ha_alert("config: %s '%s': unable to init check for server '%s' (%s).\n",
1669 proxy_type_str(srv->proxy), srv->proxy->id, srv->id, err);
1670 ret |= ERR_ALERT | ERR_ABORT;
1671 goto out;
Christopher Faulete5870d82020-04-15 11:32:03 +02001672 }
Christopher Faulet61cc8522020-04-20 14:54:42 +02001673 srv->check.state |= CHK_ST_CONFIGURED | CHK_ST_ENABLED;
1674 global.maxsock++;
Christopher Faulete5870d82020-04-15 11:32:03 +02001675
Christopher Faulet61cc8522020-04-20 14:54:42 +02001676 out:
1677 return ret;
Christopher Faulete5870d82020-04-15 11:32:03 +02001678}
1679
Christopher Faulet61cc8522020-04-20 14:54:42 +02001680/* Initializes an agent-check attached to the server <srv>. Non-zero is returned
1681 * if an error occurred.
1682 */
1683static int init_srv_agent_check(struct server *srv)
Christopher Faulete5870d82020-04-15 11:32:03 +02001684{
Christopher Faulet61cc8522020-04-20 14:54:42 +02001685 struct tcpcheck_rule *chk;
1686 const char *err;
Christopher Fauletfc633b62020-11-06 15:24:23 +01001687 int ret = ERR_NONE;
Christopher Faulete5870d82020-04-15 11:32:03 +02001688
Christopher Faulet6ecd5932021-01-12 17:29:45 +01001689 if (!srv->do_agent || !(srv->proxy->cap & PR_CAP_BE))
Christopher Faulet61cc8522020-04-20 14:54:42 +02001690 goto out;
Christopher Faulete5870d82020-04-15 11:32:03 +02001691
Ilya Shipitsinc02a23f2020-05-06 00:53:22 +05001692 /* If there is no connect rule preceding all send / expect rules, an
Christopher Faulet61cc8522020-04-20 14:54:42 +02001693 * implicit one is inserted before all others.
1694 */
1695 chk = get_first_tcpcheck_rule(srv->agent.tcpcheck_rules);
1696 if (!chk || chk->action != TCPCHK_ACT_CONNECT) {
1697 chk = calloc(1, sizeof(*chk));
1698 if (!chk) {
1699 ha_alert("config : %s '%s': unable to add implicit tcp-check connect rule"
1700 " to agent-check for server '%s' (out of memory).\n",
1701 proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
1702 ret |= ERR_ALERT | ERR_FATAL;
1703 goto out;
Christopher Faulete5870d82020-04-15 11:32:03 +02001704 }
Christopher Faulet61cc8522020-04-20 14:54:42 +02001705 chk->action = TCPCHK_ACT_CONNECT;
1706 chk->connect.options = (TCPCHK_OPT_DEFAULT_CONNECT|TCPCHK_OPT_IMPLICIT);
Willy Tarreau2b718102021-04-21 07:32:39 +02001707 LIST_INSERT(srv->agent.tcpcheck_rules->list, &chk->list);
Christopher Faulete5870d82020-04-15 11:32:03 +02001708 }
1709
Christopher Faulete5870d82020-04-15 11:32:03 +02001710
Christopher Faulet61cc8522020-04-20 14:54:42 +02001711 err = init_check(&srv->agent, PR_O2_TCPCHK_CHK);
1712 if (err) {
1713 ha_alert("config: %s '%s': unable to init agent-check for server '%s' (%s).\n",
1714 proxy_type_str(srv->proxy), srv->proxy->id, srv->id, err);
1715 ret |= ERR_ALERT | ERR_ABORT;
1716 goto out;
Christopher Faulete5870d82020-04-15 11:32:03 +02001717 }
1718
Christopher Faulet61cc8522020-04-20 14:54:42 +02001719 if (!srv->agent.inter)
1720 srv->agent.inter = srv->check.inter;
1721
1722 srv->agent.state |= CHK_ST_CONFIGURED | CHK_ST_ENABLED | CHK_ST_AGENT;
1723 global.maxsock++;
1724
1725 out:
1726 return ret;
Christopher Faulete5870d82020-04-15 11:32:03 +02001727}
1728
Christopher Faulet61cc8522020-04-20 14:54:42 +02001729static void deinit_srv_check(struct server *srv)
1730{
1731 if (srv->check.state & CHK_ST_CONFIGURED)
1732 free_check(&srv->check);
1733 srv->check.state &= ~CHK_ST_CONFIGURED & ~CHK_ST_ENABLED;
1734 srv->do_check = 0;
1735}
Christopher Faulete5870d82020-04-15 11:32:03 +02001736
Christopher Faulet61cc8522020-04-20 14:54:42 +02001737
1738static void deinit_srv_agent_check(struct server *srv)
1739{
1740 if (srv->agent.tcpcheck_rules) {
1741 free_tcpcheck_vars(&srv->agent.tcpcheck_rules->preset_vars);
Willy Tarreau61cfdf42021-02-20 10:46:51 +01001742 ha_free(&srv->agent.tcpcheck_rules);
Christopher Faulete5870d82020-04-15 11:32:03 +02001743 }
Christopher Faulete5870d82020-04-15 11:32:03 +02001744
Christopher Faulet61cc8522020-04-20 14:54:42 +02001745 if (srv->agent.state & CHK_ST_CONFIGURED)
1746 free_check(&srv->agent);
1747
1748 srv->agent.state &= ~CHK_ST_CONFIGURED & ~CHK_ST_ENABLED & ~CHK_ST_AGENT;
1749 srv->do_agent = 0;
Christopher Faulete5870d82020-04-15 11:32:03 +02001750}
1751
/* Hook the check initialization into the startup sequence: the health-check
 * and agent-check initializers are registered as per-server post-check
 * callbacks, and start_checks() as a global post-check callback.
 * NOTE(review): the callbacks are presumably invoked in registration order;
 * confirm before reordering these lines.
 */
REGISTER_POST_SERVER_CHECK(init_srv_check);
REGISTER_POST_SERVER_CHECK(init_srv_agent_check);
REGISTER_POST_CHECK(start_checks);

/* Per-server deinit callbacks releasing what the initializers above set up */
REGISTER_SERVER_DEINIT(deinit_srv_check);
REGISTER_SERVER_DEINIT(deinit_srv_agent_check);
Christopher Faulet61cc8522020-04-20 14:54:42 +02001758
Christopher Faulet61cc8522020-04-20 14:54:42 +02001759
1760/**************************************************************************/
1761/************************** Check sample fetches **************************/
1762/**************************************************************************/
Christopher Fauletfd6c2292020-03-25 18:20:15 +01001763
/* List of the sample fetch keywords provided by the checks. It currently only
 * holds the terminating entry, i.e. no keyword is defined here.
 */
static struct sample_fetch_kw_list smp_kws = {ILH, {
	{ /* END */ },
}};

/* Have the list above passed to sample_register_fetches() at the STG_REGISTER
 * init stage.
 */
INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
1769
1770
1771/**************************************************************************/
1772/************************ Check's parsing functions ***********************/
1773/**************************************************************************/
Christopher Fauletce8111e2020-04-06 15:04:11 +02001774/* Parse the "addr" server keyword */
1775static int srv_parse_addr(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
1776 char **errmsg)
1777{
1778 struct sockaddr_storage *sk;
Christopher Fauletce8111e2020-04-06 15:04:11 +02001779 int port1, port2, err_code = 0;
1780
1781
1782 if (!*args[*cur_arg+1]) {
1783 memprintf(errmsg, "'%s' expects <ipv4|ipv6> as argument.", args[*cur_arg]);
1784 goto error;
1785 }
1786
Willy Tarreau65ec4e32020-09-16 19:17:08 +02001787 sk = str2sa_range(args[*cur_arg+1], NULL, &port1, &port2, NULL, NULL, errmsg, NULL, NULL,
1788 PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
Christopher Fauletce8111e2020-04-06 15:04:11 +02001789 if (!sk) {
1790 memprintf(errmsg, "'%s' : %s", args[*cur_arg], *errmsg);
1791 goto error;
1792 }
1793
William Dauchy1c921cd2021-02-03 22:30:08 +01001794 srv->check.addr = *sk;
1795 /* if agentaddr was never set, we can use addr */
1796 if (!(srv->flags & SRV_F_AGENTADDR))
1797 srv->agent.addr = *sk;
Christopher Fauletce8111e2020-04-06 15:04:11 +02001798
1799 out:
1800 return err_code;
1801
1802 error:
1803 err_code |= ERR_ALERT | ERR_FATAL;
1804 goto out;
1805}
1806
Christopher Fauletcbba66c2020-04-06 14:26:30 +02001807/* Parse the "agent-addr" server keyword */
1808static int srv_parse_agent_addr(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
1809 char **errmsg)
1810{
William Dauchy1c921cd2021-02-03 22:30:08 +01001811 struct sockaddr_storage sk;
Christopher Fauletcbba66c2020-04-06 14:26:30 +02001812 int err_code = 0;
1813
1814 if (!*(args[*cur_arg+1])) {
1815 memprintf(errmsg, "'%s' expects an address as argument.", args[*cur_arg]);
1816 goto error;
1817 }
William Dauchy1c921cd2021-02-03 22:30:08 +01001818 memset(&sk, 0, sizeof(sk));
1819 if (str2ip(args[*cur_arg + 1], &sk) == NULL) {
Christopher Fauletcbba66c2020-04-06 14:26:30 +02001820 memprintf(errmsg, "parsing agent-addr failed. Check if '%s' is correct address.", args[*cur_arg+1]);
1821 goto error;
1822 }
William Dauchy1c921cd2021-02-03 22:30:08 +01001823 set_srv_agent_addr(srv, &sk);
Christopher Fauletcbba66c2020-04-06 14:26:30 +02001824
1825 out:
1826 return err_code;
1827
1828 error:
1829 err_code |= ERR_ALERT | ERR_FATAL;
1830 goto out;
1831}
1832
1833/* Parse the "agent-check" server keyword */
1834static int srv_parse_agent_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
1835 char **errmsg)
1836{
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001837 struct tcpcheck_ruleset *rs = NULL;
1838 struct tcpcheck_rules *rules = srv->agent.tcpcheck_rules;
1839 struct tcpcheck_rule *chk;
1840 int err_code = 0;
1841
1842 if (srv->do_agent)
1843 goto out;
1844
Christopher Faulet6ecd5932021-01-12 17:29:45 +01001845 if (!(curpx->cap & PR_CAP_BE)) {
1846 memprintf(errmsg, "'%s' ignored because %s '%s' has no backend capability",
1847 args[*cur_arg], proxy_type_str(curpx), curpx->id);
1848 return ERR_WARN;
1849 }
1850
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001851 if (!rules) {
1852 rules = calloc(1, sizeof(*rules));
1853 if (!rules) {
1854 memprintf(errmsg, "out of memory.");
1855 goto error;
1856 }
1857 LIST_INIT(&rules->preset_vars);
1858 srv->agent.tcpcheck_rules = rules;
1859 }
1860 rules->list = NULL;
1861 rules->flags = 0;
1862
Christopher Faulet61cc8522020-04-20 14:54:42 +02001863 rs = find_tcpcheck_ruleset("*agent-check");
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001864 if (rs)
1865 goto ruleset_found;
1866
Christopher Faulet61cc8522020-04-20 14:54:42 +02001867 rs = create_tcpcheck_ruleset("*agent-check");
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001868 if (rs == NULL) {
1869 memprintf(errmsg, "out of memory.");
1870 goto error;
1871 }
1872
Christopher Fauletb50b3e62020-05-05 18:43:43 +02001873 chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-lf", "%[var(check.agent_string)]", ""},
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001874 1, curpx, &rs->rules, srv->conf.file, srv->conf.line, errmsg);
1875 if (!chk) {
1876 memprintf(errmsg, "'%s': %s", args[*cur_arg], *errmsg);
1877 goto error;
1878 }
1879 chk->index = 0;
Willy Tarreau2b718102021-04-21 07:32:39 +02001880 LIST_APPEND(&rs->rules, &chk->list);
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001881
1882 chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", ""},
Christopher Faulete5870d82020-04-15 11:32:03 +02001883 1, curpx, &rs->rules, TCPCHK_RULES_AGENT_CHK,
1884 srv->conf.file, srv->conf.line, errmsg);
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001885 if (!chk) {
1886 memprintf(errmsg, "'%s': %s", args[*cur_arg], *errmsg);
1887 goto error;
1888 }
1889 chk->expect.custom = tcpcheck_agent_expect_reply;
1890 chk->index = 1;
Willy Tarreau2b718102021-04-21 07:32:39 +02001891 LIST_APPEND(&rs->rules, &chk->list);
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001892
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001893 ruleset_found:
1894 rules->list = &rs->rules;
Christopher Faulet1faf18a2020-11-25 16:43:12 +01001895 rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
Christopher Faulet404f9192020-04-09 23:13:54 +02001896 rules->flags |= TCPCHK_RULES_AGENT_CHK;
Christopher Fauletcbba66c2020-04-06 14:26:30 +02001897 srv->do_agent = 1;
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001898
1899 out:
Christopher Fauletcbba66c2020-04-06 14:26:30 +02001900 return 0;
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001901
1902 error:
1903 deinit_srv_agent_check(srv);
Christopher Faulet61cc8522020-04-20 14:54:42 +02001904 free_tcpcheck_ruleset(rs);
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001905 err_code |= ERR_ALERT | ERR_FATAL;
1906 goto out;
Christopher Fauletcbba66c2020-04-06 14:26:30 +02001907}
1908
1909/* Parse the "agent-inter" server keyword */
1910static int srv_parse_agent_inter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
1911 char **errmsg)
1912{
1913 const char *err = NULL;
1914 unsigned int delay;
1915 int err_code = 0;
1916
1917 if (!*(args[*cur_arg+1])) {
1918 memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
1919 goto error;
1920 }
1921
1922 err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
1923 if (err == PARSE_TIME_OVER) {
1924 memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
1925 args[*cur_arg+1], args[*cur_arg], srv->id);
1926 goto error;
1927 }
1928 else if (err == PARSE_TIME_UNDER) {
1929 memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
1930 args[*cur_arg+1], args[*cur_arg], srv->id);
1931 goto error;
1932 }
1933 else if (err) {
1934 memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
1935 *err, srv->id);
1936 goto error;
1937 }
1938 if (delay <= 0) {
1939 memprintf(errmsg, "invalid value %d for argument '%s' of server %s.",
1940 delay, args[*cur_arg], srv->id);
1941 goto error;
1942 }
1943 srv->agent.inter = delay;
1944
1945 out:
1946 return err_code;
1947
1948 error:
1949 err_code |= ERR_ALERT | ERR_FATAL;
1950 goto out;
1951}
1952
1953/* Parse the "agent-port" server keyword */
1954static int srv_parse_agent_port(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
1955 char **errmsg)
1956{
1957 int err_code = 0;
1958
1959 if (!*(args[*cur_arg+1])) {
1960 memprintf(errmsg, "'%s' expects a port number as argument.", args[*cur_arg]);
1961 goto error;
1962 }
1963
1964 global.maxsock++;
William Dauchy4858fb22021-02-03 22:30:09 +01001965 set_srv_agent_port(srv, atol(args[*cur_arg + 1]));
Christopher Fauletcbba66c2020-04-06 14:26:30 +02001966
1967 out:
1968 return err_code;
1969
1970 error:
1971 err_code |= ERR_ALERT | ERR_FATAL;
1972 goto out;
1973}
1974
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001975int set_srv_agent_send(struct server *srv, const char *send)
1976{
1977 struct tcpcheck_rules *rules = srv->agent.tcpcheck_rules;
1978 struct tcpcheck_var *var = NULL;
1979 char *str;
1980
1981 str = strdup(send);
Christopher Fauletb61caf42020-04-21 10:57:42 +02001982 var = create_tcpcheck_var(ist("check.agent_string"));
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001983 if (str == NULL || var == NULL)
1984 goto error;
1985
1986 free_tcpcheck_vars(&rules->preset_vars);
1987
1988 var->data.type = SMP_T_STR;
1989 var->data.u.str.area = str;
1990 var->data.u.str.data = strlen(str);
1991 LIST_INIT(&var->list);
Willy Tarreau2b718102021-04-21 07:32:39 +02001992 LIST_APPEND(&rules->preset_vars, &var->list);
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02001993
1994 return 1;
1995
1996 error:
1997 free(str);
1998 free(var);
1999 return 0;
2000}
Christopher Fauletcbba66c2020-04-06 14:26:30 +02002001
William Dauchyf4300902021-02-06 20:47:50 +01002002/* set agent addr and appropriate flag */
William Dauchy1c921cd2021-02-03 22:30:08 +01002003inline void set_srv_agent_addr(struct server *srv, struct sockaddr_storage *sk)
2004{
2005 srv->agent.addr = *sk;
2006 srv->flags |= SRV_F_AGENTADDR;
2007}
2008
William Dauchyf4300902021-02-06 20:47:50 +01002009/* set agent port and appropriate flag */
William Dauchy4858fb22021-02-03 22:30:09 +01002010inline void set_srv_agent_port(struct server *srv, int port)
2011{
2012 srv->agent.port = port;
2013 srv->flags |= SRV_F_AGENTPORT;
2014}
2015
Christopher Fauletcbba66c2020-04-06 14:26:30 +02002016/* Parse the "agent-send" server keyword */
2017static int srv_parse_agent_send(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2018 char **errmsg)
2019{
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02002020 struct tcpcheck_rules *rules = srv->agent.tcpcheck_rules;
Christopher Fauletcbba66c2020-04-06 14:26:30 +02002021 int err_code = 0;
2022
2023 if (!*(args[*cur_arg+1])) {
2024 memprintf(errmsg, "'%s' expects a string as argument.", args[*cur_arg]);
2025 goto error;
2026 }
2027
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02002028 if (!rules) {
2029 rules = calloc(1, sizeof(*rules));
2030 if (!rules) {
2031 memprintf(errmsg, "out of memory.");
2032 goto error;
2033 }
2034 LIST_INIT(&rules->preset_vars);
2035 srv->agent.tcpcheck_rules = rules;
2036 }
2037
2038 if (!set_srv_agent_send(srv, args[*cur_arg+1])) {
Christopher Fauletcbba66c2020-04-06 14:26:30 +02002039 memprintf(errmsg, "out of memory.");
2040 goto error;
2041 }
2042
2043 out:
2044 return err_code;
2045
2046 error:
Christopher Faulet0ae3d1d2020-04-06 17:54:24 +02002047 deinit_srv_agent_check(srv);
Christopher Fauletcbba66c2020-04-06 14:26:30 +02002048 err_code |= ERR_ALERT | ERR_FATAL;
2049 goto out;
2050}
2051
/* Parse the "no-agent-check" server keyword. It takes no argument and simply
 * calls deinit_srv_agent_check() on the server. Always returns 0.
 */
static int srv_parse_no_agent_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
				    char **errmsg)
{
	deinit_srv_agent_check(srv);
	return 0;
}
2059
Christopher Fauletce8111e2020-04-06 15:04:11 +02002060/* Parse the "check" server keyword */
2061static int srv_parse_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2062 char **errmsg)
2063{
Christopher Faulet6ecd5932021-01-12 17:29:45 +01002064 if (!(curpx->cap & PR_CAP_BE)) {
2065 memprintf(errmsg, "'%s' ignored because %s '%s' has no backend capability",
2066 args[*cur_arg], proxy_type_str(curpx), curpx->id);
2067 return ERR_WARN;
2068 }
2069
Christopher Fauletce8111e2020-04-06 15:04:11 +02002070 srv->do_check = 1;
2071 return 0;
2072}
2073
2074/* Parse the "check-send-proxy" server keyword */
2075static int srv_parse_check_send_proxy(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2076 char **errmsg)
2077{
2078 srv->check.send_proxy = 1;
2079 return 0;
2080}
2081
2082/* Parse the "check-via-socks4" server keyword */
2083static int srv_parse_check_via_socks4(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2084 char **errmsg)
2085{
2086 srv->check.via_socks4 = 1;
2087 return 0;
2088}
2089
/* Parse the "no-check" server keyword. It takes no argument and simply calls
 * deinit_srv_check() on the server. Always returns 0.
 */
static int srv_parse_no_check(char **args, int *cur_arg,
			      struct proxy *curpx, struct server *srv,
			      char **errmsg)
{
	deinit_srv_check(srv);

	return 0;
}
2097
2098/* Parse the "no-check-send-proxy" server keyword */
2099static int srv_parse_no_check_send_proxy(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2100 char **errmsg)
2101{
2102 srv->check.send_proxy = 0;
2103 return 0;
2104}
2105
Christopher Fauletedc6ed92020-04-23 16:27:59 +02002106/* parse the "check-proto" server keyword */
2107static int srv_parse_check_proto(char **args, int *cur_arg,
2108 struct proxy *px, struct server *newsrv, char **err)
2109{
2110 int err_code = 0;
2111
2112 if (!*args[*cur_arg + 1]) {
2113 memprintf(err, "'%s' : missing value", args[*cur_arg]);
2114 goto error;
2115 }
Tim Duesterhusdcf753a2021-03-04 17:31:47 +01002116 newsrv->check.mux_proto = get_mux_proto(ist(args[*cur_arg + 1]));
Christopher Fauletedc6ed92020-04-23 16:27:59 +02002117 if (!newsrv->check.mux_proto) {
2118 memprintf(err, "'%s' : unknown MUX protocol '%s'", args[*cur_arg], args[*cur_arg+1]);
2119 goto error;
2120 }
2121
2122 out:
2123 return err_code;
2124
2125 error:
2126 err_code |= ERR_ALERT | ERR_FATAL;
2127 goto out;
2128}
2129
2130
Christopher Fauletce8111e2020-04-06 15:04:11 +02002131/* Parse the "rise" server keyword */
2132static int srv_parse_check_rise(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2133 char **errmsg)
2134{
2135 int err_code = 0;
2136
2137 if (!*args[*cur_arg + 1]) {
2138 memprintf(errmsg, "'%s' expects an integer argument.", args[*cur_arg]);
2139 goto error;
2140 }
2141
2142 srv->check.rise = atol(args[*cur_arg+1]);
2143 if (srv->check.rise <= 0) {
2144 memprintf(errmsg, "'%s' has to be > 0.", args[*cur_arg]);
2145 goto error;
2146 }
2147
2148 if (srv->check.health)
2149 srv->check.health = srv->check.rise;
2150
2151 out:
2152 return err_code;
2153
2154 error:
2155 deinit_srv_agent_check(srv);
2156 err_code |= ERR_ALERT | ERR_FATAL;
2157 goto out;
2158 return 0;
2159}
2160
2161/* Parse the "fall" server keyword */
2162static int srv_parse_check_fall(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2163 char **errmsg)
2164{
2165 int err_code = 0;
2166
2167 if (!*args[*cur_arg + 1]) {
2168 memprintf(errmsg, "'%s' expects an integer argument.", args[*cur_arg]);
2169 goto error;
2170 }
2171
2172 srv->check.fall = atol(args[*cur_arg+1]);
2173 if (srv->check.fall <= 0) {
2174 memprintf(errmsg, "'%s' has to be > 0.", args[*cur_arg]);
2175 goto error;
2176 }
2177
2178 out:
2179 return err_code;
2180
2181 error:
2182 deinit_srv_agent_check(srv);
2183 err_code |= ERR_ALERT | ERR_FATAL;
2184 goto out;
2185 return 0;
2186}
2187
2188/* Parse the "inter" server keyword */
2189static int srv_parse_check_inter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2190 char **errmsg)
2191{
2192 const char *err = NULL;
2193 unsigned int delay;
2194 int err_code = 0;
2195
2196 if (!*(args[*cur_arg+1])) {
2197 memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
2198 goto error;
2199 }
2200
2201 err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
2202 if (err == PARSE_TIME_OVER) {
2203 memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
2204 args[*cur_arg+1], args[*cur_arg], srv->id);
2205 goto error;
2206 }
2207 else if (err == PARSE_TIME_UNDER) {
2208 memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
2209 args[*cur_arg+1], args[*cur_arg], srv->id);
2210 goto error;
2211 }
2212 else if (err) {
2213 memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
2214 *err, srv->id);
2215 goto error;
2216 }
2217 if (delay <= 0) {
2218 memprintf(errmsg, "invalid value %d for argument '%s' of server %s.",
2219 delay, args[*cur_arg], srv->id);
2220 goto error;
2221 }
2222 srv->check.inter = delay;
2223
2224 out:
2225 return err_code;
2226
2227 error:
2228 err_code |= ERR_ALERT | ERR_FATAL;
2229 goto out;
2230}
2231
2232
2233/* Parse the "fastinter" server keyword */
2234static int srv_parse_check_fastinter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2235 char **errmsg)
2236{
2237 const char *err = NULL;
2238 unsigned int delay;
2239 int err_code = 0;
2240
2241 if (!*(args[*cur_arg+1])) {
2242 memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
2243 goto error;
2244 }
2245
2246 err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
2247 if (err == PARSE_TIME_OVER) {
2248 memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
2249 args[*cur_arg+1], args[*cur_arg], srv->id);
2250 goto error;
2251 }
2252 else if (err == PARSE_TIME_UNDER) {
2253 memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
2254 args[*cur_arg+1], args[*cur_arg], srv->id);
2255 goto error;
2256 }
2257 else if (err) {
2258 memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
2259 *err, srv->id);
2260 goto error;
2261 }
2262 if (delay <= 0) {
2263 memprintf(errmsg, "invalid value %d for argument '%s' of server %s.",
2264 delay, args[*cur_arg], srv->id);
2265 goto error;
2266 }
2267 srv->check.fastinter = delay;
2268
2269 out:
2270 return err_code;
2271
2272 error:
2273 err_code |= ERR_ALERT | ERR_FATAL;
2274 goto out;
2275}
2276
2277
2278/* Parse the "downinter" server keyword */
2279static int srv_parse_check_downinter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2280 char **errmsg)
2281{
2282 const char *err = NULL;
2283 unsigned int delay;
2284 int err_code = 0;
2285
2286 if (!*(args[*cur_arg+1])) {
2287 memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
2288 goto error;
2289 }
2290
2291 err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
2292 if (err == PARSE_TIME_OVER) {
2293 memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
2294 args[*cur_arg+1], args[*cur_arg], srv->id);
2295 goto error;
2296 }
2297 else if (err == PARSE_TIME_UNDER) {
2298 memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
2299 args[*cur_arg+1], args[*cur_arg], srv->id);
2300 goto error;
2301 }
2302 else if (err) {
2303 memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
2304 *err, srv->id);
2305 goto error;
2306 }
2307 if (delay <= 0) {
2308 memprintf(errmsg, "invalid value %d for argument '%s' of server %s.",
2309 delay, args[*cur_arg], srv->id);
2310 goto error;
2311 }
2312 srv->check.downinter = delay;
2313
2314 out:
2315 return err_code;
2316
2317 error:
2318 err_code |= ERR_ALERT | ERR_FATAL;
2319 goto out;
2320}
2321
2322/* Parse the "port" server keyword */
2323static int srv_parse_check_port(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
2324 char **errmsg)
2325{
2326 int err_code = 0;
2327
2328 if (!*(args[*cur_arg+1])) {
2329 memprintf(errmsg, "'%s' expects a port number as argument.", args[*cur_arg]);
2330 goto error;
2331 }
2332
2333 global.maxsock++;
2334 srv->check.port = atol(args[*cur_arg+1]);
William Dauchy4858fb22021-02-03 22:30:09 +01002335 /* if agentport was never set, we can use port */
2336 if (!(srv->flags & SRV_F_AGENTPORT))
2337 srv->agent.port = srv->check.port;
Christopher Fauletce8111e2020-04-06 15:04:11 +02002338
2339 out:
2340 return err_code;
2341
2342 error:
2343 err_code |= ERR_ALERT | ERR_FATAL;
2344 goto out;
2345}
2346
Christopher Fauletcbba66c2020-04-06 14:26:30 +02002347static struct srv_kw_list srv_kws = { "CHK", { }, {
Amaury Denoyelle76e10e72021-03-08 17:08:01 +01002348 { "addr", srv_parse_addr, 1, 1, 0 }, /* IP address to send health to or to probe from agent-check */
2349 { "agent-addr", srv_parse_agent_addr, 1, 1, 0 }, /* Enable an auxiliary agent check */
2350 { "agent-check", srv_parse_agent_check, 0, 1, 0 }, /* Enable agent checks */
2351 { "agent-inter", srv_parse_agent_inter, 1, 1, 0 }, /* Set the interval between two agent checks */
2352 { "agent-port", srv_parse_agent_port, 1, 1, 0 }, /* Set the TCP port used for agent checks. */
2353 { "agent-send", srv_parse_agent_send, 1, 1, 0 }, /* Set string to send to agent. */
2354 { "check", srv_parse_check, 0, 1, 0 }, /* Enable health checks */
2355 { "check-proto", srv_parse_check_proto, 1, 1, 0 }, /* Set the mux protocol for health checks */
2356 { "check-send-proxy", srv_parse_check_send_proxy, 0, 1, 0 }, /* Enable PROXY protocol for health checks */
2357 { "check-via-socks4", srv_parse_check_via_socks4, 0, 1, 0 }, /* Enable socks4 proxy for health checks */
2358 { "no-agent-check", srv_parse_no_agent_check, 0, 1, 0 }, /* Do not enable any auxiliary agent check */
2359 { "no-check", srv_parse_no_check, 0, 1, 0 }, /* Disable health checks */
2360 { "no-check-send-proxy", srv_parse_no_check_send_proxy, 0, 1, 0 }, /* Disable PROXY protocol for health checks */
2361 { "rise", srv_parse_check_rise, 1, 1, 0 }, /* Set rise value for health checks */
2362 { "fall", srv_parse_check_fall, 1, 1, 0 }, /* Set fall value for health checks */
2363 { "inter", srv_parse_check_inter, 1, 1, 0 }, /* Set inter value for health checks */
2364 { "fastinter", srv_parse_check_fastinter, 1, 1, 0 }, /* Set fastinter value for health checks */
2365 { "downinter", srv_parse_check_downinter, 1, 1, 0 }, /* Set downinter value for health checks */
2366 { "port", srv_parse_check_port, 1, 1, 0 }, /* Set the TCP port used for health checks. */
Christopher Fauletcbba66c2020-04-06 14:26:30 +02002367 { NULL, NULL, 0 },
2368}};
2369
Christopher Fauletcbba66c2020-04-06 14:26:30 +02002370INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
Christopher Fauletfd6c2292020-03-25 18:20:15 +01002371
Willy Tarreaubd741542010-03-16 18:46:54 +01002372/*
Willy Tarreaubaaee002006-06-26 02:48:02 +02002373 * Local variables:
2374 * c-indent-level: 8
2375 * c-basic-offset: 8
2376 * End:
2377 */