blob: 00751fefd9023637f2e189bcc4e87dd77bd816aa [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Health-checks functions.
3 *
Willy Tarreau26c25062009-03-08 09:38:41 +01004 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +01005 * Copyright 2007-2008 Krzysztof Piotr Oledzki <ole@ans.pl>
Willy Tarreaubaaee002006-06-26 02:48:02 +02006 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 */
13
Willy Tarreaub8816082008-01-18 12:18:15 +010014#include <assert.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020015#include <errno.h>
16#include <fcntl.h>
17#include <stdio.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020018#include <stdlib.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020019#include <string.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020020#include <time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020021#include <unistd.h>
22#include <sys/socket.h>
23#include <netinet/in.h>
Willy Tarreau1274bc42009-07-15 07:16:31 +020024#include <netinet/tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020025#include <arpa/inet.h>
26
Willy Tarreau2dd0d472006-06-29 17:53:05 +020027#include <common/compat.h>
28#include <common/config.h>
29#include <common/mini-clist.h>
Willy Tarreau83749182007-04-15 20:56:27 +020030#include <common/standard.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020031#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020032
33#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020034
35#include <proto/backend.h>
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +010036#include <proto/buffers.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020037#include <proto/fd.h>
38#include <proto/log.h>
39#include <proto/queue.h>
Willy Tarreauc6f4ce82009-06-10 11:09:37 +020040#include <proto/port_range.h>
Willy Tarreau3d300592007-03-18 18:34:41 +010041#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010042#include <proto/proto_tcp.h>
Willy Tarreau2b5652f2006-12-31 17:46:05 +010043#include <proto/proxy.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020044#include <proto/server.h>
45#include <proto/task.h>
46
Willy Tarreau48494c02007-11-30 10:41:39 +010047/* sends a log message when a backend goes down, and also sets last
48 * change date.
49 */
50static void set_backend_down(struct proxy *be)
51{
52 be->last_change = now.tv_sec;
53 be->down_trans++;
54
55 Alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
56 send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
57}
58
59/* Redistribute pending connections when a server goes down. The number of
60 * connections redistributed is returned.
61 */
62static int redistribute_pending(struct server *s)
63{
64 struct pendconn *pc, *pc_bck, *pc_end;
65 int xferred = 0;
66
67 FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) {
68 struct session *sess = pc->sess;
69 if (sess->be->options & PR_O_REDISP) {
70 /* The REDISP option was specified. We will ignore
71 * cookie and force to balance or use the dispatcher.
72 */
Krzysztof Piotr Oledzki25b501a2008-01-06 16:36:16 +010073
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +010074 /* it's left to the dispatcher to choose a server */
Willy Tarreau48494c02007-11-30 10:41:39 +010075 sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Krzysztof Piotr Oledzki25b501a2008-01-06 16:36:16 +010076
Willy Tarreau48494c02007-11-30 10:41:39 +010077 pendconn_free(pc);
Willy Tarreaufdccded2008-08-29 18:19:04 +020078 task_wakeup(sess->task, TASK_WOKEN_RES);
Willy Tarreau48494c02007-11-30 10:41:39 +010079 xferred++;
80 }
81 }
82 return xferred;
83}
84
85/* Check for pending connections at the backend, and assign some of them to
86 * the server coming up. The server's weight is checked before being assigned
87 * connections it may not be able to handle. The total number of transferred
88 * connections is returned.
89 */
90static int check_for_pending(struct server *s)
91{
92 int xferred;
93
94 if (!s->eweight)
95 return 0;
96
97 for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) {
98 struct session *sess;
99 struct pendconn *p;
100
101 p = pendconn_from_px(s->proxy);
102 if (!p)
103 break;
104 p->sess->srv = s;
105 sess = p->sess;
106 pendconn_free(p);
Willy Tarreaufdccded2008-08-29 18:19:04 +0200107 task_wakeup(sess->task, TASK_WOKEN_RES);
Willy Tarreau48494c02007-11-30 10:41:39 +0100108 }
109 return xferred;
110}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200111
112/* Sets server <s> down, notifies by all available means, recounts the
113 * remaining servers on the proxy and transfers queued sessions whenever
Willy Tarreau5af3a692007-07-24 23:32:33 +0200114 * possible to other servers. It automatically recomputes the number of
115 * servers, but not the map.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200116 */
Willy Tarreau83749182007-04-15 20:56:27 +0200117static void set_server_down(struct server *s)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200118{
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100119 struct server *srv;
120 struct chunk msg;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200121 int xferred;
122
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100123 if (s->health == s->rise || s->tracked) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100124 int srv_was_paused = s->state & SRV_GOINGDOWN;
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200125
126 s->last_change = now.tv_sec;
Willy Tarreau48494c02007-11-30 10:41:39 +0100127 s->state &= ~(SRV_RUNNING | SRV_GOINGDOWN);
Willy Tarreaub625a082007-11-26 01:15:43 +0100128 s->proxy->lbprm.set_server_status_down(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200129
130 /* we might have sessions queued on this server and waiting for
131 * a connection. Those which are redispatchable will be queued
132 * to another server or to the proxy itself.
133 */
Willy Tarreau48494c02007-11-30 10:41:39 +0100134 xferred = redistribute_pending(s);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100135
136 msg.len = 0;
137 msg.str = trash;
138
139 chunk_printf(&msg, sizeof(trash),
140 "%sServer %s/%s is DOWN", s->state & SRV_BACKUP ? "Backup " : "",
141 s->proxy->id, s->id);
142
143 if (s->tracked)
144 chunk_printf(&msg, sizeof(trash), " via %s/%s",
145 s->tracked->proxy->id, s->tracked->id);
146
147 chunk_printf(&msg, sizeof(trash), ". %d active and %d backup servers left.%s"
Willy Tarreaubaaee002006-06-26 02:48:02 +0200148 " %d sessions active, %d requeued, %d remaining in queue.\n",
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100149 s->proxy->srv_act, s->proxy->srv_bck,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200150 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
151 s->cur_sess, xferred, s->nbpend);
152
153 Warning("%s", trash);
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200154
Willy Tarreau48494c02007-11-30 10:41:39 +0100155 /* we don't send an alert if the server was previously paused */
156 if (srv_was_paused)
157 send_log(s->proxy, LOG_NOTICE, "%s", trash);
158 else
159 send_log(s->proxy, LOG_ALERT, "%s", trash);
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200160
Willy Tarreau48494c02007-11-30 10:41:39 +0100161 if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
162 set_backend_down(s->proxy);
163
Willy Tarreaubaaee002006-06-26 02:48:02 +0200164 s->down_trans++;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100165
Krzysztof Piotr Oledzkif39c71c2009-01-30 00:52:49 +0100166 if (s->state & SRV_CHECKED)
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100167 for(srv = s->tracknext; srv; srv = srv->tracknext)
168 set_server_down(srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200169 }
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100170
Willy Tarreaubaaee002006-06-26 02:48:02 +0200171 s->health = 0; /* failure */
172}
173
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100174static void set_server_up(struct server *s) {
175
176 struct server *srv;
177 struct chunk msg;
178 int xferred;
179
180 if (s->health == s->rise || s->tracked) {
181 if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
182 if (s->proxy->last_change < now.tv_sec) // ignore negative times
183 s->proxy->down_time += now.tv_sec - s->proxy->last_change;
184 s->proxy->last_change = now.tv_sec;
185 }
186
187 if (s->last_change < now.tv_sec) // ignore negative times
188 s->down_time += now.tv_sec - s->last_change;
189
190 s->last_change = now.tv_sec;
191 s->state |= SRV_RUNNING;
192
193 if (s->slowstart > 0) {
194 s->state |= SRV_WARMINGUP;
195 if (s->proxy->lbprm.algo & BE_LB_PROP_DYN) {
196 /* For dynamic algorithms, start at the first step of the weight,
197 * without multiplying by BE_WEIGHT_SCALE.
198 */
199 s->eweight = s->uweight;
200 if (s->proxy->lbprm.update_server_eweight)
201 s->proxy->lbprm.update_server_eweight(s);
202 }
203 }
204 s->proxy->lbprm.set_server_status_up(s);
205
206 /* check if we can handle some connections queued at the proxy. We
207 * will take as many as we can handle.
208 */
209 xferred = check_for_pending(s);
210
211 msg.len = 0;
212 msg.str = trash;
213
214 chunk_printf(&msg, sizeof(trash),
215 "%sServer %s/%s is UP", s->state & SRV_BACKUP ? "Backup " : "",
216 s->proxy->id, s->id);
217
218 if (s->tracked)
219 chunk_printf(&msg, sizeof(trash), " via %s/%s",
220 s->tracked->proxy->id, s->tracked->id);
221
222 chunk_printf(&msg, sizeof(trash), ". %d active and %d backup servers online.%s"
223 " %d sessions requeued, %d total in queue.\n",
224 s->proxy->srv_act, s->proxy->srv_bck,
225 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
Willy Tarreau1772ece2009-04-03 14:49:12 +0200226 xferred, s->nbpend);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100227
228 Warning("%s", trash);
229 send_log(s->proxy, LOG_NOTICE, "%s", trash);
230
Krzysztof Piotr Oledzkif39c71c2009-01-30 00:52:49 +0100231 if (s->state & SRV_CHECKED)
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100232 for(srv = s->tracknext; srv; srv = srv->tracknext)
233 set_server_up(srv);
234 }
235
236 if (s->health >= s->rise)
237 s->health = s->rise + s->fall - 1; /* OK now */
238
239}
240
241static void set_server_disabled(struct server *s) {
242
243 struct server *srv;
244 struct chunk msg;
245 int xferred;
246
247 s->state |= SRV_GOINGDOWN;
248 s->proxy->lbprm.set_server_status_down(s);
249
250 /* we might have sessions queued on this server and waiting for
251 * a connection. Those which are redispatchable will be queued
252 * to another server or to the proxy itself.
253 */
254 xferred = redistribute_pending(s);
255
256 msg.len = 0;
257 msg.str = trash;
258
259 chunk_printf(&msg, sizeof(trash),
260 "Load-balancing on %sServer %s/%s is disabled",
261 s->state & SRV_BACKUP ? "Backup " : "",
262 s->proxy->id, s->id);
263
264 if (s->tracked)
265 chunk_printf(&msg, sizeof(trash), " via %s/%s",
266 s->tracked->proxy->id, s->tracked->id);
267
268
269 chunk_printf(&msg, sizeof(trash),". %d active and %d backup servers online.%s"
270 " %d sessions requeued, %d total in queue.\n",
271 s->proxy->srv_act, s->proxy->srv_bck,
272 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
273 xferred, s->nbpend);
274
275 Warning("%s", trash);
276
277 send_log(s->proxy, LOG_NOTICE, "%s", trash);
278
279 if (!s->proxy->srv_bck && !s->proxy->srv_act)
280 set_backend_down(s->proxy);
281
Krzysztof Piotr Oledzkif39c71c2009-01-30 00:52:49 +0100282 if (s->state & SRV_CHECKED)
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100283 for(srv = s->tracknext; srv; srv = srv->tracknext)
284 set_server_disabled(srv);
285}
286
287static void set_server_enabled(struct server *s) {
288
289 struct server *srv;
290 struct chunk msg;
291 int xferred;
292
293 s->state &= ~SRV_GOINGDOWN;
294 s->proxy->lbprm.set_server_status_up(s);
295
296 /* check if we can handle some connections queued at the proxy. We
297 * will take as many as we can handle.
298 */
299 xferred = check_for_pending(s);
300
301 msg.len = 0;
302 msg.str = trash;
303
304 chunk_printf(&msg, sizeof(trash),
305 "Load-balancing on %sServer %s/%s is enabled again",
306 s->state & SRV_BACKUP ? "Backup " : "",
307 s->proxy->id, s->id);
308
309 if (s->tracked)
310 chunk_printf(&msg, sizeof(trash), " via %s/%s",
311 s->tracked->proxy->id, s->tracked->id);
312
313 chunk_printf(&msg, sizeof(trash), ". %d active and %d backup servers online.%s"
314 " %d sessions requeued, %d total in queue.\n",
315 s->proxy->srv_act, s->proxy->srv_bck,
316 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
317 xferred, s->nbpend);
318
319 Warning("%s", trash);
320 send_log(s->proxy, LOG_NOTICE, "%s", trash);
321
Krzysztof Piotr Oledzkif39c71c2009-01-30 00:52:49 +0100322 if (s->state & SRV_CHECKED)
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100323 for(srv = s->tracknext; srv; srv = srv->tracknext)
324 set_server_enabled(srv);
325}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200326
327/*
328 * This function is used only for server health-checks. It handles
329 * the connection acknowledgement. If the proxy requires HTTP health-checks,
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100330 * it sends the request. In other cases, it fills s->result with SRV_CHK_*.
Willy Tarreau83749182007-04-15 20:56:27 +0200331 * The function itself returns 0 if it needs some polling before being called
332 * again, otherwise 1.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200333 */
Willy Tarreau83749182007-04-15 20:56:27 +0200334static int event_srv_chk_w(int fd)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200335{
Willy Tarreau6996e152007-04-30 14:37:43 +0200336 __label__ out_wakeup, out_nowake, out_poll, out_error;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200337 struct task *t = fdtab[fd].owner;
338 struct server *s = t->context;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200339
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100340 //fprintf(stderr, "event_srv_chk_w, state=%ld\n", unlikely(fdtab[fd].state));
Willy Tarreau6996e152007-04-30 14:37:43 +0200341 if (unlikely(fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR)))
342 goto out_error;
343
344 /* here, we know that the connection is established */
Willy Tarreau83749182007-04-15 20:56:27 +0200345
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100346 if (!(s->result & SRV_CHK_ERROR)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200347 /* we don't want to mark 'UP' a server on which we detected an error earlier */
Willy Tarreauf3c69202006-07-09 16:42:34 +0200348 if ((s->proxy->options & PR_O_HTTP_CHK) ||
Willy Tarreau23677902007-05-08 23:50:35 +0200349 (s->proxy->options & PR_O_SSL3_CHK) ||
350 (s->proxy->options & PR_O_SMTP_CHK)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200351 int ret;
Willy Tarreauf3c69202006-07-09 16:42:34 +0200352 /* we want to check if this host replies to HTTP or SSLv3 requests
Willy Tarreaubaaee002006-06-26 02:48:02 +0200353 * so we'll send the request, and won't wake the checker up now.
354 */
Willy Tarreauf3c69202006-07-09 16:42:34 +0200355
356 if (s->proxy->options & PR_O_SSL3_CHK) {
357 /* SSL requires that we put Unix time in the request */
Willy Tarreaub7f694f2008-06-22 17:18:02 +0200358 int gmt_time = htonl(date.tv_sec);
Willy Tarreauf3c69202006-07-09 16:42:34 +0200359 memcpy(s->proxy->check_req + 11, &gmt_time, 4);
360 }
361
Willy Tarreaubaaee002006-06-26 02:48:02 +0200362 ret = send(fd, s->proxy->check_req, s->proxy->check_len, MSG_DONTWAIT | MSG_NOSIGNAL);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200363 if (ret == s->proxy->check_len) {
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100364 /* we allow up to <timeout.check> if nonzero for a responce */
Willy Tarreau7cd9d942008-12-21 13:00:41 +0100365 if (s->proxy->timeout.check)
366 t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
Willy Tarreauf161a342007-04-08 16:59:42 +0200367 EV_FD_SET(fd, DIR_RD); /* prepare for reading reply */
Willy Tarreau83749182007-04-15 20:56:27 +0200368 goto out_nowake;
369 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200370 else if (ret == 0 || errno == EAGAIN)
371 goto out_poll;
372 else
373 goto out_error;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200374 }
375 else {
Willy Tarreau6996e152007-04-30 14:37:43 +0200376 /* We have no data to send to check the connection, and
377 * getsockopt() will not inform us whether the connection
378 * is still pending. So we'll reuse connect() to check the
379 * state of the socket. This has the advantage of givig us
380 * the following info :
381 * - error
382 * - connecting (EALREADY, EINPROGRESS)
383 * - connected (EISCONN, 0)
384 */
385
386 struct sockaddr_in sa;
387
388 sa = (s->check_addr.sin_addr.s_addr) ? s->check_addr : s->addr;
389 sa.sin_port = htons(s->check_port);
390
391 if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) == 0)
392 errno = 0;
393
394 if (errno == EALREADY || errno == EINPROGRESS)
395 goto out_poll;
396
397 if (errno && errno != EISCONN)
398 goto out_error;
399
Willy Tarreaubaaee002006-06-26 02:48:02 +0200400 /* good TCP connection is enough */
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100401 s->result |= SRV_CHK_RUNNING;
Willy Tarreau6996e152007-04-30 14:37:43 +0200402 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200403 }
404 }
Willy Tarreau83749182007-04-15 20:56:27 +0200405 out_wakeup:
Willy Tarreaufdccded2008-08-29 18:19:04 +0200406 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreau83749182007-04-15 20:56:27 +0200407 out_nowake:
408 EV_FD_CLR(fd, DIR_WR); /* nothing more to write */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100409 fdtab[fd].ev &= ~FD_POLL_OUT;
Willy Tarreau83749182007-04-15 20:56:27 +0200410 return 1;
Willy Tarreau6996e152007-04-30 14:37:43 +0200411 out_poll:
412 /* The connection is still pending. We'll have to poll it
413 * before attempting to go further. */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100414 fdtab[fd].ev &= ~FD_POLL_OUT;
Willy Tarreau6996e152007-04-30 14:37:43 +0200415 return 0;
416 out_error:
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100417 s->result |= SRV_CHK_ERROR;
Willy Tarreau6996e152007-04-30 14:37:43 +0200418 fdtab[fd].state = FD_STERROR;
419 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200420}
421
422
423/*
Willy Tarreauf3c69202006-07-09 16:42:34 +0200424 * This function is used only for server health-checks. It handles the server's
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100425 * reply to an HTTP request or SSL HELLO. It sets s->result to SRV_CHK_RUNNING
426 * if an HTTP server replies HTTP 2xx or 3xx (valid responses), if an SMTP
427 * server returns 2xx, or if an SSL server returns at least 5 bytes in response
428 * to an SSL HELLO (the principle is that this is enough to distinguish between
429 * an SSL server and a pure TCP relay). All other cases will set s->result to
430 * SRV_CHK_ERROR. The function returns 0 if it needs to be called again after
431 * some polling, otherwise non-zero..
Willy Tarreaubaaee002006-06-26 02:48:02 +0200432 */
Willy Tarreau83749182007-04-15 20:56:27 +0200433static int event_srv_chk_r(int fd)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200434{
Willy Tarreau83749182007-04-15 20:56:27 +0200435 __label__ out_wakeup;
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100436 int len;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200437 struct task *t = fdtab[fd].owner;
438 struct server *s = t->context;
439 int skerr;
440 socklen_t lskerr = sizeof(skerr);
441
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100442 len = -1;
Willy Tarreau83749182007-04-15 20:56:27 +0200443
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100444 if (unlikely((s->result & SRV_CHK_ERROR) ||
445 (fdtab[fd].state == FD_STERROR) ||
Willy Tarreau83749182007-04-15 20:56:27 +0200446 (fdtab[fd].ev & FD_POLL_ERR) ||
447 (getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == -1) ||
448 (skerr != 0))) {
449 /* in case of TCP only, this tells us if the connection failed */
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100450 s->result |= SRV_CHK_ERROR;
Willy Tarreau83749182007-04-15 20:56:27 +0200451 goto out_wakeup;
452 }
453
Willy Tarreau83749182007-04-15 20:56:27 +0200454 /* Warning! Linux returns EAGAIN on SO_ERROR if data are still available
455 * but the connection was closed on the remote end. Fortunately, recv still
456 * works correctly and we don't need to do the getsockopt() on linux.
457 */
Krzysztof Oledzki6b3f8b42007-10-11 18:41:08 +0200458 len = recv(fd, trash, sizeof(trash), MSG_NOSIGNAL);
Willy Tarreau83749182007-04-15 20:56:27 +0200459 if (unlikely(len < 0 && errno == EAGAIN)) {
460 /* we want some polling to happen first */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100461 fdtab[fd].ev &= ~FD_POLL_IN;
Willy Tarreau83749182007-04-15 20:56:27 +0200462 return 0;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200463 }
464
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100465 /* Note: the response will only be accepted if read at once */
466 if (s->proxy->options & PR_O_HTTP_CHK) {
467 /* Check if the server speaks HTTP 1.X */
468 if ((len < strlen("HTTP/1.0 000\r")) ||
469 (memcmp(trash, "HTTP/1.", 7) != 0)) {
470 s->result |= SRV_CHK_ERROR;
471 goto out_wakeup;
472 }
473
474 /* check the reply : HTTP/1.X 2xx and 3xx are OK */
475 if (trash[9] == '2' || trash[9] == '3')
476 s->result |= SRV_CHK_RUNNING;
Willy Tarreau48494c02007-11-30 10:41:39 +0100477 else if ((s->proxy->options & PR_O_DISABLE404) &&
478 (s->state & SRV_RUNNING) &&
479 (memcmp(&trash[9], "404", 3) == 0)) {
480 /* 404 may be accepted as "stopping" only if the server was up */
481 s->result |= SRV_CHK_RUNNING | SRV_CHK_DISABLE;
482 }
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100483 else
484 s->result |= SRV_CHK_ERROR;
485 }
486 else if (s->proxy->options & PR_O_SSL3_CHK) {
487 /* Check for SSLv3 alert or handshake */
488 if ((len >= 5) && (trash[0] == 0x15 || trash[0] == 0x16))
489 s->result |= SRV_CHK_RUNNING;
490 else
491 s->result |= SRV_CHK_ERROR;
Willy Tarreau6996e152007-04-30 14:37:43 +0200492 }
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100493 else if (s->proxy->options & PR_O_SMTP_CHK) {
494 /* Check for SMTP code 2xx (should be 250) */
495 if ((len >= 3) && (trash[0] == '2'))
496 s->result |= SRV_CHK_RUNNING;
497 else
498 s->result |= SRV_CHK_ERROR;
Willy Tarreau6996e152007-04-30 14:37:43 +0200499 }
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100500 else {
501 /* other checks are valid if the connection succeeded anyway */
502 s->result |= SRV_CHK_RUNNING;
Willy Tarreau23677902007-05-08 23:50:35 +0200503 }
Willy Tarreau83749182007-04-15 20:56:27 +0200504
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100505 out_wakeup:
506 if (s->result & SRV_CHK_ERROR)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200507 fdtab[fd].state = FD_STERROR;
508
Willy Tarreauf161a342007-04-08 16:59:42 +0200509 EV_FD_CLR(fd, DIR_RD);
Willy Tarreaufdccded2008-08-29 18:19:04 +0200510 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100511 fdtab[fd].ev &= ~FD_POLL_IN;
Willy Tarreau83749182007-04-15 20:56:27 +0200512 return 1;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200513}
514
/*
 * Manages a server health-check. Returns the task <t> itself; the date at
 * which the task wants to be woken up again is stored in t->expire.
 */
Willy Tarreau26c25062009-03-08 09:38:41 +0100519struct task *process_chk(struct task *t)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200520{
Willy Tarreaue3838802009-03-21 18:58:32 +0100521 int attempts = 0;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200522 struct server *s = t->context;
523 struct sockaddr_in sa;
524 int fd;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200525 int rv;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200526
527 //fprintf(stderr, "process_chk: task=%p\n", t);
528
529 new_chk:
Willy Tarreaue3838802009-03-21 18:58:32 +0100530 if (attempts++ > 0) {
531 /* we always fail to create a server, let's stop insisting... */
532 while (tick_is_expired(t->expire, now_ms))
533 t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
534 return t;
535 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200536 fd = s->curfd;
537 if (fd < 0) { /* no check currently running */
538 //fprintf(stderr, "process_chk: 2\n");
Willy Tarreau26c25062009-03-08 09:38:41 +0100539 if (!tick_is_expired(t->expire, now_ms)) /* woke up too early */
540 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200541
542 /* we don't send any health-checks when the proxy is stopped or when
543 * the server should not be checked.
544 */
545 if (!(s->state & SRV_CHECKED) || s->proxy->state == PR_STSTOPPED) {
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200546 while (tick_is_expired(t->expire, now_ms))
547 t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
Willy Tarreau26c25062009-03-08 09:38:41 +0100548 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200549 }
550
551 /* we'll initiate a new check */
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100552 s->result = SRV_CHK_UNKNOWN; /* no result yet */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200553 if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) {
554 if ((fd < global.maxsock) &&
555 (fcntl(fd, F_SETFL, O_NONBLOCK) != -1) &&
556 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) != -1)) {
557 //fprintf(stderr, "process_chk: 3\n");
558
Willy Tarreau9edd1612007-10-18 18:07:48 +0200559 if (s->proxy->options & PR_O_TCP_NOLING) {
560 /* We don't want to useless data */
561 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
562 }
Willy Tarreau2ea3abb2007-03-25 16:45:16 +0200563
Willy Tarreau0f03c6f2007-03-25 20:46:19 +0200564 if (s->check_addr.sin_addr.s_addr)
565 /* we'll connect to the check addr specified on the server */
Willy Tarreau2ea3abb2007-03-25 16:45:16 +0200566 sa = s->check_addr;
Willy Tarreau2ea3abb2007-03-25 16:45:16 +0200567 else
Willy Tarreau0f03c6f2007-03-25 20:46:19 +0200568 /* we'll connect to the addr on the server */
Willy Tarreau2ea3abb2007-03-25 16:45:16 +0200569 sa = s->addr;
Willy Tarreau0f03c6f2007-03-25 20:46:19 +0200570
Willy Tarreaubaaee002006-06-26 02:48:02 +0200571 /* we'll connect to the check port on the server */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200572 sa.sin_port = htons(s->check_port);
573
574 /* allow specific binding :
575 * - server-specific at first
576 * - proxy-specific next
577 */
578 if (s->state & SRV_BIND_SRC) {
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100579 struct sockaddr_in *remote = NULL;
580 int ret, flags = 0;
Willy Tarreau163c5322006-11-14 16:18:41 +0100581
Willy Tarreaucf1d5722008-02-14 20:28:18 +0100582#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100583 if ((s->state & SRV_TPROXY_MASK) == SRV_TPROXY_ADDR) {
584 remote = (struct sockaddr_in *)&s->tproxy_addr;
585 flags = 3;
586 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +0100587#endif
Willy Tarreauc76721d2009-02-04 20:20:58 +0100588#ifdef SO_BINDTODEVICE
589 /* Note: this might fail if not CAP_NET_RAW */
590 if (s->iface_name)
591 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
Willy Tarreau604e8302009-03-06 00:48:23 +0100592 s->iface_name, s->iface_len + 1);
Willy Tarreauc76721d2009-02-04 20:20:58 +0100593#endif
Willy Tarreauc6f4ce82009-06-10 11:09:37 +0200594 if (s->sport_range) {
595 int bind_attempts = 10; /* should be more than enough to find a spare port */
596 struct sockaddr_in src;
597
598 ret = 1;
599 src = s->source_addr;
600
601 do {
602 /* note: in case of retry, we may have to release a previously
603 * allocated port, hence this loop's construct.
604 */
605 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
606 fdtab[fd].port_range = NULL;
607
608 if (!bind_attempts)
609 break;
610 bind_attempts--;
611
612 fdtab[fd].local_port = port_range_alloc_port(s->sport_range);
613 if (!fdtab[fd].local_port)
614 break;
615
616 fdtab[fd].port_range = s->sport_range;
617 src.sin_port = htons(fdtab[fd].local_port);
618
619 ret = tcpv4_bind_socket(fd, flags, &src, remote);
620 } while (ret != 0); /* binding NOK */
621 }
622 else {
623 ret = tcpv4_bind_socket(fd, flags, &s->source_addr, remote);
624 }
625
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100626 if (ret) {
627 s->result |= SRV_CHK_ERROR;
628 switch (ret) {
629 case 1:
630 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
631 s->proxy->id, s->id);
632 break;
633 case 2:
Willy Tarreau163c5322006-11-14 16:18:41 +0100634 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
635 s->proxy->id, s->id);
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100636 break;
Willy Tarreau163c5322006-11-14 16:18:41 +0100637 }
638 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200639 }
640 else if (s->proxy->options & PR_O_BIND_SRC) {
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100641 struct sockaddr_in *remote = NULL;
642 int ret, flags = 0;
643
Willy Tarreaucf1d5722008-02-14 20:28:18 +0100644#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau163c5322006-11-14 16:18:41 +0100645 if ((s->proxy->options & PR_O_TPXY_MASK) == PR_O_TPXY_ADDR) {
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100646 remote = (struct sockaddr_in *)&s->proxy->tproxy_addr;
647 flags = 3;
648 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +0100649#endif
Willy Tarreaud53f96b2009-02-04 18:46:54 +0100650#ifdef SO_BINDTODEVICE
651 /* Note: this might fail if not CAP_NET_RAW */
652 if (s->proxy->iface_name)
653 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
Willy Tarreau604e8302009-03-06 00:48:23 +0100654 s->proxy->iface_name, s->proxy->iface_len + 1);
Willy Tarreaud53f96b2009-02-04 18:46:54 +0100655#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100656 ret = tcpv4_bind_socket(fd, flags, &s->proxy->source_addr, remote);
657 if (ret) {
658 s->result |= SRV_CHK_ERROR;
659 switch (ret) {
660 case 1:
661 Alert("Cannot bind to source address before connect() for %s '%s'. Aborting.\n",
662 proxy_type_str(s->proxy), s->proxy->id);
663 break;
664 case 2:
Willy Tarreau2b5652f2006-12-31 17:46:05 +0100665 Alert("Cannot bind to tproxy source address before connect() for %s '%s'. Aborting.\n",
666 proxy_type_str(s->proxy), s->proxy->id);
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100667 break;
Willy Tarreau163c5322006-11-14 16:18:41 +0100668 }
669 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200670 }
671
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100672 if (s->result == SRV_CHK_UNKNOWN) {
Willy Tarreauc9fce2f2009-08-16 14:13:47 +0200673#if defined(TCP_QUICKACK) && defined(SOL_TCP)
Willy Tarreau1274bc42009-07-15 07:16:31 +0200674 /* disabling tcp quick ack now allows
675 * the request to leave the machine with
676 * the first ACK.
677 */
678 if (s->proxy->options2 & PR_O2_SMARTCON)
679 setsockopt(fd, SOL_TCP, TCP_QUICKACK, (char *) &zero, sizeof(zero));
680#endif
Willy Tarreaubaaee002006-06-26 02:48:02 +0200681 if ((connect(fd, (struct sockaddr *)&sa, sizeof(sa)) != -1) || (errno == EINPROGRESS)) {
682 /* OK, connection in progress or established */
683
684 //fprintf(stderr, "process_chk: 4\n");
685
686 s->curfd = fd; /* that's how we know a test is in progress ;-) */
Willy Tarreau7a966482007-04-15 10:58:02 +0200687 fd_insert(fd);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200688 fdtab[fd].owner = t;
Willy Tarreau54469402006-07-29 16:59:06 +0200689 fdtab[fd].cb[DIR_RD].f = &event_srv_chk_r;
690 fdtab[fd].cb[DIR_RD].b = NULL;
691 fdtab[fd].cb[DIR_WR].f = &event_srv_chk_w;
692 fdtab[fd].cb[DIR_WR].b = NULL;
Willy Tarreaue94ebd02007-10-09 17:14:37 +0200693 fdtab[fd].peeraddr = (struct sockaddr *)&sa;
694 fdtab[fd].peerlen = sizeof(sa);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200695 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaufb14edc2009-06-14 15:24:37 +0200696 fdtab[fd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
Willy Tarreauf161a342007-04-08 16:59:42 +0200697 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200698#ifdef DEBUG_FULL
Willy Tarreauf161a342007-04-08 16:59:42 +0200699 assert (!EV_FD_ISSET(fd, DIR_RD));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200700#endif
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100701 //fprintf(stderr, "process_chk: 4+, %lu\n", __tv_to_ms(&s->proxy->timeout.connect));
702 /* we allow up to min(inter, timeout.connect) for a connection
703 * to establish but only when timeout.check is set
704 * as it may be to short for a full check otherwise
705 */
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200706 t->expire = tick_add(now_ms, MS_TO_TICKS(s->inter));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100707
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200708 if (s->proxy->timeout.check && s->proxy->timeout.connect) {
709 int t_con = tick_add(now_ms, s->proxy->timeout.connect);
710 t->expire = tick_first(t->expire, t_con);
Willy Tarreau60548192008-02-17 11:34:10 +0100711 }
Willy Tarreau26c25062009-03-08 09:38:41 +0100712 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200713 }
714 else if (errno != EALREADY && errno != EISCONN && errno != EAGAIN) {
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100715 s->result |= SRV_CHK_ERROR; /* a real error */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200716 }
717 }
718 }
Willy Tarreauc6f4ce82009-06-10 11:09:37 +0200719 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
720 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200721 close(fd); /* socket creation error */
722 }
723
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100724 if (s->result == SRV_CHK_UNKNOWN) { /* nothing done */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200725 //fprintf(stderr, "process_chk: 6\n");
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200726 while (tick_is_expired(t->expire, now_ms))
727 t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200728 goto new_chk; /* may be we should initialize a new check */
729 }
730
731 /* here, we have seen a failure */
732 if (s->health > s->rise) {
733 s->health--; /* still good */
734 s->failed_checks++;
735 }
736 else
737 set_server_down(s);
738
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100739 //fprintf(stderr, "process_chk: 7, %lu\n", __tv_to_ms(&s->proxy->timeout.connect));
740 /* we allow up to min(inter, timeout.connect) for a connection
741 * to establish but only when timeout.check is set
742 * as it may be to short for a full check otherwise
743 */
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200744 while (tick_is_expired(t->expire, now_ms)) {
745 int t_con;
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100746
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200747 t_con = tick_add(t->expire, s->proxy->timeout.connect);
748 t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100749
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200750 if (s->proxy->timeout.check)
751 t->expire = tick_first(t->expire, t_con);
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100752 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200753 goto new_chk;
754 }
755 else {
756 //fprintf(stderr, "process_chk: 8\n");
757 /* there was a test running */
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100758 if ((s->result & (SRV_CHK_ERROR|SRV_CHK_RUNNING)) == SRV_CHK_RUNNING) { /* good server detected */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200759 //fprintf(stderr, "process_chk: 9\n");
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200760
Willy Tarreau9909fc12007-11-30 17:42:05 +0100761 if (s->state & SRV_WARMINGUP) {
762 if (now.tv_sec < s->last_change || now.tv_sec >= s->last_change + s->slowstart) {
763 s->state &= ~SRV_WARMINGUP;
764 if (s->proxy->lbprm.algo & BE_LB_PROP_DYN)
765 s->eweight = s->uweight * BE_WEIGHT_SCALE;
766 if (s->proxy->lbprm.update_server_eweight)
767 s->proxy->lbprm.update_server_eweight(s);
768 }
769 else if (s->proxy->lbprm.algo & BE_LB_PROP_DYN) {
770 /* for dynamic algorithms, let's update the weight */
Willy Tarreau5542af62007-12-03 02:04:00 +0100771 s->eweight = (BE_WEIGHT_SCALE * (now.tv_sec - s->last_change) +
772 s->slowstart - 1) / s->slowstart;
Willy Tarreau9909fc12007-11-30 17:42:05 +0100773 s->eweight *= s->uweight;
774 if (s->proxy->lbprm.update_server_eweight)
775 s->proxy->lbprm.update_server_eweight(s);
776 }
777 /* probably that we can refill this server with a bit more connections */
778 check_for_pending(s);
779 }
780
Willy Tarreau48494c02007-11-30 10:41:39 +0100781 /* we may have to add/remove this server from the LB group */
782 if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) {
783 if ((s->state & SRV_GOINGDOWN) &&
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100784 ((s->result & (SRV_CHK_RUNNING|SRV_CHK_DISABLE)) == SRV_CHK_RUNNING))
785 set_server_enabled(s);
Willy Tarreau48494c02007-11-30 10:41:39 +0100786 else if (!(s->state & SRV_GOINGDOWN) &&
787 ((s->result & (SRV_CHK_RUNNING | SRV_CHK_DISABLE)) ==
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100788 (SRV_CHK_RUNNING | SRV_CHK_DISABLE)))
789 set_server_disabled(s);
Willy Tarreau48494c02007-11-30 10:41:39 +0100790 }
791
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200792 if (s->health < s->rise + s->fall - 1) {
793 s->health++; /* was bad, stays for a while */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200794
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100795 set_server_up(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200796 }
797 s->curfd = -1; /* no check running anymore */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200798 fd_delete(fd);
Willy Tarreau44ec0f02007-10-14 23:47:04 +0200799
800 rv = 0;
801 if (global.spread_checks > 0) {
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100802 rv = srv_getinter(s) * global.spread_checks / 100;
Willy Tarreau44ec0f02007-10-14 23:47:04 +0200803 rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100804 //fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, srv_getinter(s), global.spread_checks, rv);
Willy Tarreau44ec0f02007-10-14 23:47:04 +0200805 }
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200806 t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(s) + rv));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200807 goto new_chk;
808 }
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200809 else if ((s->result & SRV_CHK_ERROR) || tick_is_expired(t->expire, now_ms)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200810 //fprintf(stderr, "process_chk: 10\n");
811 /* failure or timeout detected */
812 if (s->health > s->rise) {
813 s->health--; /* still good */
814 s->failed_checks++;
815 }
816 else
817 set_server_down(s);
818 s->curfd = -1;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200819 fd_delete(fd);
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200820
821 rv = 0;
822 if (global.spread_checks > 0) {
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100823 rv = srv_getinter(s) * global.spread_checks / 100;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200824 rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100825 //fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, srv_getinter(s), global.spread_checks, rv);
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200826 }
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200827 t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(s) + rv));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200828 goto new_chk;
829 }
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100830 /* if result is unknown and there's no timeout, we have to wait again */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200831 }
832 //fprintf(stderr, "process_chk: 11\n");
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100833 s->result = SRV_CHK_UNKNOWN;
Willy Tarreau26c25062009-03-08 09:38:41 +0100834 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200835}
836
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200837/*
838 * Start health-check.
839 * Returns 0 if OK, -1 if error, and prints the error in this case.
840 */
841int start_checks() {
842
843 struct proxy *px;
844 struct server *s;
845 struct task *t;
846 int nbchk=0, mininter=0, srvpos=0;
847
Willy Tarreau2c43a1e2007-10-14 23:05:39 +0200848 /* 1- count the checkers to run simultaneously.
849 * We also determine the minimum interval among all of those which
850 * have an interval larger than SRV_CHK_INTER_THRES. This interval
851 * will be used to spread their start-up date. Those which have
852 * a shorter interval will start independantly and will not dictate
853 * too short an interval for all others.
854 */
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200855 for (px = proxy; px; px = px->next) {
856 for (s = px->srv; s; s = s->next) {
857 if (!(s->state & SRV_CHECKED))
858 continue;
859
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100860 if ((srv_getinter(s) >= SRV_CHK_INTER_THRES) &&
861 (!mininter || mininter > srv_getinter(s)))
862 mininter = srv_getinter(s);
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200863
864 nbchk++;
865 }
866 }
867
868 if (!nbchk)
869 return 0;
870
871 srand((unsigned)time(NULL));
872
873 /*
874 * 2- start them as far as possible from each others. For this, we will
875 * start them after their interval set to the min interval divided by
876 * the number of servers, weighted by the server's position in the list.
877 */
878 for (px = proxy; px; px = px->next) {
879 for (s = px->srv; s; s = s->next) {
880 if (!(s->state & SRV_CHECKED))
881 continue;
882
Willy Tarreaua4613182009-03-21 18:13:21 +0100883 if ((t = task_new()) == NULL) {
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200884 Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
885 return -1;
886 }
887
Krzysztof Piotr Oledzkia643baf2008-05-29 23:53:44 +0200888 s->check = t;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200889 t->process = process_chk;
890 t->context = s;
891
892 /* check this every ms */
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200893 t->expire = tick_add(now_ms,
894 MS_TO_TICKS(((mininter && mininter >= srv_getinter(s)) ?
895 mininter : srv_getinter(s)) * srvpos / nbchk));
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200896 task_queue(t);
897
898 srvpos++;
899 }
900 }
901 return 0;
902}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200903
904/*
905 * Local variables:
906 * c-indent-level: 8
907 * c-basic-offset: 8
908 * End:
909 */