blob: 40fb698ef9fb786ee194b995fb18d61660490301 [file] [log] [blame]
/*
 * Health-checks functions.
 *
 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
 * Copyright 2007-2008 Krzysztof Piotr Oledzki <ole@ans.pl>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
13
Willy Tarreaub8816082008-01-18 12:18:15 +010014#include <assert.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020015#include <errno.h>
16#include <fcntl.h>
17#include <stdio.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020018#include <stdlib.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020019#include <string.h>
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +020020#include <time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020021#include <unistd.h>
22#include <sys/socket.h>
23#include <netinet/in.h>
Willy Tarreau1274bc42009-07-15 07:16:31 +020024#include <netinet/tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020025#include <arpa/inet.h>
26
Willy Tarreau2dd0d472006-06-29 17:53:05 +020027#include <common/compat.h>
28#include <common/config.h>
29#include <common/mini-clist.h>
Willy Tarreau83749182007-04-15 20:56:27 +020030#include <common/standard.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020031#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020032
33#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020034
35#include <proto/backend.h>
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +010036#include <proto/buffers.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020037#include <proto/fd.h>
38#include <proto/log.h>
39#include <proto/queue.h>
Willy Tarreauc6f4ce82009-06-10 11:09:37 +020040#include <proto/port_range.h>
Willy Tarreau3d300592007-03-18 18:34:41 +010041#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010042#include <proto/proto_tcp.h>
Willy Tarreau2b5652f2006-12-31 17:46:05 +010043#include <proto/proxy.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020044#include <proto/server.h>
45#include <proto/task.h>
46
Willy Tarreau48494c02007-11-30 10:41:39 +010047/* sends a log message when a backend goes down, and also sets last
48 * change date.
49 */
50static void set_backend_down(struct proxy *be)
51{
52 be->last_change = now.tv_sec;
53 be->down_trans++;
54
55 Alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
56 send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
57}
58
59/* Redistribute pending connections when a server goes down. The number of
60 * connections redistributed is returned.
61 */
62static int redistribute_pending(struct server *s)
63{
64 struct pendconn *pc, *pc_bck, *pc_end;
65 int xferred = 0;
66
67 FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) {
68 struct session *sess = pc->sess;
69 if (sess->be->options & PR_O_REDISP) {
70 /* The REDISP option was specified. We will ignore
71 * cookie and force to balance or use the dispatcher.
72 */
Krzysztof Piotr Oledzki25b501a2008-01-06 16:36:16 +010073
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +010074 /* it's left to the dispatcher to choose a server */
Willy Tarreau48494c02007-11-30 10:41:39 +010075 sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Krzysztof Piotr Oledzki25b501a2008-01-06 16:36:16 +010076
Willy Tarreau48494c02007-11-30 10:41:39 +010077 pendconn_free(pc);
Willy Tarreaufdccded2008-08-29 18:19:04 +020078 task_wakeup(sess->task, TASK_WOKEN_RES);
Willy Tarreau48494c02007-11-30 10:41:39 +010079 xferred++;
80 }
81 }
82 return xferred;
83}
84
85/* Check for pending connections at the backend, and assign some of them to
86 * the server coming up. The server's weight is checked before being assigned
87 * connections it may not be able to handle. The total number of transferred
88 * connections is returned.
89 */
90static int check_for_pending(struct server *s)
91{
92 int xferred;
93
94 if (!s->eweight)
95 return 0;
96
97 for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) {
98 struct session *sess;
99 struct pendconn *p;
100
101 p = pendconn_from_px(s->proxy);
102 if (!p)
103 break;
104 p->sess->srv = s;
105 sess = p->sess;
106 pendconn_free(p);
Willy Tarreaufdccded2008-08-29 18:19:04 +0200107 task_wakeup(sess->task, TASK_WOKEN_RES);
Willy Tarreau48494c02007-11-30 10:41:39 +0100108 }
109 return xferred;
110}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200111
112/* Sets server <s> down, notifies by all available means, recounts the
113 * remaining servers on the proxy and transfers queued sessions whenever
Willy Tarreau5af3a692007-07-24 23:32:33 +0200114 * possible to other servers. It automatically recomputes the number of
115 * servers, but not the map.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200116 */
Willy Tarreau83749182007-04-15 20:56:27 +0200117static void set_server_down(struct server *s)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200118{
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100119 struct server *srv;
120 struct chunk msg;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200121 int xferred;
122
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100123 if (s->health == s->rise || s->tracked) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100124 int srv_was_paused = s->state & SRV_GOINGDOWN;
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200125
126 s->last_change = now.tv_sec;
Willy Tarreau48494c02007-11-30 10:41:39 +0100127 s->state &= ~(SRV_RUNNING | SRV_GOINGDOWN);
Willy Tarreaub625a082007-11-26 01:15:43 +0100128 s->proxy->lbprm.set_server_status_down(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200129
130 /* we might have sessions queued on this server and waiting for
131 * a connection. Those which are redispatchable will be queued
132 * to another server or to the proxy itself.
133 */
Willy Tarreau48494c02007-11-30 10:41:39 +0100134 xferred = redistribute_pending(s);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100135
136 msg.len = 0;
137 msg.str = trash;
138
139 chunk_printf(&msg, sizeof(trash),
140 "%sServer %s/%s is DOWN", s->state & SRV_BACKUP ? "Backup " : "",
141 s->proxy->id, s->id);
142
143 if (s->tracked)
144 chunk_printf(&msg, sizeof(trash), " via %s/%s",
145 s->tracked->proxy->id, s->tracked->id);
146
147 chunk_printf(&msg, sizeof(trash), ". %d active and %d backup servers left.%s"
Willy Tarreaubaaee002006-06-26 02:48:02 +0200148 " %d sessions active, %d requeued, %d remaining in queue.\n",
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100149 s->proxy->srv_act, s->proxy->srv_bck,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200150 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
151 s->cur_sess, xferred, s->nbpend);
152
153 Warning("%s", trash);
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200154
Willy Tarreau48494c02007-11-30 10:41:39 +0100155 /* we don't send an alert if the server was previously paused */
156 if (srv_was_paused)
157 send_log(s->proxy, LOG_NOTICE, "%s", trash);
158 else
159 send_log(s->proxy, LOG_ALERT, "%s", trash);
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200160
Willy Tarreau48494c02007-11-30 10:41:39 +0100161 if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
162 set_backend_down(s->proxy);
163
Willy Tarreaubaaee002006-06-26 02:48:02 +0200164 s->down_trans++;
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100165
Krzysztof Piotr Oledzkif39c71c2009-01-30 00:52:49 +0100166 if (s->state & SRV_CHECKED)
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100167 for(srv = s->tracknext; srv; srv = srv->tracknext)
168 set_server_down(srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200169 }
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100170
Willy Tarreaubaaee002006-06-26 02:48:02 +0200171 s->health = 0; /* failure */
172}
173
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100174static void set_server_up(struct server *s) {
175
176 struct server *srv;
177 struct chunk msg;
178 int xferred;
179
180 if (s->health == s->rise || s->tracked) {
181 if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
182 if (s->proxy->last_change < now.tv_sec) // ignore negative times
183 s->proxy->down_time += now.tv_sec - s->proxy->last_change;
184 s->proxy->last_change = now.tv_sec;
185 }
186
187 if (s->last_change < now.tv_sec) // ignore negative times
188 s->down_time += now.tv_sec - s->last_change;
189
190 s->last_change = now.tv_sec;
191 s->state |= SRV_RUNNING;
192
193 if (s->slowstart > 0) {
194 s->state |= SRV_WARMINGUP;
195 if (s->proxy->lbprm.algo & BE_LB_PROP_DYN) {
196 /* For dynamic algorithms, start at the first step of the weight,
197 * without multiplying by BE_WEIGHT_SCALE.
198 */
199 s->eweight = s->uweight;
200 if (s->proxy->lbprm.update_server_eweight)
201 s->proxy->lbprm.update_server_eweight(s);
202 }
203 }
204 s->proxy->lbprm.set_server_status_up(s);
205
206 /* check if we can handle some connections queued at the proxy. We
207 * will take as many as we can handle.
208 */
209 xferred = check_for_pending(s);
210
211 msg.len = 0;
212 msg.str = trash;
213
214 chunk_printf(&msg, sizeof(trash),
215 "%sServer %s/%s is UP", s->state & SRV_BACKUP ? "Backup " : "",
216 s->proxy->id, s->id);
217
218 if (s->tracked)
219 chunk_printf(&msg, sizeof(trash), " via %s/%s",
220 s->tracked->proxy->id, s->tracked->id);
221
222 chunk_printf(&msg, sizeof(trash), ". %d active and %d backup servers online.%s"
223 " %d sessions requeued, %d total in queue.\n",
224 s->proxy->srv_act, s->proxy->srv_bck,
225 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
Willy Tarreau1772ece2009-04-03 14:49:12 +0200226 xferred, s->nbpend);
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100227
228 Warning("%s", trash);
229 send_log(s->proxy, LOG_NOTICE, "%s", trash);
230
Krzysztof Piotr Oledzkif39c71c2009-01-30 00:52:49 +0100231 if (s->state & SRV_CHECKED)
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100232 for(srv = s->tracknext; srv; srv = srv->tracknext)
233 set_server_up(srv);
234 }
235
236 if (s->health >= s->rise)
237 s->health = s->rise + s->fall - 1; /* OK now */
238
239}
240
241static void set_server_disabled(struct server *s) {
242
243 struct server *srv;
244 struct chunk msg;
245 int xferred;
246
247 s->state |= SRV_GOINGDOWN;
248 s->proxy->lbprm.set_server_status_down(s);
249
250 /* we might have sessions queued on this server and waiting for
251 * a connection. Those which are redispatchable will be queued
252 * to another server or to the proxy itself.
253 */
254 xferred = redistribute_pending(s);
255
256 msg.len = 0;
257 msg.str = trash;
258
259 chunk_printf(&msg, sizeof(trash),
260 "Load-balancing on %sServer %s/%s is disabled",
261 s->state & SRV_BACKUP ? "Backup " : "",
262 s->proxy->id, s->id);
263
264 if (s->tracked)
265 chunk_printf(&msg, sizeof(trash), " via %s/%s",
266 s->tracked->proxy->id, s->tracked->id);
267
268
269 chunk_printf(&msg, sizeof(trash),". %d active and %d backup servers online.%s"
270 " %d sessions requeued, %d total in queue.\n",
271 s->proxy->srv_act, s->proxy->srv_bck,
272 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
273 xferred, s->nbpend);
274
275 Warning("%s", trash);
276
277 send_log(s->proxy, LOG_NOTICE, "%s", trash);
278
279 if (!s->proxy->srv_bck && !s->proxy->srv_act)
280 set_backend_down(s->proxy);
281
Krzysztof Piotr Oledzkif39c71c2009-01-30 00:52:49 +0100282 if (s->state & SRV_CHECKED)
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100283 for(srv = s->tracknext; srv; srv = srv->tracknext)
284 set_server_disabled(srv);
285}
286
287static void set_server_enabled(struct server *s) {
288
289 struct server *srv;
290 struct chunk msg;
291 int xferred;
292
293 s->state &= ~SRV_GOINGDOWN;
294 s->proxy->lbprm.set_server_status_up(s);
295
296 /* check if we can handle some connections queued at the proxy. We
297 * will take as many as we can handle.
298 */
299 xferred = check_for_pending(s);
300
301 msg.len = 0;
302 msg.str = trash;
303
304 chunk_printf(&msg, sizeof(trash),
305 "Load-balancing on %sServer %s/%s is enabled again",
306 s->state & SRV_BACKUP ? "Backup " : "",
307 s->proxy->id, s->id);
308
309 if (s->tracked)
310 chunk_printf(&msg, sizeof(trash), " via %s/%s",
311 s->tracked->proxy->id, s->tracked->id);
312
313 chunk_printf(&msg, sizeof(trash), ". %d active and %d backup servers online.%s"
314 " %d sessions requeued, %d total in queue.\n",
315 s->proxy->srv_act, s->proxy->srv_bck,
316 (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
317 xferred, s->nbpend);
318
319 Warning("%s", trash);
320 send_log(s->proxy, LOG_NOTICE, "%s", trash);
321
Krzysztof Piotr Oledzkif39c71c2009-01-30 00:52:49 +0100322 if (s->state & SRV_CHECKED)
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100323 for(srv = s->tracknext; srv; srv = srv->tracknext)
324 set_server_enabled(srv);
325}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200326
327/*
328 * This function is used only for server health-checks. It handles
329 * the connection acknowledgement. If the proxy requires HTTP health-checks,
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100330 * it sends the request. In other cases, it fills s->result with SRV_CHK_*.
Willy Tarreau83749182007-04-15 20:56:27 +0200331 * The function itself returns 0 if it needs some polling before being called
332 * again, otherwise 1.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200333 */
Willy Tarreau83749182007-04-15 20:56:27 +0200334static int event_srv_chk_w(int fd)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200335{
Willy Tarreau6996e152007-04-30 14:37:43 +0200336 __label__ out_wakeup, out_nowake, out_poll, out_error;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200337 struct task *t = fdtab[fd].owner;
338 struct server *s = t->context;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200339
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100340 //fprintf(stderr, "event_srv_chk_w, state=%ld\n", unlikely(fdtab[fd].state));
Willy Tarreau6996e152007-04-30 14:37:43 +0200341 if (unlikely(fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR)))
342 goto out_error;
343
344 /* here, we know that the connection is established */
Willy Tarreau83749182007-04-15 20:56:27 +0200345
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100346 if (!(s->result & SRV_CHK_ERROR)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200347 /* we don't want to mark 'UP' a server on which we detected an error earlier */
Willy Tarreauf3c69202006-07-09 16:42:34 +0200348 if ((s->proxy->options & PR_O_HTTP_CHK) ||
Willy Tarreau23677902007-05-08 23:50:35 +0200349 (s->proxy->options & PR_O_SSL3_CHK) ||
350 (s->proxy->options & PR_O_SMTP_CHK)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200351 int ret;
Willy Tarreauf3c69202006-07-09 16:42:34 +0200352 /* we want to check if this host replies to HTTP or SSLv3 requests
Willy Tarreaubaaee002006-06-26 02:48:02 +0200353 * so we'll send the request, and won't wake the checker up now.
354 */
Willy Tarreauf3c69202006-07-09 16:42:34 +0200355
356 if (s->proxy->options & PR_O_SSL3_CHK) {
357 /* SSL requires that we put Unix time in the request */
Willy Tarreaub7f694f2008-06-22 17:18:02 +0200358 int gmt_time = htonl(date.tv_sec);
Willy Tarreauf3c69202006-07-09 16:42:34 +0200359 memcpy(s->proxy->check_req + 11, &gmt_time, 4);
360 }
361
Willy Tarreaubaaee002006-06-26 02:48:02 +0200362#ifndef MSG_NOSIGNAL
363 ret = send(fd, s->proxy->check_req, s->proxy->check_len, MSG_DONTWAIT);
364#else
365 ret = send(fd, s->proxy->check_req, s->proxy->check_len, MSG_DONTWAIT | MSG_NOSIGNAL);
366#endif
367 if (ret == s->proxy->check_len) {
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100368 /* we allow up to <timeout.check> if nonzero for a responce */
Willy Tarreau7cd9d942008-12-21 13:00:41 +0100369 if (s->proxy->timeout.check)
370 t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
Willy Tarreauf161a342007-04-08 16:59:42 +0200371 EV_FD_SET(fd, DIR_RD); /* prepare for reading reply */
Willy Tarreau83749182007-04-15 20:56:27 +0200372 goto out_nowake;
373 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200374 else if (ret == 0 || errno == EAGAIN)
375 goto out_poll;
376 else
377 goto out_error;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200378 }
379 else {
Willy Tarreau6996e152007-04-30 14:37:43 +0200380 /* We have no data to send to check the connection, and
381 * getsockopt() will not inform us whether the connection
382 * is still pending. So we'll reuse connect() to check the
383 * state of the socket. This has the advantage of givig us
384 * the following info :
385 * - error
386 * - connecting (EALREADY, EINPROGRESS)
387 * - connected (EISCONN, 0)
388 */
389
390 struct sockaddr_in sa;
391
392 sa = (s->check_addr.sin_addr.s_addr) ? s->check_addr : s->addr;
393 sa.sin_port = htons(s->check_port);
394
395 if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) == 0)
396 errno = 0;
397
398 if (errno == EALREADY || errno == EINPROGRESS)
399 goto out_poll;
400
401 if (errno && errno != EISCONN)
402 goto out_error;
403
Willy Tarreaubaaee002006-06-26 02:48:02 +0200404 /* good TCP connection is enough */
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100405 s->result |= SRV_CHK_RUNNING;
Willy Tarreau6996e152007-04-30 14:37:43 +0200406 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200407 }
408 }
Willy Tarreau83749182007-04-15 20:56:27 +0200409 out_wakeup:
Willy Tarreaufdccded2008-08-29 18:19:04 +0200410 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreau83749182007-04-15 20:56:27 +0200411 out_nowake:
412 EV_FD_CLR(fd, DIR_WR); /* nothing more to write */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100413 fdtab[fd].ev &= ~FD_POLL_OUT;
Willy Tarreau83749182007-04-15 20:56:27 +0200414 return 1;
Willy Tarreau6996e152007-04-30 14:37:43 +0200415 out_poll:
416 /* The connection is still pending. We'll have to poll it
417 * before attempting to go further. */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100418 fdtab[fd].ev &= ~FD_POLL_OUT;
Willy Tarreau6996e152007-04-30 14:37:43 +0200419 return 0;
420 out_error:
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100421 s->result |= SRV_CHK_ERROR;
Willy Tarreau6996e152007-04-30 14:37:43 +0200422 fdtab[fd].state = FD_STERROR;
423 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200424}
425
426
427/*
Willy Tarreauf3c69202006-07-09 16:42:34 +0200428 * This function is used only for server health-checks. It handles the server's
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100429 * reply to an HTTP request or SSL HELLO. It sets s->result to SRV_CHK_RUNNING
430 * if an HTTP server replies HTTP 2xx or 3xx (valid responses), if an SMTP
431 * server returns 2xx, or if an SSL server returns at least 5 bytes in response
432 * to an SSL HELLO (the principle is that this is enough to distinguish between
433 * an SSL server and a pure TCP relay). All other cases will set s->result to
434 * SRV_CHK_ERROR. The function returns 0 if it needs to be called again after
435 * some polling, otherwise non-zero..
Willy Tarreaubaaee002006-06-26 02:48:02 +0200436 */
Willy Tarreau83749182007-04-15 20:56:27 +0200437static int event_srv_chk_r(int fd)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200438{
Willy Tarreau83749182007-04-15 20:56:27 +0200439 __label__ out_wakeup;
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100440 int len;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200441 struct task *t = fdtab[fd].owner;
442 struct server *s = t->context;
443 int skerr;
444 socklen_t lskerr = sizeof(skerr);
445
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100446 len = -1;
Willy Tarreau83749182007-04-15 20:56:27 +0200447
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100448 if (unlikely((s->result & SRV_CHK_ERROR) ||
449 (fdtab[fd].state == FD_STERROR) ||
Willy Tarreau83749182007-04-15 20:56:27 +0200450 (fdtab[fd].ev & FD_POLL_ERR) ||
451 (getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == -1) ||
452 (skerr != 0))) {
453 /* in case of TCP only, this tells us if the connection failed */
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100454 s->result |= SRV_CHK_ERROR;
Willy Tarreau83749182007-04-15 20:56:27 +0200455 goto out_wakeup;
456 }
457
Willy Tarreaubaaee002006-06-26 02:48:02 +0200458#ifndef MSG_NOSIGNAL
Krzysztof Oledzki6b3f8b42007-10-11 18:41:08 +0200459 len = recv(fd, trash, sizeof(trash), 0);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200460#else
Willy Tarreau83749182007-04-15 20:56:27 +0200461 /* Warning! Linux returns EAGAIN on SO_ERROR if data are still available
462 * but the connection was closed on the remote end. Fortunately, recv still
463 * works correctly and we don't need to do the getsockopt() on linux.
464 */
Krzysztof Oledzki6b3f8b42007-10-11 18:41:08 +0200465 len = recv(fd, trash, sizeof(trash), MSG_NOSIGNAL);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200466#endif
Willy Tarreau83749182007-04-15 20:56:27 +0200467 if (unlikely(len < 0 && errno == EAGAIN)) {
468 /* we want some polling to happen first */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100469 fdtab[fd].ev &= ~FD_POLL_IN;
Willy Tarreau83749182007-04-15 20:56:27 +0200470 return 0;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200471 }
472
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100473 /* Note: the response will only be accepted if read at once */
474 if (s->proxy->options & PR_O_HTTP_CHK) {
475 /* Check if the server speaks HTTP 1.X */
476 if ((len < strlen("HTTP/1.0 000\r")) ||
477 (memcmp(trash, "HTTP/1.", 7) != 0)) {
478 s->result |= SRV_CHK_ERROR;
479 goto out_wakeup;
480 }
481
482 /* check the reply : HTTP/1.X 2xx and 3xx are OK */
483 if (trash[9] == '2' || trash[9] == '3')
484 s->result |= SRV_CHK_RUNNING;
Willy Tarreau48494c02007-11-30 10:41:39 +0100485 else if ((s->proxy->options & PR_O_DISABLE404) &&
486 (s->state & SRV_RUNNING) &&
487 (memcmp(&trash[9], "404", 3) == 0)) {
488 /* 404 may be accepted as "stopping" only if the server was up */
489 s->result |= SRV_CHK_RUNNING | SRV_CHK_DISABLE;
490 }
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100491 else
492 s->result |= SRV_CHK_ERROR;
493 }
494 else if (s->proxy->options & PR_O_SSL3_CHK) {
495 /* Check for SSLv3 alert or handshake */
496 if ((len >= 5) && (trash[0] == 0x15 || trash[0] == 0x16))
497 s->result |= SRV_CHK_RUNNING;
498 else
499 s->result |= SRV_CHK_ERROR;
Willy Tarreau6996e152007-04-30 14:37:43 +0200500 }
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100501 else if (s->proxy->options & PR_O_SMTP_CHK) {
502 /* Check for SMTP code 2xx (should be 250) */
503 if ((len >= 3) && (trash[0] == '2'))
504 s->result |= SRV_CHK_RUNNING;
505 else
506 s->result |= SRV_CHK_ERROR;
Willy Tarreau6996e152007-04-30 14:37:43 +0200507 }
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100508 else {
509 /* other checks are valid if the connection succeeded anyway */
510 s->result |= SRV_CHK_RUNNING;
Willy Tarreau23677902007-05-08 23:50:35 +0200511 }
Willy Tarreau83749182007-04-15 20:56:27 +0200512
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100513 out_wakeup:
514 if (s->result & SRV_CHK_ERROR)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200515 fdtab[fd].state = FD_STERROR;
516
Willy Tarreauf161a342007-04-08 16:59:42 +0200517 EV_FD_CLR(fd, DIR_RD);
Willy Tarreaufdccded2008-08-29 18:19:04 +0200518 task_wakeup(t, TASK_WOKEN_IO);
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100519 fdtab[fd].ev &= ~FD_POLL_IN;
Willy Tarreau83749182007-04-15 20:56:27 +0200520 return 1;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200521}
522
523/*
524 * manages a server health-check. Returns
525 * the time the task accepts to wait, or TIME_ETERNITY for infinity.
526 */
Willy Tarreau26c25062009-03-08 09:38:41 +0100527struct task *process_chk(struct task *t)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200528{
Willy Tarreaue3838802009-03-21 18:58:32 +0100529 int attempts = 0;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200530 struct server *s = t->context;
531 struct sockaddr_in sa;
532 int fd;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200533 int rv;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200534
535 //fprintf(stderr, "process_chk: task=%p\n", t);
536
537 new_chk:
Willy Tarreaue3838802009-03-21 18:58:32 +0100538 if (attempts++ > 0) {
539 /* we always fail to create a server, let's stop insisting... */
540 while (tick_is_expired(t->expire, now_ms))
541 t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
542 return t;
543 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200544 fd = s->curfd;
545 if (fd < 0) { /* no check currently running */
546 //fprintf(stderr, "process_chk: 2\n");
Willy Tarreau26c25062009-03-08 09:38:41 +0100547 if (!tick_is_expired(t->expire, now_ms)) /* woke up too early */
548 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200549
550 /* we don't send any health-checks when the proxy is stopped or when
551 * the server should not be checked.
552 */
553 if (!(s->state & SRV_CHECKED) || s->proxy->state == PR_STSTOPPED) {
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200554 while (tick_is_expired(t->expire, now_ms))
555 t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
Willy Tarreau26c25062009-03-08 09:38:41 +0100556 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200557 }
558
559 /* we'll initiate a new check */
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100560 s->result = SRV_CHK_UNKNOWN; /* no result yet */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200561 if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) {
562 if ((fd < global.maxsock) &&
563 (fcntl(fd, F_SETFL, O_NONBLOCK) != -1) &&
564 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) != -1)) {
565 //fprintf(stderr, "process_chk: 3\n");
566
Willy Tarreau9edd1612007-10-18 18:07:48 +0200567 if (s->proxy->options & PR_O_TCP_NOLING) {
568 /* We don't want to useless data */
569 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
570 }
Willy Tarreau2ea3abb2007-03-25 16:45:16 +0200571
Willy Tarreau0f03c6f2007-03-25 20:46:19 +0200572 if (s->check_addr.sin_addr.s_addr)
573 /* we'll connect to the check addr specified on the server */
Willy Tarreau2ea3abb2007-03-25 16:45:16 +0200574 sa = s->check_addr;
Willy Tarreau2ea3abb2007-03-25 16:45:16 +0200575 else
Willy Tarreau0f03c6f2007-03-25 20:46:19 +0200576 /* we'll connect to the addr on the server */
Willy Tarreau2ea3abb2007-03-25 16:45:16 +0200577 sa = s->addr;
Willy Tarreau0f03c6f2007-03-25 20:46:19 +0200578
Willy Tarreaubaaee002006-06-26 02:48:02 +0200579 /* we'll connect to the check port on the server */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200580 sa.sin_port = htons(s->check_port);
581
582 /* allow specific binding :
583 * - server-specific at first
584 * - proxy-specific next
585 */
586 if (s->state & SRV_BIND_SRC) {
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100587 struct sockaddr_in *remote = NULL;
588 int ret, flags = 0;
Willy Tarreau163c5322006-11-14 16:18:41 +0100589
Willy Tarreaucf1d5722008-02-14 20:28:18 +0100590#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100591 if ((s->state & SRV_TPROXY_MASK) == SRV_TPROXY_ADDR) {
592 remote = (struct sockaddr_in *)&s->tproxy_addr;
593 flags = 3;
594 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +0100595#endif
Willy Tarreauc76721d2009-02-04 20:20:58 +0100596#ifdef SO_BINDTODEVICE
597 /* Note: this might fail if not CAP_NET_RAW */
598 if (s->iface_name)
599 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
Willy Tarreau604e8302009-03-06 00:48:23 +0100600 s->iface_name, s->iface_len + 1);
Willy Tarreauc76721d2009-02-04 20:20:58 +0100601#endif
Willy Tarreauc6f4ce82009-06-10 11:09:37 +0200602 if (s->sport_range) {
603 int bind_attempts = 10; /* should be more than enough to find a spare port */
604 struct sockaddr_in src;
605
606 ret = 1;
607 src = s->source_addr;
608
609 do {
610 /* note: in case of retry, we may have to release a previously
611 * allocated port, hence this loop's construct.
612 */
613 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
614 fdtab[fd].port_range = NULL;
615
616 if (!bind_attempts)
617 break;
618 bind_attempts--;
619
620 fdtab[fd].local_port = port_range_alloc_port(s->sport_range);
621 if (!fdtab[fd].local_port)
622 break;
623
624 fdtab[fd].port_range = s->sport_range;
625 src.sin_port = htons(fdtab[fd].local_port);
626
627 ret = tcpv4_bind_socket(fd, flags, &src, remote);
628 } while (ret != 0); /* binding NOK */
629 }
630 else {
631 ret = tcpv4_bind_socket(fd, flags, &s->source_addr, remote);
632 }
633
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100634 if (ret) {
635 s->result |= SRV_CHK_ERROR;
636 switch (ret) {
637 case 1:
638 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
639 s->proxy->id, s->id);
640 break;
641 case 2:
Willy Tarreau163c5322006-11-14 16:18:41 +0100642 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
643 s->proxy->id, s->id);
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100644 break;
Willy Tarreau163c5322006-11-14 16:18:41 +0100645 }
646 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200647 }
648 else if (s->proxy->options & PR_O_BIND_SRC) {
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100649 struct sockaddr_in *remote = NULL;
650 int ret, flags = 0;
651
Willy Tarreaucf1d5722008-02-14 20:28:18 +0100652#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau163c5322006-11-14 16:18:41 +0100653 if ((s->proxy->options & PR_O_TPXY_MASK) == PR_O_TPXY_ADDR) {
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100654 remote = (struct sockaddr_in *)&s->proxy->tproxy_addr;
655 flags = 3;
656 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +0100657#endif
Willy Tarreaud53f96b2009-02-04 18:46:54 +0100658#ifdef SO_BINDTODEVICE
659 /* Note: this might fail if not CAP_NET_RAW */
660 if (s->proxy->iface_name)
661 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
Willy Tarreau604e8302009-03-06 00:48:23 +0100662 s->proxy->iface_name, s->proxy->iface_len + 1);
Willy Tarreaud53f96b2009-02-04 18:46:54 +0100663#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100664 ret = tcpv4_bind_socket(fd, flags, &s->proxy->source_addr, remote);
665 if (ret) {
666 s->result |= SRV_CHK_ERROR;
667 switch (ret) {
668 case 1:
669 Alert("Cannot bind to source address before connect() for %s '%s'. Aborting.\n",
670 proxy_type_str(s->proxy), s->proxy->id);
671 break;
672 case 2:
Willy Tarreau2b5652f2006-12-31 17:46:05 +0100673 Alert("Cannot bind to tproxy source address before connect() for %s '%s'. Aborting.\n",
674 proxy_type_str(s->proxy), s->proxy->id);
Willy Tarreaue8c66af2008-01-13 18:40:14 +0100675 break;
Willy Tarreau163c5322006-11-14 16:18:41 +0100676 }
677 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200678 }
679
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100680 if (s->result == SRV_CHK_UNKNOWN) {
Willy Tarreau1274bc42009-07-15 07:16:31 +0200681#ifdef TCP_QUICKACK
682 /* disabling tcp quick ack now allows
683 * the request to leave the machine with
684 * the first ACK.
685 */
686 if (s->proxy->options2 & PR_O2_SMARTCON)
687 setsockopt(fd, SOL_TCP, TCP_QUICKACK, (char *) &zero, sizeof(zero));
688#endif
Willy Tarreaubaaee002006-06-26 02:48:02 +0200689 if ((connect(fd, (struct sockaddr *)&sa, sizeof(sa)) != -1) || (errno == EINPROGRESS)) {
690 /* OK, connection in progress or established */
691
692 //fprintf(stderr, "process_chk: 4\n");
693
694 s->curfd = fd; /* that's how we know a test is in progress ;-) */
Willy Tarreau7a966482007-04-15 10:58:02 +0200695 fd_insert(fd);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200696 fdtab[fd].owner = t;
Willy Tarreau54469402006-07-29 16:59:06 +0200697 fdtab[fd].cb[DIR_RD].f = &event_srv_chk_r;
698 fdtab[fd].cb[DIR_RD].b = NULL;
699 fdtab[fd].cb[DIR_WR].f = &event_srv_chk_w;
700 fdtab[fd].cb[DIR_WR].b = NULL;
Willy Tarreaue94ebd02007-10-09 17:14:37 +0200701 fdtab[fd].peeraddr = (struct sockaddr *)&sa;
702 fdtab[fd].peerlen = sizeof(sa);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200703 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaufb14edc2009-06-14 15:24:37 +0200704 fdtab[fd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
Willy Tarreauf161a342007-04-08 16:59:42 +0200705 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200706#ifdef DEBUG_FULL
Willy Tarreauf161a342007-04-08 16:59:42 +0200707 assert (!EV_FD_ISSET(fd, DIR_RD));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200708#endif
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100709 //fprintf(stderr, "process_chk: 4+, %lu\n", __tv_to_ms(&s->proxy->timeout.connect));
710 /* we allow up to min(inter, timeout.connect) for a connection
711 * to establish but only when timeout.check is set
712 * as it may be to short for a full check otherwise
713 */
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200714 t->expire = tick_add(now_ms, MS_TO_TICKS(s->inter));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100715
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200716 if (s->proxy->timeout.check && s->proxy->timeout.connect) {
717 int t_con = tick_add(now_ms, s->proxy->timeout.connect);
718 t->expire = tick_first(t->expire, t_con);
Willy Tarreau60548192008-02-17 11:34:10 +0100719 }
Willy Tarreau26c25062009-03-08 09:38:41 +0100720 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200721 }
722 else if (errno != EALREADY && errno != EISCONN && errno != EAGAIN) {
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100723 s->result |= SRV_CHK_ERROR; /* a real error */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200724 }
725 }
726 }
Willy Tarreauc6f4ce82009-06-10 11:09:37 +0200727 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
728 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200729 close(fd); /* socket creation error */
730 }
731
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100732 if (s->result == SRV_CHK_UNKNOWN) { /* nothing done */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200733 //fprintf(stderr, "process_chk: 6\n");
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200734 while (tick_is_expired(t->expire, now_ms))
735 t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200736 goto new_chk; /* may be we should initialize a new check */
737 }
738
739 /* here, we have seen a failure */
740 if (s->health > s->rise) {
741 s->health--; /* still good */
742 s->failed_checks++;
743 }
744 else
745 set_server_down(s);
746
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100747 //fprintf(stderr, "process_chk: 7, %lu\n", __tv_to_ms(&s->proxy->timeout.connect));
748 /* we allow up to min(inter, timeout.connect) for a connection
749 * to establish but only when timeout.check is set
750 * as it may be to short for a full check otherwise
751 */
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200752 while (tick_is_expired(t->expire, now_ms)) {
753 int t_con;
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100754
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200755 t_con = tick_add(t->expire, s->proxy->timeout.connect);
756 t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100757
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200758 if (s->proxy->timeout.check)
759 t->expire = tick_first(t->expire, t_con);
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100760 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200761 goto new_chk;
762 }
763 else {
764 //fprintf(stderr, "process_chk: 8\n");
765 /* there was a test running */
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100766 if ((s->result & (SRV_CHK_ERROR|SRV_CHK_RUNNING)) == SRV_CHK_RUNNING) { /* good server detected */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200767 //fprintf(stderr, "process_chk: 9\n");
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200768
Willy Tarreau9909fc12007-11-30 17:42:05 +0100769 if (s->state & SRV_WARMINGUP) {
770 if (now.tv_sec < s->last_change || now.tv_sec >= s->last_change + s->slowstart) {
771 s->state &= ~SRV_WARMINGUP;
772 if (s->proxy->lbprm.algo & BE_LB_PROP_DYN)
773 s->eweight = s->uweight * BE_WEIGHT_SCALE;
774 if (s->proxy->lbprm.update_server_eweight)
775 s->proxy->lbprm.update_server_eweight(s);
776 }
777 else if (s->proxy->lbprm.algo & BE_LB_PROP_DYN) {
778 /* for dynamic algorithms, let's update the weight */
Willy Tarreau5542af62007-12-03 02:04:00 +0100779 s->eweight = (BE_WEIGHT_SCALE * (now.tv_sec - s->last_change) +
780 s->slowstart - 1) / s->slowstart;
Willy Tarreau9909fc12007-11-30 17:42:05 +0100781 s->eweight *= s->uweight;
782 if (s->proxy->lbprm.update_server_eweight)
783 s->proxy->lbprm.update_server_eweight(s);
784 }
785 /* probably that we can refill this server with a bit more connections */
786 check_for_pending(s);
787 }
788
Willy Tarreau48494c02007-11-30 10:41:39 +0100789 /* we may have to add/remove this server from the LB group */
790 if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) {
791 if ((s->state & SRV_GOINGDOWN) &&
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100792 ((s->result & (SRV_CHK_RUNNING|SRV_CHK_DISABLE)) == SRV_CHK_RUNNING))
793 set_server_enabled(s);
Willy Tarreau48494c02007-11-30 10:41:39 +0100794 else if (!(s->state & SRV_GOINGDOWN) &&
795 ((s->result & (SRV_CHK_RUNNING | SRV_CHK_DISABLE)) ==
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100796 (SRV_CHK_RUNNING | SRV_CHK_DISABLE)))
797 set_server_disabled(s);
Willy Tarreau48494c02007-11-30 10:41:39 +0100798 }
799
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200800 if (s->health < s->rise + s->fall - 1) {
801 s->health++; /* was bad, stays for a while */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200802
Krzysztof Piotr Oledzkic8b16fc2008-02-18 01:26:35 +0100803 set_server_up(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200804 }
805 s->curfd = -1; /* no check running anymore */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200806 fd_delete(fd);
Willy Tarreau44ec0f02007-10-14 23:47:04 +0200807
808 rv = 0;
809 if (global.spread_checks > 0) {
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100810 rv = srv_getinter(s) * global.spread_checks / 100;
Willy Tarreau44ec0f02007-10-14 23:47:04 +0200811 rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100812 //fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, srv_getinter(s), global.spread_checks, rv);
Willy Tarreau44ec0f02007-10-14 23:47:04 +0200813 }
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200814 t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(s) + rv));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200815 goto new_chk;
816 }
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200817 else if ((s->result & SRV_CHK_ERROR) || tick_is_expired(t->expire, now_ms)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200818 //fprintf(stderr, "process_chk: 10\n");
819 /* failure or timeout detected */
820 if (s->health > s->rise) {
821 s->health--; /* still good */
822 s->failed_checks++;
823 }
824 else
825 set_server_down(s);
826 s->curfd = -1;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200827 fd_delete(fd);
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200828
829 rv = 0;
830 if (global.spread_checks > 0) {
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100831 rv = srv_getinter(s) * global.spread_checks / 100;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200832 rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100833 //fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, srv_getinter(s), global.spread_checks, rv);
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200834 }
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200835 t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(s) + rv));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200836 goto new_chk;
837 }
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100838 /* if result is unknown and there's no timeout, we have to wait again */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200839 }
840 //fprintf(stderr, "process_chk: 11\n");
Willy Tarreauc7dd71a2007-11-30 08:33:21 +0100841 s->result = SRV_CHK_UNKNOWN;
Willy Tarreau26c25062009-03-08 09:38:41 +0100842 return t;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200843}
844
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200845/*
846 * Start health-check.
847 * Returns 0 if OK, -1 if error, and prints the error in this case.
848 */
849int start_checks() {
850
851 struct proxy *px;
852 struct server *s;
853 struct task *t;
854 int nbchk=0, mininter=0, srvpos=0;
855
Willy Tarreau2c43a1e2007-10-14 23:05:39 +0200856 /* 1- count the checkers to run simultaneously.
857 * We also determine the minimum interval among all of those which
858 * have an interval larger than SRV_CHK_INTER_THRES. This interval
859 * will be used to spread their start-up date. Those which have
860 * a shorter interval will start independantly and will not dictate
861 * too short an interval for all others.
862 */
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200863 for (px = proxy; px; px = px->next) {
864 for (s = px->srv; s; s = s->next) {
865 if (!(s->state & SRV_CHECKED))
866 continue;
867
Krzysztof Piotr Oledzki5259dfe2008-01-21 01:54:06 +0100868 if ((srv_getinter(s) >= SRV_CHK_INTER_THRES) &&
869 (!mininter || mininter > srv_getinter(s)))
870 mininter = srv_getinter(s);
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200871
872 nbchk++;
873 }
874 }
875
876 if (!nbchk)
877 return 0;
878
879 srand((unsigned)time(NULL));
880
881 /*
882 * 2- start them as far as possible from each others. For this, we will
883 * start them after their interval set to the min interval divided by
884 * the number of servers, weighted by the server's position in the list.
885 */
886 for (px = proxy; px; px = px->next) {
887 for (s = px->srv; s; s = s->next) {
888 if (!(s->state & SRV_CHECKED))
889 continue;
890
Willy Tarreaua4613182009-03-21 18:13:21 +0100891 if ((t = task_new()) == NULL) {
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200892 Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
893 return -1;
894 }
895
Krzysztof Piotr Oledzkia643baf2008-05-29 23:53:44 +0200896 s->check = t;
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200897 t->process = process_chk;
898 t->context = s;
899
900 /* check this every ms */
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200901 t->expire = tick_add(now_ms,
902 MS_TO_TICKS(((mininter && mininter >= srv_getinter(s)) ?
903 mininter : srv_getinter(s)) * srvpos / nbchk));
Krzysztof Oledzkib304dc72007-10-14 23:40:01 +0200904 task_queue(t);
905
906 srvpos++;
907 }
908 }
909 return 0;
910}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200911
912/*
913 * Local variables:
914 * c-indent-level: 8
915 * c-basic-offset: 8
916 * End:
917 */