blob: ff91ee3d01be8bda33fd0f336d581410975ae745 [file] [log] [blame]
/*
 * Backend variables and functions.
 *
 * Copyright 2000-2006 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
18
Willy Tarreau2dd0d472006-06-29 17:53:05 +020019#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020020#include <common/config.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020021#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020022
23#include <types/buffers.h>
24#include <types/global.h>
25#include <types/polling.h>
26#include <types/proxy.h>
27#include <types/server.h>
28#include <types/session.h>
29
30#include <proto/backend.h>
31#include <proto/fd.h>
32#include <proto/log.h>
33#include <proto/proto_http.h>
34#include <proto/queue.h>
35#include <proto/stream_sock.h>
36#include <proto/task.h>
37
38
39/*
40 * This function recounts the number of usable active and backup servers for
41 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
42 * This function also recomputes the total active and backup weights.
43 */
44void recount_servers(struct proxy *px)
45{
46 struct server *srv;
47
48 px->srv_act = 0; px->srv_bck = px->tot_wact = px->tot_wbck = 0;
49 for (srv = px->srv; srv != NULL; srv = srv->next) {
50 if (srv->state & SRV_RUNNING) {
51 if (srv->state & SRV_BACKUP) {
52 px->srv_bck++;
53 px->tot_wbck += srv->eweight + 1;
54 } else {
55 px->srv_act++;
56 px->tot_wact += srv->eweight + 1;
57 }
58 }
59 }
60}
61
62/* This function recomputes the server map for proxy px. It
63 * relies on px->tot_wact and px->tot_wbck, so it must be
64 * called after recount_servers(). It also expects px->srv_map
65 * to be initialized to the largest value needed.
66 */
67void recalc_server_map(struct proxy *px)
68{
69 int o, tot, flag;
70 struct server *cur, *best;
71
72 if (px->srv_act) {
73 flag = SRV_RUNNING;
74 tot = px->tot_wact;
75 } else if (px->srv_bck) {
76 flag = SRV_RUNNING | SRV_BACKUP;
77 if (px->options & PR_O_USE_ALL_BK)
78 tot = px->tot_wbck;
79 else
80 tot = 1; /* the first server is enough */
81 } else {
82 px->srv_map_sz = 0;
83 return;
84 }
85
86 /* this algorithm gives priority to the first server, which means that
87 * it will respect the declaration order for equivalent weights, and
88 * that whatever the weights, the first server called will always be
89 * the first declard. This is an important asumption for the backup
90 * case, where we want the first server only.
91 */
92 for (cur = px->srv; cur; cur = cur->next)
93 cur->wscore = 0;
94
95 for (o = 0; o < tot; o++) {
96 int max = 0;
97 best = NULL;
98 for (cur = px->srv; cur; cur = cur->next) {
99 if ((cur->state & (SRV_RUNNING | SRV_BACKUP)) == flag) {
100 int v;
101
102 /* If we are forced to return only one server, we don't want to
103 * go further, because we would return the wrong one due to
104 * divide overflow.
105 */
106 if (tot == 1) {
107 best = cur;
108 break;
109 }
110
111 cur->wscore += cur->eweight + 1;
112 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
113 if (best == NULL || v > max) {
114 max = v;
115 best = cur;
116 }
117 }
118 }
119 px->srv_map[o] = best;
120 best->wscore -= tot;
121 }
122 px->srv_map_sz = tot;
123}
124
125
126/*
127 * This function marks the session as 'assigned' in direct or dispatch modes,
128 * or tries to assign one in balance mode, according to the algorithm. It does
129 * nothing if the session had already been assigned a server.
130 *
131 * It may return :
132 * SRV_STATUS_OK if everything is OK. s->srv will be valid.
133 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
134 * SRV_STATUS_FULL if all servers are saturated. s->srv = NULL.
135 * SRV_STATUS_INTERNAL for other unrecoverable errors.
136 *
137 * Upon successful return, the session flag SN_ASSIGNED to indicate that it does
138 * not need to be called anymore. This usually means that s->srv can be trusted
139 * in balance and direct modes. This flag is not cleared, so it's to the caller
140 * to clear it if required (eg: redispatch).
141 *
142 */
143
144int assign_server(struct session *s)
145{
146#ifdef DEBUG_FULL
147 fprintf(stderr,"assign_server : s=%p\n",s);
148#endif
149
150 if (s->pend_pos)
151 return SRV_STATUS_INTERNAL;
152
153 if (!(s->flags & SN_ASSIGNED)) {
154 if ((s->proxy->options & PR_O_BALANCE) && !(s->flags & SN_DIRECT)) {
155 if (!s->proxy->srv_act && !s->proxy->srv_bck)
156 return SRV_STATUS_NOSRV;
157
158 if (s->proxy->options & PR_O_BALANCE_RR) {
159 s->srv = get_server_rr_with_conns(s->proxy);
160 if (!s->srv)
161 return SRV_STATUS_FULL;
162 }
163 else if (s->proxy->options & PR_O_BALANCE_SH) {
164 int len;
165
166 if (s->cli_addr.ss_family == AF_INET)
167 len = 4;
168 else if (s->cli_addr.ss_family == AF_INET6)
169 len = 16;
170 else /* unknown IP family */
171 return SRV_STATUS_INTERNAL;
172
173 s->srv = get_server_sh(s->proxy,
174 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
175 len);
176 }
177 else /* unknown balancing algorithm */
178 return SRV_STATUS_INTERNAL;
179 }
180 s->flags |= SN_ASSIGNED;
181 }
182 return SRV_STATUS_OK;
183}
184
185
186/*
187 * This function assigns a server address to a session, and sets SN_ADDR_SET.
188 * The address is taken from the currently assigned server, or from the
189 * dispatch or transparent address.
190 *
191 * It may return :
192 * SRV_STATUS_OK if everything is OK.
193 * SRV_STATUS_INTERNAL for other unrecoverable errors.
194 *
195 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
196 * not cleared, so it's to the caller to clear it if required.
197 *
198 */
199int assign_server_address(struct session *s)
200{
201#ifdef DEBUG_FULL
202 fprintf(stderr,"assign_server_address : s=%p\n",s);
203#endif
204
205 if (s->flags & SN_DIRECT || s->proxy->options & PR_O_BALANCE) {
206 /* A server is necessarily known for this session */
207 if (!(s->flags & SN_ASSIGNED))
208 return SRV_STATUS_INTERNAL;
209
210 s->srv_addr = s->srv->addr;
211
212 /* if this server remaps proxied ports, we'll use
213 * the port the client connected to with an offset. */
214 if (s->srv->state & SRV_MAPPORTS) {
215 struct sockaddr_in sockname;
216 socklen_t namelen = sizeof(sockname);
217
218 if (!(s->proxy->options & PR_O_TRANSP) ||
219 get_original_dst(s->cli_fd, (struct sockaddr_in *)&sockname, &namelen) == -1)
220 getsockname(s->cli_fd, (struct sockaddr *)&sockname, &namelen);
221 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) + ntohs(sockname.sin_port));
222 }
223 }
224 else if (*(int *)&s->proxy->dispatch_addr.sin_addr) {
225 /* connect to the defined dispatch addr */
226 s->srv_addr = s->proxy->dispatch_addr;
227 }
228 else if (s->proxy->options & PR_O_TRANSP) {
229 /* in transparent mode, use the original dest addr if no dispatch specified */
230 socklen_t salen = sizeof(s->srv_addr);
231
232 if (get_original_dst(s->cli_fd, &s->srv_addr, &salen) == -1) {
233 qfprintf(stderr, "Cannot get original server address.\n");
234 return SRV_STATUS_INTERNAL;
235 }
236 }
237
238 s->flags |= SN_ADDR_SET;
239 return SRV_STATUS_OK;
240}
241
242
243/* This function assigns a server to session <s> if required, and can add the
244 * connection to either the assigned server's queue or to the proxy's queue.
245 *
246 * Returns :
247 *
248 * SRV_STATUS_OK if everything is OK.
249 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
250 * SRV_STATUS_QUEUED if the connection has been queued.
251 * SRV_STATUS_FULL if the server(s) is/are saturated and the
252 * connection could not be queued.
253 * SRV_STATUS_INTERNAL for other unrecoverable errors.
254 *
255 */
256int assign_server_and_queue(struct session *s)
257{
258 struct pendconn *p;
259 int err;
260
261 if (s->pend_pos)
262 return SRV_STATUS_INTERNAL;
263
264 if (s->flags & SN_ASSIGNED) {
265 /* a server does not need to be assigned, perhaps because we're in
266 * direct mode, or in dispatch or transparent modes where the server
267 * is not needed.
268 */
269 if (s->srv &&
270 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
271 p = pendconn_add(s);
272 if (p)
273 return SRV_STATUS_QUEUED;
274 else
275 return SRV_STATUS_FULL;
276 }
277 return SRV_STATUS_OK;
278 }
279
280 /* a server needs to be assigned */
281 err = assign_server(s);
282 switch (err) {
283 case SRV_STATUS_OK:
284 /* in balance mode, we might have servers with connection limits */
285 if (s->srv &&
286 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
287 p = pendconn_add(s);
288 if (p)
289 return SRV_STATUS_QUEUED;
290 else
291 return SRV_STATUS_FULL;
292 }
293 return SRV_STATUS_OK;
294
295 case SRV_STATUS_FULL:
296 /* queue this session into the proxy's queue */
297 p = pendconn_add(s);
298 if (p)
299 return SRV_STATUS_QUEUED;
300 else
301 return SRV_STATUS_FULL;
302
303 case SRV_STATUS_NOSRV:
304 case SRV_STATUS_INTERNAL:
305 return err;
306 default:
307 return SRV_STATUS_INTERNAL;
308 }
309}
310
311
312/*
313 * This function initiates a connection to the server assigned to this session
314 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
315 * It can return one of :
316 * - SN_ERR_NONE if everything's OK
317 * - SN_ERR_SRVTO if there are no more servers
318 * - SN_ERR_SRVCL if the connection was refused by the server
319 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
320 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
321 * - SN_ERR_INTERNAL for any other purely internal errors
322 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
323 */
324int connect_server(struct session *s)
325{
326 int fd, err;
327
328 if (!(s->flags & SN_ADDR_SET)) {
329 err = assign_server_address(s);
330 if (err != SRV_STATUS_OK)
331 return SN_ERR_INTERNAL;
332 }
333
334 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
335 qfprintf(stderr, "Cannot get a server socket.\n");
336
337 if (errno == ENFILE)
338 send_log(s->proxy, LOG_EMERG,
339 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
340 s->proxy->id, maxfd);
341 else if (errno == EMFILE)
342 send_log(s->proxy, LOG_EMERG,
343 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
344 s->proxy->id, maxfd);
345 else if (errno == ENOBUFS || errno == ENOMEM)
346 send_log(s->proxy, LOG_EMERG,
347 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
348 s->proxy->id, maxfd);
349 /* this is a resource error */
350 return SN_ERR_RESOURCE;
351 }
352
353 if (fd >= global.maxsock) {
354 /* do not log anything there, it's a normal condition when this option
355 * is used to serialize connections to a server !
356 */
357 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
358 close(fd);
359 return SN_ERR_PRXCOND; /* it is a configuration limit */
360 }
361
362 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
363 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
364 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
365 close(fd);
366 return SN_ERR_INTERNAL;
367 }
368
369 if (s->proxy->options & PR_O_TCP_SRV_KA)
370 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
371
372 /* allow specific binding :
373 * - server-specific at first
374 * - proxy-specific next
375 */
376 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
377 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
378 if (bind(fd, (struct sockaddr *)&s->srv->source_addr, sizeof(s->srv->source_addr)) == -1) {
379 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
380 s->proxy->id, s->srv->id);
381 close(fd);
382 send_log(s->proxy, LOG_EMERG,
383 "Cannot bind to source address before connect() for server %s/%s.\n",
384 s->proxy->id, s->srv->id);
385 return SN_ERR_RESOURCE;
386 }
387 }
388 else if (s->proxy->options & PR_O_BIND_SRC) {
389 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
390 if (bind(fd, (struct sockaddr *)&s->proxy->source_addr, sizeof(s->proxy->source_addr)) == -1) {
391 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n", s->proxy->id);
392 close(fd);
393 send_log(s->proxy, LOG_EMERG,
394 "Cannot bind to source address before connect() for server %s/%s.\n",
395 s->proxy->id, s->srv->id);
396 return SN_ERR_RESOURCE;
397 }
398 }
399
400 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
401 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
402
403 if (errno == EAGAIN || errno == EADDRINUSE) {
404 char *msg;
405 if (errno == EAGAIN) /* no free ports left, try again later */
406 msg = "no free ports";
407 else
408 msg = "local address already in use";
409
410 qfprintf(stderr,"Cannot connect: %s.\n",msg);
411 close(fd);
412 send_log(s->proxy, LOG_EMERG,
413 "Connect() failed for server %s/%s: %s.\n",
414 s->proxy->id, s->srv->id, msg);
415 return SN_ERR_RESOURCE;
416 } else if (errno == ETIMEDOUT) {
417 //qfprintf(stderr,"Connect(): ETIMEDOUT");
418 close(fd);
419 return SN_ERR_SRVTO;
420 } else {
421 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
422 //qfprintf(stderr,"Connect(): %d", errno);
423 close(fd);
424 return SN_ERR_SRVCL;
425 }
426 }
427
428 fdtab[fd].owner = s->task;
429 fdtab[fd].read = &event_srv_read;
430 fdtab[fd].write = &event_srv_write;
431 fdtab[fd].state = FD_STCONN; /* connection in progress */
432
433 FD_SET(fd, StaticWriteEvent); /* for connect status */
434#if defined(DEBUG_FULL) && defined(ENABLE_EPOLL)
435 if (PrevReadEvent) {
436 assert(!(FD_ISSET(fd, PrevReadEvent)));
437 assert(!(FD_ISSET(fd, PrevWriteEvent)));
438 }
439#endif
440
441 fd_insert(fd);
442 if (s->srv) {
443 s->srv->cur_sess++;
444 if (s->srv->cur_sess > s->srv->cur_sess_max)
445 s->srv->cur_sess_max = s->srv->cur_sess;
446 }
447
448 if (s->proxy->contimeout)
449 tv_delayfrom(&s->cnexpire, &now, s->proxy->contimeout);
450 else
451 tv_eternity(&s->cnexpire);
452 return SN_ERR_NONE; /* connection is OK */
453}
454
455
456/*
457 * This function checks the retry count during the connect() job.
458 * It updates the session's srv_state and retries, so that the caller knows
459 * what it has to do. It uses the last connection error to set the log when
460 * it expires. It returns 1 when it has expired, and 0 otherwise.
461 */
462int srv_count_retry_down(struct session *t, int conn_err)
463{
464 /* we are in front of a retryable error */
465 t->conn_retries--;
466 if (t->conn_retries < 0) {
467 /* if not retryable anymore, let's abort */
468 tv_eternity(&t->cnexpire);
469 srv_close_with_err(t, conn_err, SN_FINST_C,
470 503, t->proxy->errmsg.len503, t->proxy->errmsg.msg503);
471 if (t->srv)
472 t->srv->failed_conns++;
473 t->proxy->failed_conns++;
474
475 /* We used to have a free connection slot. Since we'll never use it,
476 * we have to inform the server that it may be used by another session.
477 */
478 if (may_dequeue_tasks(t->srv, t->proxy))
479 task_wakeup(&rq, t->srv->queue_mgt);
480 return 1;
481 }
482 return 0;
483}
484
485
486/*
487 * This function performs the retryable part of the connect() job.
488 * It updates the session's srv_state and retries, so that the caller knows
489 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
490 * it needs to redispatch.
491 */
492int srv_retryable_connect(struct session *t)
493{
494 int conn_err;
495
496 /* This loop ensures that we stop before the last retry in case of a
497 * redispatchable server.
498 */
499 do {
500 /* initiate a connection to the server */
501 conn_err = connect_server(t);
502 switch (conn_err) {
503
504 case SN_ERR_NONE:
505 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
506 t->srv_state = SV_STCONN;
507 return 1;
508
509 case SN_ERR_INTERNAL:
510 tv_eternity(&t->cnexpire);
511 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
512 500, t->proxy->errmsg.len500, t->proxy->errmsg.msg500);
513 if (t->srv)
514 t->srv->failed_conns++;
515 t->proxy->failed_conns++;
516 /* release other sessions waiting for this server */
517 if (may_dequeue_tasks(t->srv, t->proxy))
518 task_wakeup(&rq, t->srv->queue_mgt);
519 return 1;
520 }
521 /* ensure that we have enough retries left */
522 if (srv_count_retry_down(t, conn_err)) {
523 /* let's try to offer this slot to anybody */
524 if (may_dequeue_tasks(t->srv, t->proxy))
525 task_wakeup(&rq, t->srv->queue_mgt);
526 return 1;
527 }
528 } while (t->srv == NULL || t->conn_retries > 0 || !(t->proxy->options & PR_O_REDISP));
529
530 /* We're on our last chance, and the REDISP option was specified.
531 * We will ignore cookie and force to balance or use the dispatcher.
532 */
533 /* let's try to offer this slot to anybody */
534 if (may_dequeue_tasks(t->srv, t->proxy))
535 task_wakeup(&rq, t->srv->queue_mgt);
536
537 if (t->srv)
538 t->srv->failed_conns++;
539 t->proxy->failed_conns++;
540
541 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
542 t->srv = NULL; /* it's left to the dispatcher to choose a server */
543 if ((t->flags & SN_CK_MASK) == SN_CK_VALID) {
544 t->flags &= ~SN_CK_MASK;
545 t->flags |= SN_CK_DOWN;
546 }
547 return 0;
548}
549
550
551/* This function performs the "redispatch" part of a connection attempt. It
552 * will assign a server if required, queue the connection if required, and
553 * handle errors that might arise at this level. It can change the server
554 * state. It will return 1 if it encounters an error, switches the server
555 * state, or has to queue a connection. Otherwise, it will return 0 indicating
556 * that the connection is ready to use.
557 */
558
559int srv_redispatch_connect(struct session *t)
560{
561 int conn_err;
562
563 /* We know that we don't have any connection pending, so we will
564 * try to get a new one, and wait in this state if it's queued
565 */
566 conn_err = assign_server_and_queue(t);
567 switch (conn_err) {
568 case SRV_STATUS_OK:
569 break;
570
571 case SRV_STATUS_NOSRV:
572 /* note: it is guaranteed that t->srv == NULL here */
573 tv_eternity(&t->cnexpire);
574 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
575 503, t->proxy->errmsg.len503, t->proxy->errmsg.msg503);
576 if (t->srv)
577 t->srv->failed_conns++;
578 t->proxy->failed_conns++;
579
580 return 1;
581
582 case SRV_STATUS_QUEUED:
583 /* FIXME-20060503 : we should use the queue timeout instead */
584 if (t->proxy->contimeout)
585 tv_delayfrom(&t->cnexpire, &now, t->proxy->contimeout);
586 else
587 tv_eternity(&t->cnexpire);
588 t->srv_state = SV_STIDLE;
589 /* do nothing else and do not wake any other session up */
590 return 1;
591
592 case SRV_STATUS_FULL:
593 case SRV_STATUS_INTERNAL:
594 default:
595 tv_eternity(&t->cnexpire);
596 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
597 500, t->proxy->errmsg.len500, t->proxy->errmsg.msg500);
598 if (t->srv)
599 t->srv->failed_conns++;
600 t->proxy->failed_conns++;
601
602 /* release other sessions waiting for this server */
603 if (may_dequeue_tasks(t->srv, t->proxy))
604 task_wakeup(&rq, t->srv->queue_mgt);
605 return 1;
606 }
607 /* if we get here, it's because we got SRV_STATUS_OK, which also
608 * means that the connection has not been queued.
609 */
610 return 0;
611}
612
613
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */