blob: ec997901f28fd47d661bee424a8bb40566d55cce [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
4 * Copyright 2000-2006 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
18
19#include <haproxy/compat.h>
20#include <haproxy/time.h>
21
22#include <types/buffers.h>
23#include <types/global.h>
24#include <types/polling.h>
25#include <types/proxy.h>
26#include <types/server.h>
27#include <types/session.h>
28
29#include <proto/backend.h>
30#include <proto/fd.h>
31#include <proto/log.h>
32#include <proto/proto_http.h>
33#include <proto/queue.h>
34#include <proto/stream_sock.h>
35#include <proto/task.h>
36
37
38/*
39 * This function recounts the number of usable active and backup servers for
40 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
41 * This function also recomputes the total active and backup weights.
42 */
43void recount_servers(struct proxy *px)
44{
45 struct server *srv;
46
47 px->srv_act = 0; px->srv_bck = px->tot_wact = px->tot_wbck = 0;
48 for (srv = px->srv; srv != NULL; srv = srv->next) {
49 if (srv->state & SRV_RUNNING) {
50 if (srv->state & SRV_BACKUP) {
51 px->srv_bck++;
52 px->tot_wbck += srv->eweight + 1;
53 } else {
54 px->srv_act++;
55 px->tot_wact += srv->eweight + 1;
56 }
57 }
58 }
59}
60
61/* This function recomputes the server map for proxy px. It
62 * relies on px->tot_wact and px->tot_wbck, so it must be
63 * called after recount_servers(). It also expects px->srv_map
64 * to be initialized to the largest value needed.
65 */
66void recalc_server_map(struct proxy *px)
67{
68 int o, tot, flag;
69 struct server *cur, *best;
70
71 if (px->srv_act) {
72 flag = SRV_RUNNING;
73 tot = px->tot_wact;
74 } else if (px->srv_bck) {
75 flag = SRV_RUNNING | SRV_BACKUP;
76 if (px->options & PR_O_USE_ALL_BK)
77 tot = px->tot_wbck;
78 else
79 tot = 1; /* the first server is enough */
80 } else {
81 px->srv_map_sz = 0;
82 return;
83 }
84
85 /* this algorithm gives priority to the first server, which means that
86 * it will respect the declaration order for equivalent weights, and
87 * that whatever the weights, the first server called will always be
88 * the first declard. This is an important asumption for the backup
89 * case, where we want the first server only.
90 */
91 for (cur = px->srv; cur; cur = cur->next)
92 cur->wscore = 0;
93
94 for (o = 0; o < tot; o++) {
95 int max = 0;
96 best = NULL;
97 for (cur = px->srv; cur; cur = cur->next) {
98 if ((cur->state & (SRV_RUNNING | SRV_BACKUP)) == flag) {
99 int v;
100
101 /* If we are forced to return only one server, we don't want to
102 * go further, because we would return the wrong one due to
103 * divide overflow.
104 */
105 if (tot == 1) {
106 best = cur;
107 break;
108 }
109
110 cur->wscore += cur->eweight + 1;
111 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
112 if (best == NULL || v > max) {
113 max = v;
114 best = cur;
115 }
116 }
117 }
118 px->srv_map[o] = best;
119 best->wscore -= tot;
120 }
121 px->srv_map_sz = tot;
122}
123
124
125/*
126 * This function marks the session as 'assigned' in direct or dispatch modes,
127 * or tries to assign one in balance mode, according to the algorithm. It does
128 * nothing if the session had already been assigned a server.
129 *
130 * It may return :
131 * SRV_STATUS_OK if everything is OK. s->srv will be valid.
132 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
133 * SRV_STATUS_FULL if all servers are saturated. s->srv = NULL.
134 * SRV_STATUS_INTERNAL for other unrecoverable errors.
135 *
136 * Upon successful return, the session flag SN_ASSIGNED to indicate that it does
137 * not need to be called anymore. This usually means that s->srv can be trusted
138 * in balance and direct modes. This flag is not cleared, so it's to the caller
139 * to clear it if required (eg: redispatch).
140 *
141 */
142
143int assign_server(struct session *s)
144{
145#ifdef DEBUG_FULL
146 fprintf(stderr,"assign_server : s=%p\n",s);
147#endif
148
149 if (s->pend_pos)
150 return SRV_STATUS_INTERNAL;
151
152 if (!(s->flags & SN_ASSIGNED)) {
153 if ((s->proxy->options & PR_O_BALANCE) && !(s->flags & SN_DIRECT)) {
154 if (!s->proxy->srv_act && !s->proxy->srv_bck)
155 return SRV_STATUS_NOSRV;
156
157 if (s->proxy->options & PR_O_BALANCE_RR) {
158 s->srv = get_server_rr_with_conns(s->proxy);
159 if (!s->srv)
160 return SRV_STATUS_FULL;
161 }
162 else if (s->proxy->options & PR_O_BALANCE_SH) {
163 int len;
164
165 if (s->cli_addr.ss_family == AF_INET)
166 len = 4;
167 else if (s->cli_addr.ss_family == AF_INET6)
168 len = 16;
169 else /* unknown IP family */
170 return SRV_STATUS_INTERNAL;
171
172 s->srv = get_server_sh(s->proxy,
173 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
174 len);
175 }
176 else /* unknown balancing algorithm */
177 return SRV_STATUS_INTERNAL;
178 }
179 s->flags |= SN_ASSIGNED;
180 }
181 return SRV_STATUS_OK;
182}
183
184
185/*
186 * This function assigns a server address to a session, and sets SN_ADDR_SET.
187 * The address is taken from the currently assigned server, or from the
188 * dispatch or transparent address.
189 *
190 * It may return :
191 * SRV_STATUS_OK if everything is OK.
192 * SRV_STATUS_INTERNAL for other unrecoverable errors.
193 *
194 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
195 * not cleared, so it's to the caller to clear it if required.
196 *
197 */
198int assign_server_address(struct session *s)
199{
200#ifdef DEBUG_FULL
201 fprintf(stderr,"assign_server_address : s=%p\n",s);
202#endif
203
204 if (s->flags & SN_DIRECT || s->proxy->options & PR_O_BALANCE) {
205 /* A server is necessarily known for this session */
206 if (!(s->flags & SN_ASSIGNED))
207 return SRV_STATUS_INTERNAL;
208
209 s->srv_addr = s->srv->addr;
210
211 /* if this server remaps proxied ports, we'll use
212 * the port the client connected to with an offset. */
213 if (s->srv->state & SRV_MAPPORTS) {
214 struct sockaddr_in sockname;
215 socklen_t namelen = sizeof(sockname);
216
217 if (!(s->proxy->options & PR_O_TRANSP) ||
218 get_original_dst(s->cli_fd, (struct sockaddr_in *)&sockname, &namelen) == -1)
219 getsockname(s->cli_fd, (struct sockaddr *)&sockname, &namelen);
220 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) + ntohs(sockname.sin_port));
221 }
222 }
223 else if (*(int *)&s->proxy->dispatch_addr.sin_addr) {
224 /* connect to the defined dispatch addr */
225 s->srv_addr = s->proxy->dispatch_addr;
226 }
227 else if (s->proxy->options & PR_O_TRANSP) {
228 /* in transparent mode, use the original dest addr if no dispatch specified */
229 socklen_t salen = sizeof(s->srv_addr);
230
231 if (get_original_dst(s->cli_fd, &s->srv_addr, &salen) == -1) {
232 qfprintf(stderr, "Cannot get original server address.\n");
233 return SRV_STATUS_INTERNAL;
234 }
235 }
236
237 s->flags |= SN_ADDR_SET;
238 return SRV_STATUS_OK;
239}
240
241
242/* This function assigns a server to session <s> if required, and can add the
243 * connection to either the assigned server's queue or to the proxy's queue.
244 *
245 * Returns :
246 *
247 * SRV_STATUS_OK if everything is OK.
248 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
249 * SRV_STATUS_QUEUED if the connection has been queued.
250 * SRV_STATUS_FULL if the server(s) is/are saturated and the
251 * connection could not be queued.
252 * SRV_STATUS_INTERNAL for other unrecoverable errors.
253 *
254 */
255int assign_server_and_queue(struct session *s)
256{
257 struct pendconn *p;
258 int err;
259
260 if (s->pend_pos)
261 return SRV_STATUS_INTERNAL;
262
263 if (s->flags & SN_ASSIGNED) {
264 /* a server does not need to be assigned, perhaps because we're in
265 * direct mode, or in dispatch or transparent modes where the server
266 * is not needed.
267 */
268 if (s->srv &&
269 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
270 p = pendconn_add(s);
271 if (p)
272 return SRV_STATUS_QUEUED;
273 else
274 return SRV_STATUS_FULL;
275 }
276 return SRV_STATUS_OK;
277 }
278
279 /* a server needs to be assigned */
280 err = assign_server(s);
281 switch (err) {
282 case SRV_STATUS_OK:
283 /* in balance mode, we might have servers with connection limits */
284 if (s->srv &&
285 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
286 p = pendconn_add(s);
287 if (p)
288 return SRV_STATUS_QUEUED;
289 else
290 return SRV_STATUS_FULL;
291 }
292 return SRV_STATUS_OK;
293
294 case SRV_STATUS_FULL:
295 /* queue this session into the proxy's queue */
296 p = pendconn_add(s);
297 if (p)
298 return SRV_STATUS_QUEUED;
299 else
300 return SRV_STATUS_FULL;
301
302 case SRV_STATUS_NOSRV:
303 case SRV_STATUS_INTERNAL:
304 return err;
305 default:
306 return SRV_STATUS_INTERNAL;
307 }
308}
309
310
311/*
312 * This function initiates a connection to the server assigned to this session
313 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
314 * It can return one of :
315 * - SN_ERR_NONE if everything's OK
316 * - SN_ERR_SRVTO if there are no more servers
317 * - SN_ERR_SRVCL if the connection was refused by the server
318 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
319 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
320 * - SN_ERR_INTERNAL for any other purely internal errors
321 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
322 */
323int connect_server(struct session *s)
324{
325 int fd, err;
326
327 if (!(s->flags & SN_ADDR_SET)) {
328 err = assign_server_address(s);
329 if (err != SRV_STATUS_OK)
330 return SN_ERR_INTERNAL;
331 }
332
333 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
334 qfprintf(stderr, "Cannot get a server socket.\n");
335
336 if (errno == ENFILE)
337 send_log(s->proxy, LOG_EMERG,
338 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
339 s->proxy->id, maxfd);
340 else if (errno == EMFILE)
341 send_log(s->proxy, LOG_EMERG,
342 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
343 s->proxy->id, maxfd);
344 else if (errno == ENOBUFS || errno == ENOMEM)
345 send_log(s->proxy, LOG_EMERG,
346 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
347 s->proxy->id, maxfd);
348 /* this is a resource error */
349 return SN_ERR_RESOURCE;
350 }
351
352 if (fd >= global.maxsock) {
353 /* do not log anything there, it's a normal condition when this option
354 * is used to serialize connections to a server !
355 */
356 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
357 close(fd);
358 return SN_ERR_PRXCOND; /* it is a configuration limit */
359 }
360
361 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
362 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
363 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
364 close(fd);
365 return SN_ERR_INTERNAL;
366 }
367
368 if (s->proxy->options & PR_O_TCP_SRV_KA)
369 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
370
371 /* allow specific binding :
372 * - server-specific at first
373 * - proxy-specific next
374 */
375 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
376 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
377 if (bind(fd, (struct sockaddr *)&s->srv->source_addr, sizeof(s->srv->source_addr)) == -1) {
378 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
379 s->proxy->id, s->srv->id);
380 close(fd);
381 send_log(s->proxy, LOG_EMERG,
382 "Cannot bind to source address before connect() for server %s/%s.\n",
383 s->proxy->id, s->srv->id);
384 return SN_ERR_RESOURCE;
385 }
386 }
387 else if (s->proxy->options & PR_O_BIND_SRC) {
388 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
389 if (bind(fd, (struct sockaddr *)&s->proxy->source_addr, sizeof(s->proxy->source_addr)) == -1) {
390 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n", s->proxy->id);
391 close(fd);
392 send_log(s->proxy, LOG_EMERG,
393 "Cannot bind to source address before connect() for server %s/%s.\n",
394 s->proxy->id, s->srv->id);
395 return SN_ERR_RESOURCE;
396 }
397 }
398
399 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
400 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
401
402 if (errno == EAGAIN || errno == EADDRINUSE) {
403 char *msg;
404 if (errno == EAGAIN) /* no free ports left, try again later */
405 msg = "no free ports";
406 else
407 msg = "local address already in use";
408
409 qfprintf(stderr,"Cannot connect: %s.\n",msg);
410 close(fd);
411 send_log(s->proxy, LOG_EMERG,
412 "Connect() failed for server %s/%s: %s.\n",
413 s->proxy->id, s->srv->id, msg);
414 return SN_ERR_RESOURCE;
415 } else if (errno == ETIMEDOUT) {
416 //qfprintf(stderr,"Connect(): ETIMEDOUT");
417 close(fd);
418 return SN_ERR_SRVTO;
419 } else {
420 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
421 //qfprintf(stderr,"Connect(): %d", errno);
422 close(fd);
423 return SN_ERR_SRVCL;
424 }
425 }
426
427 fdtab[fd].owner = s->task;
428 fdtab[fd].read = &event_srv_read;
429 fdtab[fd].write = &event_srv_write;
430 fdtab[fd].state = FD_STCONN; /* connection in progress */
431
432 FD_SET(fd, StaticWriteEvent); /* for connect status */
433#if defined(DEBUG_FULL) && defined(ENABLE_EPOLL)
434 if (PrevReadEvent) {
435 assert(!(FD_ISSET(fd, PrevReadEvent)));
436 assert(!(FD_ISSET(fd, PrevWriteEvent)));
437 }
438#endif
439
440 fd_insert(fd);
441 if (s->srv) {
442 s->srv->cur_sess++;
443 if (s->srv->cur_sess > s->srv->cur_sess_max)
444 s->srv->cur_sess_max = s->srv->cur_sess;
445 }
446
447 if (s->proxy->contimeout)
448 tv_delayfrom(&s->cnexpire, &now, s->proxy->contimeout);
449 else
450 tv_eternity(&s->cnexpire);
451 return SN_ERR_NONE; /* connection is OK */
452}
453
454
455/*
456 * This function checks the retry count during the connect() job.
457 * It updates the session's srv_state and retries, so that the caller knows
458 * what it has to do. It uses the last connection error to set the log when
459 * it expires. It returns 1 when it has expired, and 0 otherwise.
460 */
461int srv_count_retry_down(struct session *t, int conn_err)
462{
463 /* we are in front of a retryable error */
464 t->conn_retries--;
465 if (t->conn_retries < 0) {
466 /* if not retryable anymore, let's abort */
467 tv_eternity(&t->cnexpire);
468 srv_close_with_err(t, conn_err, SN_FINST_C,
469 503, t->proxy->errmsg.len503, t->proxy->errmsg.msg503);
470 if (t->srv)
471 t->srv->failed_conns++;
472 t->proxy->failed_conns++;
473
474 /* We used to have a free connection slot. Since we'll never use it,
475 * we have to inform the server that it may be used by another session.
476 */
477 if (may_dequeue_tasks(t->srv, t->proxy))
478 task_wakeup(&rq, t->srv->queue_mgt);
479 return 1;
480 }
481 return 0;
482}
483
484
485/*
486 * This function performs the retryable part of the connect() job.
487 * It updates the session's srv_state and retries, so that the caller knows
488 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
489 * it needs to redispatch.
490 */
491int srv_retryable_connect(struct session *t)
492{
493 int conn_err;
494
495 /* This loop ensures that we stop before the last retry in case of a
496 * redispatchable server.
497 */
498 do {
499 /* initiate a connection to the server */
500 conn_err = connect_server(t);
501 switch (conn_err) {
502
503 case SN_ERR_NONE:
504 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
505 t->srv_state = SV_STCONN;
506 return 1;
507
508 case SN_ERR_INTERNAL:
509 tv_eternity(&t->cnexpire);
510 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
511 500, t->proxy->errmsg.len500, t->proxy->errmsg.msg500);
512 if (t->srv)
513 t->srv->failed_conns++;
514 t->proxy->failed_conns++;
515 /* release other sessions waiting for this server */
516 if (may_dequeue_tasks(t->srv, t->proxy))
517 task_wakeup(&rq, t->srv->queue_mgt);
518 return 1;
519 }
520 /* ensure that we have enough retries left */
521 if (srv_count_retry_down(t, conn_err)) {
522 /* let's try to offer this slot to anybody */
523 if (may_dequeue_tasks(t->srv, t->proxy))
524 task_wakeup(&rq, t->srv->queue_mgt);
525 return 1;
526 }
527 } while (t->srv == NULL || t->conn_retries > 0 || !(t->proxy->options & PR_O_REDISP));
528
529 /* We're on our last chance, and the REDISP option was specified.
530 * We will ignore cookie and force to balance or use the dispatcher.
531 */
532 /* let's try to offer this slot to anybody */
533 if (may_dequeue_tasks(t->srv, t->proxy))
534 task_wakeup(&rq, t->srv->queue_mgt);
535
536 if (t->srv)
537 t->srv->failed_conns++;
538 t->proxy->failed_conns++;
539
540 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
541 t->srv = NULL; /* it's left to the dispatcher to choose a server */
542 if ((t->flags & SN_CK_MASK) == SN_CK_VALID) {
543 t->flags &= ~SN_CK_MASK;
544 t->flags |= SN_CK_DOWN;
545 }
546 return 0;
547}
548
549
550/* This function performs the "redispatch" part of a connection attempt. It
551 * will assign a server if required, queue the connection if required, and
552 * handle errors that might arise at this level. It can change the server
553 * state. It will return 1 if it encounters an error, switches the server
554 * state, or has to queue a connection. Otherwise, it will return 0 indicating
555 * that the connection is ready to use.
556 */
557
558int srv_redispatch_connect(struct session *t)
559{
560 int conn_err;
561
562 /* We know that we don't have any connection pending, so we will
563 * try to get a new one, and wait in this state if it's queued
564 */
565 conn_err = assign_server_and_queue(t);
566 switch (conn_err) {
567 case SRV_STATUS_OK:
568 break;
569
570 case SRV_STATUS_NOSRV:
571 /* note: it is guaranteed that t->srv == NULL here */
572 tv_eternity(&t->cnexpire);
573 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
574 503, t->proxy->errmsg.len503, t->proxy->errmsg.msg503);
575 if (t->srv)
576 t->srv->failed_conns++;
577 t->proxy->failed_conns++;
578
579 return 1;
580
581 case SRV_STATUS_QUEUED:
582 /* FIXME-20060503 : we should use the queue timeout instead */
583 if (t->proxy->contimeout)
584 tv_delayfrom(&t->cnexpire, &now, t->proxy->contimeout);
585 else
586 tv_eternity(&t->cnexpire);
587 t->srv_state = SV_STIDLE;
588 /* do nothing else and do not wake any other session up */
589 return 1;
590
591 case SRV_STATUS_FULL:
592 case SRV_STATUS_INTERNAL:
593 default:
594 tv_eternity(&t->cnexpire);
595 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
596 500, t->proxy->errmsg.len500, t->proxy->errmsg.msg500);
597 if (t->srv)
598 t->srv->failed_conns++;
599 t->proxy->failed_conns++;
600
601 /* release other sessions waiting for this server */
602 if (may_dequeue_tasks(t->srv, t->proxy))
603 task_wakeup(&rq, t->srv->queue_mgt);
604 return 1;
605 }
606 /* if we get here, it's because we got SRV_STATUS_OK, which also
607 * means that the connection has not been queued.
608 */
609 return 0;
610}
611
612
613/*
614 * Local variables:
615 * c-indent-level: 8
616 * c-basic-offset: 8
617 * End:
618 */