blob: 96de0ba375a3b4dc066726585cd53f0b21864847 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaud825eef2007-05-12 22:35:00 +02004 * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020019
Willy Tarreau2dd0d472006-06-29 17:53:05 +020020#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020021#include <common/config.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020022#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020023
24#include <types/buffers.h>
25#include <types/global.h>
26#include <types/polling.h>
27#include <types/proxy.h>
28#include <types/server.h>
29#include <types/session.h>
30
31#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020032#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020033#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010034#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020035#include <proto/log.h>
36#include <proto/proto_http.h>
37#include <proto/queue.h>
38#include <proto/stream_sock.h>
39#include <proto/task.h>
40
Willy Tarreau77074d52006-11-12 23:57:19 +010041#ifdef CONFIG_HAP_CTTPROXY
42#include <import/ip_tproxy.h>
43#endif
Willy Tarreaubaaee002006-06-26 02:48:02 +020044
Willy Tarreau6d1a9882007-01-07 02:03:04 +010045#ifdef CONFIG_HAP_TCPSPLICE
46#include <libtcpsplice.h>
47#endif
48
Willy Tarreaubaaee002006-06-26 02:48:02 +020049/*
50 * This function recounts the number of usable active and backup servers for
51 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
52 * This function also recomputes the total active and backup weights.
53 */
54void recount_servers(struct proxy *px)
55{
56 struct server *srv;
Willy Tarreau20697042007-11-15 23:26:18 +010057 int first_bkw = 0;
Willy Tarreaubaaee002006-06-26 02:48:02 +020058
Willy Tarreau20697042007-11-15 23:26:18 +010059 px->srv_act = px->srv_bck = 0;
60 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaubaaee002006-06-26 02:48:02 +020061 for (srv = px->srv; srv != NULL; srv = srv->next) {
62 if (srv->state & SRV_RUNNING) {
63 if (srv->state & SRV_BACKUP) {
64 px->srv_bck++;
Willy Tarreau20697042007-11-15 23:26:18 +010065 px->lbprm.tot_wbck += srv->eweight;
66 if (px->srv_bck == 1)
67 first_bkw = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020068 } else {
69 px->srv_act++;
Willy Tarreau20697042007-11-15 23:26:18 +010070 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020071 }
72 }
73 }
Willy Tarreau20697042007-11-15 23:26:18 +010074
75 if (px->srv_act) {
76 px->lbprm.tot_weight = px->lbprm.tot_wact;
77 px->lbprm.tot_used = px->srv_act;
78 }
79 else if (px->srv_bck) {
80 if (px->options & PR_O_USE_ALL_BK) {
81 px->lbprm.tot_weight = px->lbprm.tot_wbck;
82 px->lbprm.tot_used = px->srv_bck;
83 }
84 else { /* the first backup server is enough */
85 px->lbprm.tot_weight = first_bkw;
86 px->lbprm.tot_used = 1;
87 }
88 }
89 else {
90 px->lbprm.tot_weight = 0;
91 px->lbprm.tot_used = 0;
92 }
93
Willy Tarreaubaaee002006-06-26 02:48:02 +020094}
95
Willy Tarreau20697042007-11-15 23:26:18 +010096/* This function recomputes the server map for proxy px. It relies on
97 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
98 * called after recount_servers(). It also expects px->lbprm.map.srv
99 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200100 */
101void recalc_server_map(struct proxy *px)
102{
103 int o, tot, flag;
104 struct server *cur, *best;
105
Willy Tarreau20697042007-11-15 23:26:18 +0100106 switch (px->lbprm.tot_used) {
107 case 0: /* no server */
108 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200109 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100110 case 1: /* only one server, just fill first entry */
111 tot = 1;
112 break;
113 default:
114 tot = px->lbprm.tot_weight;
115 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200116 }
117
Willy Tarreau20697042007-11-15 23:26:18 +0100118 /* here we *know* that we have some servers */
119 if (px->srv_act)
120 flag = SRV_RUNNING;
121 else
122 flag = SRV_RUNNING | SRV_BACKUP;
123
Willy Tarreaubaaee002006-06-26 02:48:02 +0200124 /* this algorithm gives priority to the first server, which means that
125 * it will respect the declaration order for equivalent weights, and
126 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100127 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200128 * case, where we want the first server only.
129 */
130 for (cur = px->srv; cur; cur = cur->next)
131 cur->wscore = 0;
132
133 for (o = 0; o < tot; o++) {
134 int max = 0;
135 best = NULL;
136 for (cur = px->srv; cur; cur = cur->next) {
137 if ((cur->state & (SRV_RUNNING | SRV_BACKUP)) == flag) {
138 int v;
139
140 /* If we are forced to return only one server, we don't want to
141 * go further, because we would return the wrong one due to
142 * divide overflow.
143 */
144 if (tot == 1) {
145 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100146 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200147 break;
148 }
149
Willy Tarreau417fae02007-03-25 21:16:40 +0200150 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200151 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
152 if (best == NULL || v > max) {
153 max = v;
154 best = cur;
155 }
156 }
157 }
Willy Tarreau20697042007-11-15 23:26:18 +0100158 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200159 best->wscore -= tot;
160 }
Willy Tarreau20697042007-11-15 23:26:18 +0100161 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200162}
163
Willy Tarreau01732802007-11-01 22:48:15 +0100164/*
165 * This function tries to find a running server for the proxy <px> following
166 * the URL parameter hash method. It looks for a specific parameter in the
167 * URL and hashes it to compute the server ID. This is useful to optimize
168 * performance by avoiding bounces between servers in contexts where sessions
169 * are shared but cookies are not usable. If the parameter is not found, NULL
170 * is returned. If any server is found, it will be returned. If no valid server
171 * is found, NULL is returned.
172 *
173 */
174struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
175{
176 unsigned long hash = 0;
177 char *p;
178 int plen;
179
Willy Tarreau20697042007-11-15 23:26:18 +0100180 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +0100181 return NULL;
182
Willy Tarreau20697042007-11-15 23:26:18 +0100183 if (px->lbprm.map.state & PR_MAP_RECALC)
184 recalc_server_map(px);
185
Willy Tarreau01732802007-11-01 22:48:15 +0100186 p = memchr(uri, '?', uri_len);
187 if (!p)
188 return NULL;
189 p++;
190
191 uri_len -= (p - uri);
192 plen = px->url_param_len;
193
194 if (uri_len <= plen)
195 return NULL;
196
197 while (uri_len > plen) {
198 /* Look for the parameter name followed by an equal symbol */
199 if (p[plen] == '=') {
200 /* skip the equal symbol */
201 uri = p;
202 p += plen + 1;
203 uri_len -= plen + 1;
204 if (memcmp(uri, px->url_param_name, plen) == 0) {
205 /* OK, we have the parameter here at <uri>, and
206 * the value after the equal sign, at <p>
207 */
208 while (uri_len && *p != '&') {
209 hash = *p + (hash << 6) + (hash << 16) - hash;
210 uri_len--;
211 p++;
212 }
Willy Tarreau20697042007-11-15 23:26:18 +0100213 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +0100214 }
215 }
216
217 /* skip to next parameter */
218 uri = p;
219 p = memchr(uri, '&', uri_len);
220 if (!p)
221 return NULL;
222 p++;
223 uri_len -= (p - uri);
224 }
225 return NULL;
226}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200227
228/*
229 * This function marks the session as 'assigned' in direct or dispatch modes,
230 * or tries to assign one in balance mode, according to the algorithm. It does
231 * nothing if the session had already been assigned a server.
232 *
233 * It may return :
234 * SRV_STATUS_OK if everything is OK. s->srv will be valid.
235 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
236 * SRV_STATUS_FULL if all servers are saturated. s->srv = NULL.
237 * SRV_STATUS_INTERNAL for other unrecoverable errors.
238 *
239 * Upon successful return, the session flag SN_ASSIGNED to indicate that it does
240 * not need to be called anymore. This usually means that s->srv can be trusted
241 * in balance and direct modes. This flag is not cleared, so it's to the caller
242 * to clear it if required (eg: redispatch).
243 *
244 */
245
246int assign_server(struct session *s)
247{
248#ifdef DEBUG_FULL
249 fprintf(stderr,"assign_server : s=%p\n",s);
250#endif
251
252 if (s->pend_pos)
253 return SRV_STATUS_INTERNAL;
254
255 if (!(s->flags & SN_ASSIGNED)) {
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200256 if (s->be->options & PR_O_BALANCE) {
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100257 int len;
258
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100259 if (s->flags & SN_DIRECT) {
260 s->flags |= SN_ASSIGNED;
261 return SRV_STATUS_OK;
262 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100263
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200264 if (!s->be->srv_act && !s->be->srv_bck)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200265 return SRV_STATUS_NOSRV;
266
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100267 switch (s->be->options & PR_O_BALANCE) {
268 case PR_O_BALANCE_RR:
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200269 s->srv = get_server_rr_with_conns(s->be);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200270 if (!s->srv)
271 return SRV_STATUS_FULL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100272 break;
273 case PR_O_BALANCE_SH:
Willy Tarreaubaaee002006-06-26 02:48:02 +0200274 if (s->cli_addr.ss_family == AF_INET)
275 len = 4;
276 else if (s->cli_addr.ss_family == AF_INET6)
277 len = 16;
278 else /* unknown IP family */
279 return SRV_STATUS_INTERNAL;
280
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200281 s->srv = get_server_sh(s->be,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200282 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
283 len);
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100284 break;
285 case PR_O_BALANCE_UH:
Willy Tarreau2fcb5002007-05-08 13:35:26 +0200286 /* URI hashing */
287 s->srv = get_server_uh(s->be,
288 s->txn.req.sol + s->txn.req.sl.rq.u,
289 s->txn.req.sl.rq.u_l);
Willy Tarreau01732802007-11-01 22:48:15 +0100290 break;
291 case PR_O_BALANCE_PH:
292 /* URL Parameter hashing */
293 s->srv = get_server_ph(s->be,
294 s->txn.req.sol + s->txn.req.sl.rq.u,
295 s->txn.req.sl.rq.u_l);
296 if (!s->srv) {
297 /* parameter not found, fall back to round robin */
298 s->srv = get_server_rr_with_conns(s->be);
299 if (!s->srv)
300 return SRV_STATUS_FULL;
301 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100302 break;
303 default:
304 /* unknown balancing algorithm */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200305 return SRV_STATUS_INTERNAL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100306 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200307 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200308 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100309 !(s->fe->options & PR_O_TRANSP)) {
Willy Tarreau1a1158b2007-01-20 11:07:46 +0100310 return SRV_STATUS_NOSRV;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100311 }
312 s->flags |= SN_ASSIGNED;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200313 }
314 return SRV_STATUS_OK;
315}
316
317
318/*
319 * This function assigns a server address to a session, and sets SN_ADDR_SET.
320 * The address is taken from the currently assigned server, or from the
321 * dispatch or transparent address.
322 *
323 * It may return :
324 * SRV_STATUS_OK if everything is OK.
325 * SRV_STATUS_INTERNAL for other unrecoverable errors.
326 *
327 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
328 * not cleared, so it's to the caller to clear it if required.
329 *
330 */
331int assign_server_address(struct session *s)
332{
333#ifdef DEBUG_FULL
334 fprintf(stderr,"assign_server_address : s=%p\n",s);
335#endif
336
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200337 if ((s->flags & SN_DIRECT) || (s->be->options & PR_O_BALANCE)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200338 /* A server is necessarily known for this session */
339 if (!(s->flags & SN_ASSIGNED))
340 return SRV_STATUS_INTERNAL;
341
342 s->srv_addr = s->srv->addr;
343
344 /* if this server remaps proxied ports, we'll use
345 * the port the client connected to with an offset. */
346 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau14c8aac2007-05-08 19:46:30 +0200347 if (!(s->fe->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
348 get_frt_addr(s);
349 if (s->frt_addr.ss_family == AF_INET) {
350 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
351 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
352 } else {
353 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
354 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
355 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200356 }
357 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200358 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200359 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200360 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200361 }
Willy Tarreau73de9892006-11-30 11:40:23 +0100362 else if (s->fe->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200363 /* in transparent mode, use the original dest addr if no dispatch specified */
364 socklen_t salen = sizeof(s->srv_addr);
365
366 if (get_original_dst(s->cli_fd, &s->srv_addr, &salen) == -1) {
367 qfprintf(stderr, "Cannot get original server address.\n");
368 return SRV_STATUS_INTERNAL;
369 }
370 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +0100371 else {
372 /* no server and no LB algorithm ! */
373 return SRV_STATUS_INTERNAL;
374 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200375
376 s->flags |= SN_ADDR_SET;
377 return SRV_STATUS_OK;
378}
379
380
381/* This function assigns a server to session <s> if required, and can add the
382 * connection to either the assigned server's queue or to the proxy's queue.
383 *
384 * Returns :
385 *
386 * SRV_STATUS_OK if everything is OK.
387 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
388 * SRV_STATUS_QUEUED if the connection has been queued.
389 * SRV_STATUS_FULL if the server(s) is/are saturated and the
390 * connection could not be queued.
391 * SRV_STATUS_INTERNAL for other unrecoverable errors.
392 *
393 */
394int assign_server_and_queue(struct session *s)
395{
396 struct pendconn *p;
397 int err;
398
399 if (s->pend_pos)
400 return SRV_STATUS_INTERNAL;
401
402 if (s->flags & SN_ASSIGNED) {
Elijah Epifanovacafc5f2007-10-25 20:15:38 +0200403 if (s->srv && s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue) {
404 s->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
405 s->srv = NULL;
406 http_flush_cookie_flags(&s->txn);
407 } else {
408 /* a server does not need to be assigned, perhaps because we're in
409 * direct mode, or in dispatch or transparent modes where the server
410 * is not needed.
411 */
412 if (s->srv &&
413 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
414 p = pendconn_add(s);
415 if (p)
416 return SRV_STATUS_QUEUED;
417 else
418 return SRV_STATUS_FULL;
419 }
420 return SRV_STATUS_OK;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200421 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200422 }
423
424 /* a server needs to be assigned */
425 err = assign_server(s);
426 switch (err) {
427 case SRV_STATUS_OK:
428 /* in balance mode, we might have servers with connection limits */
429 if (s->srv &&
430 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
431 p = pendconn_add(s);
432 if (p)
433 return SRV_STATUS_QUEUED;
434 else
435 return SRV_STATUS_FULL;
436 }
437 return SRV_STATUS_OK;
438
439 case SRV_STATUS_FULL:
440 /* queue this session into the proxy's queue */
441 p = pendconn_add(s);
442 if (p)
443 return SRV_STATUS_QUEUED;
444 else
445 return SRV_STATUS_FULL;
446
447 case SRV_STATUS_NOSRV:
448 case SRV_STATUS_INTERNAL:
449 return err;
450 default:
451 return SRV_STATUS_INTERNAL;
452 }
453}
454
455
456/*
457 * This function initiates a connection to the server assigned to this session
458 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
459 * It can return one of :
460 * - SN_ERR_NONE if everything's OK
461 * - SN_ERR_SRVTO if there are no more servers
462 * - SN_ERR_SRVCL if the connection was refused by the server
463 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
464 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
465 * - SN_ERR_INTERNAL for any other purely internal errors
466 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
467 */
468int connect_server(struct session *s)
469{
470 int fd, err;
471
472 if (!(s->flags & SN_ADDR_SET)) {
473 err = assign_server_address(s);
474 if (err != SRV_STATUS_OK)
475 return SN_ERR_INTERNAL;
476 }
477
478 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
479 qfprintf(stderr, "Cannot get a server socket.\n");
480
481 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200482 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200483 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200484 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200485 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200486 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200487 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200488 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200489 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200490 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200491 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200492 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200493 /* this is a resource error */
494 return SN_ERR_RESOURCE;
495 }
496
497 if (fd >= global.maxsock) {
498 /* do not log anything there, it's a normal condition when this option
499 * is used to serialize connections to a server !
500 */
501 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
502 close(fd);
503 return SN_ERR_PRXCOND; /* it is a configuration limit */
504 }
505
Willy Tarreau6d1a9882007-01-07 02:03:04 +0100506#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200507 if ((s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +0100508 /* TCP splicing supported by both FE and BE */
509 tcp_splice_initfd(s->cli_fd, fd);
510 }
511#endif
512
Willy Tarreaubaaee002006-06-26 02:48:02 +0200513 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
514 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
515 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
516 close(fd);
517 return SN_ERR_INTERNAL;
518 }
519
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200520 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200521 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
522
Alexandre Cassen87ea5482007-10-11 20:48:58 +0200523 if (s->be->options & PR_O_TCP_NOLING)
524 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
525
Willy Tarreaubaaee002006-06-26 02:48:02 +0200526 /* allow specific binding :
527 * - server-specific at first
528 * - proxy-specific next
529 */
530 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
531 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
532 if (bind(fd, (struct sockaddr *)&s->srv->source_addr, sizeof(s->srv->source_addr)) == -1) {
533 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200534 s->be->id, s->srv->id);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200535 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200536 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200537 "Cannot bind to source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200538 s->be->id, s->srv->id);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200539 return SN_ERR_RESOURCE;
540 }
Willy Tarreau77074d52006-11-12 23:57:19 +0100541#ifdef CONFIG_HAP_CTTPROXY
542 if (s->srv->state & SRV_TPROXY_MASK) {
543 struct in_tproxy itp1, itp2;
544 memset(&itp1, 0, sizeof(itp1));
545
546 itp1.op = TPROXY_ASSIGN;
547 switch (s->srv->state & SRV_TPROXY_MASK) {
548 case SRV_TPROXY_ADDR:
549 itp1.v.addr.faddr = s->srv->tproxy_addr.sin_addr;
550 itp1.v.addr.fport = s->srv->tproxy_addr.sin_port;
551 break;
552 case SRV_TPROXY_CLI:
553 itp1.v.addr.fport = ((struct sockaddr_in *)&s->cli_addr)->sin_port;
554 /* fall through */
555 case SRV_TPROXY_CIP:
556 /* FIXME: what can we do if the client connects in IPv6 ? */
557 itp1.v.addr.faddr = ((struct sockaddr_in *)&s->cli_addr)->sin_addr;
558 break;
559 }
560
561 /* set connect flag on socket */
562 itp2.op = TPROXY_FLAGS;
563 itp2.v.flags = ITP_CONNECT | ITP_ONCE;
564
565 if (setsockopt(fd, SOL_IP, IP_TPROXY, &itp1, sizeof(itp1)) == -1 ||
566 setsockopt(fd, SOL_IP, IP_TPROXY, &itp2, sizeof(itp2)) == -1) {
567 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200568 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +0100569 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200570 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +0100571 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200572 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +0100573 return SN_ERR_RESOURCE;
574 }
575 }
576#endif
Willy Tarreaubaaee002006-06-26 02:48:02 +0200577 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200578 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200579 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200580 if (bind(fd, (struct sockaddr *)&s->be->source_addr, sizeof(s->be->source_addr)) == -1) {
581 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n", s->be->id);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200582 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200583 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200584 "Cannot bind to source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200585 s->be->id, s->srv->id);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200586 return SN_ERR_RESOURCE;
587 }
Willy Tarreau77074d52006-11-12 23:57:19 +0100588#ifdef CONFIG_HAP_CTTPROXY
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200589 if (s->be->options & PR_O_TPXY_MASK) {
Willy Tarreau77074d52006-11-12 23:57:19 +0100590 struct in_tproxy itp1, itp2;
591 memset(&itp1, 0, sizeof(itp1));
592
593 itp1.op = TPROXY_ASSIGN;
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200594 switch (s->be->options & PR_O_TPXY_MASK) {
Willy Tarreau77074d52006-11-12 23:57:19 +0100595 case PR_O_TPXY_ADDR:
596 itp1.v.addr.faddr = s->srv->tproxy_addr.sin_addr;
597 itp1.v.addr.fport = s->srv->tproxy_addr.sin_port;
598 break;
599 case PR_O_TPXY_CLI:
600 itp1.v.addr.fport = ((struct sockaddr_in *)&s->cli_addr)->sin_port;
601 /* fall through */
602 case PR_O_TPXY_CIP:
603 /* FIXME: what can we do if the client connects in IPv6 ? */
604 itp1.v.addr.faddr = ((struct sockaddr_in *)&s->cli_addr)->sin_addr;
605 break;
606 }
607
608 /* set connect flag on socket */
609 itp2.op = TPROXY_FLAGS;
610 itp2.v.flags = ITP_CONNECT | ITP_ONCE;
611
612 if (setsockopt(fd, SOL_IP, IP_TPROXY, &itp1, sizeof(itp1)) == -1 ||
613 setsockopt(fd, SOL_IP, IP_TPROXY, &itp2, sizeof(itp2)) == -1) {
614 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200615 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +0100616 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200617 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +0100618 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200619 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +0100620 return SN_ERR_RESOURCE;
621 }
622 }
623#endif
Willy Tarreaubaaee002006-06-26 02:48:02 +0200624 }
625
626 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
627 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
628
629 if (errno == EAGAIN || errno == EADDRINUSE) {
630 char *msg;
631 if (errno == EAGAIN) /* no free ports left, try again later */
632 msg = "no free ports";
633 else
634 msg = "local address already in use";
635
636 qfprintf(stderr,"Cannot connect: %s.\n",msg);
637 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200638 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200639 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200640 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200641 return SN_ERR_RESOURCE;
642 } else if (errno == ETIMEDOUT) {
643 //qfprintf(stderr,"Connect(): ETIMEDOUT");
644 close(fd);
645 return SN_ERR_SRVTO;
646 } else {
647 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
648 //qfprintf(stderr,"Connect(): %d", errno);
649 close(fd);
650 return SN_ERR_SRVCL;
651 }
652 }
653
654 fdtab[fd].owner = s->task;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200655 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +0200656 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +0200657 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +0200658 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +0200659 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +0200660
661 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
662 fdtab[fd].peerlen = sizeof(s->srv_addr);
663
Willy Tarreauf161a342007-04-08 16:59:42 +0200664 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200665
666 fd_insert(fd);
667 if (s->srv) {
668 s->srv->cur_sess++;
669 if (s->srv->cur_sess > s->srv->cur_sess_max)
670 s->srv->cur_sess_max = s->srv->cur_sess;
671 }
672
Willy Tarreaua8b55e32007-05-13 16:08:19 +0200673 if (!tv_add_ifset(&s->req->cex, &now, &s->be->contimeout))
Willy Tarreaud7971282006-07-29 18:36:34 +0200674 tv_eternity(&s->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200675 return SN_ERR_NONE; /* connection is OK */
676}
677
678
679/*
680 * This function checks the retry count during the connect() job.
681 * It updates the session's srv_state and retries, so that the caller knows
682 * what it has to do. It uses the last connection error to set the log when
683 * it expires. It returns 1 when it has expired, and 0 otherwise.
684 */
685int srv_count_retry_down(struct session *t, int conn_err)
686{
687 /* we are in front of a retryable error */
688 t->conn_retries--;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +0200689 if (t->srv)
690 t->srv->retries++;
691 t->be->retries++;
692
Willy Tarreaubaaee002006-06-26 02:48:02 +0200693 if (t->conn_retries < 0) {
694 /* if not retryable anymore, let's abort */
Willy Tarreaud7971282006-07-29 18:36:34 +0200695 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200696 srv_close_with_err(t, conn_err, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +0100697 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200698 if (t->srv)
699 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200700 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200701
702 /* We used to have a free connection slot. Since we'll never use it,
703 * we have to inform the server that it may be used by another session.
704 */
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200705 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +0200706 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200707 return 1;
708 }
709 return 0;
710}
711
712
713/*
714 * This function performs the retryable part of the connect() job.
715 * It updates the session's srv_state and retries, so that the caller knows
716 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
717 * it needs to redispatch.
718 */
719int srv_retryable_connect(struct session *t)
720{
721 int conn_err;
722
723 /* This loop ensures that we stop before the last retry in case of a
724 * redispatchable server.
725 */
726 do {
727 /* initiate a connection to the server */
728 conn_err = connect_server(t);
729 switch (conn_err) {
730
731 case SN_ERR_NONE:
732 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
733 t->srv_state = SV_STCONN;
734 return 1;
735
736 case SN_ERR_INTERNAL:
Willy Tarreaud7971282006-07-29 18:36:34 +0200737 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200738 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +0100739 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200740 if (t->srv)
741 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200742 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200743 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200744 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +0200745 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200746 return 1;
747 }
748 /* ensure that we have enough retries left */
749 if (srv_count_retry_down(t, conn_err)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200750 return 1;
751 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200752 } while (t->srv == NULL || t->conn_retries > 0 || !(t->be->options & PR_O_REDISP));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200753
754 /* We're on our last chance, and the REDISP option was specified.
755 * We will ignore cookie and force to balance or use the dispatcher.
756 */
757 /* let's try to offer this slot to anybody */
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200758 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +0200759 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200760
761 if (t->srv)
762 t->srv->failed_conns++;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +0200763 t->be->redispatches++;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200764
765 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
766 t->srv = NULL; /* it's left to the dispatcher to choose a server */
Willy Tarreau3d300592007-03-18 18:34:41 +0100767 http_flush_cookie_flags(&t->txn);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200768 return 0;
769}
770
771
772/* This function performs the "redispatch" part of a connection attempt. It
773 * will assign a server if required, queue the connection if required, and
774 * handle errors that might arise at this level. It can change the server
775 * state. It will return 1 if it encounters an error, switches the server
776 * state, or has to queue a connection. Otherwise, it will return 0 indicating
777 * that the connection is ready to use.
778 */
779
780int srv_redispatch_connect(struct session *t)
781{
782 int conn_err;
783
784 /* We know that we don't have any connection pending, so we will
785 * try to get a new one, and wait in this state if it's queued
786 */
787 conn_err = assign_server_and_queue(t);
788 switch (conn_err) {
789 case SRV_STATUS_OK:
790 break;
791
792 case SRV_STATUS_NOSRV:
793 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaud7971282006-07-29 18:36:34 +0200794 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200795 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +0100796 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200797 if (t->srv)
798 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200799 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200800
801 return 1;
802
803 case SRV_STATUS_QUEUED:
804 /* FIXME-20060503 : we should use the queue timeout instead */
Willy Tarreaua8b55e32007-05-13 16:08:19 +0200805 if (!tv_add_ifset(&t->req->cex, &now, &t->be->contimeout))
Willy Tarreaud7971282006-07-29 18:36:34 +0200806 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200807 t->srv_state = SV_STIDLE;
808 /* do nothing else and do not wake any other session up */
809 return 1;
810
811 case SRV_STATUS_FULL:
812 case SRV_STATUS_INTERNAL:
813 default:
Willy Tarreaud7971282006-07-29 18:36:34 +0200814 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200815 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +0100816 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +0200817 if (t->srv)
818 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200819 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200820
821 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200822 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +0200823 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200824 return 1;
825 }
826 /* if we get here, it's because we got SRV_STATUS_OK, which also
827 * means that the connection has not been queued.
828 */
829 return 0;
830}
831
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200832int be_downtime(struct proxy *px) {
Krzysztof Oledzki85130942007-10-22 16:21:10 +0200833 if ((px->srv_act || px->srv_bck) && px->last_change < now.tv_sec) // ignore negative time
834 return px->down_time;
835
836 return now.tv_sec - px->last_change + px->down_time;
837}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200838
Willy Tarreaua0cbda62007-11-01 21:39:54 +0100839/* This function parses a "balance" statement in a backend section describing
840 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
841 * returns -1, it may write an error message into ther <err> buffer, for at
842 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
843 * written. The function must be called with <args> pointing to the first word
844 * after "balance".
845 */
846int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
847{
848 if (!*(args[0])) {
849 /* if no option is set, use round-robin by default */
850 curproxy->options &= ~PR_O_BALANCE;
851 curproxy->options |= PR_O_BALANCE_RR;
852 return 0;
853 }
854
855 if (!strcmp(args[0], "roundrobin")) {
856 curproxy->options &= ~PR_O_BALANCE;
857 curproxy->options |= PR_O_BALANCE_RR;
858 }
859 else if (!strcmp(args[0], "source")) {
860 curproxy->options &= ~PR_O_BALANCE;
861 curproxy->options |= PR_O_BALANCE_SH;
862 }
863 else if (!strcmp(args[0], "uri")) {
864 curproxy->options &= ~PR_O_BALANCE;
865 curproxy->options |= PR_O_BALANCE_UH;
866 }
Willy Tarreau01732802007-11-01 22:48:15 +0100867 else if (!strcmp(args[0], "url_param")) {
868 if (!*args[1]) {
869 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
870 return -1;
871 }
872 curproxy->options &= ~PR_O_BALANCE;
873 curproxy->options |= PR_O_BALANCE_PH;
874 if (curproxy->url_param_name)
875 free(curproxy->url_param_name);
876 curproxy->url_param_name = strdup(args[1]);
877 curproxy->url_param_len = strlen(args[1]);
878 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +0100879 else {
Willy Tarreau01732802007-11-01 22:48:15 +0100880 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'source', 'uri' and 'url_param' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +0100881 return -1;
882 }
883 return 0;
884}
885
Willy Tarreaubaaee002006-06-26 02:48:02 +0200886/*
887 * Local variables:
888 * c-indent-level: 8
889 * c-basic-offset: 8
890 * End:
891 */