blob: aadc65057da7ff52ec1df44d24c410d0be29c523 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaud825eef2007-05-12 22:35:00 +02004 * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020019
Willy Tarreau2dd0d472006-06-29 17:53:05 +020020#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020021#include <common/config.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010022#include <common/eb32tree.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020023#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020024
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010025#include <types/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020026#include <types/buffers.h>
27#include <types/global.h>
28#include <types/polling.h>
29#include <types/proxy.h>
30#include <types/server.h>
31#include <types/session.h>
32
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010033#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020034#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020035#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020036#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010037#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020038#include <proto/log.h>
39#include <proto/proto_http.h>
40#include <proto/queue.h>
41#include <proto/stream_sock.h>
42#include <proto/task.h>
43
Willy Tarreau77074d52006-11-12 23:57:19 +010044#ifdef CONFIG_HAP_CTTPROXY
45#include <import/ip_tproxy.h>
46#endif
Willy Tarreaubaaee002006-06-26 02:48:02 +020047
Willy Tarreau6d1a9882007-01-07 02:03:04 +010048#ifdef CONFIG_HAP_TCPSPLICE
49#include <libtcpsplice.h>
50#endif
51
Willy Tarreaub625a082007-11-26 01:15:43 +010052static inline void fwrr_remove_from_tree(struct server *s);
53static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
54static inline void fwrr_dequeue_srv(struct server *s);
55static void fwrr_get_srv(struct server *s);
56static void fwrr_queue_srv(struct server *s);
57
58/* This function returns non-zero if a server with the given weight and state
59 * is usable for LB, otherwise zero.
60 */
61static inline int srv_is_usable(int state, int weight)
62{
63 if (!weight)
64 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010065 if (state & SRV_GOINGDOWN)
66 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010067 if (!(state & SRV_RUNNING))
68 return 0;
69 return 1;
70}
71
Willy Tarreaubaaee002006-06-26 02:48:02 +020072/*
73 * This function recounts the number of usable active and backup servers for
74 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010075 * This function also recomputes the total active and backup weights. However,
76 * it does nout update tot_weight nor tot_used. Use update_backend_weight() for
77 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020078 */
Willy Tarreaub625a082007-11-26 01:15:43 +010079static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020080{
81 struct server *srv;
82
Willy Tarreau20697042007-11-15 23:26:18 +010083 px->srv_act = px->srv_bck = 0;
84 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010085 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020086 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010087 if (!srv_is_usable(srv->state, srv->eweight))
88 continue;
89
90 if (srv->state & SRV_BACKUP) {
91 if (!px->srv_bck &&
Willy Tarreau31682232007-11-29 15:38:04 +010092 !(px->lbprm.algo & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010093 px->lbprm.fbck = srv;
94 px->srv_bck++;
95 px->lbprm.tot_wbck += srv->eweight;
96 } else {
97 px->srv_act++;
98 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020099 }
100 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100101}
Willy Tarreau20697042007-11-15 23:26:18 +0100102
Willy Tarreaub625a082007-11-26 01:15:43 +0100103/* This function simply updates the backend's tot_weight and tot_used values
104 * after servers weights have been updated. It is designed to be used after
105 * recount_servers() or equivalent.
106 */
107static void update_backend_weight(struct proxy *px)
108{
Willy Tarreau20697042007-11-15 23:26:18 +0100109 if (px->srv_act) {
110 px->lbprm.tot_weight = px->lbprm.tot_wact;
111 px->lbprm.tot_used = px->srv_act;
112 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100113 else if (px->lbprm.fbck) {
114 /* use only the first backup server */
115 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
116 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100117 }
118 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100119 px->lbprm.tot_weight = px->lbprm.tot_wbck;
120 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100121 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100122}
123
124/* this function updates the map according to server <srv>'s new state */
125static void map_set_server_status_down(struct server *srv)
126{
127 struct proxy *p = srv->proxy;
128
129 if (srv->state == srv->prev_state &&
130 srv->eweight == srv->prev_eweight)
131 return;
132
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100133 if (srv_is_usable(srv->state, srv->eweight))
134 goto out_update_state;
135
Willy Tarreaub625a082007-11-26 01:15:43 +0100136 /* FIXME: could be optimized since we know what changed */
137 recount_servers(p);
138 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100139 p->lbprm.map.state |= PR_MAP_RECALC;
140 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100141 srv->prev_state = srv->state;
142 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200143}
144
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100145/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100146static void map_set_server_status_up(struct server *srv)
147{
148 struct proxy *p = srv->proxy;
149
150 if (srv->state == srv->prev_state &&
151 srv->eweight == srv->prev_eweight)
152 return;
153
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100154 if (!srv_is_usable(srv->state, srv->eweight))
155 goto out_update_state;
156
Willy Tarreaub625a082007-11-26 01:15:43 +0100157 /* FIXME: could be optimized since we know what changed */
158 recount_servers(p);
159 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100160 p->lbprm.map.state |= PR_MAP_RECALC;
161 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100162 srv->prev_state = srv->state;
163 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100164}
165
Willy Tarreau20697042007-11-15 23:26:18 +0100166/* This function recomputes the server map for proxy px. It relies on
167 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
168 * called after recount_servers(). It also expects px->lbprm.map.srv
169 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200170 */
171void recalc_server_map(struct proxy *px)
172{
173 int o, tot, flag;
174 struct server *cur, *best;
175
Willy Tarreau20697042007-11-15 23:26:18 +0100176 switch (px->lbprm.tot_used) {
177 case 0: /* no server */
178 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200179 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100180 case 1: /* only one server, just fill first entry */
181 tot = 1;
182 break;
183 default:
184 tot = px->lbprm.tot_weight;
185 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200186 }
187
Willy Tarreau20697042007-11-15 23:26:18 +0100188 /* here we *know* that we have some servers */
189 if (px->srv_act)
190 flag = SRV_RUNNING;
191 else
192 flag = SRV_RUNNING | SRV_BACKUP;
193
Willy Tarreaubaaee002006-06-26 02:48:02 +0200194 /* this algorithm gives priority to the first server, which means that
195 * it will respect the declaration order for equivalent weights, and
196 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100197 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200198 * case, where we want the first server only.
199 */
200 for (cur = px->srv; cur; cur = cur->next)
201 cur->wscore = 0;
202
203 for (o = 0; o < tot; o++) {
204 int max = 0;
205 best = NULL;
206 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100207 if (flag == (cur->state &
208 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200209 int v;
210
211 /* If we are forced to return only one server, we don't want to
212 * go further, because we would return the wrong one due to
213 * divide overflow.
214 */
215 if (tot == 1) {
216 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100217 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200218 break;
219 }
220
Willy Tarreau417fae02007-03-25 21:16:40 +0200221 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200222 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
223 if (best == NULL || v > max) {
224 max = v;
225 best = cur;
226 }
227 }
228 }
Willy Tarreau20697042007-11-15 23:26:18 +0100229 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200230 best->wscore -= tot;
231 }
Willy Tarreau20697042007-11-15 23:26:18 +0100232 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200233}
234
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100235/* This function is responsible of building the server MAP for map-based LB
236 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
237 * weights if applicable. It should be called only once per proxy, at config
238 * time.
239 */
240void init_server_map(struct proxy *p)
241{
242 struct server *srv;
243 int pgcd;
244 int act, bck;
245
Willy Tarreaub625a082007-11-26 01:15:43 +0100246 p->lbprm.set_server_status_up = map_set_server_status_up;
247 p->lbprm.set_server_status_down = map_set_server_status_down;
248 p->lbprm.update_server_eweight = NULL;
249
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100250 if (!p->srv)
251 return;
252
253 /* We will factor the weights to reduce the table,
254 * using Euclide's largest common divisor algorithm
255 */
256 pgcd = p->srv->uweight;
257 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
258 int w = srv->uweight;
259 while (w) {
260 int t = pgcd % w;
261 pgcd = w;
262 w = t;
263 }
264 }
265
266 /* It is sometimes useful to know what factor to apply
267 * to the backend's effective weight to know its real
268 * weight.
269 */
270 p->lbprm.wmult = pgcd;
271
272 act = bck = 0;
273 for (srv = p->srv; srv; srv = srv->next) {
274 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100275 srv->prev_eweight = srv->eweight;
276 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100277 if (srv->state & SRV_BACKUP)
278 bck += srv->eweight;
279 else
280 act += srv->eweight;
281 }
282
283 /* this is the largest map we will ever need for this servers list */
284 if (act < bck)
285 act = bck;
286
287 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
288 /* recounts servers and their weights */
289 p->lbprm.map.state = PR_MAP_RECALC;
290 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100291 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100292 recalc_server_map(p);
293}
294
Willy Tarreaub625a082007-11-26 01:15:43 +0100295/* This function updates the server trees according to server <srv>'s new
296 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100297 * It is not important whether the server was already down or not. It is not
298 * important either that the new state is completely down (the caller may not
299 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100300 */
301static void fwrr_set_server_status_down(struct server *srv)
302{
303 struct proxy *p = srv->proxy;
304 struct fwrr_group *grp;
305
306 if (srv->state == srv->prev_state &&
307 srv->eweight == srv->prev_eweight)
308 return;
309
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100310 if (srv_is_usable(srv->state, srv->eweight))
311 goto out_update_state;
312
Willy Tarreaub625a082007-11-26 01:15:43 +0100313 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
314 /* server was already down */
315 goto out_update_backend;
316
317 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
318 grp->next_weight -= srv->prev_eweight;
319
320 if (srv->state & SRV_BACKUP) {
321 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
322 p->srv_bck--;
323
324 if (srv == p->lbprm.fbck) {
325 /* we lost the first backup server in a single-backup
326 * configuration, we must search another one.
327 */
328 struct server *srv2 = p->lbprm.fbck;
329 do {
330 srv2 = srv2->next;
331 } while (srv2 &&
332 !((srv2->state & SRV_BACKUP) &&
333 srv_is_usable(srv2->state, srv2->eweight)));
334 p->lbprm.fbck = srv2;
335 }
336 } else {
337 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
338 p->srv_act--;
339 }
340
341 fwrr_dequeue_srv(srv);
342 fwrr_remove_from_tree(srv);
343
344out_update_backend:
345 /* check/update tot_used, tot_weight */
346 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100347 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100348 srv->prev_state = srv->state;
349 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100350}
351
352/* This function updates the server trees according to server <srv>'s new
353 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100354 * It is not important whether the server was already down or not. It is not
355 * important either that the new state is completely UP (the caller may not
356 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100357 * the weight of a server which was already up.
358 */
359static void fwrr_set_server_status_up(struct server *srv)
360{
361 struct proxy *p = srv->proxy;
362 struct fwrr_group *grp;
363
364 if (srv->state == srv->prev_state &&
365 srv->eweight == srv->prev_eweight)
366 return;
367
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100368 if (!srv_is_usable(srv->state, srv->eweight))
369 goto out_update_state;
370
Willy Tarreaub625a082007-11-26 01:15:43 +0100371 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
372 /* server was already up */
373 goto out_update_backend;
374
375 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
376 grp->next_weight += srv->eweight;
377
378 if (srv->state & SRV_BACKUP) {
379 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
380 p->srv_bck++;
381
382 if (p->lbprm.fbck) {
383 /* we may have restored a backup server prior to fbck,
384 * in which case it should replace it.
385 */
386 struct server *srv2 = srv;
387 do {
388 srv2 = srv2->next;
389 } while (srv2 && (srv2 != p->lbprm.fbck));
390 if (srv2)
391 p->lbprm.fbck = srv;
392 }
393 } else {
394 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
395 p->srv_act++;
396 }
397
398 /* note that eweight cannot be 0 here */
399 fwrr_get_srv(srv);
400 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
401 fwrr_queue_srv(srv);
402
403out_update_backend:
404 /* check/update tot_used, tot_weight */
405 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100406 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100407 srv->prev_state = srv->state;
408 srv->prev_eweight = srv->eweight;
409}
410
411/* This function must be called after an update to server <srv>'s effective
412 * weight. It may be called after a state change too.
413 */
414static void fwrr_update_server_weight(struct server *srv)
415{
416 int old_state, new_state;
417 struct proxy *p = srv->proxy;
418 struct fwrr_group *grp;
419
420 if (srv->state == srv->prev_state &&
421 srv->eweight == srv->prev_eweight)
422 return;
423
424 /* If changing the server's weight changes its state, we simply apply
425 * the procedures we already have for status change. If the state
426 * remains down, the server is not in any tree, so it's as easy as
427 * updating its values. If the state remains up with different weights,
428 * there are some computations to perform to find a new place and
429 * possibly a new tree for this server.
430 */
431
432 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
433 new_state = srv_is_usable(srv->state, srv->eweight);
434
435 if (!old_state && !new_state) {
436 srv->prev_state = srv->state;
437 srv->prev_eweight = srv->eweight;
438 return;
439 }
440 else if (!old_state && new_state) {
441 fwrr_set_server_status_up(srv);
442 return;
443 }
444 else if (old_state && !new_state) {
445 fwrr_set_server_status_down(srv);
446 return;
447 }
448
449 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
450 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
451
452 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
453 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
454
455 if (srv->lb_tree == grp->init) {
456 fwrr_dequeue_srv(srv);
457 fwrr_queue_by_weight(grp->init, srv);
458 }
459 else if (!srv->lb_tree) {
460 /* FIXME: server was down. This is not possible right now but
461 * may be needed soon for slowstart or graceful shutdown.
462 */
463 fwrr_dequeue_srv(srv);
464 fwrr_get_srv(srv);
465 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
466 fwrr_queue_srv(srv);
467 } else {
468 /* The server is either active or in the next queue. If it's
469 * still in the active queue and it has not consumed all of its
470 * places, let's adjust its next position.
471 */
472 fwrr_get_srv(srv);
473
474 if (srv->eweight > 0) {
475 int prev_next = srv->npos;
476 int step = grp->next_weight / srv->eweight;
477
478 srv->npos = srv->lpos + step;
479 srv->rweight = 0;
480
481 if (srv->npos > prev_next)
482 srv->npos = prev_next;
483 if (srv->npos < grp->curr_pos + 2)
484 srv->npos = grp->curr_pos + step;
485 } else {
486 /* push it into the next tree */
487 srv->npos = grp->curr_pos + grp->curr_weight;
488 }
489
490 fwrr_dequeue_srv(srv);
491 fwrr_queue_srv(srv);
492 }
493
494 update_backend_weight(p);
495 srv->prev_state = srv->state;
496 srv->prev_eweight = srv->eweight;
497}
498
499/* Remove a server from a tree. It must have previously been dequeued. This
500 * function is meant to be called when a server is going down or has its
501 * weight disabled.
502 */
503static inline void fwrr_remove_from_tree(struct server *s)
504{
505 s->lb_tree = NULL;
506}
507
508/* Queue a server in the weight tree <root>, assuming the weight is >0.
509 * We want to sort them by inverted weights, because we need to place
510 * heavy servers first in order to get a smooth distribution.
511 */
512static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
513{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100514 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100515 eb32_insert(root, &s->lb_node);
516 s->lb_tree = root;
517}
518
519/* This function is responsible for building the weight trees in case of fast
520 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
521 * ratio. Both active and backup groups are initialized.
522 */
523void fwrr_init_server_groups(struct proxy *p)
524{
525 struct server *srv;
526 struct eb_root init_head = EB_ROOT;
527
528 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
529 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
530 p->lbprm.update_server_eweight = fwrr_update_server_weight;
531
532 p->lbprm.wdiv = BE_WEIGHT_SCALE;
533 for (srv = p->srv; srv; srv = srv->next) {
534 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
535 srv->prev_state = srv->state;
536 }
537
538 recount_servers(p);
539 update_backend_weight(p);
540
541 /* prepare the active servers group */
542 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
543 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
544 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
545 p->lbprm.fwrr.act.t1 = init_head;
546 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
547 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
548
549 /* prepare the backup servers group */
550 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
551 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
552 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
553 p->lbprm.fwrr.bck.t1 = init_head;
554 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
555 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
556
557 /* queue active and backup servers in two distinct groups */
558 for (srv = p->srv; srv; srv = srv->next) {
559 if (!srv_is_usable(srv->state, srv->eweight))
560 continue;
561 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
562 p->lbprm.fwrr.bck.init :
563 p->lbprm.fwrr.act.init,
564 srv);
565 }
566}
567
568/* simply removes a server from a weight tree */
569static inline void fwrr_dequeue_srv(struct server *s)
570{
571 eb32_delete(&s->lb_node);
572}
573
574/* queues a server into the appropriate group and tree depending on its
575 * backup status, and ->npos. If the server is disabled, simply assign
576 * it to the NULL tree.
577 */
578static void fwrr_queue_srv(struct server *s)
579{
580 struct proxy *p = s->proxy;
581 struct fwrr_group *grp;
582
583 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
584
585 /* Delay everything which does not fit into the window and everything
586 * which does not fit into the theorical new window.
587 */
588 if (!srv_is_usable(s->state, s->eweight)) {
589 fwrr_remove_from_tree(s);
590 }
591 else if (s->eweight <= 0 ||
592 s->npos >= 2 * grp->curr_weight ||
593 s->npos >= grp->curr_weight + grp->next_weight) {
594 /* put into next tree, and readjust npos in case we could
595 * finally take this back to current. */
596 s->npos -= grp->curr_weight;
597 fwrr_queue_by_weight(grp->next, s);
598 }
599 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100600 /* The sorting key is stored in units of s->npos * user_weight
601 * in order to avoid overflows. As stated in backend.h, the
602 * lower the scale, the rougher the weights modulation, and the
603 * higher the scale, the lower the number of servers without
604 * overflow. With this formula, the result is always positive,
605 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100606 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100607 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
608 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
609
610 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100611 s->lb_tree = &grp->curr;
612 }
613}
614
615/* prepares a server when extracting it from the "init" tree */
616static inline void fwrr_get_srv_init(struct server *s)
617{
618 s->npos = s->rweight = 0;
619}
620
621/* prepares a server when extracting it from the "next" tree */
622static inline void fwrr_get_srv_next(struct server *s)
623{
624 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
625 &s->proxy->lbprm.fwrr.bck :
626 &s->proxy->lbprm.fwrr.act;
627
628 s->npos += grp->curr_weight;
629}
630
631/* prepares a server when it was marked down */
632static inline void fwrr_get_srv_down(struct server *s)
633{
634 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
635 &s->proxy->lbprm.fwrr.bck :
636 &s->proxy->lbprm.fwrr.act;
637
638 s->npos = grp->curr_pos;
639}
640
641/* prepares a server when extracting it from its tree */
642static void fwrr_get_srv(struct server *s)
643{
644 struct proxy *p = s->proxy;
645 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
646 &p->lbprm.fwrr.bck :
647 &p->lbprm.fwrr.act;
648
649 if (s->lb_tree == grp->init) {
650 fwrr_get_srv_init(s);
651 }
652 else if (s->lb_tree == grp->next) {
653 fwrr_get_srv_next(s);
654 }
655 else if (s->lb_tree == NULL) {
656 fwrr_get_srv_down(s);
657 }
658}
659
660/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
661 * when this happens, and "next" filled with servers sorted by weights.
662 */
663static inline void fwrr_switch_trees(struct fwrr_group *grp)
664{
665 struct eb_root *swap;
666 swap = grp->init;
667 grp->init = grp->next;
668 grp->next = swap;
669 grp->curr_weight = grp->next_weight;
670 grp->curr_pos = grp->curr_weight;
671}
672
673/* return next server from the current tree in FWRR group <grp>, or a server
674 * from the "init" tree if appropriate. If both trees are empty, return NULL.
675 */
676static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
677{
678 struct eb32_node *node;
679 struct server *s;
680
681 node = eb32_first(&grp->curr);
682 s = eb32_entry(node, struct server, lb_node);
683
684 if (!node || s->npos > grp->curr_pos) {
685 /* either we have no server left, or we have a hole */
686 struct eb32_node *node2;
687 node2 = eb32_first(grp->init);
688 if (node2) {
689 node = node2;
690 s = eb32_entry(node, struct server, lb_node);
691 fwrr_get_srv_init(s);
692 if (s->eweight == 0) /* FIXME: is it possible at all ? */
693 node = NULL;
694 }
695 }
696 if (node)
697 return s;
698 else
699 return NULL;
700}
701
702/* Computes next position of server <s> in the group. It is mandatory for <s>
703 * to have a non-zero, positive eweight.
704*/
705static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
706{
707 if (!s->npos) {
708 /* first time ever for this server */
709 s->lpos = grp->curr_pos;
710 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
711 s->rweight += grp->next_weight % s->eweight;
712
713 if (s->rweight >= s->eweight) {
714 s->rweight -= s->eweight;
715 s->npos++;
716 }
717 } else {
718 s->lpos = s->npos;
719 s->npos += grp->next_weight / s->eweight;
720 s->rweight += grp->next_weight % s->eweight;
721
722 if (s->rweight >= s->eweight) {
723 s->rweight -= s->eweight;
724 s->npos++;
725 }
726 }
727}
728
729/* Return next server from the current tree in backend <p>, or a server from
730 * the init tree if appropriate. If both trees are empty, return NULL.
731 * Saturated servers are skipped and requeued.
732 */
733static struct server *fwrr_get_next_server(struct proxy *p)
734{
735 struct server *srv;
736 struct fwrr_group *grp;
737 struct server *full;
738 int switched;
739
740 if (p->srv_act)
741 grp = &p->lbprm.fwrr.act;
742 else if (p->lbprm.fbck)
743 return p->lbprm.fbck;
744 else if (p->srv_bck)
745 grp = &p->lbprm.fwrr.bck;
746 else
747 return NULL;
748
749 switched = 0;
750 full = NULL; /* NULL-terminated list of saturated servers */
751 while (1) {
752 /* if we see an empty group, let's first try to collect weights
753 * which might have recently changed.
754 */
755 if (!grp->curr_weight)
756 grp->curr_pos = grp->curr_weight = grp->next_weight;
757
758 /* get first server from the "current" tree. When the end of
759 * the tree is reached, we may have to switch, but only once.
760 */
761 while (1) {
762 srv = fwrr_get_server_from_group(grp);
763 if (srv)
764 break;
765 if (switched)
766 goto requeue_servers;
767 switched = 1;
768 fwrr_switch_trees(grp);
769
770 }
771
772 /* OK, we have a server. However, it may be saturated, in which
773 * case we don't want to reconsider it for now. We'll update
774 * its position and dequeue it anyway, so that we can move it
775 * to a better place afterwards.
776 */
777 fwrr_update_position(grp, srv);
778 fwrr_dequeue_srv(srv);
779 grp->curr_pos++;
780 if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv))
781 break;
782
783 /* the server is saturated, let's chain it for later reinsertion */
784 srv->next_full = full;
785 full = srv;
786 }
787
788 /* OK, we got the best server, let's update it */
789 fwrr_queue_srv(srv);
790
791 requeue_servers:
792 if (unlikely(full)) {
793 if (switched) {
794 /* the tree has switched, requeue all extracted servers
795 * into "init", because their place was lost, and only
796 * their weight matters.
797 */
798 do {
799 fwrr_queue_by_weight(grp->init, full);
800 full = full->next_full;
801 } while (full);
802 } else {
803 /* requeue all extracted servers just as if they were consumed
804 * so that they regain their expected place.
805 */
806 do {
807 fwrr_queue_srv(full);
808 full = full->next_full;
809 } while (full);
810 }
811 }
812 return srv;
813}
814
Willy Tarreau01732802007-11-01 22:48:15 +0100815/*
816 * This function tries to find a running server for the proxy <px> following
817 * the URL parameter hash method. It looks for a specific parameter in the
818 * URL and hashes it to compute the server ID. This is useful to optimize
819 * performance by avoiding bounces between servers in contexts where sessions
820 * are shared but cookies are not usable. If the parameter is not found, NULL
821 * is returned. If any server is found, it will be returned. If no valid server
822 * is found, NULL is returned.
823 *
824 */
825struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
826{
827 unsigned long hash = 0;
828 char *p;
829 int plen;
830
Willy Tarreau20697042007-11-15 23:26:18 +0100831 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +0100832 return NULL;
833
Willy Tarreau20697042007-11-15 23:26:18 +0100834 if (px->lbprm.map.state & PR_MAP_RECALC)
835 recalc_server_map(px);
836
Willy Tarreau01732802007-11-01 22:48:15 +0100837 p = memchr(uri, '?', uri_len);
838 if (!p)
839 return NULL;
840 p++;
841
842 uri_len -= (p - uri);
843 plen = px->url_param_len;
844
845 if (uri_len <= plen)
846 return NULL;
847
848 while (uri_len > plen) {
849 /* Look for the parameter name followed by an equal symbol */
850 if (p[plen] == '=') {
851 /* skip the equal symbol */
852 uri = p;
853 p += plen + 1;
854 uri_len -= plen + 1;
855 if (memcmp(uri, px->url_param_name, plen) == 0) {
856 /* OK, we have the parameter here at <uri>, and
857 * the value after the equal sign, at <p>
858 */
859 while (uri_len && *p != '&') {
860 hash = *p + (hash << 6) + (hash << 16) - hash;
861 uri_len--;
862 p++;
863 }
Willy Tarreau20697042007-11-15 23:26:18 +0100864 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +0100865 }
866 }
867
868 /* skip to next parameter */
869 uri = p;
870 p = memchr(uri, '&', uri_len);
871 if (!p)
872 return NULL;
873 p++;
874 uri_len -= (p - uri);
875 }
876 return NULL;
877}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200878
879/*
880 * This function marks the session as 'assigned' in direct or dispatch modes,
881 * or tries to assign one in balance mode, according to the algorithm. It does
882 * nothing if the session had already been assigned a server.
883 *
884 * It may return :
885 * SRV_STATUS_OK if everything is OK. s->srv will be valid.
886 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
887 * SRV_STATUS_FULL if all servers are saturated. s->srv = NULL.
888 * SRV_STATUS_INTERNAL for other unrecoverable errors.
889 *
890 * Upon successful return, the session flag SN_ASSIGNED to indicate that it does
891 * not need to be called anymore. This usually means that s->srv can be trusted
892 * in balance and direct modes. This flag is not cleared, so it's to the caller
893 * to clear it if required (eg: redispatch).
894 *
895 */
896
897int assign_server(struct session *s)
898{
899#ifdef DEBUG_FULL
900 fprintf(stderr,"assign_server : s=%p\n",s);
901#endif
902
903 if (s->pend_pos)
904 return SRV_STATUS_INTERNAL;
905
906 if (!(s->flags & SN_ASSIGNED)) {
Willy Tarreau31682232007-11-29 15:38:04 +0100907 if (s->be->lbprm.algo & BE_LB_ALGO) {
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100908 int len;
909
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100910 if (s->flags & SN_DIRECT) {
911 s->flags |= SN_ASSIGNED;
912 return SRV_STATUS_OK;
913 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100914
Willy Tarreaub625a082007-11-26 01:15:43 +0100915 if (!s->be->lbprm.tot_weight)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200916 return SRV_STATUS_NOSRV;
917
Willy Tarreau31682232007-11-29 15:38:04 +0100918 switch (s->be->lbprm.algo & BE_LB_ALGO) {
919 case BE_LB_ALGO_RR:
Willy Tarreaub625a082007-11-26 01:15:43 +0100920 s->srv = fwrr_get_next_server(s->be);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200921 if (!s->srv)
922 return SRV_STATUS_FULL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100923 break;
Willy Tarreau31682232007-11-29 15:38:04 +0100924 case BE_LB_ALGO_SH:
Willy Tarreaubaaee002006-06-26 02:48:02 +0200925 if (s->cli_addr.ss_family == AF_INET)
926 len = 4;
927 else if (s->cli_addr.ss_family == AF_INET6)
928 len = 16;
929 else /* unknown IP family */
930 return SRV_STATUS_INTERNAL;
931
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200932 s->srv = get_server_sh(s->be,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200933 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
934 len);
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100935 break;
Willy Tarreau31682232007-11-29 15:38:04 +0100936 case BE_LB_ALGO_UH:
Willy Tarreau2fcb5002007-05-08 13:35:26 +0200937 /* URI hashing */
938 s->srv = get_server_uh(s->be,
939 s->txn.req.sol + s->txn.req.sl.rq.u,
940 s->txn.req.sl.rq.u_l);
Willy Tarreau01732802007-11-01 22:48:15 +0100941 break;
Willy Tarreau31682232007-11-29 15:38:04 +0100942 case BE_LB_ALGO_PH:
Willy Tarreau01732802007-11-01 22:48:15 +0100943 /* URL Parameter hashing */
944 s->srv = get_server_ph(s->be,
945 s->txn.req.sol + s->txn.req.sl.rq.u,
946 s->txn.req.sl.rq.u_l);
947 if (!s->srv) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100948 /* parameter not found, fall back to round robin on the map */
Willy Tarreau01732802007-11-01 22:48:15 +0100949 s->srv = get_server_rr_with_conns(s->be);
950 if (!s->srv)
951 return SRV_STATUS_FULL;
952 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100953 break;
954 default:
955 /* unknown balancing algorithm */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200956 return SRV_STATUS_INTERNAL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100957 }
Willy Tarreauddbb82f2007-12-05 10:34:49 +0100958 s->be->cum_lbconn++;
959 s->srv->cum_lbconn++;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200960 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +0100961 else if (s->be->options & PR_O_HTTP_PROXY) {
962 if (!s->srv_addr.sin_addr.s_addr)
963 return SRV_STATUS_NOSRV;
964 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200965 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100966 !(s->fe->options & PR_O_TRANSP)) {
Willy Tarreau1a1158b2007-01-20 11:07:46 +0100967 return SRV_STATUS_NOSRV;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100968 }
969 s->flags |= SN_ASSIGNED;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200970 }
971 return SRV_STATUS_OK;
972}
973
974
975/*
976 * This function assigns a server address to a session, and sets SN_ADDR_SET.
977 * The address is taken from the currently assigned server, or from the
978 * dispatch or transparent address.
979 *
980 * It may return :
981 * SRV_STATUS_OK if everything is OK.
982 * SRV_STATUS_INTERNAL for other unrecoverable errors.
983 *
984 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
985 * not cleared, so it's to the caller to clear it if required.
986 *
987 */
988int assign_server_address(struct session *s)
989{
990#ifdef DEBUG_FULL
991 fprintf(stderr,"assign_server_address : s=%p\n",s);
992#endif
993
Willy Tarreau31682232007-11-29 15:38:04 +0100994 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200995 /* A server is necessarily known for this session */
996 if (!(s->flags & SN_ASSIGNED))
997 return SRV_STATUS_INTERNAL;
998
999 s->srv_addr = s->srv->addr;
1000
1001 /* if this server remaps proxied ports, we'll use
1002 * the port the client connected to with an offset. */
1003 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001004 if (!(s->fe->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
1005 get_frt_addr(s);
1006 if (s->frt_addr.ss_family == AF_INET) {
1007 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1008 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1009 } else {
1010 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1011 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1012 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001013 }
1014 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001015 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001016 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001017 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001018 }
Willy Tarreau73de9892006-11-30 11:40:23 +01001019 else if (s->fe->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001020 /* in transparent mode, use the original dest addr if no dispatch specified */
1021 socklen_t salen = sizeof(s->srv_addr);
1022
1023 if (get_original_dst(s->cli_fd, &s->srv_addr, &salen) == -1) {
1024 qfprintf(stderr, "Cannot get original server address.\n");
1025 return SRV_STATUS_INTERNAL;
1026 }
1027 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001028 else if (s->be->options & PR_O_HTTP_PROXY) {
1029 /* If HTTP PROXY option is set, then server is already assigned
1030 * during incoming client request parsing. */
1031 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001032 else {
1033 /* no server and no LB algorithm ! */
1034 return SRV_STATUS_INTERNAL;
1035 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001036
1037 s->flags |= SN_ADDR_SET;
1038 return SRV_STATUS_OK;
1039}
1040
1041
1042/* This function assigns a server to session <s> if required, and can add the
1043 * connection to either the assigned server's queue or to the proxy's queue.
1044 *
1045 * Returns :
1046 *
1047 * SRV_STATUS_OK if everything is OK.
1048 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1049 * SRV_STATUS_QUEUED if the connection has been queued.
1050 * SRV_STATUS_FULL if the server(s) is/are saturated and the
1051 * connection could not be queued.
1052 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1053 *
1054 */
1055int assign_server_and_queue(struct session *s)
1056{
1057 struct pendconn *p;
1058 int err;
1059
1060 if (s->pend_pos)
1061 return SRV_STATUS_INTERNAL;
1062
1063 if (s->flags & SN_ASSIGNED) {
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001064 if (s->srv && s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue) {
1065 s->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
1066 s->srv = NULL;
1067 http_flush_cookie_flags(&s->txn);
1068 } else {
1069 /* a server does not need to be assigned, perhaps because we're in
1070 * direct mode, or in dispatch or transparent modes where the server
1071 * is not needed.
1072 */
1073 if (s->srv &&
1074 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1075 p = pendconn_add(s);
1076 if (p)
1077 return SRV_STATUS_QUEUED;
1078 else
1079 return SRV_STATUS_FULL;
1080 }
1081 return SRV_STATUS_OK;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001082 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001083 }
1084
1085 /* a server needs to be assigned */
1086 err = assign_server(s);
1087 switch (err) {
1088 case SRV_STATUS_OK:
1089 /* in balance mode, we might have servers with connection limits */
1090 if (s->srv &&
1091 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1092 p = pendconn_add(s);
1093 if (p)
1094 return SRV_STATUS_QUEUED;
1095 else
1096 return SRV_STATUS_FULL;
1097 }
1098 return SRV_STATUS_OK;
1099
1100 case SRV_STATUS_FULL:
1101 /* queue this session into the proxy's queue */
1102 p = pendconn_add(s);
1103 if (p)
1104 return SRV_STATUS_QUEUED;
1105 else
1106 return SRV_STATUS_FULL;
1107
1108 case SRV_STATUS_NOSRV:
1109 case SRV_STATUS_INTERNAL:
1110 return err;
1111 default:
1112 return SRV_STATUS_INTERNAL;
1113 }
1114}
1115
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001116/* Binds ipv4 address <local> to socket <fd>, unless <flags> is set, in which
1117 * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
1118 * - 0 : ignore remote address (may even be a NULL pointer)
1119 * - 1 : use provided address
1120 * - 2 : use provided port
1121 * - 3 : use both
1122 *
1123 * The function supports multiple foreign binding methods :
1124 * - linux_tproxy: we directly bind to the foreign address
1125 * - cttproxy: we bind to a local address then nat.
1126 * The second one can be used as a fallback for the first one.
1127 * This function returns 0 when everything's OK, 1 if it could not bind, to the
1128 * local address, 2 if it could not bind to the foreign address.
1129 */
1130static int bind_ipv4(int fd, int flags, struct sockaddr_in *local, struct sockaddr_in *remote)
1131{
1132 struct sockaddr_in bind_addr;
1133 int foreign_ok = 0;
1134 int ret;
1135
1136#ifdef CONFIG_HAP_LINUX_TPROXY
1137 static int ip_transp_working = 1;
1138 if (flags && ip_transp_working) {
Willy Tarreau0a459892008-01-13 17:37:16 +01001139 if (setsockopt(fd, SOL_IP, IP_TRANSPARENT, (char *) &one, sizeof(one)) == 0
1140 || setsockopt(fd, SOL_IP, IP_FREEBIND, (char *) &one, sizeof(one)) == 0)
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001141 foreign_ok = 1;
1142 else
1143 ip_transp_working = 0;
1144 }
1145#endif
1146
1147 if (flags) {
1148 memset(&bind_addr, 0, sizeof(bind_addr));
1149 if (flags & 1)
1150 bind_addr.sin_addr = remote->sin_addr;
1151 if (flags & 2)
1152 bind_addr.sin_port = remote->sin_port;
1153 }
1154
1155 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one));
1156 if (foreign_ok) {
1157 ret = bind(fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
1158 if (ret < 0)
1159 return 2;
1160 }
1161 else {
1162 ret = bind(fd, (struct sockaddr *)local, sizeof(*local));
1163 if (ret < 0)
1164 return 1;
1165 }
1166
1167 if (!flags)
1168 return 0;
1169
1170#ifdef CONFIG_HAP_CTTPROXY
1171 if (!foreign_ok) {
1172 struct in_tproxy itp1, itp2;
1173 memset(&itp1, 0, sizeof(itp1));
1174
1175 itp1.op = TPROXY_ASSIGN;
1176 itp1.v.addr.faddr = bind_addr.sin_addr;
1177 itp1.v.addr.fport = bind_addr.sin_port;
1178
1179 /* set connect flag on socket */
1180 itp2.op = TPROXY_FLAGS;
1181 itp2.v.flags = ITP_CONNECT | ITP_ONCE;
1182
1183 if (setsockopt(fd, SOL_IP, IP_TPROXY, &itp1, sizeof(itp1)) != -1 &&
1184 setsockopt(fd, SOL_IP, IP_TPROXY, &itp2, sizeof(itp2)) != -1) {
1185 foreign_ok = 1;
1186 }
1187 }
1188#endif
1189
1190 if (!foreign_ok) {
1191 /* we could not bind to a foreign address */
1192 close(fd);
1193 return 2;
1194 }
1195
1196 return 0;
1197}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001198
1199/*
1200 * This function initiates a connection to the server assigned to this session
1201 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1202 * It can return one of :
1203 * - SN_ERR_NONE if everything's OK
1204 * - SN_ERR_SRVTO if there are no more servers
1205 * - SN_ERR_SRVCL if the connection was refused by the server
1206 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1207 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1208 * - SN_ERR_INTERNAL for any other purely internal errors
1209 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1210 */
1211int connect_server(struct session *s)
1212{
1213 int fd, err;
1214
1215 if (!(s->flags & SN_ADDR_SET)) {
1216 err = assign_server_address(s);
1217 if (err != SRV_STATUS_OK)
1218 return SN_ERR_INTERNAL;
1219 }
1220
1221 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
1222 qfprintf(stderr, "Cannot get a server socket.\n");
1223
1224 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001225 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001226 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001227 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001228 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001229 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001230 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001231 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001232 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001233 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001234 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001235 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001236 /* this is a resource error */
1237 return SN_ERR_RESOURCE;
1238 }
1239
1240 if (fd >= global.maxsock) {
1241 /* do not log anything there, it's a normal condition when this option
1242 * is used to serialize connections to a server !
1243 */
1244 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1245 close(fd);
1246 return SN_ERR_PRXCOND; /* it is a configuration limit */
1247 }
1248
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001249#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001250 if ((s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001251 /* TCP splicing supported by both FE and BE */
1252 tcp_splice_initfd(s->cli_fd, fd);
1253 }
1254#endif
1255
Willy Tarreaubaaee002006-06-26 02:48:02 +02001256 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1257 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1258 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1259 close(fd);
1260 return SN_ERR_INTERNAL;
1261 }
1262
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001263 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001264 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1265
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001266 if (s->be->options & PR_O_TCP_NOLING)
1267 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1268
Willy Tarreaubaaee002006-06-26 02:48:02 +02001269 /* allow specific binding :
1270 * - server-specific at first
1271 * - proxy-specific next
1272 */
1273 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001274 struct sockaddr_in *remote = NULL;
1275 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001276
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001277 if (s->srv->state & SRV_TPROXY_MASK) {
Willy Tarreau77074d52006-11-12 23:57:19 +01001278 switch (s->srv->state & SRV_TPROXY_MASK) {
1279 case SRV_TPROXY_ADDR:
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001280 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1281 flags = 3;
Willy Tarreau77074d52006-11-12 23:57:19 +01001282 break;
1283 case SRV_TPROXY_CLI:
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001284 flags |= 2;
Willy Tarreau77074d52006-11-12 23:57:19 +01001285 /* fall through */
1286 case SRV_TPROXY_CIP:
1287 /* FIXME: what can we do if the client connects in IPv6 ? */
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001288 flags |= 1;
1289 remote = (struct sockaddr_in *)&s->cli_addr;
Willy Tarreau77074d52006-11-12 23:57:19 +01001290 break;
1291 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001292 }
Willy Tarreau77074d52006-11-12 23:57:19 +01001293
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001294 ret = bind_ipv4(fd, flags, &s->srv->source_addr, remote);
1295 if (ret) {
1296 close(fd);
1297 if (ret == 1) {
1298 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1299 s->be->id, s->srv->id);
1300 send_log(s->be, LOG_EMERG,
1301 "Cannot bind to source address before connect() for server %s/%s.\n",
1302 s->be->id, s->srv->id);
1303 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001304 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001305 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001306 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001307 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001308 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001309 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001310 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001311 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001312 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001313 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001314 struct sockaddr_in *remote = NULL;
1315 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001316
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001317 if (s->be->options & PR_O_TPXY_MASK) {
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001318 switch (s->be->options & PR_O_TPXY_MASK) {
Willy Tarreau77074d52006-11-12 23:57:19 +01001319 case PR_O_TPXY_ADDR:
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001320 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1321 flags = 3;
Willy Tarreau77074d52006-11-12 23:57:19 +01001322 break;
1323 case PR_O_TPXY_CLI:
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001324 flags |= 2;
Willy Tarreau77074d52006-11-12 23:57:19 +01001325 /* fall through */
1326 case PR_O_TPXY_CIP:
1327 /* FIXME: what can we do if the client connects in IPv6 ? */
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001328 flags |= 1;
1329 remote = (struct sockaddr_in *)&s->cli_addr;
Willy Tarreau77074d52006-11-12 23:57:19 +01001330 break;
1331 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001332 }
Willy Tarreau77074d52006-11-12 23:57:19 +01001333
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001334 ret = bind_ipv4(fd, flags, &s->srv->source_addr, remote);
1335 if (ret) {
1336 close(fd);
1337 if (ret == 1) {
1338 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1339 s->be->id);
1340 send_log(s->be, LOG_EMERG,
1341 "Cannot bind to source address before connect() for proxy %s.\n",
1342 s->be->id);
1343 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001344 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001345 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001346 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001347 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1348 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001349 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001350 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001351 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001352 }
1353
1354 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1355 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1356
1357 if (errno == EAGAIN || errno == EADDRINUSE) {
1358 char *msg;
1359 if (errno == EAGAIN) /* no free ports left, try again later */
1360 msg = "no free ports";
1361 else
1362 msg = "local address already in use";
1363
1364 qfprintf(stderr,"Cannot connect: %s.\n",msg);
1365 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001366 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001367 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001368 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001369 return SN_ERR_RESOURCE;
1370 } else if (errno == ETIMEDOUT) {
1371 //qfprintf(stderr,"Connect(): ETIMEDOUT");
1372 close(fd);
1373 return SN_ERR_SRVTO;
1374 } else {
1375 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1376 //qfprintf(stderr,"Connect(): %d", errno);
1377 close(fd);
1378 return SN_ERR_SRVCL;
1379 }
1380 }
1381
1382 fdtab[fd].owner = s->task;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001383 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +02001384 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001385 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001386 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001387 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001388
1389 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1390 fdtab[fd].peerlen = sizeof(s->srv_addr);
1391
Willy Tarreauf161a342007-04-08 16:59:42 +02001392 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001393
1394 fd_insert(fd);
1395 if (s->srv) {
1396 s->srv->cur_sess++;
1397 if (s->srv->cur_sess > s->srv->cur_sess_max)
1398 s->srv->cur_sess_max = s->srv->cur_sess;
1399 }
1400
Willy Tarreaud7c30f92007-12-03 01:38:36 +01001401 if (!tv_add_ifset(&s->req->cex, &now, &s->be->timeout.connect))
Willy Tarreaud7971282006-07-29 18:36:34 +02001402 tv_eternity(&s->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001403 return SN_ERR_NONE; /* connection is OK */
1404}
1405
1406
1407/*
1408 * This function checks the retry count during the connect() job.
1409 * It updates the session's srv_state and retries, so that the caller knows
1410 * what it has to do. It uses the last connection error to set the log when
1411 * it expires. It returns 1 when it has expired, and 0 otherwise.
1412 */
1413int srv_count_retry_down(struct session *t, int conn_err)
1414{
1415 /* we are in front of a retryable error */
1416 t->conn_retries--;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +02001417 if (t->srv)
1418 t->srv->retries++;
1419 t->be->retries++;
1420
Willy Tarreaubaaee002006-06-26 02:48:02 +02001421 if (t->conn_retries < 0) {
1422 /* if not retryable anymore, let's abort */
Willy Tarreaud7971282006-07-29 18:36:34 +02001423 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001424 srv_close_with_err(t, conn_err, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001425 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001426 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001427 t->srv->cum_sess++;
1428 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001429 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001430 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001431
1432 /* We used to have a free connection slot. Since we'll never use it,
1433 * we have to inform the server that it may be used by another session.
1434 */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001435 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001436 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001437 return 1;
1438 }
1439 return 0;
1440}
1441
1442
1443/*
1444 * This function performs the retryable part of the connect() job.
1445 * It updates the session's srv_state and retries, so that the caller knows
1446 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
1447 * it needs to redispatch.
1448 */
1449int srv_retryable_connect(struct session *t)
1450{
1451 int conn_err;
1452
1453 /* This loop ensures that we stop before the last retry in case of a
1454 * redispatchable server.
1455 */
1456 do {
1457 /* initiate a connection to the server */
1458 conn_err = connect_server(t);
1459 switch (conn_err) {
1460
1461 case SN_ERR_NONE:
1462 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
1463 t->srv_state = SV_STCONN;
Willy Tarreau98937b82007-12-10 15:05:42 +01001464 if (t->srv)
1465 t->srv->cum_sess++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001466 return 1;
1467
1468 case SN_ERR_INTERNAL:
Willy Tarreaud7971282006-07-29 18:36:34 +02001469 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001470 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001471 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001472 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001473 t->srv->cum_sess++;
1474 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001475 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001476 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001477 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001478 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001479 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001480 return 1;
1481 }
1482 /* ensure that we have enough retries left */
1483 if (srv_count_retry_down(t, conn_err)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001484 return 1;
1485 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001486 } while (t->srv == NULL || t->conn_retries > 0 || !(t->be->options & PR_O_REDISP));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001487
1488 /* We're on our last chance, and the REDISP option was specified.
1489 * We will ignore cookie and force to balance or use the dispatcher.
1490 */
1491 /* let's try to offer this slot to anybody */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001492 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001493 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001494
Krzysztof Piotr Oledzki25b501a2008-01-06 16:36:16 +01001495 if (t->srv) {
Willy Tarreau98937b82007-12-10 15:05:42 +01001496 t->srv->cum_sess++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001497 t->srv->failed_conns++;
Krzysztof Piotr Oledzki25b501a2008-01-06 16:36:16 +01001498 t->srv->redispatches++;
1499 }
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +02001500 t->be->redispatches++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001501
1502 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Krzysztof Piotr Oledzki25b501a2008-01-06 16:36:16 +01001503 t->flags |= SN_REDISP;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001504 t->srv = NULL; /* it's left to the dispatcher to choose a server */
Willy Tarreau3d300592007-03-18 18:34:41 +01001505 http_flush_cookie_flags(&t->txn);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001506 return 0;
1507}
1508
1509
1510/* This function performs the "redispatch" part of a connection attempt. It
1511 * will assign a server if required, queue the connection if required, and
1512 * handle errors that might arise at this level. It can change the server
1513 * state. It will return 1 if it encounters an error, switches the server
1514 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1515 * that the connection is ready to use.
1516 */
1517
1518int srv_redispatch_connect(struct session *t)
1519{
1520 int conn_err;
1521
1522 /* We know that we don't have any connection pending, so we will
1523 * try to get a new one, and wait in this state if it's queued
1524 */
1525 conn_err = assign_server_and_queue(t);
1526 switch (conn_err) {
1527 case SRV_STATUS_OK:
1528 break;
1529
1530 case SRV_STATUS_NOSRV:
1531 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaud7971282006-07-29 18:36:34 +02001532 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001533 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001534 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001535 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001536 t->srv->cum_sess++;
1537 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001538 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001539 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001540
1541 return 1;
1542
1543 case SRV_STATUS_QUEUED:
Willy Tarreau1fa31262007-12-03 00:36:16 +01001544 /* note: we use the connect expiration date for the queue. */
1545 if (!tv_add_ifset(&t->req->cex, &now, &t->be->timeout.queue))
Willy Tarreaud7971282006-07-29 18:36:34 +02001546 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001547 t->srv_state = SV_STIDLE;
1548 /* do nothing else and do not wake any other session up */
1549 return 1;
1550
1551 case SRV_STATUS_FULL:
1552 case SRV_STATUS_INTERNAL:
1553 default:
Willy Tarreaud7971282006-07-29 18:36:34 +02001554 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001555 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001556 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001557 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001558 t->srv->cum_sess++;
1559 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001560 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001561 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001562
1563 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001564 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001565 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001566 return 1;
1567 }
1568 /* if we get here, it's because we got SRV_STATUS_OK, which also
1569 * means that the connection has not been queued.
1570 */
1571 return 0;
1572}
1573
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001574int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01001575 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001576 return px->down_time;
1577
1578 return now.tv_sec - px->last_change + px->down_time;
1579}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001580
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001581/* This function parses a "balance" statement in a backend section describing
1582 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
1583 * returns -1, it may write an error message into ther <err> buffer, for at
1584 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
1585 * written. The function must be called with <args> pointing to the first word
1586 * after "balance".
1587 */
1588int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
1589{
1590 if (!*(args[0])) {
1591 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01001592 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1593 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001594 return 0;
1595 }
1596
1597 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001598 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1599 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001600 }
1601 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001602 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1603 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001604 }
1605 else if (!strcmp(args[0], "uri")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001606 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1607 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001608 }
Willy Tarreau01732802007-11-01 22:48:15 +01001609 else if (!strcmp(args[0], "url_param")) {
1610 if (!*args[1]) {
1611 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
1612 return -1;
1613 }
Willy Tarreau31682232007-11-29 15:38:04 +01001614 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1615 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreau01732802007-11-01 22:48:15 +01001616 if (curproxy->url_param_name)
1617 free(curproxy->url_param_name);
1618 curproxy->url_param_name = strdup(args[1]);
1619 curproxy->url_param_len = strlen(args[1]);
1620 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001621 else {
Willy Tarreau01732802007-11-01 22:48:15 +01001622 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'source', 'uri' and 'url_param' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001623 return -1;
1624 }
1625 return 0;
1626}
1627
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01001628
1629/************************************************************************/
1630/* All supported keywords must be declared here. */
1631/************************************************************************/
1632
1633/* set test->i to the number of enabled servers on the proxy */
1634static int
1635acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
1636 struct acl_expr *expr, struct acl_test *test)
1637{
1638 test->flags = ACL_TEST_F_VOL_TEST;
1639 if (expr->arg_len) {
1640 /* another proxy was designated, we must look for it */
1641 for (px = proxy; px; px = px->next)
1642 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
1643 break;
1644 }
1645 if (!px)
1646 return 0;
1647
1648 if (px->srv_act)
1649 test->i = px->srv_act;
1650 else if (px->lbprm.fbck)
1651 test->i = 1;
1652 else
1653 test->i = px->srv_bck;
1654
1655 return 1;
1656}
1657
1658
1659/* Note: must not be declared <const> as its list will be overwritten */
1660static struct acl_kw_list acl_kws = {{ },{
1661 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int },
1662 { NULL, NULL, NULL, NULL },
1663}};
1664
1665
1666__attribute__((constructor))
1667static void __backend_init(void)
1668{
1669 acl_register_keywords(&acl_kws);
1670}
1671
1672
Willy Tarreaubaaee002006-06-26 02:48:02 +02001673/*
1674 * Local variables:
1675 * c-indent-level: 8
1676 * c-basic-offset: 8
1677 * End:
1678 */