blob: 1cd2a191fb564474a040485c4d1b57010be9e169 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020019
Willy Tarreau2dd0d472006-06-29 17:53:05 +020020#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020021#include <common/config.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010022#include <common/eb32tree.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020023#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020024
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010025#include <types/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020026#include <types/buffers.h>
27#include <types/global.h>
28#include <types/polling.h>
29#include <types/proxy.h>
30#include <types/server.h>
31#include <types/session.h>
32
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010033#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020034#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020035#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020036#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010037#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020038#include <proto/log.h>
39#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010040#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041#include <proto/queue.h>
42#include <proto/stream_sock.h>
43#include <proto/task.h>
44
Willy Tarreau6d1a9882007-01-07 02:03:04 +010045#ifdef CONFIG_HAP_TCPSPLICE
46#include <libtcpsplice.h>
47#endif
48
Willy Tarreaub625a082007-11-26 01:15:43 +010049static inline void fwrr_remove_from_tree(struct server *s);
50static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
51static inline void fwrr_dequeue_srv(struct server *s);
52static void fwrr_get_srv(struct server *s);
53static void fwrr_queue_srv(struct server *s);
54
55/* This function returns non-zero if a server with the given weight and state
56 * is usable for LB, otherwise zero.
57 */
58static inline int srv_is_usable(int state, int weight)
59{
60 if (!weight)
61 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010062 if (state & SRV_GOINGDOWN)
63 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010064 if (!(state & SRV_RUNNING))
65 return 0;
66 return 1;
67}
68
Willy Tarreaubaaee002006-06-26 02:48:02 +020069/*
70 * This function recounts the number of usable active and backup servers for
71 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010072 * This function also recomputes the total active and backup weights. However,
73 * it does nout update tot_weight nor tot_used. Use update_backend_weight() for
74 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020075 */
Willy Tarreaub625a082007-11-26 01:15:43 +010076static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020077{
78 struct server *srv;
79
Willy Tarreau20697042007-11-15 23:26:18 +010080 px->srv_act = px->srv_bck = 0;
81 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010082 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020083 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010084 if (!srv_is_usable(srv->state, srv->eweight))
85 continue;
86
87 if (srv->state & SRV_BACKUP) {
88 if (!px->srv_bck &&
Willy Tarreau31682232007-11-29 15:38:04 +010089 !(px->lbprm.algo & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010090 px->lbprm.fbck = srv;
91 px->srv_bck++;
92 px->lbprm.tot_wbck += srv->eweight;
93 } else {
94 px->srv_act++;
95 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020096 }
97 }
Willy Tarreaub625a082007-11-26 01:15:43 +010098}
Willy Tarreau20697042007-11-15 23:26:18 +010099
Willy Tarreaub625a082007-11-26 01:15:43 +0100100/* This function simply updates the backend's tot_weight and tot_used values
101 * after servers weights have been updated. It is designed to be used after
102 * recount_servers() or equivalent.
103 */
104static void update_backend_weight(struct proxy *px)
105{
Willy Tarreau20697042007-11-15 23:26:18 +0100106 if (px->srv_act) {
107 px->lbprm.tot_weight = px->lbprm.tot_wact;
108 px->lbprm.tot_used = px->srv_act;
109 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100110 else if (px->lbprm.fbck) {
111 /* use only the first backup server */
112 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
113 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100114 }
115 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100116 px->lbprm.tot_weight = px->lbprm.tot_wbck;
117 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100118 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100119}
120
121/* this function updates the map according to server <srv>'s new state */
122static void map_set_server_status_down(struct server *srv)
123{
124 struct proxy *p = srv->proxy;
125
126 if (srv->state == srv->prev_state &&
127 srv->eweight == srv->prev_eweight)
128 return;
129
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100130 if (srv_is_usable(srv->state, srv->eweight))
131 goto out_update_state;
132
Willy Tarreaub625a082007-11-26 01:15:43 +0100133 /* FIXME: could be optimized since we know what changed */
134 recount_servers(p);
135 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100136 p->lbprm.map.state |= PR_MAP_RECALC;
137 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100138 srv->prev_state = srv->state;
139 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200140}
141
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100142/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100143static void map_set_server_status_up(struct server *srv)
144{
145 struct proxy *p = srv->proxy;
146
147 if (srv->state == srv->prev_state &&
148 srv->eweight == srv->prev_eweight)
149 return;
150
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100151 if (!srv_is_usable(srv->state, srv->eweight))
152 goto out_update_state;
153
Willy Tarreaub625a082007-11-26 01:15:43 +0100154 /* FIXME: could be optimized since we know what changed */
155 recount_servers(p);
156 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100157 p->lbprm.map.state |= PR_MAP_RECALC;
158 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100159 srv->prev_state = srv->state;
160 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100161}
162
Willy Tarreau20697042007-11-15 23:26:18 +0100163/* This function recomputes the server map for proxy px. It relies on
164 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
165 * called after recount_servers(). It also expects px->lbprm.map.srv
166 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200167 */
168void recalc_server_map(struct proxy *px)
169{
170 int o, tot, flag;
171 struct server *cur, *best;
172
Willy Tarreau20697042007-11-15 23:26:18 +0100173 switch (px->lbprm.tot_used) {
174 case 0: /* no server */
175 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200176 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100177 case 1: /* only one server, just fill first entry */
178 tot = 1;
179 break;
180 default:
181 tot = px->lbprm.tot_weight;
182 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200183 }
184
Willy Tarreau20697042007-11-15 23:26:18 +0100185 /* here we *know* that we have some servers */
186 if (px->srv_act)
187 flag = SRV_RUNNING;
188 else
189 flag = SRV_RUNNING | SRV_BACKUP;
190
Willy Tarreaubaaee002006-06-26 02:48:02 +0200191 /* this algorithm gives priority to the first server, which means that
192 * it will respect the declaration order for equivalent weights, and
193 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100194 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200195 * case, where we want the first server only.
196 */
197 for (cur = px->srv; cur; cur = cur->next)
198 cur->wscore = 0;
199
200 for (o = 0; o < tot; o++) {
201 int max = 0;
202 best = NULL;
203 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100204 if (flag == (cur->state &
205 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200206 int v;
207
208 /* If we are forced to return only one server, we don't want to
209 * go further, because we would return the wrong one due to
210 * divide overflow.
211 */
212 if (tot == 1) {
213 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100214 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200215 break;
216 }
217
Willy Tarreau417fae02007-03-25 21:16:40 +0200218 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200219 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
220 if (best == NULL || v > max) {
221 max = v;
222 best = cur;
223 }
224 }
225 }
Willy Tarreau20697042007-11-15 23:26:18 +0100226 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200227 best->wscore -= tot;
228 }
Willy Tarreau20697042007-11-15 23:26:18 +0100229 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200230}
231
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100232/* This function is responsible of building the server MAP for map-based LB
233 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
234 * weights if applicable. It should be called only once per proxy, at config
235 * time.
236 */
237void init_server_map(struct proxy *p)
238{
239 struct server *srv;
240 int pgcd;
241 int act, bck;
242
Willy Tarreaub625a082007-11-26 01:15:43 +0100243 p->lbprm.set_server_status_up = map_set_server_status_up;
244 p->lbprm.set_server_status_down = map_set_server_status_down;
245 p->lbprm.update_server_eweight = NULL;
246
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100247 if (!p->srv)
248 return;
249
250 /* We will factor the weights to reduce the table,
251 * using Euclide's largest common divisor algorithm
252 */
253 pgcd = p->srv->uweight;
254 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
255 int w = srv->uweight;
256 while (w) {
257 int t = pgcd % w;
258 pgcd = w;
259 w = t;
260 }
261 }
262
263 /* It is sometimes useful to know what factor to apply
264 * to the backend's effective weight to know its real
265 * weight.
266 */
267 p->lbprm.wmult = pgcd;
268
269 act = bck = 0;
270 for (srv = p->srv; srv; srv = srv->next) {
271 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100272 srv->prev_eweight = srv->eweight;
273 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100274 if (srv->state & SRV_BACKUP)
275 bck += srv->eweight;
276 else
277 act += srv->eweight;
278 }
279
280 /* this is the largest map we will ever need for this servers list */
281 if (act < bck)
282 act = bck;
283
284 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
285 /* recounts servers and their weights */
286 p->lbprm.map.state = PR_MAP_RECALC;
287 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100288 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100289 recalc_server_map(p);
290}
291
Willy Tarreaub625a082007-11-26 01:15:43 +0100292/* This function updates the server trees according to server <srv>'s new
293 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100294 * It is not important whether the server was already down or not. It is not
295 * important either that the new state is completely down (the caller may not
296 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100297 */
298static void fwrr_set_server_status_down(struct server *srv)
299{
300 struct proxy *p = srv->proxy;
301 struct fwrr_group *grp;
302
303 if (srv->state == srv->prev_state &&
304 srv->eweight == srv->prev_eweight)
305 return;
306
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100307 if (srv_is_usable(srv->state, srv->eweight))
308 goto out_update_state;
309
Willy Tarreaub625a082007-11-26 01:15:43 +0100310 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
311 /* server was already down */
312 goto out_update_backend;
313
314 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
315 grp->next_weight -= srv->prev_eweight;
316
317 if (srv->state & SRV_BACKUP) {
318 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
319 p->srv_bck--;
320
321 if (srv == p->lbprm.fbck) {
322 /* we lost the first backup server in a single-backup
323 * configuration, we must search another one.
324 */
325 struct server *srv2 = p->lbprm.fbck;
326 do {
327 srv2 = srv2->next;
328 } while (srv2 &&
329 !((srv2->state & SRV_BACKUP) &&
330 srv_is_usable(srv2->state, srv2->eweight)));
331 p->lbprm.fbck = srv2;
332 }
333 } else {
334 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
335 p->srv_act--;
336 }
337
338 fwrr_dequeue_srv(srv);
339 fwrr_remove_from_tree(srv);
340
341out_update_backend:
342 /* check/update tot_used, tot_weight */
343 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100344 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100345 srv->prev_state = srv->state;
346 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100347}
348
349/* This function updates the server trees according to server <srv>'s new
350 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100351 * It is not important whether the server was already down or not. It is not
352 * important either that the new state is completely UP (the caller may not
353 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100354 * the weight of a server which was already up.
355 */
356static void fwrr_set_server_status_up(struct server *srv)
357{
358 struct proxy *p = srv->proxy;
359 struct fwrr_group *grp;
360
361 if (srv->state == srv->prev_state &&
362 srv->eweight == srv->prev_eweight)
363 return;
364
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100365 if (!srv_is_usable(srv->state, srv->eweight))
366 goto out_update_state;
367
Willy Tarreaub625a082007-11-26 01:15:43 +0100368 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
369 /* server was already up */
370 goto out_update_backend;
371
372 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
373 grp->next_weight += srv->eweight;
374
375 if (srv->state & SRV_BACKUP) {
376 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
377 p->srv_bck++;
378
379 if (p->lbprm.fbck) {
380 /* we may have restored a backup server prior to fbck,
381 * in which case it should replace it.
382 */
383 struct server *srv2 = srv;
384 do {
385 srv2 = srv2->next;
386 } while (srv2 && (srv2 != p->lbprm.fbck));
387 if (srv2)
388 p->lbprm.fbck = srv;
389 }
390 } else {
391 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
392 p->srv_act++;
393 }
394
395 /* note that eweight cannot be 0 here */
396 fwrr_get_srv(srv);
397 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
398 fwrr_queue_srv(srv);
399
400out_update_backend:
401 /* check/update tot_used, tot_weight */
402 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100403 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100404 srv->prev_state = srv->state;
405 srv->prev_eweight = srv->eweight;
406}
407
408/* This function must be called after an update to server <srv>'s effective
409 * weight. It may be called after a state change too.
410 */
411static void fwrr_update_server_weight(struct server *srv)
412{
413 int old_state, new_state;
414 struct proxy *p = srv->proxy;
415 struct fwrr_group *grp;
416
417 if (srv->state == srv->prev_state &&
418 srv->eweight == srv->prev_eweight)
419 return;
420
421 /* If changing the server's weight changes its state, we simply apply
422 * the procedures we already have for status change. If the state
423 * remains down, the server is not in any tree, so it's as easy as
424 * updating its values. If the state remains up with different weights,
425 * there are some computations to perform to find a new place and
426 * possibly a new tree for this server.
427 */
428
429 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
430 new_state = srv_is_usable(srv->state, srv->eweight);
431
432 if (!old_state && !new_state) {
433 srv->prev_state = srv->state;
434 srv->prev_eweight = srv->eweight;
435 return;
436 }
437 else if (!old_state && new_state) {
438 fwrr_set_server_status_up(srv);
439 return;
440 }
441 else if (old_state && !new_state) {
442 fwrr_set_server_status_down(srv);
443 return;
444 }
445
446 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
447 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
448
449 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
450 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
451
452 if (srv->lb_tree == grp->init) {
453 fwrr_dequeue_srv(srv);
454 fwrr_queue_by_weight(grp->init, srv);
455 }
456 else if (!srv->lb_tree) {
457 /* FIXME: server was down. This is not possible right now but
458 * may be needed soon for slowstart or graceful shutdown.
459 */
460 fwrr_dequeue_srv(srv);
461 fwrr_get_srv(srv);
462 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
463 fwrr_queue_srv(srv);
464 } else {
465 /* The server is either active or in the next queue. If it's
466 * still in the active queue and it has not consumed all of its
467 * places, let's adjust its next position.
468 */
469 fwrr_get_srv(srv);
470
471 if (srv->eweight > 0) {
472 int prev_next = srv->npos;
473 int step = grp->next_weight / srv->eweight;
474
475 srv->npos = srv->lpos + step;
476 srv->rweight = 0;
477
478 if (srv->npos > prev_next)
479 srv->npos = prev_next;
480 if (srv->npos < grp->curr_pos + 2)
481 srv->npos = grp->curr_pos + step;
482 } else {
483 /* push it into the next tree */
484 srv->npos = grp->curr_pos + grp->curr_weight;
485 }
486
487 fwrr_dequeue_srv(srv);
488 fwrr_queue_srv(srv);
489 }
490
491 update_backend_weight(p);
492 srv->prev_state = srv->state;
493 srv->prev_eweight = srv->eweight;
494}
495
496/* Remove a server from a tree. It must have previously been dequeued. This
497 * function is meant to be called when a server is going down or has its
498 * weight disabled.
499 */
500static inline void fwrr_remove_from_tree(struct server *s)
501{
502 s->lb_tree = NULL;
503}
504
505/* Queue a server in the weight tree <root>, assuming the weight is >0.
506 * We want to sort them by inverted weights, because we need to place
507 * heavy servers first in order to get a smooth distribution.
508 */
509static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
510{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100511 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100512 eb32_insert(root, &s->lb_node);
513 s->lb_tree = root;
514}
515
516/* This function is responsible for building the weight trees in case of fast
517 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
518 * ratio. Both active and backup groups are initialized.
519 */
520void fwrr_init_server_groups(struct proxy *p)
521{
522 struct server *srv;
523 struct eb_root init_head = EB_ROOT;
524
525 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
526 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
527 p->lbprm.update_server_eweight = fwrr_update_server_weight;
528
529 p->lbprm.wdiv = BE_WEIGHT_SCALE;
530 for (srv = p->srv; srv; srv = srv->next) {
531 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
532 srv->prev_state = srv->state;
533 }
534
535 recount_servers(p);
536 update_backend_weight(p);
537
538 /* prepare the active servers group */
539 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
540 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
541 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
542 p->lbprm.fwrr.act.t1 = init_head;
543 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
544 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
545
546 /* prepare the backup servers group */
547 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
548 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
549 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
550 p->lbprm.fwrr.bck.t1 = init_head;
551 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
552 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
553
554 /* queue active and backup servers in two distinct groups */
555 for (srv = p->srv; srv; srv = srv->next) {
556 if (!srv_is_usable(srv->state, srv->eweight))
557 continue;
558 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
559 p->lbprm.fwrr.bck.init :
560 p->lbprm.fwrr.act.init,
561 srv);
562 }
563}
564
565/* simply removes a server from a weight tree */
566static inline void fwrr_dequeue_srv(struct server *s)
567{
568 eb32_delete(&s->lb_node);
569}
570
571/* queues a server into the appropriate group and tree depending on its
572 * backup status, and ->npos. If the server is disabled, simply assign
573 * it to the NULL tree.
574 */
575static void fwrr_queue_srv(struct server *s)
576{
577 struct proxy *p = s->proxy;
578 struct fwrr_group *grp;
579
580 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
581
582 /* Delay everything which does not fit into the window and everything
583 * which does not fit into the theorical new window.
584 */
585 if (!srv_is_usable(s->state, s->eweight)) {
586 fwrr_remove_from_tree(s);
587 }
588 else if (s->eweight <= 0 ||
589 s->npos >= 2 * grp->curr_weight ||
590 s->npos >= grp->curr_weight + grp->next_weight) {
591 /* put into next tree, and readjust npos in case we could
592 * finally take this back to current. */
593 s->npos -= grp->curr_weight;
594 fwrr_queue_by_weight(grp->next, s);
595 }
596 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100597 /* The sorting key is stored in units of s->npos * user_weight
598 * in order to avoid overflows. As stated in backend.h, the
599 * lower the scale, the rougher the weights modulation, and the
600 * higher the scale, the lower the number of servers without
601 * overflow. With this formula, the result is always positive,
602 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100603 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100604 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
605 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
606
607 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100608 s->lb_tree = &grp->curr;
609 }
610}
611
612/* prepares a server when extracting it from the "init" tree */
613static inline void fwrr_get_srv_init(struct server *s)
614{
615 s->npos = s->rweight = 0;
616}
617
618/* prepares a server when extracting it from the "next" tree */
619static inline void fwrr_get_srv_next(struct server *s)
620{
621 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
622 &s->proxy->lbprm.fwrr.bck :
623 &s->proxy->lbprm.fwrr.act;
624
625 s->npos += grp->curr_weight;
626}
627
628/* prepares a server when it was marked down */
629static inline void fwrr_get_srv_down(struct server *s)
630{
631 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
632 &s->proxy->lbprm.fwrr.bck :
633 &s->proxy->lbprm.fwrr.act;
634
635 s->npos = grp->curr_pos;
636}
637
638/* prepares a server when extracting it from its tree */
639static void fwrr_get_srv(struct server *s)
640{
641 struct proxy *p = s->proxy;
642 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
643 &p->lbprm.fwrr.bck :
644 &p->lbprm.fwrr.act;
645
646 if (s->lb_tree == grp->init) {
647 fwrr_get_srv_init(s);
648 }
649 else if (s->lb_tree == grp->next) {
650 fwrr_get_srv_next(s);
651 }
652 else if (s->lb_tree == NULL) {
653 fwrr_get_srv_down(s);
654 }
655}
656
657/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
658 * when this happens, and "next" filled with servers sorted by weights.
659 */
660static inline void fwrr_switch_trees(struct fwrr_group *grp)
661{
662 struct eb_root *swap;
663 swap = grp->init;
664 grp->init = grp->next;
665 grp->next = swap;
666 grp->curr_weight = grp->next_weight;
667 grp->curr_pos = grp->curr_weight;
668}
669
670/* return next server from the current tree in FWRR group <grp>, or a server
671 * from the "init" tree if appropriate. If both trees are empty, return NULL.
672 */
673static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
674{
675 struct eb32_node *node;
676 struct server *s;
677
678 node = eb32_first(&grp->curr);
679 s = eb32_entry(node, struct server, lb_node);
680
681 if (!node || s->npos > grp->curr_pos) {
682 /* either we have no server left, or we have a hole */
683 struct eb32_node *node2;
684 node2 = eb32_first(grp->init);
685 if (node2) {
686 node = node2;
687 s = eb32_entry(node, struct server, lb_node);
688 fwrr_get_srv_init(s);
689 if (s->eweight == 0) /* FIXME: is it possible at all ? */
690 node = NULL;
691 }
692 }
693 if (node)
694 return s;
695 else
696 return NULL;
697}
698
699/* Computes next position of server <s> in the group. It is mandatory for <s>
700 * to have a non-zero, positive eweight.
701*/
702static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
703{
704 if (!s->npos) {
705 /* first time ever for this server */
706 s->lpos = grp->curr_pos;
707 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
708 s->rweight += grp->next_weight % s->eweight;
709
710 if (s->rweight >= s->eweight) {
711 s->rweight -= s->eweight;
712 s->npos++;
713 }
714 } else {
715 s->lpos = s->npos;
716 s->npos += grp->next_weight / s->eweight;
717 s->rweight += grp->next_weight % s->eweight;
718
719 if (s->rweight >= s->eweight) {
720 s->rweight -= s->eweight;
721 s->npos++;
722 }
723 }
724}
725
726/* Return next server from the current tree in backend <p>, or a server from
727 * the init tree if appropriate. If both trees are empty, return NULL.
728 * Saturated servers are skipped and requeued.
729 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100730static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100731{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100732 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100733 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100734 int switched;
735
736 if (p->srv_act)
737 grp = &p->lbprm.fwrr.act;
738 else if (p->lbprm.fbck)
739 return p->lbprm.fbck;
740 else if (p->srv_bck)
741 grp = &p->lbprm.fwrr.bck;
742 else
743 return NULL;
744
745 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100746 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100747 full = NULL; /* NULL-terminated list of saturated servers */
748 while (1) {
749 /* if we see an empty group, let's first try to collect weights
750 * which might have recently changed.
751 */
752 if (!grp->curr_weight)
753 grp->curr_pos = grp->curr_weight = grp->next_weight;
754
755 /* get first server from the "current" tree. When the end of
756 * the tree is reached, we may have to switch, but only once.
757 */
758 while (1) {
759 srv = fwrr_get_server_from_group(grp);
760 if (srv)
761 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100762 if (switched) {
763 if (avoided) {
764 srv = avoided;
765 break;
766 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100767 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100768 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100769 switched = 1;
770 fwrr_switch_trees(grp);
771
772 }
773
774 /* OK, we have a server. However, it may be saturated, in which
775 * case we don't want to reconsider it for now. We'll update
776 * its position and dequeue it anyway, so that we can move it
777 * to a better place afterwards.
778 */
779 fwrr_update_position(grp, srv);
780 fwrr_dequeue_srv(srv);
781 grp->curr_pos++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100782 if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) {
783 /* make sure it is not the server we are trying to exclude... */
784 if (srv != srvtoavoid || avoided)
785 break;
786
787 avoided = srv; /* ...but remember that is was selected yet avoided */
788 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100789
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100790 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100791 srv->next_full = full;
792 full = srv;
793 }
794
795 /* OK, we got the best server, let's update it */
796 fwrr_queue_srv(srv);
797
798 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100799 /* Requeue all extracted servers. If full==srv then it was
800 * avoided (unsucessfully) and chained, omit it now.
801 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100802 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100803 if (switched) {
804 /* the tree has switched, requeue all extracted servers
805 * into "init", because their place was lost, and only
806 * their weight matters.
807 */
808 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100809 if (likely(full != srv))
810 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100811 full = full->next_full;
812 } while (full);
813 } else {
814 /* requeue all extracted servers just as if they were consumed
815 * so that they regain their expected place.
816 */
817 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100818 if (likely(full != srv))
819 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100820 full = full->next_full;
821 } while (full);
822 }
823 }
824 return srv;
825}
826
Willy Tarreau01732802007-11-01 22:48:15 +0100827/*
828 * This function tries to find a running server for the proxy <px> following
829 * the URL parameter hash method. It looks for a specific parameter in the
830 * URL and hashes it to compute the server ID. This is useful to optimize
831 * performance by avoiding bounces between servers in contexts where sessions
832 * are shared but cookies are not usable. If the parameter is not found, NULL
833 * is returned. If any server is found, it will be returned. If no valid server
834 * is found, NULL is returned.
835 *
836 */
837struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
838{
839 unsigned long hash = 0;
840 char *p;
841 int plen;
842
Willy Tarreau20697042007-11-15 23:26:18 +0100843 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +0100844 return NULL;
845
Willy Tarreau20697042007-11-15 23:26:18 +0100846 if (px->lbprm.map.state & PR_MAP_RECALC)
847 recalc_server_map(px);
848
Willy Tarreau01732802007-11-01 22:48:15 +0100849 p = memchr(uri, '?', uri_len);
850 if (!p)
851 return NULL;
852 p++;
853
854 uri_len -= (p - uri);
855 plen = px->url_param_len;
856
857 if (uri_len <= plen)
858 return NULL;
859
860 while (uri_len > plen) {
861 /* Look for the parameter name followed by an equal symbol */
862 if (p[plen] == '=') {
863 /* skip the equal symbol */
864 uri = p;
865 p += plen + 1;
866 uri_len -= plen + 1;
867 if (memcmp(uri, px->url_param_name, plen) == 0) {
868 /* OK, we have the parameter here at <uri>, and
869 * the value after the equal sign, at <p>
870 */
871 while (uri_len && *p != '&') {
872 hash = *p + (hash << 6) + (hash << 16) - hash;
873 uri_len--;
874 p++;
875 }
Willy Tarreau20697042007-11-15 23:26:18 +0100876 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +0100877 }
878 }
879
880 /* skip to next parameter */
881 uri = p;
882 p = memchr(uri, '&', uri_len);
883 if (!p)
884 return NULL;
885 p++;
886 uri_len -= (p - uri);
887 }
888 return NULL;
889}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200890
891/*
892 * This function marks the session as 'assigned' in direct or dispatch modes,
893 * or tries to assign one in balance mode, according to the algorithm. It does
894 * nothing if the session had already been assigned a server.
895 *
896 * It may return :
897 * SRV_STATUS_OK if everything is OK. s->srv will be valid.
898 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
899 * SRV_STATUS_FULL if all servers are saturated. s->srv = NULL.
900 * SRV_STATUS_INTERNAL for other unrecoverable errors.
901 *
902 * Upon successful return, the session flag SN_ASSIGNED to indicate that it does
903 * not need to be called anymore. This usually means that s->srv can be trusted
904 * in balance and direct modes. This flag is not cleared, so it's to the caller
905 * to clear it if required (eg: redispatch).
906 *
907 */
908
909int assign_server(struct session *s)
910{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100911
912 struct server *srvtoavoid;
913
Willy Tarreaubaaee002006-06-26 02:48:02 +0200914#ifdef DEBUG_FULL
915 fprintf(stderr,"assign_server : s=%p\n",s);
916#endif
917
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100918 srvtoavoid = s->srv;
919 s->srv = NULL;
920
Willy Tarreaubaaee002006-06-26 02:48:02 +0200921 if (s->pend_pos)
922 return SRV_STATUS_INTERNAL;
923
924 if (!(s->flags & SN_ASSIGNED)) {
Willy Tarreau31682232007-11-29 15:38:04 +0100925 if (s->be->lbprm.algo & BE_LB_ALGO) {
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100926 int len;
927
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100928 if (s->flags & SN_DIRECT) {
929 s->flags |= SN_ASSIGNED;
930 return SRV_STATUS_OK;
931 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100932
Willy Tarreaub625a082007-11-26 01:15:43 +0100933 if (!s->be->lbprm.tot_weight)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200934 return SRV_STATUS_NOSRV;
935
Willy Tarreau31682232007-11-29 15:38:04 +0100936 switch (s->be->lbprm.algo & BE_LB_ALGO) {
937 case BE_LB_ALGO_RR:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100938 s->srv = fwrr_get_next_server(s->be, srvtoavoid);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200939 if (!s->srv)
940 return SRV_STATUS_FULL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100941 break;
Willy Tarreau31682232007-11-29 15:38:04 +0100942 case BE_LB_ALGO_SH:
Willy Tarreaubaaee002006-06-26 02:48:02 +0200943 if (s->cli_addr.ss_family == AF_INET)
944 len = 4;
945 else if (s->cli_addr.ss_family == AF_INET6)
946 len = 16;
947 else /* unknown IP family */
948 return SRV_STATUS_INTERNAL;
949
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200950 s->srv = get_server_sh(s->be,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200951 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
952 len);
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100953 break;
Willy Tarreau31682232007-11-29 15:38:04 +0100954 case BE_LB_ALGO_UH:
Willy Tarreau2fcb5002007-05-08 13:35:26 +0200955 /* URI hashing */
956 s->srv = get_server_uh(s->be,
957 s->txn.req.sol + s->txn.req.sl.rq.u,
958 s->txn.req.sl.rq.u_l);
Willy Tarreau01732802007-11-01 22:48:15 +0100959 break;
Willy Tarreau31682232007-11-29 15:38:04 +0100960 case BE_LB_ALGO_PH:
Willy Tarreau01732802007-11-01 22:48:15 +0100961 /* URL Parameter hashing */
962 s->srv = get_server_ph(s->be,
963 s->txn.req.sol + s->txn.req.sl.rq.u,
964 s->txn.req.sl.rq.u_l);
965 if (!s->srv) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100966 /* parameter not found, fall back to round robin on the map */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100967 s->srv = get_server_rr_with_conns(s->be, srvtoavoid);
Willy Tarreau01732802007-11-01 22:48:15 +0100968 if (!s->srv)
969 return SRV_STATUS_FULL;
970 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100971 break;
972 default:
973 /* unknown balancing algorithm */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200974 return SRV_STATUS_INTERNAL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100975 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100976 if (s->srv != srvtoavoid) {
977 s->be->cum_lbconn++;
978 s->srv->cum_lbconn++;
979 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200980 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +0100981 else if (s->be->options & PR_O_HTTP_PROXY) {
982 if (!s->srv_addr.sin_addr.s_addr)
983 return SRV_STATUS_NOSRV;
984 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200985 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100986 !(s->fe->options & PR_O_TRANSP)) {
Willy Tarreau1a1158b2007-01-20 11:07:46 +0100987 return SRV_STATUS_NOSRV;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100988 }
989 s->flags |= SN_ASSIGNED;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200990 }
991 return SRV_STATUS_OK;
992}
993
994
995/*
996 * This function assigns a server address to a session, and sets SN_ADDR_SET.
997 * The address is taken from the currently assigned server, or from the
998 * dispatch or transparent address.
999 *
1000 * It may return :
1001 * SRV_STATUS_OK if everything is OK.
1002 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1003 *
1004 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1005 * not cleared, so it's to the caller to clear it if required.
1006 *
1007 */
1008int assign_server_address(struct session *s)
1009{
1010#ifdef DEBUG_FULL
1011 fprintf(stderr,"assign_server_address : s=%p\n",s);
1012#endif
1013
Willy Tarreau31682232007-11-29 15:38:04 +01001014 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001015 /* A server is necessarily known for this session */
1016 if (!(s->flags & SN_ASSIGNED))
1017 return SRV_STATUS_INTERNAL;
1018
1019 s->srv_addr = s->srv->addr;
1020
1021 /* if this server remaps proxied ports, we'll use
1022 * the port the client connected to with an offset. */
1023 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001024 if (!(s->fe->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
1025 get_frt_addr(s);
1026 if (s->frt_addr.ss_family == AF_INET) {
1027 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1028 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1029 } else {
1030 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1031 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1032 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001033 }
1034 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001035 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001036 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001037 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001038 }
Willy Tarreau73de9892006-11-30 11:40:23 +01001039 else if (s->fe->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001040 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001041 if (!(s->flags & SN_FRT_ADDR_SET))
1042 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001043
Willy Tarreaubd414282008-01-19 13:46:35 +01001044 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1045 /* when we support IPv6 on the backend, we may add other tests */
1046 //qfprintf(stderr, "Cannot get original server address.\n");
1047 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001048 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001049 else if (s->be->options & PR_O_HTTP_PROXY) {
1050 /* If HTTP PROXY option is set, then server is already assigned
1051 * during incoming client request parsing. */
1052 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001053 else {
1054 /* no server and no LB algorithm ! */
1055 return SRV_STATUS_INTERNAL;
1056 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001057
1058 s->flags |= SN_ADDR_SET;
1059 return SRV_STATUS_OK;
1060}
1061
1062
1063/* This function assigns a server to session <s> if required, and can add the
1064 * connection to either the assigned server's queue or to the proxy's queue.
1065 *
1066 * Returns :
1067 *
1068 * SRV_STATUS_OK if everything is OK.
1069 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1070 * SRV_STATUS_QUEUED if the connection has been queued.
1071 * SRV_STATUS_FULL if the server(s) is/are saturated and the
1072 * connection could not be queued.
1073 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1074 *
1075 */
1076int assign_server_and_queue(struct session *s)
1077{
1078 struct pendconn *p;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001079 struct server *srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001080 int err;
1081
1082 if (s->pend_pos)
1083 return SRV_STATUS_INTERNAL;
1084
1085 if (s->flags & SN_ASSIGNED) {
Willy Tarreau21d2af32008-02-14 20:25:24 +01001086 if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
1087 /* server scheduled for redirection, and already assigned. We
1088 * don't want to go further nor check the queue.
1089 */
1090 return SRV_STATUS_OK;
1091 }
1092
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001093 if (s->srv && s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001094 /* it's left to the dispatcher to choose a server */
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001095 s->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001096 } else {
1097 /* a server does not need to be assigned, perhaps because we're in
1098 * direct mode, or in dispatch or transparent modes where the server
1099 * is not needed.
1100 */
1101 if (s->srv &&
1102 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1103 p = pendconn_add(s);
1104 if (p)
1105 return SRV_STATUS_QUEUED;
1106 else
1107 return SRV_STATUS_FULL;
1108 }
1109 return SRV_STATUS_OK;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001110 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001111 }
1112
1113 /* a server needs to be assigned */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001114 srv = s->srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001115 err = assign_server(s);
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001116
1117 if (srv) {
1118 if (srv != s->srv) {
1119 /* This session was previously dispatched to another server:
1120 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1121 * - set SN_REDISP if it was successfully redispatched
1122 * - increment srv->redispatches and be->redispatches
1123 */
1124
1125 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1126 s->txn.flags &= ~TX_CK_MASK;
1127 s->txn.flags |= TX_CK_DOWN;
1128 }
1129
1130 s->flags |= SN_REDISP;
1131
1132 srv->redispatches++;
1133 s->be->redispatches++;
1134 } else {
1135 srv->retries++;
1136 s->be->retries++;
1137 }
1138 }
1139
Willy Tarreaubaaee002006-06-26 02:48:02 +02001140 switch (err) {
1141 case SRV_STATUS_OK:
Willy Tarreau21d2af32008-02-14 20:25:24 +01001142 if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
1143 /* server supporting redirection and it is possible.
1144 * Let's report that and ignore maxconn !
1145 */
1146 return SRV_STATUS_OK;
1147 }
1148
Willy Tarreaubaaee002006-06-26 02:48:02 +02001149 /* in balance mode, we might have servers with connection limits */
1150 if (s->srv &&
1151 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1152 p = pendconn_add(s);
1153 if (p)
1154 return SRV_STATUS_QUEUED;
1155 else
1156 return SRV_STATUS_FULL;
1157 }
1158 return SRV_STATUS_OK;
1159
1160 case SRV_STATUS_FULL:
1161 /* queue this session into the proxy's queue */
1162 p = pendconn_add(s);
1163 if (p)
1164 return SRV_STATUS_QUEUED;
1165 else
1166 return SRV_STATUS_FULL;
1167
1168 case SRV_STATUS_NOSRV:
1169 case SRV_STATUS_INTERNAL:
1170 return err;
1171 default:
1172 return SRV_STATUS_INTERNAL;
1173 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001174}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001175
1176/*
1177 * This function initiates a connection to the server assigned to this session
1178 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1179 * It can return one of :
1180 * - SN_ERR_NONE if everything's OK
1181 * - SN_ERR_SRVTO if there are no more servers
1182 * - SN_ERR_SRVCL if the connection was refused by the server
1183 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1184 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1185 * - SN_ERR_INTERNAL for any other purely internal errors
1186 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1187 */
1188int connect_server(struct session *s)
1189{
1190 int fd, err;
1191
1192 if (!(s->flags & SN_ADDR_SET)) {
1193 err = assign_server_address(s);
1194 if (err != SRV_STATUS_OK)
1195 return SN_ERR_INTERNAL;
1196 }
1197
1198 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
1199 qfprintf(stderr, "Cannot get a server socket.\n");
1200
1201 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001202 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001203 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001204 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001205 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001206 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001207 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001208 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001209 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001210 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001211 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001212 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001213 /* this is a resource error */
1214 return SN_ERR_RESOURCE;
1215 }
1216
1217 if (fd >= global.maxsock) {
1218 /* do not log anything there, it's a normal condition when this option
1219 * is used to serialize connections to a server !
1220 */
1221 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1222 close(fd);
1223 return SN_ERR_PRXCOND; /* it is a configuration limit */
1224 }
1225
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001226#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001227 if ((s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001228 /* TCP splicing supported by both FE and BE */
1229 tcp_splice_initfd(s->cli_fd, fd);
1230 }
1231#endif
1232
Willy Tarreaubaaee002006-06-26 02:48:02 +02001233 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1234 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1235 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1236 close(fd);
1237 return SN_ERR_INTERNAL;
1238 }
1239
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001240 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001241 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1242
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001243 if (s->be->options & PR_O_TCP_NOLING)
1244 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1245
Willy Tarreaubaaee002006-06-26 02:48:02 +02001246 /* allow specific binding :
1247 * - server-specific at first
1248 * - proxy-specific next
1249 */
1250 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001251 struct sockaddr_in *remote = NULL;
1252 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001253
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001254#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001255 switch (s->srv->state & SRV_TPROXY_MASK) {
1256 case SRV_TPROXY_ADDR:
1257 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1258 flags = 3;
1259 break;
1260 case SRV_TPROXY_CLI:
1261 flags |= 2;
1262 /* fall through */
1263 case SRV_TPROXY_CIP:
1264 /* FIXME: what can we do if the client connects in IPv6 ? */
1265 flags |= 1;
1266 remote = (struct sockaddr_in *)&s->cli_addr;
1267 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001268 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001269#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001270 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001271 if (ret) {
1272 close(fd);
1273 if (ret == 1) {
1274 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1275 s->be->id, s->srv->id);
1276 send_log(s->be, LOG_EMERG,
1277 "Cannot bind to source address before connect() for server %s/%s.\n",
1278 s->be->id, s->srv->id);
1279 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001280 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001281 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001282 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001283 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001284 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001285 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001286 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001287 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001288 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001289 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001290 struct sockaddr_in *remote = NULL;
1291 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001292
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001293#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001294 switch (s->be->options & PR_O_TPXY_MASK) {
1295 case PR_O_TPXY_ADDR:
1296 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1297 flags = 3;
1298 break;
1299 case PR_O_TPXY_CLI:
1300 flags |= 2;
1301 /* fall through */
1302 case PR_O_TPXY_CIP:
1303 /* FIXME: what can we do if the client connects in IPv6 ? */
1304 flags |= 1;
1305 remote = (struct sockaddr_in *)&s->cli_addr;
1306 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001307 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001308#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001309 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001310 if (ret) {
1311 close(fd);
1312 if (ret == 1) {
1313 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1314 s->be->id);
1315 send_log(s->be, LOG_EMERG,
1316 "Cannot bind to source address before connect() for proxy %s.\n",
1317 s->be->id);
1318 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001319 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001320 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001321 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001322 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1323 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001324 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001325 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001326 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001327 }
1328
1329 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1330 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1331
1332 if (errno == EAGAIN || errno == EADDRINUSE) {
1333 char *msg;
1334 if (errno == EAGAIN) /* no free ports left, try again later */
1335 msg = "no free ports";
1336 else
1337 msg = "local address already in use";
1338
1339 qfprintf(stderr,"Cannot connect: %s.\n",msg);
1340 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001341 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001342 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001343 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001344 return SN_ERR_RESOURCE;
1345 } else if (errno == ETIMEDOUT) {
1346 //qfprintf(stderr,"Connect(): ETIMEDOUT");
1347 close(fd);
1348 return SN_ERR_SRVTO;
1349 } else {
1350 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1351 //qfprintf(stderr,"Connect(): %d", errno);
1352 close(fd);
1353 return SN_ERR_SRVCL;
1354 }
1355 }
1356
1357 fdtab[fd].owner = s->task;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001358 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +02001359 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001360 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001361 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001362 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001363
1364 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1365 fdtab[fd].peerlen = sizeof(s->srv_addr);
1366
Willy Tarreauf161a342007-04-08 16:59:42 +02001367 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001368
1369 fd_insert(fd);
1370 if (s->srv) {
1371 s->srv->cur_sess++;
1372 if (s->srv->cur_sess > s->srv->cur_sess_max)
1373 s->srv->cur_sess_max = s->srv->cur_sess;
1374 }
1375
Willy Tarreaud7c30f92007-12-03 01:38:36 +01001376 if (!tv_add_ifset(&s->req->cex, &now, &s->be->timeout.connect))
Willy Tarreaud7971282006-07-29 18:36:34 +02001377 tv_eternity(&s->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001378 return SN_ERR_NONE; /* connection is OK */
1379}
1380
1381
1382/*
1383 * This function checks the retry count during the connect() job.
1384 * It updates the session's srv_state and retries, so that the caller knows
1385 * what it has to do. It uses the last connection error to set the log when
1386 * it expires. It returns 1 when it has expired, and 0 otherwise.
1387 */
1388int srv_count_retry_down(struct session *t, int conn_err)
1389{
1390 /* we are in front of a retryable error */
1391 t->conn_retries--;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +02001392
Willy Tarreaubaaee002006-06-26 02:48:02 +02001393 if (t->conn_retries < 0) {
1394 /* if not retryable anymore, let's abort */
Willy Tarreaud7971282006-07-29 18:36:34 +02001395 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001396 srv_close_with_err(t, conn_err, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001397 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001398 if (t->srv)
1399 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001400 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001401
1402 /* We used to have a free connection slot. Since we'll never use it,
1403 * we have to inform the server that it may be used by another session.
1404 */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001405 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001406 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001407 return 1;
1408 }
1409 return 0;
1410}
1411
1412
1413/*
1414 * This function performs the retryable part of the connect() job.
1415 * It updates the session's srv_state and retries, so that the caller knows
1416 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
1417 * it needs to redispatch.
1418 */
1419int srv_retryable_connect(struct session *t)
1420{
1421 int conn_err;
1422
1423 /* This loop ensures that we stop before the last retry in case of a
1424 * redispatchable server.
1425 */
1426 do {
1427 /* initiate a connection to the server */
1428 conn_err = connect_server(t);
1429 switch (conn_err) {
1430
1431 case SN_ERR_NONE:
1432 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
1433 t->srv_state = SV_STCONN;
Willy Tarreau98937b82007-12-10 15:05:42 +01001434 if (t->srv)
1435 t->srv->cum_sess++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001436 return 1;
1437
1438 case SN_ERR_INTERNAL:
Willy Tarreaud7971282006-07-29 18:36:34 +02001439 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001440 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001441 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001442 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001443 t->srv->cum_sess++;
1444 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001445 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001446 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001447 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001448 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001449 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001450 return 1;
1451 }
1452 /* ensure that we have enough retries left */
1453 if (srv_count_retry_down(t, conn_err)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001454 return 1;
1455 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001456 } while (t->srv == NULL || t->conn_retries > 0 || !(t->be->options & PR_O_REDISP));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001457
1458 /* We're on our last chance, and the REDISP option was specified.
1459 * We will ignore cookie and force to balance or use the dispatcher.
1460 */
1461 /* let's try to offer this slot to anybody */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001462 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001463 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001464
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001465 if (t->srv)
1466 t->srv->cum_sess++; //FIXME?
Willy Tarreaubaaee002006-06-26 02:48:02 +02001467
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001468 /* it's left to the dispatcher to choose a server */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001469 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001470 return 0;
1471}
1472
1473
1474/* This function performs the "redispatch" part of a connection attempt. It
1475 * will assign a server if required, queue the connection if required, and
1476 * handle errors that might arise at this level. It can change the server
1477 * state. It will return 1 if it encounters an error, switches the server
1478 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1479 * that the connection is ready to use.
1480 */
1481
1482int srv_redispatch_connect(struct session *t)
1483{
1484 int conn_err;
1485
1486 /* We know that we don't have any connection pending, so we will
1487 * try to get a new one, and wait in this state if it's queued
1488 */
1489 conn_err = assign_server_and_queue(t);
1490 switch (conn_err) {
1491 case SRV_STATUS_OK:
1492 break;
1493
1494 case SRV_STATUS_NOSRV:
1495 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaud7971282006-07-29 18:36:34 +02001496 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001497 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001498 503, error_message(t, HTTP_ERR_503));
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001499
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001500 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001501
1502 return 1;
1503
1504 case SRV_STATUS_QUEUED:
Willy Tarreau1fa31262007-12-03 00:36:16 +01001505 /* note: we use the connect expiration date for the queue. */
1506 if (!tv_add_ifset(&t->req->cex, &now, &t->be->timeout.queue))
Willy Tarreaud7971282006-07-29 18:36:34 +02001507 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001508 t->srv_state = SV_STIDLE;
1509 /* do nothing else and do not wake any other session up */
1510 return 1;
1511
1512 case SRV_STATUS_FULL:
1513 case SRV_STATUS_INTERNAL:
1514 default:
Willy Tarreaud7971282006-07-29 18:36:34 +02001515 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001516 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001517 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001518 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001519 t->srv->cum_sess++;
1520 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001521 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001522 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001523
1524 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001525 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001526 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001527 return 1;
1528 }
1529 /* if we get here, it's because we got SRV_STATUS_OK, which also
1530 * means that the connection has not been queued.
1531 */
1532 return 0;
1533}
1534
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001535int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01001536 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001537 return px->down_time;
1538
1539 return now.tv_sec - px->last_change + px->down_time;
1540}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001541
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001542/* This function parses a "balance" statement in a backend section describing
1543 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
1544 * returns -1, it may write an error message into ther <err> buffer, for at
1545 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
1546 * written. The function must be called with <args> pointing to the first word
1547 * after "balance".
1548 */
1549int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
1550{
1551 if (!*(args[0])) {
1552 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01001553 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1554 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001555 return 0;
1556 }
1557
1558 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001559 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1560 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001561 }
1562 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001563 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1564 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001565 }
1566 else if (!strcmp(args[0], "uri")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001567 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1568 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001569 }
Willy Tarreau01732802007-11-01 22:48:15 +01001570 else if (!strcmp(args[0], "url_param")) {
1571 if (!*args[1]) {
1572 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
1573 return -1;
1574 }
Willy Tarreau31682232007-11-29 15:38:04 +01001575 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1576 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreau01732802007-11-01 22:48:15 +01001577 if (curproxy->url_param_name)
1578 free(curproxy->url_param_name);
1579 curproxy->url_param_name = strdup(args[1]);
1580 curproxy->url_param_len = strlen(args[1]);
1581 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001582 else {
Willy Tarreau01732802007-11-01 22:48:15 +01001583 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'source', 'uri' and 'url_param' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001584 return -1;
1585 }
1586 return 0;
1587}
1588
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01001589
1590/************************************************************************/
1591/* All supported keywords must be declared here. */
1592/************************************************************************/
1593
1594/* set test->i to the number of enabled servers on the proxy */
1595static int
1596acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
1597 struct acl_expr *expr, struct acl_test *test)
1598{
1599 test->flags = ACL_TEST_F_VOL_TEST;
1600 if (expr->arg_len) {
1601 /* another proxy was designated, we must look for it */
1602 for (px = proxy; px; px = px->next)
1603 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
1604 break;
1605 }
1606 if (!px)
1607 return 0;
1608
1609 if (px->srv_act)
1610 test->i = px->srv_act;
1611 else if (px->lbprm.fbck)
1612 test->i = 1;
1613 else
1614 test->i = px->srv_bck;
1615
1616 return 1;
1617}
1618
1619
1620/* Note: must not be declared <const> as its list will be overwritten */
1621static struct acl_kw_list acl_kws = {{ },{
1622 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int },
1623 { NULL, NULL, NULL, NULL },
1624}};
1625
1626
1627__attribute__((constructor))
1628static void __backend_init(void)
1629{
1630 acl_register_keywords(&acl_kws);
1631}
1632
1633
Willy Tarreaubaaee002006-06-26 02:48:02 +02001634/*
1635 * Local variables:
1636 * c-indent-level: 8
1637 * c-basic-offset: 8
1638 * End:
1639 */