blob: 666c218bf855261065b4b48b0d6dbb68ec1d8386 [file] [log] [blame]
/*
 * Backend variables and functions.
 *
 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020019
Willy Tarreau2dd0d472006-06-29 17:53:05 +020020#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020021#include <common/config.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010022#include <common/eb32tree.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020023#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020024
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010025#include <types/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020026#include <types/buffers.h>
27#include <types/global.h>
28#include <types/polling.h>
29#include <types/proxy.h>
30#include <types/server.h>
31#include <types/session.h>
32
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010033#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020034#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020035#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020036#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010037#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020038#include <proto/log.h>
39#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010040#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041#include <proto/queue.h>
42#include <proto/stream_sock.h>
43#include <proto/task.h>
44
Willy Tarreau6d1a9882007-01-07 02:03:04 +010045#ifdef CONFIG_HAP_TCPSPLICE
46#include <libtcpsplice.h>
47#endif
48
/* Forward declarations of the FWRR helpers which are defined further down
 * in this file but used by the status/weight update functions before that.
 */
static void fwrr_get_srv(struct server *s);
static void fwrr_queue_srv(struct server *s);
static inline void fwrr_dequeue_srv(struct server *s);
static inline void fwrr_remove_from_tree(struct server *s);
static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
54
55/* This function returns non-zero if a server with the given weight and state
56 * is usable for LB, otherwise zero.
57 */
58static inline int srv_is_usable(int state, int weight)
59{
60 if (!weight)
61 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010062 if (state & SRV_GOINGDOWN)
63 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010064 if (!(state & SRV_RUNNING))
65 return 0;
66 return 1;
67}
68
Willy Tarreaubaaee002006-06-26 02:48:02 +020069/*
70 * This function recounts the number of usable active and backup servers for
71 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010072 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010073 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010074 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020075 */
Willy Tarreaub625a082007-11-26 01:15:43 +010076static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020077{
78 struct server *srv;
79
Willy Tarreau20697042007-11-15 23:26:18 +010080 px->srv_act = px->srv_bck = 0;
81 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010082 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020083 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010084 if (!srv_is_usable(srv->state, srv->eweight))
85 continue;
86
87 if (srv->state & SRV_BACKUP) {
88 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010089 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010090 px->lbprm.fbck = srv;
91 px->srv_bck++;
92 px->lbprm.tot_wbck += srv->eweight;
93 } else {
94 px->srv_act++;
95 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020096 }
97 }
Willy Tarreaub625a082007-11-26 01:15:43 +010098}
Willy Tarreau20697042007-11-15 23:26:18 +010099
Willy Tarreaub625a082007-11-26 01:15:43 +0100100/* This function simply updates the backend's tot_weight and tot_used values
101 * after servers weights have been updated. It is designed to be used after
102 * recount_servers() or equivalent.
103 */
104static void update_backend_weight(struct proxy *px)
105{
Willy Tarreau20697042007-11-15 23:26:18 +0100106 if (px->srv_act) {
107 px->lbprm.tot_weight = px->lbprm.tot_wact;
108 px->lbprm.tot_used = px->srv_act;
109 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100110 else if (px->lbprm.fbck) {
111 /* use only the first backup server */
112 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
113 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100114 }
115 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100116 px->lbprm.tot_weight = px->lbprm.tot_wbck;
117 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100118 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100119}
120
121/* this function updates the map according to server <srv>'s new state */
122static void map_set_server_status_down(struct server *srv)
123{
124 struct proxy *p = srv->proxy;
125
126 if (srv->state == srv->prev_state &&
127 srv->eweight == srv->prev_eweight)
128 return;
129
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100130 if (srv_is_usable(srv->state, srv->eweight))
131 goto out_update_state;
132
Willy Tarreaub625a082007-11-26 01:15:43 +0100133 /* FIXME: could be optimized since we know what changed */
134 recount_servers(p);
135 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100136 p->lbprm.map.state |= PR_MAP_RECALC;
137 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100138 srv->prev_state = srv->state;
139 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200140}
141
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100142/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100143static void map_set_server_status_up(struct server *srv)
144{
145 struct proxy *p = srv->proxy;
146
147 if (srv->state == srv->prev_state &&
148 srv->eweight == srv->prev_eweight)
149 return;
150
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100151 if (!srv_is_usable(srv->state, srv->eweight))
152 goto out_update_state;
153
Willy Tarreaub625a082007-11-26 01:15:43 +0100154 /* FIXME: could be optimized since we know what changed */
155 recount_servers(p);
156 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100157 p->lbprm.map.state |= PR_MAP_RECALC;
158 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100159 srv->prev_state = srv->state;
160 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100161}
162
Willy Tarreau20697042007-11-15 23:26:18 +0100163/* This function recomputes the server map for proxy px. It relies on
164 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
165 * called after recount_servers(). It also expects px->lbprm.map.srv
166 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200167 */
168void recalc_server_map(struct proxy *px)
169{
170 int o, tot, flag;
171 struct server *cur, *best;
172
Willy Tarreau20697042007-11-15 23:26:18 +0100173 switch (px->lbprm.tot_used) {
174 case 0: /* no server */
175 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200176 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100177 case 1: /* only one server, just fill first entry */
178 tot = 1;
179 break;
180 default:
181 tot = px->lbprm.tot_weight;
182 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200183 }
184
Willy Tarreau20697042007-11-15 23:26:18 +0100185 /* here we *know* that we have some servers */
186 if (px->srv_act)
187 flag = SRV_RUNNING;
188 else
189 flag = SRV_RUNNING | SRV_BACKUP;
190
Willy Tarreaubaaee002006-06-26 02:48:02 +0200191 /* this algorithm gives priority to the first server, which means that
192 * it will respect the declaration order for equivalent weights, and
193 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100194 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200195 * case, where we want the first server only.
196 */
197 for (cur = px->srv; cur; cur = cur->next)
198 cur->wscore = 0;
199
200 for (o = 0; o < tot; o++) {
201 int max = 0;
202 best = NULL;
203 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100204 if (flag == (cur->state &
205 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200206 int v;
207
208 /* If we are forced to return only one server, we don't want to
209 * go further, because we would return the wrong one due to
210 * divide overflow.
211 */
212 if (tot == 1) {
213 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100214 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200215 break;
216 }
217
Willy Tarreau417fae02007-03-25 21:16:40 +0200218 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200219 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
220 if (best == NULL || v > max) {
221 max = v;
222 best = cur;
223 }
224 }
225 }
Willy Tarreau20697042007-11-15 23:26:18 +0100226 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200227 best->wscore -= tot;
228 }
Willy Tarreau20697042007-11-15 23:26:18 +0100229 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200230}
231
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100232/* This function is responsible of building the server MAP for map-based LB
233 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
234 * weights if applicable. It should be called only once per proxy, at config
235 * time.
236 */
237void init_server_map(struct proxy *p)
238{
239 struct server *srv;
240 int pgcd;
241 int act, bck;
242
Willy Tarreaub625a082007-11-26 01:15:43 +0100243 p->lbprm.set_server_status_up = map_set_server_status_up;
244 p->lbprm.set_server_status_down = map_set_server_status_down;
245 p->lbprm.update_server_eweight = NULL;
246
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100247 if (!p->srv)
248 return;
249
250 /* We will factor the weights to reduce the table,
251 * using Euclide's largest common divisor algorithm
252 */
253 pgcd = p->srv->uweight;
254 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
255 int w = srv->uweight;
256 while (w) {
257 int t = pgcd % w;
258 pgcd = w;
259 w = t;
260 }
261 }
262
263 /* It is sometimes useful to know what factor to apply
264 * to the backend's effective weight to know its real
265 * weight.
266 */
267 p->lbprm.wmult = pgcd;
268
269 act = bck = 0;
270 for (srv = p->srv; srv; srv = srv->next) {
271 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100272 srv->prev_eweight = srv->eweight;
273 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100274 if (srv->state & SRV_BACKUP)
275 bck += srv->eweight;
276 else
277 act += srv->eweight;
278 }
279
280 /* this is the largest map we will ever need for this servers list */
281 if (act < bck)
282 act = bck;
283
284 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
285 /* recounts servers and their weights */
286 p->lbprm.map.state = PR_MAP_RECALC;
287 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100288 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100289 recalc_server_map(p);
290}
291
Willy Tarreaub625a082007-11-26 01:15:43 +0100292/* This function updates the server trees according to server <srv>'s new
293 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100294 * It is not important whether the server was already down or not. It is not
295 * important either that the new state is completely down (the caller may not
296 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100297 */
298static void fwrr_set_server_status_down(struct server *srv)
299{
300 struct proxy *p = srv->proxy;
301 struct fwrr_group *grp;
302
303 if (srv->state == srv->prev_state &&
304 srv->eweight == srv->prev_eweight)
305 return;
306
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100307 if (srv_is_usable(srv->state, srv->eweight))
308 goto out_update_state;
309
Willy Tarreaub625a082007-11-26 01:15:43 +0100310 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
311 /* server was already down */
312 goto out_update_backend;
313
314 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
315 grp->next_weight -= srv->prev_eweight;
316
317 if (srv->state & SRV_BACKUP) {
318 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
319 p->srv_bck--;
320
321 if (srv == p->lbprm.fbck) {
322 /* we lost the first backup server in a single-backup
323 * configuration, we must search another one.
324 */
325 struct server *srv2 = p->lbprm.fbck;
326 do {
327 srv2 = srv2->next;
328 } while (srv2 &&
329 !((srv2->state & SRV_BACKUP) &&
330 srv_is_usable(srv2->state, srv2->eweight)));
331 p->lbprm.fbck = srv2;
332 }
333 } else {
334 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
335 p->srv_act--;
336 }
337
338 fwrr_dequeue_srv(srv);
339 fwrr_remove_from_tree(srv);
340
341out_update_backend:
342 /* check/update tot_used, tot_weight */
343 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100344 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100345 srv->prev_state = srv->state;
346 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100347}
348
349/* This function updates the server trees according to server <srv>'s new
350 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100351 * It is not important whether the server was already down or not. It is not
352 * important either that the new state is completely UP (the caller may not
353 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100354 * the weight of a server which was already up.
355 */
356static void fwrr_set_server_status_up(struct server *srv)
357{
358 struct proxy *p = srv->proxy;
359 struct fwrr_group *grp;
360
361 if (srv->state == srv->prev_state &&
362 srv->eweight == srv->prev_eweight)
363 return;
364
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100365 if (!srv_is_usable(srv->state, srv->eweight))
366 goto out_update_state;
367
Willy Tarreaub625a082007-11-26 01:15:43 +0100368 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
369 /* server was already up */
370 goto out_update_backend;
371
372 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
373 grp->next_weight += srv->eweight;
374
375 if (srv->state & SRV_BACKUP) {
376 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
377 p->srv_bck++;
378
Willy Tarreauf4cca452008-03-08 21:42:54 +0100379 if (!(p->options & PR_O_USE_ALL_BK)) {
380 if (!p->lbprm.fbck) {
381 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100382 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100383 } else {
384 /* we may have restored a backup server prior to fbck,
385 * in which case it should replace it.
386 */
387 struct server *srv2 = srv;
388 do {
389 srv2 = srv2->next;
390 } while (srv2 && (srv2 != p->lbprm.fbck));
391 if (srv2)
392 p->lbprm.fbck = srv;
393 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100394 }
395 } else {
396 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
397 p->srv_act++;
398 }
399
400 /* note that eweight cannot be 0 here */
401 fwrr_get_srv(srv);
402 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
403 fwrr_queue_srv(srv);
404
405out_update_backend:
406 /* check/update tot_used, tot_weight */
407 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100408 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100409 srv->prev_state = srv->state;
410 srv->prev_eweight = srv->eweight;
411}
412
413/* This function must be called after an update to server <srv>'s effective
414 * weight. It may be called after a state change too.
415 */
416static void fwrr_update_server_weight(struct server *srv)
417{
418 int old_state, new_state;
419 struct proxy *p = srv->proxy;
420 struct fwrr_group *grp;
421
422 if (srv->state == srv->prev_state &&
423 srv->eweight == srv->prev_eweight)
424 return;
425
426 /* If changing the server's weight changes its state, we simply apply
427 * the procedures we already have for status change. If the state
428 * remains down, the server is not in any tree, so it's as easy as
429 * updating its values. If the state remains up with different weights,
430 * there are some computations to perform to find a new place and
431 * possibly a new tree for this server.
432 */
433
434 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
435 new_state = srv_is_usable(srv->state, srv->eweight);
436
437 if (!old_state && !new_state) {
438 srv->prev_state = srv->state;
439 srv->prev_eweight = srv->eweight;
440 return;
441 }
442 else if (!old_state && new_state) {
443 fwrr_set_server_status_up(srv);
444 return;
445 }
446 else if (old_state && !new_state) {
447 fwrr_set_server_status_down(srv);
448 return;
449 }
450
451 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
452 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
453
454 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
455 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
456
457 if (srv->lb_tree == grp->init) {
458 fwrr_dequeue_srv(srv);
459 fwrr_queue_by_weight(grp->init, srv);
460 }
461 else if (!srv->lb_tree) {
462 /* FIXME: server was down. This is not possible right now but
463 * may be needed soon for slowstart or graceful shutdown.
464 */
465 fwrr_dequeue_srv(srv);
466 fwrr_get_srv(srv);
467 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
468 fwrr_queue_srv(srv);
469 } else {
470 /* The server is either active or in the next queue. If it's
471 * still in the active queue and it has not consumed all of its
472 * places, let's adjust its next position.
473 */
474 fwrr_get_srv(srv);
475
476 if (srv->eweight > 0) {
477 int prev_next = srv->npos;
478 int step = grp->next_weight / srv->eweight;
479
480 srv->npos = srv->lpos + step;
481 srv->rweight = 0;
482
483 if (srv->npos > prev_next)
484 srv->npos = prev_next;
485 if (srv->npos < grp->curr_pos + 2)
486 srv->npos = grp->curr_pos + step;
487 } else {
488 /* push it into the next tree */
489 srv->npos = grp->curr_pos + grp->curr_weight;
490 }
491
492 fwrr_dequeue_srv(srv);
493 fwrr_queue_srv(srv);
494 }
495
496 update_backend_weight(p);
497 srv->prev_state = srv->state;
498 srv->prev_eweight = srv->eweight;
499}
500
501/* Remove a server from a tree. It must have previously been dequeued. This
502 * function is meant to be called when a server is going down or has its
503 * weight disabled.
504 */
505static inline void fwrr_remove_from_tree(struct server *s)
506{
507 s->lb_tree = NULL;
508}
509
510/* Queue a server in the weight tree <root>, assuming the weight is >0.
511 * We want to sort them by inverted weights, because we need to place
512 * heavy servers first in order to get a smooth distribution.
513 */
514static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
515{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100516 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100517 eb32_insert(root, &s->lb_node);
518 s->lb_tree = root;
519}
520
521/* This function is responsible for building the weight trees in case of fast
522 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
523 * ratio. Both active and backup groups are initialized.
524 */
525void fwrr_init_server_groups(struct proxy *p)
526{
527 struct server *srv;
528 struct eb_root init_head = EB_ROOT;
529
530 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
531 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
532 p->lbprm.update_server_eweight = fwrr_update_server_weight;
533
534 p->lbprm.wdiv = BE_WEIGHT_SCALE;
535 for (srv = p->srv; srv; srv = srv->next) {
536 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
537 srv->prev_state = srv->state;
538 }
539
540 recount_servers(p);
541 update_backend_weight(p);
542
543 /* prepare the active servers group */
544 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
545 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
546 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
547 p->lbprm.fwrr.act.t1 = init_head;
548 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
549 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
550
551 /* prepare the backup servers group */
552 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
553 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
554 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
555 p->lbprm.fwrr.bck.t1 = init_head;
556 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
557 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
558
559 /* queue active and backup servers in two distinct groups */
560 for (srv = p->srv; srv; srv = srv->next) {
561 if (!srv_is_usable(srv->state, srv->eweight))
562 continue;
563 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
564 p->lbprm.fwrr.bck.init :
565 p->lbprm.fwrr.act.init,
566 srv);
567 }
568}
569
570/* simply removes a server from a weight tree */
571static inline void fwrr_dequeue_srv(struct server *s)
572{
573 eb32_delete(&s->lb_node);
574}
575
576/* queues a server into the appropriate group and tree depending on its
577 * backup status, and ->npos. If the server is disabled, simply assign
578 * it to the NULL tree.
579 */
580static void fwrr_queue_srv(struct server *s)
581{
582 struct proxy *p = s->proxy;
583 struct fwrr_group *grp;
584
585 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
586
587 /* Delay everything which does not fit into the window and everything
588 * which does not fit into the theorical new window.
589 */
590 if (!srv_is_usable(s->state, s->eweight)) {
591 fwrr_remove_from_tree(s);
592 }
593 else if (s->eweight <= 0 ||
594 s->npos >= 2 * grp->curr_weight ||
595 s->npos >= grp->curr_weight + grp->next_weight) {
596 /* put into next tree, and readjust npos in case we could
597 * finally take this back to current. */
598 s->npos -= grp->curr_weight;
599 fwrr_queue_by_weight(grp->next, s);
600 }
601 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100602 /* The sorting key is stored in units of s->npos * user_weight
603 * in order to avoid overflows. As stated in backend.h, the
604 * lower the scale, the rougher the weights modulation, and the
605 * higher the scale, the lower the number of servers without
606 * overflow. With this formula, the result is always positive,
607 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100608 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100609 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
610 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
611
612 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100613 s->lb_tree = &grp->curr;
614 }
615}
616
617/* prepares a server when extracting it from the "init" tree */
618static inline void fwrr_get_srv_init(struct server *s)
619{
620 s->npos = s->rweight = 0;
621}
622
623/* prepares a server when extracting it from the "next" tree */
624static inline void fwrr_get_srv_next(struct server *s)
625{
626 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
627 &s->proxy->lbprm.fwrr.bck :
628 &s->proxy->lbprm.fwrr.act;
629
630 s->npos += grp->curr_weight;
631}
632
633/* prepares a server when it was marked down */
634static inline void fwrr_get_srv_down(struct server *s)
635{
636 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
637 &s->proxy->lbprm.fwrr.bck :
638 &s->proxy->lbprm.fwrr.act;
639
640 s->npos = grp->curr_pos;
641}
642
643/* prepares a server when extracting it from its tree */
644static void fwrr_get_srv(struct server *s)
645{
646 struct proxy *p = s->proxy;
647 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
648 &p->lbprm.fwrr.bck :
649 &p->lbprm.fwrr.act;
650
651 if (s->lb_tree == grp->init) {
652 fwrr_get_srv_init(s);
653 }
654 else if (s->lb_tree == grp->next) {
655 fwrr_get_srv_next(s);
656 }
657 else if (s->lb_tree == NULL) {
658 fwrr_get_srv_down(s);
659 }
660}
661
662/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
663 * when this happens, and "next" filled with servers sorted by weights.
664 */
665static inline void fwrr_switch_trees(struct fwrr_group *grp)
666{
667 struct eb_root *swap;
668 swap = grp->init;
669 grp->init = grp->next;
670 grp->next = swap;
671 grp->curr_weight = grp->next_weight;
672 grp->curr_pos = grp->curr_weight;
673}
674
675/* return next server from the current tree in FWRR group <grp>, or a server
676 * from the "init" tree if appropriate. If both trees are empty, return NULL.
677 */
678static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
679{
680 struct eb32_node *node;
681 struct server *s;
682
683 node = eb32_first(&grp->curr);
684 s = eb32_entry(node, struct server, lb_node);
685
686 if (!node || s->npos > grp->curr_pos) {
687 /* either we have no server left, or we have a hole */
688 struct eb32_node *node2;
689 node2 = eb32_first(grp->init);
690 if (node2) {
691 node = node2;
692 s = eb32_entry(node, struct server, lb_node);
693 fwrr_get_srv_init(s);
694 if (s->eweight == 0) /* FIXME: is it possible at all ? */
695 node = NULL;
696 }
697 }
698 if (node)
699 return s;
700 else
701 return NULL;
702}
703
704/* Computes next position of server <s> in the group. It is mandatory for <s>
705 * to have a non-zero, positive eweight.
706*/
707static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
708{
709 if (!s->npos) {
710 /* first time ever for this server */
711 s->lpos = grp->curr_pos;
712 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
713 s->rweight += grp->next_weight % s->eweight;
714
715 if (s->rweight >= s->eweight) {
716 s->rweight -= s->eweight;
717 s->npos++;
718 }
719 } else {
720 s->lpos = s->npos;
721 s->npos += grp->next_weight / s->eweight;
722 s->rweight += grp->next_weight % s->eweight;
723
724 if (s->rweight >= s->eweight) {
725 s->rweight -= s->eweight;
726 s->npos++;
727 }
728 }
729}
730
731/* Return next server from the current tree in backend <p>, or a server from
732 * the init tree if appropriate. If both trees are empty, return NULL.
733 * Saturated servers are skipped and requeued.
734 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100735static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100736{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100737 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100738 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100739 int switched;
740
741 if (p->srv_act)
742 grp = &p->lbprm.fwrr.act;
743 else if (p->lbprm.fbck)
744 return p->lbprm.fbck;
745 else if (p->srv_bck)
746 grp = &p->lbprm.fwrr.bck;
747 else
748 return NULL;
749
750 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100751 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100752 full = NULL; /* NULL-terminated list of saturated servers */
753 while (1) {
754 /* if we see an empty group, let's first try to collect weights
755 * which might have recently changed.
756 */
757 if (!grp->curr_weight)
758 grp->curr_pos = grp->curr_weight = grp->next_weight;
759
760 /* get first server from the "current" tree. When the end of
761 * the tree is reached, we may have to switch, but only once.
762 */
763 while (1) {
764 srv = fwrr_get_server_from_group(grp);
765 if (srv)
766 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100767 if (switched) {
768 if (avoided) {
769 srv = avoided;
770 break;
771 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100772 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100773 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100774 switched = 1;
775 fwrr_switch_trees(grp);
776
777 }
778
779 /* OK, we have a server. However, it may be saturated, in which
780 * case we don't want to reconsider it for now. We'll update
781 * its position and dequeue it anyway, so that we can move it
782 * to a better place afterwards.
783 */
784 fwrr_update_position(grp, srv);
785 fwrr_dequeue_srv(srv);
786 grp->curr_pos++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100787 if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) {
788 /* make sure it is not the server we are trying to exclude... */
789 if (srv != srvtoavoid || avoided)
790 break;
791
792 avoided = srv; /* ...but remember that is was selected yet avoided */
793 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100794
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100795 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100796 srv->next_full = full;
797 full = srv;
798 }
799
800 /* OK, we got the best server, let's update it */
801 fwrr_queue_srv(srv);
802
803 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100804 /* Requeue all extracted servers. If full==srv then it was
805 * avoided (unsucessfully) and chained, omit it now.
806 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100807 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100808 if (switched) {
809 /* the tree has switched, requeue all extracted servers
810 * into "init", because their place was lost, and only
811 * their weight matters.
812 */
813 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100814 if (likely(full != srv))
815 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100816 full = full->next_full;
817 } while (full);
818 } else {
819 /* requeue all extracted servers just as if they were consumed
820 * so that they regain their expected place.
821 */
822 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100823 if (likely(full != srv))
824 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100825 full = full->next_full;
826 } while (full);
827 }
828 }
829 return srv;
830}
831
Willy Tarreau01732802007-11-01 22:48:15 +0100832/*
833 * This function tries to find a running server for the proxy <px> following
834 * the URL parameter hash method. It looks for a specific parameter in the
835 * URL and hashes it to compute the server ID. This is useful to optimize
836 * performance by avoiding bounces between servers in contexts where sessions
837 * are shared but cookies are not usable. If the parameter is not found, NULL
838 * is returned. If any server is found, it will be returned. If no valid server
839 * is found, NULL is returned.
840 *
841 */
842struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
843{
844 unsigned long hash = 0;
845 char *p;
846 int plen;
847
Willy Tarreau20697042007-11-15 23:26:18 +0100848 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +0100849 return NULL;
850
Willy Tarreau20697042007-11-15 23:26:18 +0100851 if (px->lbprm.map.state & PR_MAP_RECALC)
852 recalc_server_map(px);
853
Willy Tarreau01732802007-11-01 22:48:15 +0100854 p = memchr(uri, '?', uri_len);
855 if (!p)
856 return NULL;
857 p++;
858
859 uri_len -= (p - uri);
860 plen = px->url_param_len;
861
862 if (uri_len <= plen)
863 return NULL;
864
865 while (uri_len > plen) {
866 /* Look for the parameter name followed by an equal symbol */
867 if (p[plen] == '=') {
868 /* skip the equal symbol */
869 uri = p;
870 p += plen + 1;
871 uri_len -= plen + 1;
872 if (memcmp(uri, px->url_param_name, plen) == 0) {
873 /* OK, we have the parameter here at <uri>, and
874 * the value after the equal sign, at <p>
875 */
876 while (uri_len && *p != '&') {
877 hash = *p + (hash << 6) + (hash << 16) - hash;
878 uri_len--;
879 p++;
880 }
Willy Tarreau20697042007-11-15 23:26:18 +0100881 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +0100882 }
883 }
884
885 /* skip to next parameter */
886 uri = p;
887 p = memchr(uri, '&', uri_len);
888 if (!p)
889 return NULL;
890 p++;
891 uri_len -= (p - uri);
892 }
893 return NULL;
894}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200895
896/*
897 * This function marks the session as 'assigned' in direct or dispatch modes,
898 * or tries to assign one in balance mode, according to the algorithm. It does
899 * nothing if the session had already been assigned a server.
900 *
901 * It may return :
902 * SRV_STATUS_OK if everything is OK. s->srv will be valid.
903 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
904 * SRV_STATUS_FULL if all servers are saturated. s->srv = NULL.
905 * SRV_STATUS_INTERNAL for other unrecoverable errors.
906 *
907 * Upon successful return, the session flag SN_ASSIGNED to indicate that it does
908 * not need to be called anymore. This usually means that s->srv can be trusted
909 * in balance and direct modes. This flag is not cleared, so it's to the caller
910 * to clear it if required (eg: redispatch).
911 *
912 */
913
914int assign_server(struct session *s)
915{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100916
917 struct server *srvtoavoid;
918
Willy Tarreaubaaee002006-06-26 02:48:02 +0200919#ifdef DEBUG_FULL
920 fprintf(stderr,"assign_server : s=%p\n",s);
921#endif
922
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100923 srvtoavoid = s->srv;
924 s->srv = NULL;
925
Willy Tarreaubaaee002006-06-26 02:48:02 +0200926 if (s->pend_pos)
927 return SRV_STATUS_INTERNAL;
928
929 if (!(s->flags & SN_ASSIGNED)) {
Willy Tarreau31682232007-11-29 15:38:04 +0100930 if (s->be->lbprm.algo & BE_LB_ALGO) {
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100931 int len;
932
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100933 if (s->flags & SN_DIRECT) {
934 s->flags |= SN_ASSIGNED;
935 return SRV_STATUS_OK;
936 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100937
Willy Tarreaub625a082007-11-26 01:15:43 +0100938 if (!s->be->lbprm.tot_weight)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200939 return SRV_STATUS_NOSRV;
940
Willy Tarreau31682232007-11-29 15:38:04 +0100941 switch (s->be->lbprm.algo & BE_LB_ALGO) {
942 case BE_LB_ALGO_RR:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100943 s->srv = fwrr_get_next_server(s->be, srvtoavoid);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200944 if (!s->srv)
945 return SRV_STATUS_FULL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100946 break;
Willy Tarreau31682232007-11-29 15:38:04 +0100947 case BE_LB_ALGO_SH:
Willy Tarreaubaaee002006-06-26 02:48:02 +0200948 if (s->cli_addr.ss_family == AF_INET)
949 len = 4;
950 else if (s->cli_addr.ss_family == AF_INET6)
951 len = 16;
952 else /* unknown IP family */
953 return SRV_STATUS_INTERNAL;
954
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200955 s->srv = get_server_sh(s->be,
Willy Tarreaubaaee002006-06-26 02:48:02 +0200956 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
957 len);
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100958 break;
Willy Tarreau31682232007-11-29 15:38:04 +0100959 case BE_LB_ALGO_UH:
Willy Tarreau2fcb5002007-05-08 13:35:26 +0200960 /* URI hashing */
961 s->srv = get_server_uh(s->be,
962 s->txn.req.sol + s->txn.req.sl.rq.u,
963 s->txn.req.sl.rq.u_l);
Willy Tarreau01732802007-11-01 22:48:15 +0100964 break;
Willy Tarreau31682232007-11-29 15:38:04 +0100965 case BE_LB_ALGO_PH:
Willy Tarreau01732802007-11-01 22:48:15 +0100966 /* URL Parameter hashing */
967 s->srv = get_server_ph(s->be,
968 s->txn.req.sol + s->txn.req.sl.rq.u,
969 s->txn.req.sl.rq.u_l);
970 if (!s->srv) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100971 /* parameter not found, fall back to round robin on the map */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100972 s->srv = get_server_rr_with_conns(s->be, srvtoavoid);
Willy Tarreau01732802007-11-01 22:48:15 +0100973 if (!s->srv)
974 return SRV_STATUS_FULL;
975 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100976 break;
977 default:
978 /* unknown balancing algorithm */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200979 return SRV_STATUS_INTERNAL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +0100980 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100981 if (s->srv != srvtoavoid) {
982 s->be->cum_lbconn++;
983 s->srv->cum_lbconn++;
984 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200985 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +0100986 else if (s->be->options & PR_O_HTTP_PROXY) {
987 if (!s->srv_addr.sin_addr.s_addr)
988 return SRV_STATUS_NOSRV;
989 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +0200990 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100991 !(s->fe->options & PR_O_TRANSP)) {
Willy Tarreau1a1158b2007-01-20 11:07:46 +0100992 return SRV_STATUS_NOSRV;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +0100993 }
994 s->flags |= SN_ASSIGNED;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200995 }
996 return SRV_STATUS_OK;
997}
998
999
1000/*
1001 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1002 * The address is taken from the currently assigned server, or from the
1003 * dispatch or transparent address.
1004 *
1005 * It may return :
1006 * SRV_STATUS_OK if everything is OK.
1007 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1008 *
1009 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1010 * not cleared, so it's to the caller to clear it if required.
1011 *
1012 */
1013int assign_server_address(struct session *s)
1014{
1015#ifdef DEBUG_FULL
1016 fprintf(stderr,"assign_server_address : s=%p\n",s);
1017#endif
1018
Willy Tarreau31682232007-11-29 15:38:04 +01001019 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001020 /* A server is necessarily known for this session */
1021 if (!(s->flags & SN_ASSIGNED))
1022 return SRV_STATUS_INTERNAL;
1023
1024 s->srv_addr = s->srv->addr;
1025
1026 /* if this server remaps proxied ports, we'll use
1027 * the port the client connected to with an offset. */
1028 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001029 if (!(s->fe->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
1030 get_frt_addr(s);
1031 if (s->frt_addr.ss_family == AF_INET) {
1032 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1033 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1034 } else {
1035 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1036 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1037 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001038 }
1039 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001040 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001041 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001042 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001043 }
Willy Tarreau73de9892006-11-30 11:40:23 +01001044 else if (s->fe->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001045 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001046 if (!(s->flags & SN_FRT_ADDR_SET))
1047 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001048
Willy Tarreaubd414282008-01-19 13:46:35 +01001049 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1050 /* when we support IPv6 on the backend, we may add other tests */
1051 //qfprintf(stderr, "Cannot get original server address.\n");
1052 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001053 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001054 else if (s->be->options & PR_O_HTTP_PROXY) {
1055 /* If HTTP PROXY option is set, then server is already assigned
1056 * during incoming client request parsing. */
1057 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001058 else {
1059 /* no server and no LB algorithm ! */
1060 return SRV_STATUS_INTERNAL;
1061 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001062
1063 s->flags |= SN_ADDR_SET;
1064 return SRV_STATUS_OK;
1065}
1066
1067
1068/* This function assigns a server to session <s> if required, and can add the
1069 * connection to either the assigned server's queue or to the proxy's queue.
1070 *
1071 * Returns :
1072 *
1073 * SRV_STATUS_OK if everything is OK.
1074 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1075 * SRV_STATUS_QUEUED if the connection has been queued.
1076 * SRV_STATUS_FULL if the server(s) is/are saturated and the
1077 * connection could not be queued.
1078 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1079 *
1080 */
1081int assign_server_and_queue(struct session *s)
1082{
1083 struct pendconn *p;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001084 struct server *srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001085 int err;
1086
1087 if (s->pend_pos)
1088 return SRV_STATUS_INTERNAL;
1089
1090 if (s->flags & SN_ASSIGNED) {
Willy Tarreau21d2af32008-02-14 20:25:24 +01001091 if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
1092 /* server scheduled for redirection, and already assigned. We
1093 * don't want to go further nor check the queue.
1094 */
1095 return SRV_STATUS_OK;
1096 }
1097
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001098 if (s->srv && s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001099 /* it's left to the dispatcher to choose a server */
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001100 s->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001101 } else {
1102 /* a server does not need to be assigned, perhaps because we're in
1103 * direct mode, or in dispatch or transparent modes where the server
1104 * is not needed.
1105 */
1106 if (s->srv &&
1107 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1108 p = pendconn_add(s);
1109 if (p)
1110 return SRV_STATUS_QUEUED;
1111 else
1112 return SRV_STATUS_FULL;
1113 }
1114 return SRV_STATUS_OK;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001115 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001116 }
1117
1118 /* a server needs to be assigned */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001119 srv = s->srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001120 err = assign_server(s);
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001121
1122 if (srv) {
1123 if (srv != s->srv) {
1124 /* This session was previously dispatched to another server:
1125 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1126 * - set SN_REDISP if it was successfully redispatched
1127 * - increment srv->redispatches and be->redispatches
1128 */
1129
1130 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1131 s->txn.flags &= ~TX_CK_MASK;
1132 s->txn.flags |= TX_CK_DOWN;
1133 }
1134
1135 s->flags |= SN_REDISP;
1136
1137 srv->redispatches++;
1138 s->be->redispatches++;
1139 } else {
1140 srv->retries++;
1141 s->be->retries++;
1142 }
1143 }
1144
Willy Tarreaubaaee002006-06-26 02:48:02 +02001145 switch (err) {
1146 case SRV_STATUS_OK:
Willy Tarreau21d2af32008-02-14 20:25:24 +01001147 if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
1148 /* server supporting redirection and it is possible.
1149 * Let's report that and ignore maxconn !
1150 */
1151 return SRV_STATUS_OK;
1152 }
1153
Willy Tarreaubaaee002006-06-26 02:48:02 +02001154 /* in balance mode, we might have servers with connection limits */
1155 if (s->srv &&
1156 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1157 p = pendconn_add(s);
1158 if (p)
1159 return SRV_STATUS_QUEUED;
1160 else
1161 return SRV_STATUS_FULL;
1162 }
1163 return SRV_STATUS_OK;
1164
1165 case SRV_STATUS_FULL:
1166 /* queue this session into the proxy's queue */
1167 p = pendconn_add(s);
1168 if (p)
1169 return SRV_STATUS_QUEUED;
1170 else
1171 return SRV_STATUS_FULL;
1172
1173 case SRV_STATUS_NOSRV:
1174 case SRV_STATUS_INTERNAL:
1175 return err;
1176 default:
1177 return SRV_STATUS_INTERNAL;
1178 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001179}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001180
1181/*
1182 * This function initiates a connection to the server assigned to this session
1183 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1184 * It can return one of :
1185 * - SN_ERR_NONE if everything's OK
1186 * - SN_ERR_SRVTO if there are no more servers
1187 * - SN_ERR_SRVCL if the connection was refused by the server
1188 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1189 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1190 * - SN_ERR_INTERNAL for any other purely internal errors
1191 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1192 */
1193int connect_server(struct session *s)
1194{
1195 int fd, err;
1196
1197 if (!(s->flags & SN_ADDR_SET)) {
1198 err = assign_server_address(s);
1199 if (err != SRV_STATUS_OK)
1200 return SN_ERR_INTERNAL;
1201 }
1202
1203 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
1204 qfprintf(stderr, "Cannot get a server socket.\n");
1205
1206 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001207 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001208 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001209 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001210 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001211 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001212 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001213 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001214 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001215 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001216 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001217 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001218 /* this is a resource error */
1219 return SN_ERR_RESOURCE;
1220 }
1221
1222 if (fd >= global.maxsock) {
1223 /* do not log anything there, it's a normal condition when this option
1224 * is used to serialize connections to a server !
1225 */
1226 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1227 close(fd);
1228 return SN_ERR_PRXCOND; /* it is a configuration limit */
1229 }
1230
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001231#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001232 if ((s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001233 /* TCP splicing supported by both FE and BE */
1234 tcp_splice_initfd(s->cli_fd, fd);
1235 }
1236#endif
1237
Willy Tarreaubaaee002006-06-26 02:48:02 +02001238 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1239 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1240 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1241 close(fd);
1242 return SN_ERR_INTERNAL;
1243 }
1244
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001245 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001246 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1247
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001248 if (s->be->options & PR_O_TCP_NOLING)
1249 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1250
Willy Tarreaubaaee002006-06-26 02:48:02 +02001251 /* allow specific binding :
1252 * - server-specific at first
1253 * - proxy-specific next
1254 */
1255 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001256 struct sockaddr_in *remote = NULL;
1257 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001258
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001259#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001260 switch (s->srv->state & SRV_TPROXY_MASK) {
1261 case SRV_TPROXY_ADDR:
1262 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1263 flags = 3;
1264 break;
1265 case SRV_TPROXY_CLI:
1266 flags |= 2;
1267 /* fall through */
1268 case SRV_TPROXY_CIP:
1269 /* FIXME: what can we do if the client connects in IPv6 ? */
1270 flags |= 1;
1271 remote = (struct sockaddr_in *)&s->cli_addr;
1272 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001273 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001274#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001275 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001276 if (ret) {
1277 close(fd);
1278 if (ret == 1) {
1279 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1280 s->be->id, s->srv->id);
1281 send_log(s->be, LOG_EMERG,
1282 "Cannot bind to source address before connect() for server %s/%s.\n",
1283 s->be->id, s->srv->id);
1284 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001285 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001286 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001287 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001288 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001289 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001290 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001291 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001292 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001293 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001294 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001295 struct sockaddr_in *remote = NULL;
1296 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001297
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001298#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001299 switch (s->be->options & PR_O_TPXY_MASK) {
1300 case PR_O_TPXY_ADDR:
1301 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1302 flags = 3;
1303 break;
1304 case PR_O_TPXY_CLI:
1305 flags |= 2;
1306 /* fall through */
1307 case PR_O_TPXY_CIP:
1308 /* FIXME: what can we do if the client connects in IPv6 ? */
1309 flags |= 1;
1310 remote = (struct sockaddr_in *)&s->cli_addr;
1311 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001312 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001313#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001314 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001315 if (ret) {
1316 close(fd);
1317 if (ret == 1) {
1318 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1319 s->be->id);
1320 send_log(s->be, LOG_EMERG,
1321 "Cannot bind to source address before connect() for proxy %s.\n",
1322 s->be->id);
1323 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001324 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001325 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001326 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001327 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1328 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001329 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001330 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001331 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001332 }
1333
1334 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1335 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1336
1337 if (errno == EAGAIN || errno == EADDRINUSE) {
1338 char *msg;
1339 if (errno == EAGAIN) /* no free ports left, try again later */
1340 msg = "no free ports";
1341 else
1342 msg = "local address already in use";
1343
1344 qfprintf(stderr,"Cannot connect: %s.\n",msg);
1345 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001346 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001347 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001348 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001349 return SN_ERR_RESOURCE;
1350 } else if (errno == ETIMEDOUT) {
1351 //qfprintf(stderr,"Connect(): ETIMEDOUT");
1352 close(fd);
1353 return SN_ERR_SRVTO;
1354 } else {
1355 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1356 //qfprintf(stderr,"Connect(): %d", errno);
1357 close(fd);
1358 return SN_ERR_SRVCL;
1359 }
1360 }
1361
1362 fdtab[fd].owner = s->task;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001363 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +02001364 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001365 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001366 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001367 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001368
1369 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1370 fdtab[fd].peerlen = sizeof(s->srv_addr);
1371
Willy Tarreauf161a342007-04-08 16:59:42 +02001372 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001373
1374 fd_insert(fd);
1375 if (s->srv) {
1376 s->srv->cur_sess++;
1377 if (s->srv->cur_sess > s->srv->cur_sess_max)
1378 s->srv->cur_sess_max = s->srv->cur_sess;
1379 }
1380
Willy Tarreaud7c30f92007-12-03 01:38:36 +01001381 if (!tv_add_ifset(&s->req->cex, &now, &s->be->timeout.connect))
Willy Tarreaud7971282006-07-29 18:36:34 +02001382 tv_eternity(&s->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001383 return SN_ERR_NONE; /* connection is OK */
1384}
1385
1386
1387/*
1388 * This function checks the retry count during the connect() job.
1389 * It updates the session's srv_state and retries, so that the caller knows
1390 * what it has to do. It uses the last connection error to set the log when
1391 * it expires. It returns 1 when it has expired, and 0 otherwise.
1392 */
1393int srv_count_retry_down(struct session *t, int conn_err)
1394{
1395 /* we are in front of a retryable error */
1396 t->conn_retries--;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +02001397
Willy Tarreaubaaee002006-06-26 02:48:02 +02001398 if (t->conn_retries < 0) {
1399 /* if not retryable anymore, let's abort */
Willy Tarreaud7971282006-07-29 18:36:34 +02001400 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001401 srv_close_with_err(t, conn_err, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001402 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001403 if (t->srv)
1404 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001405 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001406
1407 /* We used to have a free connection slot. Since we'll never use it,
1408 * we have to inform the server that it may be used by another session.
1409 */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001410 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001411 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001412 return 1;
1413 }
1414 return 0;
1415}
1416
1417
1418/*
1419 * This function performs the retryable part of the connect() job.
1420 * It updates the session's srv_state and retries, so that the caller knows
1421 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
1422 * it needs to redispatch.
1423 */
1424int srv_retryable_connect(struct session *t)
1425{
1426 int conn_err;
1427
1428 /* This loop ensures that we stop before the last retry in case of a
1429 * redispatchable server.
1430 */
1431 do {
1432 /* initiate a connection to the server */
1433 conn_err = connect_server(t);
1434 switch (conn_err) {
1435
1436 case SN_ERR_NONE:
1437 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
1438 t->srv_state = SV_STCONN;
Willy Tarreau98937b82007-12-10 15:05:42 +01001439 if (t->srv)
1440 t->srv->cum_sess++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001441 return 1;
1442
1443 case SN_ERR_INTERNAL:
Willy Tarreaud7971282006-07-29 18:36:34 +02001444 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001445 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001446 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001447 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001448 t->srv->cum_sess++;
1449 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001450 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001451 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001452 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001453 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001454 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001455 return 1;
1456 }
1457 /* ensure that we have enough retries left */
1458 if (srv_count_retry_down(t, conn_err)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001459 return 1;
1460 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001461 } while (t->srv == NULL || t->conn_retries > 0 || !(t->be->options & PR_O_REDISP));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001462
1463 /* We're on our last chance, and the REDISP option was specified.
1464 * We will ignore cookie and force to balance or use the dispatcher.
1465 */
1466 /* let's try to offer this slot to anybody */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001467 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001468 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001469
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001470 if (t->srv)
1471 t->srv->cum_sess++; //FIXME?
Willy Tarreaubaaee002006-06-26 02:48:02 +02001472
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001473 /* it's left to the dispatcher to choose a server */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001474 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001475 return 0;
1476}
1477
1478
1479/* This function performs the "redispatch" part of a connection attempt. It
1480 * will assign a server if required, queue the connection if required, and
1481 * handle errors that might arise at this level. It can change the server
1482 * state. It will return 1 if it encounters an error, switches the server
1483 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1484 * that the connection is ready to use.
1485 */
1486
1487int srv_redispatch_connect(struct session *t)
1488{
1489 int conn_err;
1490
1491 /* We know that we don't have any connection pending, so we will
1492 * try to get a new one, and wait in this state if it's queued
1493 */
1494 conn_err = assign_server_and_queue(t);
1495 switch (conn_err) {
1496 case SRV_STATUS_OK:
1497 break;
1498
1499 case SRV_STATUS_NOSRV:
1500 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaud7971282006-07-29 18:36:34 +02001501 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001502 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001503 503, error_message(t, HTTP_ERR_503));
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001504
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001505 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001506
1507 return 1;
1508
1509 case SRV_STATUS_QUEUED:
Willy Tarreau1fa31262007-12-03 00:36:16 +01001510 /* note: we use the connect expiration date for the queue. */
1511 if (!tv_add_ifset(&t->req->cex, &now, &t->be->timeout.queue))
Willy Tarreaud7971282006-07-29 18:36:34 +02001512 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001513 t->srv_state = SV_STIDLE;
1514 /* do nothing else and do not wake any other session up */
1515 return 1;
1516
1517 case SRV_STATUS_FULL:
1518 case SRV_STATUS_INTERNAL:
1519 default:
Willy Tarreaud7971282006-07-29 18:36:34 +02001520 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001521 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001522 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001523 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001524 t->srv->cum_sess++;
1525 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001526 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001527 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001528
1529 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001530 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001531 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001532 return 1;
1533 }
1534 /* if we get here, it's because we got SRV_STATUS_OK, which also
1535 * means that the connection has not been queued.
1536 */
1537 return 0;
1538}
1539
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001540int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01001541 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001542 return px->down_time;
1543
1544 return now.tv_sec - px->last_change + px->down_time;
1545}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001546
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001547/* This function parses a "balance" statement in a backend section describing
1548 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
1549 * returns -1, it may write an error message into ther <err> buffer, for at
1550 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
1551 * written. The function must be called with <args> pointing to the first word
1552 * after "balance".
1553 */
1554int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
1555{
1556 if (!*(args[0])) {
1557 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01001558 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1559 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001560 return 0;
1561 }
1562
1563 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001564 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1565 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001566 }
1567 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001568 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1569 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001570 }
1571 else if (!strcmp(args[0], "uri")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001572 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1573 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001574 }
Willy Tarreau01732802007-11-01 22:48:15 +01001575 else if (!strcmp(args[0], "url_param")) {
1576 if (!*args[1]) {
1577 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
1578 return -1;
1579 }
Willy Tarreau31682232007-11-29 15:38:04 +01001580 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1581 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreau01732802007-11-01 22:48:15 +01001582 if (curproxy->url_param_name)
1583 free(curproxy->url_param_name);
1584 curproxy->url_param_name = strdup(args[1]);
1585 curproxy->url_param_len = strlen(args[1]);
1586 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001587 else {
Willy Tarreau01732802007-11-01 22:48:15 +01001588 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'source', 'uri' and 'url_param' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001589 return -1;
1590 }
1591 return 0;
1592}
1593
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01001594
1595/************************************************************************/
1596/* All supported keywords must be declared here. */
1597/************************************************************************/
1598
1599/* set test->i to the number of enabled servers on the proxy */
1600static int
1601acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
1602 struct acl_expr *expr, struct acl_test *test)
1603{
1604 test->flags = ACL_TEST_F_VOL_TEST;
1605 if (expr->arg_len) {
1606 /* another proxy was designated, we must look for it */
1607 for (px = proxy; px; px = px->next)
1608 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
1609 break;
1610 }
1611 if (!px)
1612 return 0;
1613
1614 if (px->srv_act)
1615 test->i = px->srv_act;
1616 else if (px->lbprm.fbck)
1617 test->i = 1;
1618 else
1619 test->i = px->srv_bck;
1620
1621 return 1;
1622}
1623
1624
1625/* Note: must not be declared <const> as its list will be overwritten */
1626static struct acl_kw_list acl_kws = {{ },{
1627 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int },
1628 { NULL, NULL, NULL, NULL },
1629}};
1630
1631
1632__attribute__((constructor))
1633static void __backend_init(void)
1634{
1635 acl_register_keywords(&acl_kws);
1636}
1637
1638
Willy Tarreaubaaee002006-06-26 02:48:02 +02001639/*
1640 * Local variables:
1641 * c-indent-level: 8
1642 * c-basic-offset: 8
1643 * End:
1644 */