blob: 9ced724a467bc9fd2f57938ab49589ae0d5ec14e [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +020019#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020
Willy Tarreau2dd0d472006-06-29 17:53:05 +020021#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020022#include <common/config.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010023#include <common/eb32tree.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020024#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020025
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010026#include <types/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020027#include <types/buffers.h>
28#include <types/global.h>
29#include <types/polling.h>
30#include <types/proxy.h>
31#include <types/server.h>
32#include <types/session.h>
33
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010034#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020035#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020036#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020037#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010038#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020039#include <proto/log.h>
40#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010041#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020042#include <proto/queue.h>
43#include <proto/stream_sock.h>
44#include <proto/task.h>
45
Willy Tarreau6d1a9882007-01-07 02:03:04 +010046#ifdef CONFIG_HAP_TCPSPLICE
47#include <libtcpsplice.h>
48#endif
49
/* Forward declarations of the FWRR helpers which are referenced before their
 * definitions appear further down in this file.
 */
static inline void fwrr_remove_from_tree(struct server *s);
static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
static inline void fwrr_dequeue_srv(struct server *s);
static void fwrr_get_srv(struct server *s);
static void fwrr_queue_srv(struct server *s);
55
56/* This function returns non-zero if a server with the given weight and state
57 * is usable for LB, otherwise zero.
58 */
59static inline int srv_is_usable(int state, int weight)
60{
61 if (!weight)
62 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010063 if (state & SRV_GOINGDOWN)
64 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010065 if (!(state & SRV_RUNNING))
66 return 0;
67 return 1;
68}
69
Willy Tarreaubaaee002006-06-26 02:48:02 +020070/*
71 * This function recounts the number of usable active and backup servers for
72 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010073 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010074 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010075 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020076 */
Willy Tarreaub625a082007-11-26 01:15:43 +010077static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020078{
79 struct server *srv;
80
Willy Tarreau20697042007-11-15 23:26:18 +010081 px->srv_act = px->srv_bck = 0;
82 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010083 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020084 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010085 if (!srv_is_usable(srv->state, srv->eweight))
86 continue;
87
88 if (srv->state & SRV_BACKUP) {
89 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010090 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010091 px->lbprm.fbck = srv;
92 px->srv_bck++;
93 px->lbprm.tot_wbck += srv->eweight;
94 } else {
95 px->srv_act++;
96 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020097 }
98 }
Willy Tarreaub625a082007-11-26 01:15:43 +010099}
Willy Tarreau20697042007-11-15 23:26:18 +0100100
Willy Tarreaub625a082007-11-26 01:15:43 +0100101/* This function simply updates the backend's tot_weight and tot_used values
102 * after servers weights have been updated. It is designed to be used after
103 * recount_servers() or equivalent.
104 */
105static void update_backend_weight(struct proxy *px)
106{
Willy Tarreau20697042007-11-15 23:26:18 +0100107 if (px->srv_act) {
108 px->lbprm.tot_weight = px->lbprm.tot_wact;
109 px->lbprm.tot_used = px->srv_act;
110 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100111 else if (px->lbprm.fbck) {
112 /* use only the first backup server */
113 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
114 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100115 }
116 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100117 px->lbprm.tot_weight = px->lbprm.tot_wbck;
118 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100119 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100120}
121
122/* this function updates the map according to server <srv>'s new state */
123static void map_set_server_status_down(struct server *srv)
124{
125 struct proxy *p = srv->proxy;
126
127 if (srv->state == srv->prev_state &&
128 srv->eweight == srv->prev_eweight)
129 return;
130
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100131 if (srv_is_usable(srv->state, srv->eweight))
132 goto out_update_state;
133
Willy Tarreaub625a082007-11-26 01:15:43 +0100134 /* FIXME: could be optimized since we know what changed */
135 recount_servers(p);
136 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100137 p->lbprm.map.state |= PR_MAP_RECALC;
138 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100139 srv->prev_state = srv->state;
140 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200141}
142
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100143/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100144static void map_set_server_status_up(struct server *srv)
145{
146 struct proxy *p = srv->proxy;
147
148 if (srv->state == srv->prev_state &&
149 srv->eweight == srv->prev_eweight)
150 return;
151
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100152 if (!srv_is_usable(srv->state, srv->eweight))
153 goto out_update_state;
154
Willy Tarreaub625a082007-11-26 01:15:43 +0100155 /* FIXME: could be optimized since we know what changed */
156 recount_servers(p);
157 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100158 p->lbprm.map.state |= PR_MAP_RECALC;
159 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100160 srv->prev_state = srv->state;
161 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100162}
163
Willy Tarreau20697042007-11-15 23:26:18 +0100164/* This function recomputes the server map for proxy px. It relies on
165 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
166 * called after recount_servers(). It also expects px->lbprm.map.srv
167 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200168 */
169void recalc_server_map(struct proxy *px)
170{
171 int o, tot, flag;
172 struct server *cur, *best;
173
Willy Tarreau20697042007-11-15 23:26:18 +0100174 switch (px->lbprm.tot_used) {
175 case 0: /* no server */
176 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200177 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100178 case 1: /* only one server, just fill first entry */
179 tot = 1;
180 break;
181 default:
182 tot = px->lbprm.tot_weight;
183 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200184 }
185
Willy Tarreau20697042007-11-15 23:26:18 +0100186 /* here we *know* that we have some servers */
187 if (px->srv_act)
188 flag = SRV_RUNNING;
189 else
190 flag = SRV_RUNNING | SRV_BACKUP;
191
Willy Tarreaubaaee002006-06-26 02:48:02 +0200192 /* this algorithm gives priority to the first server, which means that
193 * it will respect the declaration order for equivalent weights, and
194 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100195 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200196 * case, where we want the first server only.
197 */
198 for (cur = px->srv; cur; cur = cur->next)
199 cur->wscore = 0;
200
201 for (o = 0; o < tot; o++) {
202 int max = 0;
203 best = NULL;
204 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100205 if (flag == (cur->state &
206 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200207 int v;
208
209 /* If we are forced to return only one server, we don't want to
210 * go further, because we would return the wrong one due to
211 * divide overflow.
212 */
213 if (tot == 1) {
214 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100215 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200216 break;
217 }
218
Willy Tarreau417fae02007-03-25 21:16:40 +0200219 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200220 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
221 if (best == NULL || v > max) {
222 max = v;
223 best = cur;
224 }
225 }
226 }
Willy Tarreau20697042007-11-15 23:26:18 +0100227 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200228 best->wscore -= tot;
229 }
Willy Tarreau20697042007-11-15 23:26:18 +0100230 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200231}
232
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100233/* This function is responsible of building the server MAP for map-based LB
234 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
235 * weights if applicable. It should be called only once per proxy, at config
236 * time.
237 */
238void init_server_map(struct proxy *p)
239{
240 struct server *srv;
241 int pgcd;
242 int act, bck;
243
Willy Tarreaub625a082007-11-26 01:15:43 +0100244 p->lbprm.set_server_status_up = map_set_server_status_up;
245 p->lbprm.set_server_status_down = map_set_server_status_down;
246 p->lbprm.update_server_eweight = NULL;
247
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100248 if (!p->srv)
249 return;
250
251 /* We will factor the weights to reduce the table,
252 * using Euclide's largest common divisor algorithm
253 */
254 pgcd = p->srv->uweight;
255 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
256 int w = srv->uweight;
257 while (w) {
258 int t = pgcd % w;
259 pgcd = w;
260 w = t;
261 }
262 }
263
264 /* It is sometimes useful to know what factor to apply
265 * to the backend's effective weight to know its real
266 * weight.
267 */
268 p->lbprm.wmult = pgcd;
269
270 act = bck = 0;
271 for (srv = p->srv; srv; srv = srv->next) {
272 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100273 srv->prev_eweight = srv->eweight;
274 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100275 if (srv->state & SRV_BACKUP)
276 bck += srv->eweight;
277 else
278 act += srv->eweight;
279 }
280
281 /* this is the largest map we will ever need for this servers list */
282 if (act < bck)
283 act = bck;
284
285 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
286 /* recounts servers and their weights */
287 p->lbprm.map.state = PR_MAP_RECALC;
288 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100289 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100290 recalc_server_map(p);
291}
292
Willy Tarreaub625a082007-11-26 01:15:43 +0100293/* This function updates the server trees according to server <srv>'s new
294 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100295 * It is not important whether the server was already down or not. It is not
296 * important either that the new state is completely down (the caller may not
297 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100298 */
299static void fwrr_set_server_status_down(struct server *srv)
300{
301 struct proxy *p = srv->proxy;
302 struct fwrr_group *grp;
303
304 if (srv->state == srv->prev_state &&
305 srv->eweight == srv->prev_eweight)
306 return;
307
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100308 if (srv_is_usable(srv->state, srv->eweight))
309 goto out_update_state;
310
Willy Tarreaub625a082007-11-26 01:15:43 +0100311 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
312 /* server was already down */
313 goto out_update_backend;
314
315 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
316 grp->next_weight -= srv->prev_eweight;
317
318 if (srv->state & SRV_BACKUP) {
319 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
320 p->srv_bck--;
321
322 if (srv == p->lbprm.fbck) {
323 /* we lost the first backup server in a single-backup
324 * configuration, we must search another one.
325 */
326 struct server *srv2 = p->lbprm.fbck;
327 do {
328 srv2 = srv2->next;
329 } while (srv2 &&
330 !((srv2->state & SRV_BACKUP) &&
331 srv_is_usable(srv2->state, srv2->eweight)));
332 p->lbprm.fbck = srv2;
333 }
334 } else {
335 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
336 p->srv_act--;
337 }
338
339 fwrr_dequeue_srv(srv);
340 fwrr_remove_from_tree(srv);
341
342out_update_backend:
343 /* check/update tot_used, tot_weight */
344 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100345 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100346 srv->prev_state = srv->state;
347 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100348}
349
350/* This function updates the server trees according to server <srv>'s new
351 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100352 * It is not important whether the server was already down or not. It is not
353 * important either that the new state is completely UP (the caller may not
354 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100355 * the weight of a server which was already up.
356 */
357static void fwrr_set_server_status_up(struct server *srv)
358{
359 struct proxy *p = srv->proxy;
360 struct fwrr_group *grp;
361
362 if (srv->state == srv->prev_state &&
363 srv->eweight == srv->prev_eweight)
364 return;
365
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100366 if (!srv_is_usable(srv->state, srv->eweight))
367 goto out_update_state;
368
Willy Tarreaub625a082007-11-26 01:15:43 +0100369 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
370 /* server was already up */
371 goto out_update_backend;
372
373 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
374 grp->next_weight += srv->eweight;
375
376 if (srv->state & SRV_BACKUP) {
377 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
378 p->srv_bck++;
379
Willy Tarreauf4cca452008-03-08 21:42:54 +0100380 if (!(p->options & PR_O_USE_ALL_BK)) {
381 if (!p->lbprm.fbck) {
382 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100383 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100384 } else {
385 /* we may have restored a backup server prior to fbck,
386 * in which case it should replace it.
387 */
388 struct server *srv2 = srv;
389 do {
390 srv2 = srv2->next;
391 } while (srv2 && (srv2 != p->lbprm.fbck));
392 if (srv2)
393 p->lbprm.fbck = srv;
394 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100395 }
396 } else {
397 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
398 p->srv_act++;
399 }
400
401 /* note that eweight cannot be 0 here */
402 fwrr_get_srv(srv);
403 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
404 fwrr_queue_srv(srv);
405
406out_update_backend:
407 /* check/update tot_used, tot_weight */
408 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100409 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100410 srv->prev_state = srv->state;
411 srv->prev_eweight = srv->eweight;
412}
413
414/* This function must be called after an update to server <srv>'s effective
415 * weight. It may be called after a state change too.
416 */
417static void fwrr_update_server_weight(struct server *srv)
418{
419 int old_state, new_state;
420 struct proxy *p = srv->proxy;
421 struct fwrr_group *grp;
422
423 if (srv->state == srv->prev_state &&
424 srv->eweight == srv->prev_eweight)
425 return;
426
427 /* If changing the server's weight changes its state, we simply apply
428 * the procedures we already have for status change. If the state
429 * remains down, the server is not in any tree, so it's as easy as
430 * updating its values. If the state remains up with different weights,
431 * there are some computations to perform to find a new place and
432 * possibly a new tree for this server.
433 */
434
435 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
436 new_state = srv_is_usable(srv->state, srv->eweight);
437
438 if (!old_state && !new_state) {
439 srv->prev_state = srv->state;
440 srv->prev_eweight = srv->eweight;
441 return;
442 }
443 else if (!old_state && new_state) {
444 fwrr_set_server_status_up(srv);
445 return;
446 }
447 else if (old_state && !new_state) {
448 fwrr_set_server_status_down(srv);
449 return;
450 }
451
452 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
453 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
454
455 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
456 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
457
458 if (srv->lb_tree == grp->init) {
459 fwrr_dequeue_srv(srv);
460 fwrr_queue_by_weight(grp->init, srv);
461 }
462 else if (!srv->lb_tree) {
463 /* FIXME: server was down. This is not possible right now but
464 * may be needed soon for slowstart or graceful shutdown.
465 */
466 fwrr_dequeue_srv(srv);
467 fwrr_get_srv(srv);
468 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
469 fwrr_queue_srv(srv);
470 } else {
471 /* The server is either active or in the next queue. If it's
472 * still in the active queue and it has not consumed all of its
473 * places, let's adjust its next position.
474 */
475 fwrr_get_srv(srv);
476
477 if (srv->eweight > 0) {
478 int prev_next = srv->npos;
479 int step = grp->next_weight / srv->eweight;
480
481 srv->npos = srv->lpos + step;
482 srv->rweight = 0;
483
484 if (srv->npos > prev_next)
485 srv->npos = prev_next;
486 if (srv->npos < grp->curr_pos + 2)
487 srv->npos = grp->curr_pos + step;
488 } else {
489 /* push it into the next tree */
490 srv->npos = grp->curr_pos + grp->curr_weight;
491 }
492
493 fwrr_dequeue_srv(srv);
494 fwrr_queue_srv(srv);
495 }
496
497 update_backend_weight(p);
498 srv->prev_state = srv->state;
499 srv->prev_eweight = srv->eweight;
500}
501
502/* Remove a server from a tree. It must have previously been dequeued. This
503 * function is meant to be called when a server is going down or has its
504 * weight disabled.
505 */
506static inline void fwrr_remove_from_tree(struct server *s)
507{
508 s->lb_tree = NULL;
509}
510
511/* Queue a server in the weight tree <root>, assuming the weight is >0.
512 * We want to sort them by inverted weights, because we need to place
513 * heavy servers first in order to get a smooth distribution.
514 */
515static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
516{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100517 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100518 eb32_insert(root, &s->lb_node);
519 s->lb_tree = root;
520}
521
522/* This function is responsible for building the weight trees in case of fast
523 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
524 * ratio. Both active and backup groups are initialized.
525 */
526void fwrr_init_server_groups(struct proxy *p)
527{
528 struct server *srv;
529 struct eb_root init_head = EB_ROOT;
530
531 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
532 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
533 p->lbprm.update_server_eweight = fwrr_update_server_weight;
534
535 p->lbprm.wdiv = BE_WEIGHT_SCALE;
536 for (srv = p->srv; srv; srv = srv->next) {
537 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
538 srv->prev_state = srv->state;
539 }
540
541 recount_servers(p);
542 update_backend_weight(p);
543
544 /* prepare the active servers group */
545 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
546 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
547 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
548 p->lbprm.fwrr.act.t1 = init_head;
549 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
550 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
551
552 /* prepare the backup servers group */
553 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
554 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
555 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
556 p->lbprm.fwrr.bck.t1 = init_head;
557 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
558 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
559
560 /* queue active and backup servers in two distinct groups */
561 for (srv = p->srv; srv; srv = srv->next) {
562 if (!srv_is_usable(srv->state, srv->eweight))
563 continue;
564 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
565 p->lbprm.fwrr.bck.init :
566 p->lbprm.fwrr.act.init,
567 srv);
568 }
569}
570
571/* simply removes a server from a weight tree */
572static inline void fwrr_dequeue_srv(struct server *s)
573{
574 eb32_delete(&s->lb_node);
575}
576
577/* queues a server into the appropriate group and tree depending on its
578 * backup status, and ->npos. If the server is disabled, simply assign
579 * it to the NULL tree.
580 */
581static void fwrr_queue_srv(struct server *s)
582{
583 struct proxy *p = s->proxy;
584 struct fwrr_group *grp;
585
586 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
587
588 /* Delay everything which does not fit into the window and everything
589 * which does not fit into the theorical new window.
590 */
591 if (!srv_is_usable(s->state, s->eweight)) {
592 fwrr_remove_from_tree(s);
593 }
594 else if (s->eweight <= 0 ||
595 s->npos >= 2 * grp->curr_weight ||
596 s->npos >= grp->curr_weight + grp->next_weight) {
597 /* put into next tree, and readjust npos in case we could
598 * finally take this back to current. */
599 s->npos -= grp->curr_weight;
600 fwrr_queue_by_weight(grp->next, s);
601 }
602 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100603 /* The sorting key is stored in units of s->npos * user_weight
604 * in order to avoid overflows. As stated in backend.h, the
605 * lower the scale, the rougher the weights modulation, and the
606 * higher the scale, the lower the number of servers without
607 * overflow. With this formula, the result is always positive,
608 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100609 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100610 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
611 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
612
613 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100614 s->lb_tree = &grp->curr;
615 }
616}
617
618/* prepares a server when extracting it from the "init" tree */
619static inline void fwrr_get_srv_init(struct server *s)
620{
621 s->npos = s->rweight = 0;
622}
623
624/* prepares a server when extracting it from the "next" tree */
625static inline void fwrr_get_srv_next(struct server *s)
626{
627 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
628 &s->proxy->lbprm.fwrr.bck :
629 &s->proxy->lbprm.fwrr.act;
630
631 s->npos += grp->curr_weight;
632}
633
634/* prepares a server when it was marked down */
635static inline void fwrr_get_srv_down(struct server *s)
636{
637 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
638 &s->proxy->lbprm.fwrr.bck :
639 &s->proxy->lbprm.fwrr.act;
640
641 s->npos = grp->curr_pos;
642}
643
644/* prepares a server when extracting it from its tree */
645static void fwrr_get_srv(struct server *s)
646{
647 struct proxy *p = s->proxy;
648 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
649 &p->lbprm.fwrr.bck :
650 &p->lbprm.fwrr.act;
651
652 if (s->lb_tree == grp->init) {
653 fwrr_get_srv_init(s);
654 }
655 else if (s->lb_tree == grp->next) {
656 fwrr_get_srv_next(s);
657 }
658 else if (s->lb_tree == NULL) {
659 fwrr_get_srv_down(s);
660 }
661}
662
663/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
664 * when this happens, and "next" filled with servers sorted by weights.
665 */
666static inline void fwrr_switch_trees(struct fwrr_group *grp)
667{
668 struct eb_root *swap;
669 swap = grp->init;
670 grp->init = grp->next;
671 grp->next = swap;
672 grp->curr_weight = grp->next_weight;
673 grp->curr_pos = grp->curr_weight;
674}
675
676/* return next server from the current tree in FWRR group <grp>, or a server
677 * from the "init" tree if appropriate. If both trees are empty, return NULL.
678 */
679static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
680{
681 struct eb32_node *node;
682 struct server *s;
683
684 node = eb32_first(&grp->curr);
685 s = eb32_entry(node, struct server, lb_node);
686
687 if (!node || s->npos > grp->curr_pos) {
688 /* either we have no server left, or we have a hole */
689 struct eb32_node *node2;
690 node2 = eb32_first(grp->init);
691 if (node2) {
692 node = node2;
693 s = eb32_entry(node, struct server, lb_node);
694 fwrr_get_srv_init(s);
695 if (s->eweight == 0) /* FIXME: is it possible at all ? */
696 node = NULL;
697 }
698 }
699 if (node)
700 return s;
701 else
702 return NULL;
703}
704
705/* Computes next position of server <s> in the group. It is mandatory for <s>
706 * to have a non-zero, positive eweight.
707*/
708static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
709{
710 if (!s->npos) {
711 /* first time ever for this server */
712 s->lpos = grp->curr_pos;
713 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
714 s->rweight += grp->next_weight % s->eweight;
715
716 if (s->rweight >= s->eweight) {
717 s->rweight -= s->eweight;
718 s->npos++;
719 }
720 } else {
721 s->lpos = s->npos;
722 s->npos += grp->next_weight / s->eweight;
723 s->rweight += grp->next_weight % s->eweight;
724
725 if (s->rweight >= s->eweight) {
726 s->rweight -= s->eweight;
727 s->npos++;
728 }
729 }
730}
731
732/* Return next server from the current tree in backend <p>, or a server from
733 * the init tree if appropriate. If both trees are empty, return NULL.
734 * Saturated servers are skipped and requeued.
735 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100736static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100737{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100738 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100739 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100740 int switched;
741
742 if (p->srv_act)
743 grp = &p->lbprm.fwrr.act;
744 else if (p->lbprm.fbck)
745 return p->lbprm.fbck;
746 else if (p->srv_bck)
747 grp = &p->lbprm.fwrr.bck;
748 else
749 return NULL;
750
751 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100752 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100753 full = NULL; /* NULL-terminated list of saturated servers */
754 while (1) {
755 /* if we see an empty group, let's first try to collect weights
756 * which might have recently changed.
757 */
758 if (!grp->curr_weight)
759 grp->curr_pos = grp->curr_weight = grp->next_weight;
760
761 /* get first server from the "current" tree. When the end of
762 * the tree is reached, we may have to switch, but only once.
763 */
764 while (1) {
765 srv = fwrr_get_server_from_group(grp);
766 if (srv)
767 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100768 if (switched) {
769 if (avoided) {
770 srv = avoided;
771 break;
772 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100773 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100774 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100775 switched = 1;
776 fwrr_switch_trees(grp);
777
778 }
779
780 /* OK, we have a server. However, it may be saturated, in which
781 * case we don't want to reconsider it for now. We'll update
782 * its position and dequeue it anyway, so that we can move it
783 * to a better place afterwards.
784 */
785 fwrr_update_position(grp, srv);
786 fwrr_dequeue_srv(srv);
787 grp->curr_pos++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100788 if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) {
789 /* make sure it is not the server we are trying to exclude... */
790 if (srv != srvtoavoid || avoided)
791 break;
792
793 avoided = srv; /* ...but remember that is was selected yet avoided */
794 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100795
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100796 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100797 srv->next_full = full;
798 full = srv;
799 }
800
801 /* OK, we got the best server, let's update it */
802 fwrr_queue_srv(srv);
803
804 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100805 /* Requeue all extracted servers. If full==srv then it was
806 * avoided (unsucessfully) and chained, omit it now.
807 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100808 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100809 if (switched) {
810 /* the tree has switched, requeue all extracted servers
811 * into "init", because their place was lost, and only
812 * their weight matters.
813 */
814 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100815 if (likely(full != srv))
816 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100817 full = full->next_full;
818 } while (full);
819 } else {
820 /* requeue all extracted servers just as if they were consumed
821 * so that they regain their expected place.
822 */
823 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100824 if (likely(full != srv))
825 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100826 full = full->next_full;
827 } while (full);
828 }
829 }
830 return srv;
831}
832
Willy Tarreau51406232008-03-10 22:04:20 +0100833/* Remove a server from a tree. It must have previously been dequeued. This
834 * function is meant to be called when a server is going down or has its
835 * weight disabled.
836 */
837static inline void fwlc_remove_from_tree(struct server *s)
838{
839 s->lb_tree = NULL;
840}
841
842/* simply removes a server from a tree */
843static inline void fwlc_dequeue_srv(struct server *s)
844{
845 eb32_delete(&s->lb_node);
846}
847
848/* Queue a server in its associated tree, assuming the weight is >0.
849 * Servers are sorted by #conns/weight. To ensure maximum accuracy,
850 * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key.
851 */
852static inline void fwlc_queue_srv(struct server *s)
853{
854 s->lb_node.key = s->cur_sess * SRV_EWGHT_MAX / s->eweight;
855 eb32_insert(s->lb_tree, &s->lb_node);
856}
857
858/* Re-position the server in the FWLC tree after it has been assigned one
859 * connection or after it has released one. Note that it is possible that
860 * the server has been moved out of the tree due to failed health-checks.
861 */
862static void fwlc_srv_reposition(struct server *s)
863{
864 if (!s->lb_tree)
865 return;
866 fwlc_dequeue_srv(s);
867 fwlc_queue_srv(s);
868}
869
870/* This function updates the server trees according to server <srv>'s new
871 * state. It should be called when server <srv>'s status changes to down.
872 * It is not important whether the server was already down or not. It is not
873 * important either that the new state is completely down (the caller may not
874 * know all the variables of a server's state).
875 */
876static void fwlc_set_server_status_down(struct server *srv)
877{
878 struct proxy *p = srv->proxy;
879
880 if (srv->state == srv->prev_state &&
881 srv->eweight == srv->prev_eweight)
882 return;
883
884 if (srv_is_usable(srv->state, srv->eweight))
885 goto out_update_state;
886
887 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
888 /* server was already down */
889 goto out_update_backend;
890
891 if (srv->state & SRV_BACKUP) {
892 p->lbprm.tot_wbck -= srv->prev_eweight;
893 p->srv_bck--;
894
895 if (srv == p->lbprm.fbck) {
896 /* we lost the first backup server in a single-backup
897 * configuration, we must search another one.
898 */
899 struct server *srv2 = p->lbprm.fbck;
900 do {
901 srv2 = srv2->next;
902 } while (srv2 &&
903 !((srv2->state & SRV_BACKUP) &&
904 srv_is_usable(srv2->state, srv2->eweight)));
905 p->lbprm.fbck = srv2;
906 }
907 } else {
908 p->lbprm.tot_wact -= srv->prev_eweight;
909 p->srv_act--;
910 }
911
912 fwlc_dequeue_srv(srv);
913 fwlc_remove_from_tree(srv);
914
915out_update_backend:
916 /* check/update tot_used, tot_weight */
917 update_backend_weight(p);
918 out_update_state:
919 srv->prev_state = srv->state;
920 srv->prev_eweight = srv->eweight;
921}
922
923/* This function updates the server trees according to server <srv>'s new
924 * state. It should be called when server <srv>'s status changes to up.
925 * It is not important whether the server was already down or not. It is not
926 * important either that the new state is completely UP (the caller may not
927 * know all the variables of a server's state). This function will not change
928 * the weight of a server which was already up.
929 */
930static void fwlc_set_server_status_up(struct server *srv)
931{
932 struct proxy *p = srv->proxy;
933
934 if (srv->state == srv->prev_state &&
935 srv->eweight == srv->prev_eweight)
936 return;
937
938 if (!srv_is_usable(srv->state, srv->eweight))
939 goto out_update_state;
940
941 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
942 /* server was already up */
943 goto out_update_backend;
944
945 if (srv->state & SRV_BACKUP) {
946 srv->lb_tree = &p->lbprm.fwlc.bck;
947 p->lbprm.tot_wbck += srv->eweight;
948 p->srv_bck++;
949
950 if (!(p->options & PR_O_USE_ALL_BK)) {
951 if (!p->lbprm.fbck) {
952 /* there was no backup server anymore */
953 p->lbprm.fbck = srv;
954 } else {
955 /* we may have restored a backup server prior to fbck,
956 * in which case it should replace it.
957 */
958 struct server *srv2 = srv;
959 do {
960 srv2 = srv2->next;
961 } while (srv2 && (srv2 != p->lbprm.fbck));
962 if (srv2)
963 p->lbprm.fbck = srv;
964 }
965 }
966 } else {
967 srv->lb_tree = &p->lbprm.fwlc.act;
968 p->lbprm.tot_wact += srv->eweight;
969 p->srv_act++;
970 }
971
972 /* note that eweight cannot be 0 here */
973 fwlc_queue_srv(srv);
974
975 out_update_backend:
976 /* check/update tot_used, tot_weight */
977 update_backend_weight(p);
978 out_update_state:
979 srv->prev_state = srv->state;
980 srv->prev_eweight = srv->eweight;
981}
982
983/* This function must be called after an update to server <srv>'s effective
984 * weight. It may be called after a state change too.
985 */
986static void fwlc_update_server_weight(struct server *srv)
987{
988 int old_state, new_state;
989 struct proxy *p = srv->proxy;
990
991 if (srv->state == srv->prev_state &&
992 srv->eweight == srv->prev_eweight)
993 return;
994
995 /* If changing the server's weight changes its state, we simply apply
996 * the procedures we already have for status change. If the state
997 * remains down, the server is not in any tree, so it's as easy as
998 * updating its values. If the state remains up with different weights,
999 * there are some computations to perform to find a new place and
1000 * possibly a new tree for this server.
1001 */
1002
1003 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
1004 new_state = srv_is_usable(srv->state, srv->eweight);
1005
1006 if (!old_state && !new_state) {
1007 srv->prev_state = srv->state;
1008 srv->prev_eweight = srv->eweight;
1009 return;
1010 }
1011 else if (!old_state && new_state) {
1012 fwlc_set_server_status_up(srv);
1013 return;
1014 }
1015 else if (old_state && !new_state) {
1016 fwlc_set_server_status_down(srv);
1017 return;
1018 }
1019
1020 if (srv->lb_tree)
1021 fwlc_dequeue_srv(srv);
1022
1023 if (srv->state & SRV_BACKUP) {
1024 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
1025 srv->lb_tree = &p->lbprm.fwlc.bck;
1026 } else {
1027 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
1028 srv->lb_tree = &p->lbprm.fwlc.act;
1029 }
1030
1031 fwlc_queue_srv(srv);
1032
1033 update_backend_weight(p);
1034 srv->prev_state = srv->state;
1035 srv->prev_eweight = srv->eweight;
1036}
1037
1038/* This function is responsible for building the trees in case of fast
1039 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
1040 * uweight ratio. Both active and backup groups are initialized.
1041 */
1042void fwlc_init_server_tree(struct proxy *p)
1043{
1044 struct server *srv;
1045 struct eb_root init_head = EB_ROOT;
1046
1047 p->lbprm.set_server_status_up = fwlc_set_server_status_up;
1048 p->lbprm.set_server_status_down = fwlc_set_server_status_down;
1049 p->lbprm.update_server_eweight = fwlc_update_server_weight;
1050 p->lbprm.server_take_conn = fwlc_srv_reposition;
1051 p->lbprm.server_drop_conn = fwlc_srv_reposition;
1052
1053 p->lbprm.wdiv = BE_WEIGHT_SCALE;
1054 for (srv = p->srv; srv; srv = srv->next) {
1055 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
1056 srv->prev_state = srv->state;
1057 }
1058
1059 recount_servers(p);
1060 update_backend_weight(p);
1061
1062 p->lbprm.fwlc.act = init_head;
1063 p->lbprm.fwlc.bck = init_head;
1064
1065 /* queue active and backup servers in two distinct groups */
1066 for (srv = p->srv; srv; srv = srv->next) {
1067 if (!srv_is_usable(srv->state, srv->eweight))
1068 continue;
1069 srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
1070 fwlc_queue_srv(srv);
1071 }
1072}
1073
1074/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
1075 * return NULL. Saturated servers are skipped.
1076 */
1077static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
1078{
1079 struct server *srv, *avoided;
1080 struct eb32_node *node;
1081
1082 srv = avoided = NULL;
1083
1084 if (p->srv_act)
1085 node = eb32_first(&p->lbprm.fwlc.act);
1086 else if (p->lbprm.fbck)
1087 return p->lbprm.fbck;
1088 else if (p->srv_bck)
1089 node = eb32_first(&p->lbprm.fwlc.bck);
1090 else
1091 return NULL;
1092
1093 while (node) {
1094 /* OK, we have a server. However, it may be saturated, in which
1095 * case we don't want to reconsider it for now, so we'll simply
1096 * skip it. Same if it's the server we try to avoid, in which
1097 * case we simply remember it for later use if needed.
1098 */
1099 struct server *s;
1100
1101 s = eb32_entry(node, struct server, lb_node);
1102 if (!s->maxconn || s->cur_sess < srv_dynamic_maxconn(s)) {
1103 if (s != srvtoavoid) {
1104 srv = s;
1105 break;
1106 }
1107 avoided = s;
1108 }
1109 node = eb32_next(node);
1110 }
1111
1112 if (!srv)
1113 srv = avoided;
1114
1115 return srv;
1116}
1117
Willy Tarreau01732802007-11-01 22:48:15 +01001118/*
1119 * This function tries to find a running server for the proxy <px> following
1120 * the URL parameter hash method. It looks for a specific parameter in the
1121 * URL and hashes it to compute the server ID. This is useful to optimize
1122 * performance by avoiding bounces between servers in contexts where sessions
1123 * are shared but cookies are not usable. If the parameter is not found, NULL
1124 * is returned. If any server is found, it will be returned. If no valid server
1125 * is found, NULL is returned.
Willy Tarreau01732802007-11-01 22:48:15 +01001126 */
1127struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
1128{
1129 unsigned long hash = 0;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001130 const char *p;
1131 const char *params;
Willy Tarreau01732802007-11-01 22:48:15 +01001132 int plen;
1133
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001134 /* when tot_weight is 0 then so is srv_count */
Willy Tarreau20697042007-11-15 23:26:18 +01001135 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +01001136 return NULL;
1137
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001138 if ((p = memchr(uri, '?', uri_len)) == NULL)
1139 return NULL;
1140
Willy Tarreau20697042007-11-15 23:26:18 +01001141 if (px->lbprm.map.state & PR_MAP_RECALC)
1142 recalc_server_map(px);
1143
Willy Tarreau01732802007-11-01 22:48:15 +01001144 p++;
1145
1146 uri_len -= (p - uri);
1147 plen = px->url_param_len;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001148 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001149
1150 while (uri_len > plen) {
1151 /* Look for the parameter name followed by an equal symbol */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001152 if (params[plen] == '=') {
1153 if (memcmp(params, px->url_param_name, plen) == 0) {
1154 /* OK, we have the parameter here at <params>, and
Willy Tarreau01732802007-11-01 22:48:15 +01001155 * the value after the equal sign, at <p>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001156 * skip the equal symbol
Willy Tarreau01732802007-11-01 22:48:15 +01001157 */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001158 p += plen + 1;
1159 uri_len -= plen + 1;
1160
Willy Tarreau01732802007-11-01 22:48:15 +01001161 while (uri_len && *p != '&') {
1162 hash = *p + (hash << 6) + (hash << 16) - hash;
1163 uri_len--;
1164 p++;
1165 }
Willy Tarreau20697042007-11-15 23:26:18 +01001166 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +01001167 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001168 }
1169 /* skip to next parameter */
1170 p = memchr(params, '&', uri_len);
1171 if (!p)
1172 return NULL;
1173 p++;
1174 uri_len -= (p - params);
1175 params = p;
1176 }
1177 return NULL;
1178}
1179
1180/*
1181 * this does the same as the previous server_ph, but check the body contents
1182 */
1183struct server *get_server_ph_post(struct session *s)
1184{
1185 unsigned long hash = 0;
1186 struct http_txn *txn = &s->txn;
1187 struct buffer *req = s->req;
1188 struct http_msg *msg = &txn->req;
1189 struct proxy *px = s->be;
1190 unsigned int plen = px->url_param_len;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001191 unsigned long body;
1192 unsigned long len;
1193 const char *params;
1194 struct hdr_ctx ctx;
1195 const char *p;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001196
1197 /* tot_weight appears to mean srv_count */
1198 if (px->lbprm.tot_weight == 0)
1199 return NULL;
1200
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001201 body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
1202 len = req->total - body;
1203 params = req->data + body;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001204
1205 if ( len == 0 )
1206 return NULL;
1207
1208 if (px->lbprm.map.state & PR_MAP_RECALC)
1209 recalc_server_map(px);
1210
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001211 ctx.idx = 0;
1212
1213 /* if the message is chunked, we skip the chunk size, but use the value as len */
1214 http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
1215 if ( ctx.idx && strncasecmp(ctx.line+ctx.val,"chunked",ctx.vlen)==0) {
1216 unsigned int chunk = 0;
1217 while ( params < req->rlim && !HTTP_IS_CRLF(*params)) {
1218 char c = *params;
1219 if (ishex(c)) {
1220 unsigned int hex = toupper(c) - '0';
1221 if ( hex > 9 )
1222 hex -= 'A' - '9' - 1;
1223 chunk = (chunk << 4) | hex;
1224 }
1225 else
1226 return NULL;
1227 params++;
1228 len--;
Willy Tarreau01732802007-11-01 22:48:15 +01001229 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001230 /* spec says we get CRLF */
1231 if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
1232 params += 2;
1233 else
1234 return NULL;
1235 /* ok we have some encoded length, just inspect the first chunk */
1236 len = chunk;
1237 }
Willy Tarreau01732802007-11-01 22:48:15 +01001238
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001239 p = params;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001240
1241 while (len > plen) {
1242 /* Look for the parameter name followed by an equal symbol */
1243 if (params[plen] == '=') {
1244 if (memcmp(params, px->url_param_name, plen) == 0) {
1245 /* OK, we have the parameter here at <params>, and
1246 * the value after the equal sign, at <p>
1247 * skip the equal symbol
1248 */
1249 p += plen + 1;
1250 len -= plen + 1;
1251
1252 while (len && *p != '&') {
1253 if (unlikely(!HTTP_IS_TOKEN(*p))) {
1254 /* if in a POST, body must be URI encoded or its not a URI.
1255 * Do not interprete any possible binary data as a parameter.
1256 */
1257 if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
1258 break;
1259 return NULL; /* oh, no; this is not uri-encoded.
1260 * This body does not contain parameters.
1261 */
1262 }
1263 hash = *p + (hash << 6) + (hash << 16) - hash;
1264 len--;
1265 p++;
1266 /* should we break if vlen exceeds limit? */
1267 }
1268 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1269 }
1270 }
Willy Tarreau01732802007-11-01 22:48:15 +01001271 /* skip to next parameter */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001272 p = memchr(params, '&', len);
Willy Tarreau01732802007-11-01 22:48:15 +01001273 if (!p)
1274 return NULL;
1275 p++;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001276 len -= (p - params);
1277 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001278 }
1279 return NULL;
1280}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001281
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001282
Willy Tarreaubaaee002006-06-26 02:48:02 +02001283/*
1284 * This function marks the session as 'assigned' in direct or dispatch modes,
1285 * or tries to assign one in balance mode, according to the algorithm. It does
1286 * nothing if the session had already been assigned a server.
1287 *
1288 * It may return :
1289 * SRV_STATUS_OK if everything is OK. s->srv will be valid.
1290 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1291 * SRV_STATUS_FULL if all servers are saturated. s->srv = NULL.
1292 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1293 *
1294 * Upon successful return, the session flag SN_ASSIGNED to indicate that it does
1295 * not need to be called anymore. This usually means that s->srv can be trusted
1296 * in balance and direct modes. This flag is not cleared, so it's to the caller
1297 * to clear it if required (eg: redispatch).
1298 *
1299 */
1300
1301int assign_server(struct session *s)
1302{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001303
1304 struct server *srvtoavoid;
1305
Willy Tarreaubaaee002006-06-26 02:48:02 +02001306#ifdef DEBUG_FULL
1307 fprintf(stderr,"assign_server : s=%p\n",s);
1308#endif
1309
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001310 srvtoavoid = s->srv;
1311 s->srv = NULL;
1312
Willy Tarreaubaaee002006-06-26 02:48:02 +02001313 if (s->pend_pos)
1314 return SRV_STATUS_INTERNAL;
1315
1316 if (!(s->flags & SN_ASSIGNED)) {
Willy Tarreau31682232007-11-29 15:38:04 +01001317 if (s->be->lbprm.algo & BE_LB_ALGO) {
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001318 int len;
1319
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001320 if (s->flags & SN_DIRECT) {
1321 s->flags |= SN_ASSIGNED;
1322 return SRV_STATUS_OK;
1323 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001324
Willy Tarreaub625a082007-11-26 01:15:43 +01001325 if (!s->be->lbprm.tot_weight)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001326 return SRV_STATUS_NOSRV;
1327
Willy Tarreau31682232007-11-29 15:38:04 +01001328 switch (s->be->lbprm.algo & BE_LB_ALGO) {
1329 case BE_LB_ALGO_RR:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001330 s->srv = fwrr_get_next_server(s->be, srvtoavoid);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001331 if (!s->srv)
1332 return SRV_STATUS_FULL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001333 break;
Willy Tarreau51406232008-03-10 22:04:20 +01001334 case BE_LB_ALGO_LC:
1335 s->srv = fwlc_get_next_server(s->be, srvtoavoid);
1336 if (!s->srv)
1337 return SRV_STATUS_FULL;
1338 break;
Willy Tarreau31682232007-11-29 15:38:04 +01001339 case BE_LB_ALGO_SH:
Willy Tarreaubaaee002006-06-26 02:48:02 +02001340 if (s->cli_addr.ss_family == AF_INET)
1341 len = 4;
1342 else if (s->cli_addr.ss_family == AF_INET6)
1343 len = 16;
1344 else /* unknown IP family */
1345 return SRV_STATUS_INTERNAL;
1346
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001347 s->srv = get_server_sh(s->be,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001348 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
1349 len);
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001350 break;
Willy Tarreau31682232007-11-29 15:38:04 +01001351 case BE_LB_ALGO_UH:
Willy Tarreau2fcb5002007-05-08 13:35:26 +02001352 /* URI hashing */
1353 s->srv = get_server_uh(s->be,
1354 s->txn.req.sol + s->txn.req.sl.rq.u,
1355 s->txn.req.sl.rq.u_l);
Willy Tarreau01732802007-11-01 22:48:15 +01001356 break;
Willy Tarreau31682232007-11-29 15:38:04 +01001357 case BE_LB_ALGO_PH:
Willy Tarreau01732802007-11-01 22:48:15 +01001358 /* URL Parameter hashing */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001359 if (s->txn.meth == HTTP_METH_POST &&
1360 memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
1361 s->txn.req.sl.rq.u_l ) == NULL)
1362 s->srv = get_server_ph_post(s);
1363 else
1364 s->srv = get_server_ph(s->be,
1365 s->txn.req.sol + s->txn.req.sl.rq.u,
1366 s->txn.req.sl.rq.u_l);
1367
Willy Tarreau01732802007-11-01 22:48:15 +01001368 if (!s->srv) {
Willy Tarreaub625a082007-11-26 01:15:43 +01001369 /* parameter not found, fall back to round robin on the map */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001370 s->srv = get_server_rr_with_conns(s->be, srvtoavoid);
Willy Tarreau01732802007-11-01 22:48:15 +01001371 if (!s->srv)
1372 return SRV_STATUS_FULL;
1373 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001374 break;
1375 default:
1376 /* unknown balancing algorithm */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001377 return SRV_STATUS_INTERNAL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001378 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001379 if (s->srv != srvtoavoid) {
1380 s->be->cum_lbconn++;
1381 s->srv->cum_lbconn++;
1382 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001383 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001384 else if (s->be->options & PR_O_HTTP_PROXY) {
1385 if (!s->srv_addr.sin_addr.s_addr)
1386 return SRV_STATUS_NOSRV;
1387 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001388 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001389 !(s->fe->options & PR_O_TRANSP)) {
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001390 return SRV_STATUS_NOSRV;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001391 }
1392 s->flags |= SN_ASSIGNED;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001393 }
1394 return SRV_STATUS_OK;
1395}
1396
1397
1398/*
1399 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1400 * The address is taken from the currently assigned server, or from the
1401 * dispatch or transparent address.
1402 *
1403 * It may return :
1404 * SRV_STATUS_OK if everything is OK.
1405 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1406 *
1407 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1408 * not cleared, so it's to the caller to clear it if required.
1409 *
1410 */
1411int assign_server_address(struct session *s)
1412{
1413#ifdef DEBUG_FULL
1414 fprintf(stderr,"assign_server_address : s=%p\n",s);
1415#endif
1416
Willy Tarreau31682232007-11-29 15:38:04 +01001417 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001418 /* A server is necessarily known for this session */
1419 if (!(s->flags & SN_ASSIGNED))
1420 return SRV_STATUS_INTERNAL;
1421
1422 s->srv_addr = s->srv->addr;
1423
1424 /* if this server remaps proxied ports, we'll use
1425 * the port the client connected to with an offset. */
1426 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001427 if (!(s->fe->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
1428 get_frt_addr(s);
1429 if (s->frt_addr.ss_family == AF_INET) {
1430 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1431 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1432 } else {
1433 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1434 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1435 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001436 }
1437 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001438 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001439 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001440 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001441 }
Willy Tarreau73de9892006-11-30 11:40:23 +01001442 else if (s->fe->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001443 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001444 if (!(s->flags & SN_FRT_ADDR_SET))
1445 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001446
Willy Tarreaubd414282008-01-19 13:46:35 +01001447 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1448 /* when we support IPv6 on the backend, we may add other tests */
1449 //qfprintf(stderr, "Cannot get original server address.\n");
1450 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001451 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001452 else if (s->be->options & PR_O_HTTP_PROXY) {
1453 /* If HTTP PROXY option is set, then server is already assigned
1454 * during incoming client request parsing. */
1455 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001456 else {
1457 /* no server and no LB algorithm ! */
1458 return SRV_STATUS_INTERNAL;
1459 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001460
1461 s->flags |= SN_ADDR_SET;
1462 return SRV_STATUS_OK;
1463}
1464
1465
1466/* This function assigns a server to session <s> if required, and can add the
1467 * connection to either the assigned server's queue or to the proxy's queue.
1468 *
1469 * Returns :
1470 *
1471 * SRV_STATUS_OK if everything is OK.
1472 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1473 * SRV_STATUS_QUEUED if the connection has been queued.
1474 * SRV_STATUS_FULL if the server(s) is/are saturated and the
1475 * connection could not be queued.
1476 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1477 *
1478 */
1479int assign_server_and_queue(struct session *s)
1480{
1481 struct pendconn *p;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001482 struct server *srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001483 int err;
1484
1485 if (s->pend_pos)
1486 return SRV_STATUS_INTERNAL;
1487
1488 if (s->flags & SN_ASSIGNED) {
Willy Tarreau21d2af32008-02-14 20:25:24 +01001489 if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
1490 /* server scheduled for redirection, and already assigned. We
1491 * don't want to go further nor check the queue.
1492 */
1493 return SRV_STATUS_OK;
1494 }
1495
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001496 if (s->srv && s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001497 /* it's left to the dispatcher to choose a server */
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001498 s->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001499 } else {
1500 /* a server does not need to be assigned, perhaps because we're in
1501 * direct mode, or in dispatch or transparent modes where the server
1502 * is not needed.
1503 */
1504 if (s->srv &&
1505 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1506 p = pendconn_add(s);
1507 if (p)
1508 return SRV_STATUS_QUEUED;
1509 else
1510 return SRV_STATUS_FULL;
1511 }
1512 return SRV_STATUS_OK;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001513 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001514 }
1515
1516 /* a server needs to be assigned */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001517 srv = s->srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001518 err = assign_server(s);
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001519
1520 if (srv) {
1521 if (srv != s->srv) {
1522 /* This session was previously dispatched to another server:
1523 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1524 * - set SN_REDISP if it was successfully redispatched
1525 * - increment srv->redispatches and be->redispatches
1526 */
1527
1528 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1529 s->txn.flags &= ~TX_CK_MASK;
1530 s->txn.flags |= TX_CK_DOWN;
1531 }
1532
1533 s->flags |= SN_REDISP;
1534
1535 srv->redispatches++;
1536 s->be->redispatches++;
1537 } else {
1538 srv->retries++;
1539 s->be->retries++;
1540 }
1541 }
1542
Willy Tarreaubaaee002006-06-26 02:48:02 +02001543 switch (err) {
1544 case SRV_STATUS_OK:
Willy Tarreau21d2af32008-02-14 20:25:24 +01001545 if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
1546 /* server supporting redirection and it is possible.
1547 * Let's report that and ignore maxconn !
1548 */
1549 return SRV_STATUS_OK;
1550 }
1551
Willy Tarreaubaaee002006-06-26 02:48:02 +02001552 /* in balance mode, we might have servers with connection limits */
1553 if (s->srv &&
1554 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1555 p = pendconn_add(s);
1556 if (p)
1557 return SRV_STATUS_QUEUED;
1558 else
1559 return SRV_STATUS_FULL;
1560 }
1561 return SRV_STATUS_OK;
1562
1563 case SRV_STATUS_FULL:
1564 /* queue this session into the proxy's queue */
1565 p = pendconn_add(s);
1566 if (p)
1567 return SRV_STATUS_QUEUED;
1568 else
1569 return SRV_STATUS_FULL;
1570
1571 case SRV_STATUS_NOSRV:
1572 case SRV_STATUS_INTERNAL:
1573 return err;
1574 default:
1575 return SRV_STATUS_INTERNAL;
1576 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001577}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001578
1579/*
1580 * This function initiates a connection to the server assigned to this session
1581 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1582 * It can return one of :
1583 * - SN_ERR_NONE if everything's OK
1584 * - SN_ERR_SRVTO if there are no more servers
1585 * - SN_ERR_SRVCL if the connection was refused by the server
1586 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1587 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1588 * - SN_ERR_INTERNAL for any other purely internal errors
1589 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1590 */
1591int connect_server(struct session *s)
1592{
1593 int fd, err;
1594
1595 if (!(s->flags & SN_ADDR_SET)) {
1596 err = assign_server_address(s);
1597 if (err != SRV_STATUS_OK)
1598 return SN_ERR_INTERNAL;
1599 }
1600
1601 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
1602 qfprintf(stderr, "Cannot get a server socket.\n");
1603
1604 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001605 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001606 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001607 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001608 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001609 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001610 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001611 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001612 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001613 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001614 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001615 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001616 /* this is a resource error */
1617 return SN_ERR_RESOURCE;
1618 }
1619
1620 if (fd >= global.maxsock) {
1621 /* do not log anything there, it's a normal condition when this option
1622 * is used to serialize connections to a server !
1623 */
1624 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1625 close(fd);
1626 return SN_ERR_PRXCOND; /* it is a configuration limit */
1627 }
1628
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001629#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001630 if ((s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001631 /* TCP splicing supported by both FE and BE */
1632 tcp_splice_initfd(s->cli_fd, fd);
1633 }
1634#endif
1635
Willy Tarreaubaaee002006-06-26 02:48:02 +02001636 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1637 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1638 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1639 close(fd);
1640 return SN_ERR_INTERNAL;
1641 }
1642
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001643 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001644 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1645
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001646 if (s->be->options & PR_O_TCP_NOLING)
1647 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1648
Willy Tarreaubaaee002006-06-26 02:48:02 +02001649 /* allow specific binding :
1650 * - server-specific at first
1651 * - proxy-specific next
1652 */
1653 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001654 struct sockaddr_in *remote = NULL;
1655 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001656
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001657#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001658 switch (s->srv->state & SRV_TPROXY_MASK) {
1659 case SRV_TPROXY_ADDR:
1660 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1661 flags = 3;
1662 break;
1663 case SRV_TPROXY_CLI:
1664 flags |= 2;
1665 /* fall through */
1666 case SRV_TPROXY_CIP:
1667 /* FIXME: what can we do if the client connects in IPv6 ? */
1668 flags |= 1;
1669 remote = (struct sockaddr_in *)&s->cli_addr;
1670 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001671 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001672#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001673 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001674 if (ret) {
1675 close(fd);
1676 if (ret == 1) {
1677 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1678 s->be->id, s->srv->id);
1679 send_log(s->be, LOG_EMERG,
1680 "Cannot bind to source address before connect() for server %s/%s.\n",
1681 s->be->id, s->srv->id);
1682 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001683 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001684 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001685 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001686 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001687 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001688 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001689 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001690 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001691 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001692 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001693 struct sockaddr_in *remote = NULL;
1694 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001695
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001696#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001697 switch (s->be->options & PR_O_TPXY_MASK) {
1698 case PR_O_TPXY_ADDR:
1699 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1700 flags = 3;
1701 break;
1702 case PR_O_TPXY_CLI:
1703 flags |= 2;
1704 /* fall through */
1705 case PR_O_TPXY_CIP:
1706 /* FIXME: what can we do if the client connects in IPv6 ? */
1707 flags |= 1;
1708 remote = (struct sockaddr_in *)&s->cli_addr;
1709 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001710 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001711#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001712 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001713 if (ret) {
1714 close(fd);
1715 if (ret == 1) {
1716 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1717 s->be->id);
1718 send_log(s->be, LOG_EMERG,
1719 "Cannot bind to source address before connect() for proxy %s.\n",
1720 s->be->id);
1721 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001722 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001723 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001724 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001725 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1726 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001727 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001728 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001729 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001730 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001731
Willy Tarreaubaaee002006-06-26 02:48:02 +02001732 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1733 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1734
1735 if (errno == EAGAIN || errno == EADDRINUSE) {
1736 char *msg;
1737 if (errno == EAGAIN) /* no free ports left, try again later */
1738 msg = "no free ports";
1739 else
1740 msg = "local address already in use";
1741
1742 qfprintf(stderr,"Cannot connect: %s.\n",msg);
1743 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001744 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001745 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001746 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001747 return SN_ERR_RESOURCE;
1748 } else if (errno == ETIMEDOUT) {
1749 //qfprintf(stderr,"Connect(): ETIMEDOUT");
1750 close(fd);
1751 return SN_ERR_SRVTO;
1752 } else {
1753 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1754 //qfprintf(stderr,"Connect(): %d", errno);
1755 close(fd);
1756 return SN_ERR_SRVCL;
1757 }
1758 }
1759
1760 fdtab[fd].owner = s->task;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001761 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +02001762 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001763 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001764 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001765 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001766
1767 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1768 fdtab[fd].peerlen = sizeof(s->srv_addr);
1769
Willy Tarreauf161a342007-04-08 16:59:42 +02001770 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001771
1772 fd_insert(fd);
1773 if (s->srv) {
1774 s->srv->cur_sess++;
1775 if (s->srv->cur_sess > s->srv->cur_sess_max)
1776 s->srv->cur_sess_max = s->srv->cur_sess;
Willy Tarreau51406232008-03-10 22:04:20 +01001777 if (s->be->lbprm.server_take_conn)
1778 s->be->lbprm.server_take_conn(s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001779 }
1780
Willy Tarreaud7c30f92007-12-03 01:38:36 +01001781 if (!tv_add_ifset(&s->req->cex, &now, &s->be->timeout.connect))
Willy Tarreaud7971282006-07-29 18:36:34 +02001782 tv_eternity(&s->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001783 return SN_ERR_NONE; /* connection is OK */
1784}
1785
1786
1787/*
1788 * This function checks the retry count during the connect() job.
1789 * It updates the session's srv_state and retries, so that the caller knows
1790 * what it has to do. It uses the last connection error to set the log when
1791 * it expires. It returns 1 when it has expired, and 0 otherwise.
1792 */
1793int srv_count_retry_down(struct session *t, int conn_err)
1794{
1795 /* we are in front of a retryable error */
1796 t->conn_retries--;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +02001797
Willy Tarreaubaaee002006-06-26 02:48:02 +02001798 if (t->conn_retries < 0) {
1799 /* if not retryable anymore, let's abort */
Willy Tarreaud7971282006-07-29 18:36:34 +02001800 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001801 srv_close_with_err(t, conn_err, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001802 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001803 if (t->srv)
1804 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001805 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001806
1807 /* We used to have a free connection slot. Since we'll never use it,
1808 * we have to inform the server that it may be used by another session.
1809 */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001810 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001811 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001812 return 1;
1813 }
1814 return 0;
1815}
1816
1817
1818/*
1819 * This function performs the retryable part of the connect() job.
1820 * It updates the session's srv_state and retries, so that the caller knows
1821 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
1822 * it needs to redispatch.
1823 */
1824int srv_retryable_connect(struct session *t)
1825{
1826 int conn_err;
1827
1828 /* This loop ensures that we stop before the last retry in case of a
1829 * redispatchable server.
1830 */
1831 do {
1832 /* initiate a connection to the server */
1833 conn_err = connect_server(t);
1834 switch (conn_err) {
1835
1836 case SN_ERR_NONE:
1837 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
1838 t->srv_state = SV_STCONN;
Willy Tarreau98937b82007-12-10 15:05:42 +01001839 if (t->srv)
1840 t->srv->cum_sess++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001841 return 1;
1842
1843 case SN_ERR_INTERNAL:
Willy Tarreaud7971282006-07-29 18:36:34 +02001844 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001845 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001846 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001847 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001848 t->srv->cum_sess++;
1849 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001850 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001851 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001852 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001853 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001854 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001855 return 1;
1856 }
1857 /* ensure that we have enough retries left */
1858 if (srv_count_retry_down(t, conn_err)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001859 return 1;
1860 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001861 } while (t->srv == NULL || t->conn_retries > 0 || !(t->be->options & PR_O_REDISP));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001862
1863 /* We're on our last chance, and the REDISP option was specified.
1864 * We will ignore cookie and force to balance or use the dispatcher.
1865 */
1866 /* let's try to offer this slot to anybody */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001867 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001868 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001869
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001870 if (t->srv)
1871 t->srv->cum_sess++; //FIXME?
Willy Tarreaubaaee002006-06-26 02:48:02 +02001872
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001873 /* it's left to the dispatcher to choose a server */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001874 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001875 return 0;
1876}
1877
1878
1879/* This function performs the "redispatch" part of a connection attempt. It
1880 * will assign a server if required, queue the connection if required, and
1881 * handle errors that might arise at this level. It can change the server
1882 * state. It will return 1 if it encounters an error, switches the server
1883 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1884 * that the connection is ready to use.
1885 */
1886
1887int srv_redispatch_connect(struct session *t)
1888{
1889 int conn_err;
1890
1891 /* We know that we don't have any connection pending, so we will
1892 * try to get a new one, and wait in this state if it's queued
1893 */
1894 conn_err = assign_server_and_queue(t);
1895 switch (conn_err) {
1896 case SRV_STATUS_OK:
1897 break;
1898
1899 case SRV_STATUS_NOSRV:
1900 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaud7971282006-07-29 18:36:34 +02001901 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001902 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001903 503, error_message(t, HTTP_ERR_503));
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001904
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001905 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001906
1907 return 1;
1908
1909 case SRV_STATUS_QUEUED:
Willy Tarreau1fa31262007-12-03 00:36:16 +01001910 /* note: we use the connect expiration date for the queue. */
1911 if (!tv_add_ifset(&t->req->cex, &now, &t->be->timeout.queue))
Willy Tarreaud7971282006-07-29 18:36:34 +02001912 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001913 t->srv_state = SV_STIDLE;
1914 /* do nothing else and do not wake any other session up */
1915 return 1;
1916
1917 case SRV_STATUS_FULL:
1918 case SRV_STATUS_INTERNAL:
1919 default:
Willy Tarreaud7971282006-07-29 18:36:34 +02001920 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001921 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001922 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001923 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001924 t->srv->cum_sess++;
1925 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001926 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001927 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001928
1929 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001930 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001931 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001932 return 1;
1933 }
1934 /* if we get here, it's because we got SRV_STATUS_OK, which also
1935 * means that the connection has not been queued.
1936 */
1937 return 0;
1938}
1939
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001940int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01001941 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001942 return px->down_time;
1943
1944 return now.tv_sec - px->last_change + px->down_time;
1945}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001946
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001947/* This function parses a "balance" statement in a backend section describing
1948 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
1949 * returns -1, it may write an error message into ther <err> buffer, for at
1950 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
1951 * written. The function must be called with <args> pointing to the first word
1952 * after "balance".
1953 */
1954int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
1955{
1956 if (!*(args[0])) {
1957 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01001958 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1959 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001960 return 0;
1961 }
1962
1963 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001964 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1965 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001966 }
Willy Tarreau51406232008-03-10 22:04:20 +01001967 else if (!strcmp(args[0], "leastconn")) {
1968 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1969 curproxy->lbprm.algo |= BE_LB_ALGO_LC;
1970 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001971 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001972 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1973 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001974 }
1975 else if (!strcmp(args[0], "uri")) {
Marek Majkowski9c30fc12008-04-27 23:25:55 +02001976 int arg = 1;
1977
Willy Tarreau31682232007-11-29 15:38:04 +01001978 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1979 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Marek Majkowski9c30fc12008-04-27 23:25:55 +02001980
1981 while (*args[arg]) {
1982 if (!strcmp(args[arg], "len")) {
1983 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
1984 snprintf(err, errlen, "'balance uri len' expects a positive integer (got '%s').", args[arg+1]);
1985 return -1;
1986 }
1987 curproxy->uri_len_limit = atoi(args[arg+1]);
1988 arg += 2;
1989 }
1990 else if (!strcmp(args[arg], "depth")) {
1991 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
1992 snprintf(err, errlen, "'balance uri depth' expects a positive integer (got '%s').", args[arg+1]);
1993 return -1;
1994 }
1995 /* hint: we store the position of the ending '/' (depth+1) so
1996 * that we avoid a comparison while computing the hash.
1997 */
1998 curproxy->uri_dirs_depth1 = atoi(args[arg+1]) + 1;
1999 arg += 2;
2000 }
2001 else {
2002 snprintf(err, errlen, "'balance uri' only accepts parameters 'len' and 'depth' (got '%s').", args[arg]);
2003 return -1;
2004 }
2005 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002006 }
Willy Tarreau01732802007-11-01 22:48:15 +01002007 else if (!strcmp(args[0], "url_param")) {
2008 if (!*args[1]) {
2009 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
2010 return -1;
2011 }
Willy Tarreau31682232007-11-29 15:38:04 +01002012 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2013 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreau01732802007-11-01 22:48:15 +01002014 if (curproxy->url_param_name)
2015 free(curproxy->url_param_name);
2016 curproxy->url_param_name = strdup(args[1]);
2017 curproxy->url_param_len = strlen(args[1]);
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002018 if (*args[2]) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02002019 if (strcmp(args[2], "check_post")) {
2020 snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
2021 return -1;
2022 }
2023 if (*args[3]) {
2024 /* TODO: maybe issue a warning if there is no value, no digits or too long */
2025 curproxy->url_param_post_limit = str2ui(args[3]);
2026 }
2027 /* if no limit, or faul value in args[3], then default to a moderate wordlen */
2028 if (!curproxy->url_param_post_limit)
2029 curproxy->url_param_post_limit = 48;
2030 else if ( curproxy->url_param_post_limit < 3 )
2031 curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
2032 }
Willy Tarreau01732802007-11-01 22:48:15 +01002033 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002034 else {
Willy Tarreau51406232008-03-10 22:04:20 +01002035 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri' and 'url_param' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002036 return -1;
2037 }
2038 return 0;
2039}
2040
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002041
2042/************************************************************************/
2043/* All supported keywords must be declared here. */
2044/************************************************************************/
2045
2046/* set test->i to the number of enabled servers on the proxy */
2047static int
2048acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
2049 struct acl_expr *expr, struct acl_test *test)
2050{
2051 test->flags = ACL_TEST_F_VOL_TEST;
2052 if (expr->arg_len) {
2053 /* another proxy was designated, we must look for it */
2054 for (px = proxy; px; px = px->next)
2055 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2056 break;
2057 }
2058 if (!px)
2059 return 0;
2060
2061 if (px->srv_act)
2062 test->i = px->srv_act;
2063 else if (px->lbprm.fbck)
2064 test->i = 1;
2065 else
2066 test->i = px->srv_bck;
2067
2068 return 1;
2069}
2070
2071
2072/* Note: must not be declared <const> as its list will be overwritten */
2073static struct acl_kw_list acl_kws = {{ },{
2074 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int },
2075 { NULL, NULL, NULL, NULL },
2076}};
2077
2078
2079__attribute__((constructor))
2080static void __backend_init(void)
2081{
2082 acl_register_keywords(&acl_kws);
2083}
2084
2085
Willy Tarreaubaaee002006-06-26 02:48:02 +02002086/*
2087 * Local variables:
2088 * c-indent-level: 8
2089 * c-basic-offset: 8
2090 * End:
2091 */