blob: f51ac8fc38ee3bdef8fc4cf99fb852418de63e4b [file] [log] [blame]
/*
 * Backend variables and functions.
 *
 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +020019#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020
Willy Tarreau2dd0d472006-06-29 17:53:05 +020021#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020022#include <common/config.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020023#include <common/debug.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010024#include <common/eb32tree.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020025#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020026#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020027
Willy Tarreaubaaee002006-06-26 02:48:02 +020028#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020029
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010030#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020031#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020032#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020033#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010034#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020035#include <proto/log.h>
36#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010037#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020038#include <proto/queue.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020039#include <proto/session.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020040#include <proto/stream_sock.h>
41#include <proto/task.h>
42
Willy Tarreau6d1a9882007-01-07 02:03:04 +010043#ifdef CONFIG_HAP_TCPSPLICE
44#include <libtcpsplice.h>
45#endif
46
/* Forward declarations of the FWRR helpers which are used before their
 * definition further down in this file.
 */
static void fwrr_get_srv(struct server *s);
static void fwrr_queue_srv(struct server *s);
static inline void fwrr_dequeue_srv(struct server *s);
static inline void fwrr_remove_from_tree(struct server *s);
static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
52
53/* This function returns non-zero if a server with the given weight and state
54 * is usable for LB, otherwise zero.
55 */
56static inline int srv_is_usable(int state, int weight)
57{
58 if (!weight)
59 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010060 if (state & SRV_GOINGDOWN)
61 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010062 if (!(state & SRV_RUNNING))
63 return 0;
64 return 1;
65}
66
Willy Tarreaubaaee002006-06-26 02:48:02 +020067/*
68 * This function recounts the number of usable active and backup servers for
69 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010070 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010071 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010072 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020073 */
Willy Tarreaub625a082007-11-26 01:15:43 +010074static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020075{
76 struct server *srv;
77
Willy Tarreau20697042007-11-15 23:26:18 +010078 px->srv_act = px->srv_bck = 0;
79 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010080 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020081 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010082 if (!srv_is_usable(srv->state, srv->eweight))
83 continue;
84
85 if (srv->state & SRV_BACKUP) {
86 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010087 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010088 px->lbprm.fbck = srv;
89 px->srv_bck++;
90 px->lbprm.tot_wbck += srv->eweight;
91 } else {
92 px->srv_act++;
93 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020094 }
95 }
Willy Tarreaub625a082007-11-26 01:15:43 +010096}
Willy Tarreau20697042007-11-15 23:26:18 +010097
Willy Tarreaub625a082007-11-26 01:15:43 +010098/* This function simply updates the backend's tot_weight and tot_used values
99 * after servers weights have been updated. It is designed to be used after
100 * recount_servers() or equivalent.
101 */
102static void update_backend_weight(struct proxy *px)
103{
Willy Tarreau20697042007-11-15 23:26:18 +0100104 if (px->srv_act) {
105 px->lbprm.tot_weight = px->lbprm.tot_wact;
106 px->lbprm.tot_used = px->srv_act;
107 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100108 else if (px->lbprm.fbck) {
109 /* use only the first backup server */
110 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
111 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100112 }
113 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100114 px->lbprm.tot_weight = px->lbprm.tot_wbck;
115 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100116 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100117}
118
119/* this function updates the map according to server <srv>'s new state */
120static void map_set_server_status_down(struct server *srv)
121{
122 struct proxy *p = srv->proxy;
123
124 if (srv->state == srv->prev_state &&
125 srv->eweight == srv->prev_eweight)
126 return;
127
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100128 if (srv_is_usable(srv->state, srv->eweight))
129 goto out_update_state;
130
Willy Tarreaub625a082007-11-26 01:15:43 +0100131 /* FIXME: could be optimized since we know what changed */
132 recount_servers(p);
133 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100134 p->lbprm.map.state |= PR_MAP_RECALC;
135 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100136 srv->prev_state = srv->state;
137 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200138}
139
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100140/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100141static void map_set_server_status_up(struct server *srv)
142{
143 struct proxy *p = srv->proxy;
144
145 if (srv->state == srv->prev_state &&
146 srv->eweight == srv->prev_eweight)
147 return;
148
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100149 if (!srv_is_usable(srv->state, srv->eweight))
150 goto out_update_state;
151
Willy Tarreaub625a082007-11-26 01:15:43 +0100152 /* FIXME: could be optimized since we know what changed */
153 recount_servers(p);
154 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100155 p->lbprm.map.state |= PR_MAP_RECALC;
156 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100157 srv->prev_state = srv->state;
158 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100159}
160
Willy Tarreau20697042007-11-15 23:26:18 +0100161/* This function recomputes the server map for proxy px. It relies on
162 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
163 * called after recount_servers(). It also expects px->lbprm.map.srv
164 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200165 */
166void recalc_server_map(struct proxy *px)
167{
168 int o, tot, flag;
169 struct server *cur, *best;
170
Willy Tarreau20697042007-11-15 23:26:18 +0100171 switch (px->lbprm.tot_used) {
172 case 0: /* no server */
173 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200174 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100175 case 1: /* only one server, just fill first entry */
176 tot = 1;
177 break;
178 default:
179 tot = px->lbprm.tot_weight;
180 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200181 }
182
Willy Tarreau20697042007-11-15 23:26:18 +0100183 /* here we *know* that we have some servers */
184 if (px->srv_act)
185 flag = SRV_RUNNING;
186 else
187 flag = SRV_RUNNING | SRV_BACKUP;
188
Willy Tarreaubaaee002006-06-26 02:48:02 +0200189 /* this algorithm gives priority to the first server, which means that
190 * it will respect the declaration order for equivalent weights, and
191 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100192 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200193 * case, where we want the first server only.
194 */
195 for (cur = px->srv; cur; cur = cur->next)
196 cur->wscore = 0;
197
198 for (o = 0; o < tot; o++) {
199 int max = 0;
200 best = NULL;
201 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100202 if (flag == (cur->state &
203 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200204 int v;
205
206 /* If we are forced to return only one server, we don't want to
207 * go further, because we would return the wrong one due to
208 * divide overflow.
209 */
210 if (tot == 1) {
211 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100212 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200213 break;
214 }
215
Willy Tarreau417fae02007-03-25 21:16:40 +0200216 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200217 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
218 if (best == NULL || v > max) {
219 max = v;
220 best = cur;
221 }
222 }
223 }
Willy Tarreau20697042007-11-15 23:26:18 +0100224 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200225 best->wscore -= tot;
226 }
Willy Tarreau20697042007-11-15 23:26:18 +0100227 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200228}
229
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100230/* This function is responsible of building the server MAP for map-based LB
231 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
232 * weights if applicable. It should be called only once per proxy, at config
233 * time.
234 */
235void init_server_map(struct proxy *p)
236{
237 struct server *srv;
238 int pgcd;
239 int act, bck;
240
Willy Tarreaub625a082007-11-26 01:15:43 +0100241 p->lbprm.set_server_status_up = map_set_server_status_up;
242 p->lbprm.set_server_status_down = map_set_server_status_down;
243 p->lbprm.update_server_eweight = NULL;
244
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100245 if (!p->srv)
246 return;
247
248 /* We will factor the weights to reduce the table,
249 * using Euclide's largest common divisor algorithm
250 */
251 pgcd = p->srv->uweight;
252 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
253 int w = srv->uweight;
254 while (w) {
255 int t = pgcd % w;
256 pgcd = w;
257 w = t;
258 }
259 }
260
261 /* It is sometimes useful to know what factor to apply
262 * to the backend's effective weight to know its real
263 * weight.
264 */
265 p->lbprm.wmult = pgcd;
266
267 act = bck = 0;
268 for (srv = p->srv; srv; srv = srv->next) {
269 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100270 srv->prev_eweight = srv->eweight;
271 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100272 if (srv->state & SRV_BACKUP)
273 bck += srv->eweight;
274 else
275 act += srv->eweight;
276 }
277
278 /* this is the largest map we will ever need for this servers list */
279 if (act < bck)
280 act = bck;
281
282 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
283 /* recounts servers and their weights */
284 p->lbprm.map.state = PR_MAP_RECALC;
285 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100286 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100287 recalc_server_map(p);
288}
289
Willy Tarreaub625a082007-11-26 01:15:43 +0100290/* This function updates the server trees according to server <srv>'s new
291 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100292 * It is not important whether the server was already down or not. It is not
293 * important either that the new state is completely down (the caller may not
294 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100295 */
296static void fwrr_set_server_status_down(struct server *srv)
297{
298 struct proxy *p = srv->proxy;
299 struct fwrr_group *grp;
300
301 if (srv->state == srv->prev_state &&
302 srv->eweight == srv->prev_eweight)
303 return;
304
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100305 if (srv_is_usable(srv->state, srv->eweight))
306 goto out_update_state;
307
Willy Tarreaub625a082007-11-26 01:15:43 +0100308 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
309 /* server was already down */
310 goto out_update_backend;
311
312 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
313 grp->next_weight -= srv->prev_eweight;
314
315 if (srv->state & SRV_BACKUP) {
316 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
317 p->srv_bck--;
318
319 if (srv == p->lbprm.fbck) {
320 /* we lost the first backup server in a single-backup
321 * configuration, we must search another one.
322 */
323 struct server *srv2 = p->lbprm.fbck;
324 do {
325 srv2 = srv2->next;
326 } while (srv2 &&
327 !((srv2->state & SRV_BACKUP) &&
328 srv_is_usable(srv2->state, srv2->eweight)));
329 p->lbprm.fbck = srv2;
330 }
331 } else {
332 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
333 p->srv_act--;
334 }
335
336 fwrr_dequeue_srv(srv);
337 fwrr_remove_from_tree(srv);
338
339out_update_backend:
340 /* check/update tot_used, tot_weight */
341 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100342 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100343 srv->prev_state = srv->state;
344 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100345}
346
347/* This function updates the server trees according to server <srv>'s new
348 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100349 * It is not important whether the server was already down or not. It is not
350 * important either that the new state is completely UP (the caller may not
351 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100352 * the weight of a server which was already up.
353 */
354static void fwrr_set_server_status_up(struct server *srv)
355{
356 struct proxy *p = srv->proxy;
357 struct fwrr_group *grp;
358
359 if (srv->state == srv->prev_state &&
360 srv->eweight == srv->prev_eweight)
361 return;
362
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100363 if (!srv_is_usable(srv->state, srv->eweight))
364 goto out_update_state;
365
Willy Tarreaub625a082007-11-26 01:15:43 +0100366 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
367 /* server was already up */
368 goto out_update_backend;
369
370 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
371 grp->next_weight += srv->eweight;
372
373 if (srv->state & SRV_BACKUP) {
374 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
375 p->srv_bck++;
376
Willy Tarreauf4cca452008-03-08 21:42:54 +0100377 if (!(p->options & PR_O_USE_ALL_BK)) {
378 if (!p->lbprm.fbck) {
379 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100380 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100381 } else {
382 /* we may have restored a backup server prior to fbck,
383 * in which case it should replace it.
384 */
385 struct server *srv2 = srv;
386 do {
387 srv2 = srv2->next;
388 } while (srv2 && (srv2 != p->lbprm.fbck));
389 if (srv2)
390 p->lbprm.fbck = srv;
391 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100392 }
393 } else {
394 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
395 p->srv_act++;
396 }
397
398 /* note that eweight cannot be 0 here */
399 fwrr_get_srv(srv);
400 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
401 fwrr_queue_srv(srv);
402
403out_update_backend:
404 /* check/update tot_used, tot_weight */
405 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100406 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100407 srv->prev_state = srv->state;
408 srv->prev_eweight = srv->eweight;
409}
410
411/* This function must be called after an update to server <srv>'s effective
412 * weight. It may be called after a state change too.
413 */
414static void fwrr_update_server_weight(struct server *srv)
415{
416 int old_state, new_state;
417 struct proxy *p = srv->proxy;
418 struct fwrr_group *grp;
419
420 if (srv->state == srv->prev_state &&
421 srv->eweight == srv->prev_eweight)
422 return;
423
424 /* If changing the server's weight changes its state, we simply apply
425 * the procedures we already have for status change. If the state
426 * remains down, the server is not in any tree, so it's as easy as
427 * updating its values. If the state remains up with different weights,
428 * there are some computations to perform to find a new place and
429 * possibly a new tree for this server.
430 */
431
432 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
433 new_state = srv_is_usable(srv->state, srv->eweight);
434
435 if (!old_state && !new_state) {
436 srv->prev_state = srv->state;
437 srv->prev_eweight = srv->eweight;
438 return;
439 }
440 else if (!old_state && new_state) {
441 fwrr_set_server_status_up(srv);
442 return;
443 }
444 else if (old_state && !new_state) {
445 fwrr_set_server_status_down(srv);
446 return;
447 }
448
449 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
450 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
451
452 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
453 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
454
455 if (srv->lb_tree == grp->init) {
456 fwrr_dequeue_srv(srv);
457 fwrr_queue_by_weight(grp->init, srv);
458 }
459 else if (!srv->lb_tree) {
460 /* FIXME: server was down. This is not possible right now but
461 * may be needed soon for slowstart or graceful shutdown.
462 */
463 fwrr_dequeue_srv(srv);
464 fwrr_get_srv(srv);
465 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
466 fwrr_queue_srv(srv);
467 } else {
468 /* The server is either active or in the next queue. If it's
469 * still in the active queue and it has not consumed all of its
470 * places, let's adjust its next position.
471 */
472 fwrr_get_srv(srv);
473
474 if (srv->eweight > 0) {
475 int prev_next = srv->npos;
476 int step = grp->next_weight / srv->eweight;
477
478 srv->npos = srv->lpos + step;
479 srv->rweight = 0;
480
481 if (srv->npos > prev_next)
482 srv->npos = prev_next;
483 if (srv->npos < grp->curr_pos + 2)
484 srv->npos = grp->curr_pos + step;
485 } else {
486 /* push it into the next tree */
487 srv->npos = grp->curr_pos + grp->curr_weight;
488 }
489
490 fwrr_dequeue_srv(srv);
491 fwrr_queue_srv(srv);
492 }
493
494 update_backend_weight(p);
495 srv->prev_state = srv->state;
496 srv->prev_eweight = srv->eweight;
497}
498
499/* Remove a server from a tree. It must have previously been dequeued. This
500 * function is meant to be called when a server is going down or has its
501 * weight disabled.
502 */
503static inline void fwrr_remove_from_tree(struct server *s)
504{
505 s->lb_tree = NULL;
506}
507
508/* Queue a server in the weight tree <root>, assuming the weight is >0.
509 * We want to sort them by inverted weights, because we need to place
510 * heavy servers first in order to get a smooth distribution.
511 */
512static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
513{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100514 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100515 eb32_insert(root, &s->lb_node);
516 s->lb_tree = root;
517}
518
519/* This function is responsible for building the weight trees in case of fast
520 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
521 * ratio. Both active and backup groups are initialized.
522 */
523void fwrr_init_server_groups(struct proxy *p)
524{
525 struct server *srv;
526 struct eb_root init_head = EB_ROOT;
527
528 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
529 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
530 p->lbprm.update_server_eweight = fwrr_update_server_weight;
531
532 p->lbprm.wdiv = BE_WEIGHT_SCALE;
533 for (srv = p->srv; srv; srv = srv->next) {
534 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
535 srv->prev_state = srv->state;
536 }
537
538 recount_servers(p);
539 update_backend_weight(p);
540
541 /* prepare the active servers group */
542 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
543 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
544 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
545 p->lbprm.fwrr.act.t1 = init_head;
546 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
547 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
548
549 /* prepare the backup servers group */
550 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
551 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
552 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
553 p->lbprm.fwrr.bck.t1 = init_head;
554 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
555 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
556
557 /* queue active and backup servers in two distinct groups */
558 for (srv = p->srv; srv; srv = srv->next) {
559 if (!srv_is_usable(srv->state, srv->eweight))
560 continue;
561 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
562 p->lbprm.fwrr.bck.init :
563 p->lbprm.fwrr.act.init,
564 srv);
565 }
566}
567
568/* simply removes a server from a weight tree */
569static inline void fwrr_dequeue_srv(struct server *s)
570{
571 eb32_delete(&s->lb_node);
572}
573
574/* queues a server into the appropriate group and tree depending on its
575 * backup status, and ->npos. If the server is disabled, simply assign
576 * it to the NULL tree.
577 */
578static void fwrr_queue_srv(struct server *s)
579{
580 struct proxy *p = s->proxy;
581 struct fwrr_group *grp;
582
583 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
584
585 /* Delay everything which does not fit into the window and everything
586 * which does not fit into the theorical new window.
587 */
588 if (!srv_is_usable(s->state, s->eweight)) {
589 fwrr_remove_from_tree(s);
590 }
591 else if (s->eweight <= 0 ||
592 s->npos >= 2 * grp->curr_weight ||
593 s->npos >= grp->curr_weight + grp->next_weight) {
594 /* put into next tree, and readjust npos in case we could
595 * finally take this back to current. */
596 s->npos -= grp->curr_weight;
597 fwrr_queue_by_weight(grp->next, s);
598 }
599 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100600 /* The sorting key is stored in units of s->npos * user_weight
601 * in order to avoid overflows. As stated in backend.h, the
602 * lower the scale, the rougher the weights modulation, and the
603 * higher the scale, the lower the number of servers without
604 * overflow. With this formula, the result is always positive,
605 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100606 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100607 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
608 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
609
610 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100611 s->lb_tree = &grp->curr;
612 }
613}
614
615/* prepares a server when extracting it from the "init" tree */
616static inline void fwrr_get_srv_init(struct server *s)
617{
618 s->npos = s->rweight = 0;
619}
620
621/* prepares a server when extracting it from the "next" tree */
622static inline void fwrr_get_srv_next(struct server *s)
623{
624 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
625 &s->proxy->lbprm.fwrr.bck :
626 &s->proxy->lbprm.fwrr.act;
627
628 s->npos += grp->curr_weight;
629}
630
631/* prepares a server when it was marked down */
632static inline void fwrr_get_srv_down(struct server *s)
633{
634 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
635 &s->proxy->lbprm.fwrr.bck :
636 &s->proxy->lbprm.fwrr.act;
637
638 s->npos = grp->curr_pos;
639}
640
641/* prepares a server when extracting it from its tree */
642static void fwrr_get_srv(struct server *s)
643{
644 struct proxy *p = s->proxy;
645 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
646 &p->lbprm.fwrr.bck :
647 &p->lbprm.fwrr.act;
648
649 if (s->lb_tree == grp->init) {
650 fwrr_get_srv_init(s);
651 }
652 else if (s->lb_tree == grp->next) {
653 fwrr_get_srv_next(s);
654 }
655 else if (s->lb_tree == NULL) {
656 fwrr_get_srv_down(s);
657 }
658}
659
660/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
661 * when this happens, and "next" filled with servers sorted by weights.
662 */
663static inline void fwrr_switch_trees(struct fwrr_group *grp)
664{
665 struct eb_root *swap;
666 swap = grp->init;
667 grp->init = grp->next;
668 grp->next = swap;
669 grp->curr_weight = grp->next_weight;
670 grp->curr_pos = grp->curr_weight;
671}
672
673/* return next server from the current tree in FWRR group <grp>, or a server
674 * from the "init" tree if appropriate. If both trees are empty, return NULL.
675 */
676static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
677{
678 struct eb32_node *node;
679 struct server *s;
680
681 node = eb32_first(&grp->curr);
682 s = eb32_entry(node, struct server, lb_node);
683
684 if (!node || s->npos > grp->curr_pos) {
685 /* either we have no server left, or we have a hole */
686 struct eb32_node *node2;
687 node2 = eb32_first(grp->init);
688 if (node2) {
689 node = node2;
690 s = eb32_entry(node, struct server, lb_node);
691 fwrr_get_srv_init(s);
692 if (s->eweight == 0) /* FIXME: is it possible at all ? */
693 node = NULL;
694 }
695 }
696 if (node)
697 return s;
698 else
699 return NULL;
700}
701
702/* Computes next position of server <s> in the group. It is mandatory for <s>
703 * to have a non-zero, positive eweight.
704*/
705static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
706{
707 if (!s->npos) {
708 /* first time ever for this server */
709 s->lpos = grp->curr_pos;
710 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
711 s->rweight += grp->next_weight % s->eweight;
712
713 if (s->rweight >= s->eweight) {
714 s->rweight -= s->eweight;
715 s->npos++;
716 }
717 } else {
718 s->lpos = s->npos;
719 s->npos += grp->next_weight / s->eweight;
720 s->rweight += grp->next_weight % s->eweight;
721
722 if (s->rweight >= s->eweight) {
723 s->rweight -= s->eweight;
724 s->npos++;
725 }
726 }
727}
728
729/* Return next server from the current tree in backend <p>, or a server from
730 * the init tree if appropriate. If both trees are empty, return NULL.
731 * Saturated servers are skipped and requeued.
732 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100733static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100734{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100735 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100736 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100737 int switched;
738
739 if (p->srv_act)
740 grp = &p->lbprm.fwrr.act;
741 else if (p->lbprm.fbck)
742 return p->lbprm.fbck;
743 else if (p->srv_bck)
744 grp = &p->lbprm.fwrr.bck;
745 else
746 return NULL;
747
748 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100749 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100750 full = NULL; /* NULL-terminated list of saturated servers */
751 while (1) {
752 /* if we see an empty group, let's first try to collect weights
753 * which might have recently changed.
754 */
755 if (!grp->curr_weight)
756 grp->curr_pos = grp->curr_weight = grp->next_weight;
757
758 /* get first server from the "current" tree. When the end of
759 * the tree is reached, we may have to switch, but only once.
760 */
761 while (1) {
762 srv = fwrr_get_server_from_group(grp);
763 if (srv)
764 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100765 if (switched) {
766 if (avoided) {
767 srv = avoided;
768 break;
769 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100770 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100771 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100772 switched = 1;
773 fwrr_switch_trees(grp);
774
775 }
776
777 /* OK, we have a server. However, it may be saturated, in which
778 * case we don't want to reconsider it for now. We'll update
779 * its position and dequeue it anyway, so that we can move it
780 * to a better place afterwards.
781 */
782 fwrr_update_position(grp, srv);
783 fwrr_dequeue_srv(srv);
784 grp->curr_pos++;
Willy Tarreau7c669d72008-06-20 15:04:11 +0200785 if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100786 /* make sure it is not the server we are trying to exclude... */
787 if (srv != srvtoavoid || avoided)
788 break;
789
790 avoided = srv; /* ...but remember that is was selected yet avoided */
791 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100792
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100793 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100794 srv->next_full = full;
795 full = srv;
796 }
797
798 /* OK, we got the best server, let's update it */
799 fwrr_queue_srv(srv);
800
801 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100802 /* Requeue all extracted servers. If full==srv then it was
803 * avoided (unsucessfully) and chained, omit it now.
804 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100805 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100806 if (switched) {
807 /* the tree has switched, requeue all extracted servers
808 * into "init", because their place was lost, and only
809 * their weight matters.
810 */
811 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100812 if (likely(full != srv))
813 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100814 full = full->next_full;
815 } while (full);
816 } else {
817 /* requeue all extracted servers just as if they were consumed
818 * so that they regain their expected place.
819 */
820 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100821 if (likely(full != srv))
822 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100823 full = full->next_full;
824 } while (full);
825 }
826 }
827 return srv;
828}
829
Willy Tarreau51406232008-03-10 22:04:20 +0100830/* Remove a server from a tree. It must have previously been dequeued. This
831 * function is meant to be called when a server is going down or has its
832 * weight disabled.
833 */
834static inline void fwlc_remove_from_tree(struct server *s)
835{
836 s->lb_tree = NULL;
837}
838
839/* simply removes a server from a tree */
840static inline void fwlc_dequeue_srv(struct server *s)
841{
842 eb32_delete(&s->lb_node);
843}
844
845/* Queue a server in its associated tree, assuming the weight is >0.
846 * Servers are sorted by #conns/weight. To ensure maximum accuracy,
847 * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key.
848 */
849static inline void fwlc_queue_srv(struct server *s)
850{
Willy Tarreau7c669d72008-06-20 15:04:11 +0200851 s->lb_node.key = s->served * SRV_EWGHT_MAX / s->eweight;
Willy Tarreau51406232008-03-10 22:04:20 +0100852 eb32_insert(s->lb_tree, &s->lb_node);
853}
854
855/* Re-position the server in the FWLC tree after it has been assigned one
856 * connection or after it has released one. Note that it is possible that
857 * the server has been moved out of the tree due to failed health-checks.
858 */
859static void fwlc_srv_reposition(struct server *s)
860{
861 if (!s->lb_tree)
862 return;
863 fwlc_dequeue_srv(s);
864 fwlc_queue_srv(s);
865}
866
867/* This function updates the server trees according to server <srv>'s new
868 * state. It should be called when server <srv>'s status changes to down.
869 * It is not important whether the server was already down or not. It is not
870 * important either that the new state is completely down (the caller may not
871 * know all the variables of a server's state).
872 */
873static void fwlc_set_server_status_down(struct server *srv)
874{
875 struct proxy *p = srv->proxy;
876
877 if (srv->state == srv->prev_state &&
878 srv->eweight == srv->prev_eweight)
879 return;
880
881 if (srv_is_usable(srv->state, srv->eweight))
882 goto out_update_state;
883
884 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
885 /* server was already down */
886 goto out_update_backend;
887
888 if (srv->state & SRV_BACKUP) {
889 p->lbprm.tot_wbck -= srv->prev_eweight;
890 p->srv_bck--;
891
892 if (srv == p->lbprm.fbck) {
893 /* we lost the first backup server in a single-backup
894 * configuration, we must search another one.
895 */
896 struct server *srv2 = p->lbprm.fbck;
897 do {
898 srv2 = srv2->next;
899 } while (srv2 &&
900 !((srv2->state & SRV_BACKUP) &&
901 srv_is_usable(srv2->state, srv2->eweight)));
902 p->lbprm.fbck = srv2;
903 }
904 } else {
905 p->lbprm.tot_wact -= srv->prev_eweight;
906 p->srv_act--;
907 }
908
909 fwlc_dequeue_srv(srv);
910 fwlc_remove_from_tree(srv);
911
912out_update_backend:
913 /* check/update tot_used, tot_weight */
914 update_backend_weight(p);
915 out_update_state:
916 srv->prev_state = srv->state;
917 srv->prev_eweight = srv->eweight;
918}
919
920/* This function updates the server trees according to server <srv>'s new
921 * state. It should be called when server <srv>'s status changes to up.
922 * It is not important whether the server was already down or not. It is not
923 * important either that the new state is completely UP (the caller may not
924 * know all the variables of a server's state). This function will not change
925 * the weight of a server which was already up.
926 */
927static void fwlc_set_server_status_up(struct server *srv)
928{
929 struct proxy *p = srv->proxy;
930
931 if (srv->state == srv->prev_state &&
932 srv->eweight == srv->prev_eweight)
933 return;
934
935 if (!srv_is_usable(srv->state, srv->eweight))
936 goto out_update_state;
937
938 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
939 /* server was already up */
940 goto out_update_backend;
941
942 if (srv->state & SRV_BACKUP) {
943 srv->lb_tree = &p->lbprm.fwlc.bck;
944 p->lbprm.tot_wbck += srv->eweight;
945 p->srv_bck++;
946
947 if (!(p->options & PR_O_USE_ALL_BK)) {
948 if (!p->lbprm.fbck) {
949 /* there was no backup server anymore */
950 p->lbprm.fbck = srv;
951 } else {
952 /* we may have restored a backup server prior to fbck,
953 * in which case it should replace it.
954 */
955 struct server *srv2 = srv;
956 do {
957 srv2 = srv2->next;
958 } while (srv2 && (srv2 != p->lbprm.fbck));
959 if (srv2)
960 p->lbprm.fbck = srv;
961 }
962 }
963 } else {
964 srv->lb_tree = &p->lbprm.fwlc.act;
965 p->lbprm.tot_wact += srv->eweight;
966 p->srv_act++;
967 }
968
969 /* note that eweight cannot be 0 here */
970 fwlc_queue_srv(srv);
971
972 out_update_backend:
973 /* check/update tot_used, tot_weight */
974 update_backend_weight(p);
975 out_update_state:
976 srv->prev_state = srv->state;
977 srv->prev_eweight = srv->eweight;
978}
979
980/* This function must be called after an update to server <srv>'s effective
981 * weight. It may be called after a state change too.
982 */
983static void fwlc_update_server_weight(struct server *srv)
984{
985 int old_state, new_state;
986 struct proxy *p = srv->proxy;
987
988 if (srv->state == srv->prev_state &&
989 srv->eweight == srv->prev_eweight)
990 return;
991
992 /* If changing the server's weight changes its state, we simply apply
993 * the procedures we already have for status change. If the state
994 * remains down, the server is not in any tree, so it's as easy as
995 * updating its values. If the state remains up with different weights,
996 * there are some computations to perform to find a new place and
997 * possibly a new tree for this server.
998 */
999
1000 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
1001 new_state = srv_is_usable(srv->state, srv->eweight);
1002
1003 if (!old_state && !new_state) {
1004 srv->prev_state = srv->state;
1005 srv->prev_eweight = srv->eweight;
1006 return;
1007 }
1008 else if (!old_state && new_state) {
1009 fwlc_set_server_status_up(srv);
1010 return;
1011 }
1012 else if (old_state && !new_state) {
1013 fwlc_set_server_status_down(srv);
1014 return;
1015 }
1016
1017 if (srv->lb_tree)
1018 fwlc_dequeue_srv(srv);
1019
1020 if (srv->state & SRV_BACKUP) {
1021 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
1022 srv->lb_tree = &p->lbprm.fwlc.bck;
1023 } else {
1024 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
1025 srv->lb_tree = &p->lbprm.fwlc.act;
1026 }
1027
1028 fwlc_queue_srv(srv);
1029
1030 update_backend_weight(p);
1031 srv->prev_state = srv->state;
1032 srv->prev_eweight = srv->eweight;
1033}
1034
1035/* This function is responsible for building the trees in case of fast
1036 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
1037 * uweight ratio. Both active and backup groups are initialized.
1038 */
1039void fwlc_init_server_tree(struct proxy *p)
1040{
1041 struct server *srv;
1042 struct eb_root init_head = EB_ROOT;
1043
1044 p->lbprm.set_server_status_up = fwlc_set_server_status_up;
1045 p->lbprm.set_server_status_down = fwlc_set_server_status_down;
1046 p->lbprm.update_server_eweight = fwlc_update_server_weight;
1047 p->lbprm.server_take_conn = fwlc_srv_reposition;
1048 p->lbprm.server_drop_conn = fwlc_srv_reposition;
1049
1050 p->lbprm.wdiv = BE_WEIGHT_SCALE;
1051 for (srv = p->srv; srv; srv = srv->next) {
1052 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
1053 srv->prev_state = srv->state;
1054 }
1055
1056 recount_servers(p);
1057 update_backend_weight(p);
1058
1059 p->lbprm.fwlc.act = init_head;
1060 p->lbprm.fwlc.bck = init_head;
1061
1062 /* queue active and backup servers in two distinct groups */
1063 for (srv = p->srv; srv; srv = srv->next) {
1064 if (!srv_is_usable(srv->state, srv->eweight))
1065 continue;
1066 srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
1067 fwlc_queue_srv(srv);
1068 }
1069}
1070
1071/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
1072 * return NULL. Saturated servers are skipped.
1073 */
1074static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
1075{
1076 struct server *srv, *avoided;
1077 struct eb32_node *node;
1078
1079 srv = avoided = NULL;
1080
1081 if (p->srv_act)
1082 node = eb32_first(&p->lbprm.fwlc.act);
1083 else if (p->lbprm.fbck)
1084 return p->lbprm.fbck;
1085 else if (p->srv_bck)
1086 node = eb32_first(&p->lbprm.fwlc.bck);
1087 else
1088 return NULL;
1089
1090 while (node) {
1091 /* OK, we have a server. However, it may be saturated, in which
1092 * case we don't want to reconsider it for now, so we'll simply
1093 * skip it. Same if it's the server we try to avoid, in which
1094 * case we simply remember it for later use if needed.
1095 */
1096 struct server *s;
1097
1098 s = eb32_entry(node, struct server, lb_node);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001099 if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
Willy Tarreau51406232008-03-10 22:04:20 +01001100 if (s != srvtoavoid) {
1101 srv = s;
1102 break;
1103 }
1104 avoided = s;
1105 }
1106 node = eb32_next(node);
1107 }
1108
1109 if (!srv)
1110 srv = avoided;
1111
1112 return srv;
1113}
1114
Willy Tarreau01732802007-11-01 22:48:15 +01001115/*
1116 * This function tries to find a running server for the proxy <px> following
1117 * the URL parameter hash method. It looks for a specific parameter in the
1118 * URL and hashes it to compute the server ID. This is useful to optimize
1119 * performance by avoiding bounces between servers in contexts where sessions
1120 * are shared but cookies are not usable. If the parameter is not found, NULL
1121 * is returned. If any server is found, it will be returned. If no valid server
1122 * is found, NULL is returned.
Willy Tarreau01732802007-11-01 22:48:15 +01001123 */
1124struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
1125{
1126 unsigned long hash = 0;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001127 const char *p;
1128 const char *params;
Willy Tarreau01732802007-11-01 22:48:15 +01001129 int plen;
1130
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001131 /* when tot_weight is 0 then so is srv_count */
Willy Tarreau20697042007-11-15 23:26:18 +01001132 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +01001133 return NULL;
1134
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001135 if ((p = memchr(uri, '?', uri_len)) == NULL)
1136 return NULL;
1137
Willy Tarreau20697042007-11-15 23:26:18 +01001138 if (px->lbprm.map.state & PR_MAP_RECALC)
1139 recalc_server_map(px);
1140
Willy Tarreau01732802007-11-01 22:48:15 +01001141 p++;
1142
1143 uri_len -= (p - uri);
1144 plen = px->url_param_len;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001145 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001146
1147 while (uri_len > plen) {
1148 /* Look for the parameter name followed by an equal symbol */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001149 if (params[plen] == '=') {
1150 if (memcmp(params, px->url_param_name, plen) == 0) {
1151 /* OK, we have the parameter here at <params>, and
Willy Tarreau01732802007-11-01 22:48:15 +01001152 * the value after the equal sign, at <p>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001153 * skip the equal symbol
Willy Tarreau01732802007-11-01 22:48:15 +01001154 */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001155 p += plen + 1;
1156 uri_len -= plen + 1;
1157
Willy Tarreau01732802007-11-01 22:48:15 +01001158 while (uri_len && *p != '&') {
1159 hash = *p + (hash << 6) + (hash << 16) - hash;
1160 uri_len--;
1161 p++;
1162 }
Willy Tarreau20697042007-11-15 23:26:18 +01001163 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +01001164 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001165 }
1166 /* skip to next parameter */
1167 p = memchr(params, '&', uri_len);
1168 if (!p)
1169 return NULL;
1170 p++;
1171 uri_len -= (p - params);
1172 params = p;
1173 }
1174 return NULL;
1175}
1176
1177/*
1178 * this does the same as the previous server_ph, but check the body contents
1179 */
1180struct server *get_server_ph_post(struct session *s)
1181{
1182 unsigned long hash = 0;
1183 struct http_txn *txn = &s->txn;
1184 struct buffer *req = s->req;
1185 struct http_msg *msg = &txn->req;
1186 struct proxy *px = s->be;
1187 unsigned int plen = px->url_param_len;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001188 unsigned long body;
1189 unsigned long len;
1190 const char *params;
1191 struct hdr_ctx ctx;
1192 const char *p;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001193
1194 /* tot_weight appears to mean srv_count */
1195 if (px->lbprm.tot_weight == 0)
1196 return NULL;
1197
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001198 body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
Willy Tarreaufb0528b2008-08-11 00:21:56 +02001199 len = req->l - body;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001200 params = req->data + body;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001201
1202 if ( len == 0 )
1203 return NULL;
1204
1205 if (px->lbprm.map.state & PR_MAP_RECALC)
1206 recalc_server_map(px);
1207
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001208 ctx.idx = 0;
1209
1210 /* if the message is chunked, we skip the chunk size, but use the value as len */
1211 http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
Willy Tarreauadfb8562008-08-11 15:24:42 +02001212 if (ctx.idx && ctx.vlen >= 7 && strncasecmp(ctx.line+ctx.val, "chunked", 7) == 0) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001213 unsigned int chunk = 0;
1214 while ( params < req->rlim && !HTTP_IS_CRLF(*params)) {
1215 char c = *params;
1216 if (ishex(c)) {
1217 unsigned int hex = toupper(c) - '0';
1218 if ( hex > 9 )
1219 hex -= 'A' - '9' - 1;
1220 chunk = (chunk << 4) | hex;
1221 }
1222 else
1223 return NULL;
1224 params++;
1225 len--;
Willy Tarreau01732802007-11-01 22:48:15 +01001226 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001227 /* spec says we get CRLF */
1228 if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
1229 params += 2;
1230 else
1231 return NULL;
1232 /* ok we have some encoded length, just inspect the first chunk */
1233 len = chunk;
1234 }
Willy Tarreau01732802007-11-01 22:48:15 +01001235
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001236 p = params;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001237
1238 while (len > plen) {
1239 /* Look for the parameter name followed by an equal symbol */
1240 if (params[plen] == '=') {
1241 if (memcmp(params, px->url_param_name, plen) == 0) {
1242 /* OK, we have the parameter here at <params>, and
1243 * the value after the equal sign, at <p>
1244 * skip the equal symbol
1245 */
1246 p += plen + 1;
1247 len -= plen + 1;
1248
1249 while (len && *p != '&') {
1250 if (unlikely(!HTTP_IS_TOKEN(*p))) {
1251 /* if in a POST, body must be URI encoded or its not a URI.
1252 * Do not interprete any possible binary data as a parameter.
1253 */
1254 if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
1255 break;
1256 return NULL; /* oh, no; this is not uri-encoded.
1257 * This body does not contain parameters.
1258 */
1259 }
1260 hash = *p + (hash << 6) + (hash << 16) - hash;
1261 len--;
1262 p++;
1263 /* should we break if vlen exceeds limit? */
1264 }
1265 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1266 }
1267 }
Willy Tarreau01732802007-11-01 22:48:15 +01001268 /* skip to next parameter */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001269 p = memchr(params, '&', len);
Willy Tarreau01732802007-11-01 22:48:15 +01001270 if (!p)
1271 return NULL;
1272 p++;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001273 len -= (p - params);
1274 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001275 }
1276 return NULL;
1277}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001278
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001279
Willy Tarreaubaaee002006-06-26 02:48:02 +02001280/*
Willy Tarreau7c669d72008-06-20 15:04:11 +02001281 * This function applies the load-balancing algorithm to the session, as
1282 * defined by the backend it is assigned to. The session is then marked as
1283 * 'assigned'.
1284 *
1285 * This function MAY NOT be called with SN_ASSIGNED already set. If the session
1286 * had a server previously assigned, it is rebalanced, trying to avoid the same
1287 * server.
1288 * The function tries to keep the original connection slot if it reconnects to
1289 * the same server, otherwise it releases it and tries to offer it.
1290 *
1291 * It is illegal to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001292 *
1293 * It may return :
Willy Tarreau7c669d72008-06-20 15:04:11 +02001294 * SRV_STATUS_OK if everything is OK. Session assigned to ->srv
1295 * SRV_STATUS_NOSRV if no server is available. Session is not ASSIGNED
1296 * SRV_STATUS_FULL if all servers are saturated. Session is not ASSIGNED
Willy Tarreaubaaee002006-06-26 02:48:02 +02001297 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1298 *
Willy Tarreau7c669d72008-06-20 15:04:11 +02001299 * Upon successful return, the session flag SN_ASSIGNED is set to indicate that
1300 * it does not need to be called anymore. This means that s->srv can be trusted
1301 * in balance and direct modes.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001302 *
1303 */
1304
1305int assign_server(struct session *s)
1306{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001307
Willy Tarreau7c669d72008-06-20 15:04:11 +02001308 struct server *conn_slot;
1309 int err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001310
Willy Tarreaubaaee002006-06-26 02:48:02 +02001311#ifdef DEBUG_FULL
1312 fprintf(stderr,"assign_server : s=%p\n",s);
1313#endif
1314
Willy Tarreau7c669d72008-06-20 15:04:11 +02001315 err = SRV_STATUS_INTERNAL;
1316 if (unlikely(s->pend_pos || s->flags & SN_ASSIGNED))
1317 goto out_err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001318
Willy Tarreau7c669d72008-06-20 15:04:11 +02001319 s->prev_srv = s->prev_srv;
1320 conn_slot = s->srv_conn;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001321
Willy Tarreau7c669d72008-06-20 15:04:11 +02001322 /* We have to release any connection slot before applying any LB algo,
1323 * otherwise we may erroneously end up with no available slot.
1324 */
1325 if (conn_slot)
1326 sess_change_server(s, NULL);
1327
1328 /* We will now try to find the good server and store it into <s->srv>.
1329 * Note that <s->srv> may be NULL in case of dispatch or proxy mode,
1330 * as well as if no server is available (check error code).
1331 */
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001332
Willy Tarreau7c669d72008-06-20 15:04:11 +02001333 s->srv = NULL;
1334 if (s->be->lbprm.algo & BE_LB_ALGO) {
1335 int len;
1336 /* we must check if we have at least one server available */
1337 if (!s->be->lbprm.tot_weight) {
1338 err = SRV_STATUS_NOSRV;
1339 goto out;
1340 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001341
Willy Tarreau7c669d72008-06-20 15:04:11 +02001342 switch (s->be->lbprm.algo & BE_LB_ALGO) {
1343 case BE_LB_ALGO_RR:
1344 s->srv = fwrr_get_next_server(s->be, s->prev_srv);
1345 if (!s->srv) {
1346 err = SRV_STATUS_FULL;
1347 goto out;
1348 }
1349 break;
1350 case BE_LB_ALGO_LC:
1351 s->srv = fwlc_get_next_server(s->be, s->prev_srv);
1352 if (!s->srv) {
1353 err = SRV_STATUS_FULL;
1354 goto out;
1355 }
1356 break;
1357 case BE_LB_ALGO_SH:
1358 if (s->cli_addr.ss_family == AF_INET)
1359 len = 4;
1360 else if (s->cli_addr.ss_family == AF_INET6)
1361 len = 16;
1362 else {
1363 /* unknown IP family */
1364 err = SRV_STATUS_INTERNAL;
1365 goto out;
1366 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001367
Willy Tarreau7c669d72008-06-20 15:04:11 +02001368 s->srv = get_server_sh(s->be,
1369 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
1370 len);
1371 break;
1372 case BE_LB_ALGO_UH:
1373 /* URI hashing */
1374 s->srv = get_server_uh(s->be,
1375 s->txn.req.sol + s->txn.req.sl.rq.u,
1376 s->txn.req.sl.rq.u_l);
1377 break;
1378 case BE_LB_ALGO_PH:
1379 /* URL Parameter hashing */
1380 if (s->txn.meth == HTTP_METH_POST &&
1381 memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
1382 s->txn.req.sl.rq.u_l ) == NULL)
1383 s->srv = get_server_ph_post(s);
1384 else
1385 s->srv = get_server_ph(s->be,
Willy Tarreau2fcb5002007-05-08 13:35:26 +02001386 s->txn.req.sol + s->txn.req.sl.rq.u,
1387 s->txn.req.sl.rq.u_l);
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001388
Willy Tarreau7c669d72008-06-20 15:04:11 +02001389 if (!s->srv) {
1390 /* parameter not found, fall back to round robin on the map */
1391 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
Willy Tarreau01732802007-11-01 22:48:15 +01001392 if (!s->srv) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001393 err = SRV_STATUS_FULL;
1394 goto out;
Willy Tarreau01732802007-11-01 22:48:15 +01001395 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001396 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001397 break;
1398 default:
1399 /* unknown balancing algorithm */
1400 err = SRV_STATUS_INTERNAL;
1401 goto out;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001402 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001403 if (s->srv != s->prev_srv) {
1404 s->be->cum_lbconn++;
1405 s->srv->cum_lbconn++;
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001406 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001407 }
1408 else if (s->be->options & PR_O_HTTP_PROXY) {
1409 if (!s->srv_addr.sin_addr.s_addr) {
1410 err = SRV_STATUS_NOSRV;
1411 goto out;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001412 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001413 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001414 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
1415 !(s->fe->options & PR_O_TRANSP)) {
1416 err = SRV_STATUS_NOSRV;
1417 goto out;
1418 }
1419
1420 s->flags |= SN_ASSIGNED;
1421 err = SRV_STATUS_OK;
1422 out:
1423
1424 /* Either we take back our connection slot, or we offer it to someone
1425 * else if we don't need it anymore.
1426 */
1427 if (conn_slot) {
1428 if (conn_slot == s->srv) {
1429 sess_change_server(s, s->srv);
1430 } else {
1431 if (may_dequeue_tasks(conn_slot, s->be))
1432 process_srv_queue(conn_slot);
1433 }
1434 }
1435
1436 out_err:
1437 return err;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001438}
1439
1440
1441/*
1442 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1443 * The address is taken from the currently assigned server, or from the
1444 * dispatch or transparent address.
1445 *
1446 * It may return :
1447 * SRV_STATUS_OK if everything is OK.
1448 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1449 *
1450 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1451 * not cleared, so it's to the caller to clear it if required.
1452 *
1453 */
1454int assign_server_address(struct session *s)
1455{
1456#ifdef DEBUG_FULL
1457 fprintf(stderr,"assign_server_address : s=%p\n",s);
1458#endif
1459
Willy Tarreau31682232007-11-29 15:38:04 +01001460 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001461 /* A server is necessarily known for this session */
1462 if (!(s->flags & SN_ASSIGNED))
1463 return SRV_STATUS_INTERNAL;
1464
1465 s->srv_addr = s->srv->addr;
1466
1467 /* if this server remaps proxied ports, we'll use
1468 * the port the client connected to with an offset. */
1469 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001470 if (!(s->fe->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
1471 get_frt_addr(s);
1472 if (s->frt_addr.ss_family == AF_INET) {
1473 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1474 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1475 } else {
1476 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1477 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1478 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001479 }
1480 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001481 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001482 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001483 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001484 }
Willy Tarreau73de9892006-11-30 11:40:23 +01001485 else if (s->fe->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001486 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001487 if (!(s->flags & SN_FRT_ADDR_SET))
1488 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001489
Willy Tarreaubd414282008-01-19 13:46:35 +01001490 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1491 /* when we support IPv6 on the backend, we may add other tests */
1492 //qfprintf(stderr, "Cannot get original server address.\n");
1493 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001494 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001495 else if (s->be->options & PR_O_HTTP_PROXY) {
1496 /* If HTTP PROXY option is set, then server is already assigned
1497 * during incoming client request parsing. */
1498 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001499 else {
1500 /* no server and no LB algorithm ! */
1501 return SRV_STATUS_INTERNAL;
1502 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001503
1504 s->flags |= SN_ADDR_SET;
1505 return SRV_STATUS_OK;
1506}
1507
1508
1509/* This function assigns a server to session <s> if required, and can add the
1510 * connection to either the assigned server's queue or to the proxy's queue.
Willy Tarreau7c669d72008-06-20 15:04:11 +02001511 * If ->srv_conn is set, the session is first released from the server.
1512 * It may also be called with SN_DIRECT and/or SN_ASSIGNED though. It will
1513 * be called before any connection and after any retry or redispatch occurs.
1514 *
1515 * It is not allowed to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001516 *
1517 * Returns :
1518 *
1519 * SRV_STATUS_OK if everything is OK.
1520 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1521 * SRV_STATUS_QUEUED if the connection has been queued.
1522 * SRV_STATUS_FULL if the server(s) is/are saturated and the
Willy Tarreau7c669d72008-06-20 15:04:11 +02001523 * connection could not be queued in s->srv,
1524 * which may be NULL if we queue on the backend.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001525 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1526 *
1527 */
1528int assign_server_and_queue(struct session *s)
1529{
1530 struct pendconn *p;
1531 int err;
1532
1533 if (s->pend_pos)
1534 return SRV_STATUS_INTERNAL;
1535
Willy Tarreau7c669d72008-06-20 15:04:11 +02001536 err = SRV_STATUS_OK;
1537 if (!(s->flags & SN_ASSIGNED)) {
1538 err = assign_server(s);
1539 if (s->prev_srv) {
1540 /* This session was previously assigned to a server. We have to
1541 * update the session's and the server's stats :
1542 * - if the server changed :
1543 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1544 * - set SN_REDISP if it was successfully redispatched
1545 * - increment srv->redispatches and be->redispatches
1546 * - if the server remained the same : update retries.
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001547 */
1548
Willy Tarreau7c669d72008-06-20 15:04:11 +02001549 if (s->prev_srv != s->srv) {
1550 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1551 s->txn.flags &= ~TX_CK_MASK;
1552 s->txn.flags |= TX_CK_DOWN;
1553 }
1554 s->flags |= SN_REDISP;
1555 s->prev_srv->redispatches++;
1556 s->be->redispatches++;
1557 } else {
1558 s->prev_srv->retries++;
1559 s->be->retries++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001560 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001561 }
1562 }
1563
Willy Tarreaubaaee002006-06-26 02:48:02 +02001564 switch (err) {
1565 case SRV_STATUS_OK:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001566 /* we have SN_ASSIGNED set */
1567 if (!s->srv)
1568 return SRV_STATUS_OK; /* dispatch or proxy mode */
1569
1570 /* If we already have a connection slot, no need to check any queue */
1571 if (s->srv_conn == s->srv)
1572 return SRV_STATUS_OK;
1573
1574 /* OK, this session already has an assigned server, but no
1575 * connection slot yet. Either it is a redispatch, or it was
1576 * assigned from persistence information (direct mode).
1577 */
1578 if ((s->flags & SN_REDIRECTABLE) && s->srv->rdr_len) {
1579 /* server scheduled for redirection, and already assigned. We
1580 * don't want to go further nor check the queue.
Willy Tarreau21d2af32008-02-14 20:25:24 +01001581 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001582 sess_change_server(s, s->srv); /* not really needed in fact */
Willy Tarreau21d2af32008-02-14 20:25:24 +01001583 return SRV_STATUS_OK;
1584 }
1585
Willy Tarreau7c669d72008-06-20 15:04:11 +02001586 /* We might have to queue this session if the assigned server is full.
1587 * We know we have to queue it into the server's queue, so if a maxqueue
1588 * is set on the server, we must also check that the server's queue is
1589 * not full, in which case we have to return FULL.
1590 */
1591 if (s->srv->maxconn &&
1592 (s->srv->nbpend || s->srv->served >= srv_dynamic_maxconn(s->srv))) {
1593
1594 if (s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue)
1595 return SRV_STATUS_FULL;
1596
Willy Tarreaubaaee002006-06-26 02:48:02 +02001597 p = pendconn_add(s);
1598 if (p)
1599 return SRV_STATUS_QUEUED;
1600 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001601 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001602 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001603
1604 /* OK, we can use this server. Let's reserve our place */
1605 sess_change_server(s, s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001606 return SRV_STATUS_OK;
1607
1608 case SRV_STATUS_FULL:
1609 /* queue this session into the proxy's queue */
1610 p = pendconn_add(s);
1611 if (p)
1612 return SRV_STATUS_QUEUED;
1613 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001614 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001615
1616 case SRV_STATUS_NOSRV:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001617 return err;
1618
Willy Tarreaubaaee002006-06-26 02:48:02 +02001619 case SRV_STATUS_INTERNAL:
1620 return err;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001621
Willy Tarreaubaaee002006-06-26 02:48:02 +02001622 default:
1623 return SRV_STATUS_INTERNAL;
1624 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001625}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001626
1627/*
1628 * This function initiates a connection to the server assigned to this session
1629 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1630 * It can return one of :
1631 * - SN_ERR_NONE if everything's OK
1632 * - SN_ERR_SRVTO if there are no more servers
1633 * - SN_ERR_SRVCL if the connection was refused by the server
1634 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1635 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1636 * - SN_ERR_INTERNAL for any other purely internal errors
1637 * Additionally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1638 */
1639int connect_server(struct session *s)
1640{
1641 int fd, err;
1642
1643 if (!(s->flags & SN_ADDR_SET)) {
1644 err = assign_server_address(s);
1645 if (err != SRV_STATUS_OK)
1646 return SN_ERR_INTERNAL;
1647 }
1648
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001649 if ((fd = s->req->cons->fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001650 qfprintf(stderr, "Cannot get a server socket.\n");
1651
1652 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001653 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001654 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001655 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001656 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001657 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001658 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001659 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001660 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001661 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001662 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001663 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001664 /* this is a resource error */
1665 return SN_ERR_RESOURCE;
1666 }
1667
1668 if (fd >= global.maxsock) {
1669 /* do not log anything there, it's a normal condition when this option
1670 * is used to serialize connections to a server !
1671 */
1672 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1673 close(fd);
1674 return SN_ERR_PRXCOND; /* it is a configuration limit */
1675 }
1676
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001677#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001678 if ((s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001679 /* TCP splicing supported by both FE and BE */
1680 tcp_splice_initfd(s->cli_fd, fd);
1681 }
1682#endif
1683
Willy Tarreaubaaee002006-06-26 02:48:02 +02001684 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1685 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1686 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1687 close(fd);
1688 return SN_ERR_INTERNAL;
1689 }
1690
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001691 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001692 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1693
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001694 if (s->be->options & PR_O_TCP_NOLING)
1695 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1696
Willy Tarreaubaaee002006-06-26 02:48:02 +02001697 /* allow specific binding :
1698 * - server-specific at first
1699 * - proxy-specific next
1700 */
1701 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001702 struct sockaddr_in *remote = NULL;
1703 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001704
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001705#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001706 switch (s->srv->state & SRV_TPROXY_MASK) {
1707 case SRV_TPROXY_ADDR:
1708 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1709 flags = 3;
1710 break;
1711 case SRV_TPROXY_CLI:
1712 flags |= 2;
1713 /* fall through */
1714 case SRV_TPROXY_CIP:
1715 /* FIXME: what can we do if the client connects in IPv6 ? */
1716 flags |= 1;
1717 remote = (struct sockaddr_in *)&s->cli_addr;
1718 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001719 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001720#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001721 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001722 if (ret) {
1723 close(fd);
1724 if (ret == 1) {
1725 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1726 s->be->id, s->srv->id);
1727 send_log(s->be, LOG_EMERG,
1728 "Cannot bind to source address before connect() for server %s/%s.\n",
1729 s->be->id, s->srv->id);
1730 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001731 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001732 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001733 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001734 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001735 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001736 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001737 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001738 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001739 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001740 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001741 struct sockaddr_in *remote = NULL;
1742 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001743
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001744#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001745 switch (s->be->options & PR_O_TPXY_MASK) {
1746 case PR_O_TPXY_ADDR:
1747 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1748 flags = 3;
1749 break;
1750 case PR_O_TPXY_CLI:
1751 flags |= 2;
1752 /* fall through */
1753 case PR_O_TPXY_CIP:
1754 /* FIXME: what can we do if the client connects in IPv6 ? */
1755 flags |= 1;
1756 remote = (struct sockaddr_in *)&s->cli_addr;
1757 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001758 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001759#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001760 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001761 if (ret) {
1762 close(fd);
1763 if (ret == 1) {
1764 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1765 s->be->id);
1766 send_log(s->be, LOG_EMERG,
1767 "Cannot bind to source address before connect() for proxy %s.\n",
1768 s->be->id);
1769 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001770 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001771 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001772 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001773 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1774 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001775 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001776 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001777 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001778 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001779
Willy Tarreaubaaee002006-06-26 02:48:02 +02001780 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1781 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1782
1783 if (errno == EAGAIN || errno == EADDRINUSE) {
1784 char *msg;
1785 if (errno == EAGAIN) /* no free ports left, try again later */
1786 msg = "no free ports";
1787 else
1788 msg = "local address already in use";
1789
1790 qfprintf(stderr,"Cannot connect: %s.\n",msg);
1791 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001792 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001793 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001794 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001795 return SN_ERR_RESOURCE;
1796 } else if (errno == ETIMEDOUT) {
1797 //qfprintf(stderr,"Connect(): ETIMEDOUT");
1798 close(fd);
1799 return SN_ERR_SRVTO;
1800 } else {
1801 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1802 //qfprintf(stderr,"Connect(): %d", errno);
1803 close(fd);
1804 return SN_ERR_SRVCL;
1805 }
1806 }
1807
1808 fdtab[fd].owner = s->task;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001809 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +02001810 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001811 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001812 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001813 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001814
1815 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1816 fdtab[fd].peerlen = sizeof(s->srv_addr);
1817
Willy Tarreaubaaee002006-06-26 02:48:02 +02001818 fd_insert(fd);
Willy Tarreau788e2842008-08-26 13:25:39 +02001819 EV_FD_SET(fd, DIR_WR); /* for connect status */
1820
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001821 s->req->cons->state = SI_ST_CON;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001822 if (s->srv) {
1823 s->srv->cur_sess++;
1824 if (s->srv->cur_sess > s->srv->cur_sess_max)
1825 s->srv->cur_sess_max = s->srv->cur_sess;
Willy Tarreau51406232008-03-10 22:04:20 +01001826 if (s->be->lbprm.server_take_conn)
1827 s->be->lbprm.server_take_conn(s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001828 }
1829
Willy Tarreau26ed74d2008-08-17 12:11:14 +02001830 s->req->wex = tick_add_ifset(now_ms, s->be->timeout.connect);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001831 return SN_ERR_NONE; /* connection is OK */
1832}
1833
1834
1835/*
1836 * This function checks the retry count during the connect() job.
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001837 * It updates the session's retries, so that the caller knows what it
1838 * has to do. It uses the last connection error to set the log when
Willy Tarreaubaaee002006-06-26 02:48:02 +02001839 * it expires. It returns 1 when it has expired, and 0 otherwise.
1840 */
1841int srv_count_retry_down(struct session *t, int conn_err)
1842{
1843 /* we are in front of a retryable error */
1844 t->conn_retries--;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +02001845
Willy Tarreaubaaee002006-06-26 02:48:02 +02001846 if (t->conn_retries < 0) {
1847 /* if not retryable anymore, let's abort */
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001848 //t->req->wex = TICK_ETERNITY;
1849 //srv_close_with_err(t, conn_err, SN_FINST_C,
1850 // 503, error_message(t, HTTP_ERR_503));
1851
1852 if (!t->req->cons->err_type) {
1853 t->req->cons->err_type = SI_ET_CONN_ERR;
1854 t->req->cons->err_loc = t->srv;
1855 }
1856
Willy Tarreaubaaee002006-06-26 02:48:02 +02001857 if (t->srv)
1858 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001859 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001860
1861 /* We used to have a free connection slot. Since we'll never use it,
1862 * we have to inform the server that it may be used by another session.
1863 */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001864 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02001865 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001866 return 1;
1867 }
1868 return 0;
1869}
1870
1871
1872/*
1873 * This function performs the retryable part of the connect() job.
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001874 * It updates the session's and retries, so that the caller knows
1875 * what it has to do. It returns 1 when it breaks out of the loop,
1876 * or 0 if it needs to redispatch.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001877 */
1878int srv_retryable_connect(struct session *t)
1879{
1880 int conn_err;
1881
1882 /* This loop ensures that we stop before the last retry in case of a
1883 * redispatchable server.
1884 */
1885 do {
1886 /* initiate a connection to the server */
1887 conn_err = connect_server(t);
1888 switch (conn_err) {
1889
1890 case SN_ERR_NONE:
1891 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
Willy Tarreau98937b82007-12-10 15:05:42 +01001892 if (t->srv)
1893 t->srv->cum_sess++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001894 return 1;
1895
1896 case SN_ERR_INTERNAL:
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001897 if (!t->req->cons->err_type) {
1898 t->req->cons->err_type = SI_ET_CONN_OTHER;
1899 t->req->cons->err_loc = t->srv;
1900 }
1901
1902 //t->req->wex = TICK_ETERNITY;
1903 //srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
1904 // 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001905 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001906 t->srv->cum_sess++;
1907 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001908 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001909 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001910 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001911 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02001912 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001913 return 1;
1914 }
1915 /* ensure that we have enough retries left */
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001916 if (srv_count_retry_down(t, conn_err))
Willy Tarreaubaaee002006-06-26 02:48:02 +02001917 return 1;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001918 } while (t->srv == NULL || t->conn_retries > 0 || !(t->be->options & PR_O_REDISP));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001919
1920 /* We're on our last chance, and the REDISP option was specified.
1921 * We will ignore cookie and force to balance or use the dispatcher.
1922 */
1923 /* let's try to offer this slot to anybody */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001924 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02001925 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001926
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001927 if (t->srv)
1928 t->srv->cum_sess++; //FIXME?
Willy Tarreaubaaee002006-06-26 02:48:02 +02001929
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001930 /* it's left to the dispatcher to choose a server */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001931 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001932 t->prev_srv = t->srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001933 return 0;
1934}
1935
1936
1937/* This function performs the "redispatch" part of a connection attempt. It
1938 * will assign a server if required, queue the connection if required, and
1939 * handle errors that might arise at this level. It can change the server
1940 * state. It will return 1 if it encounters an error, switches the server
1941 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1942 * that the connection is ready to use.
1943 */
1944
1945int srv_redispatch_connect(struct session *t)
1946{
1947 int conn_err;
1948
1949 /* We know that we don't have any connection pending, so we will
1950 * try to get a new one, and wait in this state if it's queued
1951 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001952 redispatch:
Willy Tarreaubaaee002006-06-26 02:48:02 +02001953 conn_err = assign_server_and_queue(t);
1954 switch (conn_err) {
1955 case SRV_STATUS_OK:
1956 break;
1957
Willy Tarreau7c669d72008-06-20 15:04:11 +02001958 case SRV_STATUS_FULL:
1959 /* The server has reached its maxqueue limit. Either PR_O_REDISP is set
1960 * and we can redispatch to another server, or it is not and we return
1961 * 503. This only makes sense in DIRECT mode however, because normal LB
1962 * algorithms would never select such a server, and hash algorithms
1963 * would bring us on the same server again. Note that t->srv is set in
1964 * this case.
1965 */
1966 if ((t->flags & SN_DIRECT) && (t->be->options & PR_O_REDISP)) {
1967 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
1968 t->prev_srv = t->srv;
1969 goto redispatch;
1970 }
1971
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001972 //t->req->wex = TICK_ETERNITY;
1973 //srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_Q,
1974 // 503, error_message(t, HTTP_ERR_503));
1975
1976 if (!t->req->cons->err_type) {
1977 t->req->cons->err_type = SI_ET_QUEUE_ERR;
1978 t->req->cons->err_loc = t->srv;
1979 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001980
1981 t->srv->failed_conns++;
1982 t->be->failed_conns++;
1983 return 1;
1984
Willy Tarreaubaaee002006-06-26 02:48:02 +02001985 case SRV_STATUS_NOSRV:
1986 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001987 //t->req->wex = TICK_ETERNITY;
1988 //srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
1989 // 503, error_message(t, HTTP_ERR_503));
1990
1991 if (!t->req->cons->err_type) {
1992 t->req->cons->err_type = SI_ET_CONN_ERR;
1993 t->req->cons->err_loc = NULL;
1994 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001995
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001996 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001997 return 1;
1998
1999 case SRV_STATUS_QUEUED:
Willy Tarreau26ed74d2008-08-17 12:11:14 +02002000 t->req->wex = tick_add_ifset(now_ms, t->be->timeout.queue);
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002001 t->req->cons->state = SI_ST_QUE;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002002 /* do nothing else and do not wake any other session up */
2003 return 1;
2004
Willy Tarreaubaaee002006-06-26 02:48:02 +02002005 case SRV_STATUS_INTERNAL:
2006 default:
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002007 //t->req->wex = TICK_ETERNITY;
2008 //srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
2009 // 500, error_message(t, HTTP_ERR_500));
2010
2011 if (!t->req->cons->err_type) {
2012 t->req->cons->err_type = SI_ET_CONN_OTHER;
2013 t->req->cons->err_loc = t->srv;
2014 }
2015
Willy Tarreaubaaee002006-06-26 02:48:02 +02002016 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01002017 t->srv->cum_sess++;
2018 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02002019 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002020 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002021
2022 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002023 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02002024 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002025 return 1;
2026 }
2027 /* if we get here, it's because we got SRV_STATUS_OK, which also
2028 * means that the connection has not been queued.
2029 */
2030 return 0;
2031}
2032
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002033int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01002034 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002035 return px->down_time;
2036
2037 return now.tv_sec - px->last_change + px->down_time;
2038}
Willy Tarreaubaaee002006-06-26 02:48:02 +02002039
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002040/* This function parses a "balance" statement in a backend section describing
2041 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
2042 * returns -1, it may write an error message into ther <err> buffer, for at
2043 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
2044 * written. The function must be called with <args> pointing to the first word
2045 * after "balance".
2046 */
2047int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
2048{
2049 if (!*(args[0])) {
2050 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01002051 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2052 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002053 return 0;
2054 }
2055
2056 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002057 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2058 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002059 }
Willy Tarreau51406232008-03-10 22:04:20 +01002060 else if (!strcmp(args[0], "leastconn")) {
2061 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2062 curproxy->lbprm.algo |= BE_LB_ALGO_LC;
2063 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002064 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002065 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2066 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002067 }
2068 else if (!strcmp(args[0], "uri")) {
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002069 int arg = 1;
2070
Willy Tarreau31682232007-11-29 15:38:04 +01002071 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2072 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002073
2074 while (*args[arg]) {
2075 if (!strcmp(args[arg], "len")) {
2076 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2077 snprintf(err, errlen, "'balance uri len' expects a positive integer (got '%s').", args[arg+1]);
2078 return -1;
2079 }
2080 curproxy->uri_len_limit = atoi(args[arg+1]);
2081 arg += 2;
2082 }
2083 else if (!strcmp(args[arg], "depth")) {
2084 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2085 snprintf(err, errlen, "'balance uri depth' expects a positive integer (got '%s').", args[arg+1]);
2086 return -1;
2087 }
2088 /* hint: we store the position of the ending '/' (depth+1) so
2089 * that we avoid a comparison while computing the hash.
2090 */
2091 curproxy->uri_dirs_depth1 = atoi(args[arg+1]) + 1;
2092 arg += 2;
2093 }
2094 else {
2095 snprintf(err, errlen, "'balance uri' only accepts parameters 'len' and 'depth' (got '%s').", args[arg]);
2096 return -1;
2097 }
2098 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002099 }
Willy Tarreau01732802007-11-01 22:48:15 +01002100 else if (!strcmp(args[0], "url_param")) {
2101 if (!*args[1]) {
2102 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
2103 return -1;
2104 }
Willy Tarreau31682232007-11-29 15:38:04 +01002105 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2106 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreaua534fea2008-08-03 12:19:50 +02002107
2108 free(curproxy->url_param_name);
Willy Tarreau01732802007-11-01 22:48:15 +01002109 curproxy->url_param_name = strdup(args[1]);
Willy Tarreaua534fea2008-08-03 12:19:50 +02002110 curproxy->url_param_len = strlen(args[1]);
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002111 if (*args[2]) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02002112 if (strcmp(args[2], "check_post")) {
2113 snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
2114 return -1;
2115 }
2116 if (*args[3]) {
2117 /* TODO: maybe issue a warning if there is no value, no digits or too long */
2118 curproxy->url_param_post_limit = str2ui(args[3]);
2119 }
2120 /* if no limit, or faul value in args[3], then default to a moderate wordlen */
2121 if (!curproxy->url_param_post_limit)
2122 curproxy->url_param_post_limit = 48;
2123 else if ( curproxy->url_param_post_limit < 3 )
2124 curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
2125 }
Willy Tarreau01732802007-11-01 22:48:15 +01002126 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002127 else {
Willy Tarreau51406232008-03-10 22:04:20 +01002128 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri' and 'url_param' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002129 return -1;
2130 }
2131 return 0;
2132}
2133
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002134
/************************************************************************/
/*           All supported keywords must be declared here.              */
/************************************************************************/
2138
2139/* set test->i to the number of enabled servers on the proxy */
2140static int
2141acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
2142 struct acl_expr *expr, struct acl_test *test)
2143{
2144 test->flags = ACL_TEST_F_VOL_TEST;
2145 if (expr->arg_len) {
2146 /* another proxy was designated, we must look for it */
2147 for (px = proxy; px; px = px->next)
2148 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2149 break;
2150 }
2151 if (!px)
2152 return 0;
2153
2154 if (px->srv_act)
2155 test->i = px->srv_act;
2156 else if (px->lbprm.fbck)
2157 test->i = 1;
2158 else
2159 test->i = px->srv_bck;
2160
2161 return 1;
2162}
2163
2164
2165/* Note: must not be declared <const> as its list will be overwritten */
2166static struct acl_kw_list acl_kws = {{ },{
Willy Tarreau0ceba5a2008-07-25 19:31:03 +02002167 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int, ACL_USE_NOTHING },
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002168 { NULL, NULL, NULL, NULL },
2169}};
2170
2171
2172__attribute__((constructor))
2173static void __backend_init(void)
2174{
2175 acl_register_keywords(&acl_kws);
2176}
2177
2178
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */