blob: 7116965f8f3a7b039bdd90fc453eb3cbe476def1 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +020019#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020
Willy Tarreau2dd0d472006-06-29 17:53:05 +020021#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020022#include <common/config.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020023#include <common/debug.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010024#include <common/eb32tree.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020025#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020026#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020027
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010028#include <types/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020029#include <types/buffers.h>
30#include <types/global.h>
31#include <types/polling.h>
32#include <types/proxy.h>
33#include <types/server.h>
34#include <types/session.h>
35
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010036#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020037#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020038#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020039#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010040#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041#include <proto/log.h>
42#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010043#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020044#include <proto/queue.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020045#include <proto/session.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020046#include <proto/stream_sock.h>
47#include <proto/task.h>
48
Willy Tarreau6d1a9882007-01-07 02:03:04 +010049#ifdef CONFIG_HAP_TCPSPLICE
50#include <libtcpsplice.h>
51#endif
52
/* Forward declarations of the FWRR helpers which are referenced by the
 * status/weight callbacks before being defined further below.
 */
static void fwrr_get_srv(struct server *s);
static void fwrr_queue_srv(struct server *s);
static inline void fwrr_dequeue_srv(struct server *s);
static inline void fwrr_remove_from_tree(struct server *s);
static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
58
59/* This function returns non-zero if a server with the given weight and state
60 * is usable for LB, otherwise zero.
61 */
62static inline int srv_is_usable(int state, int weight)
63{
64 if (!weight)
65 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010066 if (state & SRV_GOINGDOWN)
67 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010068 if (!(state & SRV_RUNNING))
69 return 0;
70 return 1;
71}
72
Willy Tarreaubaaee002006-06-26 02:48:02 +020073/*
74 * This function recounts the number of usable active and backup servers for
75 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010076 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010077 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010078 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020079 */
Willy Tarreaub625a082007-11-26 01:15:43 +010080static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020081{
82 struct server *srv;
83
Willy Tarreau20697042007-11-15 23:26:18 +010084 px->srv_act = px->srv_bck = 0;
85 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010086 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020087 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010088 if (!srv_is_usable(srv->state, srv->eweight))
89 continue;
90
91 if (srv->state & SRV_BACKUP) {
92 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010093 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010094 px->lbprm.fbck = srv;
95 px->srv_bck++;
96 px->lbprm.tot_wbck += srv->eweight;
97 } else {
98 px->srv_act++;
99 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200100 }
101 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100102}
Willy Tarreau20697042007-11-15 23:26:18 +0100103
Willy Tarreaub625a082007-11-26 01:15:43 +0100104/* This function simply updates the backend's tot_weight and tot_used values
105 * after servers weights have been updated. It is designed to be used after
106 * recount_servers() or equivalent.
107 */
108static void update_backend_weight(struct proxy *px)
109{
Willy Tarreau20697042007-11-15 23:26:18 +0100110 if (px->srv_act) {
111 px->lbprm.tot_weight = px->lbprm.tot_wact;
112 px->lbprm.tot_used = px->srv_act;
113 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100114 else if (px->lbprm.fbck) {
115 /* use only the first backup server */
116 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
117 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100118 }
119 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100120 px->lbprm.tot_weight = px->lbprm.tot_wbck;
121 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100122 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100123}
124
125/* this function updates the map according to server <srv>'s new state */
126static void map_set_server_status_down(struct server *srv)
127{
128 struct proxy *p = srv->proxy;
129
130 if (srv->state == srv->prev_state &&
131 srv->eweight == srv->prev_eweight)
132 return;
133
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100134 if (srv_is_usable(srv->state, srv->eweight))
135 goto out_update_state;
136
Willy Tarreaub625a082007-11-26 01:15:43 +0100137 /* FIXME: could be optimized since we know what changed */
138 recount_servers(p);
139 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100140 p->lbprm.map.state |= PR_MAP_RECALC;
141 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100142 srv->prev_state = srv->state;
143 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200144}
145
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100146/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100147static void map_set_server_status_up(struct server *srv)
148{
149 struct proxy *p = srv->proxy;
150
151 if (srv->state == srv->prev_state &&
152 srv->eweight == srv->prev_eweight)
153 return;
154
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100155 if (!srv_is_usable(srv->state, srv->eweight))
156 goto out_update_state;
157
Willy Tarreaub625a082007-11-26 01:15:43 +0100158 /* FIXME: could be optimized since we know what changed */
159 recount_servers(p);
160 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100161 p->lbprm.map.state |= PR_MAP_RECALC;
162 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100163 srv->prev_state = srv->state;
164 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100165}
166
Willy Tarreau20697042007-11-15 23:26:18 +0100167/* This function recomputes the server map for proxy px. It relies on
168 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
169 * called after recount_servers(). It also expects px->lbprm.map.srv
170 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200171 */
172void recalc_server_map(struct proxy *px)
173{
174 int o, tot, flag;
175 struct server *cur, *best;
176
Willy Tarreau20697042007-11-15 23:26:18 +0100177 switch (px->lbprm.tot_used) {
178 case 0: /* no server */
179 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200180 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100181 case 1: /* only one server, just fill first entry */
182 tot = 1;
183 break;
184 default:
185 tot = px->lbprm.tot_weight;
186 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200187 }
188
Willy Tarreau20697042007-11-15 23:26:18 +0100189 /* here we *know* that we have some servers */
190 if (px->srv_act)
191 flag = SRV_RUNNING;
192 else
193 flag = SRV_RUNNING | SRV_BACKUP;
194
Willy Tarreaubaaee002006-06-26 02:48:02 +0200195 /* this algorithm gives priority to the first server, which means that
196 * it will respect the declaration order for equivalent weights, and
197 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100198 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200199 * case, where we want the first server only.
200 */
201 for (cur = px->srv; cur; cur = cur->next)
202 cur->wscore = 0;
203
204 for (o = 0; o < tot; o++) {
205 int max = 0;
206 best = NULL;
207 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100208 if (flag == (cur->state &
209 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200210 int v;
211
212 /* If we are forced to return only one server, we don't want to
213 * go further, because we would return the wrong one due to
214 * divide overflow.
215 */
216 if (tot == 1) {
217 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100218 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200219 break;
220 }
221
Willy Tarreau417fae02007-03-25 21:16:40 +0200222 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200223 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
224 if (best == NULL || v > max) {
225 max = v;
226 best = cur;
227 }
228 }
229 }
Willy Tarreau20697042007-11-15 23:26:18 +0100230 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200231 best->wscore -= tot;
232 }
Willy Tarreau20697042007-11-15 23:26:18 +0100233 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200234}
235
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100236/* This function is responsible of building the server MAP for map-based LB
237 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
238 * weights if applicable. It should be called only once per proxy, at config
239 * time.
240 */
241void init_server_map(struct proxy *p)
242{
243 struct server *srv;
244 int pgcd;
245 int act, bck;
246
Willy Tarreaub625a082007-11-26 01:15:43 +0100247 p->lbprm.set_server_status_up = map_set_server_status_up;
248 p->lbprm.set_server_status_down = map_set_server_status_down;
249 p->lbprm.update_server_eweight = NULL;
250
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100251 if (!p->srv)
252 return;
253
254 /* We will factor the weights to reduce the table,
255 * using Euclide's largest common divisor algorithm
256 */
257 pgcd = p->srv->uweight;
258 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
259 int w = srv->uweight;
260 while (w) {
261 int t = pgcd % w;
262 pgcd = w;
263 w = t;
264 }
265 }
266
267 /* It is sometimes useful to know what factor to apply
268 * to the backend's effective weight to know its real
269 * weight.
270 */
271 p->lbprm.wmult = pgcd;
272
273 act = bck = 0;
274 for (srv = p->srv; srv; srv = srv->next) {
275 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100276 srv->prev_eweight = srv->eweight;
277 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100278 if (srv->state & SRV_BACKUP)
279 bck += srv->eweight;
280 else
281 act += srv->eweight;
282 }
283
284 /* this is the largest map we will ever need for this servers list */
285 if (act < bck)
286 act = bck;
287
288 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
289 /* recounts servers and their weights */
290 p->lbprm.map.state = PR_MAP_RECALC;
291 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100292 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100293 recalc_server_map(p);
294}
295
Willy Tarreaub625a082007-11-26 01:15:43 +0100296/* This function updates the server trees according to server <srv>'s new
297 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100298 * It is not important whether the server was already down or not. It is not
299 * important either that the new state is completely down (the caller may not
300 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100301 */
302static void fwrr_set_server_status_down(struct server *srv)
303{
304 struct proxy *p = srv->proxy;
305 struct fwrr_group *grp;
306
307 if (srv->state == srv->prev_state &&
308 srv->eweight == srv->prev_eweight)
309 return;
310
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100311 if (srv_is_usable(srv->state, srv->eweight))
312 goto out_update_state;
313
Willy Tarreaub625a082007-11-26 01:15:43 +0100314 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
315 /* server was already down */
316 goto out_update_backend;
317
318 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
319 grp->next_weight -= srv->prev_eweight;
320
321 if (srv->state & SRV_BACKUP) {
322 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
323 p->srv_bck--;
324
325 if (srv == p->lbprm.fbck) {
326 /* we lost the first backup server in a single-backup
327 * configuration, we must search another one.
328 */
329 struct server *srv2 = p->lbprm.fbck;
330 do {
331 srv2 = srv2->next;
332 } while (srv2 &&
333 !((srv2->state & SRV_BACKUP) &&
334 srv_is_usable(srv2->state, srv2->eweight)));
335 p->lbprm.fbck = srv2;
336 }
337 } else {
338 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
339 p->srv_act--;
340 }
341
342 fwrr_dequeue_srv(srv);
343 fwrr_remove_from_tree(srv);
344
345out_update_backend:
346 /* check/update tot_used, tot_weight */
347 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100348 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100349 srv->prev_state = srv->state;
350 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100351}
352
353/* This function updates the server trees according to server <srv>'s new
354 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100355 * It is not important whether the server was already down or not. It is not
356 * important either that the new state is completely UP (the caller may not
357 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100358 * the weight of a server which was already up.
359 */
360static void fwrr_set_server_status_up(struct server *srv)
361{
362 struct proxy *p = srv->proxy;
363 struct fwrr_group *grp;
364
365 if (srv->state == srv->prev_state &&
366 srv->eweight == srv->prev_eweight)
367 return;
368
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100369 if (!srv_is_usable(srv->state, srv->eweight))
370 goto out_update_state;
371
Willy Tarreaub625a082007-11-26 01:15:43 +0100372 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
373 /* server was already up */
374 goto out_update_backend;
375
376 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
377 grp->next_weight += srv->eweight;
378
379 if (srv->state & SRV_BACKUP) {
380 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
381 p->srv_bck++;
382
Willy Tarreauf4cca452008-03-08 21:42:54 +0100383 if (!(p->options & PR_O_USE_ALL_BK)) {
384 if (!p->lbprm.fbck) {
385 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100386 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100387 } else {
388 /* we may have restored a backup server prior to fbck,
389 * in which case it should replace it.
390 */
391 struct server *srv2 = srv;
392 do {
393 srv2 = srv2->next;
394 } while (srv2 && (srv2 != p->lbprm.fbck));
395 if (srv2)
396 p->lbprm.fbck = srv;
397 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100398 }
399 } else {
400 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
401 p->srv_act++;
402 }
403
404 /* note that eweight cannot be 0 here */
405 fwrr_get_srv(srv);
406 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
407 fwrr_queue_srv(srv);
408
409out_update_backend:
410 /* check/update tot_used, tot_weight */
411 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100412 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100413 srv->prev_state = srv->state;
414 srv->prev_eweight = srv->eweight;
415}
416
417/* This function must be called after an update to server <srv>'s effective
418 * weight. It may be called after a state change too.
419 */
420static void fwrr_update_server_weight(struct server *srv)
421{
422 int old_state, new_state;
423 struct proxy *p = srv->proxy;
424 struct fwrr_group *grp;
425
426 if (srv->state == srv->prev_state &&
427 srv->eweight == srv->prev_eweight)
428 return;
429
430 /* If changing the server's weight changes its state, we simply apply
431 * the procedures we already have for status change. If the state
432 * remains down, the server is not in any tree, so it's as easy as
433 * updating its values. If the state remains up with different weights,
434 * there are some computations to perform to find a new place and
435 * possibly a new tree for this server.
436 */
437
438 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
439 new_state = srv_is_usable(srv->state, srv->eweight);
440
441 if (!old_state && !new_state) {
442 srv->prev_state = srv->state;
443 srv->prev_eweight = srv->eweight;
444 return;
445 }
446 else if (!old_state && new_state) {
447 fwrr_set_server_status_up(srv);
448 return;
449 }
450 else if (old_state && !new_state) {
451 fwrr_set_server_status_down(srv);
452 return;
453 }
454
455 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
456 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
457
458 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
459 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
460
461 if (srv->lb_tree == grp->init) {
462 fwrr_dequeue_srv(srv);
463 fwrr_queue_by_weight(grp->init, srv);
464 }
465 else if (!srv->lb_tree) {
466 /* FIXME: server was down. This is not possible right now but
467 * may be needed soon for slowstart or graceful shutdown.
468 */
469 fwrr_dequeue_srv(srv);
470 fwrr_get_srv(srv);
471 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
472 fwrr_queue_srv(srv);
473 } else {
474 /* The server is either active or in the next queue. If it's
475 * still in the active queue and it has not consumed all of its
476 * places, let's adjust its next position.
477 */
478 fwrr_get_srv(srv);
479
480 if (srv->eweight > 0) {
481 int prev_next = srv->npos;
482 int step = grp->next_weight / srv->eweight;
483
484 srv->npos = srv->lpos + step;
485 srv->rweight = 0;
486
487 if (srv->npos > prev_next)
488 srv->npos = prev_next;
489 if (srv->npos < grp->curr_pos + 2)
490 srv->npos = grp->curr_pos + step;
491 } else {
492 /* push it into the next tree */
493 srv->npos = grp->curr_pos + grp->curr_weight;
494 }
495
496 fwrr_dequeue_srv(srv);
497 fwrr_queue_srv(srv);
498 }
499
500 update_backend_weight(p);
501 srv->prev_state = srv->state;
502 srv->prev_eweight = srv->eweight;
503}
504
505/* Remove a server from a tree. It must have previously been dequeued. This
506 * function is meant to be called when a server is going down or has its
507 * weight disabled.
508 */
509static inline void fwrr_remove_from_tree(struct server *s)
510{
511 s->lb_tree = NULL;
512}
513
514/* Queue a server in the weight tree <root>, assuming the weight is >0.
515 * We want to sort them by inverted weights, because we need to place
516 * heavy servers first in order to get a smooth distribution.
517 */
518static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
519{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100520 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100521 eb32_insert(root, &s->lb_node);
522 s->lb_tree = root;
523}
524
525/* This function is responsible for building the weight trees in case of fast
526 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
527 * ratio. Both active and backup groups are initialized.
528 */
529void fwrr_init_server_groups(struct proxy *p)
530{
531 struct server *srv;
532 struct eb_root init_head = EB_ROOT;
533
534 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
535 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
536 p->lbprm.update_server_eweight = fwrr_update_server_weight;
537
538 p->lbprm.wdiv = BE_WEIGHT_SCALE;
539 for (srv = p->srv; srv; srv = srv->next) {
540 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
541 srv->prev_state = srv->state;
542 }
543
544 recount_servers(p);
545 update_backend_weight(p);
546
547 /* prepare the active servers group */
548 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
549 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
550 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
551 p->lbprm.fwrr.act.t1 = init_head;
552 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
553 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
554
555 /* prepare the backup servers group */
556 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
557 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
558 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
559 p->lbprm.fwrr.bck.t1 = init_head;
560 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
561 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
562
563 /* queue active and backup servers in two distinct groups */
564 for (srv = p->srv; srv; srv = srv->next) {
565 if (!srv_is_usable(srv->state, srv->eweight))
566 continue;
567 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
568 p->lbprm.fwrr.bck.init :
569 p->lbprm.fwrr.act.init,
570 srv);
571 }
572}
573
574/* simply removes a server from a weight tree */
575static inline void fwrr_dequeue_srv(struct server *s)
576{
577 eb32_delete(&s->lb_node);
578}
579
580/* queues a server into the appropriate group and tree depending on its
581 * backup status, and ->npos. If the server is disabled, simply assign
582 * it to the NULL tree.
583 */
584static void fwrr_queue_srv(struct server *s)
585{
586 struct proxy *p = s->proxy;
587 struct fwrr_group *grp;
588
589 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
590
591 /* Delay everything which does not fit into the window and everything
592 * which does not fit into the theorical new window.
593 */
594 if (!srv_is_usable(s->state, s->eweight)) {
595 fwrr_remove_from_tree(s);
596 }
597 else if (s->eweight <= 0 ||
598 s->npos >= 2 * grp->curr_weight ||
599 s->npos >= grp->curr_weight + grp->next_weight) {
600 /* put into next tree, and readjust npos in case we could
601 * finally take this back to current. */
602 s->npos -= grp->curr_weight;
603 fwrr_queue_by_weight(grp->next, s);
604 }
605 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100606 /* The sorting key is stored in units of s->npos * user_weight
607 * in order to avoid overflows. As stated in backend.h, the
608 * lower the scale, the rougher the weights modulation, and the
609 * higher the scale, the lower the number of servers without
610 * overflow. With this formula, the result is always positive,
611 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100612 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100613 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
614 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
615
616 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100617 s->lb_tree = &grp->curr;
618 }
619}
620
621/* prepares a server when extracting it from the "init" tree */
622static inline void fwrr_get_srv_init(struct server *s)
623{
624 s->npos = s->rweight = 0;
625}
626
627/* prepares a server when extracting it from the "next" tree */
628static inline void fwrr_get_srv_next(struct server *s)
629{
630 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
631 &s->proxy->lbprm.fwrr.bck :
632 &s->proxy->lbprm.fwrr.act;
633
634 s->npos += grp->curr_weight;
635}
636
637/* prepares a server when it was marked down */
638static inline void fwrr_get_srv_down(struct server *s)
639{
640 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
641 &s->proxy->lbprm.fwrr.bck :
642 &s->proxy->lbprm.fwrr.act;
643
644 s->npos = grp->curr_pos;
645}
646
647/* prepares a server when extracting it from its tree */
648static void fwrr_get_srv(struct server *s)
649{
650 struct proxy *p = s->proxy;
651 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
652 &p->lbprm.fwrr.bck :
653 &p->lbprm.fwrr.act;
654
655 if (s->lb_tree == grp->init) {
656 fwrr_get_srv_init(s);
657 }
658 else if (s->lb_tree == grp->next) {
659 fwrr_get_srv_next(s);
660 }
661 else if (s->lb_tree == NULL) {
662 fwrr_get_srv_down(s);
663 }
664}
665
666/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
667 * when this happens, and "next" filled with servers sorted by weights.
668 */
669static inline void fwrr_switch_trees(struct fwrr_group *grp)
670{
671 struct eb_root *swap;
672 swap = grp->init;
673 grp->init = grp->next;
674 grp->next = swap;
675 grp->curr_weight = grp->next_weight;
676 grp->curr_pos = grp->curr_weight;
677}
678
679/* return next server from the current tree in FWRR group <grp>, or a server
680 * from the "init" tree if appropriate. If both trees are empty, return NULL.
681 */
682static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
683{
684 struct eb32_node *node;
685 struct server *s;
686
687 node = eb32_first(&grp->curr);
688 s = eb32_entry(node, struct server, lb_node);
689
690 if (!node || s->npos > grp->curr_pos) {
691 /* either we have no server left, or we have a hole */
692 struct eb32_node *node2;
693 node2 = eb32_first(grp->init);
694 if (node2) {
695 node = node2;
696 s = eb32_entry(node, struct server, lb_node);
697 fwrr_get_srv_init(s);
698 if (s->eweight == 0) /* FIXME: is it possible at all ? */
699 node = NULL;
700 }
701 }
702 if (node)
703 return s;
704 else
705 return NULL;
706}
707
708/* Computes next position of server <s> in the group. It is mandatory for <s>
709 * to have a non-zero, positive eweight.
710*/
711static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
712{
713 if (!s->npos) {
714 /* first time ever for this server */
715 s->lpos = grp->curr_pos;
716 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
717 s->rweight += grp->next_weight % s->eweight;
718
719 if (s->rweight >= s->eweight) {
720 s->rweight -= s->eweight;
721 s->npos++;
722 }
723 } else {
724 s->lpos = s->npos;
725 s->npos += grp->next_weight / s->eweight;
726 s->rweight += grp->next_weight % s->eweight;
727
728 if (s->rweight >= s->eweight) {
729 s->rweight -= s->eweight;
730 s->npos++;
731 }
732 }
733}
734
735/* Return next server from the current tree in backend <p>, or a server from
736 * the init tree if appropriate. If both trees are empty, return NULL.
737 * Saturated servers are skipped and requeued.
738 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100739static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100740{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100741 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100742 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100743 int switched;
744
745 if (p->srv_act)
746 grp = &p->lbprm.fwrr.act;
747 else if (p->lbprm.fbck)
748 return p->lbprm.fbck;
749 else if (p->srv_bck)
750 grp = &p->lbprm.fwrr.bck;
751 else
752 return NULL;
753
754 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100755 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100756 full = NULL; /* NULL-terminated list of saturated servers */
757 while (1) {
758 /* if we see an empty group, let's first try to collect weights
759 * which might have recently changed.
760 */
761 if (!grp->curr_weight)
762 grp->curr_pos = grp->curr_weight = grp->next_weight;
763
764 /* get first server from the "current" tree. When the end of
765 * the tree is reached, we may have to switch, but only once.
766 */
767 while (1) {
768 srv = fwrr_get_server_from_group(grp);
769 if (srv)
770 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100771 if (switched) {
772 if (avoided) {
773 srv = avoided;
774 break;
775 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100776 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100777 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100778 switched = 1;
779 fwrr_switch_trees(grp);
780
781 }
782
783 /* OK, we have a server. However, it may be saturated, in which
784 * case we don't want to reconsider it for now. We'll update
785 * its position and dequeue it anyway, so that we can move it
786 * to a better place afterwards.
787 */
788 fwrr_update_position(grp, srv);
789 fwrr_dequeue_srv(srv);
790 grp->curr_pos++;
Willy Tarreau7c669d72008-06-20 15:04:11 +0200791 if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100792 /* make sure it is not the server we are trying to exclude... */
793 if (srv != srvtoavoid || avoided)
794 break;
795
796 avoided = srv; /* ...but remember that is was selected yet avoided */
797 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100798
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100799 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100800 srv->next_full = full;
801 full = srv;
802 }
803
804 /* OK, we got the best server, let's update it */
805 fwrr_queue_srv(srv);
806
807 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100808 /* Requeue all extracted servers. If full==srv then it was
809 * avoided (unsucessfully) and chained, omit it now.
810 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100811 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100812 if (switched) {
813 /* the tree has switched, requeue all extracted servers
814 * into "init", because their place was lost, and only
815 * their weight matters.
816 */
817 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100818 if (likely(full != srv))
819 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100820 full = full->next_full;
821 } while (full);
822 } else {
823 /* requeue all extracted servers just as if they were consumed
824 * so that they regain their expected place.
825 */
826 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100827 if (likely(full != srv))
828 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100829 full = full->next_full;
830 } while (full);
831 }
832 }
833 return srv;
834}
835
Willy Tarreau51406232008-03-10 22:04:20 +0100836/* Remove a server from a tree. It must have previously been dequeued. This
837 * function is meant to be called when a server is going down or has its
838 * weight disabled.
839 */
840static inline void fwlc_remove_from_tree(struct server *s)
841{
842 s->lb_tree = NULL;
843}
844
845/* simply removes a server from a tree */
846static inline void fwlc_dequeue_srv(struct server *s)
847{
848 eb32_delete(&s->lb_node);
849}
850
851/* Queue a server in its associated tree, assuming the weight is >0.
852 * Servers are sorted by #conns/weight. To ensure maximum accuracy,
853 * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key.
854 */
855static inline void fwlc_queue_srv(struct server *s)
856{
Willy Tarreau7c669d72008-06-20 15:04:11 +0200857 s->lb_node.key = s->served * SRV_EWGHT_MAX / s->eweight;
Willy Tarreau51406232008-03-10 22:04:20 +0100858 eb32_insert(s->lb_tree, &s->lb_node);
859}
860
861/* Re-position the server in the FWLC tree after it has been assigned one
862 * connection or after it has released one. Note that it is possible that
863 * the server has been moved out of the tree due to failed health-checks.
864 */
865static void fwlc_srv_reposition(struct server *s)
866{
867 if (!s->lb_tree)
868 return;
869 fwlc_dequeue_srv(s);
870 fwlc_queue_srv(s);
871}
872
873/* This function updates the server trees according to server <srv>'s new
874 * state. It should be called when server <srv>'s status changes to down.
875 * It is not important whether the server was already down or not. It is not
876 * important either that the new state is completely down (the caller may not
877 * know all the variables of a server's state).
878 */
879static void fwlc_set_server_status_down(struct server *srv)
880{
881 struct proxy *p = srv->proxy;
882
883 if (srv->state == srv->prev_state &&
884 srv->eweight == srv->prev_eweight)
885 return;
886
887 if (srv_is_usable(srv->state, srv->eweight))
888 goto out_update_state;
889
890 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
891 /* server was already down */
892 goto out_update_backend;
893
894 if (srv->state & SRV_BACKUP) {
895 p->lbprm.tot_wbck -= srv->prev_eweight;
896 p->srv_bck--;
897
898 if (srv == p->lbprm.fbck) {
899 /* we lost the first backup server in a single-backup
900 * configuration, we must search another one.
901 */
902 struct server *srv2 = p->lbprm.fbck;
903 do {
904 srv2 = srv2->next;
905 } while (srv2 &&
906 !((srv2->state & SRV_BACKUP) &&
907 srv_is_usable(srv2->state, srv2->eweight)));
908 p->lbprm.fbck = srv2;
909 }
910 } else {
911 p->lbprm.tot_wact -= srv->prev_eweight;
912 p->srv_act--;
913 }
914
915 fwlc_dequeue_srv(srv);
916 fwlc_remove_from_tree(srv);
917
918out_update_backend:
919 /* check/update tot_used, tot_weight */
920 update_backend_weight(p);
921 out_update_state:
922 srv->prev_state = srv->state;
923 srv->prev_eweight = srv->eweight;
924}
925
926/* This function updates the server trees according to server <srv>'s new
927 * state. It should be called when server <srv>'s status changes to up.
928 * It is not important whether the server was already down or not. It is not
929 * important either that the new state is completely UP (the caller may not
930 * know all the variables of a server's state). This function will not change
931 * the weight of a server which was already up.
932 */
933static void fwlc_set_server_status_up(struct server *srv)
934{
935 struct proxy *p = srv->proxy;
936
937 if (srv->state == srv->prev_state &&
938 srv->eweight == srv->prev_eweight)
939 return;
940
941 if (!srv_is_usable(srv->state, srv->eweight))
942 goto out_update_state;
943
944 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
945 /* server was already up */
946 goto out_update_backend;
947
948 if (srv->state & SRV_BACKUP) {
949 srv->lb_tree = &p->lbprm.fwlc.bck;
950 p->lbprm.tot_wbck += srv->eweight;
951 p->srv_bck++;
952
953 if (!(p->options & PR_O_USE_ALL_BK)) {
954 if (!p->lbprm.fbck) {
955 /* there was no backup server anymore */
956 p->lbprm.fbck = srv;
957 } else {
958 /* we may have restored a backup server prior to fbck,
959 * in which case it should replace it.
960 */
961 struct server *srv2 = srv;
962 do {
963 srv2 = srv2->next;
964 } while (srv2 && (srv2 != p->lbprm.fbck));
965 if (srv2)
966 p->lbprm.fbck = srv;
967 }
968 }
969 } else {
970 srv->lb_tree = &p->lbprm.fwlc.act;
971 p->lbprm.tot_wact += srv->eweight;
972 p->srv_act++;
973 }
974
975 /* note that eweight cannot be 0 here */
976 fwlc_queue_srv(srv);
977
978 out_update_backend:
979 /* check/update tot_used, tot_weight */
980 update_backend_weight(p);
981 out_update_state:
982 srv->prev_state = srv->state;
983 srv->prev_eweight = srv->eweight;
984}
985
986/* This function must be called after an update to server <srv>'s effective
987 * weight. It may be called after a state change too.
988 */
989static void fwlc_update_server_weight(struct server *srv)
990{
991 int old_state, new_state;
992 struct proxy *p = srv->proxy;
993
994 if (srv->state == srv->prev_state &&
995 srv->eweight == srv->prev_eweight)
996 return;
997
998 /* If changing the server's weight changes its state, we simply apply
999 * the procedures we already have for status change. If the state
1000 * remains down, the server is not in any tree, so it's as easy as
1001 * updating its values. If the state remains up with different weights,
1002 * there are some computations to perform to find a new place and
1003 * possibly a new tree for this server.
1004 */
1005
1006 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
1007 new_state = srv_is_usable(srv->state, srv->eweight);
1008
1009 if (!old_state && !new_state) {
1010 srv->prev_state = srv->state;
1011 srv->prev_eweight = srv->eweight;
1012 return;
1013 }
1014 else if (!old_state && new_state) {
1015 fwlc_set_server_status_up(srv);
1016 return;
1017 }
1018 else if (old_state && !new_state) {
1019 fwlc_set_server_status_down(srv);
1020 return;
1021 }
1022
1023 if (srv->lb_tree)
1024 fwlc_dequeue_srv(srv);
1025
1026 if (srv->state & SRV_BACKUP) {
1027 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
1028 srv->lb_tree = &p->lbprm.fwlc.bck;
1029 } else {
1030 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
1031 srv->lb_tree = &p->lbprm.fwlc.act;
1032 }
1033
1034 fwlc_queue_srv(srv);
1035
1036 update_backend_weight(p);
1037 srv->prev_state = srv->state;
1038 srv->prev_eweight = srv->eweight;
1039}
1040
1041/* This function is responsible for building the trees in case of fast
1042 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
1043 * uweight ratio. Both active and backup groups are initialized.
1044 */
1045void fwlc_init_server_tree(struct proxy *p)
1046{
1047 struct server *srv;
1048 struct eb_root init_head = EB_ROOT;
1049
1050 p->lbprm.set_server_status_up = fwlc_set_server_status_up;
1051 p->lbprm.set_server_status_down = fwlc_set_server_status_down;
1052 p->lbprm.update_server_eweight = fwlc_update_server_weight;
1053 p->lbprm.server_take_conn = fwlc_srv_reposition;
1054 p->lbprm.server_drop_conn = fwlc_srv_reposition;
1055
1056 p->lbprm.wdiv = BE_WEIGHT_SCALE;
1057 for (srv = p->srv; srv; srv = srv->next) {
1058 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
1059 srv->prev_state = srv->state;
1060 }
1061
1062 recount_servers(p);
1063 update_backend_weight(p);
1064
1065 p->lbprm.fwlc.act = init_head;
1066 p->lbprm.fwlc.bck = init_head;
1067
1068 /* queue active and backup servers in two distinct groups */
1069 for (srv = p->srv; srv; srv = srv->next) {
1070 if (!srv_is_usable(srv->state, srv->eweight))
1071 continue;
1072 srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
1073 fwlc_queue_srv(srv);
1074 }
1075}
1076
1077/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
1078 * return NULL. Saturated servers are skipped.
1079 */
1080static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
1081{
1082 struct server *srv, *avoided;
1083 struct eb32_node *node;
1084
1085 srv = avoided = NULL;
1086
1087 if (p->srv_act)
1088 node = eb32_first(&p->lbprm.fwlc.act);
1089 else if (p->lbprm.fbck)
1090 return p->lbprm.fbck;
1091 else if (p->srv_bck)
1092 node = eb32_first(&p->lbprm.fwlc.bck);
1093 else
1094 return NULL;
1095
1096 while (node) {
1097 /* OK, we have a server. However, it may be saturated, in which
1098 * case we don't want to reconsider it for now, so we'll simply
1099 * skip it. Same if it's the server we try to avoid, in which
1100 * case we simply remember it for later use if needed.
1101 */
1102 struct server *s;
1103
1104 s = eb32_entry(node, struct server, lb_node);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001105 if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
Willy Tarreau51406232008-03-10 22:04:20 +01001106 if (s != srvtoavoid) {
1107 srv = s;
1108 break;
1109 }
1110 avoided = s;
1111 }
1112 node = eb32_next(node);
1113 }
1114
1115 if (!srv)
1116 srv = avoided;
1117
1118 return srv;
1119}
1120
Willy Tarreau01732802007-11-01 22:48:15 +01001121/*
1122 * This function tries to find a running server for the proxy <px> following
1123 * the URL parameter hash method. It looks for a specific parameter in the
1124 * URL and hashes it to compute the server ID. This is useful to optimize
1125 * performance by avoiding bounces between servers in contexts where sessions
1126 * are shared but cookies are not usable. If the parameter is not found, NULL
1127 * is returned. If any server is found, it will be returned. If no valid server
1128 * is found, NULL is returned.
Willy Tarreau01732802007-11-01 22:48:15 +01001129 */
1130struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
1131{
1132 unsigned long hash = 0;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001133 const char *p;
1134 const char *params;
Willy Tarreau01732802007-11-01 22:48:15 +01001135 int plen;
1136
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001137 /* when tot_weight is 0 then so is srv_count */
Willy Tarreau20697042007-11-15 23:26:18 +01001138 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +01001139 return NULL;
1140
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001141 if ((p = memchr(uri, '?', uri_len)) == NULL)
1142 return NULL;
1143
Willy Tarreau20697042007-11-15 23:26:18 +01001144 if (px->lbprm.map.state & PR_MAP_RECALC)
1145 recalc_server_map(px);
1146
Willy Tarreau01732802007-11-01 22:48:15 +01001147 p++;
1148
1149 uri_len -= (p - uri);
1150 plen = px->url_param_len;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001151 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001152
1153 while (uri_len > plen) {
1154 /* Look for the parameter name followed by an equal symbol */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001155 if (params[plen] == '=') {
1156 if (memcmp(params, px->url_param_name, plen) == 0) {
1157 /* OK, we have the parameter here at <params>, and
Willy Tarreau01732802007-11-01 22:48:15 +01001158 * the value after the equal sign, at <p>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001159 * skip the equal symbol
Willy Tarreau01732802007-11-01 22:48:15 +01001160 */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001161 p += plen + 1;
1162 uri_len -= plen + 1;
1163
Willy Tarreau01732802007-11-01 22:48:15 +01001164 while (uri_len && *p != '&') {
1165 hash = *p + (hash << 6) + (hash << 16) - hash;
1166 uri_len--;
1167 p++;
1168 }
Willy Tarreau20697042007-11-15 23:26:18 +01001169 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +01001170 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001171 }
1172 /* skip to next parameter */
1173 p = memchr(params, '&', uri_len);
1174 if (!p)
1175 return NULL;
1176 p++;
1177 uri_len -= (p - params);
1178 params = p;
1179 }
1180 return NULL;
1181}
1182
1183/*
1184 * this does the same as the previous server_ph, but check the body contents
1185 */
1186struct server *get_server_ph_post(struct session *s)
1187{
1188 unsigned long hash = 0;
1189 struct http_txn *txn = &s->txn;
1190 struct buffer *req = s->req;
1191 struct http_msg *msg = &txn->req;
1192 struct proxy *px = s->be;
1193 unsigned int plen = px->url_param_len;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001194 unsigned long body;
1195 unsigned long len;
1196 const char *params;
1197 struct hdr_ctx ctx;
1198 const char *p;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001199
1200 /* tot_weight appears to mean srv_count */
1201 if (px->lbprm.tot_weight == 0)
1202 return NULL;
1203
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001204 body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
1205 len = req->total - body;
1206 params = req->data + body;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001207
1208 if ( len == 0 )
1209 return NULL;
1210
1211 if (px->lbprm.map.state & PR_MAP_RECALC)
1212 recalc_server_map(px);
1213
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001214 ctx.idx = 0;
1215
1216 /* if the message is chunked, we skip the chunk size, but use the value as len */
1217 http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
1218 if ( ctx.idx && strncasecmp(ctx.line+ctx.val,"chunked",ctx.vlen)==0) {
1219 unsigned int chunk = 0;
1220 while ( params < req->rlim && !HTTP_IS_CRLF(*params)) {
1221 char c = *params;
1222 if (ishex(c)) {
1223 unsigned int hex = toupper(c) - '0';
1224 if ( hex > 9 )
1225 hex -= 'A' - '9' - 1;
1226 chunk = (chunk << 4) | hex;
1227 }
1228 else
1229 return NULL;
1230 params++;
1231 len--;
Willy Tarreau01732802007-11-01 22:48:15 +01001232 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001233 /* spec says we get CRLF */
1234 if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
1235 params += 2;
1236 else
1237 return NULL;
1238 /* ok we have some encoded length, just inspect the first chunk */
1239 len = chunk;
1240 }
Willy Tarreau01732802007-11-01 22:48:15 +01001241
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001242 p = params;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001243
1244 while (len > plen) {
1245 /* Look for the parameter name followed by an equal symbol */
1246 if (params[plen] == '=') {
1247 if (memcmp(params, px->url_param_name, plen) == 0) {
1248 /* OK, we have the parameter here at <params>, and
1249 * the value after the equal sign, at <p>
1250 * skip the equal symbol
1251 */
1252 p += plen + 1;
1253 len -= plen + 1;
1254
1255 while (len && *p != '&') {
1256 if (unlikely(!HTTP_IS_TOKEN(*p))) {
1257 /* if in a POST, body must be URI encoded or its not a URI.
1258 * Do not interprete any possible binary data as a parameter.
1259 */
1260 if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
1261 break;
1262 return NULL; /* oh, no; this is not uri-encoded.
1263 * This body does not contain parameters.
1264 */
1265 }
1266 hash = *p + (hash << 6) + (hash << 16) - hash;
1267 len--;
1268 p++;
1269 /* should we break if vlen exceeds limit? */
1270 }
1271 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1272 }
1273 }
Willy Tarreau01732802007-11-01 22:48:15 +01001274 /* skip to next parameter */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001275 p = memchr(params, '&', len);
Willy Tarreau01732802007-11-01 22:48:15 +01001276 if (!p)
1277 return NULL;
1278 p++;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001279 len -= (p - params);
1280 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001281 }
1282 return NULL;
1283}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001284
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001285
Willy Tarreaubaaee002006-06-26 02:48:02 +02001286/*
Willy Tarreau7c669d72008-06-20 15:04:11 +02001287 * This function applies the load-balancing algorithm to the session, as
1288 * defined by the backend it is assigned to. The session is then marked as
1289 * 'assigned'.
1290 *
1291 * This function MAY NOT be called with SN_ASSIGNED already set. If the session
1292 * had a server previously assigned, it is rebalanced, trying to avoid the same
1293 * server.
1294 * The function tries to keep the original connection slot if it reconnects to
1295 * the same server, otherwise it releases it and tries to offer it.
1296 *
1297 * It is illegal to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001298 *
1299 * It may return :
Willy Tarreau7c669d72008-06-20 15:04:11 +02001300 * SRV_STATUS_OK if everything is OK. Session assigned to ->srv
1301 * SRV_STATUS_NOSRV if no server is available. Session is not ASSIGNED
1302 * SRV_STATUS_FULL if all servers are saturated. Session is not ASSIGNED
Willy Tarreaubaaee002006-06-26 02:48:02 +02001303 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1304 *
Willy Tarreau7c669d72008-06-20 15:04:11 +02001305 * Upon successful return, the session flag SN_ASSIGNED is set to indicate that
1306 * it does not need to be called anymore. This means that s->srv can be trusted
1307 * in balance and direct modes.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001308 *
1309 */
1310
1311int assign_server(struct session *s)
1312{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001313
Willy Tarreau7c669d72008-06-20 15:04:11 +02001314 struct server *conn_slot;
1315 int err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001316
Willy Tarreaubaaee002006-06-26 02:48:02 +02001317#ifdef DEBUG_FULL
1318 fprintf(stderr,"assign_server : s=%p\n",s);
1319#endif
1320
Willy Tarreau7c669d72008-06-20 15:04:11 +02001321 err = SRV_STATUS_INTERNAL;
1322 if (unlikely(s->pend_pos || s->flags & SN_ASSIGNED))
1323 goto out_err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001324
Willy Tarreau7c669d72008-06-20 15:04:11 +02001325 s->prev_srv = s->prev_srv;
1326 conn_slot = s->srv_conn;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001327
Willy Tarreau7c669d72008-06-20 15:04:11 +02001328 /* We have to release any connection slot before applying any LB algo,
1329 * otherwise we may erroneously end up with no available slot.
1330 */
1331 if (conn_slot)
1332 sess_change_server(s, NULL);
1333
1334 /* We will now try to find the good server and store it into <s->srv>.
1335 * Note that <s->srv> may be NULL in case of dispatch or proxy mode,
1336 * as well as if no server is available (check error code).
1337 */
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001338
Willy Tarreau7c669d72008-06-20 15:04:11 +02001339 s->srv = NULL;
1340 if (s->be->lbprm.algo & BE_LB_ALGO) {
1341 int len;
1342 /* we must check if we have at least one server available */
1343 if (!s->be->lbprm.tot_weight) {
1344 err = SRV_STATUS_NOSRV;
1345 goto out;
1346 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001347
Willy Tarreau7c669d72008-06-20 15:04:11 +02001348 switch (s->be->lbprm.algo & BE_LB_ALGO) {
1349 case BE_LB_ALGO_RR:
1350 s->srv = fwrr_get_next_server(s->be, s->prev_srv);
1351 if (!s->srv) {
1352 err = SRV_STATUS_FULL;
1353 goto out;
1354 }
1355 break;
1356 case BE_LB_ALGO_LC:
1357 s->srv = fwlc_get_next_server(s->be, s->prev_srv);
1358 if (!s->srv) {
1359 err = SRV_STATUS_FULL;
1360 goto out;
1361 }
1362 break;
1363 case BE_LB_ALGO_SH:
1364 if (s->cli_addr.ss_family == AF_INET)
1365 len = 4;
1366 else if (s->cli_addr.ss_family == AF_INET6)
1367 len = 16;
1368 else {
1369 /* unknown IP family */
1370 err = SRV_STATUS_INTERNAL;
1371 goto out;
1372 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001373
Willy Tarreau7c669d72008-06-20 15:04:11 +02001374 s->srv = get_server_sh(s->be,
1375 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
1376 len);
1377 break;
1378 case BE_LB_ALGO_UH:
1379 /* URI hashing */
1380 s->srv = get_server_uh(s->be,
1381 s->txn.req.sol + s->txn.req.sl.rq.u,
1382 s->txn.req.sl.rq.u_l);
1383 break;
1384 case BE_LB_ALGO_PH:
1385 /* URL Parameter hashing */
1386 if (s->txn.meth == HTTP_METH_POST &&
1387 memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
1388 s->txn.req.sl.rq.u_l ) == NULL)
1389 s->srv = get_server_ph_post(s);
1390 else
1391 s->srv = get_server_ph(s->be,
Willy Tarreau2fcb5002007-05-08 13:35:26 +02001392 s->txn.req.sol + s->txn.req.sl.rq.u,
1393 s->txn.req.sl.rq.u_l);
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001394
Willy Tarreau7c669d72008-06-20 15:04:11 +02001395 if (!s->srv) {
1396 /* parameter not found, fall back to round robin on the map */
1397 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
Willy Tarreau01732802007-11-01 22:48:15 +01001398 if (!s->srv) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001399 err = SRV_STATUS_FULL;
1400 goto out;
Willy Tarreau01732802007-11-01 22:48:15 +01001401 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001402 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001403 break;
1404 default:
1405 /* unknown balancing algorithm */
1406 err = SRV_STATUS_INTERNAL;
1407 goto out;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001408 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001409 if (s->srv != s->prev_srv) {
1410 s->be->cum_lbconn++;
1411 s->srv->cum_lbconn++;
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001412 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001413 }
1414 else if (s->be->options & PR_O_HTTP_PROXY) {
1415 if (!s->srv_addr.sin_addr.s_addr) {
1416 err = SRV_STATUS_NOSRV;
1417 goto out;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001418 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001419 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001420 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
1421 !(s->fe->options & PR_O_TRANSP)) {
1422 err = SRV_STATUS_NOSRV;
1423 goto out;
1424 }
1425
1426 s->flags |= SN_ASSIGNED;
1427 err = SRV_STATUS_OK;
1428 out:
1429
1430 /* Either we take back our connection slot, or we offer it to someone
1431 * else if we don't need it anymore.
1432 */
1433 if (conn_slot) {
1434 if (conn_slot == s->srv) {
1435 sess_change_server(s, s->srv);
1436 } else {
1437 if (may_dequeue_tasks(conn_slot, s->be))
1438 process_srv_queue(conn_slot);
1439 }
1440 }
1441
1442 out_err:
1443 return err;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001444}
1445
1446
1447/*
1448 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1449 * The address is taken from the currently assigned server, or from the
1450 * dispatch or transparent address.
1451 *
1452 * It may return :
1453 * SRV_STATUS_OK if everything is OK.
1454 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1455 *
1456 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1457 * not cleared, so it's to the caller to clear it if required.
1458 *
1459 */
1460int assign_server_address(struct session *s)
1461{
1462#ifdef DEBUG_FULL
1463 fprintf(stderr,"assign_server_address : s=%p\n",s);
1464#endif
1465
Willy Tarreau31682232007-11-29 15:38:04 +01001466 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001467 /* A server is necessarily known for this session */
1468 if (!(s->flags & SN_ASSIGNED))
1469 return SRV_STATUS_INTERNAL;
1470
1471 s->srv_addr = s->srv->addr;
1472
1473 /* if this server remaps proxied ports, we'll use
1474 * the port the client connected to with an offset. */
1475 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001476 if (!(s->fe->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
1477 get_frt_addr(s);
1478 if (s->frt_addr.ss_family == AF_INET) {
1479 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1480 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1481 } else {
1482 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1483 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1484 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001485 }
1486 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001487 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001488 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001489 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001490 }
Willy Tarreau73de9892006-11-30 11:40:23 +01001491 else if (s->fe->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001492 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001493 if (!(s->flags & SN_FRT_ADDR_SET))
1494 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001495
Willy Tarreaubd414282008-01-19 13:46:35 +01001496 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1497 /* when we support IPv6 on the backend, we may add other tests */
1498 //qfprintf(stderr, "Cannot get original server address.\n");
1499 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001500 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001501 else if (s->be->options & PR_O_HTTP_PROXY) {
1502 /* If HTTP PROXY option is set, then server is already assigned
1503 * during incoming client request parsing. */
1504 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001505 else {
1506 /* no server and no LB algorithm ! */
1507 return SRV_STATUS_INTERNAL;
1508 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001509
1510 s->flags |= SN_ADDR_SET;
1511 return SRV_STATUS_OK;
1512}
1513
1514
1515/* This function assigns a server to session <s> if required, and can add the
1516 * connection to either the assigned server's queue or to the proxy's queue.
Willy Tarreau7c669d72008-06-20 15:04:11 +02001517 * If ->srv_conn is set, the session is first released from the server.
1518 * It may also be called with SN_DIRECT and/or SN_ASSIGNED though. It will
1519 * be called before any connection and after any retry or redispatch occurs.
1520 *
1521 * It is not allowed to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001522 *
1523 * Returns :
1524 *
1525 * SRV_STATUS_OK if everything is OK.
1526 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1527 * SRV_STATUS_QUEUED if the connection has been queued.
1528 * SRV_STATUS_FULL if the server(s) is/are saturated and the
Willy Tarreau7c669d72008-06-20 15:04:11 +02001529 * connection could not be queued in s->srv,
1530 * which may be NULL if we queue on the backend.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001531 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1532 *
1533 */
1534int assign_server_and_queue(struct session *s)
1535{
1536 struct pendconn *p;
1537 int err;
1538
1539 if (s->pend_pos)
1540 return SRV_STATUS_INTERNAL;
1541
Willy Tarreau7c669d72008-06-20 15:04:11 +02001542 err = SRV_STATUS_OK;
1543 if (!(s->flags & SN_ASSIGNED)) {
1544 err = assign_server(s);
1545 if (s->prev_srv) {
1546 /* This session was previously assigned to a server. We have to
1547 * update the session's and the server's stats :
1548 * - if the server changed :
1549 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1550 * - set SN_REDISP if it was successfully redispatched
1551 * - increment srv->redispatches and be->redispatches
1552 * - if the server remained the same : update retries.
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001553 */
1554
Willy Tarreau7c669d72008-06-20 15:04:11 +02001555 if (s->prev_srv != s->srv) {
1556 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1557 s->txn.flags &= ~TX_CK_MASK;
1558 s->txn.flags |= TX_CK_DOWN;
1559 }
1560 s->flags |= SN_REDISP;
1561 s->prev_srv->redispatches++;
1562 s->be->redispatches++;
1563 } else {
1564 s->prev_srv->retries++;
1565 s->be->retries++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001566 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001567 }
1568 }
1569
Willy Tarreaubaaee002006-06-26 02:48:02 +02001570 switch (err) {
1571 case SRV_STATUS_OK:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001572 /* we have SN_ASSIGNED set */
1573 if (!s->srv)
1574 return SRV_STATUS_OK; /* dispatch or proxy mode */
1575
1576 /* If we already have a connection slot, no need to check any queue */
1577 if (s->srv_conn == s->srv)
1578 return SRV_STATUS_OK;
1579
1580 /* OK, this session already has an assigned server, but no
1581 * connection slot yet. Either it is a redispatch, or it was
1582 * assigned from persistence information (direct mode).
1583 */
1584 if ((s->flags & SN_REDIRECTABLE) && s->srv->rdr_len) {
1585 /* server scheduled for redirection, and already assigned. We
1586 * don't want to go further nor check the queue.
Willy Tarreau21d2af32008-02-14 20:25:24 +01001587 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001588 sess_change_server(s, s->srv); /* not really needed in fact */
Willy Tarreau21d2af32008-02-14 20:25:24 +01001589 return SRV_STATUS_OK;
1590 }
1591
Willy Tarreau7c669d72008-06-20 15:04:11 +02001592 /* We might have to queue this session if the assigned server is full.
1593 * We know we have to queue it into the server's queue, so if a maxqueue
1594 * is set on the server, we must also check that the server's queue is
1595 * not full, in which case we have to return FULL.
1596 */
1597 if (s->srv->maxconn &&
1598 (s->srv->nbpend || s->srv->served >= srv_dynamic_maxconn(s->srv))) {
1599
1600 if (s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue)
1601 return SRV_STATUS_FULL;
1602
Willy Tarreaubaaee002006-06-26 02:48:02 +02001603 p = pendconn_add(s);
1604 if (p)
1605 return SRV_STATUS_QUEUED;
1606 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001607 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001608 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001609
1610 /* OK, we can use this server. Let's reserve our place */
1611 sess_change_server(s, s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001612 return SRV_STATUS_OK;
1613
1614 case SRV_STATUS_FULL:
1615 /* queue this session into the proxy's queue */
1616 p = pendconn_add(s);
1617 if (p)
1618 return SRV_STATUS_QUEUED;
1619 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001620 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001621
1622 case SRV_STATUS_NOSRV:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001623 return err;
1624
Willy Tarreaubaaee002006-06-26 02:48:02 +02001625 case SRV_STATUS_INTERNAL:
1626 return err;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001627
Willy Tarreaubaaee002006-06-26 02:48:02 +02001628 default:
1629 return SRV_STATUS_INTERNAL;
1630 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001631}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001632
1633/*
1634 * This function initiates a connection to the server assigned to this session
1635 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1636 * It can return one of :
1637 * - SN_ERR_NONE if everything's OK
1638 * - SN_ERR_SRVTO if there are no more servers
1639 * - SN_ERR_SRVCL if the connection was refused by the server
1640 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1641 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1642 * - SN_ERR_INTERNAL for any other purely internal errors
1643 * Additionally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1644 */
1645int connect_server(struct session *s)
1646{
1647 int fd, err;
1648
1649 if (!(s->flags & SN_ADDR_SET)) {
1650 err = assign_server_address(s);
1651 if (err != SRV_STATUS_OK)
1652 return SN_ERR_INTERNAL;
1653 }
1654
1655 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
1656 qfprintf(stderr, "Cannot get a server socket.\n");
1657
1658 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001659 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001660 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001661 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001662 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001663 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001664 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001665 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001666 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001667 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001668 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001669 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001670 /* this is a resource error */
1671 return SN_ERR_RESOURCE;
1672 }
1673
1674 if (fd >= global.maxsock) {
1675 /* do not log anything there, it's a normal condition when this option
1676 * is used to serialize connections to a server !
1677 */
1678 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1679 close(fd);
1680 return SN_ERR_PRXCOND; /* it is a configuration limit */
1681 }
1682
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001683#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001684 if ((s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001685 /* TCP splicing supported by both FE and BE */
1686 tcp_splice_initfd(s->cli_fd, fd);
1687 }
1688#endif
1689
Willy Tarreaubaaee002006-06-26 02:48:02 +02001690 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1691 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1692 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1693 close(fd);
1694 return SN_ERR_INTERNAL;
1695 }
1696
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001697 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001698 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1699
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001700 if (s->be->options & PR_O_TCP_NOLING)
1701 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1702
Willy Tarreaubaaee002006-06-26 02:48:02 +02001703 /* allow specific binding :
1704 * - server-specific at first
1705 * - proxy-specific next
1706 */
1707 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001708 struct sockaddr_in *remote = NULL;
1709 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001710
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001711#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001712 switch (s->srv->state & SRV_TPROXY_MASK) {
1713 case SRV_TPROXY_ADDR:
1714 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1715 flags = 3;
1716 break;
1717 case SRV_TPROXY_CLI:
1718 flags |= 2;
1719 /* fall through */
1720 case SRV_TPROXY_CIP:
1721 /* FIXME: what can we do if the client connects in IPv6 ? */
1722 flags |= 1;
1723 remote = (struct sockaddr_in *)&s->cli_addr;
1724 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001725 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001726#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001727 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001728 if (ret) {
1729 close(fd);
1730 if (ret == 1) {
1731 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1732 s->be->id, s->srv->id);
1733 send_log(s->be, LOG_EMERG,
1734 "Cannot bind to source address before connect() for server %s/%s.\n",
1735 s->be->id, s->srv->id);
1736 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001737 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001738 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001739 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001740 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001741 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001742 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001743 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001744 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001745 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001746 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001747 struct sockaddr_in *remote = NULL;
1748 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001749
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001750#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001751 switch (s->be->options & PR_O_TPXY_MASK) {
1752 case PR_O_TPXY_ADDR:
1753 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1754 flags = 3;
1755 break;
1756 case PR_O_TPXY_CLI:
1757 flags |= 2;
1758 /* fall through */
1759 case PR_O_TPXY_CIP:
1760 /* FIXME: what can we do if the client connects in IPv6 ? */
1761 flags |= 1;
1762 remote = (struct sockaddr_in *)&s->cli_addr;
1763 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001764 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001765#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001766 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001767 if (ret) {
1768 close(fd);
1769 if (ret == 1) {
1770 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1771 s->be->id);
1772 send_log(s->be, LOG_EMERG,
1773 "Cannot bind to source address before connect() for proxy %s.\n",
1774 s->be->id);
1775 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001776 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001777 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001778 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001779 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1780 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001781 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001782 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001783 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001784 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001785
Willy Tarreaubaaee002006-06-26 02:48:02 +02001786 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1787 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1788
1789 if (errno == EAGAIN || errno == EADDRINUSE) {
1790 char *msg;
1791 if (errno == EAGAIN) /* no free ports left, try again later */
1792 msg = "no free ports";
1793 else
1794 msg = "local address already in use";
1795
1796 qfprintf(stderr,"Cannot connect: %s.\n",msg);
1797 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001798 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001799 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001800 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001801 return SN_ERR_RESOURCE;
1802 } else if (errno == ETIMEDOUT) {
1803 //qfprintf(stderr,"Connect(): ETIMEDOUT");
1804 close(fd);
1805 return SN_ERR_SRVTO;
1806 } else {
1807 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1808 //qfprintf(stderr,"Connect(): %d", errno);
1809 close(fd);
1810 return SN_ERR_SRVCL;
1811 }
1812 }
1813
1814 fdtab[fd].owner = s->task;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001815 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +02001816 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001817 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001818 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001819 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001820
1821 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1822 fdtab[fd].peerlen = sizeof(s->srv_addr);
1823
Willy Tarreauf161a342007-04-08 16:59:42 +02001824 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001825
1826 fd_insert(fd);
1827 if (s->srv) {
1828 s->srv->cur_sess++;
1829 if (s->srv->cur_sess > s->srv->cur_sess_max)
1830 s->srv->cur_sess_max = s->srv->cur_sess;
Willy Tarreau51406232008-03-10 22:04:20 +01001831 if (s->be->lbprm.server_take_conn)
1832 s->be->lbprm.server_take_conn(s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001833 }
1834
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001835 s->req->cex = tick_add_ifset(now_ms, s->be->timeout.connect);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001836 return SN_ERR_NONE; /* connection is OK */
1837}
1838
1839
1840/*
1841 * This function checks the retry count during the connect() job.
1842 * It updates the session's srv_state and retries, so that the caller knows
1843 * what it has to do. It uses the last connection error to set the log when
1844 * it expires. It returns 1 when it has expired, and 0 otherwise.
1845 */
1846int srv_count_retry_down(struct session *t, int conn_err)
1847{
1848 /* we are in front of a retryable error */
1849 t->conn_retries--;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +02001850
Willy Tarreaubaaee002006-06-26 02:48:02 +02001851 if (t->conn_retries < 0) {
1852 /* if not retryable anymore, let's abort */
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001853 t->req->cex = TICK_ETERNITY;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001854 srv_close_with_err(t, conn_err, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001855 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001856 if (t->srv)
1857 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001858 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001859
1860 /* We used to have a free connection slot. Since we'll never use it,
1861 * we have to inform the server that it may be used by another session.
1862 */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001863 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02001864 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001865 return 1;
1866 }
1867 return 0;
1868}
1869
1870
1871/*
1872 * This function performs the retryable part of the connect() job.
1873 * It updates the session's srv_state and retries, so that the caller knows
1874 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
1875 * it needs to redispatch.
1876 */
1877int srv_retryable_connect(struct session *t)
1878{
1879 int conn_err;
1880
1881 /* This loop ensures that we stop before the last retry in case of a
1882 * redispatchable server.
1883 */
1884 do {
1885 /* initiate a connection to the server */
1886 conn_err = connect_server(t);
1887 switch (conn_err) {
1888
1889 case SN_ERR_NONE:
1890 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
1891 t->srv_state = SV_STCONN;
Willy Tarreau98937b82007-12-10 15:05:42 +01001892 if (t->srv)
1893 t->srv->cum_sess++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001894 return 1;
1895
1896 case SN_ERR_INTERNAL:
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001897 t->req->cex = TICK_ETERNITY;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001898 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001899 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001900 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001901 t->srv->cum_sess++;
1902 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001903 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001904 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001905 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001906 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02001907 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001908 return 1;
1909 }
1910 /* ensure that we have enough retries left */
1911 if (srv_count_retry_down(t, conn_err)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001912 return 1;
1913 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001914 } while (t->srv == NULL || t->conn_retries > 0 || !(t->be->options & PR_O_REDISP));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001915
1916 /* We're on our last chance, and the REDISP option was specified.
1917 * We will ignore cookie and force to balance or use the dispatcher.
1918 */
1919 /* let's try to offer this slot to anybody */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001920 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02001921 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001922
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001923 if (t->srv)
1924 t->srv->cum_sess++; //FIXME?
Willy Tarreaubaaee002006-06-26 02:48:02 +02001925
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001926 /* it's left to the dispatcher to choose a server */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001927 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001928 t->prev_srv = t->srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001929 return 0;
1930}
1931
1932
1933/* This function performs the "redispatch" part of a connection attempt. It
1934 * will assign a server if required, queue the connection if required, and
1935 * handle errors that might arise at this level. It can change the server
1936 * state. It will return 1 if it encounters an error, switches the server
1937 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1938 * that the connection is ready to use.
1939 */
1940
1941int srv_redispatch_connect(struct session *t)
1942{
1943 int conn_err;
1944
1945 /* We know that we don't have any connection pending, so we will
1946 * try to get a new one, and wait in this state if it's queued
1947 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001948 redispatch:
Willy Tarreaubaaee002006-06-26 02:48:02 +02001949 conn_err = assign_server_and_queue(t);
1950 switch (conn_err) {
1951 case SRV_STATUS_OK:
1952 break;
1953
Willy Tarreau7c669d72008-06-20 15:04:11 +02001954 case SRV_STATUS_FULL:
1955 /* The server has reached its maxqueue limit. Either PR_O_REDISP is set
1956 * and we can redispatch to another server, or it is not and we return
1957 * 503. This only makes sense in DIRECT mode however, because normal LB
1958 * algorithms would never select such a server, and hash algorithms
1959 * would bring us on the same server again. Note that t->srv is set in
1960 * this case.
1961 */
1962 if ((t->flags & SN_DIRECT) && (t->be->options & PR_O_REDISP)) {
1963 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
1964 t->prev_srv = t->srv;
1965 goto redispatch;
1966 }
1967
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001968 t->req->cex = TICK_ETERNITY;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001969 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_Q,
1970 503, error_message(t, HTTP_ERR_503));
1971
1972 t->srv->failed_conns++;
1973 t->be->failed_conns++;
1974 return 1;
1975
Willy Tarreaubaaee002006-06-26 02:48:02 +02001976 case SRV_STATUS_NOSRV:
1977 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001978 t->req->cex = TICK_ETERNITY;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001979 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001980 503, error_message(t, HTTP_ERR_503));
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001981
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001982 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001983 return 1;
1984
1985 case SRV_STATUS_QUEUED:
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001986 t->req->cex = tick_add_ifset(now_ms, t->be->timeout.queue);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001987 t->srv_state = SV_STIDLE;
1988 /* do nothing else and do not wake any other session up */
1989 return 1;
1990
Willy Tarreaubaaee002006-06-26 02:48:02 +02001991 case SRV_STATUS_INTERNAL:
1992 default:
Willy Tarreau0c303ee2008-07-07 00:09:58 +02001993 t->req->cex = TICK_ETERNITY;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001994 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001995 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001996 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001997 t->srv->cum_sess++;
1998 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001999 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002000 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002001
2002 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002003 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02002004 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002005 return 1;
2006 }
2007 /* if we get here, it's because we got SRV_STATUS_OK, which also
2008 * means that the connection has not been queued.
2009 */
2010 return 0;
2011}
2012
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002013int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01002014 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002015 return px->down_time;
2016
2017 return now.tv_sec - px->last_change + px->down_time;
2018}
Willy Tarreaubaaee002006-06-26 02:48:02 +02002019
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002020/* This function parses a "balance" statement in a backend section describing
2021 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
2022 * returns -1, it may write an error message into ther <err> buffer, for at
2023 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
2024 * written. The function must be called with <args> pointing to the first word
2025 * after "balance".
2026 */
2027int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
2028{
2029 if (!*(args[0])) {
2030 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01002031 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2032 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002033 return 0;
2034 }
2035
2036 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002037 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2038 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002039 }
Willy Tarreau51406232008-03-10 22:04:20 +01002040 else if (!strcmp(args[0], "leastconn")) {
2041 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2042 curproxy->lbprm.algo |= BE_LB_ALGO_LC;
2043 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002044 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002045 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2046 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002047 }
2048 else if (!strcmp(args[0], "uri")) {
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002049 int arg = 1;
2050
Willy Tarreau31682232007-11-29 15:38:04 +01002051 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2052 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002053
2054 while (*args[arg]) {
2055 if (!strcmp(args[arg], "len")) {
2056 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2057 snprintf(err, errlen, "'balance uri len' expects a positive integer (got '%s').", args[arg+1]);
2058 return -1;
2059 }
2060 curproxy->uri_len_limit = atoi(args[arg+1]);
2061 arg += 2;
2062 }
2063 else if (!strcmp(args[arg], "depth")) {
2064 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2065 snprintf(err, errlen, "'balance uri depth' expects a positive integer (got '%s').", args[arg+1]);
2066 return -1;
2067 }
2068 /* hint: we store the position of the ending '/' (depth+1) so
2069 * that we avoid a comparison while computing the hash.
2070 */
2071 curproxy->uri_dirs_depth1 = atoi(args[arg+1]) + 1;
2072 arg += 2;
2073 }
2074 else {
2075 snprintf(err, errlen, "'balance uri' only accepts parameters 'len' and 'depth' (got '%s').", args[arg]);
2076 return -1;
2077 }
2078 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002079 }
Willy Tarreau01732802007-11-01 22:48:15 +01002080 else if (!strcmp(args[0], "url_param")) {
2081 if (!*args[1]) {
2082 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
2083 return -1;
2084 }
Willy Tarreau31682232007-11-29 15:38:04 +01002085 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2086 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreau01732802007-11-01 22:48:15 +01002087 if (curproxy->url_param_name)
2088 free(curproxy->url_param_name);
2089 curproxy->url_param_name = strdup(args[1]);
2090 curproxy->url_param_len = strlen(args[1]);
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002091 if (*args[2]) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02002092 if (strcmp(args[2], "check_post")) {
2093 snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
2094 return -1;
2095 }
2096 if (*args[3]) {
2097 /* TODO: maybe issue a warning if there is no value, no digits or too long */
2098 curproxy->url_param_post_limit = str2ui(args[3]);
2099 }
2100 /* if no limit, or faul value in args[3], then default to a moderate wordlen */
2101 if (!curproxy->url_param_post_limit)
2102 curproxy->url_param_post_limit = 48;
2103 else if ( curproxy->url_param_post_limit < 3 )
2104 curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
2105 }
Willy Tarreau01732802007-11-01 22:48:15 +01002106 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002107 else {
Willy Tarreau51406232008-03-10 22:04:20 +01002108 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri' and 'url_param' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002109 return -1;
2110 }
2111 return 0;
2112}
2113
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002114
2115/************************************************************************/
2116/* All supported keywords must be declared here. */
2117/************************************************************************/
2118
2119/* set test->i to the number of enabled servers on the proxy */
2120static int
2121acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
2122 struct acl_expr *expr, struct acl_test *test)
2123{
2124 test->flags = ACL_TEST_F_VOL_TEST;
2125 if (expr->arg_len) {
2126 /* another proxy was designated, we must look for it */
2127 for (px = proxy; px; px = px->next)
2128 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2129 break;
2130 }
2131 if (!px)
2132 return 0;
2133
2134 if (px->srv_act)
2135 test->i = px->srv_act;
2136 else if (px->lbprm.fbck)
2137 test->i = 1;
2138 else
2139 test->i = px->srv_bck;
2140
2141 return 1;
2142}
2143
2144
/* ACL keywords provided by this file: "nbsrv" is parsed with acl_parse_int,
 * fetched with acl_fetch_nbsrv and matched with acl_match_int. The all-NULL
 * entry presumably marks the end of the array (to be confirmed against
 * acl_register_keywords()).
 * Note: must not be declared <const> as its list will be overwritten
 */
static struct acl_kw_list acl_kws = {{ },{
	{ "nbsrv",   acl_parse_int,   acl_fetch_nbsrv,     acl_match_int   },
	{ NULL,      NULL,            NULL,                NULL },
}};
2150
2151
/* Registers the ACL keywords declared in this file. The constructor attribute
 * makes the compiler run this function automatically before main().
 */
__attribute__((constructor))
static void __backend_init(void)
{
	acl_register_keywords(&acl_kws);
}
2157
2158
Willy Tarreaubaaee002006-06-26 02:48:02 +02002159/*
2160 * Local variables:
2161 * c-indent-level: 8
2162 * c-basic-offset: 8
2163 * End:
2164 */