blob: 033b65088ba6979ee44fcba144eb5f36557e91d1 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +020019#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020
Willy Tarreau2dd0d472006-06-29 17:53:05 +020021#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020022#include <common/config.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020023#include <common/debug.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010024#include <common/eb32tree.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020025#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020026
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010027#include <types/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020028#include <types/buffers.h>
29#include <types/global.h>
30#include <types/polling.h>
31#include <types/proxy.h>
32#include <types/server.h>
33#include <types/session.h>
34
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010035#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020036#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020037#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020038#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010039#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020040#include <proto/log.h>
41#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010042#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020043#include <proto/queue.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020044#include <proto/session.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020045#include <proto/stream_sock.h>
46#include <proto/task.h>
47
Willy Tarreau6d1a9882007-01-07 02:03:04 +010048#ifdef CONFIG_HAP_TCPSPLICE
49#include <libtcpsplice.h>
50#endif
51
Willy Tarreaub625a082007-11-26 01:15:43 +010052static inline void fwrr_remove_from_tree(struct server *s);
53static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
54static inline void fwrr_dequeue_srv(struct server *s);
55static void fwrr_get_srv(struct server *s);
56static void fwrr_queue_srv(struct server *s);
57
58/* This function returns non-zero if a server with the given weight and state
59 * is usable for LB, otherwise zero.
60 */
61static inline int srv_is_usable(int state, int weight)
62{
63 if (!weight)
64 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010065 if (state & SRV_GOINGDOWN)
66 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010067 if (!(state & SRV_RUNNING))
68 return 0;
69 return 1;
70}
71
Willy Tarreaubaaee002006-06-26 02:48:02 +020072/*
73 * This function recounts the number of usable active and backup servers for
74 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010075 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010076 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010077 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020078 */
Willy Tarreaub625a082007-11-26 01:15:43 +010079static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020080{
81 struct server *srv;
82
Willy Tarreau20697042007-11-15 23:26:18 +010083 px->srv_act = px->srv_bck = 0;
84 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010085 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020086 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010087 if (!srv_is_usable(srv->state, srv->eweight))
88 continue;
89
90 if (srv->state & SRV_BACKUP) {
91 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010092 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010093 px->lbprm.fbck = srv;
94 px->srv_bck++;
95 px->lbprm.tot_wbck += srv->eweight;
96 } else {
97 px->srv_act++;
98 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020099 }
100 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100101}
Willy Tarreau20697042007-11-15 23:26:18 +0100102
Willy Tarreaub625a082007-11-26 01:15:43 +0100103/* This function simply updates the backend's tot_weight and tot_used values
104 * after servers weights have been updated. It is designed to be used after
105 * recount_servers() or equivalent.
106 */
107static void update_backend_weight(struct proxy *px)
108{
Willy Tarreau20697042007-11-15 23:26:18 +0100109 if (px->srv_act) {
110 px->lbprm.tot_weight = px->lbprm.tot_wact;
111 px->lbprm.tot_used = px->srv_act;
112 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100113 else if (px->lbprm.fbck) {
114 /* use only the first backup server */
115 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
116 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100117 }
118 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100119 px->lbprm.tot_weight = px->lbprm.tot_wbck;
120 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100121 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100122}
123
124/* this function updates the map according to server <srv>'s new state */
125static void map_set_server_status_down(struct server *srv)
126{
127 struct proxy *p = srv->proxy;
128
129 if (srv->state == srv->prev_state &&
130 srv->eweight == srv->prev_eweight)
131 return;
132
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100133 if (srv_is_usable(srv->state, srv->eweight))
134 goto out_update_state;
135
Willy Tarreaub625a082007-11-26 01:15:43 +0100136 /* FIXME: could be optimized since we know what changed */
137 recount_servers(p);
138 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100139 p->lbprm.map.state |= PR_MAP_RECALC;
140 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100141 srv->prev_state = srv->state;
142 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200143}
144
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100145/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100146static void map_set_server_status_up(struct server *srv)
147{
148 struct proxy *p = srv->proxy;
149
150 if (srv->state == srv->prev_state &&
151 srv->eweight == srv->prev_eweight)
152 return;
153
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100154 if (!srv_is_usable(srv->state, srv->eweight))
155 goto out_update_state;
156
Willy Tarreaub625a082007-11-26 01:15:43 +0100157 /* FIXME: could be optimized since we know what changed */
158 recount_servers(p);
159 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100160 p->lbprm.map.state |= PR_MAP_RECALC;
161 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100162 srv->prev_state = srv->state;
163 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100164}
165
Willy Tarreau20697042007-11-15 23:26:18 +0100166/* This function recomputes the server map for proxy px. It relies on
167 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
168 * called after recount_servers(). It also expects px->lbprm.map.srv
169 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200170 */
171void recalc_server_map(struct proxy *px)
172{
173 int o, tot, flag;
174 struct server *cur, *best;
175
Willy Tarreau20697042007-11-15 23:26:18 +0100176 switch (px->lbprm.tot_used) {
177 case 0: /* no server */
178 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200179 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100180 case 1: /* only one server, just fill first entry */
181 tot = 1;
182 break;
183 default:
184 tot = px->lbprm.tot_weight;
185 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200186 }
187
Willy Tarreau20697042007-11-15 23:26:18 +0100188 /* here we *know* that we have some servers */
189 if (px->srv_act)
190 flag = SRV_RUNNING;
191 else
192 flag = SRV_RUNNING | SRV_BACKUP;
193
Willy Tarreaubaaee002006-06-26 02:48:02 +0200194 /* this algorithm gives priority to the first server, which means that
195 * it will respect the declaration order for equivalent weights, and
196 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100197 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200198 * case, where we want the first server only.
199 */
200 for (cur = px->srv; cur; cur = cur->next)
201 cur->wscore = 0;
202
203 for (o = 0; o < tot; o++) {
204 int max = 0;
205 best = NULL;
206 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100207 if (flag == (cur->state &
208 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200209 int v;
210
211 /* If we are forced to return only one server, we don't want to
212 * go further, because we would return the wrong one due to
213 * divide overflow.
214 */
215 if (tot == 1) {
216 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100217 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200218 break;
219 }
220
Willy Tarreau417fae02007-03-25 21:16:40 +0200221 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200222 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
223 if (best == NULL || v > max) {
224 max = v;
225 best = cur;
226 }
227 }
228 }
Willy Tarreau20697042007-11-15 23:26:18 +0100229 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200230 best->wscore -= tot;
231 }
Willy Tarreau20697042007-11-15 23:26:18 +0100232 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200233}
234
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100235/* This function is responsible of building the server MAP for map-based LB
236 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
237 * weights if applicable. It should be called only once per proxy, at config
238 * time.
239 */
240void init_server_map(struct proxy *p)
241{
242 struct server *srv;
243 int pgcd;
244 int act, bck;
245
Willy Tarreaub625a082007-11-26 01:15:43 +0100246 p->lbprm.set_server_status_up = map_set_server_status_up;
247 p->lbprm.set_server_status_down = map_set_server_status_down;
248 p->lbprm.update_server_eweight = NULL;
249
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100250 if (!p->srv)
251 return;
252
253 /* We will factor the weights to reduce the table,
254 * using Euclide's largest common divisor algorithm
255 */
256 pgcd = p->srv->uweight;
257 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
258 int w = srv->uweight;
259 while (w) {
260 int t = pgcd % w;
261 pgcd = w;
262 w = t;
263 }
264 }
265
266 /* It is sometimes useful to know what factor to apply
267 * to the backend's effective weight to know its real
268 * weight.
269 */
270 p->lbprm.wmult = pgcd;
271
272 act = bck = 0;
273 for (srv = p->srv; srv; srv = srv->next) {
274 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100275 srv->prev_eweight = srv->eweight;
276 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100277 if (srv->state & SRV_BACKUP)
278 bck += srv->eweight;
279 else
280 act += srv->eweight;
281 }
282
283 /* this is the largest map we will ever need for this servers list */
284 if (act < bck)
285 act = bck;
286
287 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
288 /* recounts servers and their weights */
289 p->lbprm.map.state = PR_MAP_RECALC;
290 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100291 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100292 recalc_server_map(p);
293}
294
Willy Tarreaub625a082007-11-26 01:15:43 +0100295/* This function updates the server trees according to server <srv>'s new
296 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100297 * It is not important whether the server was already down or not. It is not
298 * important either that the new state is completely down (the caller may not
299 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100300 */
301static void fwrr_set_server_status_down(struct server *srv)
302{
303 struct proxy *p = srv->proxy;
304 struct fwrr_group *grp;
305
306 if (srv->state == srv->prev_state &&
307 srv->eweight == srv->prev_eweight)
308 return;
309
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100310 if (srv_is_usable(srv->state, srv->eweight))
311 goto out_update_state;
312
Willy Tarreaub625a082007-11-26 01:15:43 +0100313 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
314 /* server was already down */
315 goto out_update_backend;
316
317 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
318 grp->next_weight -= srv->prev_eweight;
319
320 if (srv->state & SRV_BACKUP) {
321 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
322 p->srv_bck--;
323
324 if (srv == p->lbprm.fbck) {
325 /* we lost the first backup server in a single-backup
326 * configuration, we must search another one.
327 */
328 struct server *srv2 = p->lbprm.fbck;
329 do {
330 srv2 = srv2->next;
331 } while (srv2 &&
332 !((srv2->state & SRV_BACKUP) &&
333 srv_is_usable(srv2->state, srv2->eweight)));
334 p->lbprm.fbck = srv2;
335 }
336 } else {
337 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
338 p->srv_act--;
339 }
340
341 fwrr_dequeue_srv(srv);
342 fwrr_remove_from_tree(srv);
343
344out_update_backend:
345 /* check/update tot_used, tot_weight */
346 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100347 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100348 srv->prev_state = srv->state;
349 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100350}
351
352/* This function updates the server trees according to server <srv>'s new
353 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100354 * It is not important whether the server was already down or not. It is not
355 * important either that the new state is completely UP (the caller may not
356 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100357 * the weight of a server which was already up.
358 */
359static void fwrr_set_server_status_up(struct server *srv)
360{
361 struct proxy *p = srv->proxy;
362 struct fwrr_group *grp;
363
364 if (srv->state == srv->prev_state &&
365 srv->eweight == srv->prev_eweight)
366 return;
367
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100368 if (!srv_is_usable(srv->state, srv->eweight))
369 goto out_update_state;
370
Willy Tarreaub625a082007-11-26 01:15:43 +0100371 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
372 /* server was already up */
373 goto out_update_backend;
374
375 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
376 grp->next_weight += srv->eweight;
377
378 if (srv->state & SRV_BACKUP) {
379 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
380 p->srv_bck++;
381
Willy Tarreauf4cca452008-03-08 21:42:54 +0100382 if (!(p->options & PR_O_USE_ALL_BK)) {
383 if (!p->lbprm.fbck) {
384 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100385 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100386 } else {
387 /* we may have restored a backup server prior to fbck,
388 * in which case it should replace it.
389 */
390 struct server *srv2 = srv;
391 do {
392 srv2 = srv2->next;
393 } while (srv2 && (srv2 != p->lbprm.fbck));
394 if (srv2)
395 p->lbprm.fbck = srv;
396 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100397 }
398 } else {
399 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
400 p->srv_act++;
401 }
402
403 /* note that eweight cannot be 0 here */
404 fwrr_get_srv(srv);
405 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
406 fwrr_queue_srv(srv);
407
408out_update_backend:
409 /* check/update tot_used, tot_weight */
410 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100411 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100412 srv->prev_state = srv->state;
413 srv->prev_eweight = srv->eweight;
414}
415
416/* This function must be called after an update to server <srv>'s effective
417 * weight. It may be called after a state change too.
418 */
419static void fwrr_update_server_weight(struct server *srv)
420{
421 int old_state, new_state;
422 struct proxy *p = srv->proxy;
423 struct fwrr_group *grp;
424
425 if (srv->state == srv->prev_state &&
426 srv->eweight == srv->prev_eweight)
427 return;
428
429 /* If changing the server's weight changes its state, we simply apply
430 * the procedures we already have for status change. If the state
431 * remains down, the server is not in any tree, so it's as easy as
432 * updating its values. If the state remains up with different weights,
433 * there are some computations to perform to find a new place and
434 * possibly a new tree for this server.
435 */
436
437 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
438 new_state = srv_is_usable(srv->state, srv->eweight);
439
440 if (!old_state && !new_state) {
441 srv->prev_state = srv->state;
442 srv->prev_eweight = srv->eweight;
443 return;
444 }
445 else if (!old_state && new_state) {
446 fwrr_set_server_status_up(srv);
447 return;
448 }
449 else if (old_state && !new_state) {
450 fwrr_set_server_status_down(srv);
451 return;
452 }
453
454 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
455 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
456
457 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
458 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
459
460 if (srv->lb_tree == grp->init) {
461 fwrr_dequeue_srv(srv);
462 fwrr_queue_by_weight(grp->init, srv);
463 }
464 else if (!srv->lb_tree) {
465 /* FIXME: server was down. This is not possible right now but
466 * may be needed soon for slowstart or graceful shutdown.
467 */
468 fwrr_dequeue_srv(srv);
469 fwrr_get_srv(srv);
470 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
471 fwrr_queue_srv(srv);
472 } else {
473 /* The server is either active or in the next queue. If it's
474 * still in the active queue and it has not consumed all of its
475 * places, let's adjust its next position.
476 */
477 fwrr_get_srv(srv);
478
479 if (srv->eweight > 0) {
480 int prev_next = srv->npos;
481 int step = grp->next_weight / srv->eweight;
482
483 srv->npos = srv->lpos + step;
484 srv->rweight = 0;
485
486 if (srv->npos > prev_next)
487 srv->npos = prev_next;
488 if (srv->npos < grp->curr_pos + 2)
489 srv->npos = grp->curr_pos + step;
490 } else {
491 /* push it into the next tree */
492 srv->npos = grp->curr_pos + grp->curr_weight;
493 }
494
495 fwrr_dequeue_srv(srv);
496 fwrr_queue_srv(srv);
497 }
498
499 update_backend_weight(p);
500 srv->prev_state = srv->state;
501 srv->prev_eweight = srv->eweight;
502}
503
504/* Remove a server from a tree. It must have previously been dequeued. This
505 * function is meant to be called when a server is going down or has its
506 * weight disabled.
507 */
508static inline void fwrr_remove_from_tree(struct server *s)
509{
510 s->lb_tree = NULL;
511}
512
513/* Queue a server in the weight tree <root>, assuming the weight is >0.
514 * We want to sort them by inverted weights, because we need to place
515 * heavy servers first in order to get a smooth distribution.
516 */
517static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
518{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100519 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100520 eb32_insert(root, &s->lb_node);
521 s->lb_tree = root;
522}
523
524/* This function is responsible for building the weight trees in case of fast
525 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
526 * ratio. Both active and backup groups are initialized.
527 */
528void fwrr_init_server_groups(struct proxy *p)
529{
530 struct server *srv;
531 struct eb_root init_head = EB_ROOT;
532
533 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
534 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
535 p->lbprm.update_server_eweight = fwrr_update_server_weight;
536
537 p->lbprm.wdiv = BE_WEIGHT_SCALE;
538 for (srv = p->srv; srv; srv = srv->next) {
539 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
540 srv->prev_state = srv->state;
541 }
542
543 recount_servers(p);
544 update_backend_weight(p);
545
546 /* prepare the active servers group */
547 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
548 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
549 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
550 p->lbprm.fwrr.act.t1 = init_head;
551 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
552 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
553
554 /* prepare the backup servers group */
555 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
556 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
557 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
558 p->lbprm.fwrr.bck.t1 = init_head;
559 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
560 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
561
562 /* queue active and backup servers in two distinct groups */
563 for (srv = p->srv; srv; srv = srv->next) {
564 if (!srv_is_usable(srv->state, srv->eweight))
565 continue;
566 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
567 p->lbprm.fwrr.bck.init :
568 p->lbprm.fwrr.act.init,
569 srv);
570 }
571}
572
573/* simply removes a server from a weight tree */
574static inline void fwrr_dequeue_srv(struct server *s)
575{
576 eb32_delete(&s->lb_node);
577}
578
579/* queues a server into the appropriate group and tree depending on its
580 * backup status, and ->npos. If the server is disabled, simply assign
581 * it to the NULL tree.
582 */
583static void fwrr_queue_srv(struct server *s)
584{
585 struct proxy *p = s->proxy;
586 struct fwrr_group *grp;
587
588 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
589
590 /* Delay everything which does not fit into the window and everything
591 * which does not fit into the theorical new window.
592 */
593 if (!srv_is_usable(s->state, s->eweight)) {
594 fwrr_remove_from_tree(s);
595 }
596 else if (s->eweight <= 0 ||
597 s->npos >= 2 * grp->curr_weight ||
598 s->npos >= grp->curr_weight + grp->next_weight) {
599 /* put into next tree, and readjust npos in case we could
600 * finally take this back to current. */
601 s->npos -= grp->curr_weight;
602 fwrr_queue_by_weight(grp->next, s);
603 }
604 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100605 /* The sorting key is stored in units of s->npos * user_weight
606 * in order to avoid overflows. As stated in backend.h, the
607 * lower the scale, the rougher the weights modulation, and the
608 * higher the scale, the lower the number of servers without
609 * overflow. With this formula, the result is always positive,
610 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100611 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100612 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
613 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
614
615 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100616 s->lb_tree = &grp->curr;
617 }
618}
619
620/* prepares a server when extracting it from the "init" tree */
621static inline void fwrr_get_srv_init(struct server *s)
622{
623 s->npos = s->rweight = 0;
624}
625
626/* prepares a server when extracting it from the "next" tree */
627static inline void fwrr_get_srv_next(struct server *s)
628{
629 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
630 &s->proxy->lbprm.fwrr.bck :
631 &s->proxy->lbprm.fwrr.act;
632
633 s->npos += grp->curr_weight;
634}
635
636/* prepares a server when it was marked down */
637static inline void fwrr_get_srv_down(struct server *s)
638{
639 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
640 &s->proxy->lbprm.fwrr.bck :
641 &s->proxy->lbprm.fwrr.act;
642
643 s->npos = grp->curr_pos;
644}
645
646/* prepares a server when extracting it from its tree */
647static void fwrr_get_srv(struct server *s)
648{
649 struct proxy *p = s->proxy;
650 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
651 &p->lbprm.fwrr.bck :
652 &p->lbprm.fwrr.act;
653
654 if (s->lb_tree == grp->init) {
655 fwrr_get_srv_init(s);
656 }
657 else if (s->lb_tree == grp->next) {
658 fwrr_get_srv_next(s);
659 }
660 else if (s->lb_tree == NULL) {
661 fwrr_get_srv_down(s);
662 }
663}
664
665/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
666 * when this happens, and "next" filled with servers sorted by weights.
667 */
668static inline void fwrr_switch_trees(struct fwrr_group *grp)
669{
670 struct eb_root *swap;
671 swap = grp->init;
672 grp->init = grp->next;
673 grp->next = swap;
674 grp->curr_weight = grp->next_weight;
675 grp->curr_pos = grp->curr_weight;
676}
677
678/* return next server from the current tree in FWRR group <grp>, or a server
679 * from the "init" tree if appropriate. If both trees are empty, return NULL.
680 */
681static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
682{
683 struct eb32_node *node;
684 struct server *s;
685
686 node = eb32_first(&grp->curr);
687 s = eb32_entry(node, struct server, lb_node);
688
689 if (!node || s->npos > grp->curr_pos) {
690 /* either we have no server left, or we have a hole */
691 struct eb32_node *node2;
692 node2 = eb32_first(grp->init);
693 if (node2) {
694 node = node2;
695 s = eb32_entry(node, struct server, lb_node);
696 fwrr_get_srv_init(s);
697 if (s->eweight == 0) /* FIXME: is it possible at all ? */
698 node = NULL;
699 }
700 }
701 if (node)
702 return s;
703 else
704 return NULL;
705}
706
707/* Computes next position of server <s> in the group. It is mandatory for <s>
708 * to have a non-zero, positive eweight.
709*/
710static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
711{
712 if (!s->npos) {
713 /* first time ever for this server */
714 s->lpos = grp->curr_pos;
715 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
716 s->rweight += grp->next_weight % s->eweight;
717
718 if (s->rweight >= s->eweight) {
719 s->rweight -= s->eweight;
720 s->npos++;
721 }
722 } else {
723 s->lpos = s->npos;
724 s->npos += grp->next_weight / s->eweight;
725 s->rweight += grp->next_weight % s->eweight;
726
727 if (s->rweight >= s->eweight) {
728 s->rweight -= s->eweight;
729 s->npos++;
730 }
731 }
732}
733
734/* Return next server from the current tree in backend <p>, or a server from
735 * the init tree if appropriate. If both trees are empty, return NULL.
736 * Saturated servers are skipped and requeued.
737 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100738static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100739{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100740 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100741 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100742 int switched;
743
744 if (p->srv_act)
745 grp = &p->lbprm.fwrr.act;
746 else if (p->lbprm.fbck)
747 return p->lbprm.fbck;
748 else if (p->srv_bck)
749 grp = &p->lbprm.fwrr.bck;
750 else
751 return NULL;
752
753 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100754 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100755 full = NULL; /* NULL-terminated list of saturated servers */
756 while (1) {
757 /* if we see an empty group, let's first try to collect weights
758 * which might have recently changed.
759 */
760 if (!grp->curr_weight)
761 grp->curr_pos = grp->curr_weight = grp->next_weight;
762
763 /* get first server from the "current" tree. When the end of
764 * the tree is reached, we may have to switch, but only once.
765 */
766 while (1) {
767 srv = fwrr_get_server_from_group(grp);
768 if (srv)
769 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100770 if (switched) {
771 if (avoided) {
772 srv = avoided;
773 break;
774 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100775 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100776 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100777 switched = 1;
778 fwrr_switch_trees(grp);
779
780 }
781
782 /* OK, we have a server. However, it may be saturated, in which
783 * case we don't want to reconsider it for now. We'll update
784 * its position and dequeue it anyway, so that we can move it
785 * to a better place afterwards.
786 */
787 fwrr_update_position(grp, srv);
788 fwrr_dequeue_srv(srv);
789 grp->curr_pos++;
Willy Tarreau7c669d72008-06-20 15:04:11 +0200790 if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100791 /* make sure it is not the server we are trying to exclude... */
792 if (srv != srvtoavoid || avoided)
793 break;
794
795 avoided = srv; /* ...but remember that is was selected yet avoided */
796 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100797
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100798 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100799 srv->next_full = full;
800 full = srv;
801 }
802
803 /* OK, we got the best server, let's update it */
804 fwrr_queue_srv(srv);
805
806 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100807 /* Requeue all extracted servers. If full==srv then it was
808 * avoided (unsucessfully) and chained, omit it now.
809 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100810 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100811 if (switched) {
812 /* the tree has switched, requeue all extracted servers
813 * into "init", because their place was lost, and only
814 * their weight matters.
815 */
816 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100817 if (likely(full != srv))
818 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100819 full = full->next_full;
820 } while (full);
821 } else {
822 /* requeue all extracted servers just as if they were consumed
823 * so that they regain their expected place.
824 */
825 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100826 if (likely(full != srv))
827 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100828 full = full->next_full;
829 } while (full);
830 }
831 }
832 return srv;
833}
834
Willy Tarreau51406232008-03-10 22:04:20 +0100835/* Remove a server from a tree. It must have previously been dequeued. This
836 * function is meant to be called when a server is going down or has its
837 * weight disabled.
838 */
839static inline void fwlc_remove_from_tree(struct server *s)
840{
841 s->lb_tree = NULL;
842}
843
844/* simply removes a server from a tree */
845static inline void fwlc_dequeue_srv(struct server *s)
846{
847 eb32_delete(&s->lb_node);
848}
849
850/* Queue a server in its associated tree, assuming the weight is >0.
851 * Servers are sorted by #conns/weight. To ensure maximum accuracy,
852 * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key.
853 */
854static inline void fwlc_queue_srv(struct server *s)
855{
Willy Tarreau7c669d72008-06-20 15:04:11 +0200856 s->lb_node.key = s->served * SRV_EWGHT_MAX / s->eweight;
Willy Tarreau51406232008-03-10 22:04:20 +0100857 eb32_insert(s->lb_tree, &s->lb_node);
858}
859
860/* Re-position the server in the FWLC tree after it has been assigned one
861 * connection or after it has released one. Note that it is possible that
862 * the server has been moved out of the tree due to failed health-checks.
863 */
864static void fwlc_srv_reposition(struct server *s)
865{
866 if (!s->lb_tree)
867 return;
868 fwlc_dequeue_srv(s);
869 fwlc_queue_srv(s);
870}
871
872/* This function updates the server trees according to server <srv>'s new
873 * state. It should be called when server <srv>'s status changes to down.
874 * It is not important whether the server was already down or not. It is not
875 * important either that the new state is completely down (the caller may not
876 * know all the variables of a server's state).
877 */
878static void fwlc_set_server_status_down(struct server *srv)
879{
880 struct proxy *p = srv->proxy;
881
882 if (srv->state == srv->prev_state &&
883 srv->eweight == srv->prev_eweight)
884 return;
885
886 if (srv_is_usable(srv->state, srv->eweight))
887 goto out_update_state;
888
889 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
890 /* server was already down */
891 goto out_update_backend;
892
893 if (srv->state & SRV_BACKUP) {
894 p->lbprm.tot_wbck -= srv->prev_eweight;
895 p->srv_bck--;
896
897 if (srv == p->lbprm.fbck) {
898 /* we lost the first backup server in a single-backup
899 * configuration, we must search another one.
900 */
901 struct server *srv2 = p->lbprm.fbck;
902 do {
903 srv2 = srv2->next;
904 } while (srv2 &&
905 !((srv2->state & SRV_BACKUP) &&
906 srv_is_usable(srv2->state, srv2->eweight)));
907 p->lbprm.fbck = srv2;
908 }
909 } else {
910 p->lbprm.tot_wact -= srv->prev_eweight;
911 p->srv_act--;
912 }
913
914 fwlc_dequeue_srv(srv);
915 fwlc_remove_from_tree(srv);
916
917out_update_backend:
918 /* check/update tot_used, tot_weight */
919 update_backend_weight(p);
920 out_update_state:
921 srv->prev_state = srv->state;
922 srv->prev_eweight = srv->eweight;
923}
924
925/* This function updates the server trees according to server <srv>'s new
926 * state. It should be called when server <srv>'s status changes to up.
927 * It is not important whether the server was already down or not. It is not
928 * important either that the new state is completely UP (the caller may not
929 * know all the variables of a server's state). This function will not change
930 * the weight of a server which was already up.
931 */
932static void fwlc_set_server_status_up(struct server *srv)
933{
934 struct proxy *p = srv->proxy;
935
936 if (srv->state == srv->prev_state &&
937 srv->eweight == srv->prev_eweight)
938 return;
939
940 if (!srv_is_usable(srv->state, srv->eweight))
941 goto out_update_state;
942
943 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
944 /* server was already up */
945 goto out_update_backend;
946
947 if (srv->state & SRV_BACKUP) {
948 srv->lb_tree = &p->lbprm.fwlc.bck;
949 p->lbprm.tot_wbck += srv->eweight;
950 p->srv_bck++;
951
952 if (!(p->options & PR_O_USE_ALL_BK)) {
953 if (!p->lbprm.fbck) {
954 /* there was no backup server anymore */
955 p->lbprm.fbck = srv;
956 } else {
957 /* we may have restored a backup server prior to fbck,
958 * in which case it should replace it.
959 */
960 struct server *srv2 = srv;
961 do {
962 srv2 = srv2->next;
963 } while (srv2 && (srv2 != p->lbprm.fbck));
964 if (srv2)
965 p->lbprm.fbck = srv;
966 }
967 }
968 } else {
969 srv->lb_tree = &p->lbprm.fwlc.act;
970 p->lbprm.tot_wact += srv->eweight;
971 p->srv_act++;
972 }
973
974 /* note that eweight cannot be 0 here */
975 fwlc_queue_srv(srv);
976
977 out_update_backend:
978 /* check/update tot_used, tot_weight */
979 update_backend_weight(p);
980 out_update_state:
981 srv->prev_state = srv->state;
982 srv->prev_eweight = srv->eweight;
983}
984
985/* This function must be called after an update to server <srv>'s effective
986 * weight. It may be called after a state change too.
987 */
988static void fwlc_update_server_weight(struct server *srv)
989{
990 int old_state, new_state;
991 struct proxy *p = srv->proxy;
992
993 if (srv->state == srv->prev_state &&
994 srv->eweight == srv->prev_eweight)
995 return;
996
997 /* If changing the server's weight changes its state, we simply apply
998 * the procedures we already have for status change. If the state
999 * remains down, the server is not in any tree, so it's as easy as
1000 * updating its values. If the state remains up with different weights,
1001 * there are some computations to perform to find a new place and
1002 * possibly a new tree for this server.
1003 */
1004
1005 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
1006 new_state = srv_is_usable(srv->state, srv->eweight);
1007
1008 if (!old_state && !new_state) {
1009 srv->prev_state = srv->state;
1010 srv->prev_eweight = srv->eweight;
1011 return;
1012 }
1013 else if (!old_state && new_state) {
1014 fwlc_set_server_status_up(srv);
1015 return;
1016 }
1017 else if (old_state && !new_state) {
1018 fwlc_set_server_status_down(srv);
1019 return;
1020 }
1021
1022 if (srv->lb_tree)
1023 fwlc_dequeue_srv(srv);
1024
1025 if (srv->state & SRV_BACKUP) {
1026 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
1027 srv->lb_tree = &p->lbprm.fwlc.bck;
1028 } else {
1029 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
1030 srv->lb_tree = &p->lbprm.fwlc.act;
1031 }
1032
1033 fwlc_queue_srv(srv);
1034
1035 update_backend_weight(p);
1036 srv->prev_state = srv->state;
1037 srv->prev_eweight = srv->eweight;
1038}
1039
1040/* This function is responsible for building the trees in case of fast
1041 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
1042 * uweight ratio. Both active and backup groups are initialized.
1043 */
1044void fwlc_init_server_tree(struct proxy *p)
1045{
1046 struct server *srv;
1047 struct eb_root init_head = EB_ROOT;
1048
1049 p->lbprm.set_server_status_up = fwlc_set_server_status_up;
1050 p->lbprm.set_server_status_down = fwlc_set_server_status_down;
1051 p->lbprm.update_server_eweight = fwlc_update_server_weight;
1052 p->lbprm.server_take_conn = fwlc_srv_reposition;
1053 p->lbprm.server_drop_conn = fwlc_srv_reposition;
1054
1055 p->lbprm.wdiv = BE_WEIGHT_SCALE;
1056 for (srv = p->srv; srv; srv = srv->next) {
1057 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
1058 srv->prev_state = srv->state;
1059 }
1060
1061 recount_servers(p);
1062 update_backend_weight(p);
1063
1064 p->lbprm.fwlc.act = init_head;
1065 p->lbprm.fwlc.bck = init_head;
1066
1067 /* queue active and backup servers in two distinct groups */
1068 for (srv = p->srv; srv; srv = srv->next) {
1069 if (!srv_is_usable(srv->state, srv->eweight))
1070 continue;
1071 srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
1072 fwlc_queue_srv(srv);
1073 }
1074}
1075
1076/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
1077 * return NULL. Saturated servers are skipped.
1078 */
1079static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
1080{
1081 struct server *srv, *avoided;
1082 struct eb32_node *node;
1083
1084 srv = avoided = NULL;
1085
1086 if (p->srv_act)
1087 node = eb32_first(&p->lbprm.fwlc.act);
1088 else if (p->lbprm.fbck)
1089 return p->lbprm.fbck;
1090 else if (p->srv_bck)
1091 node = eb32_first(&p->lbprm.fwlc.bck);
1092 else
1093 return NULL;
1094
1095 while (node) {
1096 /* OK, we have a server. However, it may be saturated, in which
1097 * case we don't want to reconsider it for now, so we'll simply
1098 * skip it. Same if it's the server we try to avoid, in which
1099 * case we simply remember it for later use if needed.
1100 */
1101 struct server *s;
1102
1103 s = eb32_entry(node, struct server, lb_node);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001104 if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
Willy Tarreau51406232008-03-10 22:04:20 +01001105 if (s != srvtoavoid) {
1106 srv = s;
1107 break;
1108 }
1109 avoided = s;
1110 }
1111 node = eb32_next(node);
1112 }
1113
1114 if (!srv)
1115 srv = avoided;
1116
1117 return srv;
1118}
1119
Willy Tarreau01732802007-11-01 22:48:15 +01001120/*
1121 * This function tries to find a running server for the proxy <px> following
1122 * the URL parameter hash method. It looks for a specific parameter in the
1123 * URL and hashes it to compute the server ID. This is useful to optimize
1124 * performance by avoiding bounces between servers in contexts where sessions
1125 * are shared but cookies are not usable. If the parameter is not found, NULL
1126 * is returned. If any server is found, it will be returned. If no valid server
1127 * is found, NULL is returned.
Willy Tarreau01732802007-11-01 22:48:15 +01001128 */
1129struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
1130{
1131 unsigned long hash = 0;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001132 const char *p;
1133 const char *params;
Willy Tarreau01732802007-11-01 22:48:15 +01001134 int plen;
1135
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001136 /* when tot_weight is 0 then so is srv_count */
Willy Tarreau20697042007-11-15 23:26:18 +01001137 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +01001138 return NULL;
1139
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001140 if ((p = memchr(uri, '?', uri_len)) == NULL)
1141 return NULL;
1142
Willy Tarreau20697042007-11-15 23:26:18 +01001143 if (px->lbprm.map.state & PR_MAP_RECALC)
1144 recalc_server_map(px);
1145
Willy Tarreau01732802007-11-01 22:48:15 +01001146 p++;
1147
1148 uri_len -= (p - uri);
1149 plen = px->url_param_len;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001150 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001151
1152 while (uri_len > plen) {
1153 /* Look for the parameter name followed by an equal symbol */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001154 if (params[plen] == '=') {
1155 if (memcmp(params, px->url_param_name, plen) == 0) {
1156 /* OK, we have the parameter here at <params>, and
Willy Tarreau01732802007-11-01 22:48:15 +01001157 * the value after the equal sign, at <p>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001158 * skip the equal symbol
Willy Tarreau01732802007-11-01 22:48:15 +01001159 */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001160 p += plen + 1;
1161 uri_len -= plen + 1;
1162
Willy Tarreau01732802007-11-01 22:48:15 +01001163 while (uri_len && *p != '&') {
1164 hash = *p + (hash << 6) + (hash << 16) - hash;
1165 uri_len--;
1166 p++;
1167 }
Willy Tarreau20697042007-11-15 23:26:18 +01001168 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +01001169 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001170 }
1171 /* skip to next parameter */
1172 p = memchr(params, '&', uri_len);
1173 if (!p)
1174 return NULL;
1175 p++;
1176 uri_len -= (p - params);
1177 params = p;
1178 }
1179 return NULL;
1180}
1181
1182/*
1183 * this does the same as the previous server_ph, but check the body contents
1184 */
1185struct server *get_server_ph_post(struct session *s)
1186{
1187 unsigned long hash = 0;
1188 struct http_txn *txn = &s->txn;
1189 struct buffer *req = s->req;
1190 struct http_msg *msg = &txn->req;
1191 struct proxy *px = s->be;
1192 unsigned int plen = px->url_param_len;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001193 unsigned long body;
1194 unsigned long len;
1195 const char *params;
1196 struct hdr_ctx ctx;
1197 const char *p;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001198
1199 /* tot_weight appears to mean srv_count */
1200 if (px->lbprm.tot_weight == 0)
1201 return NULL;
1202
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001203 body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
1204 len = req->total - body;
1205 params = req->data + body;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001206
1207 if ( len == 0 )
1208 return NULL;
1209
1210 if (px->lbprm.map.state & PR_MAP_RECALC)
1211 recalc_server_map(px);
1212
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001213 ctx.idx = 0;
1214
1215 /* if the message is chunked, we skip the chunk size, but use the value as len */
1216 http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
1217 if ( ctx.idx && strncasecmp(ctx.line+ctx.val,"chunked",ctx.vlen)==0) {
1218 unsigned int chunk = 0;
1219 while ( params < req->rlim && !HTTP_IS_CRLF(*params)) {
1220 char c = *params;
1221 if (ishex(c)) {
1222 unsigned int hex = toupper(c) - '0';
1223 if ( hex > 9 )
1224 hex -= 'A' - '9' - 1;
1225 chunk = (chunk << 4) | hex;
1226 }
1227 else
1228 return NULL;
1229 params++;
1230 len--;
Willy Tarreau01732802007-11-01 22:48:15 +01001231 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001232 /* spec says we get CRLF */
1233 if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
1234 params += 2;
1235 else
1236 return NULL;
1237 /* ok we have some encoded length, just inspect the first chunk */
1238 len = chunk;
1239 }
Willy Tarreau01732802007-11-01 22:48:15 +01001240
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001241 p = params;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001242
1243 while (len > plen) {
1244 /* Look for the parameter name followed by an equal symbol */
1245 if (params[plen] == '=') {
1246 if (memcmp(params, px->url_param_name, plen) == 0) {
1247 /* OK, we have the parameter here at <params>, and
1248 * the value after the equal sign, at <p>
1249 * skip the equal symbol
1250 */
1251 p += plen + 1;
1252 len -= plen + 1;
1253
1254 while (len && *p != '&') {
1255 if (unlikely(!HTTP_IS_TOKEN(*p))) {
1256 /* if in a POST, body must be URI encoded or its not a URI.
1257 * Do not interprete any possible binary data as a parameter.
1258 */
1259 if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
1260 break;
1261 return NULL; /* oh, no; this is not uri-encoded.
1262 * This body does not contain parameters.
1263 */
1264 }
1265 hash = *p + (hash << 6) + (hash << 16) - hash;
1266 len--;
1267 p++;
1268 /* should we break if vlen exceeds limit? */
1269 }
1270 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1271 }
1272 }
Willy Tarreau01732802007-11-01 22:48:15 +01001273 /* skip to next parameter */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001274 p = memchr(params, '&', len);
Willy Tarreau01732802007-11-01 22:48:15 +01001275 if (!p)
1276 return NULL;
1277 p++;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001278 len -= (p - params);
1279 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001280 }
1281 return NULL;
1282}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001283
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001284
Willy Tarreaubaaee002006-06-26 02:48:02 +02001285/*
Willy Tarreau7c669d72008-06-20 15:04:11 +02001286 * This function applies the load-balancing algorithm to the session, as
1287 * defined by the backend it is assigned to. The session is then marked as
1288 * 'assigned'.
1289 *
1290 * This function MAY NOT be called with SN_ASSIGNED already set. If the session
1291 * had a server previously assigned, it is rebalanced, trying to avoid the same
1292 * server.
1293 * The function tries to keep the original connection slot if it reconnects to
1294 * the same server, otherwise it releases it and tries to offer it.
1295 *
1296 * It is illegal to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001297 *
1298 * It may return :
Willy Tarreau7c669d72008-06-20 15:04:11 +02001299 * SRV_STATUS_OK if everything is OK. Session assigned to ->srv
1300 * SRV_STATUS_NOSRV if no server is available. Session is not ASSIGNED
1301 * SRV_STATUS_FULL if all servers are saturated. Session is not ASSIGNED
Willy Tarreaubaaee002006-06-26 02:48:02 +02001302 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1303 *
Willy Tarreau7c669d72008-06-20 15:04:11 +02001304 * Upon successful return, the session flag SN_ASSIGNED is set to indicate that
1305 * it does not need to be called anymore. This means that s->srv can be trusted
1306 * in balance and direct modes.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001307 *
1308 */
1309
1310int assign_server(struct session *s)
1311{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001312
Willy Tarreau7c669d72008-06-20 15:04:11 +02001313 struct server *conn_slot;
1314 int err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001315
Willy Tarreaubaaee002006-06-26 02:48:02 +02001316#ifdef DEBUG_FULL
1317 fprintf(stderr,"assign_server : s=%p\n",s);
1318#endif
1319
Willy Tarreau7c669d72008-06-20 15:04:11 +02001320 err = SRV_STATUS_INTERNAL;
1321 if (unlikely(s->pend_pos || s->flags & SN_ASSIGNED))
1322 goto out_err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001323
Willy Tarreau7c669d72008-06-20 15:04:11 +02001324 s->prev_srv = s->prev_srv;
1325 conn_slot = s->srv_conn;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001326
Willy Tarreau7c669d72008-06-20 15:04:11 +02001327 /* We have to release any connection slot before applying any LB algo,
1328 * otherwise we may erroneously end up with no available slot.
1329 */
1330 if (conn_slot)
1331 sess_change_server(s, NULL);
1332
1333 /* We will now try to find the good server and store it into <s->srv>.
1334 * Note that <s->srv> may be NULL in case of dispatch or proxy mode,
1335 * as well as if no server is available (check error code).
1336 */
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001337
Willy Tarreau7c669d72008-06-20 15:04:11 +02001338 s->srv = NULL;
1339 if (s->be->lbprm.algo & BE_LB_ALGO) {
1340 int len;
1341 /* we must check if we have at least one server available */
1342 if (!s->be->lbprm.tot_weight) {
1343 err = SRV_STATUS_NOSRV;
1344 goto out;
1345 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001346
Willy Tarreau7c669d72008-06-20 15:04:11 +02001347 switch (s->be->lbprm.algo & BE_LB_ALGO) {
1348 case BE_LB_ALGO_RR:
1349 s->srv = fwrr_get_next_server(s->be, s->prev_srv);
1350 if (!s->srv) {
1351 err = SRV_STATUS_FULL;
1352 goto out;
1353 }
1354 break;
1355 case BE_LB_ALGO_LC:
1356 s->srv = fwlc_get_next_server(s->be, s->prev_srv);
1357 if (!s->srv) {
1358 err = SRV_STATUS_FULL;
1359 goto out;
1360 }
1361 break;
1362 case BE_LB_ALGO_SH:
1363 if (s->cli_addr.ss_family == AF_INET)
1364 len = 4;
1365 else if (s->cli_addr.ss_family == AF_INET6)
1366 len = 16;
1367 else {
1368 /* unknown IP family */
1369 err = SRV_STATUS_INTERNAL;
1370 goto out;
1371 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001372
Willy Tarreau7c669d72008-06-20 15:04:11 +02001373 s->srv = get_server_sh(s->be,
1374 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
1375 len);
1376 break;
1377 case BE_LB_ALGO_UH:
1378 /* URI hashing */
1379 s->srv = get_server_uh(s->be,
1380 s->txn.req.sol + s->txn.req.sl.rq.u,
1381 s->txn.req.sl.rq.u_l);
1382 break;
1383 case BE_LB_ALGO_PH:
1384 /* URL Parameter hashing */
1385 if (s->txn.meth == HTTP_METH_POST &&
1386 memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
1387 s->txn.req.sl.rq.u_l ) == NULL)
1388 s->srv = get_server_ph_post(s);
1389 else
1390 s->srv = get_server_ph(s->be,
Willy Tarreau2fcb5002007-05-08 13:35:26 +02001391 s->txn.req.sol + s->txn.req.sl.rq.u,
1392 s->txn.req.sl.rq.u_l);
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001393
Willy Tarreau7c669d72008-06-20 15:04:11 +02001394 if (!s->srv) {
1395 /* parameter not found, fall back to round robin on the map */
1396 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
Willy Tarreau01732802007-11-01 22:48:15 +01001397 if (!s->srv) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001398 err = SRV_STATUS_FULL;
1399 goto out;
Willy Tarreau01732802007-11-01 22:48:15 +01001400 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001401 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001402 break;
1403 default:
1404 /* unknown balancing algorithm */
1405 err = SRV_STATUS_INTERNAL;
1406 goto out;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001407 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001408 if (s->srv != s->prev_srv) {
1409 s->be->cum_lbconn++;
1410 s->srv->cum_lbconn++;
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001411 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001412 }
1413 else if (s->be->options & PR_O_HTTP_PROXY) {
1414 if (!s->srv_addr.sin_addr.s_addr) {
1415 err = SRV_STATUS_NOSRV;
1416 goto out;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001417 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001418 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001419 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
1420 !(s->fe->options & PR_O_TRANSP)) {
1421 err = SRV_STATUS_NOSRV;
1422 goto out;
1423 }
1424
1425 s->flags |= SN_ASSIGNED;
1426 err = SRV_STATUS_OK;
1427 out:
1428
1429 /* Either we take back our connection slot, or we offer it to someone
1430 * else if we don't need it anymore.
1431 */
1432 if (conn_slot) {
1433 if (conn_slot == s->srv) {
1434 sess_change_server(s, s->srv);
1435 } else {
1436 if (may_dequeue_tasks(conn_slot, s->be))
1437 process_srv_queue(conn_slot);
1438 }
1439 }
1440
1441 out_err:
1442 return err;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001443}
1444
1445
1446/*
1447 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1448 * The address is taken from the currently assigned server, or from the
1449 * dispatch or transparent address.
1450 *
1451 * It may return :
1452 * SRV_STATUS_OK if everything is OK.
1453 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1454 *
1455 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1456 * not cleared, so it's to the caller to clear it if required.
1457 *
1458 */
1459int assign_server_address(struct session *s)
1460{
1461#ifdef DEBUG_FULL
1462 fprintf(stderr,"assign_server_address : s=%p\n",s);
1463#endif
1464
Willy Tarreau31682232007-11-29 15:38:04 +01001465 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001466 /* A server is necessarily known for this session */
1467 if (!(s->flags & SN_ASSIGNED))
1468 return SRV_STATUS_INTERNAL;
1469
1470 s->srv_addr = s->srv->addr;
1471
1472 /* if this server remaps proxied ports, we'll use
1473 * the port the client connected to with an offset. */
1474 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001475 if (!(s->fe->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
1476 get_frt_addr(s);
1477 if (s->frt_addr.ss_family == AF_INET) {
1478 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1479 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1480 } else {
1481 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1482 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1483 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001484 }
1485 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001486 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001487 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001488 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001489 }
Willy Tarreau73de9892006-11-30 11:40:23 +01001490 else if (s->fe->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001491 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001492 if (!(s->flags & SN_FRT_ADDR_SET))
1493 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001494
Willy Tarreaubd414282008-01-19 13:46:35 +01001495 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1496 /* when we support IPv6 on the backend, we may add other tests */
1497 //qfprintf(stderr, "Cannot get original server address.\n");
1498 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001499 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001500 else if (s->be->options & PR_O_HTTP_PROXY) {
1501 /* If HTTP PROXY option is set, then server is already assigned
1502 * during incoming client request parsing. */
1503 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001504 else {
1505 /* no server and no LB algorithm ! */
1506 return SRV_STATUS_INTERNAL;
1507 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001508
1509 s->flags |= SN_ADDR_SET;
1510 return SRV_STATUS_OK;
1511}
1512
1513
1514/* This function assigns a server to session <s> if required, and can add the
1515 * connection to either the assigned server's queue or to the proxy's queue.
Willy Tarreau7c669d72008-06-20 15:04:11 +02001516 * If ->srv_conn is set, the session is first released from the server.
1517 * It may also be called with SN_DIRECT and/or SN_ASSIGNED though. It will
1518 * be called before any connection and after any retry or redispatch occurs.
1519 *
1520 * It is not allowed to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001521 *
1522 * Returns :
1523 *
1524 * SRV_STATUS_OK if everything is OK.
1525 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1526 * SRV_STATUS_QUEUED if the connection has been queued.
1527 * SRV_STATUS_FULL if the server(s) is/are saturated and the
Willy Tarreau7c669d72008-06-20 15:04:11 +02001528 * connection could not be queued in s->srv,
1529 * which may be NULL if we queue on the backend.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001530 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1531 *
1532 */
1533int assign_server_and_queue(struct session *s)
1534{
1535 struct pendconn *p;
1536 int err;
1537
1538 if (s->pend_pos)
1539 return SRV_STATUS_INTERNAL;
1540
Willy Tarreau7c669d72008-06-20 15:04:11 +02001541 err = SRV_STATUS_OK;
1542 if (!(s->flags & SN_ASSIGNED)) {
1543 err = assign_server(s);
1544 if (s->prev_srv) {
1545 /* This session was previously assigned to a server. We have to
1546 * update the session's and the server's stats :
1547 * - if the server changed :
1548 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1549 * - set SN_REDISP if it was successfully redispatched
1550 * - increment srv->redispatches and be->redispatches
1551 * - if the server remained the same : update retries.
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001552 */
1553
Willy Tarreau7c669d72008-06-20 15:04:11 +02001554 if (s->prev_srv != s->srv) {
1555 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1556 s->txn.flags &= ~TX_CK_MASK;
1557 s->txn.flags |= TX_CK_DOWN;
1558 }
1559 s->flags |= SN_REDISP;
1560 s->prev_srv->redispatches++;
1561 s->be->redispatches++;
1562 } else {
1563 s->prev_srv->retries++;
1564 s->be->retries++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001565 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001566 }
1567 }
1568
Willy Tarreaubaaee002006-06-26 02:48:02 +02001569 switch (err) {
1570 case SRV_STATUS_OK:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001571 /* we have SN_ASSIGNED set */
1572 if (!s->srv)
1573 return SRV_STATUS_OK; /* dispatch or proxy mode */
1574
1575 /* If we already have a connection slot, no need to check any queue */
1576 if (s->srv_conn == s->srv)
1577 return SRV_STATUS_OK;
1578
1579 /* OK, this session already has an assigned server, but no
1580 * connection slot yet. Either it is a redispatch, or it was
1581 * assigned from persistence information (direct mode).
1582 */
1583 if ((s->flags & SN_REDIRECTABLE) && s->srv->rdr_len) {
1584 /* server scheduled for redirection, and already assigned. We
1585 * don't want to go further nor check the queue.
Willy Tarreau21d2af32008-02-14 20:25:24 +01001586 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001587 sess_change_server(s, s->srv); /* not really needed in fact */
Willy Tarreau21d2af32008-02-14 20:25:24 +01001588 return SRV_STATUS_OK;
1589 }
1590
Willy Tarreau7c669d72008-06-20 15:04:11 +02001591 /* We might have to queue this session if the assigned server is full.
1592 * We know we have to queue it into the server's queue, so if a maxqueue
1593 * is set on the server, we must also check that the server's queue is
1594 * not full, in which case we have to return FULL.
1595 */
1596 if (s->srv->maxconn &&
1597 (s->srv->nbpend || s->srv->served >= srv_dynamic_maxconn(s->srv))) {
1598
1599 if (s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue)
1600 return SRV_STATUS_FULL;
1601
Willy Tarreaubaaee002006-06-26 02:48:02 +02001602 p = pendconn_add(s);
1603 if (p)
1604 return SRV_STATUS_QUEUED;
1605 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001606 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001607 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001608
1609 /* OK, we can use this server. Let's reserve our place */
1610 sess_change_server(s, s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001611 return SRV_STATUS_OK;
1612
1613 case SRV_STATUS_FULL:
1614 /* queue this session into the proxy's queue */
1615 p = pendconn_add(s);
1616 if (p)
1617 return SRV_STATUS_QUEUED;
1618 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001619 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001620
1621 case SRV_STATUS_NOSRV:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001622 return err;
1623
Willy Tarreaubaaee002006-06-26 02:48:02 +02001624 case SRV_STATUS_INTERNAL:
1625 return err;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001626
Willy Tarreaubaaee002006-06-26 02:48:02 +02001627 default:
1628 return SRV_STATUS_INTERNAL;
1629 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001630}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001631
1632/*
1633 * This function initiates a connection to the server assigned to this session
1634 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1635 * It can return one of :
1636 * - SN_ERR_NONE if everything's OK
1637 * - SN_ERR_SRVTO if there are no more servers
1638 * - SN_ERR_SRVCL if the connection was refused by the server
1639 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1640 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1641 * - SN_ERR_INTERNAL for any other purely internal errors
1642 * Additionally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1643 */
1644int connect_server(struct session *s)
1645{
1646 int fd, err;
1647
1648 if (!(s->flags & SN_ADDR_SET)) {
1649 err = assign_server_address(s);
1650 if (err != SRV_STATUS_OK)
1651 return SN_ERR_INTERNAL;
1652 }
1653
1654 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
1655 qfprintf(stderr, "Cannot get a server socket.\n");
1656
1657 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001658 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001659 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001660 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001661 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001662 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001663 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001664 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001665 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001666 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001667 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001668 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001669 /* this is a resource error */
1670 return SN_ERR_RESOURCE;
1671 }
1672
1673 if (fd >= global.maxsock) {
1674 /* do not log anything there, it's a normal condition when this option
1675 * is used to serialize connections to a server !
1676 */
1677 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1678 close(fd);
1679 return SN_ERR_PRXCOND; /* it is a configuration limit */
1680 }
1681
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001682#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001683 if ((s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001684 /* TCP splicing supported by both FE and BE */
1685 tcp_splice_initfd(s->cli_fd, fd);
1686 }
1687#endif
1688
Willy Tarreaubaaee002006-06-26 02:48:02 +02001689 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1690 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1691 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1692 close(fd);
1693 return SN_ERR_INTERNAL;
1694 }
1695
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001696 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001697 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1698
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001699 if (s->be->options & PR_O_TCP_NOLING)
1700 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1701
Willy Tarreaubaaee002006-06-26 02:48:02 +02001702 /* allow specific binding :
1703 * - server-specific at first
1704 * - proxy-specific next
1705 */
1706 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001707 struct sockaddr_in *remote = NULL;
1708 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001709
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001710#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001711 switch (s->srv->state & SRV_TPROXY_MASK) {
1712 case SRV_TPROXY_ADDR:
1713 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1714 flags = 3;
1715 break;
1716 case SRV_TPROXY_CLI:
1717 flags |= 2;
1718 /* fall through */
1719 case SRV_TPROXY_CIP:
1720 /* FIXME: what can we do if the client connects in IPv6 ? */
1721 flags |= 1;
1722 remote = (struct sockaddr_in *)&s->cli_addr;
1723 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001724 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001725#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001726 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001727 if (ret) {
1728 close(fd);
1729 if (ret == 1) {
1730 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1731 s->be->id, s->srv->id);
1732 send_log(s->be, LOG_EMERG,
1733 "Cannot bind to source address before connect() for server %s/%s.\n",
1734 s->be->id, s->srv->id);
1735 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001736 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001737 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001738 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001739 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001740 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001741 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001742 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001743 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001744 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001745 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001746 struct sockaddr_in *remote = NULL;
1747 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001748
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001749#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001750 switch (s->be->options & PR_O_TPXY_MASK) {
1751 case PR_O_TPXY_ADDR:
1752 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1753 flags = 3;
1754 break;
1755 case PR_O_TPXY_CLI:
1756 flags |= 2;
1757 /* fall through */
1758 case PR_O_TPXY_CIP:
1759 /* FIXME: what can we do if the client connects in IPv6 ? */
1760 flags |= 1;
1761 remote = (struct sockaddr_in *)&s->cli_addr;
1762 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001763 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001764#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001765 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001766 if (ret) {
1767 close(fd);
1768 if (ret == 1) {
1769 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1770 s->be->id);
1771 send_log(s->be, LOG_EMERG,
1772 "Cannot bind to source address before connect() for proxy %s.\n",
1773 s->be->id);
1774 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001775 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001776 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001777 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001778 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1779 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001780 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001781 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001782 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001783 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001784
Willy Tarreaubaaee002006-06-26 02:48:02 +02001785 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1786 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1787
1788 if (errno == EAGAIN || errno == EADDRINUSE) {
1789 char *msg;
1790 if (errno == EAGAIN) /* no free ports left, try again later */
1791 msg = "no free ports";
1792 else
1793 msg = "local address already in use";
1794
1795 qfprintf(stderr,"Cannot connect: %s.\n",msg);
1796 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001797 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001798 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001799 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001800 return SN_ERR_RESOURCE;
1801 } else if (errno == ETIMEDOUT) {
1802 //qfprintf(stderr,"Connect(): ETIMEDOUT");
1803 close(fd);
1804 return SN_ERR_SRVTO;
1805 } else {
1806 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1807 //qfprintf(stderr,"Connect(): %d", errno);
1808 close(fd);
1809 return SN_ERR_SRVCL;
1810 }
1811 }
1812
1813 fdtab[fd].owner = s->task;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001814 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +02001815 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001816 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001817 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001818 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001819
1820 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1821 fdtab[fd].peerlen = sizeof(s->srv_addr);
1822
Willy Tarreauf161a342007-04-08 16:59:42 +02001823 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001824
1825 fd_insert(fd);
1826 if (s->srv) {
1827 s->srv->cur_sess++;
1828 if (s->srv->cur_sess > s->srv->cur_sess_max)
1829 s->srv->cur_sess_max = s->srv->cur_sess;
Willy Tarreau51406232008-03-10 22:04:20 +01001830 if (s->be->lbprm.server_take_conn)
1831 s->be->lbprm.server_take_conn(s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001832 }
1833
Willy Tarreaud7c30f92007-12-03 01:38:36 +01001834 if (!tv_add_ifset(&s->req->cex, &now, &s->be->timeout.connect))
Willy Tarreaud7971282006-07-29 18:36:34 +02001835 tv_eternity(&s->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001836 return SN_ERR_NONE; /* connection is OK */
1837}
1838
1839
1840/*
1841 * This function checks the retry count during the connect() job.
1842 * It updates the session's srv_state and retries, so that the caller knows
1843 * what it has to do. It uses the last connection error to set the log when
1844 * it expires. It returns 1 when it has expired, and 0 otherwise.
1845 */
1846int srv_count_retry_down(struct session *t, int conn_err)
1847{
1848 /* we are in front of a retryable error */
1849 t->conn_retries--;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +02001850
Willy Tarreaubaaee002006-06-26 02:48:02 +02001851 if (t->conn_retries < 0) {
1852 /* if not retryable anymore, let's abort */
Willy Tarreaud7971282006-07-29 18:36:34 +02001853 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001854 srv_close_with_err(t, conn_err, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001855 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001856 if (t->srv)
1857 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001858 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001859
1860 /* We used to have a free connection slot. Since we'll never use it,
1861 * we have to inform the server that it may be used by another session.
1862 */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001863 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02001864 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001865 return 1;
1866 }
1867 return 0;
1868}
1869
1870
1871/*
1872 * This function performs the retryable part of the connect() job.
1873 * It updates the session's srv_state and retries, so that the caller knows
1874 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
1875 * it needs to redispatch.
1876 */
1877int srv_retryable_connect(struct session *t)
1878{
1879 int conn_err;
1880
1881 /* This loop ensures that we stop before the last retry in case of a
1882 * redispatchable server.
1883 */
1884 do {
1885 /* initiate a connection to the server */
1886 conn_err = connect_server(t);
1887 switch (conn_err) {
1888
1889 case SN_ERR_NONE:
1890 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
1891 t->srv_state = SV_STCONN;
Willy Tarreau98937b82007-12-10 15:05:42 +01001892 if (t->srv)
1893 t->srv->cum_sess++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001894 return 1;
1895
1896 case SN_ERR_INTERNAL:
Willy Tarreaud7971282006-07-29 18:36:34 +02001897 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001898 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001899 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001900 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001901 t->srv->cum_sess++;
1902 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001903 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001904 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001905 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001906 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02001907 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001908 return 1;
1909 }
1910 /* ensure that we have enough retries left */
1911 if (srv_count_retry_down(t, conn_err)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001912 return 1;
1913 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001914 } while (t->srv == NULL || t->conn_retries > 0 || !(t->be->options & PR_O_REDISP));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001915
1916 /* We're on our last chance, and the REDISP option was specified.
1917 * We will ignore cookie and force to balance or use the dispatcher.
1918 */
1919 /* let's try to offer this slot to anybody */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001920 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02001921 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001922
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001923 if (t->srv)
1924 t->srv->cum_sess++; //FIXME?
Willy Tarreaubaaee002006-06-26 02:48:02 +02001925
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001926 /* it's left to the dispatcher to choose a server */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001927 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001928 t->prev_srv = t->srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001929 return 0;
1930}
1931
1932
1933/* This function performs the "redispatch" part of a connection attempt. It
1934 * will assign a server if required, queue the connection if required, and
1935 * handle errors that might arise at this level. It can change the server
1936 * state. It will return 1 if it encounters an error, switches the server
1937 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1938 * that the connection is ready to use.
1939 */
1940
1941int srv_redispatch_connect(struct session *t)
1942{
1943 int conn_err;
1944
1945 /* We know that we don't have any connection pending, so we will
1946 * try to get a new one, and wait in this state if it's queued
1947 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001948 redispatch:
Willy Tarreaubaaee002006-06-26 02:48:02 +02001949 conn_err = assign_server_and_queue(t);
1950 switch (conn_err) {
1951 case SRV_STATUS_OK:
1952 break;
1953
Willy Tarreau7c669d72008-06-20 15:04:11 +02001954 case SRV_STATUS_FULL:
1955 /* The server has reached its maxqueue limit. Either PR_O_REDISP is set
1956 * and we can redispatch to another server, or it is not and we return
1957 * 503. This only makes sense in DIRECT mode however, because normal LB
1958 * algorithms would never select such a server, and hash algorithms
1959 * would bring us on the same server again. Note that t->srv is set in
1960 * this case.
1961 */
1962 if ((t->flags & SN_DIRECT) && (t->be->options & PR_O_REDISP)) {
1963 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
1964 t->prev_srv = t->srv;
1965 goto redispatch;
1966 }
1967
1968 tv_eternity(&t->req->cex);
1969 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_Q,
1970 503, error_message(t, HTTP_ERR_503));
1971
1972 t->srv->failed_conns++;
1973 t->be->failed_conns++;
1974 return 1;
1975
Willy Tarreaubaaee002006-06-26 02:48:02 +02001976 case SRV_STATUS_NOSRV:
1977 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaud7971282006-07-29 18:36:34 +02001978 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001979 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001980 503, error_message(t, HTTP_ERR_503));
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001981
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001982 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001983 return 1;
1984
1985 case SRV_STATUS_QUEUED:
Willy Tarreau1fa31262007-12-03 00:36:16 +01001986 if (!tv_add_ifset(&t->req->cex, &now, &t->be->timeout.queue))
Willy Tarreaud7971282006-07-29 18:36:34 +02001987 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001988 t->srv_state = SV_STIDLE;
1989 /* do nothing else and do not wake any other session up */
1990 return 1;
1991
Willy Tarreaubaaee002006-06-26 02:48:02 +02001992 case SRV_STATUS_INTERNAL:
1993 default:
Willy Tarreaud7971282006-07-29 18:36:34 +02001994 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001995 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001996 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001997 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001998 t->srv->cum_sess++;
1999 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02002000 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002001 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002002
2003 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002004 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02002005 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002006 return 1;
2007 }
2008 /* if we get here, it's because we got SRV_STATUS_OK, which also
2009 * means that the connection has not been queued.
2010 */
2011 return 0;
2012}
2013
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002014int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01002015 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002016 return px->down_time;
2017
2018 return now.tv_sec - px->last_change + px->down_time;
2019}
Willy Tarreaubaaee002006-06-26 02:48:02 +02002020
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002021/* This function parses a "balance" statement in a backend section describing
2022 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
2023 * returns -1, it may write an error message into ther <err> buffer, for at
2024 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
2025 * written. The function must be called with <args> pointing to the first word
2026 * after "balance".
2027 */
2028int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
2029{
2030 if (!*(args[0])) {
2031 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01002032 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2033 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002034 return 0;
2035 }
2036
2037 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002038 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2039 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002040 }
Willy Tarreau51406232008-03-10 22:04:20 +01002041 else if (!strcmp(args[0], "leastconn")) {
2042 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2043 curproxy->lbprm.algo |= BE_LB_ALGO_LC;
2044 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002045 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002046 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2047 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002048 }
2049 else if (!strcmp(args[0], "uri")) {
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002050 int arg = 1;
2051
Willy Tarreau31682232007-11-29 15:38:04 +01002052 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2053 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002054
2055 while (*args[arg]) {
2056 if (!strcmp(args[arg], "len")) {
2057 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2058 snprintf(err, errlen, "'balance uri len' expects a positive integer (got '%s').", args[arg+1]);
2059 return -1;
2060 }
2061 curproxy->uri_len_limit = atoi(args[arg+1]);
2062 arg += 2;
2063 }
2064 else if (!strcmp(args[arg], "depth")) {
2065 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2066 snprintf(err, errlen, "'balance uri depth' expects a positive integer (got '%s').", args[arg+1]);
2067 return -1;
2068 }
2069 /* hint: we store the position of the ending '/' (depth+1) so
2070 * that we avoid a comparison while computing the hash.
2071 */
2072 curproxy->uri_dirs_depth1 = atoi(args[arg+1]) + 1;
2073 arg += 2;
2074 }
2075 else {
2076 snprintf(err, errlen, "'balance uri' only accepts parameters 'len' and 'depth' (got '%s').", args[arg]);
2077 return -1;
2078 }
2079 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002080 }
Willy Tarreau01732802007-11-01 22:48:15 +01002081 else if (!strcmp(args[0], "url_param")) {
2082 if (!*args[1]) {
2083 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
2084 return -1;
2085 }
Willy Tarreau31682232007-11-29 15:38:04 +01002086 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2087 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreau01732802007-11-01 22:48:15 +01002088 if (curproxy->url_param_name)
2089 free(curproxy->url_param_name);
2090 curproxy->url_param_name = strdup(args[1]);
2091 curproxy->url_param_len = strlen(args[1]);
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002092 if (*args[2]) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02002093 if (strcmp(args[2], "check_post")) {
2094 snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
2095 return -1;
2096 }
2097 if (*args[3]) {
2098 /* TODO: maybe issue a warning if there is no value, no digits or too long */
2099 curproxy->url_param_post_limit = str2ui(args[3]);
2100 }
2101 /* if no limit, or faul value in args[3], then default to a moderate wordlen */
2102 if (!curproxy->url_param_post_limit)
2103 curproxy->url_param_post_limit = 48;
2104 else if ( curproxy->url_param_post_limit < 3 )
2105 curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
2106 }
Willy Tarreau01732802007-11-01 22:48:15 +01002107 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002108 else {
Willy Tarreau51406232008-03-10 22:04:20 +01002109 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri' and 'url_param' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002110 return -1;
2111 }
2112 return 0;
2113}
2114
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002115
2116/************************************************************************/
2117/* All supported keywords must be declared here. */
2118/************************************************************************/
2119
2120/* set test->i to the number of enabled servers on the proxy */
2121static int
2122acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
2123 struct acl_expr *expr, struct acl_test *test)
2124{
2125 test->flags = ACL_TEST_F_VOL_TEST;
2126 if (expr->arg_len) {
2127 /* another proxy was designated, we must look for it */
2128 for (px = proxy; px; px = px->next)
2129 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2130 break;
2131 }
2132 if (!px)
2133 return 0;
2134
2135 if (px->srv_act)
2136 test->i = px->srv_act;
2137 else if (px->lbprm.fbck)
2138 test->i = 1;
2139 else
2140 test->i = px->srv_bck;
2141
2142 return 1;
2143}
2144
2145
2146/* Note: must not be declared <const> as its list will be overwritten */
2147static struct acl_kw_list acl_kws = {{ },{
2148 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int },
2149 { NULL, NULL, NULL, NULL },
2150}};
2151
2152
2153__attribute__((constructor))
2154static void __backend_init(void)
2155{
2156 acl_register_keywords(&acl_kws);
2157}
2158
2159
Willy Tarreaubaaee002006-06-26 02:48:02 +02002160/*
2161 * Local variables:
2162 * c-indent-level: 8
2163 * c-basic-offset: 8
2164 * End:
2165 */