blob: 321c8a85d3223fc5a968427dd9327072577c7957 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +020019#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020
Willy Tarreau2dd0d472006-06-29 17:53:05 +020021#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020022#include <common/config.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020023#include <common/debug.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010024#include <common/eb32tree.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020025#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020026#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020027
Willy Tarreaubaaee002006-06-26 02:48:02 +020028#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020029
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010030#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020031#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020032#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020033#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010034#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020035#include <proto/log.h>
36#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010037#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020038#include <proto/queue.h>
Willy Tarreau7f062c42009-03-05 18:43:00 +010039#include <proto/server.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020040#include <proto/session.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041#include <proto/stream_sock.h>
42#include <proto/task.h>
43
Willy Tarreau6d1a9882007-01-07 02:03:04 +010044#ifdef CONFIG_HAP_TCPSPLICE
45#include <libtcpsplice.h>
46#endif
47
Willy Tarreaub625a082007-11-26 01:15:43 +010048static inline void fwrr_remove_from_tree(struct server *s);
49static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
50static inline void fwrr_dequeue_srv(struct server *s);
51static void fwrr_get_srv(struct server *s);
52static void fwrr_queue_srv(struct server *s);
53
54/* This function returns non-zero if a server with the given weight and state
55 * is usable for LB, otherwise zero.
56 */
57static inline int srv_is_usable(int state, int weight)
58{
59 if (!weight)
60 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010061 if (state & SRV_GOINGDOWN)
62 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010063 if (!(state & SRV_RUNNING))
64 return 0;
65 return 1;
66}
67
Willy Tarreaubaaee002006-06-26 02:48:02 +020068/*
69 * This function recounts the number of usable active and backup servers for
70 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010071 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010072 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010073 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020074 */
Willy Tarreaub625a082007-11-26 01:15:43 +010075static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020076{
77 struct server *srv;
78
Willy Tarreau20697042007-11-15 23:26:18 +010079 px->srv_act = px->srv_bck = 0;
80 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010081 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020082 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010083 if (!srv_is_usable(srv->state, srv->eweight))
84 continue;
85
86 if (srv->state & SRV_BACKUP) {
87 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010088 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010089 px->lbprm.fbck = srv;
90 px->srv_bck++;
91 px->lbprm.tot_wbck += srv->eweight;
92 } else {
93 px->srv_act++;
94 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020095 }
96 }
Willy Tarreaub625a082007-11-26 01:15:43 +010097}
Willy Tarreau20697042007-11-15 23:26:18 +010098
Willy Tarreaub625a082007-11-26 01:15:43 +010099/* This function simply updates the backend's tot_weight and tot_used values
100 * after servers weights have been updated. It is designed to be used after
101 * recount_servers() or equivalent.
102 */
103static void update_backend_weight(struct proxy *px)
104{
Willy Tarreau20697042007-11-15 23:26:18 +0100105 if (px->srv_act) {
106 px->lbprm.tot_weight = px->lbprm.tot_wact;
107 px->lbprm.tot_used = px->srv_act;
108 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100109 else if (px->lbprm.fbck) {
110 /* use only the first backup server */
111 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
112 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100113 }
114 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100115 px->lbprm.tot_weight = px->lbprm.tot_wbck;
116 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100117 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100118}
119
120/* this function updates the map according to server <srv>'s new state */
121static void map_set_server_status_down(struct server *srv)
122{
123 struct proxy *p = srv->proxy;
124
125 if (srv->state == srv->prev_state &&
126 srv->eweight == srv->prev_eweight)
127 return;
128
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100129 if (srv_is_usable(srv->state, srv->eweight))
130 goto out_update_state;
131
Willy Tarreaub625a082007-11-26 01:15:43 +0100132 /* FIXME: could be optimized since we know what changed */
133 recount_servers(p);
134 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100135 p->lbprm.map.state |= PR_MAP_RECALC;
136 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100137 srv->prev_state = srv->state;
138 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200139}
140
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100141/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100142static void map_set_server_status_up(struct server *srv)
143{
144 struct proxy *p = srv->proxy;
145
146 if (srv->state == srv->prev_state &&
147 srv->eweight == srv->prev_eweight)
148 return;
149
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100150 if (!srv_is_usable(srv->state, srv->eweight))
151 goto out_update_state;
152
Willy Tarreaub625a082007-11-26 01:15:43 +0100153 /* FIXME: could be optimized since we know what changed */
154 recount_servers(p);
155 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100156 p->lbprm.map.state |= PR_MAP_RECALC;
157 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100158 srv->prev_state = srv->state;
159 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100160}
161
Willy Tarreau20697042007-11-15 23:26:18 +0100162/* This function recomputes the server map for proxy px. It relies on
163 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
164 * called after recount_servers(). It also expects px->lbprm.map.srv
165 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200166 */
167void recalc_server_map(struct proxy *px)
168{
169 int o, tot, flag;
170 struct server *cur, *best;
171
Willy Tarreau20697042007-11-15 23:26:18 +0100172 switch (px->lbprm.tot_used) {
173 case 0: /* no server */
174 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200175 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100176 case 1: /* only one server, just fill first entry */
177 tot = 1;
178 break;
179 default:
180 tot = px->lbprm.tot_weight;
181 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200182 }
183
Willy Tarreau20697042007-11-15 23:26:18 +0100184 /* here we *know* that we have some servers */
185 if (px->srv_act)
186 flag = SRV_RUNNING;
187 else
188 flag = SRV_RUNNING | SRV_BACKUP;
189
Willy Tarreaubaaee002006-06-26 02:48:02 +0200190 /* this algorithm gives priority to the first server, which means that
191 * it will respect the declaration order for equivalent weights, and
192 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100193 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200194 * case, where we want the first server only.
195 */
196 for (cur = px->srv; cur; cur = cur->next)
197 cur->wscore = 0;
198
199 for (o = 0; o < tot; o++) {
200 int max = 0;
201 best = NULL;
202 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100203 if (flag == (cur->state &
204 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200205 int v;
206
207 /* If we are forced to return only one server, we don't want to
208 * go further, because we would return the wrong one due to
209 * divide overflow.
210 */
211 if (tot == 1) {
212 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100213 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200214 break;
215 }
216
Willy Tarreau417fae02007-03-25 21:16:40 +0200217 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200218 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
219 if (best == NULL || v > max) {
220 max = v;
221 best = cur;
222 }
223 }
224 }
Willy Tarreau20697042007-11-15 23:26:18 +0100225 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200226 best->wscore -= tot;
227 }
Willy Tarreau20697042007-11-15 23:26:18 +0100228 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200229}
230
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100231/* This function is responsible of building the server MAP for map-based LB
232 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
233 * weights if applicable. It should be called only once per proxy, at config
234 * time.
235 */
236void init_server_map(struct proxy *p)
237{
238 struct server *srv;
239 int pgcd;
240 int act, bck;
241
Willy Tarreaub625a082007-11-26 01:15:43 +0100242 p->lbprm.set_server_status_up = map_set_server_status_up;
243 p->lbprm.set_server_status_down = map_set_server_status_down;
244 p->lbprm.update_server_eweight = NULL;
245
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100246 if (!p->srv)
247 return;
248
249 /* We will factor the weights to reduce the table,
250 * using Euclide's largest common divisor algorithm
251 */
252 pgcd = p->srv->uweight;
253 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
254 int w = srv->uweight;
255 while (w) {
256 int t = pgcd % w;
257 pgcd = w;
258 w = t;
259 }
260 }
261
262 /* It is sometimes useful to know what factor to apply
263 * to the backend's effective weight to know its real
264 * weight.
265 */
266 p->lbprm.wmult = pgcd;
267
268 act = bck = 0;
269 for (srv = p->srv; srv; srv = srv->next) {
270 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100271 srv->prev_eweight = srv->eweight;
272 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100273 if (srv->state & SRV_BACKUP)
274 bck += srv->eweight;
275 else
276 act += srv->eweight;
277 }
278
279 /* this is the largest map we will ever need for this servers list */
280 if (act < bck)
281 act = bck;
282
283 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
284 /* recounts servers and their weights */
285 p->lbprm.map.state = PR_MAP_RECALC;
286 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100287 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100288 recalc_server_map(p);
289}
290
Willy Tarreaub625a082007-11-26 01:15:43 +0100291/* This function updates the server trees according to server <srv>'s new
292 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100293 * It is not important whether the server was already down or not. It is not
294 * important either that the new state is completely down (the caller may not
295 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100296 */
297static void fwrr_set_server_status_down(struct server *srv)
298{
299 struct proxy *p = srv->proxy;
300 struct fwrr_group *grp;
301
302 if (srv->state == srv->prev_state &&
303 srv->eweight == srv->prev_eweight)
304 return;
305
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100306 if (srv_is_usable(srv->state, srv->eweight))
307 goto out_update_state;
308
Willy Tarreaub625a082007-11-26 01:15:43 +0100309 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
310 /* server was already down */
311 goto out_update_backend;
312
313 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
314 grp->next_weight -= srv->prev_eweight;
315
316 if (srv->state & SRV_BACKUP) {
317 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
318 p->srv_bck--;
319
320 if (srv == p->lbprm.fbck) {
321 /* we lost the first backup server in a single-backup
322 * configuration, we must search another one.
323 */
324 struct server *srv2 = p->lbprm.fbck;
325 do {
326 srv2 = srv2->next;
327 } while (srv2 &&
328 !((srv2->state & SRV_BACKUP) &&
329 srv_is_usable(srv2->state, srv2->eweight)));
330 p->lbprm.fbck = srv2;
331 }
332 } else {
333 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
334 p->srv_act--;
335 }
336
337 fwrr_dequeue_srv(srv);
338 fwrr_remove_from_tree(srv);
339
340out_update_backend:
341 /* check/update tot_used, tot_weight */
342 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100343 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100344 srv->prev_state = srv->state;
345 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100346}
347
348/* This function updates the server trees according to server <srv>'s new
349 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100350 * It is not important whether the server was already down or not. It is not
351 * important either that the new state is completely UP (the caller may not
352 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100353 * the weight of a server which was already up.
354 */
355static void fwrr_set_server_status_up(struct server *srv)
356{
357 struct proxy *p = srv->proxy;
358 struct fwrr_group *grp;
359
360 if (srv->state == srv->prev_state &&
361 srv->eweight == srv->prev_eweight)
362 return;
363
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100364 if (!srv_is_usable(srv->state, srv->eweight))
365 goto out_update_state;
366
Willy Tarreaub625a082007-11-26 01:15:43 +0100367 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
368 /* server was already up */
369 goto out_update_backend;
370
371 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
372 grp->next_weight += srv->eweight;
373
374 if (srv->state & SRV_BACKUP) {
375 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
376 p->srv_bck++;
377
Willy Tarreauf4cca452008-03-08 21:42:54 +0100378 if (!(p->options & PR_O_USE_ALL_BK)) {
379 if (!p->lbprm.fbck) {
380 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100381 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100382 } else {
383 /* we may have restored a backup server prior to fbck,
384 * in which case it should replace it.
385 */
386 struct server *srv2 = srv;
387 do {
388 srv2 = srv2->next;
389 } while (srv2 && (srv2 != p->lbprm.fbck));
390 if (srv2)
391 p->lbprm.fbck = srv;
392 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100393 }
394 } else {
395 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
396 p->srv_act++;
397 }
398
399 /* note that eweight cannot be 0 here */
400 fwrr_get_srv(srv);
401 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
402 fwrr_queue_srv(srv);
403
404out_update_backend:
405 /* check/update tot_used, tot_weight */
406 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100407 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100408 srv->prev_state = srv->state;
409 srv->prev_eweight = srv->eweight;
410}
411
412/* This function must be called after an update to server <srv>'s effective
413 * weight. It may be called after a state change too.
414 */
415static void fwrr_update_server_weight(struct server *srv)
416{
417 int old_state, new_state;
418 struct proxy *p = srv->proxy;
419 struct fwrr_group *grp;
420
421 if (srv->state == srv->prev_state &&
422 srv->eweight == srv->prev_eweight)
423 return;
424
425 /* If changing the server's weight changes its state, we simply apply
426 * the procedures we already have for status change. If the state
427 * remains down, the server is not in any tree, so it's as easy as
428 * updating its values. If the state remains up with different weights,
429 * there are some computations to perform to find a new place and
430 * possibly a new tree for this server.
431 */
432
433 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
434 new_state = srv_is_usable(srv->state, srv->eweight);
435
436 if (!old_state && !new_state) {
437 srv->prev_state = srv->state;
438 srv->prev_eweight = srv->eweight;
439 return;
440 }
441 else if (!old_state && new_state) {
442 fwrr_set_server_status_up(srv);
443 return;
444 }
445 else if (old_state && !new_state) {
446 fwrr_set_server_status_down(srv);
447 return;
448 }
449
450 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
451 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
452
453 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
454 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
455
456 if (srv->lb_tree == grp->init) {
457 fwrr_dequeue_srv(srv);
458 fwrr_queue_by_weight(grp->init, srv);
459 }
460 else if (!srv->lb_tree) {
461 /* FIXME: server was down. This is not possible right now but
462 * may be needed soon for slowstart or graceful shutdown.
463 */
464 fwrr_dequeue_srv(srv);
465 fwrr_get_srv(srv);
466 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
467 fwrr_queue_srv(srv);
468 } else {
469 /* The server is either active or in the next queue. If it's
470 * still in the active queue and it has not consumed all of its
471 * places, let's adjust its next position.
472 */
473 fwrr_get_srv(srv);
474
475 if (srv->eweight > 0) {
476 int prev_next = srv->npos;
477 int step = grp->next_weight / srv->eweight;
478
479 srv->npos = srv->lpos + step;
480 srv->rweight = 0;
481
482 if (srv->npos > prev_next)
483 srv->npos = prev_next;
484 if (srv->npos < grp->curr_pos + 2)
485 srv->npos = grp->curr_pos + step;
486 } else {
487 /* push it into the next tree */
488 srv->npos = grp->curr_pos + grp->curr_weight;
489 }
490
491 fwrr_dequeue_srv(srv);
492 fwrr_queue_srv(srv);
493 }
494
495 update_backend_weight(p);
496 srv->prev_state = srv->state;
497 srv->prev_eweight = srv->eweight;
498}
499
500/* Remove a server from a tree. It must have previously been dequeued. This
501 * function is meant to be called when a server is going down or has its
502 * weight disabled.
503 */
504static inline void fwrr_remove_from_tree(struct server *s)
505{
506 s->lb_tree = NULL;
507}
508
509/* Queue a server in the weight tree <root>, assuming the weight is >0.
510 * We want to sort them by inverted weights, because we need to place
511 * heavy servers first in order to get a smooth distribution.
512 */
513static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
514{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100515 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100516 eb32_insert(root, &s->lb_node);
517 s->lb_tree = root;
518}
519
520/* This function is responsible for building the weight trees in case of fast
521 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
522 * ratio. Both active and backup groups are initialized.
523 */
524void fwrr_init_server_groups(struct proxy *p)
525{
526 struct server *srv;
527 struct eb_root init_head = EB_ROOT;
528
529 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
530 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
531 p->lbprm.update_server_eweight = fwrr_update_server_weight;
532
533 p->lbprm.wdiv = BE_WEIGHT_SCALE;
534 for (srv = p->srv; srv; srv = srv->next) {
535 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
536 srv->prev_state = srv->state;
537 }
538
539 recount_servers(p);
540 update_backend_weight(p);
541
542 /* prepare the active servers group */
543 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
544 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
545 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
546 p->lbprm.fwrr.act.t1 = init_head;
547 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
548 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
549
550 /* prepare the backup servers group */
551 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
552 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
553 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
554 p->lbprm.fwrr.bck.t1 = init_head;
555 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
556 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
557
558 /* queue active and backup servers in two distinct groups */
559 for (srv = p->srv; srv; srv = srv->next) {
560 if (!srv_is_usable(srv->state, srv->eweight))
561 continue;
562 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
563 p->lbprm.fwrr.bck.init :
564 p->lbprm.fwrr.act.init,
565 srv);
566 }
567}
568
569/* simply removes a server from a weight tree */
570static inline void fwrr_dequeue_srv(struct server *s)
571{
572 eb32_delete(&s->lb_node);
573}
574
575/* queues a server into the appropriate group and tree depending on its
576 * backup status, and ->npos. If the server is disabled, simply assign
577 * it to the NULL tree.
578 */
579static void fwrr_queue_srv(struct server *s)
580{
581 struct proxy *p = s->proxy;
582 struct fwrr_group *grp;
583
584 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
585
586 /* Delay everything which does not fit into the window and everything
587 * which does not fit into the theorical new window.
588 */
589 if (!srv_is_usable(s->state, s->eweight)) {
590 fwrr_remove_from_tree(s);
591 }
592 else if (s->eweight <= 0 ||
593 s->npos >= 2 * grp->curr_weight ||
594 s->npos >= grp->curr_weight + grp->next_weight) {
595 /* put into next tree, and readjust npos in case we could
596 * finally take this back to current. */
597 s->npos -= grp->curr_weight;
598 fwrr_queue_by_weight(grp->next, s);
599 }
600 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100601 /* The sorting key is stored in units of s->npos * user_weight
602 * in order to avoid overflows. As stated in backend.h, the
603 * lower the scale, the rougher the weights modulation, and the
604 * higher the scale, the lower the number of servers without
605 * overflow. With this formula, the result is always positive,
606 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100607 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100608 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
609 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
610
611 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100612 s->lb_tree = &grp->curr;
613 }
614}
615
616/* prepares a server when extracting it from the "init" tree */
617static inline void fwrr_get_srv_init(struct server *s)
618{
619 s->npos = s->rweight = 0;
620}
621
622/* prepares a server when extracting it from the "next" tree */
623static inline void fwrr_get_srv_next(struct server *s)
624{
625 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
626 &s->proxy->lbprm.fwrr.bck :
627 &s->proxy->lbprm.fwrr.act;
628
629 s->npos += grp->curr_weight;
630}
631
632/* prepares a server when it was marked down */
633static inline void fwrr_get_srv_down(struct server *s)
634{
635 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
636 &s->proxy->lbprm.fwrr.bck :
637 &s->proxy->lbprm.fwrr.act;
638
639 s->npos = grp->curr_pos;
640}
641
642/* prepares a server when extracting it from its tree */
643static void fwrr_get_srv(struct server *s)
644{
645 struct proxy *p = s->proxy;
646 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
647 &p->lbprm.fwrr.bck :
648 &p->lbprm.fwrr.act;
649
650 if (s->lb_tree == grp->init) {
651 fwrr_get_srv_init(s);
652 }
653 else if (s->lb_tree == grp->next) {
654 fwrr_get_srv_next(s);
655 }
656 else if (s->lb_tree == NULL) {
657 fwrr_get_srv_down(s);
658 }
659}
660
661/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
662 * when this happens, and "next" filled with servers sorted by weights.
663 */
664static inline void fwrr_switch_trees(struct fwrr_group *grp)
665{
666 struct eb_root *swap;
667 swap = grp->init;
668 grp->init = grp->next;
669 grp->next = swap;
670 grp->curr_weight = grp->next_weight;
671 grp->curr_pos = grp->curr_weight;
672}
673
674/* return next server from the current tree in FWRR group <grp>, or a server
675 * from the "init" tree if appropriate. If both trees are empty, return NULL.
676 */
677static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
678{
679 struct eb32_node *node;
680 struct server *s;
681
682 node = eb32_first(&grp->curr);
683 s = eb32_entry(node, struct server, lb_node);
684
685 if (!node || s->npos > grp->curr_pos) {
686 /* either we have no server left, or we have a hole */
687 struct eb32_node *node2;
688 node2 = eb32_first(grp->init);
689 if (node2) {
690 node = node2;
691 s = eb32_entry(node, struct server, lb_node);
692 fwrr_get_srv_init(s);
693 if (s->eweight == 0) /* FIXME: is it possible at all ? */
694 node = NULL;
695 }
696 }
697 if (node)
698 return s;
699 else
700 return NULL;
701}
702
703/* Computes next position of server <s> in the group. It is mandatory for <s>
704 * to have a non-zero, positive eweight.
705*/
706static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
707{
708 if (!s->npos) {
709 /* first time ever for this server */
710 s->lpos = grp->curr_pos;
711 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
712 s->rweight += grp->next_weight % s->eweight;
713
714 if (s->rweight >= s->eweight) {
715 s->rweight -= s->eweight;
716 s->npos++;
717 }
718 } else {
719 s->lpos = s->npos;
720 s->npos += grp->next_weight / s->eweight;
721 s->rweight += grp->next_weight % s->eweight;
722
723 if (s->rweight >= s->eweight) {
724 s->rweight -= s->eweight;
725 s->npos++;
726 }
727 }
728}
729
730/* Return next server from the current tree in backend <p>, or a server from
731 * the init tree if appropriate. If both trees are empty, return NULL.
732 * Saturated servers are skipped and requeued.
733 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100734static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100735{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100736 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100737 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100738 int switched;
739
740 if (p->srv_act)
741 grp = &p->lbprm.fwrr.act;
742 else if (p->lbprm.fbck)
743 return p->lbprm.fbck;
744 else if (p->srv_bck)
745 grp = &p->lbprm.fwrr.bck;
746 else
747 return NULL;
748
749 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100750 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100751 full = NULL; /* NULL-terminated list of saturated servers */
752 while (1) {
753 /* if we see an empty group, let's first try to collect weights
754 * which might have recently changed.
755 */
756 if (!grp->curr_weight)
757 grp->curr_pos = grp->curr_weight = grp->next_weight;
758
759 /* get first server from the "current" tree. When the end of
760 * the tree is reached, we may have to switch, but only once.
761 */
762 while (1) {
763 srv = fwrr_get_server_from_group(grp);
764 if (srv)
765 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100766 if (switched) {
767 if (avoided) {
768 srv = avoided;
769 break;
770 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100771 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100772 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100773 switched = 1;
774 fwrr_switch_trees(grp);
775
776 }
777
778 /* OK, we have a server. However, it may be saturated, in which
779 * case we don't want to reconsider it for now. We'll update
780 * its position and dequeue it anyway, so that we can move it
781 * to a better place afterwards.
782 */
783 fwrr_update_position(grp, srv);
784 fwrr_dequeue_srv(srv);
785 grp->curr_pos++;
Willy Tarreau7c669d72008-06-20 15:04:11 +0200786 if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100787 /* make sure it is not the server we are trying to exclude... */
788 if (srv != srvtoavoid || avoided)
789 break;
790
791 avoided = srv; /* ...but remember that is was selected yet avoided */
792 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100793
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100794 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100795 srv->next_full = full;
796 full = srv;
797 }
798
799 /* OK, we got the best server, let's update it */
800 fwrr_queue_srv(srv);
801
802 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100803 /* Requeue all extracted servers. If full==srv then it was
804 * avoided (unsucessfully) and chained, omit it now.
805 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100806 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100807 if (switched) {
808 /* the tree has switched, requeue all extracted servers
809 * into "init", because their place was lost, and only
810 * their weight matters.
811 */
812 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100813 if (likely(full != srv))
814 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100815 full = full->next_full;
816 } while (full);
817 } else {
818 /* requeue all extracted servers just as if they were consumed
819 * so that they regain their expected place.
820 */
821 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100822 if (likely(full != srv))
823 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100824 full = full->next_full;
825 } while (full);
826 }
827 }
828 return srv;
829}
830
Willy Tarreau51406232008-03-10 22:04:20 +0100831/* Remove a server from a tree. It must have previously been dequeued. This
832 * function is meant to be called when a server is going down or has its
833 * weight disabled.
834 */
835static inline void fwlc_remove_from_tree(struct server *s)
836{
837 s->lb_tree = NULL;
838}
839
840/* simply removes a server from a tree */
841static inline void fwlc_dequeue_srv(struct server *s)
842{
843 eb32_delete(&s->lb_node);
844}
845
846/* Queue a server in its associated tree, assuming the weight is >0.
847 * Servers are sorted by #conns/weight. To ensure maximum accuracy,
848 * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key.
849 */
850static inline void fwlc_queue_srv(struct server *s)
851{
Willy Tarreau7c669d72008-06-20 15:04:11 +0200852 s->lb_node.key = s->served * SRV_EWGHT_MAX / s->eweight;
Willy Tarreau51406232008-03-10 22:04:20 +0100853 eb32_insert(s->lb_tree, &s->lb_node);
854}
855
856/* Re-position the server in the FWLC tree after it has been assigned one
857 * connection or after it has released one. Note that it is possible that
858 * the server has been moved out of the tree due to failed health-checks.
859 */
860static void fwlc_srv_reposition(struct server *s)
861{
862 if (!s->lb_tree)
863 return;
864 fwlc_dequeue_srv(s);
865 fwlc_queue_srv(s);
866}
867
868/* This function updates the server trees according to server <srv>'s new
869 * state. It should be called when server <srv>'s status changes to down.
870 * It is not important whether the server was already down or not. It is not
871 * important either that the new state is completely down (the caller may not
872 * know all the variables of a server's state).
873 */
874static void fwlc_set_server_status_down(struct server *srv)
875{
876 struct proxy *p = srv->proxy;
877
878 if (srv->state == srv->prev_state &&
879 srv->eweight == srv->prev_eweight)
880 return;
881
882 if (srv_is_usable(srv->state, srv->eweight))
883 goto out_update_state;
884
885 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
886 /* server was already down */
887 goto out_update_backend;
888
889 if (srv->state & SRV_BACKUP) {
890 p->lbprm.tot_wbck -= srv->prev_eweight;
891 p->srv_bck--;
892
893 if (srv == p->lbprm.fbck) {
894 /* we lost the first backup server in a single-backup
895 * configuration, we must search another one.
896 */
897 struct server *srv2 = p->lbprm.fbck;
898 do {
899 srv2 = srv2->next;
900 } while (srv2 &&
901 !((srv2->state & SRV_BACKUP) &&
902 srv_is_usable(srv2->state, srv2->eweight)));
903 p->lbprm.fbck = srv2;
904 }
905 } else {
906 p->lbprm.tot_wact -= srv->prev_eweight;
907 p->srv_act--;
908 }
909
910 fwlc_dequeue_srv(srv);
911 fwlc_remove_from_tree(srv);
912
913out_update_backend:
914 /* check/update tot_used, tot_weight */
915 update_backend_weight(p);
916 out_update_state:
917 srv->prev_state = srv->state;
918 srv->prev_eweight = srv->eweight;
919}
920
921/* This function updates the server trees according to server <srv>'s new
922 * state. It should be called when server <srv>'s status changes to up.
923 * It is not important whether the server was already down or not. It is not
924 * important either that the new state is completely UP (the caller may not
925 * know all the variables of a server's state). This function will not change
926 * the weight of a server which was already up.
927 */
928static void fwlc_set_server_status_up(struct server *srv)
929{
930 struct proxy *p = srv->proxy;
931
932 if (srv->state == srv->prev_state &&
933 srv->eweight == srv->prev_eweight)
934 return;
935
936 if (!srv_is_usable(srv->state, srv->eweight))
937 goto out_update_state;
938
939 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
940 /* server was already up */
941 goto out_update_backend;
942
943 if (srv->state & SRV_BACKUP) {
944 srv->lb_tree = &p->lbprm.fwlc.bck;
945 p->lbprm.tot_wbck += srv->eweight;
946 p->srv_bck++;
947
948 if (!(p->options & PR_O_USE_ALL_BK)) {
949 if (!p->lbprm.fbck) {
950 /* there was no backup server anymore */
951 p->lbprm.fbck = srv;
952 } else {
953 /* we may have restored a backup server prior to fbck,
954 * in which case it should replace it.
955 */
956 struct server *srv2 = srv;
957 do {
958 srv2 = srv2->next;
959 } while (srv2 && (srv2 != p->lbprm.fbck));
960 if (srv2)
961 p->lbprm.fbck = srv;
962 }
963 }
964 } else {
965 srv->lb_tree = &p->lbprm.fwlc.act;
966 p->lbprm.tot_wact += srv->eweight;
967 p->srv_act++;
968 }
969
970 /* note that eweight cannot be 0 here */
971 fwlc_queue_srv(srv);
972
973 out_update_backend:
974 /* check/update tot_used, tot_weight */
975 update_backend_weight(p);
976 out_update_state:
977 srv->prev_state = srv->state;
978 srv->prev_eweight = srv->eweight;
979}
980
981/* This function must be called after an update to server <srv>'s effective
982 * weight. It may be called after a state change too.
983 */
984static void fwlc_update_server_weight(struct server *srv)
985{
986 int old_state, new_state;
987 struct proxy *p = srv->proxy;
988
989 if (srv->state == srv->prev_state &&
990 srv->eweight == srv->prev_eweight)
991 return;
992
993 /* If changing the server's weight changes its state, we simply apply
994 * the procedures we already have for status change. If the state
995 * remains down, the server is not in any tree, so it's as easy as
996 * updating its values. If the state remains up with different weights,
997 * there are some computations to perform to find a new place and
998 * possibly a new tree for this server.
999 */
1000
1001 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
1002 new_state = srv_is_usable(srv->state, srv->eweight);
1003
1004 if (!old_state && !new_state) {
1005 srv->prev_state = srv->state;
1006 srv->prev_eweight = srv->eweight;
1007 return;
1008 }
1009 else if (!old_state && new_state) {
1010 fwlc_set_server_status_up(srv);
1011 return;
1012 }
1013 else if (old_state && !new_state) {
1014 fwlc_set_server_status_down(srv);
1015 return;
1016 }
1017
1018 if (srv->lb_tree)
1019 fwlc_dequeue_srv(srv);
1020
1021 if (srv->state & SRV_BACKUP) {
1022 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
1023 srv->lb_tree = &p->lbprm.fwlc.bck;
1024 } else {
1025 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
1026 srv->lb_tree = &p->lbprm.fwlc.act;
1027 }
1028
1029 fwlc_queue_srv(srv);
1030
1031 update_backend_weight(p);
1032 srv->prev_state = srv->state;
1033 srv->prev_eweight = srv->eweight;
1034}
1035
1036/* This function is responsible for building the trees in case of fast
1037 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
1038 * uweight ratio. Both active and backup groups are initialized.
1039 */
1040void fwlc_init_server_tree(struct proxy *p)
1041{
1042 struct server *srv;
1043 struct eb_root init_head = EB_ROOT;
1044
1045 p->lbprm.set_server_status_up = fwlc_set_server_status_up;
1046 p->lbprm.set_server_status_down = fwlc_set_server_status_down;
1047 p->lbprm.update_server_eweight = fwlc_update_server_weight;
1048 p->lbprm.server_take_conn = fwlc_srv_reposition;
1049 p->lbprm.server_drop_conn = fwlc_srv_reposition;
1050
1051 p->lbprm.wdiv = BE_WEIGHT_SCALE;
1052 for (srv = p->srv; srv; srv = srv->next) {
1053 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
1054 srv->prev_state = srv->state;
1055 }
1056
1057 recount_servers(p);
1058 update_backend_weight(p);
1059
1060 p->lbprm.fwlc.act = init_head;
1061 p->lbprm.fwlc.bck = init_head;
1062
1063 /* queue active and backup servers in two distinct groups */
1064 for (srv = p->srv; srv; srv = srv->next) {
1065 if (!srv_is_usable(srv->state, srv->eweight))
1066 continue;
1067 srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
1068 fwlc_queue_srv(srv);
1069 }
1070}
1071
1072/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
1073 * return NULL. Saturated servers are skipped.
1074 */
1075static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
1076{
1077 struct server *srv, *avoided;
1078 struct eb32_node *node;
1079
1080 srv = avoided = NULL;
1081
1082 if (p->srv_act)
1083 node = eb32_first(&p->lbprm.fwlc.act);
1084 else if (p->lbprm.fbck)
1085 return p->lbprm.fbck;
1086 else if (p->srv_bck)
1087 node = eb32_first(&p->lbprm.fwlc.bck);
1088 else
1089 return NULL;
1090
1091 while (node) {
1092 /* OK, we have a server. However, it may be saturated, in which
1093 * case we don't want to reconsider it for now, so we'll simply
1094 * skip it. Same if it's the server we try to avoid, in which
1095 * case we simply remember it for later use if needed.
1096 */
1097 struct server *s;
1098
1099 s = eb32_entry(node, struct server, lb_node);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001100 if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
Willy Tarreau51406232008-03-10 22:04:20 +01001101 if (s != srvtoavoid) {
1102 srv = s;
1103 break;
1104 }
1105 avoided = s;
1106 }
1107 node = eb32_next(node);
1108 }
1109
1110 if (!srv)
1111 srv = avoided;
1112
1113 return srv;
1114}
1115
Willy Tarreau01732802007-11-01 22:48:15 +01001116/*
1117 * This function tries to find a running server for the proxy <px> following
1118 * the URL parameter hash method. It looks for a specific parameter in the
1119 * URL and hashes it to compute the server ID. This is useful to optimize
1120 * performance by avoiding bounces between servers in contexts where sessions
1121 * are shared but cookies are not usable. If the parameter is not found, NULL
1122 * is returned. If any server is found, it will be returned. If no valid server
1123 * is found, NULL is returned.
Willy Tarreau01732802007-11-01 22:48:15 +01001124 */
1125struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
1126{
1127 unsigned long hash = 0;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001128 const char *p;
1129 const char *params;
Willy Tarreau01732802007-11-01 22:48:15 +01001130 int plen;
1131
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001132 /* when tot_weight is 0 then so is srv_count */
Willy Tarreau20697042007-11-15 23:26:18 +01001133 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +01001134 return NULL;
1135
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001136 if ((p = memchr(uri, '?', uri_len)) == NULL)
1137 return NULL;
1138
Willy Tarreau20697042007-11-15 23:26:18 +01001139 if (px->lbprm.map.state & PR_MAP_RECALC)
1140 recalc_server_map(px);
1141
Willy Tarreau01732802007-11-01 22:48:15 +01001142 p++;
1143
1144 uri_len -= (p - uri);
1145 plen = px->url_param_len;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001146 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001147
1148 while (uri_len > plen) {
1149 /* Look for the parameter name followed by an equal symbol */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001150 if (params[plen] == '=') {
1151 if (memcmp(params, px->url_param_name, plen) == 0) {
1152 /* OK, we have the parameter here at <params>, and
Willy Tarreau01732802007-11-01 22:48:15 +01001153 * the value after the equal sign, at <p>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001154 * skip the equal symbol
Willy Tarreau01732802007-11-01 22:48:15 +01001155 */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001156 p += plen + 1;
1157 uri_len -= plen + 1;
1158
Willy Tarreau01732802007-11-01 22:48:15 +01001159 while (uri_len && *p != '&') {
1160 hash = *p + (hash << 6) + (hash << 16) - hash;
1161 uri_len--;
1162 p++;
1163 }
Willy Tarreau20697042007-11-15 23:26:18 +01001164 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +01001165 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001166 }
1167 /* skip to next parameter */
1168 p = memchr(params, '&', uri_len);
1169 if (!p)
1170 return NULL;
1171 p++;
1172 uri_len -= (p - params);
1173 params = p;
1174 }
1175 return NULL;
1176}
1177
1178/*
1179 * this does the same as the previous server_ph, but check the body contents
1180 */
1181struct server *get_server_ph_post(struct session *s)
1182{
1183 unsigned long hash = 0;
1184 struct http_txn *txn = &s->txn;
1185 struct buffer *req = s->req;
1186 struct http_msg *msg = &txn->req;
1187 struct proxy *px = s->be;
1188 unsigned int plen = px->url_param_len;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001189 unsigned long body;
1190 unsigned long len;
1191 const char *params;
1192 struct hdr_ctx ctx;
1193 const char *p;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001194
1195 /* tot_weight appears to mean srv_count */
1196 if (px->lbprm.tot_weight == 0)
1197 return NULL;
1198
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001199 body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
Willy Tarreaufb0528b2008-08-11 00:21:56 +02001200 len = req->l - body;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001201 params = req->data + body;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001202
1203 if ( len == 0 )
1204 return NULL;
1205
1206 if (px->lbprm.map.state & PR_MAP_RECALC)
1207 recalc_server_map(px);
1208
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001209 ctx.idx = 0;
1210
1211 /* if the message is chunked, we skip the chunk size, but use the value as len */
1212 http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
Willy Tarreauadfb8562008-08-11 15:24:42 +02001213 if (ctx.idx && ctx.vlen >= 7 && strncasecmp(ctx.line+ctx.val, "chunked", 7) == 0) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001214 unsigned int chunk = 0;
Willy Tarreau03d60bb2009-01-09 11:13:00 +01001215 while ( params < (req->data+req->max_len) && !HTTP_IS_CRLF(*params)) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001216 char c = *params;
1217 if (ishex(c)) {
1218 unsigned int hex = toupper(c) - '0';
1219 if ( hex > 9 )
1220 hex -= 'A' - '9' - 1;
1221 chunk = (chunk << 4) | hex;
1222 }
1223 else
1224 return NULL;
1225 params++;
1226 len--;
Willy Tarreau01732802007-11-01 22:48:15 +01001227 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001228 /* spec says we get CRLF */
1229 if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
1230 params += 2;
1231 else
1232 return NULL;
1233 /* ok we have some encoded length, just inspect the first chunk */
1234 len = chunk;
1235 }
Willy Tarreau01732802007-11-01 22:48:15 +01001236
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001237 p = params;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001238
1239 while (len > plen) {
1240 /* Look for the parameter name followed by an equal symbol */
1241 if (params[plen] == '=') {
1242 if (memcmp(params, px->url_param_name, plen) == 0) {
1243 /* OK, we have the parameter here at <params>, and
1244 * the value after the equal sign, at <p>
1245 * skip the equal symbol
1246 */
1247 p += plen + 1;
1248 len -= plen + 1;
1249
1250 while (len && *p != '&') {
1251 if (unlikely(!HTTP_IS_TOKEN(*p))) {
1252 /* if in a POST, body must be URI encoded or its not a URI.
1253 * Do not interprete any possible binary data as a parameter.
1254 */
1255 if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
1256 break;
1257 return NULL; /* oh, no; this is not uri-encoded.
1258 * This body does not contain parameters.
1259 */
1260 }
1261 hash = *p + (hash << 6) + (hash << 16) - hash;
1262 len--;
1263 p++;
1264 /* should we break if vlen exceeds limit? */
1265 }
1266 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1267 }
1268 }
Willy Tarreau01732802007-11-01 22:48:15 +01001269 /* skip to next parameter */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001270 p = memchr(params, '&', len);
Willy Tarreau01732802007-11-01 22:48:15 +01001271 if (!p)
1272 return NULL;
1273 p++;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001274 len -= (p - params);
1275 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001276 }
1277 return NULL;
1278}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001279
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001280
Willy Tarreaubaaee002006-06-26 02:48:02 +02001281/*
Benoitaffb4812009-03-25 13:02:10 +01001282 * This function tries to find a running server for the proxy <px> following
1283 * the Header parameter hash method. It looks for a specific parameter in the
1284 * URL and hashes it to compute the server ID. This is useful to optimize
1285 * performance by avoiding bounces between servers in contexts where sessions
1286 * are shared but cookies are not usable. If the parameter is not found, NULL
1287 * is returned. If any server is found, it will be returned. If no valid server
1288 * is found, NULL is returned.
1289 */
1290struct server *get_server_hh(struct session *s)
1291{
1292 unsigned long hash = 0;
1293 struct http_txn *txn = &s->txn;
1294 struct http_msg *msg = &txn->req;
1295 struct proxy *px = s->be;
1296 unsigned int plen = px->hh_len;
1297 unsigned long len;
1298 struct hdr_ctx ctx;
1299 const char *p;
1300
1301 /* tot_weight appears to mean srv_count */
1302 if (px->lbprm.tot_weight == 0)
1303 return NULL;
1304
1305 if (px->lbprm.map.state & PR_MAP_RECALC)
1306 recalc_server_map(px);
1307
1308 ctx.idx = 0;
1309
1310 /* if the message is chunked, we skip the chunk size, but use the value as len */
1311 http_find_header2(px->hh_name, plen, msg->sol, &txn->hdr_idx, &ctx);
1312
1313 /* if the header is not found or empty, let's fallback to round robin */
1314 if (!ctx.idx || !ctx.vlen)
1315 return NULL;
1316
1317 /* Found a the hh_name in the headers.
1318 * we will compute the hash based on this value ctx.val.
1319 */
1320 len = ctx.vlen;
1321 p = (char *)ctx.line + ctx.val;
1322 if (!px->hh_match_domain) {
1323 while (len) {
1324 hash = *p + (hash << 6) + (hash << 16) - hash;
1325 len--;
1326 p++;
1327 }
1328 } else {
1329 int dohash = 0;
1330 p += len - 1;
1331 /* special computation, use only main domain name, not tld/host
1332 * going back from the end of string, start hashing at first
1333 * dot stop at next.
1334 * This is designed to work with the 'Host' header, and requires
1335 * a special option to activate this.
1336 */
1337 while (len) {
1338 if (*p == '.') {
1339 if (!dohash)
1340 dohash = 1;
1341 else
1342 break;
1343 } else {
1344 if (dohash)
1345 hash = *p + (hash << 6) + (hash << 16) - hash;
1346 }
1347 len--;
1348 p--;
1349 }
1350 }
1351 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1352}
1353
1354
1355/*
Willy Tarreau7c669d72008-06-20 15:04:11 +02001356 * This function applies the load-balancing algorithm to the session, as
1357 * defined by the backend it is assigned to. The session is then marked as
1358 * 'assigned'.
1359 *
1360 * This function MAY NOT be called with SN_ASSIGNED already set. If the session
1361 * had a server previously assigned, it is rebalanced, trying to avoid the same
1362 * server.
1363 * The function tries to keep the original connection slot if it reconnects to
1364 * the same server, otherwise it releases it and tries to offer it.
1365 *
1366 * It is illegal to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001367 *
1368 * It may return :
Willy Tarreau7c669d72008-06-20 15:04:11 +02001369 * SRV_STATUS_OK if everything is OK. Session assigned to ->srv
1370 * SRV_STATUS_NOSRV if no server is available. Session is not ASSIGNED
1371 * SRV_STATUS_FULL if all servers are saturated. Session is not ASSIGNED
Willy Tarreaubaaee002006-06-26 02:48:02 +02001372 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1373 *
Willy Tarreau7c669d72008-06-20 15:04:11 +02001374 * Upon successful return, the session flag SN_ASSIGNED is set to indicate that
1375 * it does not need to be called anymore. This means that s->srv can be trusted
1376 * in balance and direct modes.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001377 *
1378 */
1379
1380int assign_server(struct session *s)
1381{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001382
Willy Tarreau7c669d72008-06-20 15:04:11 +02001383 struct server *conn_slot;
1384 int err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001385
Willy Tarreaubaaee002006-06-26 02:48:02 +02001386#ifdef DEBUG_FULL
1387 fprintf(stderr,"assign_server : s=%p\n",s);
1388#endif
1389
Willy Tarreau7c669d72008-06-20 15:04:11 +02001390 err = SRV_STATUS_INTERNAL;
1391 if (unlikely(s->pend_pos || s->flags & SN_ASSIGNED))
1392 goto out_err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001393
Willy Tarreau7c669d72008-06-20 15:04:11 +02001394 s->prev_srv = s->prev_srv;
1395 conn_slot = s->srv_conn;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001396
Willy Tarreau7c669d72008-06-20 15:04:11 +02001397 /* We have to release any connection slot before applying any LB algo,
1398 * otherwise we may erroneously end up with no available slot.
1399 */
1400 if (conn_slot)
1401 sess_change_server(s, NULL);
1402
1403 /* We will now try to find the good server and store it into <s->srv>.
1404 * Note that <s->srv> may be NULL in case of dispatch or proxy mode,
1405 * as well as if no server is available (check error code).
1406 */
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001407
Willy Tarreau7c669d72008-06-20 15:04:11 +02001408 s->srv = NULL;
1409 if (s->be->lbprm.algo & BE_LB_ALGO) {
1410 int len;
1411 /* we must check if we have at least one server available */
1412 if (!s->be->lbprm.tot_weight) {
1413 err = SRV_STATUS_NOSRV;
1414 goto out;
1415 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001416
Willy Tarreau7c669d72008-06-20 15:04:11 +02001417 switch (s->be->lbprm.algo & BE_LB_ALGO) {
1418 case BE_LB_ALGO_RR:
1419 s->srv = fwrr_get_next_server(s->be, s->prev_srv);
1420 if (!s->srv) {
1421 err = SRV_STATUS_FULL;
1422 goto out;
1423 }
1424 break;
1425 case BE_LB_ALGO_LC:
1426 s->srv = fwlc_get_next_server(s->be, s->prev_srv);
1427 if (!s->srv) {
1428 err = SRV_STATUS_FULL;
1429 goto out;
1430 }
1431 break;
1432 case BE_LB_ALGO_SH:
1433 if (s->cli_addr.ss_family == AF_INET)
1434 len = 4;
1435 else if (s->cli_addr.ss_family == AF_INET6)
1436 len = 16;
1437 else {
1438 /* unknown IP family */
1439 err = SRV_STATUS_INTERNAL;
1440 goto out;
1441 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001442
Willy Tarreau7c669d72008-06-20 15:04:11 +02001443 s->srv = get_server_sh(s->be,
1444 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
1445 len);
1446 break;
1447 case BE_LB_ALGO_UH:
1448 /* URI hashing */
1449 s->srv = get_server_uh(s->be,
1450 s->txn.req.sol + s->txn.req.sl.rq.u,
1451 s->txn.req.sl.rq.u_l);
1452 break;
1453 case BE_LB_ALGO_PH:
1454 /* URL Parameter hashing */
1455 if (s->txn.meth == HTTP_METH_POST &&
1456 memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
1457 s->txn.req.sl.rq.u_l ) == NULL)
1458 s->srv = get_server_ph_post(s);
1459 else
1460 s->srv = get_server_ph(s->be,
Willy Tarreau2fcb5002007-05-08 13:35:26 +02001461 s->txn.req.sol + s->txn.req.sl.rq.u,
1462 s->txn.req.sl.rq.u_l);
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001463
Willy Tarreau7c669d72008-06-20 15:04:11 +02001464 if (!s->srv) {
1465 /* parameter not found, fall back to round robin on the map */
1466 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
Willy Tarreau01732802007-11-01 22:48:15 +01001467 if (!s->srv) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001468 err = SRV_STATUS_FULL;
1469 goto out;
Willy Tarreau01732802007-11-01 22:48:15 +01001470 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001471 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001472 break;
Benoitaffb4812009-03-25 13:02:10 +01001473 case BE_LB_ALGO_HH:
1474 /* Header Parameter hashing */
1475 s->srv = get_server_hh(s);
1476
1477 if (!s->srv) {
1478 /* parameter not found, fall back to round robin on the map */
1479 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
1480 if (!s->srv) {
1481 err = SRV_STATUS_FULL;
1482 goto out;
1483 }
1484 }
1485 break;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001486 default:
1487 /* unknown balancing algorithm */
1488 err = SRV_STATUS_INTERNAL;
1489 goto out;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001490 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001491 if (s->srv != s->prev_srv) {
1492 s->be->cum_lbconn++;
1493 s->srv->cum_lbconn++;
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001494 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001495 }
1496 else if (s->be->options & PR_O_HTTP_PROXY) {
1497 if (!s->srv_addr.sin_addr.s_addr) {
1498 err = SRV_STATUS_NOSRV;
1499 goto out;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001500 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001501 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001502 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001503 !(s->be->options & PR_O_TRANSP)) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001504 err = SRV_STATUS_NOSRV;
1505 goto out;
1506 }
1507
1508 s->flags |= SN_ASSIGNED;
1509 err = SRV_STATUS_OK;
1510 out:
1511
1512 /* Either we take back our connection slot, or we offer it to someone
1513 * else if we don't need it anymore.
1514 */
1515 if (conn_slot) {
1516 if (conn_slot == s->srv) {
1517 sess_change_server(s, s->srv);
1518 } else {
1519 if (may_dequeue_tasks(conn_slot, s->be))
1520 process_srv_queue(conn_slot);
1521 }
1522 }
1523
1524 out_err:
1525 return err;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001526}
1527
1528
1529/*
1530 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1531 * The address is taken from the currently assigned server, or from the
1532 * dispatch or transparent address.
1533 *
1534 * It may return :
1535 * SRV_STATUS_OK if everything is OK.
1536 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1537 *
1538 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1539 * not cleared, so it's to the caller to clear it if required.
1540 *
1541 */
1542int assign_server_address(struct session *s)
1543{
1544#ifdef DEBUG_FULL
1545 fprintf(stderr,"assign_server_address : s=%p\n",s);
1546#endif
1547
Willy Tarreau31682232007-11-29 15:38:04 +01001548 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001549 /* A server is necessarily known for this session */
1550 if (!(s->flags & SN_ASSIGNED))
1551 return SRV_STATUS_INTERNAL;
1552
1553 s->srv_addr = s->srv->addr;
1554
1555 /* if this server remaps proxied ports, we'll use
1556 * the port the client connected to with an offset. */
1557 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001558 if (!(s->be->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001559 get_frt_addr(s);
1560 if (s->frt_addr.ss_family == AF_INET) {
1561 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1562 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1563 } else {
1564 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1565 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1566 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001567 }
1568 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001569 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001570 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001571 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001572 }
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001573 else if (s->be->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001574 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001575 if (!(s->flags & SN_FRT_ADDR_SET))
1576 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001577
Willy Tarreaubd414282008-01-19 13:46:35 +01001578 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1579 /* when we support IPv6 on the backend, we may add other tests */
1580 //qfprintf(stderr, "Cannot get original server address.\n");
1581 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001582 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001583 else if (s->be->options & PR_O_HTTP_PROXY) {
1584 /* If HTTP PROXY option is set, then server is already assigned
1585 * during incoming client request parsing. */
1586 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001587 else {
1588 /* no server and no LB algorithm ! */
1589 return SRV_STATUS_INTERNAL;
1590 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001591
1592 s->flags |= SN_ADDR_SET;
1593 return SRV_STATUS_OK;
1594}
1595
1596
1597/* This function assigns a server to session <s> if required, and can add the
1598 * connection to either the assigned server's queue or to the proxy's queue.
Willy Tarreau7c669d72008-06-20 15:04:11 +02001599 * If ->srv_conn is set, the session is first released from the server.
1600 * It may also be called with SN_DIRECT and/or SN_ASSIGNED though. It will
1601 * be called before any connection and after any retry or redispatch occurs.
1602 *
1603 * It is not allowed to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001604 *
1605 * Returns :
1606 *
1607 * SRV_STATUS_OK if everything is OK.
1608 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1609 * SRV_STATUS_QUEUED if the connection has been queued.
1610 * SRV_STATUS_FULL if the server(s) is/are saturated and the
Willy Tarreau7c669d72008-06-20 15:04:11 +02001611 * connection could not be queued in s->srv,
1612 * which may be NULL if we queue on the backend.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001613 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1614 *
1615 */
1616int assign_server_and_queue(struct session *s)
1617{
1618 struct pendconn *p;
1619 int err;
1620
1621 if (s->pend_pos)
1622 return SRV_STATUS_INTERNAL;
1623
Willy Tarreau7c669d72008-06-20 15:04:11 +02001624 err = SRV_STATUS_OK;
1625 if (!(s->flags & SN_ASSIGNED)) {
1626 err = assign_server(s);
1627 if (s->prev_srv) {
1628 /* This session was previously assigned to a server. We have to
1629 * update the session's and the server's stats :
1630 * - if the server changed :
1631 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1632 * - set SN_REDISP if it was successfully redispatched
1633 * - increment srv->redispatches and be->redispatches
1634 * - if the server remained the same : update retries.
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001635 */
1636
Willy Tarreau7c669d72008-06-20 15:04:11 +02001637 if (s->prev_srv != s->srv) {
1638 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1639 s->txn.flags &= ~TX_CK_MASK;
1640 s->txn.flags |= TX_CK_DOWN;
1641 }
1642 s->flags |= SN_REDISP;
1643 s->prev_srv->redispatches++;
1644 s->be->redispatches++;
1645 } else {
1646 s->prev_srv->retries++;
1647 s->be->retries++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001648 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001649 }
1650 }
1651
Willy Tarreaubaaee002006-06-26 02:48:02 +02001652 switch (err) {
1653 case SRV_STATUS_OK:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001654 /* we have SN_ASSIGNED set */
1655 if (!s->srv)
1656 return SRV_STATUS_OK; /* dispatch or proxy mode */
1657
1658 /* If we already have a connection slot, no need to check any queue */
1659 if (s->srv_conn == s->srv)
1660 return SRV_STATUS_OK;
1661
1662 /* OK, this session already has an assigned server, but no
1663 * connection slot yet. Either it is a redispatch, or it was
1664 * assigned from persistence information (direct mode).
1665 */
1666 if ((s->flags & SN_REDIRECTABLE) && s->srv->rdr_len) {
1667 /* server scheduled for redirection, and already assigned. We
1668 * don't want to go further nor check the queue.
Willy Tarreau21d2af32008-02-14 20:25:24 +01001669 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001670 sess_change_server(s, s->srv); /* not really needed in fact */
Willy Tarreau21d2af32008-02-14 20:25:24 +01001671 return SRV_STATUS_OK;
1672 }
1673
Willy Tarreau7c669d72008-06-20 15:04:11 +02001674 /* We might have to queue this session if the assigned server is full.
1675 * We know we have to queue it into the server's queue, so if a maxqueue
1676 * is set on the server, we must also check that the server's queue is
1677 * not full, in which case we have to return FULL.
1678 */
1679 if (s->srv->maxconn &&
1680 (s->srv->nbpend || s->srv->served >= srv_dynamic_maxconn(s->srv))) {
1681
1682 if (s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue)
1683 return SRV_STATUS_FULL;
1684
Willy Tarreaubaaee002006-06-26 02:48:02 +02001685 p = pendconn_add(s);
1686 if (p)
1687 return SRV_STATUS_QUEUED;
1688 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001689 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001690 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001691
1692 /* OK, we can use this server. Let's reserve our place */
1693 sess_change_server(s, s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001694 return SRV_STATUS_OK;
1695
1696 case SRV_STATUS_FULL:
1697 /* queue this session into the proxy's queue */
1698 p = pendconn_add(s);
1699 if (p)
1700 return SRV_STATUS_QUEUED;
1701 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001702 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001703
1704 case SRV_STATUS_NOSRV:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001705 return err;
1706
Willy Tarreaubaaee002006-06-26 02:48:02 +02001707 case SRV_STATUS_INTERNAL:
1708 return err;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001709
Willy Tarreaubaaee002006-06-26 02:48:02 +02001710 default:
1711 return SRV_STATUS_INTERNAL;
1712 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001713}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001714
1715/*
1716 * This function initiates a connection to the server assigned to this session
1717 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1718 * It can return one of :
1719 * - SN_ERR_NONE if everything's OK
1720 * - SN_ERR_SRVTO if there are no more servers
1721 * - SN_ERR_SRVCL if the connection was refused by the server
1722 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1723 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1724 * - SN_ERR_INTERNAL for any other purely internal errors
1725 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1726 */
1727int connect_server(struct session *s)
1728{
1729 int fd, err;
1730
1731 if (!(s->flags & SN_ADDR_SET)) {
1732 err = assign_server_address(s);
1733 if (err != SRV_STATUS_OK)
1734 return SN_ERR_INTERNAL;
1735 }
1736
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001737 if ((fd = s->req->cons->fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001738 qfprintf(stderr, "Cannot get a server socket.\n");
1739
1740 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001741 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001742 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001743 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001744 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001745 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001746 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001747 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001748 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001749 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001750 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001751 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001752 /* this is a resource error */
1753 return SN_ERR_RESOURCE;
1754 }
Willy Tarreau7e5067d2008-12-07 16:27:56 +01001755
Willy Tarreaubaaee002006-06-26 02:48:02 +02001756 if (fd >= global.maxsock) {
1757 /* do not log anything there, it's a normal condition when this option
1758 * is used to serialize connections to a server !
1759 */
1760 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1761 close(fd);
1762 return SN_ERR_PRXCOND; /* it is a configuration limit */
1763 }
1764
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001765#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreau3ab68cf2009-01-25 16:03:28 +01001766 if ((global.tune.options & GTUNE_USE_SPLICE) &&
1767 (s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001768 /* TCP splicing supported by both FE and BE */
Willy Tarreau7e5067d2008-12-07 16:27:56 +01001769 tcp_splice_initfd(s->req->prod->fd, fd);
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001770 }
1771#endif
1772
Willy Tarreaubaaee002006-06-26 02:48:02 +02001773 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1774 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1775 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1776 close(fd);
1777 return SN_ERR_INTERNAL;
1778 }
1779
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001780 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001781 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1782
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001783 if (s->be->options & PR_O_TCP_NOLING)
1784 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1785
Willy Tarreaubaaee002006-06-26 02:48:02 +02001786 /* allow specific binding :
1787 * - server-specific at first
1788 * - proxy-specific next
1789 */
1790 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001791 struct sockaddr_in *remote = NULL;
1792 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001793
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001794#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001795 switch (s->srv->state & SRV_TPROXY_MASK) {
1796 case SRV_TPROXY_ADDR:
1797 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1798 flags = 3;
1799 break;
1800 case SRV_TPROXY_CLI:
1801 flags |= 2;
1802 /* fall through */
1803 case SRV_TPROXY_CIP:
1804 /* FIXME: what can we do if the client connects in IPv6 ? */
1805 flags |= 1;
1806 remote = (struct sockaddr_in *)&s->cli_addr;
1807 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001808 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001809#endif
Willy Tarreauc76721d2009-02-04 20:20:58 +01001810#ifdef SO_BINDTODEVICE
1811 /* Note: this might fail if not CAP_NET_RAW */
1812 if (s->srv->iface_name)
Willy Tarreau604e8302009-03-06 00:48:23 +01001813 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->srv->iface_name, s->srv->iface_len + 1);
Willy Tarreauc76721d2009-02-04 20:20:58 +01001814#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001815 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001816 if (ret) {
1817 close(fd);
1818 if (ret == 1) {
1819 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1820 s->be->id, s->srv->id);
1821 send_log(s->be, LOG_EMERG,
1822 "Cannot bind to source address before connect() for server %s/%s.\n",
1823 s->be->id, s->srv->id);
1824 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001825 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001826 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001827 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001828 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001829 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001830 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001831 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001832 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001833 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001834 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001835 struct sockaddr_in *remote = NULL;
1836 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001837
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001838#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001839 switch (s->be->options & PR_O_TPXY_MASK) {
1840 case PR_O_TPXY_ADDR:
1841 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1842 flags = 3;
1843 break;
1844 case PR_O_TPXY_CLI:
1845 flags |= 2;
1846 /* fall through */
1847 case PR_O_TPXY_CIP:
1848 /* FIXME: what can we do if the client connects in IPv6 ? */
1849 flags |= 1;
1850 remote = (struct sockaddr_in *)&s->cli_addr;
1851 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001852 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001853#endif
Willy Tarreaud53f96b2009-02-04 18:46:54 +01001854#ifdef SO_BINDTODEVICE
1855 /* Note: this might fail if not CAP_NET_RAW */
1856 if (s->be->iface_name)
Willy Tarreau604e8302009-03-06 00:48:23 +01001857 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->be->iface_name, s->be->iface_len + 1);
Willy Tarreaud53f96b2009-02-04 18:46:54 +01001858#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001859 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001860 if (ret) {
1861 close(fd);
1862 if (ret == 1) {
1863 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1864 s->be->id);
1865 send_log(s->be, LOG_EMERG,
1866 "Cannot bind to source address before connect() for proxy %s.\n",
1867 s->be->id);
1868 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001869 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001870 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001871 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001872 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1873 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001874 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001875 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001876 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001877 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001878
Willy Tarreaubaaee002006-06-26 02:48:02 +02001879 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1880 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1881
1882 if (errno == EAGAIN || errno == EADDRINUSE) {
1883 char *msg;
1884 if (errno == EAGAIN) /* no free ports left, try again later */
1885 msg = "no free ports";
1886 else
1887 msg = "local address already in use";
1888
1889 qfprintf(stderr,"Cannot connect: %s.\n",msg);
1890 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001891 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001892 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001893 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001894 return SN_ERR_RESOURCE;
1895 } else if (errno == ETIMEDOUT) {
1896 //qfprintf(stderr,"Connect(): ETIMEDOUT");
1897 close(fd);
1898 return SN_ERR_SRVTO;
1899 } else {
1900 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1901 //qfprintf(stderr,"Connect(): %d", errno);
1902 close(fd);
1903 return SN_ERR_SRVCL;
1904 }
1905 }
1906
Willy Tarreaue5ed4062008-08-30 03:17:31 +02001907 fdtab[fd].owner = s->req->cons;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001908 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +02001909 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001910 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001911 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001912 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001913
1914 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1915 fdtab[fd].peerlen = sizeof(s->srv_addr);
1916
Willy Tarreaubaaee002006-06-26 02:48:02 +02001917 fd_insert(fd);
Willy Tarreau788e2842008-08-26 13:25:39 +02001918 EV_FD_SET(fd, DIR_WR); /* for connect status */
1919
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001920 s->req->cons->state = SI_ST_CON;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001921 if (s->srv) {
Willy Tarreau1e62de62008-11-11 20:20:02 +01001922 s->flags |= SN_CURR_SESS;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001923 s->srv->cur_sess++;
1924 if (s->srv->cur_sess > s->srv->cur_sess_max)
1925 s->srv->cur_sess_max = s->srv->cur_sess;
Willy Tarreau51406232008-03-10 22:04:20 +01001926 if (s->be->lbprm.server_take_conn)
1927 s->be->lbprm.server_take_conn(s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001928 }
1929
Willy Tarreaua3780f22009-03-15 21:49:00 +01001930 s->req->cons->exp = tick_add_ifset(now_ms, s->be->timeout.connect);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001931 return SN_ERR_NONE; /* connection is OK */
1932}
1933
1934
Willy Tarreaubaaee002006-06-26 02:48:02 +02001935/* This function performs the "redispatch" part of a connection attempt. It
1936 * will assign a server if required, queue the connection if required, and
1937 * handle errors that might arise at this level. It can change the server
1938 * state. It will return 1 if it encounters an error, switches the server
1939 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1940 * that the connection is ready to use.
1941 */
1942
1943int srv_redispatch_connect(struct session *t)
1944{
1945 int conn_err;
1946
1947 /* We know that we don't have any connection pending, so we will
1948 * try to get a new one, and wait in this state if it's queued
1949 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001950 redispatch:
Willy Tarreaubaaee002006-06-26 02:48:02 +02001951 conn_err = assign_server_and_queue(t);
1952 switch (conn_err) {
1953 case SRV_STATUS_OK:
1954 break;
1955
Willy Tarreau7c669d72008-06-20 15:04:11 +02001956 case SRV_STATUS_FULL:
1957 /* The server has reached its maxqueue limit. Either PR_O_REDISP is set
1958 * and we can redispatch to another server, or it is not and we return
1959 * 503. This only makes sense in DIRECT mode however, because normal LB
1960 * algorithms would never select such a server, and hash algorithms
1961 * would bring us on the same server again. Note that t->srv is set in
1962 * this case.
1963 */
1964 if ((t->flags & SN_DIRECT) && (t->be->options & PR_O_REDISP)) {
1965 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
1966 t->prev_srv = t->srv;
1967 goto redispatch;
1968 }
1969
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001970 if (!t->req->cons->err_type) {
1971 t->req->cons->err_type = SI_ET_QUEUE_ERR;
1972 t->req->cons->err_loc = t->srv;
1973 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001974
1975 t->srv->failed_conns++;
1976 t->be->failed_conns++;
1977 return 1;
1978
Willy Tarreaubaaee002006-06-26 02:48:02 +02001979 case SRV_STATUS_NOSRV:
1980 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001981 if (!t->req->cons->err_type) {
1982 t->req->cons->err_type = SI_ET_CONN_ERR;
1983 t->req->cons->err_loc = NULL;
1984 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001985
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001986 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001987 return 1;
1988
1989 case SRV_STATUS_QUEUED:
Willy Tarreau35374672008-09-03 18:11:02 +02001990 t->req->cons->exp = tick_add_ifset(now_ms, t->be->timeout.queue);
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001991 t->req->cons->state = SI_ST_QUE;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001992 /* do nothing else and do not wake any other session up */
1993 return 1;
1994
Willy Tarreaubaaee002006-06-26 02:48:02 +02001995 case SRV_STATUS_INTERNAL:
1996 default:
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001997 if (!t->req->cons->err_type) {
1998 t->req->cons->err_type = SI_ET_CONN_OTHER;
1999 t->req->cons->err_loc = t->srv;
2000 }
2001
Willy Tarreaubaaee002006-06-26 02:48:02 +02002002 if (t->srv)
Willy Tarreau7f062c42009-03-05 18:43:00 +01002003 srv_inc_sess_ctr(t->srv);
Willy Tarreau98937b82007-12-10 15:05:42 +01002004 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02002005 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002006 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002007
2008 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002009 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02002010 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002011 return 1;
2012 }
2013 /* if we get here, it's because we got SRV_STATUS_OK, which also
2014 * means that the connection has not been queued.
2015 */
2016 return 0;
2017}
2018
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002019int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01002020 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002021 return px->down_time;
2022
2023 return now.tv_sec - px->last_change + px->down_time;
2024}
Willy Tarreaubaaee002006-06-26 02:48:02 +02002025
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002026/* This function parses a "balance" statement in a backend section describing
2027 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
2028 * returns -1, it may write an error message into ther <err> buffer, for at
2029 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
2030 * written. The function must be called with <args> pointing to the first word
2031 * after "balance".
2032 */
2033int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
2034{
2035 if (!*(args[0])) {
2036 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01002037 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2038 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002039 return 0;
2040 }
2041
2042 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002043 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2044 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002045 }
Willy Tarreau51406232008-03-10 22:04:20 +01002046 else if (!strcmp(args[0], "leastconn")) {
2047 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2048 curproxy->lbprm.algo |= BE_LB_ALGO_LC;
2049 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002050 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002051 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2052 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002053 }
2054 else if (!strcmp(args[0], "uri")) {
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002055 int arg = 1;
2056
Willy Tarreau31682232007-11-29 15:38:04 +01002057 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2058 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002059
2060 while (*args[arg]) {
2061 if (!strcmp(args[arg], "len")) {
2062 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2063 snprintf(err, errlen, "'balance uri len' expects a positive integer (got '%s').", args[arg+1]);
2064 return -1;
2065 }
2066 curproxy->uri_len_limit = atoi(args[arg+1]);
2067 arg += 2;
2068 }
2069 else if (!strcmp(args[arg], "depth")) {
2070 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2071 snprintf(err, errlen, "'balance uri depth' expects a positive integer (got '%s').", args[arg+1]);
2072 return -1;
2073 }
2074 /* hint: we store the position of the ending '/' (depth+1) so
2075 * that we avoid a comparison while computing the hash.
2076 */
2077 curproxy->uri_dirs_depth1 = atoi(args[arg+1]) + 1;
2078 arg += 2;
2079 }
2080 else {
2081 snprintf(err, errlen, "'balance uri' only accepts parameters 'len' and 'depth' (got '%s').", args[arg]);
2082 return -1;
2083 }
2084 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002085 }
Willy Tarreau01732802007-11-01 22:48:15 +01002086 else if (!strcmp(args[0], "url_param")) {
2087 if (!*args[1]) {
2088 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
2089 return -1;
2090 }
Willy Tarreau31682232007-11-29 15:38:04 +01002091 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2092 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreaua534fea2008-08-03 12:19:50 +02002093
2094 free(curproxy->url_param_name);
Willy Tarreau01732802007-11-01 22:48:15 +01002095 curproxy->url_param_name = strdup(args[1]);
Willy Tarreaua534fea2008-08-03 12:19:50 +02002096 curproxy->url_param_len = strlen(args[1]);
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002097 if (*args[2]) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02002098 if (strcmp(args[2], "check_post")) {
2099 snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
2100 return -1;
2101 }
2102 if (*args[3]) {
2103 /* TODO: maybe issue a warning if there is no value, no digits or too long */
2104 curproxy->url_param_post_limit = str2ui(args[3]);
2105 }
2106 /* if no limit, or faul value in args[3], then default to a moderate wordlen */
2107 if (!curproxy->url_param_post_limit)
2108 curproxy->url_param_post_limit = 48;
2109 else if ( curproxy->url_param_post_limit < 3 )
2110 curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
2111 }
Benoitaffb4812009-03-25 13:02:10 +01002112 }
2113 else if (!strncmp(args[0], "hdr(", 4)) {
2114 const char *beg, *end;
2115
2116 beg = args[0] + 4;
2117 end = strchr(beg, ')');
2118
2119 if (!end || end == beg) {
2120 snprintf(err, errlen, "'balance hdr(name)' requires an http header field name.");
2121 return -1;
2122 }
2123
2124 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2125 curproxy->lbprm.algo |= BE_LB_ALGO_HH;
2126
2127 free(curproxy->hh_name);
2128 curproxy->hh_len = end - beg;
2129 curproxy->hh_name = my_strndup(beg, end - beg);
2130 curproxy->hh_match_domain = 0;
2131
2132 if (*args[1]) {
2133 if (strcmp(args[1], "use_domain_only")) {
2134 snprintf(err, errlen, "'balance hdr(name)' only accepts 'use_domain_only' modifier.");
2135 return -1;
2136 }
2137 curproxy->hh_match_domain = 1;
2138 }
2139
Willy Tarreau01732802007-11-01 22:48:15 +01002140 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002141 else {
Benoitaffb4812009-03-25 13:02:10 +01002142 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri', 'url_param' and 'hdr(name)' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002143 return -1;
2144 }
2145 return 0;
2146}
2147
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002148
2149/************************************************************************/
2150/* All supported keywords must be declared here. */
2151/************************************************************************/
2152
2153/* set test->i to the number of enabled servers on the proxy */
2154static int
2155acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
2156 struct acl_expr *expr, struct acl_test *test)
2157{
2158 test->flags = ACL_TEST_F_VOL_TEST;
2159 if (expr->arg_len) {
2160 /* another proxy was designated, we must look for it */
2161 for (px = proxy; px; px = px->next)
2162 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2163 break;
2164 }
2165 if (!px)
2166 return 0;
2167
2168 if (px->srv_act)
2169 test->i = px->srv_act;
2170 else if (px->lbprm.fbck)
2171 test->i = 1;
2172 else
2173 test->i = px->srv_bck;
2174
2175 return 1;
2176}
2177
Jeffrey 'jf' Lim5051d7b2008-09-04 01:03:03 +08002178/* set test->i to the number of enabled servers on the proxy */
2179static int
2180acl_fetch_connslots(struct proxy *px, struct session *l4, void *l7, int dir,
2181 struct acl_expr *expr, struct acl_test *test)
2182{
2183 struct server *iterator;
2184 test->flags = ACL_TEST_F_VOL_TEST;
2185 if (expr->arg_len) {
2186 /* another proxy was designated, we must look for it */
2187 for (px = proxy; px; px = px->next)
2188 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2189 break;
2190 }
2191 if (!px)
2192 return 0;
2193
2194 test->i = 0;
2195 iterator = px->srv;
2196 while (iterator) {
2197 if ((iterator->state & 1) == 0) {
2198 iterator = iterator->next;
2199 continue;
2200 }
2201 if (iterator->maxconn == 0 || iterator->maxqueue == 0) {
2202 test->i = -1;
2203 return 1;
2204 }
2205
2206 test->i += (iterator->maxconn - iterator->cur_sess)
2207 + (iterator->maxqueue - iterator->nbpend);
2208 iterator = iterator->next;
2209 }
2210
2211 return 1;
2212}
2213
Willy Tarreau079ff0a2009-03-05 21:34:28 +01002214/* set test->i to the number of connections per second reaching the frontend */
2215static int
2216acl_fetch_fe_sess_rate(struct proxy *px, struct session *l4, void *l7, int dir,
2217 struct acl_expr *expr, struct acl_test *test)
2218{
2219 test->flags = ACL_TEST_F_VOL_TEST;
2220 if (expr->arg_len) {
2221 /* another proxy was designated, we must look for it */
2222 for (px = proxy; px; px = px->next)
2223 if ((px->cap & PR_CAP_FE) && !strcmp(px->id, expr->arg.str))
2224 break;
2225 }
2226 if (!px)
2227 return 0;
2228
2229 test->i = read_freq_ctr(&px->fe_sess_per_sec);
2230 return 1;
2231}
2232
2233/* set test->i to the number of connections per second reaching the backend */
2234static int
2235acl_fetch_be_sess_rate(struct proxy *px, struct session *l4, void *l7, int dir,
2236 struct acl_expr *expr, struct acl_test *test)
2237{
2238 test->flags = ACL_TEST_F_VOL_TEST;
2239 if (expr->arg_len) {
2240 /* another proxy was designated, we must look for it */
2241 for (px = proxy; px; px = px->next)
2242 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2243 break;
2244 }
2245 if (!px)
2246 return 0;
2247
2248 test->i = read_freq_ctr(&px->be_sess_per_sec);
2249 return 1;
2250}
2251
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002252
2253/* Note: must not be declared <const> as its list will be overwritten */
2254static struct acl_kw_list acl_kws = {{ },{
Jeffrey 'jf' Lim5051d7b2008-09-04 01:03:03 +08002255 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int, ACL_USE_NOTHING },
Willy Tarreau3a8efeb2009-03-05 19:15:37 +01002256 { "connslots", acl_parse_int, acl_fetch_connslots, acl_match_int, ACL_USE_NOTHING },
Willy Tarreau079ff0a2009-03-05 21:34:28 +01002257 { "fe_sess_rate", acl_parse_int, acl_fetch_fe_sess_rate, acl_match_int, ACL_USE_NOTHING },
2258 { "be_sess_rate", acl_parse_int, acl_fetch_be_sess_rate, acl_match_int, ACL_USE_NOTHING },
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002259 { NULL, NULL, NULL, NULL },
2260}};
2261
2262
2263__attribute__((constructor))
2264static void __backend_init(void)
2265{
2266 acl_register_keywords(&acl_kws);
2267}
2268
2269
Willy Tarreaubaaee002006-06-26 02:48:02 +02002270/*
2271 * Local variables:
2272 * c-indent-level: 8
2273 * c-basic-offset: 8
2274 * End:
2275 */