blob: 5e78fd8df5a522ffb6e7965b9653b16cf190b630 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +020019#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020
Willy Tarreau2dd0d472006-06-29 17:53:05 +020021#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020022#include <common/config.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020023#include <common/debug.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010024#include <common/eb32tree.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020025#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020026#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020027
Willy Tarreaubaaee002006-06-26 02:48:02 +020028#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020029
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010030#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020031#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020032#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020033#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010034#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020035#include <proto/log.h>
Willy Tarreauc6f4ce82009-06-10 11:09:37 +020036#include <proto/port_range.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020037#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010038#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020039#include <proto/queue.h>
Willy Tarreau7f062c42009-03-05 18:43:00 +010040#include <proto/server.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020041#include <proto/session.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020042#include <proto/stream_sock.h>
43#include <proto/task.h>
44
Willy Tarreau6d1a9882007-01-07 02:03:04 +010045#ifdef CONFIG_HAP_TCPSPLICE
46#include <libtcpsplice.h>
47#endif
48
Willy Tarreaub625a082007-11-26 01:15:43 +010049static inline void fwrr_remove_from_tree(struct server *s);
50static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
51static inline void fwrr_dequeue_srv(struct server *s);
52static void fwrr_get_srv(struct server *s);
53static void fwrr_queue_srv(struct server *s);
54
55/* This function returns non-zero if a server with the given weight and state
56 * is usable for LB, otherwise zero.
57 */
58static inline int srv_is_usable(int state, int weight)
59{
60 if (!weight)
61 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010062 if (state & SRV_GOINGDOWN)
63 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010064 if (!(state & SRV_RUNNING))
65 return 0;
66 return 1;
67}
68
Willy Tarreaubaaee002006-06-26 02:48:02 +020069/*
70 * This function recounts the number of usable active and backup servers for
71 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010072 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010073 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010074 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020075 */
Willy Tarreaub625a082007-11-26 01:15:43 +010076static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020077{
78 struct server *srv;
79
Willy Tarreau20697042007-11-15 23:26:18 +010080 px->srv_act = px->srv_bck = 0;
81 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010082 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020083 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010084 if (!srv_is_usable(srv->state, srv->eweight))
85 continue;
86
87 if (srv->state & SRV_BACKUP) {
88 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010089 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010090 px->lbprm.fbck = srv;
91 px->srv_bck++;
92 px->lbprm.tot_wbck += srv->eweight;
93 } else {
94 px->srv_act++;
95 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020096 }
97 }
Willy Tarreaub625a082007-11-26 01:15:43 +010098}
Willy Tarreau20697042007-11-15 23:26:18 +010099
Willy Tarreaub625a082007-11-26 01:15:43 +0100100/* This function simply updates the backend's tot_weight and tot_used values
101 * after servers weights have been updated. It is designed to be used after
102 * recount_servers() or equivalent.
103 */
104static void update_backend_weight(struct proxy *px)
105{
Willy Tarreau20697042007-11-15 23:26:18 +0100106 if (px->srv_act) {
107 px->lbprm.tot_weight = px->lbprm.tot_wact;
108 px->lbprm.tot_used = px->srv_act;
109 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100110 else if (px->lbprm.fbck) {
111 /* use only the first backup server */
112 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
113 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100114 }
115 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100116 px->lbprm.tot_weight = px->lbprm.tot_wbck;
117 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100118 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100119}
120
121/* this function updates the map according to server <srv>'s new state */
122static void map_set_server_status_down(struct server *srv)
123{
124 struct proxy *p = srv->proxy;
125
126 if (srv->state == srv->prev_state &&
127 srv->eweight == srv->prev_eweight)
128 return;
129
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100130 if (srv_is_usable(srv->state, srv->eweight))
131 goto out_update_state;
132
Willy Tarreaub625a082007-11-26 01:15:43 +0100133 /* FIXME: could be optimized since we know what changed */
134 recount_servers(p);
135 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100136 p->lbprm.map.state |= PR_MAP_RECALC;
137 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100138 srv->prev_state = srv->state;
139 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200140}
141
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100142/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100143static void map_set_server_status_up(struct server *srv)
144{
145 struct proxy *p = srv->proxy;
146
147 if (srv->state == srv->prev_state &&
148 srv->eweight == srv->prev_eweight)
149 return;
150
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100151 if (!srv_is_usable(srv->state, srv->eweight))
152 goto out_update_state;
153
Willy Tarreaub625a082007-11-26 01:15:43 +0100154 /* FIXME: could be optimized since we know what changed */
155 recount_servers(p);
156 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100157 p->lbprm.map.state |= PR_MAP_RECALC;
158 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100159 srv->prev_state = srv->state;
160 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100161}
162
Willy Tarreau20697042007-11-15 23:26:18 +0100163/* This function recomputes the server map for proxy px. It relies on
164 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
165 * called after recount_servers(). It also expects px->lbprm.map.srv
166 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200167 */
168void recalc_server_map(struct proxy *px)
169{
170 int o, tot, flag;
171 struct server *cur, *best;
172
Willy Tarreau20697042007-11-15 23:26:18 +0100173 switch (px->lbprm.tot_used) {
174 case 0: /* no server */
175 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200176 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100177 case 1: /* only one server, just fill first entry */
178 tot = 1;
179 break;
180 default:
181 tot = px->lbprm.tot_weight;
182 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200183 }
184
Willy Tarreau20697042007-11-15 23:26:18 +0100185 /* here we *know* that we have some servers */
186 if (px->srv_act)
187 flag = SRV_RUNNING;
188 else
189 flag = SRV_RUNNING | SRV_BACKUP;
190
Willy Tarreaubaaee002006-06-26 02:48:02 +0200191 /* this algorithm gives priority to the first server, which means that
192 * it will respect the declaration order for equivalent weights, and
193 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100194 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200195 * case, where we want the first server only.
196 */
197 for (cur = px->srv; cur; cur = cur->next)
198 cur->wscore = 0;
199
200 for (o = 0; o < tot; o++) {
201 int max = 0;
202 best = NULL;
203 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100204 if (flag == (cur->state &
205 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200206 int v;
207
208 /* If we are forced to return only one server, we don't want to
209 * go further, because we would return the wrong one due to
210 * divide overflow.
211 */
212 if (tot == 1) {
213 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100214 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200215 break;
216 }
217
Willy Tarreau417fae02007-03-25 21:16:40 +0200218 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200219 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
220 if (best == NULL || v > max) {
221 max = v;
222 best = cur;
223 }
224 }
225 }
Willy Tarreau20697042007-11-15 23:26:18 +0100226 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200227 best->wscore -= tot;
228 }
Willy Tarreau20697042007-11-15 23:26:18 +0100229 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200230}
231
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100232/* This function is responsible of building the server MAP for map-based LB
233 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
234 * weights if applicable. It should be called only once per proxy, at config
235 * time.
236 */
237void init_server_map(struct proxy *p)
238{
239 struct server *srv;
240 int pgcd;
241 int act, bck;
242
Willy Tarreaub625a082007-11-26 01:15:43 +0100243 p->lbprm.set_server_status_up = map_set_server_status_up;
244 p->lbprm.set_server_status_down = map_set_server_status_down;
245 p->lbprm.update_server_eweight = NULL;
246
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100247 if (!p->srv)
248 return;
249
250 /* We will factor the weights to reduce the table,
251 * using Euclide's largest common divisor algorithm
252 */
253 pgcd = p->srv->uweight;
254 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
255 int w = srv->uweight;
256 while (w) {
257 int t = pgcd % w;
258 pgcd = w;
259 w = t;
260 }
261 }
262
263 /* It is sometimes useful to know what factor to apply
264 * to the backend's effective weight to know its real
265 * weight.
266 */
267 p->lbprm.wmult = pgcd;
268
269 act = bck = 0;
270 for (srv = p->srv; srv; srv = srv->next) {
271 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100272 srv->prev_eweight = srv->eweight;
273 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100274 if (srv->state & SRV_BACKUP)
275 bck += srv->eweight;
276 else
277 act += srv->eweight;
278 }
279
280 /* this is the largest map we will ever need for this servers list */
281 if (act < bck)
282 act = bck;
283
284 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
285 /* recounts servers and their weights */
286 p->lbprm.map.state = PR_MAP_RECALC;
287 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100288 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100289 recalc_server_map(p);
290}
291
Willy Tarreaub625a082007-11-26 01:15:43 +0100292/* This function updates the server trees according to server <srv>'s new
293 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100294 * It is not important whether the server was already down or not. It is not
295 * important either that the new state is completely down (the caller may not
296 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100297 */
298static void fwrr_set_server_status_down(struct server *srv)
299{
300 struct proxy *p = srv->proxy;
301 struct fwrr_group *grp;
302
303 if (srv->state == srv->prev_state &&
304 srv->eweight == srv->prev_eweight)
305 return;
306
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100307 if (srv_is_usable(srv->state, srv->eweight))
308 goto out_update_state;
309
Willy Tarreaub625a082007-11-26 01:15:43 +0100310 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
311 /* server was already down */
312 goto out_update_backend;
313
314 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
315 grp->next_weight -= srv->prev_eweight;
316
317 if (srv->state & SRV_BACKUP) {
318 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
319 p->srv_bck--;
320
321 if (srv == p->lbprm.fbck) {
322 /* we lost the first backup server in a single-backup
323 * configuration, we must search another one.
324 */
325 struct server *srv2 = p->lbprm.fbck;
326 do {
327 srv2 = srv2->next;
328 } while (srv2 &&
329 !((srv2->state & SRV_BACKUP) &&
330 srv_is_usable(srv2->state, srv2->eweight)));
331 p->lbprm.fbck = srv2;
332 }
333 } else {
334 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
335 p->srv_act--;
336 }
337
338 fwrr_dequeue_srv(srv);
339 fwrr_remove_from_tree(srv);
340
341out_update_backend:
342 /* check/update tot_used, tot_weight */
343 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100344 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100345 srv->prev_state = srv->state;
346 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100347}
348
349/* This function updates the server trees according to server <srv>'s new
350 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100351 * It is not important whether the server was already down or not. It is not
352 * important either that the new state is completely UP (the caller may not
353 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100354 * the weight of a server which was already up.
355 */
356static void fwrr_set_server_status_up(struct server *srv)
357{
358 struct proxy *p = srv->proxy;
359 struct fwrr_group *grp;
360
361 if (srv->state == srv->prev_state &&
362 srv->eweight == srv->prev_eweight)
363 return;
364
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100365 if (!srv_is_usable(srv->state, srv->eweight))
366 goto out_update_state;
367
Willy Tarreaub625a082007-11-26 01:15:43 +0100368 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
369 /* server was already up */
370 goto out_update_backend;
371
372 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
373 grp->next_weight += srv->eweight;
374
375 if (srv->state & SRV_BACKUP) {
376 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
377 p->srv_bck++;
378
Willy Tarreauf4cca452008-03-08 21:42:54 +0100379 if (!(p->options & PR_O_USE_ALL_BK)) {
380 if (!p->lbprm.fbck) {
381 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100382 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100383 } else {
384 /* we may have restored a backup server prior to fbck,
385 * in which case it should replace it.
386 */
387 struct server *srv2 = srv;
388 do {
389 srv2 = srv2->next;
390 } while (srv2 && (srv2 != p->lbprm.fbck));
391 if (srv2)
392 p->lbprm.fbck = srv;
393 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100394 }
395 } else {
396 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
397 p->srv_act++;
398 }
399
400 /* note that eweight cannot be 0 here */
401 fwrr_get_srv(srv);
402 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
403 fwrr_queue_srv(srv);
404
405out_update_backend:
406 /* check/update tot_used, tot_weight */
407 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100408 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100409 srv->prev_state = srv->state;
410 srv->prev_eweight = srv->eweight;
411}
412
413/* This function must be called after an update to server <srv>'s effective
414 * weight. It may be called after a state change too.
415 */
416static void fwrr_update_server_weight(struct server *srv)
417{
418 int old_state, new_state;
419 struct proxy *p = srv->proxy;
420 struct fwrr_group *grp;
421
422 if (srv->state == srv->prev_state &&
423 srv->eweight == srv->prev_eweight)
424 return;
425
426 /* If changing the server's weight changes its state, we simply apply
427 * the procedures we already have for status change. If the state
428 * remains down, the server is not in any tree, so it's as easy as
429 * updating its values. If the state remains up with different weights,
430 * there are some computations to perform to find a new place and
431 * possibly a new tree for this server.
432 */
433
434 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
435 new_state = srv_is_usable(srv->state, srv->eweight);
436
437 if (!old_state && !new_state) {
438 srv->prev_state = srv->state;
439 srv->prev_eweight = srv->eweight;
440 return;
441 }
442 else if (!old_state && new_state) {
443 fwrr_set_server_status_up(srv);
444 return;
445 }
446 else if (old_state && !new_state) {
447 fwrr_set_server_status_down(srv);
448 return;
449 }
450
451 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
452 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
453
454 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
455 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
456
457 if (srv->lb_tree == grp->init) {
458 fwrr_dequeue_srv(srv);
459 fwrr_queue_by_weight(grp->init, srv);
460 }
461 else if (!srv->lb_tree) {
462 /* FIXME: server was down. This is not possible right now but
463 * may be needed soon for slowstart or graceful shutdown.
464 */
465 fwrr_dequeue_srv(srv);
466 fwrr_get_srv(srv);
467 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
468 fwrr_queue_srv(srv);
469 } else {
470 /* The server is either active or in the next queue. If it's
471 * still in the active queue and it has not consumed all of its
472 * places, let's adjust its next position.
473 */
474 fwrr_get_srv(srv);
475
476 if (srv->eweight > 0) {
477 int prev_next = srv->npos;
478 int step = grp->next_weight / srv->eweight;
479
480 srv->npos = srv->lpos + step;
481 srv->rweight = 0;
482
483 if (srv->npos > prev_next)
484 srv->npos = prev_next;
485 if (srv->npos < grp->curr_pos + 2)
486 srv->npos = grp->curr_pos + step;
487 } else {
488 /* push it into the next tree */
489 srv->npos = grp->curr_pos + grp->curr_weight;
490 }
491
492 fwrr_dequeue_srv(srv);
493 fwrr_queue_srv(srv);
494 }
495
496 update_backend_weight(p);
497 srv->prev_state = srv->state;
498 srv->prev_eweight = srv->eweight;
499}
500
501/* Remove a server from a tree. It must have previously been dequeued. This
502 * function is meant to be called when a server is going down or has its
503 * weight disabled.
504 */
505static inline void fwrr_remove_from_tree(struct server *s)
506{
507 s->lb_tree = NULL;
508}
509
510/* Queue a server in the weight tree <root>, assuming the weight is >0.
511 * We want to sort them by inverted weights, because we need to place
512 * heavy servers first in order to get a smooth distribution.
513 */
514static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
515{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100516 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100517 eb32_insert(root, &s->lb_node);
518 s->lb_tree = root;
519}
520
521/* This function is responsible for building the weight trees in case of fast
522 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
523 * ratio. Both active and backup groups are initialized.
524 */
525void fwrr_init_server_groups(struct proxy *p)
526{
527 struct server *srv;
528 struct eb_root init_head = EB_ROOT;
529
530 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
531 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
532 p->lbprm.update_server_eweight = fwrr_update_server_weight;
533
534 p->lbprm.wdiv = BE_WEIGHT_SCALE;
535 for (srv = p->srv; srv; srv = srv->next) {
536 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
537 srv->prev_state = srv->state;
538 }
539
540 recount_servers(p);
541 update_backend_weight(p);
542
543 /* prepare the active servers group */
544 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
545 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
546 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
547 p->lbprm.fwrr.act.t1 = init_head;
548 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
549 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
550
551 /* prepare the backup servers group */
552 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
553 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
554 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
555 p->lbprm.fwrr.bck.t1 = init_head;
556 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
557 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
558
559 /* queue active and backup servers in two distinct groups */
560 for (srv = p->srv; srv; srv = srv->next) {
561 if (!srv_is_usable(srv->state, srv->eweight))
562 continue;
563 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
564 p->lbprm.fwrr.bck.init :
565 p->lbprm.fwrr.act.init,
566 srv);
567 }
568}
569
570/* simply removes a server from a weight tree */
571static inline void fwrr_dequeue_srv(struct server *s)
572{
573 eb32_delete(&s->lb_node);
574}
575
576/* queues a server into the appropriate group and tree depending on its
577 * backup status, and ->npos. If the server is disabled, simply assign
578 * it to the NULL tree.
579 */
580static void fwrr_queue_srv(struct server *s)
581{
582 struct proxy *p = s->proxy;
583 struct fwrr_group *grp;
584
585 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
586
587 /* Delay everything which does not fit into the window and everything
588 * which does not fit into the theorical new window.
589 */
590 if (!srv_is_usable(s->state, s->eweight)) {
591 fwrr_remove_from_tree(s);
592 }
593 else if (s->eweight <= 0 ||
594 s->npos >= 2 * grp->curr_weight ||
595 s->npos >= grp->curr_weight + grp->next_weight) {
596 /* put into next tree, and readjust npos in case we could
597 * finally take this back to current. */
598 s->npos -= grp->curr_weight;
599 fwrr_queue_by_weight(grp->next, s);
600 }
601 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100602 /* The sorting key is stored in units of s->npos * user_weight
603 * in order to avoid overflows. As stated in backend.h, the
604 * lower the scale, the rougher the weights modulation, and the
605 * higher the scale, the lower the number of servers without
606 * overflow. With this formula, the result is always positive,
607 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100608 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100609 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
610 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
611
612 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100613 s->lb_tree = &grp->curr;
614 }
615}
616
617/* prepares a server when extracting it from the "init" tree */
618static inline void fwrr_get_srv_init(struct server *s)
619{
620 s->npos = s->rweight = 0;
621}
622
623/* prepares a server when extracting it from the "next" tree */
624static inline void fwrr_get_srv_next(struct server *s)
625{
626 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
627 &s->proxy->lbprm.fwrr.bck :
628 &s->proxy->lbprm.fwrr.act;
629
630 s->npos += grp->curr_weight;
631}
632
633/* prepares a server when it was marked down */
634static inline void fwrr_get_srv_down(struct server *s)
635{
636 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
637 &s->proxy->lbprm.fwrr.bck :
638 &s->proxy->lbprm.fwrr.act;
639
640 s->npos = grp->curr_pos;
641}
642
643/* prepares a server when extracting it from its tree */
644static void fwrr_get_srv(struct server *s)
645{
646 struct proxy *p = s->proxy;
647 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
648 &p->lbprm.fwrr.bck :
649 &p->lbprm.fwrr.act;
650
651 if (s->lb_tree == grp->init) {
652 fwrr_get_srv_init(s);
653 }
654 else if (s->lb_tree == grp->next) {
655 fwrr_get_srv_next(s);
656 }
657 else if (s->lb_tree == NULL) {
658 fwrr_get_srv_down(s);
659 }
660}
661
662/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
663 * when this happens, and "next" filled with servers sorted by weights.
664 */
665static inline void fwrr_switch_trees(struct fwrr_group *grp)
666{
667 struct eb_root *swap;
668 swap = grp->init;
669 grp->init = grp->next;
670 grp->next = swap;
671 grp->curr_weight = grp->next_weight;
672 grp->curr_pos = grp->curr_weight;
673}
674
675/* return next server from the current tree in FWRR group <grp>, or a server
676 * from the "init" tree if appropriate. If both trees are empty, return NULL.
677 */
678static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
679{
680 struct eb32_node *node;
681 struct server *s;
682
683 node = eb32_first(&grp->curr);
684 s = eb32_entry(node, struct server, lb_node);
685
686 if (!node || s->npos > grp->curr_pos) {
687 /* either we have no server left, or we have a hole */
688 struct eb32_node *node2;
689 node2 = eb32_first(grp->init);
690 if (node2) {
691 node = node2;
692 s = eb32_entry(node, struct server, lb_node);
693 fwrr_get_srv_init(s);
694 if (s->eweight == 0) /* FIXME: is it possible at all ? */
695 node = NULL;
696 }
697 }
698 if (node)
699 return s;
700 else
701 return NULL;
702}
703
704/* Computes next position of server <s> in the group. It is mandatory for <s>
705 * to have a non-zero, positive eweight.
706*/
707static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
708{
709 if (!s->npos) {
710 /* first time ever for this server */
711 s->lpos = grp->curr_pos;
712 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
713 s->rweight += grp->next_weight % s->eweight;
714
715 if (s->rweight >= s->eweight) {
716 s->rweight -= s->eweight;
717 s->npos++;
718 }
719 } else {
720 s->lpos = s->npos;
721 s->npos += grp->next_weight / s->eweight;
722 s->rweight += grp->next_weight % s->eweight;
723
724 if (s->rweight >= s->eweight) {
725 s->rweight -= s->eweight;
726 s->npos++;
727 }
728 }
729}
730
731/* Return next server from the current tree in backend <p>, or a server from
732 * the init tree if appropriate. If both trees are empty, return NULL.
733 * Saturated servers are skipped and requeued.
734 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100735static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100736{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100737 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100738 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100739 int switched;
740
741 if (p->srv_act)
742 grp = &p->lbprm.fwrr.act;
743 else if (p->lbprm.fbck)
744 return p->lbprm.fbck;
745 else if (p->srv_bck)
746 grp = &p->lbprm.fwrr.bck;
747 else
748 return NULL;
749
750 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100751 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100752 full = NULL; /* NULL-terminated list of saturated servers */
753 while (1) {
754 /* if we see an empty group, let's first try to collect weights
755 * which might have recently changed.
756 */
757 if (!grp->curr_weight)
758 grp->curr_pos = grp->curr_weight = grp->next_weight;
759
760 /* get first server from the "current" tree. When the end of
761 * the tree is reached, we may have to switch, but only once.
762 */
763 while (1) {
764 srv = fwrr_get_server_from_group(grp);
765 if (srv)
766 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100767 if (switched) {
768 if (avoided) {
769 srv = avoided;
770 break;
771 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100772 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100773 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100774 switched = 1;
775 fwrr_switch_trees(grp);
776
777 }
778
779 /* OK, we have a server. However, it may be saturated, in which
780 * case we don't want to reconsider it for now. We'll update
781 * its position and dequeue it anyway, so that we can move it
782 * to a better place afterwards.
783 */
784 fwrr_update_position(grp, srv);
785 fwrr_dequeue_srv(srv);
786 grp->curr_pos++;
Willy Tarreau7c669d72008-06-20 15:04:11 +0200787 if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100788 /* make sure it is not the server we are trying to exclude... */
789 if (srv != srvtoavoid || avoided)
790 break;
791
792 avoided = srv; /* ...but remember that is was selected yet avoided */
793 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100794
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100795 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100796 srv->next_full = full;
797 full = srv;
798 }
799
800 /* OK, we got the best server, let's update it */
801 fwrr_queue_srv(srv);
802
803 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100804 /* Requeue all extracted servers. If full==srv then it was
805 * avoided (unsucessfully) and chained, omit it now.
806 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100807 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100808 if (switched) {
809 /* the tree has switched, requeue all extracted servers
810 * into "init", because their place was lost, and only
811 * their weight matters.
812 */
813 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100814 if (likely(full != srv))
815 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100816 full = full->next_full;
817 } while (full);
818 } else {
819 /* requeue all extracted servers just as if they were consumed
820 * so that they regain their expected place.
821 */
822 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100823 if (likely(full != srv))
824 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100825 full = full->next_full;
826 } while (full);
827 }
828 }
829 return srv;
830}
831
Willy Tarreau51406232008-03-10 22:04:20 +0100832/* Remove a server from a tree. It must have previously been dequeued. This
833 * function is meant to be called when a server is going down or has its
834 * weight disabled.
835 */
836static inline void fwlc_remove_from_tree(struct server *s)
837{
838 s->lb_tree = NULL;
839}
840
841/* simply removes a server from a tree */
842static inline void fwlc_dequeue_srv(struct server *s)
843{
844 eb32_delete(&s->lb_node);
845}
846
847/* Queue a server in its associated tree, assuming the weight is >0.
848 * Servers are sorted by #conns/weight. To ensure maximum accuracy,
849 * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key.
850 */
851static inline void fwlc_queue_srv(struct server *s)
852{
Willy Tarreau7c669d72008-06-20 15:04:11 +0200853 s->lb_node.key = s->served * SRV_EWGHT_MAX / s->eweight;
Willy Tarreau51406232008-03-10 22:04:20 +0100854 eb32_insert(s->lb_tree, &s->lb_node);
855}
856
857/* Re-position the server in the FWLC tree after it has been assigned one
858 * connection or after it has released one. Note that it is possible that
859 * the server has been moved out of the tree due to failed health-checks.
860 */
861static void fwlc_srv_reposition(struct server *s)
862{
863 if (!s->lb_tree)
864 return;
865 fwlc_dequeue_srv(s);
866 fwlc_queue_srv(s);
867}
868
869/* This function updates the server trees according to server <srv>'s new
870 * state. It should be called when server <srv>'s status changes to down.
871 * It is not important whether the server was already down or not. It is not
872 * important either that the new state is completely down (the caller may not
873 * know all the variables of a server's state).
874 */
875static void fwlc_set_server_status_down(struct server *srv)
876{
877 struct proxy *p = srv->proxy;
878
879 if (srv->state == srv->prev_state &&
880 srv->eweight == srv->prev_eweight)
881 return;
882
883 if (srv_is_usable(srv->state, srv->eweight))
884 goto out_update_state;
885
886 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
887 /* server was already down */
888 goto out_update_backend;
889
890 if (srv->state & SRV_BACKUP) {
891 p->lbprm.tot_wbck -= srv->prev_eweight;
892 p->srv_bck--;
893
894 if (srv == p->lbprm.fbck) {
895 /* we lost the first backup server in a single-backup
896 * configuration, we must search another one.
897 */
898 struct server *srv2 = p->lbprm.fbck;
899 do {
900 srv2 = srv2->next;
901 } while (srv2 &&
902 !((srv2->state & SRV_BACKUP) &&
903 srv_is_usable(srv2->state, srv2->eweight)));
904 p->lbprm.fbck = srv2;
905 }
906 } else {
907 p->lbprm.tot_wact -= srv->prev_eweight;
908 p->srv_act--;
909 }
910
911 fwlc_dequeue_srv(srv);
912 fwlc_remove_from_tree(srv);
913
914out_update_backend:
915 /* check/update tot_used, tot_weight */
916 update_backend_weight(p);
917 out_update_state:
918 srv->prev_state = srv->state;
919 srv->prev_eweight = srv->eweight;
920}
921
922/* This function updates the server trees according to server <srv>'s new
923 * state. It should be called when server <srv>'s status changes to up.
924 * It is not important whether the server was already down or not. It is not
925 * important either that the new state is completely UP (the caller may not
926 * know all the variables of a server's state). This function will not change
927 * the weight of a server which was already up.
928 */
929static void fwlc_set_server_status_up(struct server *srv)
930{
931 struct proxy *p = srv->proxy;
932
933 if (srv->state == srv->prev_state &&
934 srv->eweight == srv->prev_eweight)
935 return;
936
937 if (!srv_is_usable(srv->state, srv->eweight))
938 goto out_update_state;
939
940 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
941 /* server was already up */
942 goto out_update_backend;
943
944 if (srv->state & SRV_BACKUP) {
945 srv->lb_tree = &p->lbprm.fwlc.bck;
946 p->lbprm.tot_wbck += srv->eweight;
947 p->srv_bck++;
948
949 if (!(p->options & PR_O_USE_ALL_BK)) {
950 if (!p->lbprm.fbck) {
951 /* there was no backup server anymore */
952 p->lbprm.fbck = srv;
953 } else {
954 /* we may have restored a backup server prior to fbck,
955 * in which case it should replace it.
956 */
957 struct server *srv2 = srv;
958 do {
959 srv2 = srv2->next;
960 } while (srv2 && (srv2 != p->lbprm.fbck));
961 if (srv2)
962 p->lbprm.fbck = srv;
963 }
964 }
965 } else {
966 srv->lb_tree = &p->lbprm.fwlc.act;
967 p->lbprm.tot_wact += srv->eweight;
968 p->srv_act++;
969 }
970
971 /* note that eweight cannot be 0 here */
972 fwlc_queue_srv(srv);
973
974 out_update_backend:
975 /* check/update tot_used, tot_weight */
976 update_backend_weight(p);
977 out_update_state:
978 srv->prev_state = srv->state;
979 srv->prev_eweight = srv->eweight;
980}
981
982/* This function must be called after an update to server <srv>'s effective
983 * weight. It may be called after a state change too.
984 */
985static void fwlc_update_server_weight(struct server *srv)
986{
987 int old_state, new_state;
988 struct proxy *p = srv->proxy;
989
990 if (srv->state == srv->prev_state &&
991 srv->eweight == srv->prev_eweight)
992 return;
993
994 /* If changing the server's weight changes its state, we simply apply
995 * the procedures we already have for status change. If the state
996 * remains down, the server is not in any tree, so it's as easy as
997 * updating its values. If the state remains up with different weights,
998 * there are some computations to perform to find a new place and
999 * possibly a new tree for this server.
1000 */
1001
1002 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
1003 new_state = srv_is_usable(srv->state, srv->eweight);
1004
1005 if (!old_state && !new_state) {
1006 srv->prev_state = srv->state;
1007 srv->prev_eweight = srv->eweight;
1008 return;
1009 }
1010 else if (!old_state && new_state) {
1011 fwlc_set_server_status_up(srv);
1012 return;
1013 }
1014 else if (old_state && !new_state) {
1015 fwlc_set_server_status_down(srv);
1016 return;
1017 }
1018
1019 if (srv->lb_tree)
1020 fwlc_dequeue_srv(srv);
1021
1022 if (srv->state & SRV_BACKUP) {
1023 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
1024 srv->lb_tree = &p->lbprm.fwlc.bck;
1025 } else {
1026 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
1027 srv->lb_tree = &p->lbprm.fwlc.act;
1028 }
1029
1030 fwlc_queue_srv(srv);
1031
1032 update_backend_weight(p);
1033 srv->prev_state = srv->state;
1034 srv->prev_eweight = srv->eweight;
1035}
1036
1037/* This function is responsible for building the trees in case of fast
1038 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
1039 * uweight ratio. Both active and backup groups are initialized.
1040 */
1041void fwlc_init_server_tree(struct proxy *p)
1042{
1043 struct server *srv;
1044 struct eb_root init_head = EB_ROOT;
1045
1046 p->lbprm.set_server_status_up = fwlc_set_server_status_up;
1047 p->lbprm.set_server_status_down = fwlc_set_server_status_down;
1048 p->lbprm.update_server_eweight = fwlc_update_server_weight;
1049 p->lbprm.server_take_conn = fwlc_srv_reposition;
1050 p->lbprm.server_drop_conn = fwlc_srv_reposition;
1051
1052 p->lbprm.wdiv = BE_WEIGHT_SCALE;
1053 for (srv = p->srv; srv; srv = srv->next) {
1054 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
1055 srv->prev_state = srv->state;
1056 }
1057
1058 recount_servers(p);
1059 update_backend_weight(p);
1060
1061 p->lbprm.fwlc.act = init_head;
1062 p->lbprm.fwlc.bck = init_head;
1063
1064 /* queue active and backup servers in two distinct groups */
1065 for (srv = p->srv; srv; srv = srv->next) {
1066 if (!srv_is_usable(srv->state, srv->eweight))
1067 continue;
1068 srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
1069 fwlc_queue_srv(srv);
1070 }
1071}
1072
1073/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
1074 * return NULL. Saturated servers are skipped.
1075 */
1076static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
1077{
1078 struct server *srv, *avoided;
1079 struct eb32_node *node;
1080
1081 srv = avoided = NULL;
1082
1083 if (p->srv_act)
1084 node = eb32_first(&p->lbprm.fwlc.act);
1085 else if (p->lbprm.fbck)
1086 return p->lbprm.fbck;
1087 else if (p->srv_bck)
1088 node = eb32_first(&p->lbprm.fwlc.bck);
1089 else
1090 return NULL;
1091
1092 while (node) {
1093 /* OK, we have a server. However, it may be saturated, in which
1094 * case we don't want to reconsider it for now, so we'll simply
1095 * skip it. Same if it's the server we try to avoid, in which
1096 * case we simply remember it for later use if needed.
1097 */
1098 struct server *s;
1099
1100 s = eb32_entry(node, struct server, lb_node);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001101 if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
Willy Tarreau51406232008-03-10 22:04:20 +01001102 if (s != srvtoavoid) {
1103 srv = s;
1104 break;
1105 }
1106 avoided = s;
1107 }
1108 node = eb32_next(node);
1109 }
1110
1111 if (!srv)
1112 srv = avoided;
1113
1114 return srv;
1115}
1116
Willy Tarreau01732802007-11-01 22:48:15 +01001117/*
1118 * This function tries to find a running server for the proxy <px> following
1119 * the URL parameter hash method. It looks for a specific parameter in the
1120 * URL and hashes it to compute the server ID. This is useful to optimize
1121 * performance by avoiding bounces between servers in contexts where sessions
1122 * are shared but cookies are not usable. If the parameter is not found, NULL
1123 * is returned. If any server is found, it will be returned. If no valid server
1124 * is found, NULL is returned.
Willy Tarreau01732802007-11-01 22:48:15 +01001125 */
1126struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
1127{
1128 unsigned long hash = 0;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001129 const char *p;
1130 const char *params;
Willy Tarreau01732802007-11-01 22:48:15 +01001131 int plen;
1132
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001133 /* when tot_weight is 0 then so is srv_count */
Willy Tarreau20697042007-11-15 23:26:18 +01001134 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +01001135 return NULL;
1136
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001137 if ((p = memchr(uri, '?', uri_len)) == NULL)
1138 return NULL;
1139
Willy Tarreau20697042007-11-15 23:26:18 +01001140 if (px->lbprm.map.state & PR_MAP_RECALC)
1141 recalc_server_map(px);
1142
Willy Tarreau01732802007-11-01 22:48:15 +01001143 p++;
1144
1145 uri_len -= (p - uri);
1146 plen = px->url_param_len;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001147 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001148
1149 while (uri_len > plen) {
1150 /* Look for the parameter name followed by an equal symbol */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001151 if (params[plen] == '=') {
1152 if (memcmp(params, px->url_param_name, plen) == 0) {
1153 /* OK, we have the parameter here at <params>, and
Willy Tarreau01732802007-11-01 22:48:15 +01001154 * the value after the equal sign, at <p>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001155 * skip the equal symbol
Willy Tarreau01732802007-11-01 22:48:15 +01001156 */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001157 p += plen + 1;
1158 uri_len -= plen + 1;
1159
Willy Tarreau01732802007-11-01 22:48:15 +01001160 while (uri_len && *p != '&') {
1161 hash = *p + (hash << 6) + (hash << 16) - hash;
1162 uri_len--;
1163 p++;
1164 }
Willy Tarreau20697042007-11-15 23:26:18 +01001165 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +01001166 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001167 }
1168 /* skip to next parameter */
1169 p = memchr(params, '&', uri_len);
1170 if (!p)
1171 return NULL;
1172 p++;
1173 uri_len -= (p - params);
1174 params = p;
1175 }
1176 return NULL;
1177}
1178
1179/*
1180 * this does the same as the previous server_ph, but check the body contents
1181 */
1182struct server *get_server_ph_post(struct session *s)
1183{
1184 unsigned long hash = 0;
1185 struct http_txn *txn = &s->txn;
1186 struct buffer *req = s->req;
1187 struct http_msg *msg = &txn->req;
1188 struct proxy *px = s->be;
1189 unsigned int plen = px->url_param_len;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001190 unsigned long body;
1191 unsigned long len;
1192 const char *params;
1193 struct hdr_ctx ctx;
1194 const char *p;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001195
1196 /* tot_weight appears to mean srv_count */
1197 if (px->lbprm.tot_weight == 0)
1198 return NULL;
1199
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001200 body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
Willy Tarreaufb0528b2008-08-11 00:21:56 +02001201 len = req->l - body;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001202 params = req->data + body;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001203
1204 if ( len == 0 )
1205 return NULL;
1206
1207 if (px->lbprm.map.state & PR_MAP_RECALC)
1208 recalc_server_map(px);
1209
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001210 ctx.idx = 0;
1211
1212 /* if the message is chunked, we skip the chunk size, but use the value as len */
1213 http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
Willy Tarreauadfb8562008-08-11 15:24:42 +02001214 if (ctx.idx && ctx.vlen >= 7 && strncasecmp(ctx.line+ctx.val, "chunked", 7) == 0) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001215 unsigned int chunk = 0;
Willy Tarreau03d60bb2009-01-09 11:13:00 +01001216 while ( params < (req->data+req->max_len) && !HTTP_IS_CRLF(*params)) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001217 char c = *params;
1218 if (ishex(c)) {
1219 unsigned int hex = toupper(c) - '0';
1220 if ( hex > 9 )
1221 hex -= 'A' - '9' - 1;
1222 chunk = (chunk << 4) | hex;
1223 }
1224 else
1225 return NULL;
1226 params++;
1227 len--;
Willy Tarreau01732802007-11-01 22:48:15 +01001228 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001229 /* spec says we get CRLF */
1230 if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
1231 params += 2;
1232 else
1233 return NULL;
1234 /* ok we have some encoded length, just inspect the first chunk */
1235 len = chunk;
1236 }
Willy Tarreau01732802007-11-01 22:48:15 +01001237
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001238 p = params;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001239
1240 while (len > plen) {
1241 /* Look for the parameter name followed by an equal symbol */
1242 if (params[plen] == '=') {
1243 if (memcmp(params, px->url_param_name, plen) == 0) {
1244 /* OK, we have the parameter here at <params>, and
1245 * the value after the equal sign, at <p>
1246 * skip the equal symbol
1247 */
1248 p += plen + 1;
1249 len -= plen + 1;
1250
1251 while (len && *p != '&') {
1252 if (unlikely(!HTTP_IS_TOKEN(*p))) {
1253 /* if in a POST, body must be URI encoded or its not a URI.
1254 * Do not interprete any possible binary data as a parameter.
1255 */
1256 if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
1257 break;
1258 return NULL; /* oh, no; this is not uri-encoded.
1259 * This body does not contain parameters.
1260 */
1261 }
1262 hash = *p + (hash << 6) + (hash << 16) - hash;
1263 len--;
1264 p++;
1265 /* should we break if vlen exceeds limit? */
1266 }
1267 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1268 }
1269 }
Willy Tarreau01732802007-11-01 22:48:15 +01001270 /* skip to next parameter */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001271 p = memchr(params, '&', len);
Willy Tarreau01732802007-11-01 22:48:15 +01001272 if (!p)
1273 return NULL;
1274 p++;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001275 len -= (p - params);
1276 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001277 }
1278 return NULL;
1279}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001280
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001281
Willy Tarreaubaaee002006-06-26 02:48:02 +02001282/*
Benoitaffb4812009-03-25 13:02:10 +01001283 * This function tries to find a running server for the proxy <px> following
1284 * the Header parameter hash method. It looks for a specific parameter in the
1285 * URL and hashes it to compute the server ID. This is useful to optimize
1286 * performance by avoiding bounces between servers in contexts where sessions
1287 * are shared but cookies are not usable. If the parameter is not found, NULL
1288 * is returned. If any server is found, it will be returned. If no valid server
1289 * is found, NULL is returned.
1290 */
1291struct server *get_server_hh(struct session *s)
1292{
1293 unsigned long hash = 0;
1294 struct http_txn *txn = &s->txn;
1295 struct http_msg *msg = &txn->req;
1296 struct proxy *px = s->be;
1297 unsigned int plen = px->hh_len;
1298 unsigned long len;
1299 struct hdr_ctx ctx;
1300 const char *p;
1301
1302 /* tot_weight appears to mean srv_count */
1303 if (px->lbprm.tot_weight == 0)
1304 return NULL;
1305
1306 if (px->lbprm.map.state & PR_MAP_RECALC)
1307 recalc_server_map(px);
1308
1309 ctx.idx = 0;
1310
1311 /* if the message is chunked, we skip the chunk size, but use the value as len */
1312 http_find_header2(px->hh_name, plen, msg->sol, &txn->hdr_idx, &ctx);
1313
1314 /* if the header is not found or empty, let's fallback to round robin */
1315 if (!ctx.idx || !ctx.vlen)
1316 return NULL;
1317
1318 /* Found a the hh_name in the headers.
1319 * we will compute the hash based on this value ctx.val.
1320 */
1321 len = ctx.vlen;
1322 p = (char *)ctx.line + ctx.val;
1323 if (!px->hh_match_domain) {
1324 while (len) {
1325 hash = *p + (hash << 6) + (hash << 16) - hash;
1326 len--;
1327 p++;
1328 }
1329 } else {
1330 int dohash = 0;
1331 p += len - 1;
1332 /* special computation, use only main domain name, not tld/host
1333 * going back from the end of string, start hashing at first
1334 * dot stop at next.
1335 * This is designed to work with the 'Host' header, and requires
1336 * a special option to activate this.
1337 */
1338 while (len) {
1339 if (*p == '.') {
1340 if (!dohash)
1341 dohash = 1;
1342 else
1343 break;
1344 } else {
1345 if (dohash)
1346 hash = *p + (hash << 6) + (hash << 16) - hash;
1347 }
1348 len--;
1349 p--;
1350 }
1351 }
1352 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1353}
1354
1355
1356/*
Willy Tarreau7c669d72008-06-20 15:04:11 +02001357 * This function applies the load-balancing algorithm to the session, as
1358 * defined by the backend it is assigned to. The session is then marked as
1359 * 'assigned'.
1360 *
1361 * This function MAY NOT be called with SN_ASSIGNED already set. If the session
1362 * had a server previously assigned, it is rebalanced, trying to avoid the same
1363 * server.
1364 * The function tries to keep the original connection slot if it reconnects to
1365 * the same server, otherwise it releases it and tries to offer it.
1366 *
1367 * It is illegal to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001368 *
1369 * It may return :
Willy Tarreau7c669d72008-06-20 15:04:11 +02001370 * SRV_STATUS_OK if everything is OK. Session assigned to ->srv
1371 * SRV_STATUS_NOSRV if no server is available. Session is not ASSIGNED
1372 * SRV_STATUS_FULL if all servers are saturated. Session is not ASSIGNED
Willy Tarreaubaaee002006-06-26 02:48:02 +02001373 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1374 *
Willy Tarreau7c669d72008-06-20 15:04:11 +02001375 * Upon successful return, the session flag SN_ASSIGNED is set to indicate that
1376 * it does not need to be called anymore. This means that s->srv can be trusted
1377 * in balance and direct modes.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001378 *
1379 */
1380
1381int assign_server(struct session *s)
1382{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001383
Willy Tarreau7c669d72008-06-20 15:04:11 +02001384 struct server *conn_slot;
1385 int err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001386
Willy Tarreaubaaee002006-06-26 02:48:02 +02001387#ifdef DEBUG_FULL
1388 fprintf(stderr,"assign_server : s=%p\n",s);
1389#endif
1390
Willy Tarreau7c669d72008-06-20 15:04:11 +02001391 err = SRV_STATUS_INTERNAL;
1392 if (unlikely(s->pend_pos || s->flags & SN_ASSIGNED))
1393 goto out_err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001394
Willy Tarreau7c669d72008-06-20 15:04:11 +02001395 s->prev_srv = s->prev_srv;
1396 conn_slot = s->srv_conn;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001397
Willy Tarreau7c669d72008-06-20 15:04:11 +02001398 /* We have to release any connection slot before applying any LB algo,
1399 * otherwise we may erroneously end up with no available slot.
1400 */
1401 if (conn_slot)
1402 sess_change_server(s, NULL);
1403
1404 /* We will now try to find the good server and store it into <s->srv>.
1405 * Note that <s->srv> may be NULL in case of dispatch or proxy mode,
1406 * as well as if no server is available (check error code).
1407 */
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001408
Willy Tarreau7c669d72008-06-20 15:04:11 +02001409 s->srv = NULL;
1410 if (s->be->lbprm.algo & BE_LB_ALGO) {
1411 int len;
1412 /* we must check if we have at least one server available */
1413 if (!s->be->lbprm.tot_weight) {
1414 err = SRV_STATUS_NOSRV;
1415 goto out;
1416 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001417
Willy Tarreau7c669d72008-06-20 15:04:11 +02001418 switch (s->be->lbprm.algo & BE_LB_ALGO) {
1419 case BE_LB_ALGO_RR:
1420 s->srv = fwrr_get_next_server(s->be, s->prev_srv);
1421 if (!s->srv) {
1422 err = SRV_STATUS_FULL;
1423 goto out;
1424 }
1425 break;
1426 case BE_LB_ALGO_LC:
1427 s->srv = fwlc_get_next_server(s->be, s->prev_srv);
1428 if (!s->srv) {
1429 err = SRV_STATUS_FULL;
1430 goto out;
1431 }
1432 break;
1433 case BE_LB_ALGO_SH:
1434 if (s->cli_addr.ss_family == AF_INET)
1435 len = 4;
1436 else if (s->cli_addr.ss_family == AF_INET6)
1437 len = 16;
1438 else {
1439 /* unknown IP family */
1440 err = SRV_STATUS_INTERNAL;
1441 goto out;
1442 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001443
Willy Tarreau7c669d72008-06-20 15:04:11 +02001444 s->srv = get_server_sh(s->be,
1445 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
1446 len);
1447 break;
1448 case BE_LB_ALGO_UH:
1449 /* URI hashing */
1450 s->srv = get_server_uh(s->be,
1451 s->txn.req.sol + s->txn.req.sl.rq.u,
1452 s->txn.req.sl.rq.u_l);
1453 break;
1454 case BE_LB_ALGO_PH:
1455 /* URL Parameter hashing */
1456 if (s->txn.meth == HTTP_METH_POST &&
1457 memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
1458 s->txn.req.sl.rq.u_l ) == NULL)
1459 s->srv = get_server_ph_post(s);
1460 else
1461 s->srv = get_server_ph(s->be,
Willy Tarreau2fcb5002007-05-08 13:35:26 +02001462 s->txn.req.sol + s->txn.req.sl.rq.u,
1463 s->txn.req.sl.rq.u_l);
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001464
Willy Tarreau7c669d72008-06-20 15:04:11 +02001465 if (!s->srv) {
1466 /* parameter not found, fall back to round robin on the map */
1467 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
Willy Tarreau01732802007-11-01 22:48:15 +01001468 if (!s->srv) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001469 err = SRV_STATUS_FULL;
1470 goto out;
Willy Tarreau01732802007-11-01 22:48:15 +01001471 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001472 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001473 break;
Benoitaffb4812009-03-25 13:02:10 +01001474 case BE_LB_ALGO_HH:
1475 /* Header Parameter hashing */
1476 s->srv = get_server_hh(s);
1477
1478 if (!s->srv) {
1479 /* parameter not found, fall back to round robin on the map */
1480 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
1481 if (!s->srv) {
1482 err = SRV_STATUS_FULL;
1483 goto out;
1484 }
1485 }
1486 break;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001487 default:
1488 /* unknown balancing algorithm */
1489 err = SRV_STATUS_INTERNAL;
1490 goto out;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001491 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001492 if (s->srv != s->prev_srv) {
1493 s->be->cum_lbconn++;
1494 s->srv->cum_lbconn++;
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001495 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001496 }
1497 else if (s->be->options & PR_O_HTTP_PROXY) {
1498 if (!s->srv_addr.sin_addr.s_addr) {
1499 err = SRV_STATUS_NOSRV;
1500 goto out;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001501 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001502 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001503 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001504 !(s->be->options & PR_O_TRANSP)) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001505 err = SRV_STATUS_NOSRV;
1506 goto out;
1507 }
1508
1509 s->flags |= SN_ASSIGNED;
1510 err = SRV_STATUS_OK;
1511 out:
1512
1513 /* Either we take back our connection slot, or we offer it to someone
1514 * else if we don't need it anymore.
1515 */
1516 if (conn_slot) {
1517 if (conn_slot == s->srv) {
1518 sess_change_server(s, s->srv);
1519 } else {
1520 if (may_dequeue_tasks(conn_slot, s->be))
1521 process_srv_queue(conn_slot);
1522 }
1523 }
1524
1525 out_err:
1526 return err;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001527}
1528
1529
1530/*
1531 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1532 * The address is taken from the currently assigned server, or from the
1533 * dispatch or transparent address.
1534 *
1535 * It may return :
1536 * SRV_STATUS_OK if everything is OK.
1537 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1538 *
1539 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1540 * not cleared, so it's to the caller to clear it if required.
1541 *
1542 */
1543int assign_server_address(struct session *s)
1544{
1545#ifdef DEBUG_FULL
1546 fprintf(stderr,"assign_server_address : s=%p\n",s);
1547#endif
1548
Willy Tarreau31682232007-11-29 15:38:04 +01001549 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001550 /* A server is necessarily known for this session */
1551 if (!(s->flags & SN_ASSIGNED))
1552 return SRV_STATUS_INTERNAL;
1553
1554 s->srv_addr = s->srv->addr;
1555
1556 /* if this server remaps proxied ports, we'll use
1557 * the port the client connected to with an offset. */
1558 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001559 if (!(s->be->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001560 get_frt_addr(s);
1561 if (s->frt_addr.ss_family == AF_INET) {
1562 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1563 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1564 } else {
1565 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1566 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1567 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001568 }
1569 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001570 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001571 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001572 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001573 }
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001574 else if (s->be->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001575 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001576 if (!(s->flags & SN_FRT_ADDR_SET))
1577 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001578
Willy Tarreaubd414282008-01-19 13:46:35 +01001579 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1580 /* when we support IPv6 on the backend, we may add other tests */
1581 //qfprintf(stderr, "Cannot get original server address.\n");
1582 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001583 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001584 else if (s->be->options & PR_O_HTTP_PROXY) {
1585 /* If HTTP PROXY option is set, then server is already assigned
1586 * during incoming client request parsing. */
1587 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001588 else {
1589 /* no server and no LB algorithm ! */
1590 return SRV_STATUS_INTERNAL;
1591 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001592
1593 s->flags |= SN_ADDR_SET;
1594 return SRV_STATUS_OK;
1595}
1596
1597
1598/* This function assigns a server to session <s> if required, and can add the
1599 * connection to either the assigned server's queue or to the proxy's queue.
Willy Tarreau7c669d72008-06-20 15:04:11 +02001600 * If ->srv_conn is set, the session is first released from the server.
1601 * It may also be called with SN_DIRECT and/or SN_ASSIGNED though. It will
1602 * be called before any connection and after any retry or redispatch occurs.
1603 *
1604 * It is not allowed to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001605 *
1606 * Returns :
1607 *
1608 * SRV_STATUS_OK if everything is OK.
1609 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1610 * SRV_STATUS_QUEUED if the connection has been queued.
1611 * SRV_STATUS_FULL if the server(s) is/are saturated and the
Willy Tarreau7c669d72008-06-20 15:04:11 +02001612 * connection could not be queued in s->srv,
1613 * which may be NULL if we queue on the backend.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001614 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1615 *
1616 */
1617int assign_server_and_queue(struct session *s)
1618{
1619 struct pendconn *p;
1620 int err;
1621
1622 if (s->pend_pos)
1623 return SRV_STATUS_INTERNAL;
1624
Willy Tarreau7c669d72008-06-20 15:04:11 +02001625 err = SRV_STATUS_OK;
1626 if (!(s->flags & SN_ASSIGNED)) {
1627 err = assign_server(s);
1628 if (s->prev_srv) {
1629 /* This session was previously assigned to a server. We have to
1630 * update the session's and the server's stats :
1631 * - if the server changed :
1632 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1633 * - set SN_REDISP if it was successfully redispatched
1634 * - increment srv->redispatches and be->redispatches
1635 * - if the server remained the same : update retries.
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001636 */
1637
Willy Tarreau7c669d72008-06-20 15:04:11 +02001638 if (s->prev_srv != s->srv) {
1639 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1640 s->txn.flags &= ~TX_CK_MASK;
1641 s->txn.flags |= TX_CK_DOWN;
1642 }
1643 s->flags |= SN_REDISP;
1644 s->prev_srv->redispatches++;
1645 s->be->redispatches++;
1646 } else {
1647 s->prev_srv->retries++;
1648 s->be->retries++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001649 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001650 }
1651 }
1652
Willy Tarreaubaaee002006-06-26 02:48:02 +02001653 switch (err) {
1654 case SRV_STATUS_OK:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001655 /* we have SN_ASSIGNED set */
1656 if (!s->srv)
1657 return SRV_STATUS_OK; /* dispatch or proxy mode */
1658
1659 /* If we already have a connection slot, no need to check any queue */
1660 if (s->srv_conn == s->srv)
1661 return SRV_STATUS_OK;
1662
1663 /* OK, this session already has an assigned server, but no
1664 * connection slot yet. Either it is a redispatch, or it was
1665 * assigned from persistence information (direct mode).
1666 */
1667 if ((s->flags & SN_REDIRECTABLE) && s->srv->rdr_len) {
1668 /* server scheduled for redirection, and already assigned. We
1669 * don't want to go further nor check the queue.
Willy Tarreau21d2af32008-02-14 20:25:24 +01001670 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001671 sess_change_server(s, s->srv); /* not really needed in fact */
Willy Tarreau21d2af32008-02-14 20:25:24 +01001672 return SRV_STATUS_OK;
1673 }
1674
Willy Tarreau7c669d72008-06-20 15:04:11 +02001675 /* We might have to queue this session if the assigned server is full.
1676 * We know we have to queue it into the server's queue, so if a maxqueue
1677 * is set on the server, we must also check that the server's queue is
1678 * not full, in which case we have to return FULL.
1679 */
1680 if (s->srv->maxconn &&
1681 (s->srv->nbpend || s->srv->served >= srv_dynamic_maxconn(s->srv))) {
1682
1683 if (s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue)
1684 return SRV_STATUS_FULL;
1685
Willy Tarreaubaaee002006-06-26 02:48:02 +02001686 p = pendconn_add(s);
1687 if (p)
1688 return SRV_STATUS_QUEUED;
1689 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001690 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001691 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001692
1693 /* OK, we can use this server. Let's reserve our place */
1694 sess_change_server(s, s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001695 return SRV_STATUS_OK;
1696
1697 case SRV_STATUS_FULL:
1698 /* queue this session into the proxy's queue */
1699 p = pendconn_add(s);
1700 if (p)
1701 return SRV_STATUS_QUEUED;
1702 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001703 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001704
1705 case SRV_STATUS_NOSRV:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001706 return err;
1707
Willy Tarreaubaaee002006-06-26 02:48:02 +02001708 case SRV_STATUS_INTERNAL:
1709 return err;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001710
Willy Tarreaubaaee002006-06-26 02:48:02 +02001711 default:
1712 return SRV_STATUS_INTERNAL;
1713 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001714}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001715
1716/*
1717 * This function initiates a connection to the server assigned to this session
1718 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1719 * It can return one of :
1720 * - SN_ERR_NONE if everything's OK
1721 * - SN_ERR_SRVTO if there are no more servers
1722 * - SN_ERR_SRVCL if the connection was refused by the server
1723 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1724 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1725 * - SN_ERR_INTERNAL for any other purely internal errors
1726 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1727 */
1728int connect_server(struct session *s)
1729{
1730 int fd, err;
1731
1732 if (!(s->flags & SN_ADDR_SET)) {
1733 err = assign_server_address(s);
1734 if (err != SRV_STATUS_OK)
1735 return SN_ERR_INTERNAL;
1736 }
1737
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001738 if ((fd = s->req->cons->fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001739 qfprintf(stderr, "Cannot get a server socket.\n");
1740
1741 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001742 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001743 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001744 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001745 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001746 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001747 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001748 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001749 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001750 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001751 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001752 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001753 /* this is a resource error */
1754 return SN_ERR_RESOURCE;
1755 }
Willy Tarreau7e5067d2008-12-07 16:27:56 +01001756
Willy Tarreaubaaee002006-06-26 02:48:02 +02001757 if (fd >= global.maxsock) {
1758 /* do not log anything there, it's a normal condition when this option
1759 * is used to serialize connections to a server !
1760 */
1761 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1762 close(fd);
1763 return SN_ERR_PRXCOND; /* it is a configuration limit */
1764 }
1765
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001766#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreau3ab68cf2009-01-25 16:03:28 +01001767 if ((global.tune.options & GTUNE_USE_SPLICE) &&
1768 (s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001769 /* TCP splicing supported by both FE and BE */
Willy Tarreau7e5067d2008-12-07 16:27:56 +01001770 tcp_splice_initfd(s->req->prod->fd, fd);
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001771 }
1772#endif
1773
Willy Tarreaubaaee002006-06-26 02:48:02 +02001774 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1775 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1776 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1777 close(fd);
1778 return SN_ERR_INTERNAL;
1779 }
1780
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001781 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001782 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1783
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001784 if (s->be->options & PR_O_TCP_NOLING)
1785 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1786
Willy Tarreaubaaee002006-06-26 02:48:02 +02001787 /* allow specific binding :
1788 * - server-specific at first
1789 * - proxy-specific next
1790 */
1791 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001792 struct sockaddr_in *remote = NULL;
1793 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001794
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001795#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001796 switch (s->srv->state & SRV_TPROXY_MASK) {
1797 case SRV_TPROXY_ADDR:
1798 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1799 flags = 3;
1800 break;
1801 case SRV_TPROXY_CLI:
1802 flags |= 2;
1803 /* fall through */
1804 case SRV_TPROXY_CIP:
1805 /* FIXME: what can we do if the client connects in IPv6 ? */
1806 flags |= 1;
1807 remote = (struct sockaddr_in *)&s->cli_addr;
1808 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001809 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001810#endif
Willy Tarreauc76721d2009-02-04 20:20:58 +01001811#ifdef SO_BINDTODEVICE
1812 /* Note: this might fail if not CAP_NET_RAW */
1813 if (s->srv->iface_name)
Willy Tarreau604e8302009-03-06 00:48:23 +01001814 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->srv->iface_name, s->srv->iface_len + 1);
Willy Tarreauc76721d2009-02-04 20:20:58 +01001815#endif
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001816
1817 if (s->srv->sport_range) {
1818 int attempts = 10; /* should be more than enough to find a spare port */
1819 struct sockaddr_in src;
1820
1821 ret = 1;
1822 src = s->srv->source_addr;
1823
1824 do {
1825 /* note: in case of retry, we may have to release a previously
1826 * allocated port, hence this loop's construct.
1827 */
1828 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1829 fdtab[fd].port_range = NULL;
1830
1831 if (!attempts)
1832 break;
1833 attempts--;
1834
1835 fdtab[fd].local_port = port_range_alloc_port(s->srv->sport_range);
1836 if (!fdtab[fd].local_port)
1837 break;
1838
1839 fdtab[fd].port_range = s->srv->sport_range;
1840 src.sin_port = htons(fdtab[fd].local_port);
1841
1842 ret = tcpv4_bind_socket(fd, flags, &src, remote);
1843 } while (ret != 0); /* binding NOK */
1844 }
1845 else {
1846 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
1847 }
1848
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001849 if (ret) {
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001850 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1851 fdtab[fd].port_range = NULL;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001852 close(fd);
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001853
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001854 if (ret == 1) {
1855 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1856 s->be->id, s->srv->id);
1857 send_log(s->be, LOG_EMERG,
1858 "Cannot bind to source address before connect() for server %s/%s.\n",
1859 s->be->id, s->srv->id);
1860 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001861 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001862 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001863 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001864 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001865 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001866 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001867 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001868 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001869 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001870 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001871 struct sockaddr_in *remote = NULL;
1872 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001873
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001874#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001875 switch (s->be->options & PR_O_TPXY_MASK) {
1876 case PR_O_TPXY_ADDR:
1877 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1878 flags = 3;
1879 break;
1880 case PR_O_TPXY_CLI:
1881 flags |= 2;
1882 /* fall through */
1883 case PR_O_TPXY_CIP:
1884 /* FIXME: what can we do if the client connects in IPv6 ? */
1885 flags |= 1;
1886 remote = (struct sockaddr_in *)&s->cli_addr;
1887 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001888 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001889#endif
Willy Tarreaud53f96b2009-02-04 18:46:54 +01001890#ifdef SO_BINDTODEVICE
1891 /* Note: this might fail if not CAP_NET_RAW */
1892 if (s->be->iface_name)
Willy Tarreau604e8302009-03-06 00:48:23 +01001893 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->be->iface_name, s->be->iface_len + 1);
Willy Tarreaud53f96b2009-02-04 18:46:54 +01001894#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001895 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001896 if (ret) {
1897 close(fd);
1898 if (ret == 1) {
1899 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1900 s->be->id);
1901 send_log(s->be, LOG_EMERG,
1902 "Cannot bind to source address before connect() for proxy %s.\n",
1903 s->be->id);
1904 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001905 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001906 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001907 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001908 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1909 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001910 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001911 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001912 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001913 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001914
Willy Tarreaubaaee002006-06-26 02:48:02 +02001915 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1916 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1917
1918 if (errno == EAGAIN || errno == EADDRINUSE) {
1919 char *msg;
1920 if (errno == EAGAIN) /* no free ports left, try again later */
1921 msg = "no free ports";
1922 else
1923 msg = "local address already in use";
1924
1925 qfprintf(stderr,"Cannot connect: %s.\n",msg);
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001926 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1927 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001928 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001929 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001930 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001931 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001932 return SN_ERR_RESOURCE;
1933 } else if (errno == ETIMEDOUT) {
1934 //qfprintf(stderr,"Connect(): ETIMEDOUT");
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001935 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1936 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001937 close(fd);
1938 return SN_ERR_SRVTO;
1939 } else {
1940 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1941 //qfprintf(stderr,"Connect(): %d", errno);
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001942 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1943 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001944 close(fd);
1945 return SN_ERR_SRVCL;
1946 }
1947 }
1948
Willy Tarreaue5ed4062008-08-30 03:17:31 +02001949 fdtab[fd].owner = s->req->cons;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001950 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaufb14edc2009-06-14 15:24:37 +02001951 fdtab[fd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
Willy Tarreaud7971282006-07-29 18:36:34 +02001952 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001953 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001954 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001955 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001956
1957 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1958 fdtab[fd].peerlen = sizeof(s->srv_addr);
1959
Willy Tarreaubaaee002006-06-26 02:48:02 +02001960 fd_insert(fd);
Willy Tarreau788e2842008-08-26 13:25:39 +02001961 EV_FD_SET(fd, DIR_WR); /* for connect status */
1962
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001963 s->req->cons->state = SI_ST_CON;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001964 if (s->srv) {
Willy Tarreau1e62de62008-11-11 20:20:02 +01001965 s->flags |= SN_CURR_SESS;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001966 s->srv->cur_sess++;
1967 if (s->srv->cur_sess > s->srv->cur_sess_max)
1968 s->srv->cur_sess_max = s->srv->cur_sess;
Willy Tarreau51406232008-03-10 22:04:20 +01001969 if (s->be->lbprm.server_take_conn)
1970 s->be->lbprm.server_take_conn(s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001971 }
1972
Willy Tarreaua3780f22009-03-15 21:49:00 +01001973 s->req->cons->exp = tick_add_ifset(now_ms, s->be->timeout.connect);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001974 return SN_ERR_NONE; /* connection is OK */
1975}
1976
1977
Willy Tarreaubaaee002006-06-26 02:48:02 +02001978/* This function performs the "redispatch" part of a connection attempt. It
1979 * will assign a server if required, queue the connection if required, and
1980 * handle errors that might arise at this level. It can change the server
1981 * state. It will return 1 if it encounters an error, switches the server
1982 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1983 * that the connection is ready to use.
1984 */
1985
1986int srv_redispatch_connect(struct session *t)
1987{
1988 int conn_err;
1989
1990 /* We know that we don't have any connection pending, so we will
1991 * try to get a new one, and wait in this state if it's queued
1992 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001993 redispatch:
Willy Tarreaubaaee002006-06-26 02:48:02 +02001994 conn_err = assign_server_and_queue(t);
1995 switch (conn_err) {
1996 case SRV_STATUS_OK:
1997 break;
1998
Willy Tarreau7c669d72008-06-20 15:04:11 +02001999 case SRV_STATUS_FULL:
2000 /* The server has reached its maxqueue limit. Either PR_O_REDISP is set
2001 * and we can redispatch to another server, or it is not and we return
2002 * 503. This only makes sense in DIRECT mode however, because normal LB
2003 * algorithms would never select such a server, and hash algorithms
2004 * would bring us on the same server again. Note that t->srv is set in
2005 * this case.
2006 */
2007 if ((t->flags & SN_DIRECT) && (t->be->options & PR_O_REDISP)) {
2008 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
2009 t->prev_srv = t->srv;
2010 goto redispatch;
2011 }
2012
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002013 if (!t->req->cons->err_type) {
2014 t->req->cons->err_type = SI_ET_QUEUE_ERR;
2015 t->req->cons->err_loc = t->srv;
2016 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02002017
2018 t->srv->failed_conns++;
2019 t->be->failed_conns++;
2020 return 1;
2021
Willy Tarreaubaaee002006-06-26 02:48:02 +02002022 case SRV_STATUS_NOSRV:
2023 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002024 if (!t->req->cons->err_type) {
2025 t->req->cons->err_type = SI_ET_CONN_ERR;
2026 t->req->cons->err_loc = NULL;
2027 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01002028
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002029 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002030 return 1;
2031
2032 case SRV_STATUS_QUEUED:
Willy Tarreau35374672008-09-03 18:11:02 +02002033 t->req->cons->exp = tick_add_ifset(now_ms, t->be->timeout.queue);
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002034 t->req->cons->state = SI_ST_QUE;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002035 /* do nothing else and do not wake any other session up */
2036 return 1;
2037
Willy Tarreaubaaee002006-06-26 02:48:02 +02002038 case SRV_STATUS_INTERNAL:
2039 default:
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002040 if (!t->req->cons->err_type) {
2041 t->req->cons->err_type = SI_ET_CONN_OTHER;
2042 t->req->cons->err_loc = t->srv;
2043 }
2044
Willy Tarreaubaaee002006-06-26 02:48:02 +02002045 if (t->srv)
Willy Tarreau7f062c42009-03-05 18:43:00 +01002046 srv_inc_sess_ctr(t->srv);
Willy Tarreau98937b82007-12-10 15:05:42 +01002047 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02002048 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002049 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002050
2051 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002052 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02002053 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002054 return 1;
2055 }
2056 /* if we get here, it's because we got SRV_STATUS_OK, which also
2057 * means that the connection has not been queued.
2058 */
2059 return 0;
2060}
2061
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002062int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01002063 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002064 return px->down_time;
2065
2066 return now.tv_sec - px->last_change + px->down_time;
2067}
Willy Tarreaubaaee002006-06-26 02:48:02 +02002068
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002069/* This function parses a "balance" statement in a backend section describing
2070 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
2071 * returns -1, it may write an error message into ther <err> buffer, for at
2072 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
2073 * written. The function must be called with <args> pointing to the first word
2074 * after "balance".
2075 */
2076int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
2077{
2078 if (!*(args[0])) {
2079 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01002080 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2081 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002082 return 0;
2083 }
2084
2085 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002086 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2087 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002088 }
Willy Tarreau51406232008-03-10 22:04:20 +01002089 else if (!strcmp(args[0], "leastconn")) {
2090 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2091 curproxy->lbprm.algo |= BE_LB_ALGO_LC;
2092 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002093 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002094 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2095 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002096 }
2097 else if (!strcmp(args[0], "uri")) {
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002098 int arg = 1;
2099
Willy Tarreau31682232007-11-29 15:38:04 +01002100 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2101 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002102
2103 while (*args[arg]) {
2104 if (!strcmp(args[arg], "len")) {
2105 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2106 snprintf(err, errlen, "'balance uri len' expects a positive integer (got '%s').", args[arg+1]);
2107 return -1;
2108 }
2109 curproxy->uri_len_limit = atoi(args[arg+1]);
2110 arg += 2;
2111 }
2112 else if (!strcmp(args[arg], "depth")) {
2113 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2114 snprintf(err, errlen, "'balance uri depth' expects a positive integer (got '%s').", args[arg+1]);
2115 return -1;
2116 }
2117 /* hint: we store the position of the ending '/' (depth+1) so
2118 * that we avoid a comparison while computing the hash.
2119 */
2120 curproxy->uri_dirs_depth1 = atoi(args[arg+1]) + 1;
2121 arg += 2;
2122 }
2123 else {
2124 snprintf(err, errlen, "'balance uri' only accepts parameters 'len' and 'depth' (got '%s').", args[arg]);
2125 return -1;
2126 }
2127 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002128 }
Willy Tarreau01732802007-11-01 22:48:15 +01002129 else if (!strcmp(args[0], "url_param")) {
2130 if (!*args[1]) {
2131 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
2132 return -1;
2133 }
Willy Tarreau31682232007-11-29 15:38:04 +01002134 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2135 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreaua534fea2008-08-03 12:19:50 +02002136
2137 free(curproxy->url_param_name);
Willy Tarreau01732802007-11-01 22:48:15 +01002138 curproxy->url_param_name = strdup(args[1]);
Willy Tarreaua534fea2008-08-03 12:19:50 +02002139 curproxy->url_param_len = strlen(args[1]);
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002140 if (*args[2]) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02002141 if (strcmp(args[2], "check_post")) {
2142 snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
2143 return -1;
2144 }
2145 if (*args[3]) {
2146 /* TODO: maybe issue a warning if there is no value, no digits or too long */
2147 curproxy->url_param_post_limit = str2ui(args[3]);
2148 }
2149 /* if no limit, or faul value in args[3], then default to a moderate wordlen */
2150 if (!curproxy->url_param_post_limit)
2151 curproxy->url_param_post_limit = 48;
2152 else if ( curproxy->url_param_post_limit < 3 )
2153 curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
2154 }
Benoitaffb4812009-03-25 13:02:10 +01002155 }
2156 else if (!strncmp(args[0], "hdr(", 4)) {
2157 const char *beg, *end;
2158
2159 beg = args[0] + 4;
2160 end = strchr(beg, ')');
2161
2162 if (!end || end == beg) {
2163 snprintf(err, errlen, "'balance hdr(name)' requires an http header field name.");
2164 return -1;
2165 }
2166
2167 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2168 curproxy->lbprm.algo |= BE_LB_ALGO_HH;
2169
2170 free(curproxy->hh_name);
2171 curproxy->hh_len = end - beg;
2172 curproxy->hh_name = my_strndup(beg, end - beg);
2173 curproxy->hh_match_domain = 0;
2174
2175 if (*args[1]) {
2176 if (strcmp(args[1], "use_domain_only")) {
2177 snprintf(err, errlen, "'balance hdr(name)' only accepts 'use_domain_only' modifier.");
2178 return -1;
2179 }
2180 curproxy->hh_match_domain = 1;
2181 }
2182
Willy Tarreau01732802007-11-01 22:48:15 +01002183 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002184 else {
Benoitaffb4812009-03-25 13:02:10 +01002185 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri', 'url_param' and 'hdr(name)' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002186 return -1;
2187 }
2188 return 0;
2189}
2190
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002191
2192/************************************************************************/
2193/* All supported keywords must be declared here. */
2194/************************************************************************/
2195
2196/* set test->i to the number of enabled servers on the proxy */
2197static int
2198acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
2199 struct acl_expr *expr, struct acl_test *test)
2200{
2201 test->flags = ACL_TEST_F_VOL_TEST;
2202 if (expr->arg_len) {
2203 /* another proxy was designated, we must look for it */
2204 for (px = proxy; px; px = px->next)
2205 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2206 break;
2207 }
2208 if (!px)
2209 return 0;
2210
2211 if (px->srv_act)
2212 test->i = px->srv_act;
2213 else if (px->lbprm.fbck)
2214 test->i = 1;
2215 else
2216 test->i = px->srv_bck;
2217
2218 return 1;
2219}
2220
Jeffrey 'jf' Lim5051d7b2008-09-04 01:03:03 +08002221/* set test->i to the number of enabled servers on the proxy */
2222static int
2223acl_fetch_connslots(struct proxy *px, struct session *l4, void *l7, int dir,
2224 struct acl_expr *expr, struct acl_test *test)
2225{
2226 struct server *iterator;
2227 test->flags = ACL_TEST_F_VOL_TEST;
2228 if (expr->arg_len) {
2229 /* another proxy was designated, we must look for it */
2230 for (px = proxy; px; px = px->next)
2231 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2232 break;
2233 }
2234 if (!px)
2235 return 0;
2236
2237 test->i = 0;
2238 iterator = px->srv;
2239 while (iterator) {
2240 if ((iterator->state & 1) == 0) {
2241 iterator = iterator->next;
2242 continue;
2243 }
2244 if (iterator->maxconn == 0 || iterator->maxqueue == 0) {
2245 test->i = -1;
2246 return 1;
2247 }
2248
2249 test->i += (iterator->maxconn - iterator->cur_sess)
2250 + (iterator->maxqueue - iterator->nbpend);
2251 iterator = iterator->next;
2252 }
2253
2254 return 1;
2255}
2256
Willy Tarreau079ff0a2009-03-05 21:34:28 +01002257/* set test->i to the number of connections per second reaching the frontend */
2258static int
2259acl_fetch_fe_sess_rate(struct proxy *px, struct session *l4, void *l7, int dir,
2260 struct acl_expr *expr, struct acl_test *test)
2261{
2262 test->flags = ACL_TEST_F_VOL_TEST;
2263 if (expr->arg_len) {
2264 /* another proxy was designated, we must look for it */
2265 for (px = proxy; px; px = px->next)
2266 if ((px->cap & PR_CAP_FE) && !strcmp(px->id, expr->arg.str))
2267 break;
2268 }
2269 if (!px)
2270 return 0;
2271
2272 test->i = read_freq_ctr(&px->fe_sess_per_sec);
2273 return 1;
2274}
2275
2276/* set test->i to the number of connections per second reaching the backend */
2277static int
2278acl_fetch_be_sess_rate(struct proxy *px, struct session *l4, void *l7, int dir,
2279 struct acl_expr *expr, struct acl_test *test)
2280{
2281 test->flags = ACL_TEST_F_VOL_TEST;
2282 if (expr->arg_len) {
2283 /* another proxy was designated, we must look for it */
2284 for (px = proxy; px; px = px->next)
2285 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2286 break;
2287 }
2288 if (!px)
2289 return 0;
2290
2291 test->i = read_freq_ctr(&px->be_sess_per_sec);
2292 return 1;
2293}
2294
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002295
2296/* Note: must not be declared <const> as its list will be overwritten */
2297static struct acl_kw_list acl_kws = {{ },{
Jeffrey 'jf' Lim5051d7b2008-09-04 01:03:03 +08002298 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int, ACL_USE_NOTHING },
Willy Tarreau3a8efeb2009-03-05 19:15:37 +01002299 { "connslots", acl_parse_int, acl_fetch_connslots, acl_match_int, ACL_USE_NOTHING },
Willy Tarreau079ff0a2009-03-05 21:34:28 +01002300 { "fe_sess_rate", acl_parse_int, acl_fetch_fe_sess_rate, acl_match_int, ACL_USE_NOTHING },
2301 { "be_sess_rate", acl_parse_int, acl_fetch_be_sess_rate, acl_match_int, ACL_USE_NOTHING },
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002302 { NULL, NULL, NULL, NULL },
2303}};
2304
2305
2306__attribute__((constructor))
2307static void __backend_init(void)
2308{
2309 acl_register_keywords(&acl_kws);
2310}
2311
2312
Willy Tarreaubaaee002006-06-26 02:48:02 +02002313/*
2314 * Local variables:
2315 * c-indent-level: 8
2316 * c-basic-offset: 8
2317 * End:
2318 */