blob: 436a4d9b328761ed15a7d943d51e91f4d7ee730d [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020019
Willy Tarreau2dd0d472006-06-29 17:53:05 +020020#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020021#include <common/config.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010022#include <common/eb32tree.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020023#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020024
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010025#include <types/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020026#include <types/buffers.h>
27#include <types/global.h>
28#include <types/polling.h>
29#include <types/proxy.h>
30#include <types/server.h>
31#include <types/session.h>
32
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010033#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020034#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020035#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020036#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010037#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020038#include <proto/log.h>
39#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010040#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041#include <proto/queue.h>
42#include <proto/stream_sock.h>
43#include <proto/task.h>
44
Willy Tarreau6d1a9882007-01-07 02:03:04 +010045#ifdef CONFIG_HAP_TCPSPLICE
46#include <libtcpsplice.h>
47#endif
48
/* Prototypes of the FWRR helpers which are referenced before their
 * definition appears in this file.
 */
static inline void fwrr_remove_from_tree(struct server *s);
static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
static inline void fwrr_dequeue_srv(struct server *s);
static void fwrr_get_srv(struct server *s);
static void fwrr_queue_srv(struct server *s);
54
55/* This function returns non-zero if a server with the given weight and state
56 * is usable for LB, otherwise zero.
57 */
58static inline int srv_is_usable(int state, int weight)
59{
60 if (!weight)
61 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010062 if (state & SRV_GOINGDOWN)
63 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010064 if (!(state & SRV_RUNNING))
65 return 0;
66 return 1;
67}
68
Willy Tarreaubaaee002006-06-26 02:48:02 +020069/*
70 * This function recounts the number of usable active and backup servers for
71 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010072 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010073 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010074 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020075 */
Willy Tarreaub625a082007-11-26 01:15:43 +010076static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020077{
78 struct server *srv;
79
Willy Tarreau20697042007-11-15 23:26:18 +010080 px->srv_act = px->srv_bck = 0;
81 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010082 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020083 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010084 if (!srv_is_usable(srv->state, srv->eweight))
85 continue;
86
87 if (srv->state & SRV_BACKUP) {
88 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010089 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010090 px->lbprm.fbck = srv;
91 px->srv_bck++;
92 px->lbprm.tot_wbck += srv->eweight;
93 } else {
94 px->srv_act++;
95 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020096 }
97 }
Willy Tarreaub625a082007-11-26 01:15:43 +010098}
Willy Tarreau20697042007-11-15 23:26:18 +010099
Willy Tarreaub625a082007-11-26 01:15:43 +0100100/* This function simply updates the backend's tot_weight and tot_used values
101 * after servers weights have been updated. It is designed to be used after
102 * recount_servers() or equivalent.
103 */
104static void update_backend_weight(struct proxy *px)
105{
Willy Tarreau20697042007-11-15 23:26:18 +0100106 if (px->srv_act) {
107 px->lbprm.tot_weight = px->lbprm.tot_wact;
108 px->lbprm.tot_used = px->srv_act;
109 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100110 else if (px->lbprm.fbck) {
111 /* use only the first backup server */
112 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
113 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100114 }
115 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100116 px->lbprm.tot_weight = px->lbprm.tot_wbck;
117 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100118 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100119}
120
121/* this function updates the map according to server <srv>'s new state */
122static void map_set_server_status_down(struct server *srv)
123{
124 struct proxy *p = srv->proxy;
125
126 if (srv->state == srv->prev_state &&
127 srv->eweight == srv->prev_eweight)
128 return;
129
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100130 if (srv_is_usable(srv->state, srv->eweight))
131 goto out_update_state;
132
Willy Tarreaub625a082007-11-26 01:15:43 +0100133 /* FIXME: could be optimized since we know what changed */
134 recount_servers(p);
135 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100136 p->lbprm.map.state |= PR_MAP_RECALC;
137 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100138 srv->prev_state = srv->state;
139 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200140}
141
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100142/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100143static void map_set_server_status_up(struct server *srv)
144{
145 struct proxy *p = srv->proxy;
146
147 if (srv->state == srv->prev_state &&
148 srv->eweight == srv->prev_eweight)
149 return;
150
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100151 if (!srv_is_usable(srv->state, srv->eweight))
152 goto out_update_state;
153
Willy Tarreaub625a082007-11-26 01:15:43 +0100154 /* FIXME: could be optimized since we know what changed */
155 recount_servers(p);
156 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100157 p->lbprm.map.state |= PR_MAP_RECALC;
158 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100159 srv->prev_state = srv->state;
160 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100161}
162
Willy Tarreau20697042007-11-15 23:26:18 +0100163/* This function recomputes the server map for proxy px. It relies on
164 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
165 * called after recount_servers(). It also expects px->lbprm.map.srv
166 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200167 */
168void recalc_server_map(struct proxy *px)
169{
170 int o, tot, flag;
171 struct server *cur, *best;
172
Willy Tarreau20697042007-11-15 23:26:18 +0100173 switch (px->lbprm.tot_used) {
174 case 0: /* no server */
175 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200176 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100177 case 1: /* only one server, just fill first entry */
178 tot = 1;
179 break;
180 default:
181 tot = px->lbprm.tot_weight;
182 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200183 }
184
Willy Tarreau20697042007-11-15 23:26:18 +0100185 /* here we *know* that we have some servers */
186 if (px->srv_act)
187 flag = SRV_RUNNING;
188 else
189 flag = SRV_RUNNING | SRV_BACKUP;
190
Willy Tarreaubaaee002006-06-26 02:48:02 +0200191 /* this algorithm gives priority to the first server, which means that
192 * it will respect the declaration order for equivalent weights, and
193 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100194 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200195 * case, where we want the first server only.
196 */
197 for (cur = px->srv; cur; cur = cur->next)
198 cur->wscore = 0;
199
200 for (o = 0; o < tot; o++) {
201 int max = 0;
202 best = NULL;
203 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100204 if (flag == (cur->state &
205 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200206 int v;
207
208 /* If we are forced to return only one server, we don't want to
209 * go further, because we would return the wrong one due to
210 * divide overflow.
211 */
212 if (tot == 1) {
213 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100214 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200215 break;
216 }
217
Willy Tarreau417fae02007-03-25 21:16:40 +0200218 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200219 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
220 if (best == NULL || v > max) {
221 max = v;
222 best = cur;
223 }
224 }
225 }
Willy Tarreau20697042007-11-15 23:26:18 +0100226 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200227 best->wscore -= tot;
228 }
Willy Tarreau20697042007-11-15 23:26:18 +0100229 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200230}
231
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100232/* This function is responsible of building the server MAP for map-based LB
233 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
234 * weights if applicable. It should be called only once per proxy, at config
235 * time.
236 */
237void init_server_map(struct proxy *p)
238{
239 struct server *srv;
240 int pgcd;
241 int act, bck;
242
Willy Tarreaub625a082007-11-26 01:15:43 +0100243 p->lbprm.set_server_status_up = map_set_server_status_up;
244 p->lbprm.set_server_status_down = map_set_server_status_down;
245 p->lbprm.update_server_eweight = NULL;
246
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100247 if (!p->srv)
248 return;
249
250 /* We will factor the weights to reduce the table,
251 * using Euclide's largest common divisor algorithm
252 */
253 pgcd = p->srv->uweight;
254 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
255 int w = srv->uweight;
256 while (w) {
257 int t = pgcd % w;
258 pgcd = w;
259 w = t;
260 }
261 }
262
263 /* It is sometimes useful to know what factor to apply
264 * to the backend's effective weight to know its real
265 * weight.
266 */
267 p->lbprm.wmult = pgcd;
268
269 act = bck = 0;
270 for (srv = p->srv; srv; srv = srv->next) {
271 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100272 srv->prev_eweight = srv->eweight;
273 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100274 if (srv->state & SRV_BACKUP)
275 bck += srv->eweight;
276 else
277 act += srv->eweight;
278 }
279
280 /* this is the largest map we will ever need for this servers list */
281 if (act < bck)
282 act = bck;
283
284 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
285 /* recounts servers and their weights */
286 p->lbprm.map.state = PR_MAP_RECALC;
287 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100288 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100289 recalc_server_map(p);
290}
291
Willy Tarreaub625a082007-11-26 01:15:43 +0100292/* This function updates the server trees according to server <srv>'s new
293 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100294 * It is not important whether the server was already down or not. It is not
295 * important either that the new state is completely down (the caller may not
296 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100297 */
298static void fwrr_set_server_status_down(struct server *srv)
299{
300 struct proxy *p = srv->proxy;
301 struct fwrr_group *grp;
302
303 if (srv->state == srv->prev_state &&
304 srv->eweight == srv->prev_eweight)
305 return;
306
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100307 if (srv_is_usable(srv->state, srv->eweight))
308 goto out_update_state;
309
Willy Tarreaub625a082007-11-26 01:15:43 +0100310 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
311 /* server was already down */
312 goto out_update_backend;
313
314 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
315 grp->next_weight -= srv->prev_eweight;
316
317 if (srv->state & SRV_BACKUP) {
318 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
319 p->srv_bck--;
320
321 if (srv == p->lbprm.fbck) {
322 /* we lost the first backup server in a single-backup
323 * configuration, we must search another one.
324 */
325 struct server *srv2 = p->lbprm.fbck;
326 do {
327 srv2 = srv2->next;
328 } while (srv2 &&
329 !((srv2->state & SRV_BACKUP) &&
330 srv_is_usable(srv2->state, srv2->eweight)));
331 p->lbprm.fbck = srv2;
332 }
333 } else {
334 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
335 p->srv_act--;
336 }
337
338 fwrr_dequeue_srv(srv);
339 fwrr_remove_from_tree(srv);
340
341out_update_backend:
342 /* check/update tot_used, tot_weight */
343 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100344 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100345 srv->prev_state = srv->state;
346 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100347}
348
349/* This function updates the server trees according to server <srv>'s new
350 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100351 * It is not important whether the server was already down or not. It is not
352 * important either that the new state is completely UP (the caller may not
353 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100354 * the weight of a server which was already up.
355 */
356static void fwrr_set_server_status_up(struct server *srv)
357{
358 struct proxy *p = srv->proxy;
359 struct fwrr_group *grp;
360
361 if (srv->state == srv->prev_state &&
362 srv->eweight == srv->prev_eweight)
363 return;
364
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100365 if (!srv_is_usable(srv->state, srv->eweight))
366 goto out_update_state;
367
Willy Tarreaub625a082007-11-26 01:15:43 +0100368 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
369 /* server was already up */
370 goto out_update_backend;
371
372 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
373 grp->next_weight += srv->eweight;
374
375 if (srv->state & SRV_BACKUP) {
376 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
377 p->srv_bck++;
378
Willy Tarreauf4cca452008-03-08 21:42:54 +0100379 if (!(p->options & PR_O_USE_ALL_BK)) {
380 if (!p->lbprm.fbck) {
381 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100382 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100383 } else {
384 /* we may have restored a backup server prior to fbck,
385 * in which case it should replace it.
386 */
387 struct server *srv2 = srv;
388 do {
389 srv2 = srv2->next;
390 } while (srv2 && (srv2 != p->lbprm.fbck));
391 if (srv2)
392 p->lbprm.fbck = srv;
393 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100394 }
395 } else {
396 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
397 p->srv_act++;
398 }
399
400 /* note that eweight cannot be 0 here */
401 fwrr_get_srv(srv);
402 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
403 fwrr_queue_srv(srv);
404
405out_update_backend:
406 /* check/update tot_used, tot_weight */
407 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100408 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100409 srv->prev_state = srv->state;
410 srv->prev_eweight = srv->eweight;
411}
412
413/* This function must be called after an update to server <srv>'s effective
414 * weight. It may be called after a state change too.
415 */
416static void fwrr_update_server_weight(struct server *srv)
417{
418 int old_state, new_state;
419 struct proxy *p = srv->proxy;
420 struct fwrr_group *grp;
421
422 if (srv->state == srv->prev_state &&
423 srv->eweight == srv->prev_eweight)
424 return;
425
426 /* If changing the server's weight changes its state, we simply apply
427 * the procedures we already have for status change. If the state
428 * remains down, the server is not in any tree, so it's as easy as
429 * updating its values. If the state remains up with different weights,
430 * there are some computations to perform to find a new place and
431 * possibly a new tree for this server.
432 */
433
434 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
435 new_state = srv_is_usable(srv->state, srv->eweight);
436
437 if (!old_state && !new_state) {
438 srv->prev_state = srv->state;
439 srv->prev_eweight = srv->eweight;
440 return;
441 }
442 else if (!old_state && new_state) {
443 fwrr_set_server_status_up(srv);
444 return;
445 }
446 else if (old_state && !new_state) {
447 fwrr_set_server_status_down(srv);
448 return;
449 }
450
451 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
452 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
453
454 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
455 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
456
457 if (srv->lb_tree == grp->init) {
458 fwrr_dequeue_srv(srv);
459 fwrr_queue_by_weight(grp->init, srv);
460 }
461 else if (!srv->lb_tree) {
462 /* FIXME: server was down. This is not possible right now but
463 * may be needed soon for slowstart or graceful shutdown.
464 */
465 fwrr_dequeue_srv(srv);
466 fwrr_get_srv(srv);
467 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
468 fwrr_queue_srv(srv);
469 } else {
470 /* The server is either active or in the next queue. If it's
471 * still in the active queue and it has not consumed all of its
472 * places, let's adjust its next position.
473 */
474 fwrr_get_srv(srv);
475
476 if (srv->eweight > 0) {
477 int prev_next = srv->npos;
478 int step = grp->next_weight / srv->eweight;
479
480 srv->npos = srv->lpos + step;
481 srv->rweight = 0;
482
483 if (srv->npos > prev_next)
484 srv->npos = prev_next;
485 if (srv->npos < grp->curr_pos + 2)
486 srv->npos = grp->curr_pos + step;
487 } else {
488 /* push it into the next tree */
489 srv->npos = grp->curr_pos + grp->curr_weight;
490 }
491
492 fwrr_dequeue_srv(srv);
493 fwrr_queue_srv(srv);
494 }
495
496 update_backend_weight(p);
497 srv->prev_state = srv->state;
498 srv->prev_eweight = srv->eweight;
499}
500
501/* Remove a server from a tree. It must have previously been dequeued. This
502 * function is meant to be called when a server is going down or has its
503 * weight disabled.
504 */
505static inline void fwrr_remove_from_tree(struct server *s)
506{
507 s->lb_tree = NULL;
508}
509
510/* Queue a server in the weight tree <root>, assuming the weight is >0.
511 * We want to sort them by inverted weights, because we need to place
512 * heavy servers first in order to get a smooth distribution.
513 */
514static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
515{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100516 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100517 eb32_insert(root, &s->lb_node);
518 s->lb_tree = root;
519}
520
521/* This function is responsible for building the weight trees in case of fast
522 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
523 * ratio. Both active and backup groups are initialized.
524 */
525void fwrr_init_server_groups(struct proxy *p)
526{
527 struct server *srv;
528 struct eb_root init_head = EB_ROOT;
529
530 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
531 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
532 p->lbprm.update_server_eweight = fwrr_update_server_weight;
533
534 p->lbprm.wdiv = BE_WEIGHT_SCALE;
535 for (srv = p->srv; srv; srv = srv->next) {
536 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
537 srv->prev_state = srv->state;
538 }
539
540 recount_servers(p);
541 update_backend_weight(p);
542
543 /* prepare the active servers group */
544 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
545 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
546 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
547 p->lbprm.fwrr.act.t1 = init_head;
548 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
549 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
550
551 /* prepare the backup servers group */
552 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
553 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
554 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
555 p->lbprm.fwrr.bck.t1 = init_head;
556 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
557 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
558
559 /* queue active and backup servers in two distinct groups */
560 for (srv = p->srv; srv; srv = srv->next) {
561 if (!srv_is_usable(srv->state, srv->eweight))
562 continue;
563 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
564 p->lbprm.fwrr.bck.init :
565 p->lbprm.fwrr.act.init,
566 srv);
567 }
568}
569
570/* simply removes a server from a weight tree */
571static inline void fwrr_dequeue_srv(struct server *s)
572{
573 eb32_delete(&s->lb_node);
574}
575
576/* queues a server into the appropriate group and tree depending on its
577 * backup status, and ->npos. If the server is disabled, simply assign
578 * it to the NULL tree.
579 */
580static void fwrr_queue_srv(struct server *s)
581{
582 struct proxy *p = s->proxy;
583 struct fwrr_group *grp;
584
585 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
586
587 /* Delay everything which does not fit into the window and everything
588 * which does not fit into the theorical new window.
589 */
590 if (!srv_is_usable(s->state, s->eweight)) {
591 fwrr_remove_from_tree(s);
592 }
593 else if (s->eweight <= 0 ||
594 s->npos >= 2 * grp->curr_weight ||
595 s->npos >= grp->curr_weight + grp->next_weight) {
596 /* put into next tree, and readjust npos in case we could
597 * finally take this back to current. */
598 s->npos -= grp->curr_weight;
599 fwrr_queue_by_weight(grp->next, s);
600 }
601 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100602 /* The sorting key is stored in units of s->npos * user_weight
603 * in order to avoid overflows. As stated in backend.h, the
604 * lower the scale, the rougher the weights modulation, and the
605 * higher the scale, the lower the number of servers without
606 * overflow. With this formula, the result is always positive,
607 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100608 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100609 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
610 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
611
612 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100613 s->lb_tree = &grp->curr;
614 }
615}
616
617/* prepares a server when extracting it from the "init" tree */
618static inline void fwrr_get_srv_init(struct server *s)
619{
620 s->npos = s->rweight = 0;
621}
622
623/* prepares a server when extracting it from the "next" tree */
624static inline void fwrr_get_srv_next(struct server *s)
625{
626 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
627 &s->proxy->lbprm.fwrr.bck :
628 &s->proxy->lbprm.fwrr.act;
629
630 s->npos += grp->curr_weight;
631}
632
633/* prepares a server when it was marked down */
634static inline void fwrr_get_srv_down(struct server *s)
635{
636 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
637 &s->proxy->lbprm.fwrr.bck :
638 &s->proxy->lbprm.fwrr.act;
639
640 s->npos = grp->curr_pos;
641}
642
643/* prepares a server when extracting it from its tree */
644static void fwrr_get_srv(struct server *s)
645{
646 struct proxy *p = s->proxy;
647 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
648 &p->lbprm.fwrr.bck :
649 &p->lbprm.fwrr.act;
650
651 if (s->lb_tree == grp->init) {
652 fwrr_get_srv_init(s);
653 }
654 else if (s->lb_tree == grp->next) {
655 fwrr_get_srv_next(s);
656 }
657 else if (s->lb_tree == NULL) {
658 fwrr_get_srv_down(s);
659 }
660}
661
662/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
663 * when this happens, and "next" filled with servers sorted by weights.
664 */
665static inline void fwrr_switch_trees(struct fwrr_group *grp)
666{
667 struct eb_root *swap;
668 swap = grp->init;
669 grp->init = grp->next;
670 grp->next = swap;
671 grp->curr_weight = grp->next_weight;
672 grp->curr_pos = grp->curr_weight;
673}
674
675/* return next server from the current tree in FWRR group <grp>, or a server
676 * from the "init" tree if appropriate. If both trees are empty, return NULL.
677 */
678static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
679{
680 struct eb32_node *node;
681 struct server *s;
682
683 node = eb32_first(&grp->curr);
684 s = eb32_entry(node, struct server, lb_node);
685
686 if (!node || s->npos > grp->curr_pos) {
687 /* either we have no server left, or we have a hole */
688 struct eb32_node *node2;
689 node2 = eb32_first(grp->init);
690 if (node2) {
691 node = node2;
692 s = eb32_entry(node, struct server, lb_node);
693 fwrr_get_srv_init(s);
694 if (s->eweight == 0) /* FIXME: is it possible at all ? */
695 node = NULL;
696 }
697 }
698 if (node)
699 return s;
700 else
701 return NULL;
702}
703
704/* Computes next position of server <s> in the group. It is mandatory for <s>
705 * to have a non-zero, positive eweight.
706*/
707static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
708{
709 if (!s->npos) {
710 /* first time ever for this server */
711 s->lpos = grp->curr_pos;
712 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
713 s->rweight += grp->next_weight % s->eweight;
714
715 if (s->rweight >= s->eweight) {
716 s->rweight -= s->eweight;
717 s->npos++;
718 }
719 } else {
720 s->lpos = s->npos;
721 s->npos += grp->next_weight / s->eweight;
722 s->rweight += grp->next_weight % s->eweight;
723
724 if (s->rweight >= s->eweight) {
725 s->rweight -= s->eweight;
726 s->npos++;
727 }
728 }
729}
730
731/* Return next server from the current tree in backend <p>, or a server from
732 * the init tree if appropriate. If both trees are empty, return NULL.
733 * Saturated servers are skipped and requeued.
734 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100735static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100736{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100737 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100738 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100739 int switched;
740
741 if (p->srv_act)
742 grp = &p->lbprm.fwrr.act;
743 else if (p->lbprm.fbck)
744 return p->lbprm.fbck;
745 else if (p->srv_bck)
746 grp = &p->lbprm.fwrr.bck;
747 else
748 return NULL;
749
750 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100751 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100752 full = NULL; /* NULL-terminated list of saturated servers */
753 while (1) {
754 /* if we see an empty group, let's first try to collect weights
755 * which might have recently changed.
756 */
757 if (!grp->curr_weight)
758 grp->curr_pos = grp->curr_weight = grp->next_weight;
759
760 /* get first server from the "current" tree. When the end of
761 * the tree is reached, we may have to switch, but only once.
762 */
763 while (1) {
764 srv = fwrr_get_server_from_group(grp);
765 if (srv)
766 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100767 if (switched) {
768 if (avoided) {
769 srv = avoided;
770 break;
771 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100772 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100773 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100774 switched = 1;
775 fwrr_switch_trees(grp);
776
777 }
778
779 /* OK, we have a server. However, it may be saturated, in which
780 * case we don't want to reconsider it for now. We'll update
781 * its position and dequeue it anyway, so that we can move it
782 * to a better place afterwards.
783 */
784 fwrr_update_position(grp, srv);
785 fwrr_dequeue_srv(srv);
786 grp->curr_pos++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100787 if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) {
788 /* make sure it is not the server we are trying to exclude... */
789 if (srv != srvtoavoid || avoided)
790 break;
791
792 avoided = srv; /* ...but remember that is was selected yet avoided */
793 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100794
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100795 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100796 srv->next_full = full;
797 full = srv;
798 }
799
800 /* OK, we got the best server, let's update it */
801 fwrr_queue_srv(srv);
802
803 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100804 /* Requeue all extracted servers. If full==srv then it was
805 * avoided (unsucessfully) and chained, omit it now.
806 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100807 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100808 if (switched) {
809 /* the tree has switched, requeue all extracted servers
810 * into "init", because their place was lost, and only
811 * their weight matters.
812 */
813 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100814 if (likely(full != srv))
815 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100816 full = full->next_full;
817 } while (full);
818 } else {
819 /* requeue all extracted servers just as if they were consumed
820 * so that they regain their expected place.
821 */
822 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100823 if (likely(full != srv))
824 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100825 full = full->next_full;
826 } while (full);
827 }
828 }
829 return srv;
830}
831
Willy Tarreau51406232008-03-10 22:04:20 +0100832/* Remove a server from a tree. It must have previously been dequeued. This
833 * function is meant to be called when a server is going down or has its
834 * weight disabled.
835 */
836static inline void fwlc_remove_from_tree(struct server *s)
837{
838 s->lb_tree = NULL;
839}
840
841/* simply removes a server from a tree */
842static inline void fwlc_dequeue_srv(struct server *s)
843{
844 eb32_delete(&s->lb_node);
845}
846
847/* Queue a server in its associated tree, assuming the weight is >0.
848 * Servers are sorted by #conns/weight. To ensure maximum accuracy,
849 * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key.
850 */
851static inline void fwlc_queue_srv(struct server *s)
852{
853 s->lb_node.key = s->cur_sess * SRV_EWGHT_MAX / s->eweight;
854 eb32_insert(s->lb_tree, &s->lb_node);
855}
856
857/* Re-position the server in the FWLC tree after it has been assigned one
858 * connection or after it has released one. Note that it is possible that
859 * the server has been moved out of the tree due to failed health-checks.
860 */
861static void fwlc_srv_reposition(struct server *s)
862{
863 if (!s->lb_tree)
864 return;
865 fwlc_dequeue_srv(s);
866 fwlc_queue_srv(s);
867}
868
869/* This function updates the server trees according to server <srv>'s new
870 * state. It should be called when server <srv>'s status changes to down.
871 * It is not important whether the server was already down or not. It is not
872 * important either that the new state is completely down (the caller may not
873 * know all the variables of a server's state).
874 */
875static void fwlc_set_server_status_down(struct server *srv)
876{
877 struct proxy *p = srv->proxy;
878
879 if (srv->state == srv->prev_state &&
880 srv->eweight == srv->prev_eweight)
881 return;
882
883 if (srv_is_usable(srv->state, srv->eweight))
884 goto out_update_state;
885
886 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
887 /* server was already down */
888 goto out_update_backend;
889
890 if (srv->state & SRV_BACKUP) {
891 p->lbprm.tot_wbck -= srv->prev_eweight;
892 p->srv_bck--;
893
894 if (srv == p->lbprm.fbck) {
895 /* we lost the first backup server in a single-backup
896 * configuration, we must search another one.
897 */
898 struct server *srv2 = p->lbprm.fbck;
899 do {
900 srv2 = srv2->next;
901 } while (srv2 &&
902 !((srv2->state & SRV_BACKUP) &&
903 srv_is_usable(srv2->state, srv2->eweight)));
904 p->lbprm.fbck = srv2;
905 }
906 } else {
907 p->lbprm.tot_wact -= srv->prev_eweight;
908 p->srv_act--;
909 }
910
911 fwlc_dequeue_srv(srv);
912 fwlc_remove_from_tree(srv);
913
914out_update_backend:
915 /* check/update tot_used, tot_weight */
916 update_backend_weight(p);
917 out_update_state:
918 srv->prev_state = srv->state;
919 srv->prev_eweight = srv->eweight;
920}
921
922/* This function updates the server trees according to server <srv>'s new
923 * state. It should be called when server <srv>'s status changes to up.
924 * It is not important whether the server was already down or not. It is not
925 * important either that the new state is completely UP (the caller may not
926 * know all the variables of a server's state). This function will not change
927 * the weight of a server which was already up.
928 */
929static void fwlc_set_server_status_up(struct server *srv)
930{
931 struct proxy *p = srv->proxy;
932
933 if (srv->state == srv->prev_state &&
934 srv->eweight == srv->prev_eweight)
935 return;
936
937 if (!srv_is_usable(srv->state, srv->eweight))
938 goto out_update_state;
939
940 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
941 /* server was already up */
942 goto out_update_backend;
943
944 if (srv->state & SRV_BACKUP) {
945 srv->lb_tree = &p->lbprm.fwlc.bck;
946 p->lbprm.tot_wbck += srv->eweight;
947 p->srv_bck++;
948
949 if (!(p->options & PR_O_USE_ALL_BK)) {
950 if (!p->lbprm.fbck) {
951 /* there was no backup server anymore */
952 p->lbprm.fbck = srv;
953 } else {
954 /* we may have restored a backup server prior to fbck,
955 * in which case it should replace it.
956 */
957 struct server *srv2 = srv;
958 do {
959 srv2 = srv2->next;
960 } while (srv2 && (srv2 != p->lbprm.fbck));
961 if (srv2)
962 p->lbprm.fbck = srv;
963 }
964 }
965 } else {
966 srv->lb_tree = &p->lbprm.fwlc.act;
967 p->lbprm.tot_wact += srv->eweight;
968 p->srv_act++;
969 }
970
971 /* note that eweight cannot be 0 here */
972 fwlc_queue_srv(srv);
973
974 out_update_backend:
975 /* check/update tot_used, tot_weight */
976 update_backend_weight(p);
977 out_update_state:
978 srv->prev_state = srv->state;
979 srv->prev_eweight = srv->eweight;
980}
981
982/* This function must be called after an update to server <srv>'s effective
983 * weight. It may be called after a state change too.
984 */
985static void fwlc_update_server_weight(struct server *srv)
986{
987 int old_state, new_state;
988 struct proxy *p = srv->proxy;
989
990 if (srv->state == srv->prev_state &&
991 srv->eweight == srv->prev_eweight)
992 return;
993
994 /* If changing the server's weight changes its state, we simply apply
995 * the procedures we already have for status change. If the state
996 * remains down, the server is not in any tree, so it's as easy as
997 * updating its values. If the state remains up with different weights,
998 * there are some computations to perform to find a new place and
999 * possibly a new tree for this server.
1000 */
1001
1002 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
1003 new_state = srv_is_usable(srv->state, srv->eweight);
1004
1005 if (!old_state && !new_state) {
1006 srv->prev_state = srv->state;
1007 srv->prev_eweight = srv->eweight;
1008 return;
1009 }
1010 else if (!old_state && new_state) {
1011 fwlc_set_server_status_up(srv);
1012 return;
1013 }
1014 else if (old_state && !new_state) {
1015 fwlc_set_server_status_down(srv);
1016 return;
1017 }
1018
1019 if (srv->lb_tree)
1020 fwlc_dequeue_srv(srv);
1021
1022 if (srv->state & SRV_BACKUP) {
1023 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
1024 srv->lb_tree = &p->lbprm.fwlc.bck;
1025 } else {
1026 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
1027 srv->lb_tree = &p->lbprm.fwlc.act;
1028 }
1029
1030 fwlc_queue_srv(srv);
1031
1032 update_backend_weight(p);
1033 srv->prev_state = srv->state;
1034 srv->prev_eweight = srv->eweight;
1035}
1036
1037/* This function is responsible for building the trees in case of fast
1038 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
1039 * uweight ratio. Both active and backup groups are initialized.
1040 */
1041void fwlc_init_server_tree(struct proxy *p)
1042{
1043 struct server *srv;
1044 struct eb_root init_head = EB_ROOT;
1045
1046 p->lbprm.set_server_status_up = fwlc_set_server_status_up;
1047 p->lbprm.set_server_status_down = fwlc_set_server_status_down;
1048 p->lbprm.update_server_eweight = fwlc_update_server_weight;
1049 p->lbprm.server_take_conn = fwlc_srv_reposition;
1050 p->lbprm.server_drop_conn = fwlc_srv_reposition;
1051
1052 p->lbprm.wdiv = BE_WEIGHT_SCALE;
1053 for (srv = p->srv; srv; srv = srv->next) {
1054 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
1055 srv->prev_state = srv->state;
1056 }
1057
1058 recount_servers(p);
1059 update_backend_weight(p);
1060
1061 p->lbprm.fwlc.act = init_head;
1062 p->lbprm.fwlc.bck = init_head;
1063
1064 /* queue active and backup servers in two distinct groups */
1065 for (srv = p->srv; srv; srv = srv->next) {
1066 if (!srv_is_usable(srv->state, srv->eweight))
1067 continue;
1068 srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
1069 fwlc_queue_srv(srv);
1070 }
1071}
1072
1073/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
1074 * return NULL. Saturated servers are skipped.
1075 */
1076static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
1077{
1078 struct server *srv, *avoided;
1079 struct eb32_node *node;
1080
1081 srv = avoided = NULL;
1082
1083 if (p->srv_act)
1084 node = eb32_first(&p->lbprm.fwlc.act);
1085 else if (p->lbprm.fbck)
1086 return p->lbprm.fbck;
1087 else if (p->srv_bck)
1088 node = eb32_first(&p->lbprm.fwlc.bck);
1089 else
1090 return NULL;
1091
1092 while (node) {
1093 /* OK, we have a server. However, it may be saturated, in which
1094 * case we don't want to reconsider it for now, so we'll simply
1095 * skip it. Same if it's the server we try to avoid, in which
1096 * case we simply remember it for later use if needed.
1097 */
1098 struct server *s;
1099
1100 s = eb32_entry(node, struct server, lb_node);
1101 if (!s->maxconn || s->cur_sess < srv_dynamic_maxconn(s)) {
1102 if (s != srvtoavoid) {
1103 srv = s;
1104 break;
1105 }
1106 avoided = s;
1107 }
1108 node = eb32_next(node);
1109 }
1110
1111 if (!srv)
1112 srv = avoided;
1113
1114 return srv;
1115}
1116
Willy Tarreau01732802007-11-01 22:48:15 +01001117/*
1118 * This function tries to find a running server for the proxy <px> following
1119 * the URL parameter hash method. It looks for a specific parameter in the
1120 * URL and hashes it to compute the server ID. This is useful to optimize
1121 * performance by avoiding bounces between servers in contexts where sessions
1122 * are shared but cookies are not usable. If the parameter is not found, NULL
1123 * is returned. If any server is found, it will be returned. If no valid server
1124 * is found, NULL is returned.
1125 *
1126 */
1127struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
1128{
1129 unsigned long hash = 0;
1130 char *p;
1131 int plen;
1132
Willy Tarreau20697042007-11-15 23:26:18 +01001133 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +01001134 return NULL;
1135
Willy Tarreau20697042007-11-15 23:26:18 +01001136 if (px->lbprm.map.state & PR_MAP_RECALC)
1137 recalc_server_map(px);
1138
Willy Tarreau01732802007-11-01 22:48:15 +01001139 p = memchr(uri, '?', uri_len);
1140 if (!p)
1141 return NULL;
1142 p++;
1143
1144 uri_len -= (p - uri);
1145 plen = px->url_param_len;
1146
1147 if (uri_len <= plen)
1148 return NULL;
1149
1150 while (uri_len > plen) {
1151 /* Look for the parameter name followed by an equal symbol */
1152 if (p[plen] == '=') {
1153 /* skip the equal symbol */
1154 uri = p;
1155 p += plen + 1;
1156 uri_len -= plen + 1;
1157 if (memcmp(uri, px->url_param_name, plen) == 0) {
1158 /* OK, we have the parameter here at <uri>, and
1159 * the value after the equal sign, at <p>
1160 */
1161 while (uri_len && *p != '&') {
1162 hash = *p + (hash << 6) + (hash << 16) - hash;
1163 uri_len--;
1164 p++;
1165 }
Willy Tarreau20697042007-11-15 23:26:18 +01001166 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +01001167 }
1168 }
1169
1170 /* skip to next parameter */
1171 uri = p;
1172 p = memchr(uri, '&', uri_len);
1173 if (!p)
1174 return NULL;
1175 p++;
1176 uri_len -= (p - uri);
1177 }
1178 return NULL;
1179}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001180
1181/*
1182 * This function marks the session as 'assigned' in direct or dispatch modes,
1183 * or tries to assign one in balance mode, according to the algorithm. It does
1184 * nothing if the session had already been assigned a server.
1185 *
1186 * It may return :
1187 * SRV_STATUS_OK if everything is OK. s->srv will be valid.
1188 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1189 * SRV_STATUS_FULL if all servers are saturated. s->srv = NULL.
1190 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1191 *
1192 * Upon successful return, the session flag SN_ASSIGNED to indicate that it does
1193 * not need to be called anymore. This usually means that s->srv can be trusted
1194 * in balance and direct modes. This flag is not cleared, so it's to the caller
1195 * to clear it if required (eg: redispatch).
1196 *
1197 */
1198
1199int assign_server(struct session *s)
1200{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001201
1202 struct server *srvtoavoid;
1203
Willy Tarreaubaaee002006-06-26 02:48:02 +02001204#ifdef DEBUG_FULL
1205 fprintf(stderr,"assign_server : s=%p\n",s);
1206#endif
1207
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001208 srvtoavoid = s->srv;
1209 s->srv = NULL;
1210
Willy Tarreaubaaee002006-06-26 02:48:02 +02001211 if (s->pend_pos)
1212 return SRV_STATUS_INTERNAL;
1213
1214 if (!(s->flags & SN_ASSIGNED)) {
Willy Tarreau31682232007-11-29 15:38:04 +01001215 if (s->be->lbprm.algo & BE_LB_ALGO) {
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001216 int len;
1217
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001218 if (s->flags & SN_DIRECT) {
1219 s->flags |= SN_ASSIGNED;
1220 return SRV_STATUS_OK;
1221 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001222
Willy Tarreaub625a082007-11-26 01:15:43 +01001223 if (!s->be->lbprm.tot_weight)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001224 return SRV_STATUS_NOSRV;
1225
Willy Tarreau31682232007-11-29 15:38:04 +01001226 switch (s->be->lbprm.algo & BE_LB_ALGO) {
1227 case BE_LB_ALGO_RR:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001228 s->srv = fwrr_get_next_server(s->be, srvtoavoid);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001229 if (!s->srv)
1230 return SRV_STATUS_FULL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001231 break;
Willy Tarreau51406232008-03-10 22:04:20 +01001232 case BE_LB_ALGO_LC:
1233 s->srv = fwlc_get_next_server(s->be, srvtoavoid);
1234 if (!s->srv)
1235 return SRV_STATUS_FULL;
1236 break;
Willy Tarreau31682232007-11-29 15:38:04 +01001237 case BE_LB_ALGO_SH:
Willy Tarreaubaaee002006-06-26 02:48:02 +02001238 if (s->cli_addr.ss_family == AF_INET)
1239 len = 4;
1240 else if (s->cli_addr.ss_family == AF_INET6)
1241 len = 16;
1242 else /* unknown IP family */
1243 return SRV_STATUS_INTERNAL;
1244
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001245 s->srv = get_server_sh(s->be,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001246 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
1247 len);
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001248 break;
Willy Tarreau31682232007-11-29 15:38:04 +01001249 case BE_LB_ALGO_UH:
Willy Tarreau2fcb5002007-05-08 13:35:26 +02001250 /* URI hashing */
1251 s->srv = get_server_uh(s->be,
1252 s->txn.req.sol + s->txn.req.sl.rq.u,
1253 s->txn.req.sl.rq.u_l);
Willy Tarreau01732802007-11-01 22:48:15 +01001254 break;
Willy Tarreau31682232007-11-29 15:38:04 +01001255 case BE_LB_ALGO_PH:
Willy Tarreau01732802007-11-01 22:48:15 +01001256 /* URL Parameter hashing */
1257 s->srv = get_server_ph(s->be,
1258 s->txn.req.sol + s->txn.req.sl.rq.u,
1259 s->txn.req.sl.rq.u_l);
1260 if (!s->srv) {
Willy Tarreaub625a082007-11-26 01:15:43 +01001261 /* parameter not found, fall back to round robin on the map */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001262 s->srv = get_server_rr_with_conns(s->be, srvtoavoid);
Willy Tarreau01732802007-11-01 22:48:15 +01001263 if (!s->srv)
1264 return SRV_STATUS_FULL;
1265 }
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001266 break;
1267 default:
1268 /* unknown balancing algorithm */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001269 return SRV_STATUS_INTERNAL;
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001270 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001271 if (s->srv != srvtoavoid) {
1272 s->be->cum_lbconn++;
1273 s->srv->cum_lbconn++;
1274 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001275 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001276 else if (s->be->options & PR_O_HTTP_PROXY) {
1277 if (!s->srv_addr.sin_addr.s_addr)
1278 return SRV_STATUS_NOSRV;
1279 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001280 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001281 !(s->fe->options & PR_O_TRANSP)) {
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001282 return SRV_STATUS_NOSRV;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001283 }
1284 s->flags |= SN_ASSIGNED;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001285 }
1286 return SRV_STATUS_OK;
1287}
1288
1289
1290/*
1291 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1292 * The address is taken from the currently assigned server, or from the
1293 * dispatch or transparent address.
1294 *
1295 * It may return :
1296 * SRV_STATUS_OK if everything is OK.
1297 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1298 *
1299 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1300 * not cleared, so it's to the caller to clear it if required.
1301 *
1302 */
1303int assign_server_address(struct session *s)
1304{
1305#ifdef DEBUG_FULL
1306 fprintf(stderr,"assign_server_address : s=%p\n",s);
1307#endif
1308
Willy Tarreau31682232007-11-29 15:38:04 +01001309 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001310 /* A server is necessarily known for this session */
1311 if (!(s->flags & SN_ASSIGNED))
1312 return SRV_STATUS_INTERNAL;
1313
1314 s->srv_addr = s->srv->addr;
1315
1316 /* if this server remaps proxied ports, we'll use
1317 * the port the client connected to with an offset. */
1318 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001319 if (!(s->fe->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
1320 get_frt_addr(s);
1321 if (s->frt_addr.ss_family == AF_INET) {
1322 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1323 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1324 } else {
1325 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1326 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1327 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001328 }
1329 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001330 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001331 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001332 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001333 }
Willy Tarreau73de9892006-11-30 11:40:23 +01001334 else if (s->fe->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001335 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001336 if (!(s->flags & SN_FRT_ADDR_SET))
1337 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001338
Willy Tarreaubd414282008-01-19 13:46:35 +01001339 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1340 /* when we support IPv6 on the backend, we may add other tests */
1341 //qfprintf(stderr, "Cannot get original server address.\n");
1342 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001343 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001344 else if (s->be->options & PR_O_HTTP_PROXY) {
1345 /* If HTTP PROXY option is set, then server is already assigned
1346 * during incoming client request parsing. */
1347 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001348 else {
1349 /* no server and no LB algorithm ! */
1350 return SRV_STATUS_INTERNAL;
1351 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001352
1353 s->flags |= SN_ADDR_SET;
1354 return SRV_STATUS_OK;
1355}
1356
1357
1358/* This function assigns a server to session <s> if required, and can add the
1359 * connection to either the assigned server's queue or to the proxy's queue.
1360 *
1361 * Returns :
1362 *
1363 * SRV_STATUS_OK if everything is OK.
1364 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1365 * SRV_STATUS_QUEUED if the connection has been queued.
1366 * SRV_STATUS_FULL if the server(s) is/are saturated and the
1367 * connection could not be queued.
1368 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1369 *
1370 */
1371int assign_server_and_queue(struct session *s)
1372{
1373 struct pendconn *p;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001374 struct server *srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001375 int err;
1376
1377 if (s->pend_pos)
1378 return SRV_STATUS_INTERNAL;
1379
1380 if (s->flags & SN_ASSIGNED) {
Willy Tarreau21d2af32008-02-14 20:25:24 +01001381 if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
1382 /* server scheduled for redirection, and already assigned. We
1383 * don't want to go further nor check the queue.
1384 */
1385 return SRV_STATUS_OK;
1386 }
1387
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001388 if (s->srv && s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001389 /* it's left to the dispatcher to choose a server */
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001390 s->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Elijah Epifanovacafc5f2007-10-25 20:15:38 +02001391 } else {
1392 /* a server does not need to be assigned, perhaps because we're in
1393 * direct mode, or in dispatch or transparent modes where the server
1394 * is not needed.
1395 */
1396 if (s->srv &&
1397 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1398 p = pendconn_add(s);
1399 if (p)
1400 return SRV_STATUS_QUEUED;
1401 else
1402 return SRV_STATUS_FULL;
1403 }
1404 return SRV_STATUS_OK;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001405 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001406 }
1407
1408 /* a server needs to be assigned */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001409 srv = s->srv;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001410 err = assign_server(s);
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001411
1412 if (srv) {
1413 if (srv != s->srv) {
1414 /* This session was previously dispatched to another server:
1415 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1416 * - set SN_REDISP if it was successfully redispatched
1417 * - increment srv->redispatches and be->redispatches
1418 */
1419
1420 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1421 s->txn.flags &= ~TX_CK_MASK;
1422 s->txn.flags |= TX_CK_DOWN;
1423 }
1424
1425 s->flags |= SN_REDISP;
1426
1427 srv->redispatches++;
1428 s->be->redispatches++;
1429 } else {
1430 srv->retries++;
1431 s->be->retries++;
1432 }
1433 }
1434
Willy Tarreaubaaee002006-06-26 02:48:02 +02001435 switch (err) {
1436 case SRV_STATUS_OK:
Willy Tarreau21d2af32008-02-14 20:25:24 +01001437 if ((s->flags & SN_REDIRECTABLE) && s->srv && s->srv->rdr_len) {
1438 /* server supporting redirection and it is possible.
1439 * Let's report that and ignore maxconn !
1440 */
1441 return SRV_STATUS_OK;
1442 }
1443
Willy Tarreaubaaee002006-06-26 02:48:02 +02001444 /* in balance mode, we might have servers with connection limits */
1445 if (s->srv &&
1446 s->srv->maxconn && s->srv->cur_sess >= srv_dynamic_maxconn(s->srv)) {
1447 p = pendconn_add(s);
1448 if (p)
1449 return SRV_STATUS_QUEUED;
1450 else
1451 return SRV_STATUS_FULL;
1452 }
1453 return SRV_STATUS_OK;
1454
1455 case SRV_STATUS_FULL:
1456 /* queue this session into the proxy's queue */
1457 p = pendconn_add(s);
1458 if (p)
1459 return SRV_STATUS_QUEUED;
1460 else
1461 return SRV_STATUS_FULL;
1462
1463 case SRV_STATUS_NOSRV:
1464 case SRV_STATUS_INTERNAL:
1465 return err;
1466 default:
1467 return SRV_STATUS_INTERNAL;
1468 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001469}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001470
1471/*
1472 * This function initiates a connection to the server assigned to this session
1473 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1474 * It can return one of :
1475 * - SN_ERR_NONE if everything's OK
1476 * - SN_ERR_SRVTO if there are no more servers
1477 * - SN_ERR_SRVCL if the connection was refused by the server
1478 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1479 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1480 * - SN_ERR_INTERNAL for any other purely internal errors
1481 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1482 */
1483int connect_server(struct session *s)
1484{
1485 int fd, err;
1486
1487 if (!(s->flags & SN_ADDR_SET)) {
1488 err = assign_server_address(s);
1489 if (err != SRV_STATUS_OK)
1490 return SN_ERR_INTERNAL;
1491 }
1492
1493 if ((fd = s->srv_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
1494 qfprintf(stderr, "Cannot get a server socket.\n");
1495
1496 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001497 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001498 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001499 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001500 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001501 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001502 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001503 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001504 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001505 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001506 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001507 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001508 /* this is a resource error */
1509 return SN_ERR_RESOURCE;
1510 }
1511
1512 if (fd >= global.maxsock) {
1513 /* do not log anything there, it's a normal condition when this option
1514 * is used to serialize connections to a server !
1515 */
1516 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1517 close(fd);
1518 return SN_ERR_PRXCOND; /* it is a configuration limit */
1519 }
1520
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001521#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001522 if ((s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001523 /* TCP splicing supported by both FE and BE */
1524 tcp_splice_initfd(s->cli_fd, fd);
1525 }
1526#endif
1527
Willy Tarreaubaaee002006-06-26 02:48:02 +02001528 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1529 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1530 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1531 close(fd);
1532 return SN_ERR_INTERNAL;
1533 }
1534
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001535 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001536 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1537
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001538 if (s->be->options & PR_O_TCP_NOLING)
1539 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1540
Willy Tarreaubaaee002006-06-26 02:48:02 +02001541 /* allow specific binding :
1542 * - server-specific at first
1543 * - proxy-specific next
1544 */
1545 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001546 struct sockaddr_in *remote = NULL;
1547 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001548
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001549#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001550 switch (s->srv->state & SRV_TPROXY_MASK) {
1551 case SRV_TPROXY_ADDR:
1552 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1553 flags = 3;
1554 break;
1555 case SRV_TPROXY_CLI:
1556 flags |= 2;
1557 /* fall through */
1558 case SRV_TPROXY_CIP:
1559 /* FIXME: what can we do if the client connects in IPv6 ? */
1560 flags |= 1;
1561 remote = (struct sockaddr_in *)&s->cli_addr;
1562 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001563 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001564#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001565 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001566 if (ret) {
1567 close(fd);
1568 if (ret == 1) {
1569 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1570 s->be->id, s->srv->id);
1571 send_log(s->be, LOG_EMERG,
1572 "Cannot bind to source address before connect() for server %s/%s.\n",
1573 s->be->id, s->srv->id);
1574 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001575 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001576 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001577 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001578 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001579 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001580 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001581 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001582 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001583 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001584 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001585 struct sockaddr_in *remote = NULL;
1586 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001587
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001588#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001589 switch (s->be->options & PR_O_TPXY_MASK) {
1590 case PR_O_TPXY_ADDR:
1591 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1592 flags = 3;
1593 break;
1594 case PR_O_TPXY_CLI:
1595 flags |= 2;
1596 /* fall through */
1597 case PR_O_TPXY_CIP:
1598 /* FIXME: what can we do if the client connects in IPv6 ? */
1599 flags |= 1;
1600 remote = (struct sockaddr_in *)&s->cli_addr;
1601 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001602 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001603#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001604 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001605 if (ret) {
1606 close(fd);
1607 if (ret == 1) {
1608 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1609 s->be->id);
1610 send_log(s->be, LOG_EMERG,
1611 "Cannot bind to source address before connect() for proxy %s.\n",
1612 s->be->id);
1613 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001614 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001615 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001616 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001617 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1618 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001619 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001620 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001621 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001622 }
1623
1624 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1625 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1626
1627 if (errno == EAGAIN || errno == EADDRINUSE) {
1628 char *msg;
1629 if (errno == EAGAIN) /* no free ports left, try again later */
1630 msg = "no free ports";
1631 else
1632 msg = "local address already in use";
1633
1634 qfprintf(stderr,"Cannot connect: %s.\n",msg);
1635 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001636 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001637 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001638 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001639 return SN_ERR_RESOURCE;
1640 } else if (errno == ETIMEDOUT) {
1641 //qfprintf(stderr,"Connect(): ETIMEDOUT");
1642 close(fd);
1643 return SN_ERR_SRVTO;
1644 } else {
1645 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1646 //qfprintf(stderr,"Connect(): %d", errno);
1647 close(fd);
1648 return SN_ERR_SRVCL;
1649 }
1650 }
1651
1652 fdtab[fd].owner = s->task;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001653 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaud7971282006-07-29 18:36:34 +02001654 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001655 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001656 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001657 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001658
1659 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1660 fdtab[fd].peerlen = sizeof(s->srv_addr);
1661
Willy Tarreauf161a342007-04-08 16:59:42 +02001662 EV_FD_SET(fd, DIR_WR); /* for connect status */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001663
1664 fd_insert(fd);
1665 if (s->srv) {
1666 s->srv->cur_sess++;
1667 if (s->srv->cur_sess > s->srv->cur_sess_max)
1668 s->srv->cur_sess_max = s->srv->cur_sess;
Willy Tarreau51406232008-03-10 22:04:20 +01001669 if (s->be->lbprm.server_take_conn)
1670 s->be->lbprm.server_take_conn(s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001671 }
1672
Willy Tarreaud7c30f92007-12-03 01:38:36 +01001673 if (!tv_add_ifset(&s->req->cex, &now, &s->be->timeout.connect))
Willy Tarreaud7971282006-07-29 18:36:34 +02001674 tv_eternity(&s->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001675 return SN_ERR_NONE; /* connection is OK */
1676}
1677
1678
1679/*
1680 * This function checks the retry count during the connect() job.
1681 * It updates the session's srv_state and retries, so that the caller knows
1682 * what it has to do. It uses the last connection error to set the log when
1683 * it expires. It returns 1 when it has expired, and 0 otherwise.
1684 */
1685int srv_count_retry_down(struct session *t, int conn_err)
1686{
1687 /* we are in front of a retryable error */
1688 t->conn_retries--;
Krzysztof Oledzki1cf36ba2007-10-18 19:12:30 +02001689
Willy Tarreaubaaee002006-06-26 02:48:02 +02001690 if (t->conn_retries < 0) {
1691 /* if not retryable anymore, let's abort */
Willy Tarreaud7971282006-07-29 18:36:34 +02001692 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001693 srv_close_with_err(t, conn_err, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001694 503, error_message(t, HTTP_ERR_503));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001695 if (t->srv)
1696 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001697 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001698
1699 /* We used to have a free connection slot. Since we'll never use it,
1700 * we have to inform the server that it may be used by another session.
1701 */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001702 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001703 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001704 return 1;
1705 }
1706 return 0;
1707}
1708
1709
1710/*
1711 * This function performs the retryable part of the connect() job.
1712 * It updates the session's srv_state and retries, so that the caller knows
1713 * what it has to do. It returns 1 when it breaks out of the loop, or 0 if
1714 * it needs to redispatch.
1715 */
1716int srv_retryable_connect(struct session *t)
1717{
1718 int conn_err;
1719
1720 /* This loop ensures that we stop before the last retry in case of a
1721 * redispatchable server.
1722 */
1723 do {
1724 /* initiate a connection to the server */
1725 conn_err = connect_server(t);
1726 switch (conn_err) {
1727
1728 case SN_ERR_NONE:
1729 //fprintf(stderr,"0: c=%d, s=%d\n", c, s);
1730 t->srv_state = SV_STCONN;
Willy Tarreau98937b82007-12-10 15:05:42 +01001731 if (t->srv)
1732 t->srv->cum_sess++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001733 return 1;
1734
1735 case SN_ERR_INTERNAL:
Willy Tarreaud7971282006-07-29 18:36:34 +02001736 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001737 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001738 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001739 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001740 t->srv->cum_sess++;
1741 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001742 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001743 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001744 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001745 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001746 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001747 return 1;
1748 }
1749 /* ensure that we have enough retries left */
1750 if (srv_count_retry_down(t, conn_err)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001751 return 1;
1752 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001753 } while (t->srv == NULL || t->conn_retries > 0 || !(t->be->options & PR_O_REDISP));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001754
1755 /* We're on our last chance, and the REDISP option was specified.
1756 * We will ignore cookie and force to balance or use the dispatcher.
1757 */
1758 /* let's try to offer this slot to anybody */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001759 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001760 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001761
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001762 if (t->srv)
1763 t->srv->cum_sess++; //FIXME?
Willy Tarreaubaaee002006-06-26 02:48:02 +02001764
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001765 /* it's left to the dispatcher to choose a server */
Willy Tarreaubaaee002006-06-26 02:48:02 +02001766 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001767 return 0;
1768}
1769
1770
1771/* This function performs the "redispatch" part of a connection attempt. It
1772 * will assign a server if required, queue the connection if required, and
1773 * handle errors that might arise at this level. It can change the server
1774 * state. It will return 1 if it encounters an error, switches the server
1775 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1776 * that the connection is ready to use.
1777 */
1778
1779int srv_redispatch_connect(struct session *t)
1780{
1781 int conn_err;
1782
1783 /* We know that we don't have any connection pending, so we will
1784 * try to get a new one, and wait in this state if it's queued
1785 */
1786 conn_err = assign_server_and_queue(t);
1787 switch (conn_err) {
1788 case SRV_STATUS_OK:
1789 break;
1790
1791 case SRV_STATUS_NOSRV:
1792 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaud7971282006-07-29 18:36:34 +02001793 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001794 srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001795 503, error_message(t, HTTP_ERR_503));
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001796
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001797 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001798
1799 return 1;
1800
1801 case SRV_STATUS_QUEUED:
Willy Tarreau1fa31262007-12-03 00:36:16 +01001802 /* note: we use the connect expiration date for the queue. */
1803 if (!tv_add_ifset(&t->req->cex, &now, &t->be->timeout.queue))
Willy Tarreaud7971282006-07-29 18:36:34 +02001804 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001805 t->srv_state = SV_STIDLE;
1806 /* do nothing else and do not wake any other session up */
1807 return 1;
1808
1809 case SRV_STATUS_FULL:
1810 case SRV_STATUS_INTERNAL:
1811 default:
Willy Tarreaud7971282006-07-29 18:36:34 +02001812 tv_eternity(&t->req->cex);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001813 srv_close_with_err(t, SN_ERR_INTERNAL, SN_FINST_C,
Willy Tarreau80587432006-12-24 17:47:20 +01001814 500, error_message(t, HTTP_ERR_500));
Willy Tarreaubaaee002006-06-26 02:48:02 +02001815 if (t->srv)
Willy Tarreau98937b82007-12-10 15:05:42 +01001816 t->srv->cum_sess++;
1817 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001818 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001819 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001820
1821 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001822 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau96bcfd72007-04-29 10:41:56 +02001823 task_wakeup(t->srv->queue_mgt);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001824 return 1;
1825 }
1826 /* if we get here, it's because we got SRV_STATUS_OK, which also
1827 * means that the connection has not been queued.
1828 */
1829 return 0;
1830}
1831
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001832int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01001833 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02001834 return px->down_time;
1835
1836 return now.tv_sec - px->last_change + px->down_time;
1837}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001838
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001839/* This function parses a "balance" statement in a backend section describing
1840 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
1841 * returns -1, it may write an error message into ther <err> buffer, for at
1842 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
1843 * written. The function must be called with <args> pointing to the first word
1844 * after "balance".
1845 */
1846int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
1847{
1848 if (!*(args[0])) {
1849 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01001850 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1851 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001852 return 0;
1853 }
1854
1855 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001856 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1857 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001858 }
Willy Tarreau51406232008-03-10 22:04:20 +01001859 else if (!strcmp(args[0], "leastconn")) {
1860 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1861 curproxy->lbprm.algo |= BE_LB_ALGO_LC;
1862 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001863 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001864 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1865 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001866 }
1867 else if (!strcmp(args[0], "uri")) {
Willy Tarreau31682232007-11-29 15:38:04 +01001868 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1869 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001870 }
Willy Tarreau01732802007-11-01 22:48:15 +01001871 else if (!strcmp(args[0], "url_param")) {
1872 if (!*args[1]) {
1873 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
1874 return -1;
1875 }
Willy Tarreau31682232007-11-29 15:38:04 +01001876 curproxy->lbprm.algo &= ~BE_LB_ALGO;
1877 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreau01732802007-11-01 22:48:15 +01001878 if (curproxy->url_param_name)
1879 free(curproxy->url_param_name);
1880 curproxy->url_param_name = strdup(args[1]);
1881 curproxy->url_param_len = strlen(args[1]);
1882 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001883 else {
Willy Tarreau51406232008-03-10 22:04:20 +01001884 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri' and 'url_param' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01001885 return -1;
1886 }
1887 return 0;
1888}
1889
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01001890
1891/************************************************************************/
1892/* All supported keywords must be declared here. */
1893/************************************************************************/
1894
1895/* set test->i to the number of enabled servers on the proxy */
1896static int
1897acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
1898 struct acl_expr *expr, struct acl_test *test)
1899{
1900 test->flags = ACL_TEST_F_VOL_TEST;
1901 if (expr->arg_len) {
1902 /* another proxy was designated, we must look for it */
1903 for (px = proxy; px; px = px->next)
1904 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
1905 break;
1906 }
1907 if (!px)
1908 return 0;
1909
1910 if (px->srv_act)
1911 test->i = px->srv_act;
1912 else if (px->lbprm.fbck)
1913 test->i = 1;
1914 else
1915 test->i = px->srv_bck;
1916
1917 return 1;
1918}
1919
1920
1921/* Note: must not be declared <const> as its list will be overwritten */
1922static struct acl_kw_list acl_kws = {{ },{
1923 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int },
1924 { NULL, NULL, NULL, NULL },
1925}};
1926
1927
1928__attribute__((constructor))
1929static void __backend_init(void)
1930{
1931 acl_register_keywords(&acl_kws);
1932}
1933
1934
Willy Tarreaubaaee002006-06-26 02:48:02 +02001935/*
1936 * Local variables:
1937 * c-indent-level: 8
1938 * c-basic-offset: 8
1939 * End:
1940 */