blob: 58de35e54cfc9feb18a4a419b870e89dae40575b [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +020019#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020
Willy Tarreaud88edf22009-06-14 15:48:17 +020021#include <netinet/tcp.h>
22
Willy Tarreau2dd0d472006-06-29 17:53:05 +020023#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020024#include <common/config.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020025#include <common/debug.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010026#include <common/eb32tree.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020027#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020028#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020029
Willy Tarreaubaaee002006-06-26 02:48:02 +020030#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020031
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010032#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020033#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020034#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020035#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010036#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020037#include <proto/log.h>
Willy Tarreauc6f4ce82009-06-10 11:09:37 +020038#include <proto/port_range.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020039#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010040#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041#include <proto/queue.h>
Willy Tarreau7f062c42009-03-05 18:43:00 +010042#include <proto/server.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020043#include <proto/session.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020044#include <proto/stream_sock.h>
45#include <proto/task.h>
46
Willy Tarreau6d1a9882007-01-07 02:03:04 +010047#ifdef CONFIG_HAP_TCPSPLICE
48#include <libtcpsplice.h>
49#endif
50
Willy Tarreaub625a082007-11-26 01:15:43 +010051static inline void fwrr_remove_from_tree(struct server *s);
52static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
53static inline void fwrr_dequeue_srv(struct server *s);
54static void fwrr_get_srv(struct server *s);
55static void fwrr_queue_srv(struct server *s);
56
57/* This function returns non-zero if a server with the given weight and state
58 * is usable for LB, otherwise zero.
59 */
60static inline int srv_is_usable(int state, int weight)
61{
62 if (!weight)
63 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010064 if (state & SRV_GOINGDOWN)
65 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010066 if (!(state & SRV_RUNNING))
67 return 0;
68 return 1;
69}
70
Willy Tarreaubaaee002006-06-26 02:48:02 +020071/*
72 * This function recounts the number of usable active and backup servers for
73 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010074 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010075 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010076 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020077 */
Willy Tarreaub625a082007-11-26 01:15:43 +010078static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020079{
80 struct server *srv;
81
Willy Tarreau20697042007-11-15 23:26:18 +010082 px->srv_act = px->srv_bck = 0;
83 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010084 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020085 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010086 if (!srv_is_usable(srv->state, srv->eweight))
87 continue;
88
89 if (srv->state & SRV_BACKUP) {
90 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010091 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010092 px->lbprm.fbck = srv;
93 px->srv_bck++;
94 px->lbprm.tot_wbck += srv->eweight;
95 } else {
96 px->srv_act++;
97 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020098 }
99 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100100}
Willy Tarreau20697042007-11-15 23:26:18 +0100101
Willy Tarreaub625a082007-11-26 01:15:43 +0100102/* This function simply updates the backend's tot_weight and tot_used values
103 * after servers weights have been updated. It is designed to be used after
104 * recount_servers() or equivalent.
105 */
106static void update_backend_weight(struct proxy *px)
107{
Willy Tarreau20697042007-11-15 23:26:18 +0100108 if (px->srv_act) {
109 px->lbprm.tot_weight = px->lbprm.tot_wact;
110 px->lbprm.tot_used = px->srv_act;
111 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100112 else if (px->lbprm.fbck) {
113 /* use only the first backup server */
114 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
115 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100116 }
117 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100118 px->lbprm.tot_weight = px->lbprm.tot_wbck;
119 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100120 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100121}
122
123/* this function updates the map according to server <srv>'s new state */
124static void map_set_server_status_down(struct server *srv)
125{
126 struct proxy *p = srv->proxy;
127
128 if (srv->state == srv->prev_state &&
129 srv->eweight == srv->prev_eweight)
130 return;
131
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100132 if (srv_is_usable(srv->state, srv->eweight))
133 goto out_update_state;
134
Willy Tarreaub625a082007-11-26 01:15:43 +0100135 /* FIXME: could be optimized since we know what changed */
136 recount_servers(p);
137 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100138 p->lbprm.map.state |= PR_MAP_RECALC;
139 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100140 srv->prev_state = srv->state;
141 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200142}
143
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100144/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100145static void map_set_server_status_up(struct server *srv)
146{
147 struct proxy *p = srv->proxy;
148
149 if (srv->state == srv->prev_state &&
150 srv->eweight == srv->prev_eweight)
151 return;
152
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100153 if (!srv_is_usable(srv->state, srv->eweight))
154 goto out_update_state;
155
Willy Tarreaub625a082007-11-26 01:15:43 +0100156 /* FIXME: could be optimized since we know what changed */
157 recount_servers(p);
158 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100159 p->lbprm.map.state |= PR_MAP_RECALC;
160 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100161 srv->prev_state = srv->state;
162 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100163}
164
Willy Tarreau20697042007-11-15 23:26:18 +0100165/* This function recomputes the server map for proxy px. It relies on
166 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
167 * called after recount_servers(). It also expects px->lbprm.map.srv
168 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200169 */
170void recalc_server_map(struct proxy *px)
171{
172 int o, tot, flag;
173 struct server *cur, *best;
174
Willy Tarreau20697042007-11-15 23:26:18 +0100175 switch (px->lbprm.tot_used) {
176 case 0: /* no server */
177 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200178 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100179 case 1: /* only one server, just fill first entry */
180 tot = 1;
181 break;
182 default:
183 tot = px->lbprm.tot_weight;
184 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200185 }
186
Willy Tarreau20697042007-11-15 23:26:18 +0100187 /* here we *know* that we have some servers */
188 if (px->srv_act)
189 flag = SRV_RUNNING;
190 else
191 flag = SRV_RUNNING | SRV_BACKUP;
192
Willy Tarreaubaaee002006-06-26 02:48:02 +0200193 /* this algorithm gives priority to the first server, which means that
194 * it will respect the declaration order for equivalent weights, and
195 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100196 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200197 * case, where we want the first server only.
198 */
199 for (cur = px->srv; cur; cur = cur->next)
200 cur->wscore = 0;
201
202 for (o = 0; o < tot; o++) {
203 int max = 0;
204 best = NULL;
205 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau48494c02007-11-30 10:41:39 +0100206 if (flag == (cur->state &
207 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200208 int v;
209
210 /* If we are forced to return only one server, we don't want to
211 * go further, because we would return the wrong one due to
212 * divide overflow.
213 */
214 if (tot == 1) {
215 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100216 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200217 break;
218 }
219
Willy Tarreau417fae02007-03-25 21:16:40 +0200220 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200221 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
222 if (best == NULL || v > max) {
223 max = v;
224 best = cur;
225 }
226 }
227 }
Willy Tarreau20697042007-11-15 23:26:18 +0100228 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200229 best->wscore -= tot;
230 }
Willy Tarreau20697042007-11-15 23:26:18 +0100231 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200232}
233
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100234/* This function is responsible of building the server MAP for map-based LB
235 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
236 * weights if applicable. It should be called only once per proxy, at config
237 * time.
238 */
239void init_server_map(struct proxy *p)
240{
241 struct server *srv;
242 int pgcd;
243 int act, bck;
244
Willy Tarreaub625a082007-11-26 01:15:43 +0100245 p->lbprm.set_server_status_up = map_set_server_status_up;
246 p->lbprm.set_server_status_down = map_set_server_status_down;
247 p->lbprm.update_server_eweight = NULL;
248
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100249 if (!p->srv)
250 return;
251
252 /* We will factor the weights to reduce the table,
253 * using Euclide's largest common divisor algorithm
254 */
255 pgcd = p->srv->uweight;
256 for (srv = p->srv->next; srv && pgcd > 1; srv = srv->next) {
257 int w = srv->uweight;
258 while (w) {
259 int t = pgcd % w;
260 pgcd = w;
261 w = t;
262 }
263 }
264
265 /* It is sometimes useful to know what factor to apply
266 * to the backend's effective weight to know its real
267 * weight.
268 */
269 p->lbprm.wmult = pgcd;
270
271 act = bck = 0;
272 for (srv = p->srv; srv; srv = srv->next) {
273 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100274 srv->prev_eweight = srv->eweight;
275 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100276 if (srv->state & SRV_BACKUP)
277 bck += srv->eweight;
278 else
279 act += srv->eweight;
280 }
281
282 /* this is the largest map we will ever need for this servers list */
283 if (act < bck)
284 act = bck;
285
286 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
287 /* recounts servers and their weights */
288 p->lbprm.map.state = PR_MAP_RECALC;
289 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100290 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100291 recalc_server_map(p);
292}
293
Willy Tarreaub625a082007-11-26 01:15:43 +0100294/* This function updates the server trees according to server <srv>'s new
295 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100296 * It is not important whether the server was already down or not. It is not
297 * important either that the new state is completely down (the caller may not
298 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100299 */
300static void fwrr_set_server_status_down(struct server *srv)
301{
302 struct proxy *p = srv->proxy;
303 struct fwrr_group *grp;
304
305 if (srv->state == srv->prev_state &&
306 srv->eweight == srv->prev_eweight)
307 return;
308
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100309 if (srv_is_usable(srv->state, srv->eweight))
310 goto out_update_state;
311
Willy Tarreaub625a082007-11-26 01:15:43 +0100312 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
313 /* server was already down */
314 goto out_update_backend;
315
316 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
317 grp->next_weight -= srv->prev_eweight;
318
319 if (srv->state & SRV_BACKUP) {
320 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
321 p->srv_bck--;
322
323 if (srv == p->lbprm.fbck) {
324 /* we lost the first backup server in a single-backup
325 * configuration, we must search another one.
326 */
327 struct server *srv2 = p->lbprm.fbck;
328 do {
329 srv2 = srv2->next;
330 } while (srv2 &&
331 !((srv2->state & SRV_BACKUP) &&
332 srv_is_usable(srv2->state, srv2->eweight)));
333 p->lbprm.fbck = srv2;
334 }
335 } else {
336 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
337 p->srv_act--;
338 }
339
340 fwrr_dequeue_srv(srv);
341 fwrr_remove_from_tree(srv);
342
343out_update_backend:
344 /* check/update tot_used, tot_weight */
345 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100346 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100347 srv->prev_state = srv->state;
348 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100349}
350
351/* This function updates the server trees according to server <srv>'s new
352 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100353 * It is not important whether the server was already down or not. It is not
354 * important either that the new state is completely UP (the caller may not
355 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100356 * the weight of a server which was already up.
357 */
358static void fwrr_set_server_status_up(struct server *srv)
359{
360 struct proxy *p = srv->proxy;
361 struct fwrr_group *grp;
362
363 if (srv->state == srv->prev_state &&
364 srv->eweight == srv->prev_eweight)
365 return;
366
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100367 if (!srv_is_usable(srv->state, srv->eweight))
368 goto out_update_state;
369
Willy Tarreaub625a082007-11-26 01:15:43 +0100370 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
371 /* server was already up */
372 goto out_update_backend;
373
374 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
375 grp->next_weight += srv->eweight;
376
377 if (srv->state & SRV_BACKUP) {
378 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
379 p->srv_bck++;
380
Willy Tarreauf4cca452008-03-08 21:42:54 +0100381 if (!(p->options & PR_O_USE_ALL_BK)) {
382 if (!p->lbprm.fbck) {
383 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100384 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100385 } else {
386 /* we may have restored a backup server prior to fbck,
387 * in which case it should replace it.
388 */
389 struct server *srv2 = srv;
390 do {
391 srv2 = srv2->next;
392 } while (srv2 && (srv2 != p->lbprm.fbck));
393 if (srv2)
394 p->lbprm.fbck = srv;
395 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100396 }
397 } else {
398 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
399 p->srv_act++;
400 }
401
402 /* note that eweight cannot be 0 here */
403 fwrr_get_srv(srv);
404 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
405 fwrr_queue_srv(srv);
406
407out_update_backend:
408 /* check/update tot_used, tot_weight */
409 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100410 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100411 srv->prev_state = srv->state;
412 srv->prev_eweight = srv->eweight;
413}
414
415/* This function must be called after an update to server <srv>'s effective
416 * weight. It may be called after a state change too.
417 */
418static void fwrr_update_server_weight(struct server *srv)
419{
420 int old_state, new_state;
421 struct proxy *p = srv->proxy;
422 struct fwrr_group *grp;
423
424 if (srv->state == srv->prev_state &&
425 srv->eweight == srv->prev_eweight)
426 return;
427
428 /* If changing the server's weight changes its state, we simply apply
429 * the procedures we already have for status change. If the state
430 * remains down, the server is not in any tree, so it's as easy as
431 * updating its values. If the state remains up with different weights,
432 * there are some computations to perform to find a new place and
433 * possibly a new tree for this server.
434 */
435
436 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
437 new_state = srv_is_usable(srv->state, srv->eweight);
438
439 if (!old_state && !new_state) {
440 srv->prev_state = srv->state;
441 srv->prev_eweight = srv->eweight;
442 return;
443 }
444 else if (!old_state && new_state) {
445 fwrr_set_server_status_up(srv);
446 return;
447 }
448 else if (old_state && !new_state) {
449 fwrr_set_server_status_down(srv);
450 return;
451 }
452
453 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
454 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
455
456 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
457 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
458
459 if (srv->lb_tree == grp->init) {
460 fwrr_dequeue_srv(srv);
461 fwrr_queue_by_weight(grp->init, srv);
462 }
463 else if (!srv->lb_tree) {
464 /* FIXME: server was down. This is not possible right now but
465 * may be needed soon for slowstart or graceful shutdown.
466 */
467 fwrr_dequeue_srv(srv);
468 fwrr_get_srv(srv);
469 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
470 fwrr_queue_srv(srv);
471 } else {
472 /* The server is either active or in the next queue. If it's
473 * still in the active queue and it has not consumed all of its
474 * places, let's adjust its next position.
475 */
476 fwrr_get_srv(srv);
477
478 if (srv->eweight > 0) {
479 int prev_next = srv->npos;
480 int step = grp->next_weight / srv->eweight;
481
482 srv->npos = srv->lpos + step;
483 srv->rweight = 0;
484
485 if (srv->npos > prev_next)
486 srv->npos = prev_next;
487 if (srv->npos < grp->curr_pos + 2)
488 srv->npos = grp->curr_pos + step;
489 } else {
490 /* push it into the next tree */
491 srv->npos = grp->curr_pos + grp->curr_weight;
492 }
493
494 fwrr_dequeue_srv(srv);
495 fwrr_queue_srv(srv);
496 }
497
498 update_backend_weight(p);
499 srv->prev_state = srv->state;
500 srv->prev_eweight = srv->eweight;
501}
502
503/* Remove a server from a tree. It must have previously been dequeued. This
504 * function is meant to be called when a server is going down or has its
505 * weight disabled.
506 */
507static inline void fwrr_remove_from_tree(struct server *s)
508{
509 s->lb_tree = NULL;
510}
511
512/* Queue a server in the weight tree <root>, assuming the weight is >0.
513 * We want to sort them by inverted weights, because we need to place
514 * heavy servers first in order to get a smooth distribution.
515 */
516static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
517{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100518 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100519 eb32_insert(root, &s->lb_node);
520 s->lb_tree = root;
521}
522
523/* This function is responsible for building the weight trees in case of fast
524 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
525 * ratio. Both active and backup groups are initialized.
526 */
527void fwrr_init_server_groups(struct proxy *p)
528{
529 struct server *srv;
530 struct eb_root init_head = EB_ROOT;
531
532 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
533 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
534 p->lbprm.update_server_eweight = fwrr_update_server_weight;
535
536 p->lbprm.wdiv = BE_WEIGHT_SCALE;
537 for (srv = p->srv; srv; srv = srv->next) {
538 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
539 srv->prev_state = srv->state;
540 }
541
542 recount_servers(p);
543 update_backend_weight(p);
544
545 /* prepare the active servers group */
546 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
547 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
548 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
549 p->lbprm.fwrr.act.t1 = init_head;
550 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
551 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
552
553 /* prepare the backup servers group */
554 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
555 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
556 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
557 p->lbprm.fwrr.bck.t1 = init_head;
558 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
559 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
560
561 /* queue active and backup servers in two distinct groups */
562 for (srv = p->srv; srv; srv = srv->next) {
563 if (!srv_is_usable(srv->state, srv->eweight))
564 continue;
565 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
566 p->lbprm.fwrr.bck.init :
567 p->lbprm.fwrr.act.init,
568 srv);
569 }
570}
571
572/* simply removes a server from a weight tree */
573static inline void fwrr_dequeue_srv(struct server *s)
574{
575 eb32_delete(&s->lb_node);
576}
577
578/* queues a server into the appropriate group and tree depending on its
579 * backup status, and ->npos. If the server is disabled, simply assign
580 * it to the NULL tree.
581 */
582static void fwrr_queue_srv(struct server *s)
583{
584 struct proxy *p = s->proxy;
585 struct fwrr_group *grp;
586
587 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
588
589 /* Delay everything which does not fit into the window and everything
590 * which does not fit into the theorical new window.
591 */
592 if (!srv_is_usable(s->state, s->eweight)) {
593 fwrr_remove_from_tree(s);
594 }
595 else if (s->eweight <= 0 ||
596 s->npos >= 2 * grp->curr_weight ||
597 s->npos >= grp->curr_weight + grp->next_weight) {
598 /* put into next tree, and readjust npos in case we could
599 * finally take this back to current. */
600 s->npos -= grp->curr_weight;
601 fwrr_queue_by_weight(grp->next, s);
602 }
603 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100604 /* The sorting key is stored in units of s->npos * user_weight
605 * in order to avoid overflows. As stated in backend.h, the
606 * lower the scale, the rougher the weights modulation, and the
607 * higher the scale, the lower the number of servers without
608 * overflow. With this formula, the result is always positive,
609 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100610 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100611 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
612 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
613
614 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100615 s->lb_tree = &grp->curr;
616 }
617}
618
619/* prepares a server when extracting it from the "init" tree */
620static inline void fwrr_get_srv_init(struct server *s)
621{
622 s->npos = s->rweight = 0;
623}
624
625/* prepares a server when extracting it from the "next" tree */
626static inline void fwrr_get_srv_next(struct server *s)
627{
628 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
629 &s->proxy->lbprm.fwrr.bck :
630 &s->proxy->lbprm.fwrr.act;
631
632 s->npos += grp->curr_weight;
633}
634
635/* prepares a server when it was marked down */
636static inline void fwrr_get_srv_down(struct server *s)
637{
638 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
639 &s->proxy->lbprm.fwrr.bck :
640 &s->proxy->lbprm.fwrr.act;
641
642 s->npos = grp->curr_pos;
643}
644
645/* prepares a server when extracting it from its tree */
646static void fwrr_get_srv(struct server *s)
647{
648 struct proxy *p = s->proxy;
649 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
650 &p->lbprm.fwrr.bck :
651 &p->lbprm.fwrr.act;
652
653 if (s->lb_tree == grp->init) {
654 fwrr_get_srv_init(s);
655 }
656 else if (s->lb_tree == grp->next) {
657 fwrr_get_srv_next(s);
658 }
659 else if (s->lb_tree == NULL) {
660 fwrr_get_srv_down(s);
661 }
662}
663
664/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
665 * when this happens, and "next" filled with servers sorted by weights.
666 */
667static inline void fwrr_switch_trees(struct fwrr_group *grp)
668{
669 struct eb_root *swap;
670 swap = grp->init;
671 grp->init = grp->next;
672 grp->next = swap;
673 grp->curr_weight = grp->next_weight;
674 grp->curr_pos = grp->curr_weight;
675}
676
677/* return next server from the current tree in FWRR group <grp>, or a server
678 * from the "init" tree if appropriate. If both trees are empty, return NULL.
679 */
680static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
681{
682 struct eb32_node *node;
683 struct server *s;
684
685 node = eb32_first(&grp->curr);
686 s = eb32_entry(node, struct server, lb_node);
687
688 if (!node || s->npos > grp->curr_pos) {
689 /* either we have no server left, or we have a hole */
690 struct eb32_node *node2;
691 node2 = eb32_first(grp->init);
692 if (node2) {
693 node = node2;
694 s = eb32_entry(node, struct server, lb_node);
695 fwrr_get_srv_init(s);
696 if (s->eweight == 0) /* FIXME: is it possible at all ? */
697 node = NULL;
698 }
699 }
700 if (node)
701 return s;
702 else
703 return NULL;
704}
705
706/* Computes next position of server <s> in the group. It is mandatory for <s>
707 * to have a non-zero, positive eweight.
708*/
709static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
710{
711 if (!s->npos) {
712 /* first time ever for this server */
713 s->lpos = grp->curr_pos;
714 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
715 s->rweight += grp->next_weight % s->eweight;
716
717 if (s->rweight >= s->eweight) {
718 s->rweight -= s->eweight;
719 s->npos++;
720 }
721 } else {
722 s->lpos = s->npos;
723 s->npos += grp->next_weight / s->eweight;
724 s->rweight += grp->next_weight % s->eweight;
725
726 if (s->rweight >= s->eweight) {
727 s->rweight -= s->eweight;
728 s->npos++;
729 }
730 }
731}
732
733/* Return next server from the current tree in backend <p>, or a server from
734 * the init tree if appropriate. If both trees are empty, return NULL.
735 * Saturated servers are skipped and requeued.
736 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100737static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100738{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100739 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100740 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100741 int switched;
742
743 if (p->srv_act)
744 grp = &p->lbprm.fwrr.act;
745 else if (p->lbprm.fbck)
746 return p->lbprm.fbck;
747 else if (p->srv_bck)
748 grp = &p->lbprm.fwrr.bck;
749 else
750 return NULL;
751
752 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100753 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100754 full = NULL; /* NULL-terminated list of saturated servers */
755 while (1) {
756 /* if we see an empty group, let's first try to collect weights
757 * which might have recently changed.
758 */
759 if (!grp->curr_weight)
760 grp->curr_pos = grp->curr_weight = grp->next_weight;
761
762 /* get first server from the "current" tree. When the end of
763 * the tree is reached, we may have to switch, but only once.
764 */
765 while (1) {
766 srv = fwrr_get_server_from_group(grp);
767 if (srv)
768 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100769 if (switched) {
770 if (avoided) {
771 srv = avoided;
772 break;
773 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100774 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100775 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100776 switched = 1;
777 fwrr_switch_trees(grp);
778
779 }
780
781 /* OK, we have a server. However, it may be saturated, in which
782 * case we don't want to reconsider it for now. We'll update
783 * its position and dequeue it anyway, so that we can move it
784 * to a better place afterwards.
785 */
786 fwrr_update_position(grp, srv);
787 fwrr_dequeue_srv(srv);
788 grp->curr_pos++;
Willy Tarreau7c669d72008-06-20 15:04:11 +0200789 if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100790 /* make sure it is not the server we are trying to exclude... */
791 if (srv != srvtoavoid || avoided)
792 break;
793
794 avoided = srv; /* ...but remember that is was selected yet avoided */
795 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100796
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100797 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100798 srv->next_full = full;
799 full = srv;
800 }
801
802 /* OK, we got the best server, let's update it */
803 fwrr_queue_srv(srv);
804
805 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100806 /* Requeue all extracted servers. If full==srv then it was
807 * avoided (unsucessfully) and chained, omit it now.
808 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100809 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100810 if (switched) {
811 /* the tree has switched, requeue all extracted servers
812 * into "init", because their place was lost, and only
813 * their weight matters.
814 */
815 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100816 if (likely(full != srv))
817 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100818 full = full->next_full;
819 } while (full);
820 } else {
821 /* requeue all extracted servers just as if they were consumed
822 * so that they regain their expected place.
823 */
824 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100825 if (likely(full != srv))
826 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100827 full = full->next_full;
828 } while (full);
829 }
830 }
831 return srv;
832}
833
Willy Tarreau51406232008-03-10 22:04:20 +0100834/* Remove a server from a tree. It must have previously been dequeued. This
835 * function is meant to be called when a server is going down or has its
836 * weight disabled.
837 */
838static inline void fwlc_remove_from_tree(struct server *s)
839{
840 s->lb_tree = NULL;
841}
842
843/* simply removes a server from a tree */
844static inline void fwlc_dequeue_srv(struct server *s)
845{
846 eb32_delete(&s->lb_node);
847}
848
849/* Queue a server in its associated tree, assuming the weight is >0.
850 * Servers are sorted by #conns/weight. To ensure maximum accuracy,
851 * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key.
852 */
853static inline void fwlc_queue_srv(struct server *s)
854{
Willy Tarreau7c669d72008-06-20 15:04:11 +0200855 s->lb_node.key = s->served * SRV_EWGHT_MAX / s->eweight;
Willy Tarreau51406232008-03-10 22:04:20 +0100856 eb32_insert(s->lb_tree, &s->lb_node);
857}
858
859/* Re-position the server in the FWLC tree after it has been assigned one
860 * connection or after it has released one. Note that it is possible that
861 * the server has been moved out of the tree due to failed health-checks.
862 */
863static void fwlc_srv_reposition(struct server *s)
864{
865 if (!s->lb_tree)
866 return;
867 fwlc_dequeue_srv(s);
868 fwlc_queue_srv(s);
869}
870
871/* This function updates the server trees according to server <srv>'s new
872 * state. It should be called when server <srv>'s status changes to down.
873 * It is not important whether the server was already down or not. It is not
874 * important either that the new state is completely down (the caller may not
875 * know all the variables of a server's state).
876 */
877static void fwlc_set_server_status_down(struct server *srv)
878{
879 struct proxy *p = srv->proxy;
880
881 if (srv->state == srv->prev_state &&
882 srv->eweight == srv->prev_eweight)
883 return;
884
885 if (srv_is_usable(srv->state, srv->eweight))
886 goto out_update_state;
887
888 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
889 /* server was already down */
890 goto out_update_backend;
891
892 if (srv->state & SRV_BACKUP) {
893 p->lbprm.tot_wbck -= srv->prev_eweight;
894 p->srv_bck--;
895
896 if (srv == p->lbprm.fbck) {
897 /* we lost the first backup server in a single-backup
898 * configuration, we must search another one.
899 */
900 struct server *srv2 = p->lbprm.fbck;
901 do {
902 srv2 = srv2->next;
903 } while (srv2 &&
904 !((srv2->state & SRV_BACKUP) &&
905 srv_is_usable(srv2->state, srv2->eweight)));
906 p->lbprm.fbck = srv2;
907 }
908 } else {
909 p->lbprm.tot_wact -= srv->prev_eweight;
910 p->srv_act--;
911 }
912
913 fwlc_dequeue_srv(srv);
914 fwlc_remove_from_tree(srv);
915
916out_update_backend:
917 /* check/update tot_used, tot_weight */
918 update_backend_weight(p);
919 out_update_state:
920 srv->prev_state = srv->state;
921 srv->prev_eweight = srv->eweight;
922}
923
924/* This function updates the server trees according to server <srv>'s new
925 * state. It should be called when server <srv>'s status changes to up.
926 * It is not important whether the server was already down or not. It is not
927 * important either that the new state is completely UP (the caller may not
928 * know all the variables of a server's state). This function will not change
929 * the weight of a server which was already up.
930 */
931static void fwlc_set_server_status_up(struct server *srv)
932{
933 struct proxy *p = srv->proxy;
934
935 if (srv->state == srv->prev_state &&
936 srv->eweight == srv->prev_eweight)
937 return;
938
939 if (!srv_is_usable(srv->state, srv->eweight))
940 goto out_update_state;
941
942 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
943 /* server was already up */
944 goto out_update_backend;
945
946 if (srv->state & SRV_BACKUP) {
947 srv->lb_tree = &p->lbprm.fwlc.bck;
948 p->lbprm.tot_wbck += srv->eweight;
949 p->srv_bck++;
950
951 if (!(p->options & PR_O_USE_ALL_BK)) {
952 if (!p->lbprm.fbck) {
953 /* there was no backup server anymore */
954 p->lbprm.fbck = srv;
955 } else {
956 /* we may have restored a backup server prior to fbck,
957 * in which case it should replace it.
958 */
959 struct server *srv2 = srv;
960 do {
961 srv2 = srv2->next;
962 } while (srv2 && (srv2 != p->lbprm.fbck));
963 if (srv2)
964 p->lbprm.fbck = srv;
965 }
966 }
967 } else {
968 srv->lb_tree = &p->lbprm.fwlc.act;
969 p->lbprm.tot_wact += srv->eweight;
970 p->srv_act++;
971 }
972
973 /* note that eweight cannot be 0 here */
974 fwlc_queue_srv(srv);
975
976 out_update_backend:
977 /* check/update tot_used, tot_weight */
978 update_backend_weight(p);
979 out_update_state:
980 srv->prev_state = srv->state;
981 srv->prev_eweight = srv->eweight;
982}
983
984/* This function must be called after an update to server <srv>'s effective
985 * weight. It may be called after a state change too.
986 */
987static void fwlc_update_server_weight(struct server *srv)
988{
989 int old_state, new_state;
990 struct proxy *p = srv->proxy;
991
992 if (srv->state == srv->prev_state &&
993 srv->eweight == srv->prev_eweight)
994 return;
995
996 /* If changing the server's weight changes its state, we simply apply
997 * the procedures we already have for status change. If the state
998 * remains down, the server is not in any tree, so it's as easy as
999 * updating its values. If the state remains up with different weights,
1000 * there are some computations to perform to find a new place and
1001 * possibly a new tree for this server.
1002 */
1003
1004 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
1005 new_state = srv_is_usable(srv->state, srv->eweight);
1006
1007 if (!old_state && !new_state) {
1008 srv->prev_state = srv->state;
1009 srv->prev_eweight = srv->eweight;
1010 return;
1011 }
1012 else if (!old_state && new_state) {
1013 fwlc_set_server_status_up(srv);
1014 return;
1015 }
1016 else if (old_state && !new_state) {
1017 fwlc_set_server_status_down(srv);
1018 return;
1019 }
1020
1021 if (srv->lb_tree)
1022 fwlc_dequeue_srv(srv);
1023
1024 if (srv->state & SRV_BACKUP) {
1025 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
1026 srv->lb_tree = &p->lbprm.fwlc.bck;
1027 } else {
1028 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
1029 srv->lb_tree = &p->lbprm.fwlc.act;
1030 }
1031
1032 fwlc_queue_srv(srv);
1033
1034 update_backend_weight(p);
1035 srv->prev_state = srv->state;
1036 srv->prev_eweight = srv->eweight;
1037}
1038
1039/* This function is responsible for building the trees in case of fast
1040 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
1041 * uweight ratio. Both active and backup groups are initialized.
1042 */
1043void fwlc_init_server_tree(struct proxy *p)
1044{
1045 struct server *srv;
1046 struct eb_root init_head = EB_ROOT;
1047
1048 p->lbprm.set_server_status_up = fwlc_set_server_status_up;
1049 p->lbprm.set_server_status_down = fwlc_set_server_status_down;
1050 p->lbprm.update_server_eweight = fwlc_update_server_weight;
1051 p->lbprm.server_take_conn = fwlc_srv_reposition;
1052 p->lbprm.server_drop_conn = fwlc_srv_reposition;
1053
1054 p->lbprm.wdiv = BE_WEIGHT_SCALE;
1055 for (srv = p->srv; srv; srv = srv->next) {
1056 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
1057 srv->prev_state = srv->state;
1058 }
1059
1060 recount_servers(p);
1061 update_backend_weight(p);
1062
1063 p->lbprm.fwlc.act = init_head;
1064 p->lbprm.fwlc.bck = init_head;
1065
1066 /* queue active and backup servers in two distinct groups */
1067 for (srv = p->srv; srv; srv = srv->next) {
1068 if (!srv_is_usable(srv->state, srv->eweight))
1069 continue;
1070 srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
1071 fwlc_queue_srv(srv);
1072 }
1073}
1074
1075/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
1076 * return NULL. Saturated servers are skipped.
1077 */
1078static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
1079{
1080 struct server *srv, *avoided;
1081 struct eb32_node *node;
1082
1083 srv = avoided = NULL;
1084
1085 if (p->srv_act)
1086 node = eb32_first(&p->lbprm.fwlc.act);
1087 else if (p->lbprm.fbck)
1088 return p->lbprm.fbck;
1089 else if (p->srv_bck)
1090 node = eb32_first(&p->lbprm.fwlc.bck);
1091 else
1092 return NULL;
1093
1094 while (node) {
1095 /* OK, we have a server. However, it may be saturated, in which
1096 * case we don't want to reconsider it for now, so we'll simply
1097 * skip it. Same if it's the server we try to avoid, in which
1098 * case we simply remember it for later use if needed.
1099 */
1100 struct server *s;
1101
1102 s = eb32_entry(node, struct server, lb_node);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001103 if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
Willy Tarreau51406232008-03-10 22:04:20 +01001104 if (s != srvtoavoid) {
1105 srv = s;
1106 break;
1107 }
1108 avoided = s;
1109 }
1110 node = eb32_next(node);
1111 }
1112
1113 if (!srv)
1114 srv = avoided;
1115
1116 return srv;
1117}
1118
Willy Tarreau01732802007-11-01 22:48:15 +01001119/*
1120 * This function tries to find a running server for the proxy <px> following
1121 * the URL parameter hash method. It looks for a specific parameter in the
1122 * URL and hashes it to compute the server ID. This is useful to optimize
1123 * performance by avoiding bounces between servers in contexts where sessions
1124 * are shared but cookies are not usable. If the parameter is not found, NULL
1125 * is returned. If any server is found, it will be returned. If no valid server
1126 * is found, NULL is returned.
Willy Tarreau01732802007-11-01 22:48:15 +01001127 */
1128struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
1129{
1130 unsigned long hash = 0;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001131 const char *p;
1132 const char *params;
Willy Tarreau01732802007-11-01 22:48:15 +01001133 int plen;
1134
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001135 /* when tot_weight is 0 then so is srv_count */
Willy Tarreau20697042007-11-15 23:26:18 +01001136 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +01001137 return NULL;
1138
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001139 if ((p = memchr(uri, '?', uri_len)) == NULL)
1140 return NULL;
1141
Willy Tarreau20697042007-11-15 23:26:18 +01001142 if (px->lbprm.map.state & PR_MAP_RECALC)
1143 recalc_server_map(px);
1144
Willy Tarreau01732802007-11-01 22:48:15 +01001145 p++;
1146
1147 uri_len -= (p - uri);
1148 plen = px->url_param_len;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001149 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001150
1151 while (uri_len > plen) {
1152 /* Look for the parameter name followed by an equal symbol */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001153 if (params[plen] == '=') {
1154 if (memcmp(params, px->url_param_name, plen) == 0) {
1155 /* OK, we have the parameter here at <params>, and
Willy Tarreau01732802007-11-01 22:48:15 +01001156 * the value after the equal sign, at <p>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001157 * skip the equal symbol
Willy Tarreau01732802007-11-01 22:48:15 +01001158 */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001159 p += plen + 1;
1160 uri_len -= plen + 1;
1161
Willy Tarreau01732802007-11-01 22:48:15 +01001162 while (uri_len && *p != '&') {
1163 hash = *p + (hash << 6) + (hash << 16) - hash;
1164 uri_len--;
1165 p++;
1166 }
Willy Tarreau20697042007-11-15 23:26:18 +01001167 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +01001168 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001169 }
1170 /* skip to next parameter */
1171 p = memchr(params, '&', uri_len);
1172 if (!p)
1173 return NULL;
1174 p++;
1175 uri_len -= (p - params);
1176 params = p;
1177 }
1178 return NULL;
1179}
1180
1181/*
1182 * this does the same as the previous server_ph, but check the body contents
1183 */
1184struct server *get_server_ph_post(struct session *s)
1185{
1186 unsigned long hash = 0;
1187 struct http_txn *txn = &s->txn;
1188 struct buffer *req = s->req;
1189 struct http_msg *msg = &txn->req;
1190 struct proxy *px = s->be;
1191 unsigned int plen = px->url_param_len;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001192 unsigned long body;
1193 unsigned long len;
1194 const char *params;
1195 struct hdr_ctx ctx;
1196 const char *p;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001197
1198 /* tot_weight appears to mean srv_count */
1199 if (px->lbprm.tot_weight == 0)
1200 return NULL;
1201
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001202 body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
Willy Tarreaufb0528b2008-08-11 00:21:56 +02001203 len = req->l - body;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001204 params = req->data + body;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001205
1206 if ( len == 0 )
1207 return NULL;
1208
1209 if (px->lbprm.map.state & PR_MAP_RECALC)
1210 recalc_server_map(px);
1211
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001212 ctx.idx = 0;
1213
1214 /* if the message is chunked, we skip the chunk size, but use the value as len */
1215 http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
Willy Tarreauadfb8562008-08-11 15:24:42 +02001216 if (ctx.idx && ctx.vlen >= 7 && strncasecmp(ctx.line+ctx.val, "chunked", 7) == 0) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001217 unsigned int chunk = 0;
Willy Tarreau03d60bb2009-01-09 11:13:00 +01001218 while ( params < (req->data+req->max_len) && !HTTP_IS_CRLF(*params)) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001219 char c = *params;
1220 if (ishex(c)) {
1221 unsigned int hex = toupper(c) - '0';
1222 if ( hex > 9 )
1223 hex -= 'A' - '9' - 1;
1224 chunk = (chunk << 4) | hex;
1225 }
1226 else
1227 return NULL;
1228 params++;
1229 len--;
Willy Tarreau01732802007-11-01 22:48:15 +01001230 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001231 /* spec says we get CRLF */
1232 if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
1233 params += 2;
1234 else
1235 return NULL;
1236 /* ok we have some encoded length, just inspect the first chunk */
1237 len = chunk;
1238 }
Willy Tarreau01732802007-11-01 22:48:15 +01001239
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001240 p = params;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001241
1242 while (len > plen) {
1243 /* Look for the parameter name followed by an equal symbol */
1244 if (params[plen] == '=') {
1245 if (memcmp(params, px->url_param_name, plen) == 0) {
1246 /* OK, we have the parameter here at <params>, and
1247 * the value after the equal sign, at <p>
1248 * skip the equal symbol
1249 */
1250 p += plen + 1;
1251 len -= plen + 1;
1252
1253 while (len && *p != '&') {
1254 if (unlikely(!HTTP_IS_TOKEN(*p))) {
1255 /* if in a POST, body must be URI encoded or its not a URI.
1256 * Do not interprete any possible binary data as a parameter.
1257 */
1258 if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
1259 break;
1260 return NULL; /* oh, no; this is not uri-encoded.
1261 * This body does not contain parameters.
1262 */
1263 }
1264 hash = *p + (hash << 6) + (hash << 16) - hash;
1265 len--;
1266 p++;
1267 /* should we break if vlen exceeds limit? */
1268 }
1269 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1270 }
1271 }
Willy Tarreau01732802007-11-01 22:48:15 +01001272 /* skip to next parameter */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001273 p = memchr(params, '&', len);
Willy Tarreau01732802007-11-01 22:48:15 +01001274 if (!p)
1275 return NULL;
1276 p++;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001277 len -= (p - params);
1278 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001279 }
1280 return NULL;
1281}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001282
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001283
Willy Tarreaubaaee002006-06-26 02:48:02 +02001284/*
Benoitaffb4812009-03-25 13:02:10 +01001285 * This function tries to find a running server for the proxy <px> following
1286 * the Header parameter hash method. It looks for a specific parameter in the
1287 * URL and hashes it to compute the server ID. This is useful to optimize
1288 * performance by avoiding bounces between servers in contexts where sessions
1289 * are shared but cookies are not usable. If the parameter is not found, NULL
1290 * is returned. If any server is found, it will be returned. If no valid server
1291 * is found, NULL is returned.
1292 */
1293struct server *get_server_hh(struct session *s)
1294{
1295 unsigned long hash = 0;
1296 struct http_txn *txn = &s->txn;
1297 struct http_msg *msg = &txn->req;
1298 struct proxy *px = s->be;
1299 unsigned int plen = px->hh_len;
1300 unsigned long len;
1301 struct hdr_ctx ctx;
1302 const char *p;
1303
1304 /* tot_weight appears to mean srv_count */
1305 if (px->lbprm.tot_weight == 0)
1306 return NULL;
1307
1308 if (px->lbprm.map.state & PR_MAP_RECALC)
1309 recalc_server_map(px);
1310
1311 ctx.idx = 0;
1312
1313 /* if the message is chunked, we skip the chunk size, but use the value as len */
1314 http_find_header2(px->hh_name, plen, msg->sol, &txn->hdr_idx, &ctx);
1315
1316 /* if the header is not found or empty, let's fallback to round robin */
1317 if (!ctx.idx || !ctx.vlen)
1318 return NULL;
1319
1320 /* Found a the hh_name in the headers.
1321 * we will compute the hash based on this value ctx.val.
1322 */
1323 len = ctx.vlen;
1324 p = (char *)ctx.line + ctx.val;
1325 if (!px->hh_match_domain) {
1326 while (len) {
1327 hash = *p + (hash << 6) + (hash << 16) - hash;
1328 len--;
1329 p++;
1330 }
1331 } else {
1332 int dohash = 0;
1333 p += len - 1;
1334 /* special computation, use only main domain name, not tld/host
1335 * going back from the end of string, start hashing at first
1336 * dot stop at next.
1337 * This is designed to work with the 'Host' header, and requires
1338 * a special option to activate this.
1339 */
1340 while (len) {
1341 if (*p == '.') {
1342 if (!dohash)
1343 dohash = 1;
1344 else
1345 break;
1346 } else {
1347 if (dohash)
1348 hash = *p + (hash << 6) + (hash << 16) - hash;
1349 }
1350 len--;
1351 p--;
1352 }
1353 }
1354 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1355}
1356
1357
1358/*
Willy Tarreau7c669d72008-06-20 15:04:11 +02001359 * This function applies the load-balancing algorithm to the session, as
1360 * defined by the backend it is assigned to. The session is then marked as
1361 * 'assigned'.
1362 *
1363 * This function MAY NOT be called with SN_ASSIGNED already set. If the session
1364 * had a server previously assigned, it is rebalanced, trying to avoid the same
1365 * server.
1366 * The function tries to keep the original connection slot if it reconnects to
1367 * the same server, otherwise it releases it and tries to offer it.
1368 *
1369 * It is illegal to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001370 *
1371 * It may return :
Willy Tarreau7c669d72008-06-20 15:04:11 +02001372 * SRV_STATUS_OK if everything is OK. Session assigned to ->srv
1373 * SRV_STATUS_NOSRV if no server is available. Session is not ASSIGNED
1374 * SRV_STATUS_FULL if all servers are saturated. Session is not ASSIGNED
Willy Tarreaubaaee002006-06-26 02:48:02 +02001375 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1376 *
Willy Tarreau7c669d72008-06-20 15:04:11 +02001377 * Upon successful return, the session flag SN_ASSIGNED is set to indicate that
1378 * it does not need to be called anymore. This means that s->srv can be trusted
1379 * in balance and direct modes.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001380 *
1381 */
1382
1383int assign_server(struct session *s)
1384{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001385
Willy Tarreau7c669d72008-06-20 15:04:11 +02001386 struct server *conn_slot;
1387 int err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001388
Willy Tarreaubaaee002006-06-26 02:48:02 +02001389#ifdef DEBUG_FULL
1390 fprintf(stderr,"assign_server : s=%p\n",s);
1391#endif
1392
Willy Tarreau7c669d72008-06-20 15:04:11 +02001393 err = SRV_STATUS_INTERNAL;
1394 if (unlikely(s->pend_pos || s->flags & SN_ASSIGNED))
1395 goto out_err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001396
Willy Tarreau7c669d72008-06-20 15:04:11 +02001397 s->prev_srv = s->prev_srv;
1398 conn_slot = s->srv_conn;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001399
Willy Tarreau7c669d72008-06-20 15:04:11 +02001400 /* We have to release any connection slot before applying any LB algo,
1401 * otherwise we may erroneously end up with no available slot.
1402 */
1403 if (conn_slot)
1404 sess_change_server(s, NULL);
1405
1406 /* We will now try to find the good server and store it into <s->srv>.
1407 * Note that <s->srv> may be NULL in case of dispatch or proxy mode,
1408 * as well as if no server is available (check error code).
1409 */
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001410
Willy Tarreau7c669d72008-06-20 15:04:11 +02001411 s->srv = NULL;
1412 if (s->be->lbprm.algo & BE_LB_ALGO) {
1413 int len;
1414 /* we must check if we have at least one server available */
1415 if (!s->be->lbprm.tot_weight) {
1416 err = SRV_STATUS_NOSRV;
1417 goto out;
1418 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001419
Willy Tarreau7c669d72008-06-20 15:04:11 +02001420 switch (s->be->lbprm.algo & BE_LB_ALGO) {
1421 case BE_LB_ALGO_RR:
1422 s->srv = fwrr_get_next_server(s->be, s->prev_srv);
1423 if (!s->srv) {
1424 err = SRV_STATUS_FULL;
1425 goto out;
1426 }
1427 break;
1428 case BE_LB_ALGO_LC:
1429 s->srv = fwlc_get_next_server(s->be, s->prev_srv);
1430 if (!s->srv) {
1431 err = SRV_STATUS_FULL;
1432 goto out;
1433 }
1434 break;
1435 case BE_LB_ALGO_SH:
1436 if (s->cli_addr.ss_family == AF_INET)
1437 len = 4;
1438 else if (s->cli_addr.ss_family == AF_INET6)
1439 len = 16;
1440 else {
1441 /* unknown IP family */
1442 err = SRV_STATUS_INTERNAL;
1443 goto out;
1444 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001445
Willy Tarreau7c669d72008-06-20 15:04:11 +02001446 s->srv = get_server_sh(s->be,
1447 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
1448 len);
1449 break;
1450 case BE_LB_ALGO_UH:
1451 /* URI hashing */
1452 s->srv = get_server_uh(s->be,
1453 s->txn.req.sol + s->txn.req.sl.rq.u,
1454 s->txn.req.sl.rq.u_l);
1455 break;
1456 case BE_LB_ALGO_PH:
1457 /* URL Parameter hashing */
1458 if (s->txn.meth == HTTP_METH_POST &&
1459 memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
1460 s->txn.req.sl.rq.u_l ) == NULL)
1461 s->srv = get_server_ph_post(s);
1462 else
1463 s->srv = get_server_ph(s->be,
Willy Tarreau2fcb5002007-05-08 13:35:26 +02001464 s->txn.req.sol + s->txn.req.sl.rq.u,
1465 s->txn.req.sl.rq.u_l);
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001466
Willy Tarreau7c669d72008-06-20 15:04:11 +02001467 if (!s->srv) {
1468 /* parameter not found, fall back to round robin on the map */
1469 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
Willy Tarreau01732802007-11-01 22:48:15 +01001470 if (!s->srv) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001471 err = SRV_STATUS_FULL;
1472 goto out;
Willy Tarreau01732802007-11-01 22:48:15 +01001473 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001474 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001475 break;
Benoitaffb4812009-03-25 13:02:10 +01001476 case BE_LB_ALGO_HH:
1477 /* Header Parameter hashing */
1478 s->srv = get_server_hh(s);
1479
1480 if (!s->srv) {
1481 /* parameter not found, fall back to round robin on the map */
1482 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
1483 if (!s->srv) {
1484 err = SRV_STATUS_FULL;
1485 goto out;
1486 }
1487 }
1488 break;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001489 default:
1490 /* unknown balancing algorithm */
1491 err = SRV_STATUS_INTERNAL;
1492 goto out;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001493 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001494 if (s->srv != s->prev_srv) {
1495 s->be->cum_lbconn++;
1496 s->srv->cum_lbconn++;
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001497 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001498 }
1499 else if (s->be->options & PR_O_HTTP_PROXY) {
1500 if (!s->srv_addr.sin_addr.s_addr) {
1501 err = SRV_STATUS_NOSRV;
1502 goto out;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001503 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001504 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001505 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001506 !(s->be->options & PR_O_TRANSP)) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001507 err = SRV_STATUS_NOSRV;
1508 goto out;
1509 }
1510
1511 s->flags |= SN_ASSIGNED;
1512 err = SRV_STATUS_OK;
1513 out:
1514
1515 /* Either we take back our connection slot, or we offer it to someone
1516 * else if we don't need it anymore.
1517 */
1518 if (conn_slot) {
1519 if (conn_slot == s->srv) {
1520 sess_change_server(s, s->srv);
1521 } else {
1522 if (may_dequeue_tasks(conn_slot, s->be))
1523 process_srv_queue(conn_slot);
1524 }
1525 }
1526
1527 out_err:
1528 return err;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001529}
1530
1531
1532/*
1533 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1534 * The address is taken from the currently assigned server, or from the
1535 * dispatch or transparent address.
1536 *
1537 * It may return :
1538 * SRV_STATUS_OK if everything is OK.
1539 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1540 *
1541 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1542 * not cleared, so it's to the caller to clear it if required.
1543 *
1544 */
1545int assign_server_address(struct session *s)
1546{
1547#ifdef DEBUG_FULL
1548 fprintf(stderr,"assign_server_address : s=%p\n",s);
1549#endif
1550
Willy Tarreau31682232007-11-29 15:38:04 +01001551 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001552 /* A server is necessarily known for this session */
1553 if (!(s->flags & SN_ASSIGNED))
1554 return SRV_STATUS_INTERNAL;
1555
1556 s->srv_addr = s->srv->addr;
1557
1558 /* if this server remaps proxied ports, we'll use
1559 * the port the client connected to with an offset. */
1560 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001561 if (!(s->be->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001562 get_frt_addr(s);
1563 if (s->frt_addr.ss_family == AF_INET) {
1564 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1565 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1566 } else {
1567 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1568 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1569 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001570 }
1571 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001572 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001573 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001574 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001575 }
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001576 else if (s->be->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001577 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001578 if (!(s->flags & SN_FRT_ADDR_SET))
1579 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001580
Willy Tarreaubd414282008-01-19 13:46:35 +01001581 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1582 /* when we support IPv6 on the backend, we may add other tests */
1583 //qfprintf(stderr, "Cannot get original server address.\n");
1584 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001585 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001586 else if (s->be->options & PR_O_HTTP_PROXY) {
1587 /* If HTTP PROXY option is set, then server is already assigned
1588 * during incoming client request parsing. */
1589 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001590 else {
1591 /* no server and no LB algorithm ! */
1592 return SRV_STATUS_INTERNAL;
1593 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001594
1595 s->flags |= SN_ADDR_SET;
1596 return SRV_STATUS_OK;
1597}
1598
1599
1600/* This function assigns a server to session <s> if required, and can add the
1601 * connection to either the assigned server's queue or to the proxy's queue.
Willy Tarreau7c669d72008-06-20 15:04:11 +02001602 * If ->srv_conn is set, the session is first released from the server.
1603 * It may also be called with SN_DIRECT and/or SN_ASSIGNED though. It will
1604 * be called before any connection and after any retry or redispatch occurs.
1605 *
1606 * It is not allowed to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001607 *
1608 * Returns :
1609 *
1610 * SRV_STATUS_OK if everything is OK.
1611 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1612 * SRV_STATUS_QUEUED if the connection has been queued.
1613 * SRV_STATUS_FULL if the server(s) is/are saturated and the
Willy Tarreau7c669d72008-06-20 15:04:11 +02001614 * connection could not be queued in s->srv,
1615 * which may be NULL if we queue on the backend.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001616 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1617 *
1618 */
1619int assign_server_and_queue(struct session *s)
1620{
1621 struct pendconn *p;
1622 int err;
1623
1624 if (s->pend_pos)
1625 return SRV_STATUS_INTERNAL;
1626
Willy Tarreau7c669d72008-06-20 15:04:11 +02001627 err = SRV_STATUS_OK;
1628 if (!(s->flags & SN_ASSIGNED)) {
1629 err = assign_server(s);
1630 if (s->prev_srv) {
1631 /* This session was previously assigned to a server. We have to
1632 * update the session's and the server's stats :
1633 * - if the server changed :
1634 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1635 * - set SN_REDISP if it was successfully redispatched
1636 * - increment srv->redispatches and be->redispatches
1637 * - if the server remained the same : update retries.
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001638 */
1639
Willy Tarreau7c669d72008-06-20 15:04:11 +02001640 if (s->prev_srv != s->srv) {
1641 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1642 s->txn.flags &= ~TX_CK_MASK;
1643 s->txn.flags |= TX_CK_DOWN;
1644 }
1645 s->flags |= SN_REDISP;
1646 s->prev_srv->redispatches++;
1647 s->be->redispatches++;
1648 } else {
1649 s->prev_srv->retries++;
1650 s->be->retries++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001651 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001652 }
1653 }
1654
Willy Tarreaubaaee002006-06-26 02:48:02 +02001655 switch (err) {
1656 case SRV_STATUS_OK:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001657 /* we have SN_ASSIGNED set */
1658 if (!s->srv)
1659 return SRV_STATUS_OK; /* dispatch or proxy mode */
1660
1661 /* If we already have a connection slot, no need to check any queue */
1662 if (s->srv_conn == s->srv)
1663 return SRV_STATUS_OK;
1664
1665 /* OK, this session already has an assigned server, but no
1666 * connection slot yet. Either it is a redispatch, or it was
1667 * assigned from persistence information (direct mode).
1668 */
1669 if ((s->flags & SN_REDIRECTABLE) && s->srv->rdr_len) {
1670 /* server scheduled for redirection, and already assigned. We
1671 * don't want to go further nor check the queue.
Willy Tarreau21d2af32008-02-14 20:25:24 +01001672 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001673 sess_change_server(s, s->srv); /* not really needed in fact */
Willy Tarreau21d2af32008-02-14 20:25:24 +01001674 return SRV_STATUS_OK;
1675 }
1676
Willy Tarreau7c669d72008-06-20 15:04:11 +02001677 /* We might have to queue this session if the assigned server is full.
1678 * We know we have to queue it into the server's queue, so if a maxqueue
1679 * is set on the server, we must also check that the server's queue is
1680 * not full, in which case we have to return FULL.
1681 */
1682 if (s->srv->maxconn &&
1683 (s->srv->nbpend || s->srv->served >= srv_dynamic_maxconn(s->srv))) {
1684
1685 if (s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue)
1686 return SRV_STATUS_FULL;
1687
Willy Tarreaubaaee002006-06-26 02:48:02 +02001688 p = pendconn_add(s);
1689 if (p)
1690 return SRV_STATUS_QUEUED;
1691 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001692 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001693 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001694
1695 /* OK, we can use this server. Let's reserve our place */
1696 sess_change_server(s, s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001697 return SRV_STATUS_OK;
1698
1699 case SRV_STATUS_FULL:
1700 /* queue this session into the proxy's queue */
1701 p = pendconn_add(s);
1702 if (p)
1703 return SRV_STATUS_QUEUED;
1704 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001705 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001706
1707 case SRV_STATUS_NOSRV:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001708 return err;
1709
Willy Tarreaubaaee002006-06-26 02:48:02 +02001710 case SRV_STATUS_INTERNAL:
1711 return err;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001712
Willy Tarreaubaaee002006-06-26 02:48:02 +02001713 default:
1714 return SRV_STATUS_INTERNAL;
1715 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001716}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001717
1718/*
1719 * This function initiates a connection to the server assigned to this session
1720 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1721 * It can return one of :
1722 * - SN_ERR_NONE if everything's OK
1723 * - SN_ERR_SRVTO if there are no more servers
1724 * - SN_ERR_SRVCL if the connection was refused by the server
1725 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1726 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1727 * - SN_ERR_INTERNAL for any other purely internal errors
1728 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1729 */
1730int connect_server(struct session *s)
1731{
1732 int fd, err;
1733
1734 if (!(s->flags & SN_ADDR_SET)) {
1735 err = assign_server_address(s);
1736 if (err != SRV_STATUS_OK)
1737 return SN_ERR_INTERNAL;
1738 }
1739
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001740 if ((fd = s->req->cons->fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001741 qfprintf(stderr, "Cannot get a server socket.\n");
1742
1743 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001744 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001745 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001746 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001747 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001748 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001749 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001750 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001751 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001752 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001753 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001754 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001755 /* this is a resource error */
1756 return SN_ERR_RESOURCE;
1757 }
Willy Tarreau7e5067d2008-12-07 16:27:56 +01001758
Willy Tarreaubaaee002006-06-26 02:48:02 +02001759 if (fd >= global.maxsock) {
1760 /* do not log anything there, it's a normal condition when this option
1761 * is used to serialize connections to a server !
1762 */
1763 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1764 close(fd);
1765 return SN_ERR_PRXCOND; /* it is a configuration limit */
1766 }
1767
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001768#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreau3ab68cf2009-01-25 16:03:28 +01001769 if ((global.tune.options & GTUNE_USE_SPLICE) &&
1770 (s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001771 /* TCP splicing supported by both FE and BE */
Willy Tarreau7e5067d2008-12-07 16:27:56 +01001772 tcp_splice_initfd(s->req->prod->fd, fd);
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001773 }
1774#endif
1775
Willy Tarreaubaaee002006-06-26 02:48:02 +02001776 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1777 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1778 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1779 close(fd);
1780 return SN_ERR_INTERNAL;
1781 }
1782
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001783 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001784 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1785
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001786 if (s->be->options & PR_O_TCP_NOLING)
1787 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1788
Willy Tarreaubaaee002006-06-26 02:48:02 +02001789 /* allow specific binding :
1790 * - server-specific at first
1791 * - proxy-specific next
1792 */
1793 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001794 struct sockaddr_in *remote = NULL;
1795 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001796
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001797#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001798 switch (s->srv->state & SRV_TPROXY_MASK) {
1799 case SRV_TPROXY_ADDR:
1800 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1801 flags = 3;
1802 break;
1803 case SRV_TPROXY_CLI:
1804 flags |= 2;
1805 /* fall through */
1806 case SRV_TPROXY_CIP:
1807 /* FIXME: what can we do if the client connects in IPv6 ? */
1808 flags |= 1;
1809 remote = (struct sockaddr_in *)&s->cli_addr;
1810 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001811 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001812#endif
Willy Tarreauc76721d2009-02-04 20:20:58 +01001813#ifdef SO_BINDTODEVICE
1814 /* Note: this might fail if not CAP_NET_RAW */
1815 if (s->srv->iface_name)
Willy Tarreau604e8302009-03-06 00:48:23 +01001816 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->srv->iface_name, s->srv->iface_len + 1);
Willy Tarreauc76721d2009-02-04 20:20:58 +01001817#endif
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001818
1819 if (s->srv->sport_range) {
1820 int attempts = 10; /* should be more than enough to find a spare port */
1821 struct sockaddr_in src;
1822
1823 ret = 1;
1824 src = s->srv->source_addr;
1825
1826 do {
1827 /* note: in case of retry, we may have to release a previously
1828 * allocated port, hence this loop's construct.
1829 */
1830 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1831 fdtab[fd].port_range = NULL;
1832
1833 if (!attempts)
1834 break;
1835 attempts--;
1836
1837 fdtab[fd].local_port = port_range_alloc_port(s->srv->sport_range);
1838 if (!fdtab[fd].local_port)
1839 break;
1840
1841 fdtab[fd].port_range = s->srv->sport_range;
1842 src.sin_port = htons(fdtab[fd].local_port);
1843
1844 ret = tcpv4_bind_socket(fd, flags, &src, remote);
1845 } while (ret != 0); /* binding NOK */
1846 }
1847 else {
1848 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
1849 }
1850
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001851 if (ret) {
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001852 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1853 fdtab[fd].port_range = NULL;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001854 close(fd);
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001855
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001856 if (ret == 1) {
1857 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1858 s->be->id, s->srv->id);
1859 send_log(s->be, LOG_EMERG,
1860 "Cannot bind to source address before connect() for server %s/%s.\n",
1861 s->be->id, s->srv->id);
1862 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001863 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001864 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001865 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001866 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001867 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001868 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001869 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001870 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001871 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001872 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001873 struct sockaddr_in *remote = NULL;
1874 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001875
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001876#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001877 switch (s->be->options & PR_O_TPXY_MASK) {
1878 case PR_O_TPXY_ADDR:
1879 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1880 flags = 3;
1881 break;
1882 case PR_O_TPXY_CLI:
1883 flags |= 2;
1884 /* fall through */
1885 case PR_O_TPXY_CIP:
1886 /* FIXME: what can we do if the client connects in IPv6 ? */
1887 flags |= 1;
1888 remote = (struct sockaddr_in *)&s->cli_addr;
1889 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001890 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001891#endif
Willy Tarreaud53f96b2009-02-04 18:46:54 +01001892#ifdef SO_BINDTODEVICE
1893 /* Note: this might fail if not CAP_NET_RAW */
1894 if (s->be->iface_name)
Willy Tarreau604e8302009-03-06 00:48:23 +01001895 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->be->iface_name, s->be->iface_len + 1);
Willy Tarreaud53f96b2009-02-04 18:46:54 +01001896#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001897 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001898 if (ret) {
1899 close(fd);
1900 if (ret == 1) {
1901 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1902 s->be->id);
1903 send_log(s->be, LOG_EMERG,
1904 "Cannot bind to source address before connect() for proxy %s.\n",
1905 s->be->id);
1906 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001907 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001908 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001909 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001910 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1911 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001912 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001913 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001914 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001915 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001916
Willy Tarreaud88edf22009-06-14 15:48:17 +02001917#ifdef TCP_QUICKACK
1918 /* disabling tcp quick ack now allows the first request to leave the
1919 * machine with the first ACK. We only do this if there are pending
1920 * data in the buffer.
1921 */
1922 if ((s->be->options2 & PR_O2_SMARTCON) && s->req->send_max)
1923 setsockopt(fd, SOL_TCP, TCP_QUICKACK, (char *) &zero, sizeof(zero));
1924#endif
1925
Willy Tarreaubaaee002006-06-26 02:48:02 +02001926 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1927 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1928
1929 if (errno == EAGAIN || errno == EADDRINUSE) {
1930 char *msg;
1931 if (errno == EAGAIN) /* no free ports left, try again later */
1932 msg = "no free ports";
1933 else
1934 msg = "local address already in use";
1935
1936 qfprintf(stderr,"Cannot connect: %s.\n",msg);
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001937 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1938 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001939 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001940 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001941 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001942 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001943 return SN_ERR_RESOURCE;
1944 } else if (errno == ETIMEDOUT) {
1945 //qfprintf(stderr,"Connect(): ETIMEDOUT");
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001946 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1947 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001948 close(fd);
1949 return SN_ERR_SRVTO;
1950 } else {
1951 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
1952 //qfprintf(stderr,"Connect(): %d", errno);
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001953 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1954 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001955 close(fd);
1956 return SN_ERR_SRVCL;
1957 }
1958 }
1959
Willy Tarreaue5ed4062008-08-30 03:17:31 +02001960 fdtab[fd].owner = s->req->cons;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001961 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaufb14edc2009-06-14 15:24:37 +02001962 fdtab[fd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
Willy Tarreaud7971282006-07-29 18:36:34 +02001963 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02001964 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02001965 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02001966 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02001967
1968 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
1969 fdtab[fd].peerlen = sizeof(s->srv_addr);
1970
Willy Tarreaubaaee002006-06-26 02:48:02 +02001971 fd_insert(fd);
Willy Tarreau788e2842008-08-26 13:25:39 +02001972 EV_FD_SET(fd, DIR_WR); /* for connect status */
1973
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001974 s->req->cons->state = SI_ST_CON;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001975 if (s->srv) {
Willy Tarreau1e62de62008-11-11 20:20:02 +01001976 s->flags |= SN_CURR_SESS;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001977 s->srv->cur_sess++;
1978 if (s->srv->cur_sess > s->srv->cur_sess_max)
1979 s->srv->cur_sess_max = s->srv->cur_sess;
Willy Tarreau51406232008-03-10 22:04:20 +01001980 if (s->be->lbprm.server_take_conn)
1981 s->be->lbprm.server_take_conn(s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001982 }
1983
Willy Tarreaua3780f22009-03-15 21:49:00 +01001984 s->req->cons->exp = tick_add_ifset(now_ms, s->be->timeout.connect);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001985 return SN_ERR_NONE; /* connection is OK */
1986}
1987
1988
Willy Tarreaubaaee002006-06-26 02:48:02 +02001989/* This function performs the "redispatch" part of a connection attempt. It
1990 * will assign a server if required, queue the connection if required, and
1991 * handle errors that might arise at this level. It can change the server
1992 * state. It will return 1 if it encounters an error, switches the server
1993 * state, or has to queue a connection. Otherwise, it will return 0 indicating
1994 * that the connection is ready to use.
1995 */
1996
1997int srv_redispatch_connect(struct session *t)
1998{
1999 int conn_err;
2000
2001 /* We know that we don't have any connection pending, so we will
2002 * try to get a new one, and wait in this state if it's queued
2003 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02002004 redispatch:
Willy Tarreaubaaee002006-06-26 02:48:02 +02002005 conn_err = assign_server_and_queue(t);
2006 switch (conn_err) {
2007 case SRV_STATUS_OK:
2008 break;
2009
Willy Tarreau7c669d72008-06-20 15:04:11 +02002010 case SRV_STATUS_FULL:
2011 /* The server has reached its maxqueue limit. Either PR_O_REDISP is set
2012 * and we can redispatch to another server, or it is not and we return
2013 * 503. This only makes sense in DIRECT mode however, because normal LB
2014 * algorithms would never select such a server, and hash algorithms
2015 * would bring us on the same server again. Note that t->srv is set in
2016 * this case.
2017 */
2018 if ((t->flags & SN_DIRECT) && (t->be->options & PR_O_REDISP)) {
2019 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
2020 t->prev_srv = t->srv;
2021 goto redispatch;
2022 }
2023
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002024 if (!t->req->cons->err_type) {
2025 t->req->cons->err_type = SI_ET_QUEUE_ERR;
2026 t->req->cons->err_loc = t->srv;
2027 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02002028
2029 t->srv->failed_conns++;
2030 t->be->failed_conns++;
2031 return 1;
2032
Willy Tarreaubaaee002006-06-26 02:48:02 +02002033 case SRV_STATUS_NOSRV:
2034 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002035 if (!t->req->cons->err_type) {
2036 t->req->cons->err_type = SI_ET_CONN_ERR;
2037 t->req->cons->err_loc = NULL;
2038 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01002039
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002040 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002041 return 1;
2042
2043 case SRV_STATUS_QUEUED:
Willy Tarreau35374672008-09-03 18:11:02 +02002044 t->req->cons->exp = tick_add_ifset(now_ms, t->be->timeout.queue);
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002045 t->req->cons->state = SI_ST_QUE;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002046 /* do nothing else and do not wake any other session up */
2047 return 1;
2048
Willy Tarreaubaaee002006-06-26 02:48:02 +02002049 case SRV_STATUS_INTERNAL:
2050 default:
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002051 if (!t->req->cons->err_type) {
2052 t->req->cons->err_type = SI_ET_CONN_OTHER;
2053 t->req->cons->err_loc = t->srv;
2054 }
2055
Willy Tarreaubaaee002006-06-26 02:48:02 +02002056 if (t->srv)
Willy Tarreau7f062c42009-03-05 18:43:00 +01002057 srv_inc_sess_ctr(t->srv);
Willy Tarreau98937b82007-12-10 15:05:42 +01002058 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02002059 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002060 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002061
2062 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002063 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02002064 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002065 return 1;
2066 }
2067 /* if we get here, it's because we got SRV_STATUS_OK, which also
2068 * means that the connection has not been queued.
2069 */
2070 return 0;
2071}
2072
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002073int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01002074 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002075 return px->down_time;
2076
2077 return now.tv_sec - px->last_change + px->down_time;
2078}
Willy Tarreaubaaee002006-06-26 02:48:02 +02002079
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002080/* This function parses a "balance" statement in a backend section describing
2081 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
2082 * returns -1, it may write an error message into ther <err> buffer, for at
2083 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
2084 * written. The function must be called with <args> pointing to the first word
2085 * after "balance".
2086 */
2087int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
2088{
2089 if (!*(args[0])) {
2090 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01002091 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2092 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002093 return 0;
2094 }
2095
2096 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002097 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2098 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002099 }
Willy Tarreau51406232008-03-10 22:04:20 +01002100 else if (!strcmp(args[0], "leastconn")) {
2101 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2102 curproxy->lbprm.algo |= BE_LB_ALGO_LC;
2103 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002104 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002105 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2106 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002107 }
2108 else if (!strcmp(args[0], "uri")) {
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002109 int arg = 1;
2110
Willy Tarreau31682232007-11-29 15:38:04 +01002111 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2112 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002113
2114 while (*args[arg]) {
2115 if (!strcmp(args[arg], "len")) {
2116 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2117 snprintf(err, errlen, "'balance uri len' expects a positive integer (got '%s').", args[arg+1]);
2118 return -1;
2119 }
2120 curproxy->uri_len_limit = atoi(args[arg+1]);
2121 arg += 2;
2122 }
2123 else if (!strcmp(args[arg], "depth")) {
2124 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2125 snprintf(err, errlen, "'balance uri depth' expects a positive integer (got '%s').", args[arg+1]);
2126 return -1;
2127 }
2128 /* hint: we store the position of the ending '/' (depth+1) so
2129 * that we avoid a comparison while computing the hash.
2130 */
2131 curproxy->uri_dirs_depth1 = atoi(args[arg+1]) + 1;
2132 arg += 2;
2133 }
2134 else {
2135 snprintf(err, errlen, "'balance uri' only accepts parameters 'len' and 'depth' (got '%s').", args[arg]);
2136 return -1;
2137 }
2138 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002139 }
Willy Tarreau01732802007-11-01 22:48:15 +01002140 else if (!strcmp(args[0], "url_param")) {
2141 if (!*args[1]) {
2142 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
2143 return -1;
2144 }
Willy Tarreau31682232007-11-29 15:38:04 +01002145 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2146 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreaua534fea2008-08-03 12:19:50 +02002147
2148 free(curproxy->url_param_name);
Willy Tarreau01732802007-11-01 22:48:15 +01002149 curproxy->url_param_name = strdup(args[1]);
Willy Tarreaua534fea2008-08-03 12:19:50 +02002150 curproxy->url_param_len = strlen(args[1]);
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002151 if (*args[2]) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02002152 if (strcmp(args[2], "check_post")) {
2153 snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
2154 return -1;
2155 }
2156 if (*args[3]) {
2157 /* TODO: maybe issue a warning if there is no value, no digits or too long */
2158 curproxy->url_param_post_limit = str2ui(args[3]);
2159 }
2160 /* if no limit, or faul value in args[3], then default to a moderate wordlen */
2161 if (!curproxy->url_param_post_limit)
2162 curproxy->url_param_post_limit = 48;
2163 else if ( curproxy->url_param_post_limit < 3 )
2164 curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
2165 }
Benoitaffb4812009-03-25 13:02:10 +01002166 }
2167 else if (!strncmp(args[0], "hdr(", 4)) {
2168 const char *beg, *end;
2169
2170 beg = args[0] + 4;
2171 end = strchr(beg, ')');
2172
2173 if (!end || end == beg) {
2174 snprintf(err, errlen, "'balance hdr(name)' requires an http header field name.");
2175 return -1;
2176 }
2177
2178 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2179 curproxy->lbprm.algo |= BE_LB_ALGO_HH;
2180
2181 free(curproxy->hh_name);
2182 curproxy->hh_len = end - beg;
2183 curproxy->hh_name = my_strndup(beg, end - beg);
2184 curproxy->hh_match_domain = 0;
2185
2186 if (*args[1]) {
2187 if (strcmp(args[1], "use_domain_only")) {
2188 snprintf(err, errlen, "'balance hdr(name)' only accepts 'use_domain_only' modifier.");
2189 return -1;
2190 }
2191 curproxy->hh_match_domain = 1;
2192 }
2193
Willy Tarreau01732802007-11-01 22:48:15 +01002194 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002195 else {
Benoitaffb4812009-03-25 13:02:10 +01002196 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri', 'url_param' and 'hdr(name)' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002197 return -1;
2198 }
2199 return 0;
2200}
2201
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002202
2203/************************************************************************/
2204/* All supported keywords must be declared here. */
2205/************************************************************************/
2206
2207/* set test->i to the number of enabled servers on the proxy */
2208static int
2209acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
2210 struct acl_expr *expr, struct acl_test *test)
2211{
2212 test->flags = ACL_TEST_F_VOL_TEST;
2213 if (expr->arg_len) {
2214 /* another proxy was designated, we must look for it */
2215 for (px = proxy; px; px = px->next)
2216 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2217 break;
2218 }
2219 if (!px)
2220 return 0;
2221
2222 if (px->srv_act)
2223 test->i = px->srv_act;
2224 else if (px->lbprm.fbck)
2225 test->i = 1;
2226 else
2227 test->i = px->srv_bck;
2228
2229 return 1;
2230}
2231
Jeffrey 'jf' Lim5051d7b2008-09-04 01:03:03 +08002232/* set test->i to the number of enabled servers on the proxy */
2233static int
2234acl_fetch_connslots(struct proxy *px, struct session *l4, void *l7, int dir,
2235 struct acl_expr *expr, struct acl_test *test)
2236{
2237 struct server *iterator;
2238 test->flags = ACL_TEST_F_VOL_TEST;
2239 if (expr->arg_len) {
2240 /* another proxy was designated, we must look for it */
2241 for (px = proxy; px; px = px->next)
2242 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2243 break;
2244 }
2245 if (!px)
2246 return 0;
2247
2248 test->i = 0;
2249 iterator = px->srv;
2250 while (iterator) {
2251 if ((iterator->state & 1) == 0) {
2252 iterator = iterator->next;
2253 continue;
2254 }
2255 if (iterator->maxconn == 0 || iterator->maxqueue == 0) {
2256 test->i = -1;
2257 return 1;
2258 }
2259
2260 test->i += (iterator->maxconn - iterator->cur_sess)
2261 + (iterator->maxqueue - iterator->nbpend);
2262 iterator = iterator->next;
2263 }
2264
2265 return 1;
2266}
2267
Willy Tarreau079ff0a2009-03-05 21:34:28 +01002268/* set test->i to the number of connections per second reaching the frontend */
2269static int
2270acl_fetch_fe_sess_rate(struct proxy *px, struct session *l4, void *l7, int dir,
2271 struct acl_expr *expr, struct acl_test *test)
2272{
2273 test->flags = ACL_TEST_F_VOL_TEST;
2274 if (expr->arg_len) {
2275 /* another proxy was designated, we must look for it */
2276 for (px = proxy; px; px = px->next)
2277 if ((px->cap & PR_CAP_FE) && !strcmp(px->id, expr->arg.str))
2278 break;
2279 }
2280 if (!px)
2281 return 0;
2282
2283 test->i = read_freq_ctr(&px->fe_sess_per_sec);
2284 return 1;
2285}
2286
2287/* set test->i to the number of connections per second reaching the backend */
2288static int
2289acl_fetch_be_sess_rate(struct proxy *px, struct session *l4, void *l7, int dir,
2290 struct acl_expr *expr, struct acl_test *test)
2291{
2292 test->flags = ACL_TEST_F_VOL_TEST;
2293 if (expr->arg_len) {
2294 /* another proxy was designated, we must look for it */
2295 for (px = proxy; px; px = px->next)
2296 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2297 break;
2298 }
2299 if (!px)
2300 return 0;
2301
2302 test->i = read_freq_ctr(&px->be_sess_per_sec);
2303 return 1;
2304}
2305
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002306
2307/* Note: must not be declared <const> as its list will be overwritten */
2308static struct acl_kw_list acl_kws = {{ },{
Jeffrey 'jf' Lim5051d7b2008-09-04 01:03:03 +08002309 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int, ACL_USE_NOTHING },
Willy Tarreau3a8efeb2009-03-05 19:15:37 +01002310 { "connslots", acl_parse_int, acl_fetch_connslots, acl_match_int, ACL_USE_NOTHING },
Willy Tarreau079ff0a2009-03-05 21:34:28 +01002311 { "fe_sess_rate", acl_parse_int, acl_fetch_fe_sess_rate, acl_match_int, ACL_USE_NOTHING },
2312 { "be_sess_rate", acl_parse_int, acl_fetch_be_sess_rate, acl_match_int, ACL_USE_NOTHING },
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002313 { NULL, NULL, NULL, NULL },
2314}};
2315
2316
2317__attribute__((constructor))
2318static void __backend_init(void)
2319{
2320 acl_register_keywords(&acl_kws);
2321}
2322
2323
Willy Tarreaubaaee002006-06-26 02:48:02 +02002324/*
2325 * Local variables:
2326 * c-indent-level: 8
2327 * c-basic-offset: 8
2328 * End:
2329 */