blob: d6d47faeeb22a740f4f4964b86786348e6e1763f [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Backend variables and functions.
3 *
Willy Tarreaue8c66af2008-01-13 18:40:14 +01004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
Willy Tarreauf19cf372006-11-14 15:40:51 +010018#include <string.h>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +020019#include <ctype.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020020
Willy Tarreaud88edf22009-06-14 15:48:17 +020021#include <netinet/tcp.h>
22
Willy Tarreau2dd0d472006-06-29 17:53:05 +020023#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020024#include <common/config.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020025#include <common/debug.h>
Willy Tarreaub625a082007-11-26 01:15:43 +010026#include <common/eb32tree.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020027#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020028#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020029
Willy Tarreaubaaee002006-06-26 02:48:02 +020030#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020031
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +010032#include <proto/acl.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020033#include <proto/backend.h>
Willy Tarreau14c8aac2007-05-08 19:46:30 +020034#include <proto/client.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020035#include <proto/fd.h>
Willy Tarreau80587432006-12-24 17:47:20 +010036#include <proto/httperr.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020037#include <proto/log.h>
Willy Tarreauc6f4ce82009-06-10 11:09:37 +020038#include <proto/port_range.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020039#include <proto/proto_http.h>
Willy Tarreaue8c66af2008-01-13 18:40:14 +010040#include <proto/proto_tcp.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020041#include <proto/queue.h>
Willy Tarreau7f062c42009-03-05 18:43:00 +010042#include <proto/server.h>
Willy Tarreau7c669d72008-06-20 15:04:11 +020043#include <proto/session.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020044#include <proto/stream_sock.h>
45#include <proto/task.h>
46
Willy Tarreau6d1a9882007-01-07 02:03:04 +010047#ifdef CONFIG_HAP_TCPSPLICE
48#include <libtcpsplice.h>
49#endif
50
/* Forward declarations of the FWRR helpers which are referenced by the
 * status/weight update functions before being defined later in this file.
 */
static void fwrr_get_srv(struct server *s);
static void fwrr_queue_srv(struct server *s);
static inline void fwrr_dequeue_srv(struct server *s);
static inline void fwrr_remove_from_tree(struct server *s);
static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
56
57/* This function returns non-zero if a server with the given weight and state
58 * is usable for LB, otherwise zero.
59 */
60static inline int srv_is_usable(int state, int weight)
61{
62 if (!weight)
63 return 0;
Willy Tarreau48494c02007-11-30 10:41:39 +010064 if (state & SRV_GOINGDOWN)
65 return 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010066 if (!(state & SRV_RUNNING))
67 return 0;
68 return 1;
69}
70
Willy Tarreaubaaee002006-06-26 02:48:02 +020071/*
72 * This function recounts the number of usable active and backup servers for
73 * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
Willy Tarreaub625a082007-11-26 01:15:43 +010074 * This function also recomputes the total active and backup weights. However,
Willy Tarreauf4cca452008-03-08 21:42:54 +010075 * it does not update tot_weight nor tot_used. Use update_backend_weight() for
Willy Tarreaub625a082007-11-26 01:15:43 +010076 * this.
Willy Tarreaubaaee002006-06-26 02:48:02 +020077 */
Willy Tarreaub625a082007-11-26 01:15:43 +010078static void recount_servers(struct proxy *px)
Willy Tarreaubaaee002006-06-26 02:48:02 +020079{
80 struct server *srv;
81
Willy Tarreau20697042007-11-15 23:26:18 +010082 px->srv_act = px->srv_bck = 0;
83 px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
Willy Tarreaub625a082007-11-26 01:15:43 +010084 px->lbprm.fbck = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +020085 for (srv = px->srv; srv != NULL; srv = srv->next) {
Willy Tarreaub625a082007-11-26 01:15:43 +010086 if (!srv_is_usable(srv->state, srv->eweight))
87 continue;
88
89 if (srv->state & SRV_BACKUP) {
90 if (!px->srv_bck &&
Willy Tarreauf4cca452008-03-08 21:42:54 +010091 !(px->options & PR_O_USE_ALL_BK))
Willy Tarreaub625a082007-11-26 01:15:43 +010092 px->lbprm.fbck = srv;
93 px->srv_bck++;
94 px->lbprm.tot_wbck += srv->eweight;
95 } else {
96 px->srv_act++;
97 px->lbprm.tot_wact += srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +020098 }
99 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100100}
Willy Tarreau20697042007-11-15 23:26:18 +0100101
Willy Tarreaub625a082007-11-26 01:15:43 +0100102/* This function simply updates the backend's tot_weight and tot_used values
103 * after servers weights have been updated. It is designed to be used after
104 * recount_servers() or equivalent.
105 */
106static void update_backend_weight(struct proxy *px)
107{
Willy Tarreau20697042007-11-15 23:26:18 +0100108 if (px->srv_act) {
109 px->lbprm.tot_weight = px->lbprm.tot_wact;
110 px->lbprm.tot_used = px->srv_act;
111 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100112 else if (px->lbprm.fbck) {
113 /* use only the first backup server */
114 px->lbprm.tot_weight = px->lbprm.fbck->eweight;
115 px->lbprm.tot_used = 1;
Willy Tarreau20697042007-11-15 23:26:18 +0100116 }
117 else {
Willy Tarreaub625a082007-11-26 01:15:43 +0100118 px->lbprm.tot_weight = px->lbprm.tot_wbck;
119 px->lbprm.tot_used = px->srv_bck;
Willy Tarreau20697042007-11-15 23:26:18 +0100120 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100121}
122
123/* this function updates the map according to server <srv>'s new state */
124static void map_set_server_status_down(struct server *srv)
125{
126 struct proxy *p = srv->proxy;
127
128 if (srv->state == srv->prev_state &&
129 srv->eweight == srv->prev_eweight)
130 return;
131
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100132 if (srv_is_usable(srv->state, srv->eweight))
133 goto out_update_state;
134
Willy Tarreaub625a082007-11-26 01:15:43 +0100135 /* FIXME: could be optimized since we know what changed */
136 recount_servers(p);
137 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100138 p->lbprm.map.state |= PR_MAP_RECALC;
139 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100140 srv->prev_state = srv->state;
141 srv->prev_eweight = srv->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200142}
143
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100144/* This function updates the map according to server <srv>'s new state */
Willy Tarreaub625a082007-11-26 01:15:43 +0100145static void map_set_server_status_up(struct server *srv)
146{
147 struct proxy *p = srv->proxy;
148
149 if (srv->state == srv->prev_state &&
150 srv->eweight == srv->prev_eweight)
151 return;
152
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100153 if (!srv_is_usable(srv->state, srv->eweight))
154 goto out_update_state;
155
Willy Tarreaub625a082007-11-26 01:15:43 +0100156 /* FIXME: could be optimized since we know what changed */
157 recount_servers(p);
158 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100159 p->lbprm.map.state |= PR_MAP_RECALC;
160 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100161 srv->prev_state = srv->state;
162 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100163}
164
Willy Tarreau20697042007-11-15 23:26:18 +0100165/* This function recomputes the server map for proxy px. It relies on
166 * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
167 * called after recount_servers(). It also expects px->lbprm.map.srv
168 * to be allocated with the largest size needed. It updates tot_weight.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200169 */
170void recalc_server_map(struct proxy *px)
171{
172 int o, tot, flag;
173 struct server *cur, *best;
174
Willy Tarreau20697042007-11-15 23:26:18 +0100175 switch (px->lbprm.tot_used) {
176 case 0: /* no server */
177 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200178 return;
Willy Tarreau20697042007-11-15 23:26:18 +0100179 case 1: /* only one server, just fill first entry */
180 tot = 1;
181 break;
182 default:
183 tot = px->lbprm.tot_weight;
184 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200185 }
186
Willy Tarreau20697042007-11-15 23:26:18 +0100187 /* here we *know* that we have some servers */
188 if (px->srv_act)
189 flag = SRV_RUNNING;
190 else
191 flag = SRV_RUNNING | SRV_BACKUP;
192
Willy Tarreaubaaee002006-06-26 02:48:02 +0200193 /* this algorithm gives priority to the first server, which means that
194 * it will respect the declaration order for equivalent weights, and
195 * that whatever the weights, the first server called will always be
Willy Tarreau20697042007-11-15 23:26:18 +0100196 * the first declared. This is an important asumption for the backup
Willy Tarreaubaaee002006-06-26 02:48:02 +0200197 * case, where we want the first server only.
198 */
199 for (cur = px->srv; cur; cur = cur->next)
200 cur->wscore = 0;
201
202 for (o = 0; o < tot; o++) {
203 int max = 0;
204 best = NULL;
205 for (cur = px->srv; cur; cur = cur->next) {
Willy Tarreau6704d672009-06-15 10:56:05 +0200206 if (cur->eweight &&
207 flag == (cur->state &
Willy Tarreau48494c02007-11-30 10:41:39 +0100208 (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
Willy Tarreaubaaee002006-06-26 02:48:02 +0200209 int v;
210
211 /* If we are forced to return only one server, we don't want to
212 * go further, because we would return the wrong one due to
213 * divide overflow.
214 */
215 if (tot == 1) {
216 best = cur;
Willy Tarreau20697042007-11-15 23:26:18 +0100217 /* note that best->wscore will be wrong but we don't care */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200218 break;
219 }
220
Willy Tarreau417fae02007-03-25 21:16:40 +0200221 cur->wscore += cur->eweight;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200222 v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
223 if (best == NULL || v > max) {
224 max = v;
225 best = cur;
226 }
227 }
228 }
Willy Tarreau20697042007-11-15 23:26:18 +0100229 px->lbprm.map.srv[o] = best;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200230 best->wscore -= tot;
231 }
Willy Tarreau20697042007-11-15 23:26:18 +0100232 px->lbprm.map.state &= ~PR_MAP_RECALC;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200233}
234
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100235/* This function is responsible of building the server MAP for map-based LB
236 * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
237 * weights if applicable. It should be called only once per proxy, at config
238 * time.
239 */
240void init_server_map(struct proxy *p)
241{
242 struct server *srv;
243 int pgcd;
244 int act, bck;
245
Willy Tarreaub625a082007-11-26 01:15:43 +0100246 p->lbprm.set_server_status_up = map_set_server_status_up;
247 p->lbprm.set_server_status_down = map_set_server_status_down;
248 p->lbprm.update_server_eweight = NULL;
249
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100250 if (!p->srv)
251 return;
252
253 /* We will factor the weights to reduce the table,
Willy Tarreau6704d672009-06-15 10:56:05 +0200254 * using Euclide's largest common divisor algorithm.
255 * Since we may have zero weights, we have to first
256 * find a non-zero weight server.
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100257 */
Willy Tarreau6704d672009-06-15 10:56:05 +0200258 pgcd = 1;
259 srv = p->srv;
260 while (srv && !srv->uweight)
261 srv = srv->next;
262
263 if (srv) {
264 pgcd = srv->uweight; /* note: cannot be zero */
265 while (pgcd > 1 && (srv = srv->next)) {
266 int w = srv->uweight;
267 while (w) {
268 int t = pgcd % w;
269 pgcd = w;
270 w = t;
271 }
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100272 }
273 }
274
275 /* It is sometimes useful to know what factor to apply
276 * to the backend's effective weight to know its real
277 * weight.
278 */
279 p->lbprm.wmult = pgcd;
280
281 act = bck = 0;
282 for (srv = p->srv; srv; srv = srv->next) {
283 srv->eweight = srv->uweight / pgcd;
Willy Tarreaub625a082007-11-26 01:15:43 +0100284 srv->prev_eweight = srv->eweight;
285 srv->prev_state = srv->state;
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100286 if (srv->state & SRV_BACKUP)
287 bck += srv->eweight;
288 else
289 act += srv->eweight;
290 }
291
292 /* this is the largest map we will ever need for this servers list */
293 if (act < bck)
294 act = bck;
295
Willy Tarreau6704d672009-06-15 10:56:05 +0200296 if (!act)
297 act = 1;
298
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100299 p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *));
300 /* recounts servers and their weights */
301 p->lbprm.map.state = PR_MAP_RECALC;
302 recount_servers(p);
Willy Tarreaub625a082007-11-26 01:15:43 +0100303 update_backend_weight(p);
Willy Tarreau5dc2fa62007-11-19 19:10:18 +0100304 recalc_server_map(p);
305}
306
Willy Tarreaub625a082007-11-26 01:15:43 +0100307/* This function updates the server trees according to server <srv>'s new
308 * state. It should be called when server <srv>'s status changes to down.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100309 * It is not important whether the server was already down or not. It is not
310 * important either that the new state is completely down (the caller may not
311 * know all the variables of a server's state).
Willy Tarreaub625a082007-11-26 01:15:43 +0100312 */
313static void fwrr_set_server_status_down(struct server *srv)
314{
315 struct proxy *p = srv->proxy;
316 struct fwrr_group *grp;
317
318 if (srv->state == srv->prev_state &&
319 srv->eweight == srv->prev_eweight)
320 return;
321
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100322 if (srv_is_usable(srv->state, srv->eweight))
323 goto out_update_state;
324
Willy Tarreaub625a082007-11-26 01:15:43 +0100325 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
326 /* server was already down */
327 goto out_update_backend;
328
329 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
330 grp->next_weight -= srv->prev_eweight;
331
332 if (srv->state & SRV_BACKUP) {
333 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
334 p->srv_bck--;
335
336 if (srv == p->lbprm.fbck) {
337 /* we lost the first backup server in a single-backup
338 * configuration, we must search another one.
339 */
340 struct server *srv2 = p->lbprm.fbck;
341 do {
342 srv2 = srv2->next;
343 } while (srv2 &&
344 !((srv2->state & SRV_BACKUP) &&
345 srv_is_usable(srv2->state, srv2->eweight)));
346 p->lbprm.fbck = srv2;
347 }
348 } else {
349 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
350 p->srv_act--;
351 }
352
353 fwrr_dequeue_srv(srv);
354 fwrr_remove_from_tree(srv);
355
356out_update_backend:
357 /* check/update tot_used, tot_weight */
358 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100359 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100360 srv->prev_state = srv->state;
361 srv->prev_eweight = srv->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100362}
363
364/* This function updates the server trees according to server <srv>'s new
365 * state. It should be called when server <srv>'s status changes to up.
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100366 * It is not important whether the server was already down or not. It is not
367 * important either that the new state is completely UP (the caller may not
368 * know all the variables of a server's state). This function will not change
Willy Tarreaub625a082007-11-26 01:15:43 +0100369 * the weight of a server which was already up.
370 */
371static void fwrr_set_server_status_up(struct server *srv)
372{
373 struct proxy *p = srv->proxy;
374 struct fwrr_group *grp;
375
376 if (srv->state == srv->prev_state &&
377 srv->eweight == srv->prev_eweight)
378 return;
379
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100380 if (!srv_is_usable(srv->state, srv->eweight))
381 goto out_update_state;
382
Willy Tarreaub625a082007-11-26 01:15:43 +0100383 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
384 /* server was already up */
385 goto out_update_backend;
386
387 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
388 grp->next_weight += srv->eweight;
389
390 if (srv->state & SRV_BACKUP) {
391 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
392 p->srv_bck++;
393
Willy Tarreauf4cca452008-03-08 21:42:54 +0100394 if (!(p->options & PR_O_USE_ALL_BK)) {
395 if (!p->lbprm.fbck) {
396 /* there was no backup server anymore */
Willy Tarreaub625a082007-11-26 01:15:43 +0100397 p->lbprm.fbck = srv;
Willy Tarreauf4cca452008-03-08 21:42:54 +0100398 } else {
399 /* we may have restored a backup server prior to fbck,
400 * in which case it should replace it.
401 */
402 struct server *srv2 = srv;
403 do {
404 srv2 = srv2->next;
405 } while (srv2 && (srv2 != p->lbprm.fbck));
406 if (srv2)
407 p->lbprm.fbck = srv;
408 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100409 }
410 } else {
411 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
412 p->srv_act++;
413 }
414
415 /* note that eweight cannot be 0 here */
416 fwrr_get_srv(srv);
417 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
418 fwrr_queue_srv(srv);
419
420out_update_backend:
421 /* check/update tot_used, tot_weight */
422 update_backend_weight(p);
Willy Tarreau0ebe1062007-11-30 11:11:02 +0100423 out_update_state:
Willy Tarreaub625a082007-11-26 01:15:43 +0100424 srv->prev_state = srv->state;
425 srv->prev_eweight = srv->eweight;
426}
427
428/* This function must be called after an update to server <srv>'s effective
429 * weight. It may be called after a state change too.
430 */
431static void fwrr_update_server_weight(struct server *srv)
432{
433 int old_state, new_state;
434 struct proxy *p = srv->proxy;
435 struct fwrr_group *grp;
436
437 if (srv->state == srv->prev_state &&
438 srv->eweight == srv->prev_eweight)
439 return;
440
441 /* If changing the server's weight changes its state, we simply apply
442 * the procedures we already have for status change. If the state
443 * remains down, the server is not in any tree, so it's as easy as
444 * updating its values. If the state remains up with different weights,
445 * there are some computations to perform to find a new place and
446 * possibly a new tree for this server.
447 */
448
449 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
450 new_state = srv_is_usable(srv->state, srv->eweight);
451
452 if (!old_state && !new_state) {
453 srv->prev_state = srv->state;
454 srv->prev_eweight = srv->eweight;
455 return;
456 }
457 else if (!old_state && new_state) {
458 fwrr_set_server_status_up(srv);
459 return;
460 }
461 else if (old_state && !new_state) {
462 fwrr_set_server_status_down(srv);
463 return;
464 }
465
466 grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
467 grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;
468
469 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
470 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
471
472 if (srv->lb_tree == grp->init) {
473 fwrr_dequeue_srv(srv);
474 fwrr_queue_by_weight(grp->init, srv);
475 }
476 else if (!srv->lb_tree) {
477 /* FIXME: server was down. This is not possible right now but
478 * may be needed soon for slowstart or graceful shutdown.
479 */
480 fwrr_dequeue_srv(srv);
481 fwrr_get_srv(srv);
482 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
483 fwrr_queue_srv(srv);
484 } else {
485 /* The server is either active or in the next queue. If it's
486 * still in the active queue and it has not consumed all of its
487 * places, let's adjust its next position.
488 */
489 fwrr_get_srv(srv);
490
491 if (srv->eweight > 0) {
492 int prev_next = srv->npos;
493 int step = grp->next_weight / srv->eweight;
494
495 srv->npos = srv->lpos + step;
496 srv->rweight = 0;
497
498 if (srv->npos > prev_next)
499 srv->npos = prev_next;
500 if (srv->npos < grp->curr_pos + 2)
501 srv->npos = grp->curr_pos + step;
502 } else {
503 /* push it into the next tree */
504 srv->npos = grp->curr_pos + grp->curr_weight;
505 }
506
507 fwrr_dequeue_srv(srv);
508 fwrr_queue_srv(srv);
509 }
510
511 update_backend_weight(p);
512 srv->prev_state = srv->state;
513 srv->prev_eweight = srv->eweight;
514}
515
516/* Remove a server from a tree. It must have previously been dequeued. This
517 * function is meant to be called when a server is going down or has its
518 * weight disabled.
519 */
520static inline void fwrr_remove_from_tree(struct server *s)
521{
522 s->lb_tree = NULL;
523}
524
525/* Queue a server in the weight tree <root>, assuming the weight is >0.
526 * We want to sort them by inverted weights, because we need to place
527 * heavy servers first in order to get a smooth distribution.
528 */
529static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
530{
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100531 s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
Willy Tarreaub625a082007-11-26 01:15:43 +0100532 eb32_insert(root, &s->lb_node);
533 s->lb_tree = root;
534}
535
536/* This function is responsible for building the weight trees in case of fast
537 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
538 * ratio. Both active and backup groups are initialized.
539 */
540void fwrr_init_server_groups(struct proxy *p)
541{
542 struct server *srv;
543 struct eb_root init_head = EB_ROOT;
544
545 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
546 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
547 p->lbprm.update_server_eweight = fwrr_update_server_weight;
548
549 p->lbprm.wdiv = BE_WEIGHT_SCALE;
550 for (srv = p->srv; srv; srv = srv->next) {
551 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
552 srv->prev_state = srv->state;
553 }
554
555 recount_servers(p);
556 update_backend_weight(p);
557
558 /* prepare the active servers group */
559 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
560 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
561 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
562 p->lbprm.fwrr.act.t1 = init_head;
563 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
564 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
565
566 /* prepare the backup servers group */
567 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
568 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
569 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
570 p->lbprm.fwrr.bck.t1 = init_head;
571 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
572 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
573
574 /* queue active and backup servers in two distinct groups */
575 for (srv = p->srv; srv; srv = srv->next) {
576 if (!srv_is_usable(srv->state, srv->eweight))
577 continue;
578 fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
579 p->lbprm.fwrr.bck.init :
580 p->lbprm.fwrr.act.init,
581 srv);
582 }
583}
584
585/* simply removes a server from a weight tree */
586static inline void fwrr_dequeue_srv(struct server *s)
587{
588 eb32_delete(&s->lb_node);
589}
590
591/* queues a server into the appropriate group and tree depending on its
592 * backup status, and ->npos. If the server is disabled, simply assign
593 * it to the NULL tree.
594 */
595static void fwrr_queue_srv(struct server *s)
596{
597 struct proxy *p = s->proxy;
598 struct fwrr_group *grp;
599
600 grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
601
602 /* Delay everything which does not fit into the window and everything
603 * which does not fit into the theorical new window.
604 */
605 if (!srv_is_usable(s->state, s->eweight)) {
606 fwrr_remove_from_tree(s);
607 }
608 else if (s->eweight <= 0 ||
609 s->npos >= 2 * grp->curr_weight ||
610 s->npos >= grp->curr_weight + grp->next_weight) {
611 /* put into next tree, and readjust npos in case we could
612 * finally take this back to current. */
613 s->npos -= grp->curr_weight;
614 fwrr_queue_by_weight(grp->next, s);
615 }
616 else {
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100617 /* The sorting key is stored in units of s->npos * user_weight
618 * in order to avoid overflows. As stated in backend.h, the
619 * lower the scale, the rougher the weights modulation, and the
620 * higher the scale, the lower the number of servers without
621 * overflow. With this formula, the result is always positive,
622 * so we can use eb3é_insert().
Willy Tarreaub625a082007-11-26 01:15:43 +0100623 */
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100624 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
625 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
626
627 eb32_insert(&grp->curr, &s->lb_node);
Willy Tarreaub625a082007-11-26 01:15:43 +0100628 s->lb_tree = &grp->curr;
629 }
630}
631
632/* prepares a server when extracting it from the "init" tree */
633static inline void fwrr_get_srv_init(struct server *s)
634{
635 s->npos = s->rweight = 0;
636}
637
638/* prepares a server when extracting it from the "next" tree */
639static inline void fwrr_get_srv_next(struct server *s)
640{
641 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
642 &s->proxy->lbprm.fwrr.bck :
643 &s->proxy->lbprm.fwrr.act;
644
645 s->npos += grp->curr_weight;
646}
647
648/* prepares a server when it was marked down */
649static inline void fwrr_get_srv_down(struct server *s)
650{
651 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
652 &s->proxy->lbprm.fwrr.bck :
653 &s->proxy->lbprm.fwrr.act;
654
655 s->npos = grp->curr_pos;
656}
657
658/* prepares a server when extracting it from its tree */
659static void fwrr_get_srv(struct server *s)
660{
661 struct proxy *p = s->proxy;
662 struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
663 &p->lbprm.fwrr.bck :
664 &p->lbprm.fwrr.act;
665
666 if (s->lb_tree == grp->init) {
667 fwrr_get_srv_init(s);
668 }
669 else if (s->lb_tree == grp->next) {
670 fwrr_get_srv_next(s);
671 }
672 else if (s->lb_tree == NULL) {
673 fwrr_get_srv_down(s);
674 }
675}
676
677/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
678 * when this happens, and "next" filled with servers sorted by weights.
679 */
680static inline void fwrr_switch_trees(struct fwrr_group *grp)
681{
682 struct eb_root *swap;
683 swap = grp->init;
684 grp->init = grp->next;
685 grp->next = swap;
686 grp->curr_weight = grp->next_weight;
687 grp->curr_pos = grp->curr_weight;
688}
689
690/* return next server from the current tree in FWRR group <grp>, or a server
691 * from the "init" tree if appropriate. If both trees are empty, return NULL.
692 */
693static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
694{
695 struct eb32_node *node;
696 struct server *s;
697
698 node = eb32_first(&grp->curr);
699 s = eb32_entry(node, struct server, lb_node);
700
701 if (!node || s->npos > grp->curr_pos) {
702 /* either we have no server left, or we have a hole */
703 struct eb32_node *node2;
704 node2 = eb32_first(grp->init);
705 if (node2) {
706 node = node2;
707 s = eb32_entry(node, struct server, lb_node);
708 fwrr_get_srv_init(s);
709 if (s->eweight == 0) /* FIXME: is it possible at all ? */
710 node = NULL;
711 }
712 }
713 if (node)
714 return s;
715 else
716 return NULL;
717}
718
719/* Computes next position of server <s> in the group. It is mandatory for <s>
720 * to have a non-zero, positive eweight.
721*/
722static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
723{
724 if (!s->npos) {
725 /* first time ever for this server */
726 s->lpos = grp->curr_pos;
727 s->npos = grp->curr_pos + grp->next_weight / s->eweight;
728 s->rweight += grp->next_weight % s->eweight;
729
730 if (s->rweight >= s->eweight) {
731 s->rweight -= s->eweight;
732 s->npos++;
733 }
734 } else {
735 s->lpos = s->npos;
736 s->npos += grp->next_weight / s->eweight;
737 s->rweight += grp->next_weight % s->eweight;
738
739 if (s->rweight >= s->eweight) {
740 s->rweight -= s->eweight;
741 s->npos++;
742 }
743 }
744}
745
746/* Return next server from the current tree in backend <p>, or a server from
747 * the init tree if appropriate. If both trees are empty, return NULL.
748 * Saturated servers are skipped and requeued.
749 */
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100750static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
Willy Tarreaub625a082007-11-26 01:15:43 +0100751{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100752 struct server *srv, *full, *avoided;
Willy Tarreaub625a082007-11-26 01:15:43 +0100753 struct fwrr_group *grp;
Willy Tarreaub625a082007-11-26 01:15:43 +0100754 int switched;
755
756 if (p->srv_act)
757 grp = &p->lbprm.fwrr.act;
758 else if (p->lbprm.fbck)
759 return p->lbprm.fbck;
760 else if (p->srv_bck)
761 grp = &p->lbprm.fwrr.bck;
762 else
763 return NULL;
764
765 switched = 0;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100766 avoided = NULL;
Willy Tarreaub625a082007-11-26 01:15:43 +0100767 full = NULL; /* NULL-terminated list of saturated servers */
768 while (1) {
769 /* if we see an empty group, let's first try to collect weights
770 * which might have recently changed.
771 */
772 if (!grp->curr_weight)
773 grp->curr_pos = grp->curr_weight = grp->next_weight;
774
775 /* get first server from the "current" tree. When the end of
776 * the tree is reached, we may have to switch, but only once.
777 */
778 while (1) {
779 srv = fwrr_get_server_from_group(grp);
780 if (srv)
781 break;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100782 if (switched) {
783 if (avoided) {
784 srv = avoided;
785 break;
786 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100787 goto requeue_servers;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100788 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100789 switched = 1;
790 fwrr_switch_trees(grp);
791
792 }
793
794 /* OK, we have a server. However, it may be saturated, in which
795 * case we don't want to reconsider it for now. We'll update
796 * its position and dequeue it anyway, so that we can move it
797 * to a better place afterwards.
798 */
799 fwrr_update_position(grp, srv);
800 fwrr_dequeue_srv(srv);
801 grp->curr_pos++;
Willy Tarreau7c669d72008-06-20 15:04:11 +0200802 if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100803 /* make sure it is not the server we are trying to exclude... */
804 if (srv != srvtoavoid || avoided)
805 break;
806
807 avoided = srv; /* ...but remember that is was selected yet avoided */
808 }
Willy Tarreaub625a082007-11-26 01:15:43 +0100809
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100810 /* the server is saturated or avoided, let's chain it for later reinsertion */
Willy Tarreaub625a082007-11-26 01:15:43 +0100811 srv->next_full = full;
812 full = srv;
813 }
814
815 /* OK, we got the best server, let's update it */
816 fwrr_queue_srv(srv);
817
818 requeue_servers:
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100819 /* Requeue all extracted servers. If full==srv then it was
820 * avoided (unsucessfully) and chained, omit it now.
821 */
Willy Tarreau70bcfb72008-01-27 02:21:53 +0100822 if (unlikely(full != NULL)) {
Willy Tarreaub625a082007-11-26 01:15:43 +0100823 if (switched) {
824 /* the tree has switched, requeue all extracted servers
825 * into "init", because their place was lost, and only
826 * their weight matters.
827 */
828 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100829 if (likely(full != srv))
830 fwrr_queue_by_weight(grp->init, full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100831 full = full->next_full;
832 } while (full);
833 } else {
834 /* requeue all extracted servers just as if they were consumed
835 * so that they regain their expected place.
836 */
837 do {
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +0100838 if (likely(full != srv))
839 fwrr_queue_srv(full);
Willy Tarreaub625a082007-11-26 01:15:43 +0100840 full = full->next_full;
841 } while (full);
842 }
843 }
844 return srv;
845}
846
Willy Tarreau51406232008-03-10 22:04:20 +0100847/* Remove a server from a tree. It must have previously been dequeued. This
848 * function is meant to be called when a server is going down or has its
849 * weight disabled.
850 */
851static inline void fwlc_remove_from_tree(struct server *s)
852{
853 s->lb_tree = NULL;
854}
855
856/* simply removes a server from a tree */
857static inline void fwlc_dequeue_srv(struct server *s)
858{
859 eb32_delete(&s->lb_node);
860}
861
862/* Queue a server in its associated tree, assuming the weight is >0.
863 * Servers are sorted by #conns/weight. To ensure maximum accuracy,
864 * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key.
865 */
866static inline void fwlc_queue_srv(struct server *s)
867{
Willy Tarreau7c669d72008-06-20 15:04:11 +0200868 s->lb_node.key = s->served * SRV_EWGHT_MAX / s->eweight;
Willy Tarreau51406232008-03-10 22:04:20 +0100869 eb32_insert(s->lb_tree, &s->lb_node);
870}
871
872/* Re-position the server in the FWLC tree after it has been assigned one
873 * connection or after it has released one. Note that it is possible that
874 * the server has been moved out of the tree due to failed health-checks.
875 */
876static void fwlc_srv_reposition(struct server *s)
877{
878 if (!s->lb_tree)
879 return;
880 fwlc_dequeue_srv(s);
881 fwlc_queue_srv(s);
882}
883
884/* This function updates the server trees according to server <srv>'s new
885 * state. It should be called when server <srv>'s status changes to down.
886 * It is not important whether the server was already down or not. It is not
887 * important either that the new state is completely down (the caller may not
888 * know all the variables of a server's state).
889 */
890static void fwlc_set_server_status_down(struct server *srv)
891{
892 struct proxy *p = srv->proxy;
893
894 if (srv->state == srv->prev_state &&
895 srv->eweight == srv->prev_eweight)
896 return;
897
898 if (srv_is_usable(srv->state, srv->eweight))
899 goto out_update_state;
900
901 if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
902 /* server was already down */
903 goto out_update_backend;
904
905 if (srv->state & SRV_BACKUP) {
906 p->lbprm.tot_wbck -= srv->prev_eweight;
907 p->srv_bck--;
908
909 if (srv == p->lbprm.fbck) {
910 /* we lost the first backup server in a single-backup
911 * configuration, we must search another one.
912 */
913 struct server *srv2 = p->lbprm.fbck;
914 do {
915 srv2 = srv2->next;
916 } while (srv2 &&
917 !((srv2->state & SRV_BACKUP) &&
918 srv_is_usable(srv2->state, srv2->eweight)));
919 p->lbprm.fbck = srv2;
920 }
921 } else {
922 p->lbprm.tot_wact -= srv->prev_eweight;
923 p->srv_act--;
924 }
925
926 fwlc_dequeue_srv(srv);
927 fwlc_remove_from_tree(srv);
928
929out_update_backend:
930 /* check/update tot_used, tot_weight */
931 update_backend_weight(p);
932 out_update_state:
933 srv->prev_state = srv->state;
934 srv->prev_eweight = srv->eweight;
935}
936
937/* This function updates the server trees according to server <srv>'s new
938 * state. It should be called when server <srv>'s status changes to up.
939 * It is not important whether the server was already down or not. It is not
940 * important either that the new state is completely UP (the caller may not
941 * know all the variables of a server's state). This function will not change
942 * the weight of a server which was already up.
943 */
944static void fwlc_set_server_status_up(struct server *srv)
945{
946 struct proxy *p = srv->proxy;
947
948 if (srv->state == srv->prev_state &&
949 srv->eweight == srv->prev_eweight)
950 return;
951
952 if (!srv_is_usable(srv->state, srv->eweight))
953 goto out_update_state;
954
955 if (srv_is_usable(srv->prev_state, srv->prev_eweight))
956 /* server was already up */
957 goto out_update_backend;
958
959 if (srv->state & SRV_BACKUP) {
960 srv->lb_tree = &p->lbprm.fwlc.bck;
961 p->lbprm.tot_wbck += srv->eweight;
962 p->srv_bck++;
963
964 if (!(p->options & PR_O_USE_ALL_BK)) {
965 if (!p->lbprm.fbck) {
966 /* there was no backup server anymore */
967 p->lbprm.fbck = srv;
968 } else {
969 /* we may have restored a backup server prior to fbck,
970 * in which case it should replace it.
971 */
972 struct server *srv2 = srv;
973 do {
974 srv2 = srv2->next;
975 } while (srv2 && (srv2 != p->lbprm.fbck));
976 if (srv2)
977 p->lbprm.fbck = srv;
978 }
979 }
980 } else {
981 srv->lb_tree = &p->lbprm.fwlc.act;
982 p->lbprm.tot_wact += srv->eweight;
983 p->srv_act++;
984 }
985
986 /* note that eweight cannot be 0 here */
987 fwlc_queue_srv(srv);
988
989 out_update_backend:
990 /* check/update tot_used, tot_weight */
991 update_backend_weight(p);
992 out_update_state:
993 srv->prev_state = srv->state;
994 srv->prev_eweight = srv->eweight;
995}
996
997/* This function must be called after an update to server <srv>'s effective
998 * weight. It may be called after a state change too.
999 */
1000static void fwlc_update_server_weight(struct server *srv)
1001{
1002 int old_state, new_state;
1003 struct proxy *p = srv->proxy;
1004
1005 if (srv->state == srv->prev_state &&
1006 srv->eweight == srv->prev_eweight)
1007 return;
1008
1009 /* If changing the server's weight changes its state, we simply apply
1010 * the procedures we already have for status change. If the state
1011 * remains down, the server is not in any tree, so it's as easy as
1012 * updating its values. If the state remains up with different weights,
1013 * there are some computations to perform to find a new place and
1014 * possibly a new tree for this server.
1015 */
1016
1017 old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
1018 new_state = srv_is_usable(srv->state, srv->eweight);
1019
1020 if (!old_state && !new_state) {
1021 srv->prev_state = srv->state;
1022 srv->prev_eweight = srv->eweight;
1023 return;
1024 }
1025 else if (!old_state && new_state) {
1026 fwlc_set_server_status_up(srv);
1027 return;
1028 }
1029 else if (old_state && !new_state) {
1030 fwlc_set_server_status_down(srv);
1031 return;
1032 }
1033
1034 if (srv->lb_tree)
1035 fwlc_dequeue_srv(srv);
1036
1037 if (srv->state & SRV_BACKUP) {
1038 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
1039 srv->lb_tree = &p->lbprm.fwlc.bck;
1040 } else {
1041 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
1042 srv->lb_tree = &p->lbprm.fwlc.act;
1043 }
1044
1045 fwlc_queue_srv(srv);
1046
1047 update_backend_weight(p);
1048 srv->prev_state = srv->state;
1049 srv->prev_eweight = srv->eweight;
1050}
1051
1052/* This function is responsible for building the trees in case of fast
1053 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
1054 * uweight ratio. Both active and backup groups are initialized.
1055 */
1056void fwlc_init_server_tree(struct proxy *p)
1057{
1058 struct server *srv;
1059 struct eb_root init_head = EB_ROOT;
1060
1061 p->lbprm.set_server_status_up = fwlc_set_server_status_up;
1062 p->lbprm.set_server_status_down = fwlc_set_server_status_down;
1063 p->lbprm.update_server_eweight = fwlc_update_server_weight;
1064 p->lbprm.server_take_conn = fwlc_srv_reposition;
1065 p->lbprm.server_drop_conn = fwlc_srv_reposition;
1066
1067 p->lbprm.wdiv = BE_WEIGHT_SCALE;
1068 for (srv = p->srv; srv; srv = srv->next) {
1069 srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
1070 srv->prev_state = srv->state;
1071 }
1072
1073 recount_servers(p);
1074 update_backend_weight(p);
1075
1076 p->lbprm.fwlc.act = init_head;
1077 p->lbprm.fwlc.bck = init_head;
1078
1079 /* queue active and backup servers in two distinct groups */
1080 for (srv = p->srv; srv; srv = srv->next) {
1081 if (!srv_is_usable(srv->state, srv->eweight))
1082 continue;
1083 srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
1084 fwlc_queue_srv(srv);
1085 }
1086}
1087
1088/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
1089 * return NULL. Saturated servers are skipped.
1090 */
1091static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
1092{
1093 struct server *srv, *avoided;
1094 struct eb32_node *node;
1095
1096 srv = avoided = NULL;
1097
1098 if (p->srv_act)
1099 node = eb32_first(&p->lbprm.fwlc.act);
1100 else if (p->lbprm.fbck)
1101 return p->lbprm.fbck;
1102 else if (p->srv_bck)
1103 node = eb32_first(&p->lbprm.fwlc.bck);
1104 else
1105 return NULL;
1106
1107 while (node) {
1108 /* OK, we have a server. However, it may be saturated, in which
1109 * case we don't want to reconsider it for now, so we'll simply
1110 * skip it. Same if it's the server we try to avoid, in which
1111 * case we simply remember it for later use if needed.
1112 */
1113 struct server *s;
1114
1115 s = eb32_entry(node, struct server, lb_node);
Willy Tarreau7c669d72008-06-20 15:04:11 +02001116 if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
Willy Tarreau51406232008-03-10 22:04:20 +01001117 if (s != srvtoavoid) {
1118 srv = s;
1119 break;
1120 }
1121 avoided = s;
1122 }
1123 node = eb32_next(node);
1124 }
1125
1126 if (!srv)
1127 srv = avoided;
1128
1129 return srv;
1130}
1131
Willy Tarreau01732802007-11-01 22:48:15 +01001132/*
1133 * This function tries to find a running server for the proxy <px> following
1134 * the URL parameter hash method. It looks for a specific parameter in the
1135 * URL and hashes it to compute the server ID. This is useful to optimize
1136 * performance by avoiding bounces between servers in contexts where sessions
1137 * are shared but cookies are not usable. If the parameter is not found, NULL
1138 * is returned. If any server is found, it will be returned. If no valid server
1139 * is found, NULL is returned.
Willy Tarreau01732802007-11-01 22:48:15 +01001140 */
1141struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len)
1142{
1143 unsigned long hash = 0;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001144 const char *p;
1145 const char *params;
Willy Tarreau01732802007-11-01 22:48:15 +01001146 int plen;
1147
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001148 /* when tot_weight is 0 then so is srv_count */
Willy Tarreau20697042007-11-15 23:26:18 +01001149 if (px->lbprm.tot_weight == 0)
Willy Tarreau01732802007-11-01 22:48:15 +01001150 return NULL;
1151
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001152 if ((p = memchr(uri, '?', uri_len)) == NULL)
1153 return NULL;
1154
Willy Tarreau20697042007-11-15 23:26:18 +01001155 if (px->lbprm.map.state & PR_MAP_RECALC)
1156 recalc_server_map(px);
1157
Willy Tarreau01732802007-11-01 22:48:15 +01001158 p++;
1159
1160 uri_len -= (p - uri);
1161 plen = px->url_param_len;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001162 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001163
1164 while (uri_len > plen) {
1165 /* Look for the parameter name followed by an equal symbol */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001166 if (params[plen] == '=') {
1167 if (memcmp(params, px->url_param_name, plen) == 0) {
1168 /* OK, we have the parameter here at <params>, and
Willy Tarreau01732802007-11-01 22:48:15 +01001169 * the value after the equal sign, at <p>
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001170 * skip the equal symbol
Willy Tarreau01732802007-11-01 22:48:15 +01001171 */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001172 p += plen + 1;
1173 uri_len -= plen + 1;
1174
Willy Tarreau01732802007-11-01 22:48:15 +01001175 while (uri_len && *p != '&') {
1176 hash = *p + (hash << 6) + (hash << 16) - hash;
1177 uri_len--;
1178 p++;
1179 }
Willy Tarreau20697042007-11-15 23:26:18 +01001180 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
Willy Tarreau01732802007-11-01 22:48:15 +01001181 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001182 }
1183 /* skip to next parameter */
1184 p = memchr(params, '&', uri_len);
1185 if (!p)
1186 return NULL;
1187 p++;
1188 uri_len -= (p - params);
1189 params = p;
1190 }
1191 return NULL;
1192}
1193
1194/*
1195 * this does the same as the previous server_ph, but check the body contents
1196 */
1197struct server *get_server_ph_post(struct session *s)
1198{
1199 unsigned long hash = 0;
1200 struct http_txn *txn = &s->txn;
1201 struct buffer *req = s->req;
1202 struct http_msg *msg = &txn->req;
1203 struct proxy *px = s->be;
1204 unsigned int plen = px->url_param_len;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001205 unsigned long body;
1206 unsigned long len;
1207 const char *params;
1208 struct hdr_ctx ctx;
1209 const char *p;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001210
1211 /* tot_weight appears to mean srv_count */
1212 if (px->lbprm.tot_weight == 0)
1213 return NULL;
1214
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001215 body = msg->sol[msg->eoh] == '\r' ? msg->eoh + 2 : msg->eoh + 1;
Willy Tarreaufb0528b2008-08-11 00:21:56 +02001216 len = req->l - body;
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001217 params = req->data + body;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001218
1219 if ( len == 0 )
1220 return NULL;
1221
1222 if (px->lbprm.map.state & PR_MAP_RECALC)
1223 recalc_server_map(px);
1224
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001225 ctx.idx = 0;
1226
1227 /* if the message is chunked, we skip the chunk size, but use the value as len */
1228 http_find_header2("Transfer-Encoding", 17, msg->sol, &txn->hdr_idx, &ctx);
Willy Tarreauadfb8562008-08-11 15:24:42 +02001229 if (ctx.idx && ctx.vlen >= 7 && strncasecmp(ctx.line+ctx.val, "chunked", 7) == 0) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001230 unsigned int chunk = 0;
Willy Tarreau03d60bb2009-01-09 11:13:00 +01001231 while ( params < (req->data+req->max_len) && !HTTP_IS_CRLF(*params)) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001232 char c = *params;
1233 if (ishex(c)) {
1234 unsigned int hex = toupper(c) - '0';
1235 if ( hex > 9 )
1236 hex -= 'A' - '9' - 1;
1237 chunk = (chunk << 4) | hex;
1238 }
1239 else
1240 return NULL;
1241 params++;
1242 len--;
Willy Tarreau01732802007-11-01 22:48:15 +01001243 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001244 /* spec says we get CRLF */
1245 if (HTTP_IS_CRLF(*params) && HTTP_IS_CRLF(params[1]))
1246 params += 2;
1247 else
1248 return NULL;
1249 /* ok we have some encoded length, just inspect the first chunk */
1250 len = chunk;
1251 }
Willy Tarreau01732802007-11-01 22:48:15 +01001252
Willy Tarreau192ee3e2008-04-19 21:24:56 +02001253 p = params;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001254
1255 while (len > plen) {
1256 /* Look for the parameter name followed by an equal symbol */
1257 if (params[plen] == '=') {
1258 if (memcmp(params, px->url_param_name, plen) == 0) {
1259 /* OK, we have the parameter here at <params>, and
1260 * the value after the equal sign, at <p>
1261 * skip the equal symbol
1262 */
1263 p += plen + 1;
1264 len -= plen + 1;
1265
1266 while (len && *p != '&') {
1267 if (unlikely(!HTTP_IS_TOKEN(*p))) {
1268 /* if in a POST, body must be URI encoded or its not a URI.
1269 * Do not interprete any possible binary data as a parameter.
1270 */
1271 if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
1272 break;
1273 return NULL; /* oh, no; this is not uri-encoded.
1274 * This body does not contain parameters.
1275 */
1276 }
1277 hash = *p + (hash << 6) + (hash << 16) - hash;
1278 len--;
1279 p++;
1280 /* should we break if vlen exceeds limit? */
1281 }
1282 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1283 }
1284 }
Willy Tarreau01732802007-11-01 22:48:15 +01001285 /* skip to next parameter */
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001286 p = memchr(params, '&', len);
Willy Tarreau01732802007-11-01 22:48:15 +01001287 if (!p)
1288 return NULL;
1289 p++;
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001290 len -= (p - params);
1291 params = p;
Willy Tarreau01732802007-11-01 22:48:15 +01001292 }
1293 return NULL;
1294}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001295
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001296
Willy Tarreaubaaee002006-06-26 02:48:02 +02001297/*
Benoitaffb4812009-03-25 13:02:10 +01001298 * This function tries to find a running server for the proxy <px> following
1299 * the Header parameter hash method. It looks for a specific parameter in the
1300 * URL and hashes it to compute the server ID. This is useful to optimize
1301 * performance by avoiding bounces between servers in contexts where sessions
1302 * are shared but cookies are not usable. If the parameter is not found, NULL
1303 * is returned. If any server is found, it will be returned. If no valid server
1304 * is found, NULL is returned.
1305 */
1306struct server *get_server_hh(struct session *s)
1307{
1308 unsigned long hash = 0;
1309 struct http_txn *txn = &s->txn;
1310 struct http_msg *msg = &txn->req;
1311 struct proxy *px = s->be;
1312 unsigned int plen = px->hh_len;
1313 unsigned long len;
1314 struct hdr_ctx ctx;
1315 const char *p;
1316
1317 /* tot_weight appears to mean srv_count */
1318 if (px->lbprm.tot_weight == 0)
1319 return NULL;
1320
1321 if (px->lbprm.map.state & PR_MAP_RECALC)
1322 recalc_server_map(px);
1323
1324 ctx.idx = 0;
1325
1326 /* if the message is chunked, we skip the chunk size, but use the value as len */
1327 http_find_header2(px->hh_name, plen, msg->sol, &txn->hdr_idx, &ctx);
1328
1329 /* if the header is not found or empty, let's fallback to round robin */
1330 if (!ctx.idx || !ctx.vlen)
1331 return NULL;
1332
1333 /* Found a the hh_name in the headers.
1334 * we will compute the hash based on this value ctx.val.
1335 */
1336 len = ctx.vlen;
1337 p = (char *)ctx.line + ctx.val;
1338 if (!px->hh_match_domain) {
1339 while (len) {
1340 hash = *p + (hash << 6) + (hash << 16) - hash;
1341 len--;
1342 p++;
1343 }
1344 } else {
1345 int dohash = 0;
1346 p += len - 1;
1347 /* special computation, use only main domain name, not tld/host
1348 * going back from the end of string, start hashing at first
1349 * dot stop at next.
1350 * This is designed to work with the 'Host' header, and requires
1351 * a special option to activate this.
1352 */
1353 while (len) {
1354 if (*p == '.') {
1355 if (!dohash)
1356 dohash = 1;
1357 else
1358 break;
1359 } else {
1360 if (dohash)
1361 hash = *p + (hash << 6) + (hash << 16) - hash;
1362 }
1363 len--;
1364 p--;
1365 }
1366 }
1367 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1368}
1369
Emeric Brun736aa232009-06-30 17:56:00 +02001370struct server *get_server_rch(struct session *s)
1371{
1372 unsigned long hash = 0;
1373 struct proxy *px = s->be;
1374 unsigned long len;
1375 const char *p;
1376 int ret;
1377 struct acl_expr expr;
1378 struct acl_test test;
1379
1380 /* tot_weight appears to mean srv_count */
1381 if (px->lbprm.tot_weight == 0)
1382 return NULL;
1383
1384 if (px->lbprm.map.state & PR_MAP_RECALC)
1385 recalc_server_map(px);
1386
1387 memset(&expr, 0, sizeof(expr));
1388 memset(&test, 0, sizeof(test));
1389
1390 expr.arg.str = px->hh_name;
1391 expr.arg_len = px->hh_len;
1392
1393 ret = acl_fetch_rdp_cookie(px, s, NULL, ACL_DIR_REQ, &expr, &test);
1394 if (ret == 0 || (test.flags & ACL_TEST_F_MAY_CHANGE) || test.len == 0)
1395 return NULL;
1396
1397 /* Found a the hh_name in the headers.
1398 * we will compute the hash based on this value ctx.val.
1399 */
1400 len = test.len;
1401 p = (char *)test.ptr;
1402 while (len) {
1403 hash = *p + (hash << 6) + (hash << 16) - hash;
1404 len--;
1405 p++;
1406 }
1407
1408 return px->lbprm.map.srv[hash % px->lbprm.tot_weight];
1409}
Benoitaffb4812009-03-25 13:02:10 +01001410
1411/*
Willy Tarreau7c669d72008-06-20 15:04:11 +02001412 * This function applies the load-balancing algorithm to the session, as
1413 * defined by the backend it is assigned to. The session is then marked as
1414 * 'assigned'.
1415 *
1416 * This function MAY NOT be called with SN_ASSIGNED already set. If the session
1417 * had a server previously assigned, it is rebalanced, trying to avoid the same
1418 * server.
1419 * The function tries to keep the original connection slot if it reconnects to
1420 * the same server, otherwise it releases it and tries to offer it.
1421 *
1422 * It is illegal to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001423 *
1424 * It may return :
Willy Tarreau7c669d72008-06-20 15:04:11 +02001425 * SRV_STATUS_OK if everything is OK. Session assigned to ->srv
1426 * SRV_STATUS_NOSRV if no server is available. Session is not ASSIGNED
1427 * SRV_STATUS_FULL if all servers are saturated. Session is not ASSIGNED
Willy Tarreaubaaee002006-06-26 02:48:02 +02001428 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1429 *
Willy Tarreau7c669d72008-06-20 15:04:11 +02001430 * Upon successful return, the session flag SN_ASSIGNED is set to indicate that
1431 * it does not need to be called anymore. This means that s->srv can be trusted
1432 * in balance and direct modes.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001433 *
1434 */
1435
1436int assign_server(struct session *s)
1437{
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001438
Willy Tarreau7c669d72008-06-20 15:04:11 +02001439 struct server *conn_slot;
1440 int err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001441
Willy Tarreaubaaee002006-06-26 02:48:02 +02001442#ifdef DEBUG_FULL
1443 fprintf(stderr,"assign_server : s=%p\n",s);
1444#endif
1445
Willy Tarreau7c669d72008-06-20 15:04:11 +02001446 err = SRV_STATUS_INTERNAL;
1447 if (unlikely(s->pend_pos || s->flags & SN_ASSIGNED))
1448 goto out_err;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001449
Willy Tarreau7c669d72008-06-20 15:04:11 +02001450 s->prev_srv = s->prev_srv;
1451 conn_slot = s->srv_conn;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001452
Willy Tarreau7c669d72008-06-20 15:04:11 +02001453 /* We have to release any connection slot before applying any LB algo,
1454 * otherwise we may erroneously end up with no available slot.
1455 */
1456 if (conn_slot)
1457 sess_change_server(s, NULL);
1458
1459 /* We will now try to find the good server and store it into <s->srv>.
1460 * Note that <s->srv> may be NULL in case of dispatch or proxy mode,
1461 * as well as if no server is available (check error code).
1462 */
Willy Tarreau1a20a5d2007-11-01 21:08:19 +01001463
Willy Tarreau7c669d72008-06-20 15:04:11 +02001464 s->srv = NULL;
1465 if (s->be->lbprm.algo & BE_LB_ALGO) {
1466 int len;
1467 /* we must check if we have at least one server available */
1468 if (!s->be->lbprm.tot_weight) {
1469 err = SRV_STATUS_NOSRV;
1470 goto out;
1471 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001472
Willy Tarreau7c669d72008-06-20 15:04:11 +02001473 switch (s->be->lbprm.algo & BE_LB_ALGO) {
1474 case BE_LB_ALGO_RR:
1475 s->srv = fwrr_get_next_server(s->be, s->prev_srv);
1476 if (!s->srv) {
1477 err = SRV_STATUS_FULL;
1478 goto out;
1479 }
1480 break;
1481 case BE_LB_ALGO_LC:
1482 s->srv = fwlc_get_next_server(s->be, s->prev_srv);
1483 if (!s->srv) {
1484 err = SRV_STATUS_FULL;
1485 goto out;
1486 }
1487 break;
1488 case BE_LB_ALGO_SH:
1489 if (s->cli_addr.ss_family == AF_INET)
1490 len = 4;
1491 else if (s->cli_addr.ss_family == AF_INET6)
1492 len = 16;
1493 else {
1494 /* unknown IP family */
1495 err = SRV_STATUS_INTERNAL;
1496 goto out;
1497 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001498
Willy Tarreau7c669d72008-06-20 15:04:11 +02001499 s->srv = get_server_sh(s->be,
1500 (void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
1501 len);
1502 break;
1503 case BE_LB_ALGO_UH:
1504 /* URI hashing */
1505 s->srv = get_server_uh(s->be,
1506 s->txn.req.sol + s->txn.req.sl.rq.u,
1507 s->txn.req.sl.rq.u_l);
1508 break;
1509 case BE_LB_ALGO_PH:
1510 /* URL Parameter hashing */
1511 if (s->txn.meth == HTTP_METH_POST &&
1512 memchr(s->txn.req.sol + s->txn.req.sl.rq.u, '&',
1513 s->txn.req.sl.rq.u_l ) == NULL)
1514 s->srv = get_server_ph_post(s);
1515 else
1516 s->srv = get_server_ph(s->be,
Willy Tarreau2fcb5002007-05-08 13:35:26 +02001517 s->txn.req.sol + s->txn.req.sl.rq.u,
1518 s->txn.req.sl.rq.u_l);
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001519
Willy Tarreau7c669d72008-06-20 15:04:11 +02001520 if (!s->srv) {
1521 /* parameter not found, fall back to round robin on the map */
1522 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
Willy Tarreau01732802007-11-01 22:48:15 +01001523 if (!s->srv) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001524 err = SRV_STATUS_FULL;
1525 goto out;
Willy Tarreau01732802007-11-01 22:48:15 +01001526 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001527 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001528 break;
Benoitaffb4812009-03-25 13:02:10 +01001529 case BE_LB_ALGO_HH:
1530 /* Header Parameter hashing */
1531 s->srv = get_server_hh(s);
1532
1533 if (!s->srv) {
1534 /* parameter not found, fall back to round robin on the map */
1535 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
1536 if (!s->srv) {
1537 err = SRV_STATUS_FULL;
1538 goto out;
1539 }
1540 }
1541 break;
Emeric Brun736aa232009-06-30 17:56:00 +02001542 case BE_LB_ALGO_RCH:
1543 /* RDP Cookie hashing */
1544 s->srv = get_server_rch(s);
1545
1546 if (!s->srv) {
1547 /* parameter not found, fall back to round robin on the map */
1548 s->srv = get_server_rr_with_conns(s->be, s->prev_srv);
1549 if (!s->srv) {
1550 err = SRV_STATUS_FULL;
1551 goto out;
1552 }
1553 }
1554 break;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001555 default:
1556 /* unknown balancing algorithm */
1557 err = SRV_STATUS_INTERNAL;
1558 goto out;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001559 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001560 if (s->srv != s->prev_srv) {
1561 s->be->cum_lbconn++;
1562 s->srv->cum_lbconn++;
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001563 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001564 }
1565 else if (s->be->options & PR_O_HTTP_PROXY) {
1566 if (!s->srv_addr.sin_addr.s_addr) {
1567 err = SRV_STATUS_NOSRV;
1568 goto out;
Willy Tarreau5d65bbb2007-01-21 12:47:26 +01001569 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001570 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001571 else if (!*(int *)&s->be->dispatch_addr.sin_addr &&
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001572 !(s->be->options & PR_O_TRANSP)) {
Willy Tarreau7c669d72008-06-20 15:04:11 +02001573 err = SRV_STATUS_NOSRV;
1574 goto out;
1575 }
1576
1577 s->flags |= SN_ASSIGNED;
1578 err = SRV_STATUS_OK;
1579 out:
1580
1581 /* Either we take back our connection slot, or we offer it to someone
1582 * else if we don't need it anymore.
1583 */
1584 if (conn_slot) {
1585 if (conn_slot == s->srv) {
1586 sess_change_server(s, s->srv);
1587 } else {
1588 if (may_dequeue_tasks(conn_slot, s->be))
1589 process_srv_queue(conn_slot);
1590 }
1591 }
1592
1593 out_err:
1594 return err;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001595}
1596
1597
1598/*
1599 * This function assigns a server address to a session, and sets SN_ADDR_SET.
1600 * The address is taken from the currently assigned server, or from the
1601 * dispatch or transparent address.
1602 *
1603 * It may return :
1604 * SRV_STATUS_OK if everything is OK.
1605 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1606 *
1607 * Upon successful return, the session flag SN_ADDR_SET is set. This flag is
1608 * not cleared, so it's to the caller to clear it if required.
1609 *
1610 */
1611int assign_server_address(struct session *s)
1612{
1613#ifdef DEBUG_FULL
1614 fprintf(stderr,"assign_server_address : s=%p\n",s);
1615#endif
1616
Willy Tarreau31682232007-11-29 15:38:04 +01001617 if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_ALGO)) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001618 /* A server is necessarily known for this session */
1619 if (!(s->flags & SN_ASSIGNED))
1620 return SRV_STATUS_INTERNAL;
1621
1622 s->srv_addr = s->srv->addr;
1623
1624 /* if this server remaps proxied ports, we'll use
1625 * the port the client connected to with an offset. */
1626 if (s->srv->state & SRV_MAPPORTS) {
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001627 if (!(s->be->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
Willy Tarreau14c8aac2007-05-08 19:46:30 +02001628 get_frt_addr(s);
1629 if (s->frt_addr.ss_family == AF_INET) {
1630 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1631 ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
1632 } else {
1633 s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
1634 ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
1635 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001636 }
1637 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001638 else if (*(int *)&s->be->dispatch_addr.sin_addr) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001639 /* connect to the defined dispatch addr */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001640 s->srv_addr = s->be->dispatch_addr;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001641 }
Willy Tarreau4b1f8592008-12-23 23:13:55 +01001642 else if (s->be->options & PR_O_TRANSP) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001643 /* in transparent mode, use the original dest addr if no dispatch specified */
Willy Tarreaubd414282008-01-19 13:46:35 +01001644 if (!(s->flags & SN_FRT_ADDR_SET))
1645 get_frt_addr(s);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001646
Willy Tarreaubd414282008-01-19 13:46:35 +01001647 memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
1648 /* when we support IPv6 on the backend, we may add other tests */
1649 //qfprintf(stderr, "Cannot get original server address.\n");
1650 //return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001651 }
Alexandre Cassen5eb1a902007-11-29 15:43:32 +01001652 else if (s->be->options & PR_O_HTTP_PROXY) {
1653 /* If HTTP PROXY option is set, then server is already assigned
1654 * during incoming client request parsing. */
1655 }
Willy Tarreau1a1158b2007-01-20 11:07:46 +01001656 else {
1657 /* no server and no LB algorithm ! */
1658 return SRV_STATUS_INTERNAL;
1659 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001660
1661 s->flags |= SN_ADDR_SET;
1662 return SRV_STATUS_OK;
1663}
1664
1665
1666/* This function assigns a server to session <s> if required, and can add the
1667 * connection to either the assigned server's queue or to the proxy's queue.
Willy Tarreau7c669d72008-06-20 15:04:11 +02001668 * If ->srv_conn is set, the session is first released from the server.
1669 * It may also be called with SN_DIRECT and/or SN_ASSIGNED though. It will
1670 * be called before any connection and after any retry or redispatch occurs.
1671 *
1672 * It is not allowed to call this function with a session in a queue.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001673 *
1674 * Returns :
1675 *
1676 * SRV_STATUS_OK if everything is OK.
1677 * SRV_STATUS_NOSRV if no server is available. s->srv = NULL.
1678 * SRV_STATUS_QUEUED if the connection has been queued.
1679 * SRV_STATUS_FULL if the server(s) is/are saturated and the
Willy Tarreau7c669d72008-06-20 15:04:11 +02001680 * connection could not be queued in s->srv,
1681 * which may be NULL if we queue on the backend.
Willy Tarreaubaaee002006-06-26 02:48:02 +02001682 * SRV_STATUS_INTERNAL for other unrecoverable errors.
1683 *
1684 */
1685int assign_server_and_queue(struct session *s)
1686{
1687 struct pendconn *p;
1688 int err;
1689
1690 if (s->pend_pos)
1691 return SRV_STATUS_INTERNAL;
1692
Willy Tarreau7c669d72008-06-20 15:04:11 +02001693 err = SRV_STATUS_OK;
1694 if (!(s->flags & SN_ASSIGNED)) {
1695 err = assign_server(s);
1696 if (s->prev_srv) {
1697 /* This session was previously assigned to a server. We have to
1698 * update the session's and the server's stats :
1699 * - if the server changed :
1700 * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
1701 * - set SN_REDISP if it was successfully redispatched
1702 * - increment srv->redispatches and be->redispatches
1703 * - if the server remained the same : update retries.
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001704 */
1705
Willy Tarreau7c669d72008-06-20 15:04:11 +02001706 if (s->prev_srv != s->srv) {
1707 if ((s->txn.flags & TX_CK_MASK) == TX_CK_VALID) {
1708 s->txn.flags &= ~TX_CK_MASK;
1709 s->txn.flags |= TX_CK_DOWN;
1710 }
1711 s->flags |= SN_REDISP;
1712 s->prev_srv->redispatches++;
1713 s->be->redispatches++;
1714 } else {
1715 s->prev_srv->retries++;
1716 s->be->retries++;
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001717 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01001718 }
1719 }
1720
Willy Tarreaubaaee002006-06-26 02:48:02 +02001721 switch (err) {
1722 case SRV_STATUS_OK:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001723 /* we have SN_ASSIGNED set */
1724 if (!s->srv)
1725 return SRV_STATUS_OK; /* dispatch or proxy mode */
1726
1727 /* If we already have a connection slot, no need to check any queue */
1728 if (s->srv_conn == s->srv)
1729 return SRV_STATUS_OK;
1730
1731 /* OK, this session already has an assigned server, but no
1732 * connection slot yet. Either it is a redispatch, or it was
1733 * assigned from persistence information (direct mode).
1734 */
1735 if ((s->flags & SN_REDIRECTABLE) && s->srv->rdr_len) {
1736 /* server scheduled for redirection, and already assigned. We
1737 * don't want to go further nor check the queue.
Willy Tarreau21d2af32008-02-14 20:25:24 +01001738 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02001739 sess_change_server(s, s->srv); /* not really needed in fact */
Willy Tarreau21d2af32008-02-14 20:25:24 +01001740 return SRV_STATUS_OK;
1741 }
1742
Willy Tarreau7c669d72008-06-20 15:04:11 +02001743 /* We might have to queue this session if the assigned server is full.
1744 * We know we have to queue it into the server's queue, so if a maxqueue
1745 * is set on the server, we must also check that the server's queue is
1746 * not full, in which case we have to return FULL.
1747 */
1748 if (s->srv->maxconn &&
1749 (s->srv->nbpend || s->srv->served >= srv_dynamic_maxconn(s->srv))) {
1750
1751 if (s->srv->maxqueue > 0 && s->srv->nbpend >= s->srv->maxqueue)
1752 return SRV_STATUS_FULL;
1753
Willy Tarreaubaaee002006-06-26 02:48:02 +02001754 p = pendconn_add(s);
1755 if (p)
1756 return SRV_STATUS_QUEUED;
1757 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001758 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001759 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02001760
1761 /* OK, we can use this server. Let's reserve our place */
1762 sess_change_server(s, s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001763 return SRV_STATUS_OK;
1764
1765 case SRV_STATUS_FULL:
1766 /* queue this session into the proxy's queue */
1767 p = pendconn_add(s);
1768 if (p)
1769 return SRV_STATUS_QUEUED;
1770 else
Willy Tarreau7c669d72008-06-20 15:04:11 +02001771 return SRV_STATUS_INTERNAL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02001772
1773 case SRV_STATUS_NOSRV:
Willy Tarreau7c669d72008-06-20 15:04:11 +02001774 return err;
1775
Willy Tarreaubaaee002006-06-26 02:48:02 +02001776 case SRV_STATUS_INTERNAL:
1777 return err;
Willy Tarreau7c669d72008-06-20 15:04:11 +02001778
Willy Tarreaubaaee002006-06-26 02:48:02 +02001779 default:
1780 return SRV_STATUS_INTERNAL;
1781 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001782}
Willy Tarreaubaaee002006-06-26 02:48:02 +02001783
1784/*
1785 * This function initiates a connection to the server assigned to this session
1786 * (s->srv, s->srv_addr). It will assign a server if none is assigned yet.
1787 * It can return one of :
1788 * - SN_ERR_NONE if everything's OK
1789 * - SN_ERR_SRVTO if there are no more servers
1790 * - SN_ERR_SRVCL if the connection was refused by the server
1791 * - SN_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
1792 * - SN_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
1793 * - SN_ERR_INTERNAL for any other purely internal errors
1794 * Additionnally, in the case of SN_ERR_RESOURCE, an emergency log will be emitted.
1795 */
1796int connect_server(struct session *s)
1797{
1798 int fd, err;
1799
1800 if (!(s->flags & SN_ADDR_SET)) {
1801 err = assign_server_address(s);
1802 if (err != SRV_STATUS_OK)
1803 return SN_ERR_INTERNAL;
1804 }
1805
Willy Tarreaufa7e1022008-10-19 07:30:41 +02001806 if ((fd = s->req->cons->fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
Willy Tarreaubaaee002006-06-26 02:48:02 +02001807 qfprintf(stderr, "Cannot get a server socket.\n");
1808
1809 if (errno == ENFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001810 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001811 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001812 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001813 else if (errno == EMFILE)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001814 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001815 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001816 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001817 else if (errno == ENOBUFS || errno == ENOMEM)
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001818 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02001819 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001820 s->be->id, maxfd);
Willy Tarreaubaaee002006-06-26 02:48:02 +02001821 /* this is a resource error */
1822 return SN_ERR_RESOURCE;
1823 }
Willy Tarreau7e5067d2008-12-07 16:27:56 +01001824
Willy Tarreaubaaee002006-06-26 02:48:02 +02001825 if (fd >= global.maxsock) {
1826 /* do not log anything there, it's a normal condition when this option
1827 * is used to serialize connections to a server !
1828 */
1829 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
1830 close(fd);
1831 return SN_ERR_PRXCOND; /* it is a configuration limit */
1832 }
1833
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001834#ifdef CONFIG_HAP_TCPSPLICE
Willy Tarreau3ab68cf2009-01-25 16:03:28 +01001835 if ((global.tune.options & GTUNE_USE_SPLICE) &&
1836 (s->fe->options & s->be->options) & PR_O_TCPSPLICE) {
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001837 /* TCP splicing supported by both FE and BE */
Willy Tarreau7e5067d2008-12-07 16:27:56 +01001838 tcp_splice_initfd(s->req->prod->fd, fd);
Willy Tarreau6d1a9882007-01-07 02:03:04 +01001839 }
1840#endif
1841
Willy Tarreaubaaee002006-06-26 02:48:02 +02001842 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
1843 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) {
1844 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
1845 close(fd);
1846 return SN_ERR_INTERNAL;
1847 }
1848
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001849 if (s->be->options & PR_O_TCP_SRV_KA)
Willy Tarreaubaaee002006-06-26 02:48:02 +02001850 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
1851
Alexandre Cassen87ea5482007-10-11 20:48:58 +02001852 if (s->be->options & PR_O_TCP_NOLING)
1853 setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
1854
Willy Tarreaubaaee002006-06-26 02:48:02 +02001855 /* allow specific binding :
1856 * - server-specific at first
1857 * - proxy-specific next
1858 */
1859 if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001860 struct sockaddr_in *remote = NULL;
1861 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001862
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001863#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001864 switch (s->srv->state & SRV_TPROXY_MASK) {
1865 case SRV_TPROXY_ADDR:
1866 remote = (struct sockaddr_in *)&s->srv->tproxy_addr;
1867 flags = 3;
1868 break;
1869 case SRV_TPROXY_CLI:
1870 flags |= 2;
1871 /* fall through */
1872 case SRV_TPROXY_CIP:
1873 /* FIXME: what can we do if the client connects in IPv6 ? */
1874 flags |= 1;
1875 remote = (struct sockaddr_in *)&s->cli_addr;
1876 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001877 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001878#endif
Willy Tarreauc76721d2009-02-04 20:20:58 +01001879#ifdef SO_BINDTODEVICE
1880 /* Note: this might fail if not CAP_NET_RAW */
1881 if (s->srv->iface_name)
Willy Tarreau604e8302009-03-06 00:48:23 +01001882 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->srv->iface_name, s->srv->iface_len + 1);
Willy Tarreauc76721d2009-02-04 20:20:58 +01001883#endif
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001884
1885 if (s->srv->sport_range) {
1886 int attempts = 10; /* should be more than enough to find a spare port */
1887 struct sockaddr_in src;
1888
1889 ret = 1;
1890 src = s->srv->source_addr;
1891
1892 do {
1893 /* note: in case of retry, we may have to release a previously
1894 * allocated port, hence this loop's construct.
1895 */
1896 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1897 fdtab[fd].port_range = NULL;
1898
1899 if (!attempts)
1900 break;
1901 attempts--;
1902
1903 fdtab[fd].local_port = port_range_alloc_port(s->srv->sport_range);
1904 if (!fdtab[fd].local_port)
1905 break;
1906
1907 fdtab[fd].port_range = s->srv->sport_range;
1908 src.sin_port = htons(fdtab[fd].local_port);
1909
1910 ret = tcpv4_bind_socket(fd, flags, &src, remote);
1911 } while (ret != 0); /* binding NOK */
1912 }
1913 else {
1914 ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
1915 }
1916
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001917 if (ret) {
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001918 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
1919 fdtab[fd].port_range = NULL;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001920 close(fd);
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02001921
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001922 if (ret == 1) {
1923 Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
1924 s->be->id, s->srv->id);
1925 send_log(s->be, LOG_EMERG,
1926 "Cannot bind to source address before connect() for server %s/%s.\n",
1927 s->be->id, s->srv->id);
1928 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001929 Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001930 s->be->id, s->srv->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001931 send_log(s->be, LOG_EMERG,
Willy Tarreau77074d52006-11-12 23:57:19 +01001932 "Cannot bind to tproxy source address before connect() for server %s/%s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001933 s->be->id, s->srv->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001934 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001935 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001936 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001937 }
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001938 else if (s->be->options & PR_O_BIND_SRC) {
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001939 struct sockaddr_in *remote = NULL;
1940 int ret, flags = 0;
Willy Tarreau77074d52006-11-12 23:57:19 +01001941
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001942#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Willy Tarreau786d1912008-01-13 18:10:06 +01001943 switch (s->be->options & PR_O_TPXY_MASK) {
1944 case PR_O_TPXY_ADDR:
1945 remote = (struct sockaddr_in *)&s->be->tproxy_addr;
1946 flags = 3;
1947 break;
1948 case PR_O_TPXY_CLI:
1949 flags |= 2;
1950 /* fall through */
1951 case PR_O_TPXY_CIP:
1952 /* FIXME: what can we do if the client connects in IPv6 ? */
1953 flags |= 1;
1954 remote = (struct sockaddr_in *)&s->cli_addr;
1955 break;
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001956 }
Willy Tarreaucf1d5722008-02-14 20:28:18 +01001957#endif
Willy Tarreaud53f96b2009-02-04 18:46:54 +01001958#ifdef SO_BINDTODEVICE
1959 /* Note: this might fail if not CAP_NET_RAW */
1960 if (s->be->iface_name)
Willy Tarreau604e8302009-03-06 00:48:23 +01001961 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->be->iface_name, s->be->iface_len + 1);
Willy Tarreaud53f96b2009-02-04 18:46:54 +01001962#endif
Willy Tarreaue8c66af2008-01-13 18:40:14 +01001963 ret = tcpv4_bind_socket(fd, flags, &s->be->source_addr, remote);
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001964 if (ret) {
1965 close(fd);
1966 if (ret == 1) {
1967 Alert("Cannot bind to source address before connect() for proxy %s. Aborting.\n",
1968 s->be->id);
1969 send_log(s->be, LOG_EMERG,
1970 "Cannot bind to source address before connect() for proxy %s.\n",
1971 s->be->id);
1972 } else {
Willy Tarreau77074d52006-11-12 23:57:19 +01001973 Alert("Cannot bind to tproxy source address before connect() for proxy %s. Aborting.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001974 s->be->id);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02001975 send_log(s->be, LOG_EMERG,
Willy Tarreaufe10a062008-01-12 22:22:34 +01001976 "Cannot bind to tproxy source address before connect() for proxy %s.\n",
1977 s->be->id);
Willy Tarreau77074d52006-11-12 23:57:19 +01001978 }
Willy Tarreau5b6995c2008-01-13 16:31:17 +01001979 return SN_ERR_RESOURCE;
Willy Tarreau77074d52006-11-12 23:57:19 +01001980 }
Willy Tarreaubaaee002006-06-26 02:48:02 +02001981 }
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02001982
Willy Tarreaud88edf22009-06-14 15:48:17 +02001983#ifdef TCP_QUICKACK
1984 /* disabling tcp quick ack now allows the first request to leave the
1985 * machine with the first ACK. We only do this if there are pending
1986 * data in the buffer.
1987 */
1988 if ((s->be->options2 & PR_O2_SMARTCON) && s->req->send_max)
1989 setsockopt(fd, SOL_TCP, TCP_QUICKACK, (char *) &zero, sizeof(zero));
1990#endif
1991
Willy Tarreaubaaee002006-06-26 02:48:02 +02001992 if ((connect(fd, (struct sockaddr *)&s->srv_addr, sizeof(s->srv_addr)) == -1) &&
1993 (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
1994
1995 if (errno == EAGAIN || errno == EADDRINUSE) {
1996 char *msg;
1997 if (errno == EAGAIN) /* no free ports left, try again later */
1998 msg = "no free ports";
1999 else
2000 msg = "local address already in use";
2001
2002 qfprintf(stderr,"Cannot connect: %s.\n",msg);
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02002003 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
2004 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002005 close(fd);
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002006 send_log(s->be, LOG_EMERG,
Willy Tarreaubaaee002006-06-26 02:48:02 +02002007 "Connect() failed for server %s/%s: %s.\n",
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002008 s->be->id, s->srv->id, msg);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002009 return SN_ERR_RESOURCE;
2010 } else if (errno == ETIMEDOUT) {
2011 //qfprintf(stderr,"Connect(): ETIMEDOUT");
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02002012 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
2013 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002014 close(fd);
2015 return SN_ERR_SRVTO;
2016 } else {
2017 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
2018 //qfprintf(stderr,"Connect(): %d", errno);
Willy Tarreauc6f4ce82009-06-10 11:09:37 +02002019 port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
2020 fdtab[fd].port_range = NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002021 close(fd);
2022 return SN_ERR_SRVCL;
2023 }
2024 }
2025
Willy Tarreaue5ed4062008-08-30 03:17:31 +02002026 fdtab[fd].owner = s->req->cons;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002027 fdtab[fd].state = FD_STCONN; /* connection in progress */
Willy Tarreaufb14edc2009-06-14 15:24:37 +02002028 fdtab[fd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
Willy Tarreaud7971282006-07-29 18:36:34 +02002029 fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
Willy Tarreau54469402006-07-29 16:59:06 +02002030 fdtab[fd].cb[DIR_RD].b = s->rep;
Willy Tarreauf8306d52006-07-29 19:01:31 +02002031 fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
Willy Tarreau54469402006-07-29 16:59:06 +02002032 fdtab[fd].cb[DIR_WR].b = s->req;
Willy Tarreaue94ebd02007-10-09 17:14:37 +02002033
2034 fdtab[fd].peeraddr = (struct sockaddr *)&s->srv_addr;
2035 fdtab[fd].peerlen = sizeof(s->srv_addr);
2036
Willy Tarreaubaaee002006-06-26 02:48:02 +02002037 fd_insert(fd);
Willy Tarreau788e2842008-08-26 13:25:39 +02002038 EV_FD_SET(fd, DIR_WR); /* for connect status */
2039
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002040 s->req->cons->state = SI_ST_CON;
Willy Tarreaudc340a92009-06-28 23:10:19 +02002041 s->req->cons->flags |= SI_FL_CAP_SPLTCP; /* TCP supports splicing */
Willy Tarreaubaaee002006-06-26 02:48:02 +02002042 if (s->srv) {
Willy Tarreau1e62de62008-11-11 20:20:02 +01002043 s->flags |= SN_CURR_SESS;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002044 s->srv->cur_sess++;
2045 if (s->srv->cur_sess > s->srv->cur_sess_max)
2046 s->srv->cur_sess_max = s->srv->cur_sess;
Willy Tarreau51406232008-03-10 22:04:20 +01002047 if (s->be->lbprm.server_take_conn)
2048 s->be->lbprm.server_take_conn(s->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002049 }
2050
Willy Tarreaua3780f22009-03-15 21:49:00 +01002051 s->req->cons->exp = tick_add_ifset(now_ms, s->be->timeout.connect);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002052 return SN_ERR_NONE; /* connection is OK */
2053}
2054
2055
Willy Tarreaubaaee002006-06-26 02:48:02 +02002056/* This function performs the "redispatch" part of a connection attempt. It
2057 * will assign a server if required, queue the connection if required, and
2058 * handle errors that might arise at this level. It can change the server
2059 * state. It will return 1 if it encounters an error, switches the server
2060 * state, or has to queue a connection. Otherwise, it will return 0 indicating
2061 * that the connection is ready to use.
2062 */
2063
2064int srv_redispatch_connect(struct session *t)
2065{
2066 int conn_err;
2067
2068 /* We know that we don't have any connection pending, so we will
2069 * try to get a new one, and wait in this state if it's queued
2070 */
Willy Tarreau7c669d72008-06-20 15:04:11 +02002071 redispatch:
Willy Tarreaubaaee002006-06-26 02:48:02 +02002072 conn_err = assign_server_and_queue(t);
2073 switch (conn_err) {
2074 case SRV_STATUS_OK:
2075 break;
2076
Willy Tarreau7c669d72008-06-20 15:04:11 +02002077 case SRV_STATUS_FULL:
2078 /* The server has reached its maxqueue limit. Either PR_O_REDISP is set
2079 * and we can redispatch to another server, or it is not and we return
2080 * 503. This only makes sense in DIRECT mode however, because normal LB
2081 * algorithms would never select such a server, and hash algorithms
2082 * would bring us on the same server again. Note that t->srv is set in
2083 * this case.
2084 */
2085 if ((t->flags & SN_DIRECT) && (t->be->options & PR_O_REDISP)) {
2086 t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
2087 t->prev_srv = t->srv;
2088 goto redispatch;
2089 }
2090
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002091 if (!t->req->cons->err_type) {
2092 t->req->cons->err_type = SI_ET_QUEUE_ERR;
2093 t->req->cons->err_loc = t->srv;
2094 }
Willy Tarreau7c669d72008-06-20 15:04:11 +02002095
2096 t->srv->failed_conns++;
2097 t->be->failed_conns++;
2098 return 1;
2099
Willy Tarreaubaaee002006-06-26 02:48:02 +02002100 case SRV_STATUS_NOSRV:
2101 /* note: it is guaranteed that t->srv == NULL here */
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002102 if (!t->req->cons->err_type) {
2103 t->req->cons->err_type = SI_ET_CONN_ERR;
2104 t->req->cons->err_loc = NULL;
2105 }
Krzysztof Piotr Oledzki5a329cf2008-02-22 03:50:19 +01002106
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002107 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002108 return 1;
2109
2110 case SRV_STATUS_QUEUED:
Willy Tarreau35374672008-09-03 18:11:02 +02002111 t->req->cons->exp = tick_add_ifset(now_ms, t->be->timeout.queue);
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002112 t->req->cons->state = SI_ST_QUE;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002113 /* do nothing else and do not wake any other session up */
2114 return 1;
2115
Willy Tarreaubaaee002006-06-26 02:48:02 +02002116 case SRV_STATUS_INTERNAL:
2117 default:
Willy Tarreaufa7e1022008-10-19 07:30:41 +02002118 if (!t->req->cons->err_type) {
2119 t->req->cons->err_type = SI_ET_CONN_OTHER;
2120 t->req->cons->err_loc = t->srv;
2121 }
2122
Willy Tarreaubaaee002006-06-26 02:48:02 +02002123 if (t->srv)
Willy Tarreau7f062c42009-03-05 18:43:00 +01002124 srv_inc_sess_ctr(t->srv);
Willy Tarreau98937b82007-12-10 15:05:42 +01002125 if (t->srv)
Willy Tarreaubaaee002006-06-26 02:48:02 +02002126 t->srv->failed_conns++;
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002127 t->be->failed_conns++;
Willy Tarreaubaaee002006-06-26 02:48:02 +02002128
2129 /* release other sessions waiting for this server */
Willy Tarreaue2e27a52007-04-01 00:01:37 +02002130 if (may_dequeue_tasks(t->srv, t->be))
Willy Tarreau7c669d72008-06-20 15:04:11 +02002131 process_srv_queue(t->srv);
Willy Tarreaubaaee002006-06-26 02:48:02 +02002132 return 1;
2133 }
2134 /* if we get here, it's because we got SRV_STATUS_OK, which also
2135 * means that the connection has not been queued.
2136 */
2137 return 0;
2138}
2139
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002140int be_downtime(struct proxy *px) {
Willy Tarreaub625a082007-11-26 01:15:43 +01002141 if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
Krzysztof Oledzki85130942007-10-22 16:21:10 +02002142 return px->down_time;
2143
2144 return now.tv_sec - px->last_change + px->down_time;
2145}
Willy Tarreaubaaee002006-06-26 02:48:02 +02002146
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002147/* This function parses a "balance" statement in a backend section describing
2148 * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
2149 * returns -1, it may write an error message into ther <err> buffer, for at
2150 * most <errlen> bytes, trailing zero included. The trailing '\n' will not be
2151 * written. The function must be called with <args> pointing to the first word
2152 * after "balance".
2153 */
2154int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy)
2155{
2156 if (!*(args[0])) {
2157 /* if no option is set, use round-robin by default */
Willy Tarreau31682232007-11-29 15:38:04 +01002158 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2159 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002160 return 0;
2161 }
2162
2163 if (!strcmp(args[0], "roundrobin")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002164 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2165 curproxy->lbprm.algo |= BE_LB_ALGO_RR;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002166 }
Willy Tarreau51406232008-03-10 22:04:20 +01002167 else if (!strcmp(args[0], "leastconn")) {
2168 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2169 curproxy->lbprm.algo |= BE_LB_ALGO_LC;
2170 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002171 else if (!strcmp(args[0], "source")) {
Willy Tarreau31682232007-11-29 15:38:04 +01002172 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2173 curproxy->lbprm.algo |= BE_LB_ALGO_SH;
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002174 }
2175 else if (!strcmp(args[0], "uri")) {
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002176 int arg = 1;
2177
Willy Tarreau31682232007-11-29 15:38:04 +01002178 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2179 curproxy->lbprm.algo |= BE_LB_ALGO_UH;
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002180
2181 while (*args[arg]) {
2182 if (!strcmp(args[arg], "len")) {
2183 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2184 snprintf(err, errlen, "'balance uri len' expects a positive integer (got '%s').", args[arg+1]);
2185 return -1;
2186 }
2187 curproxy->uri_len_limit = atoi(args[arg+1]);
2188 arg += 2;
2189 }
2190 else if (!strcmp(args[arg], "depth")) {
2191 if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
2192 snprintf(err, errlen, "'balance uri depth' expects a positive integer (got '%s').", args[arg+1]);
2193 return -1;
2194 }
2195 /* hint: we store the position of the ending '/' (depth+1) so
2196 * that we avoid a comparison while computing the hash.
2197 */
2198 curproxy->uri_dirs_depth1 = atoi(args[arg+1]) + 1;
2199 arg += 2;
2200 }
2201 else {
2202 snprintf(err, errlen, "'balance uri' only accepts parameters 'len' and 'depth' (got '%s').", args[arg]);
2203 return -1;
2204 }
2205 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002206 }
Willy Tarreau01732802007-11-01 22:48:15 +01002207 else if (!strcmp(args[0], "url_param")) {
2208 if (!*args[1]) {
2209 snprintf(err, errlen, "'balance url_param' requires an URL parameter name.");
2210 return -1;
2211 }
Willy Tarreau31682232007-11-29 15:38:04 +01002212 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2213 curproxy->lbprm.algo |= BE_LB_ALGO_PH;
Willy Tarreaua534fea2008-08-03 12:19:50 +02002214
2215 free(curproxy->url_param_name);
Willy Tarreau01732802007-11-01 22:48:15 +01002216 curproxy->url_param_name = strdup(args[1]);
Willy Tarreaua534fea2008-08-03 12:19:50 +02002217 curproxy->url_param_len = strlen(args[1]);
Marek Majkowski9c30fc12008-04-27 23:25:55 +02002218 if (*args[2]) {
matt.farnsworth@nokia.com1c2ab962008-04-14 20:47:37 +02002219 if (strcmp(args[2], "check_post")) {
2220 snprintf(err, errlen, "'balance url_param' only accepts check_post modifier.");
2221 return -1;
2222 }
2223 if (*args[3]) {
2224 /* TODO: maybe issue a warning if there is no value, no digits or too long */
2225 curproxy->url_param_post_limit = str2ui(args[3]);
2226 }
2227 /* if no limit, or faul value in args[3], then default to a moderate wordlen */
2228 if (!curproxy->url_param_post_limit)
2229 curproxy->url_param_post_limit = 48;
2230 else if ( curproxy->url_param_post_limit < 3 )
2231 curproxy->url_param_post_limit = 3; /* minimum example: S=3 or \r\nS=6& */
2232 }
Benoitaffb4812009-03-25 13:02:10 +01002233 }
2234 else if (!strncmp(args[0], "hdr(", 4)) {
2235 const char *beg, *end;
2236
2237 beg = args[0] + 4;
2238 end = strchr(beg, ')');
2239
2240 if (!end || end == beg) {
2241 snprintf(err, errlen, "'balance hdr(name)' requires an http header field name.");
2242 return -1;
2243 }
2244
2245 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2246 curproxy->lbprm.algo |= BE_LB_ALGO_HH;
2247
2248 free(curproxy->hh_name);
2249 curproxy->hh_len = end - beg;
2250 curproxy->hh_name = my_strndup(beg, end - beg);
2251 curproxy->hh_match_domain = 0;
2252
2253 if (*args[1]) {
2254 if (strcmp(args[1], "use_domain_only")) {
2255 snprintf(err, errlen, "'balance hdr(name)' only accepts 'use_domain_only' modifier.");
2256 return -1;
2257 }
2258 curproxy->hh_match_domain = 1;
2259 }
2260
Emeric Brun736aa232009-06-30 17:56:00 +02002261 }
2262 else if (!strncmp(args[0], "rdp-cookie", 10)) {
2263 curproxy->lbprm.algo &= ~BE_LB_ALGO;
2264 curproxy->lbprm.algo |= BE_LB_ALGO_RCH;
2265
2266 if ( *(args[0] + 10 ) == '(' ) { /* cookie name */
2267 const char *beg, *end;
2268
2269 beg = args[0] + 11;
2270 end = strchr(beg, ')');
2271
2272 if (!end || end == beg) {
2273 snprintf(err, errlen, "'balance rdp-cookie(name)' requires an rdp cookie name.");
2274 return -1;
2275 }
2276
2277 free(curproxy->hh_name);
2278 curproxy->hh_name = my_strndup(beg, end - beg);
2279 curproxy->hh_len = end - beg;
2280 }
2281 else if ( *(args[0] + 10 ) == '\0' ) { /* default cookie name 'mstshash' */
2282 free(curproxy->hh_name);
2283 curproxy->hh_name = strdup("mstshash");
2284 curproxy->hh_len = strlen(curproxy->hh_name);
2285 }
2286 else { /* syntax */
2287 snprintf(err, errlen, "'balance rdp-cookie(name)' requires an rdp cookie name.");
2288 return -1;
2289 }
Willy Tarreau01732802007-11-01 22:48:15 +01002290 }
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002291 else {
Emeric Brun736aa232009-06-30 17:56:00 +02002292 snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri', 'url_param', 'hdr(name)' and 'rdp-cookie(name)' options.");
Willy Tarreaua0cbda62007-11-01 21:39:54 +01002293 return -1;
2294 }
2295 return 0;
2296}
2297
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002298
2299/************************************************************************/
2300/* All supported keywords must be declared here. */
2301/************************************************************************/
2302
2303/* set test->i to the number of enabled servers on the proxy */
2304static int
2305acl_fetch_nbsrv(struct proxy *px, struct session *l4, void *l7, int dir,
2306 struct acl_expr *expr, struct acl_test *test)
2307{
2308 test->flags = ACL_TEST_F_VOL_TEST;
2309 if (expr->arg_len) {
2310 /* another proxy was designated, we must look for it */
2311 for (px = proxy; px; px = px->next)
2312 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2313 break;
2314 }
2315 if (!px)
2316 return 0;
2317
2318 if (px->srv_act)
2319 test->i = px->srv_act;
2320 else if (px->lbprm.fbck)
2321 test->i = 1;
2322 else
2323 test->i = px->srv_bck;
2324
2325 return 1;
2326}
2327
Jeffrey 'jf' Lim5051d7b2008-09-04 01:03:03 +08002328/* set test->i to the number of enabled servers on the proxy */
2329static int
2330acl_fetch_connslots(struct proxy *px, struct session *l4, void *l7, int dir,
2331 struct acl_expr *expr, struct acl_test *test)
2332{
2333 struct server *iterator;
2334 test->flags = ACL_TEST_F_VOL_TEST;
2335 if (expr->arg_len) {
2336 /* another proxy was designated, we must look for it */
2337 for (px = proxy; px; px = px->next)
2338 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2339 break;
2340 }
2341 if (!px)
2342 return 0;
2343
2344 test->i = 0;
2345 iterator = px->srv;
2346 while (iterator) {
2347 if ((iterator->state & 1) == 0) {
2348 iterator = iterator->next;
2349 continue;
2350 }
2351 if (iterator->maxconn == 0 || iterator->maxqueue == 0) {
2352 test->i = -1;
2353 return 1;
2354 }
2355
2356 test->i += (iterator->maxconn - iterator->cur_sess)
2357 + (iterator->maxqueue - iterator->nbpend);
2358 iterator = iterator->next;
2359 }
2360
2361 return 1;
2362}
2363
Willy Tarreau079ff0a2009-03-05 21:34:28 +01002364/* set test->i to the number of connections per second reaching the frontend */
2365static int
2366acl_fetch_fe_sess_rate(struct proxy *px, struct session *l4, void *l7, int dir,
2367 struct acl_expr *expr, struct acl_test *test)
2368{
2369 test->flags = ACL_TEST_F_VOL_TEST;
2370 if (expr->arg_len) {
2371 /* another proxy was designated, we must look for it */
2372 for (px = proxy; px; px = px->next)
2373 if ((px->cap & PR_CAP_FE) && !strcmp(px->id, expr->arg.str))
2374 break;
2375 }
2376 if (!px)
2377 return 0;
2378
2379 test->i = read_freq_ctr(&px->fe_sess_per_sec);
2380 return 1;
2381}
2382
2383/* set test->i to the number of connections per second reaching the backend */
2384static int
2385acl_fetch_be_sess_rate(struct proxy *px, struct session *l4, void *l7, int dir,
2386 struct acl_expr *expr, struct acl_test *test)
2387{
2388 test->flags = ACL_TEST_F_VOL_TEST;
2389 if (expr->arg_len) {
2390 /* another proxy was designated, we must look for it */
2391 for (px = proxy; px; px = px->next)
2392 if ((px->cap & PR_CAP_BE) && !strcmp(px->id, expr->arg.str))
2393 break;
2394 }
2395 if (!px)
2396 return 0;
2397
2398 test->i = read_freq_ctr(&px->be_sess_per_sec);
2399 return 1;
2400}
2401
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002402
2403/* Note: must not be declared <const> as its list will be overwritten */
2404static struct acl_kw_list acl_kws = {{ },{
Jeffrey 'jf' Lim5051d7b2008-09-04 01:03:03 +08002405 { "nbsrv", acl_parse_int, acl_fetch_nbsrv, acl_match_int, ACL_USE_NOTHING },
Willy Tarreau3a8efeb2009-03-05 19:15:37 +01002406 { "connslots", acl_parse_int, acl_fetch_connslots, acl_match_int, ACL_USE_NOTHING },
Willy Tarreau079ff0a2009-03-05 21:34:28 +01002407 { "fe_sess_rate", acl_parse_int, acl_fetch_fe_sess_rate, acl_match_int, ACL_USE_NOTHING },
2408 { "be_sess_rate", acl_parse_int, acl_fetch_be_sess_rate, acl_match_int, ACL_USE_NOTHING },
Willy Tarreaua9d3c1e2007-11-30 20:48:53 +01002409 { NULL, NULL, NULL, NULL },
2410}};
2411
2412
2413__attribute__((constructor))
2414static void __backend_init(void)
2415{
2416 acl_register_keywords(&acl_kws);
2417}
2418
2419
Willy Tarreaubaaee002006-06-26 02:48:02 +02002420/*
2421 * Local variables:
2422 * c-indent-level: 8
2423 * c-basic-offset: 8
2424 * End:
2425 */