/*
 * First Available Server load balancing algorithm.
 *
 * This file implements an algorithm which emerged during a discussion with
 * Steen Larsen, initially inspired from Anshul Gandhi et al.'s work now
 * described as "packing" in section 3.5:
 *
 *   http://reports-archive.adm.cs.cmu.edu/anon/2012/CMU-CS-12-109.pdf
 *
 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
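
/* To illustrate the intent with made-up numbers: with servers s1 (id 1,
 * maxconn 10) and s2 (id 2), every new connection is sent to s1 until ten of
 * them are in flight there; only then does s2 start taking traffic, and as
 * soon as s1 frees a slot, new connections go back to s1. The load is thus
 * "packed" onto the lowest-numbered usable servers rather than spread across
 * all of them.
 */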

#include <common/compat.h>
#include <common/config.h>
#include <common/debug.h>
#include <eb32tree.h>

#include <types/global.h>
#include <types/server.h>

#include <proto/backend.h>
#include <proto/queue.h>


/* Remove a server from a tree. It must have previously been dequeued. This
 * function is meant to be called when a server is going down or has its
 * weight disabled.
 */
static inline void fas_remove_from_tree(struct server *s)
{
        s->lb_tree = NULL;
}

/* simply removes a server from a tree */
static inline void fas_dequeue_srv(struct server *s)
{
        eb32_delete(&s->lb_node);
}

/* Queue a server in its associated tree, assuming the weight is >0.
 * Servers are sorted by unique ID so that we send all connections to the first
 * available server in declaration order (or ID order) until its maxconn is
 * reached. It is important to understand that the server weight is not used
 * here.
 */
static inline void fas_queue_srv(struct server *s)
{
        s->lb_node.key = s->puid;
        eb32_insert(s->lb_tree, &s->lb_node);
}
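
/* For instance, with servers declared with ids 1, 3 and 5 (arbitrary values),
 * the tree keeps them ordered 1 < 3 < 5, so eb32_first() keeps returning the
 * server with id 1 for as long as it remains queued, whatever the configured
 * weights are.
 */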

/* Re-position the server in the FS tree after it has been assigned one
 * connection or after it has released one. Note that it is possible that
 * the server has been moved out of the tree due to failed health-checks.
 */
static void fas_srv_reposition(struct server *s)
{
        if (!s->lb_tree)
                return;
        fas_dequeue_srv(s);
        fas_queue_srv(s);
}
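
/* This function is registered in fas_init_server_tree() below as both the
 * server_take_conn and server_drop_conn callback, so it runs every time a
 * connection is assigned to or released by a server.
 */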

/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to down.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely down (the caller may not
 * know all the variables of a server's state).
 */
static void fas_set_server_status_down(struct server *srv)
{
        struct proxy *p = srv->proxy;

        if (srv->state == srv->prev_state &&
            srv->eweight == srv->prev_eweight)
                return;

        if (srv_is_usable(srv->state, srv->eweight))
                goto out_update_state;

        if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
                /* server was already down */
                goto out_update_backend;

        if (srv->state & SRV_BACKUP) {
                p->lbprm.tot_wbck -= srv->prev_eweight;
                p->srv_bck--;

                if (srv == p->lbprm.fbck) {
                        /* we lost the first backup server in a single-backup
                         * configuration, so we must search for another one.
                         */
                        struct server *srv2 = p->lbprm.fbck;
                        do {
                                srv2 = srv2->next;
                        } while (srv2 &&
                                 !((srv2->state & SRV_BACKUP) &&
                                   srv_is_usable(srv2->state, srv2->eweight)));
                        p->lbprm.fbck = srv2;
                }
        } else {
                p->lbprm.tot_wact -= srv->prev_eweight;
                p->srv_act--;
        }

        fas_dequeue_srv(srv);
        fas_remove_from_tree(srv);

 out_update_backend:
        /* check/update tot_used, tot_weight */
        update_backend_weight(p);
 out_update_state:
        srv->prev_state = srv->state;
        srv->prev_eweight = srv->eweight;
}

/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to up.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely UP (the caller may not
 * know all the variables of a server's state). This function will not change
 * the weight of a server which was already up.
 */
static void fas_set_server_status_up(struct server *srv)
{
        struct proxy *p = srv->proxy;

        if (srv->state == srv->prev_state &&
            srv->eweight == srv->prev_eweight)
                return;

        if (!srv_is_usable(srv->state, srv->eweight))
                goto out_update_state;

        if (srv_is_usable(srv->prev_state, srv->prev_eweight))
                /* server was already up */
                goto out_update_backend;

        if (srv->state & SRV_BACKUP) {
                srv->lb_tree = &p->lbprm.fas.bck;
                p->lbprm.tot_wbck += srv->eweight;
                p->srv_bck++;

                if (!(p->options & PR_O_USE_ALL_BK)) {
                        if (!p->lbprm.fbck) {
                                /* there was no backup server anymore */
                                p->lbprm.fbck = srv;
                        } else {
                                /* we may have restored a backup server prior to fbck,
                                 * in which case it should replace it.
                                 */
                                struct server *srv2 = srv;
                                do {
                                        srv2 = srv2->next;
                                } while (srv2 && (srv2 != p->lbprm.fbck));
                                if (srv2)
                                        p->lbprm.fbck = srv;
                        }
                }
        } else {
                srv->lb_tree = &p->lbprm.fas.act;
                p->lbprm.tot_wact += srv->eweight;
                p->srv_act++;
        }

        /* note that eweight cannot be 0 here */
        fas_queue_srv(srv);

 out_update_backend:
        /* check/update tot_used, tot_weight */
        update_backend_weight(p);
 out_update_state:
        srv->prev_state = srv->state;
        srv->prev_eweight = srv->eweight;
}

/* This function must be called after an update to server <srv>'s effective
 * weight. It may be called after a state change too.
 */
static void fas_update_server_weight(struct server *srv)
{
        int old_state, new_state;
        struct proxy *p = srv->proxy;

        if (srv->state == srv->prev_state &&
            srv->eweight == srv->prev_eweight)
                return;

        /* If changing the server's weight changes its state, we simply apply
         * the procedures we already have for status change. If the state
         * remains down, the server is not in any tree, so it's as easy as
         * updating its values. If the state remains up with different weights,
         * there are some computations to perform to find a new place and
         * possibly a new tree for this server.
         */

        old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
        new_state = srv_is_usable(srv->state, srv->eweight);

        if (!old_state && !new_state) {
                srv->prev_state = srv->state;
                srv->prev_eweight = srv->eweight;
                return;
        }
        else if (!old_state && new_state) {
                fas_set_server_status_up(srv);
                return;
        }
        else if (old_state && !new_state) {
                fas_set_server_status_down(srv);
                return;
        }

        if (srv->lb_tree)
                fas_dequeue_srv(srv);

        if (srv->state & SRV_BACKUP) {
                p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
                srv->lb_tree = &p->lbprm.fas.bck;
        } else {
                p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
                srv->lb_tree = &p->lbprm.fas.act;
        }

        fas_queue_srv(srv);

        update_backend_weight(p);
        srv->prev_state = srv->state;
        srv->prev_eweight = srv->eweight;
}
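
/* Note that since the tree key is the puid, requeuing after a pure weight
 * change does not move the server within its tree; the main effect of this
 * function is to keep tot_wact/tot_wbck consistent with the new eweight.
 */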

/* This function is responsible for building the trees for the "first
 * available server" algorithm. It also sets p->lbprm.wdiv to the eweight to
 * uweight ratio. Both active and backup groups are initialized.
 */
void fas_init_server_tree(struct proxy *p)
{
        struct server *srv;
        struct eb_root init_head = EB_ROOT;

        p->lbprm.set_server_status_up = fas_set_server_status_up;
        p->lbprm.set_server_status_down = fas_set_server_status_down;
        p->lbprm.update_server_eweight = fas_update_server_weight;
        p->lbprm.server_take_conn = fas_srv_reposition;
        p->lbprm.server_drop_conn = fas_srv_reposition;

        p->lbprm.wdiv = BE_WEIGHT_SCALE;
        for (srv = p->srv; srv; srv = srv->next) {
                srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
                srv->prev_state = srv->state;
        }

        recount_servers(p);
        update_backend_weight(p);

        p->lbprm.fas.act = init_head;
        p->lbprm.fas.bck = init_head;

        /* queue active and backup servers in two distinct groups */
        for (srv = p->srv; srv; srv = srv->next) {
                if (!srv_is_usable(srv->state, srv->eweight))
                        continue;
                srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fas.bck : &p->lbprm.fas.act;
                fas_queue_srv(srv);
        }
}
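
/* For context: this is the initialization entry point for HAProxy's "first"
 * balancing algorithm ("balance first" in the configuration), which relies on
 * each server's maxconn to decide when to spill over to the next server.
 */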

/* Return next server from the FS tree in backend <p>. If the tree is empty,
 * return NULL. Saturated servers are skipped.
 */
struct server *fas_get_next_server(struct proxy *p, struct server *srvtoavoid)
{
        struct server *srv, *avoided;
        struct eb32_node *node;

        srv = avoided = NULL;

        if (p->srv_act)
                node = eb32_first(&p->lbprm.fas.act);
        else if (p->lbprm.fbck)
                return p->lbprm.fbck;
        else if (p->srv_bck)
                node = eb32_first(&p->lbprm.fas.bck);
        else
                return NULL;

        while (node) {
                /* OK, we have a server. However, it may be saturated, in which
                 * case we don't want to reconsider it for now, so we'll simply
                 * skip it. Same if it's the server we try to avoid, in which
                 * case we simply remember it for later use if needed.
                 */
                struct server *s;

                s = eb32_entry(node, struct server, lb_node);
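                /* A server is usable here when it has no maxconn limit at
                 * all, or when nothing is pending in its queue and it is
                 * currently serving fewer connections than its dynamic
                 * maxconn allows.
                 */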
                if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
                        if (s != srvtoavoid) {
                                srv = s;
                                break;
                        }
                        avoided = s;
                }
                node = eb32_next(node);
        }

        if (!srv)
                srv = avoided;

        return srv;
}


/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */