blob: 6027dac1f5f6331e8937f63b5e95532352d722ef [file] [log] [blame]
/*
 * First Available Server load balancing algorithm.
 *
 * This file implements an algorithm which emerged during a discussion with
 * Steen Larsen, initially inspired from Anshul Gandhi et al.'s work now
 * described as "packing" in section 3.5:
 *
 *    http://reports-archive.adm.cs.cmu.edu/anon/2012/CMU-CS-12-109.pdf
 *
 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
18
19#include <common/compat.h>
20#include <common/config.h>
21#include <common/debug.h>
22#include <eb32tree.h>
23
24#include <types/global.h>
25#include <types/server.h>
26
27#include <proto/backend.h>
28#include <proto/queue.h>
29
30
31/* Remove a server from a tree. It must have previously been dequeued. This
32 * function is meant to be called when a server is going down or has its
33 * weight disabled.
34 */
35static inline void fas_remove_from_tree(struct server *s)
36{
37 s->lb_tree = NULL;
38}
39
40/* simply removes a server from a tree */
41static inline void fas_dequeue_srv(struct server *s)
42{
43 eb32_delete(&s->lb_node);
44}
45
46/* Queue a server in its associated tree, assuming the weight is >0.
47 * Servers are sorted by unique ID so that we send all connections to the first
48 * available server in declaration order (or ID order) until its maxconn is
49 * reached. It is important to understand that the server weight is not used
50 * here.
51 */
52static inline void fas_queue_srv(struct server *s)
53{
54 s->lb_node.key = s->puid;
55 eb32_insert(s->lb_tree, &s->lb_node);
56}
57
58/* Re-position the server in the FS tree after it has been assigned one
59 * connection or after it has released one. Note that it is possible that
60 * the server has been moved out of the tree due to failed health-checks.
61 */
62static void fas_srv_reposition(struct server *s)
63{
64 if (!s->lb_tree)
65 return;
66 fas_dequeue_srv(s);
67 fas_queue_srv(s);
68}
69
70/* This function updates the server trees according to server <srv>'s new
71 * state. It should be called when server <srv>'s status changes to down.
72 * It is not important whether the server was already down or not. It is not
73 * important either that the new state is completely down (the caller may not
74 * know all the variables of a server's state).
75 */
76static void fas_set_server_status_down(struct server *srv)
77{
78 struct proxy *p = srv->proxy;
79
Willy Tarreauc5150da2014-05-13 19:27:31 +020080 if (!srv_lb_status_changed(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +010081 return;
82
Willy Tarreau87eb1d62014-05-13 18:51:40 +020083 if (srv_is_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +010084 goto out_update_state;
85
Willy Tarreau87eb1d62014-05-13 18:51:40 +020086 if (!srv_was_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +010087 /* server was already down */
88 goto out_update_backend;
89
Willy Tarreauc93cd162014-05-13 15:54:22 +020090 if (srv->flags & SRV_F_BACKUP) {
Willy Tarreauf09c6602012-02-13 17:12:08 +010091 p->lbprm.tot_wbck -= srv->prev_eweight;
92 p->srv_bck--;
93
94 if (srv == p->lbprm.fbck) {
95 /* we lost the first backup server in a single-backup
96 * configuration, we must search another one.
97 */
98 struct server *srv2 = p->lbprm.fbck;
99 do {
100 srv2 = srv2->next;
101 } while (srv2 &&
Willy Tarreauc93cd162014-05-13 15:54:22 +0200102 !((srv2->flags & SRV_F_BACKUP) &&
Willy Tarreau87eb1d62014-05-13 18:51:40 +0200103 srv_is_usable(srv2)));
Willy Tarreauf09c6602012-02-13 17:12:08 +0100104 p->lbprm.fbck = srv2;
105 }
106 } else {
107 p->lbprm.tot_wact -= srv->prev_eweight;
108 p->srv_act--;
109 }
110
111 fas_dequeue_srv(srv);
112 fas_remove_from_tree(srv);
113
114out_update_backend:
115 /* check/update tot_used, tot_weight */
116 update_backend_weight(p);
117 out_update_state:
Willy Tarreauc5150da2014-05-13 19:27:31 +0200118 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100119}
120
121/* This function updates the server trees according to server <srv>'s new
122 * state. It should be called when server <srv>'s status changes to up.
123 * It is not important whether the server was already down or not. It is not
124 * important either that the new state is completely UP (the caller may not
125 * know all the variables of a server's state). This function will not change
126 * the weight of a server which was already up.
127 */
128static void fas_set_server_status_up(struct server *srv)
129{
130 struct proxy *p = srv->proxy;
131
Willy Tarreauc5150da2014-05-13 19:27:31 +0200132 if (!srv_lb_status_changed(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100133 return;
134
Willy Tarreau87eb1d62014-05-13 18:51:40 +0200135 if (!srv_is_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100136 goto out_update_state;
137
Willy Tarreau87eb1d62014-05-13 18:51:40 +0200138 if (srv_was_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100139 /* server was already up */
140 goto out_update_backend;
141
Willy Tarreauc93cd162014-05-13 15:54:22 +0200142 if (srv->flags & SRV_F_BACKUP) {
Willy Tarreauf09c6602012-02-13 17:12:08 +0100143 srv->lb_tree = &p->lbprm.fas.bck;
144 p->lbprm.tot_wbck += srv->eweight;
145 p->srv_bck++;
146
147 if (!(p->options & PR_O_USE_ALL_BK)) {
148 if (!p->lbprm.fbck) {
149 /* there was no backup server anymore */
150 p->lbprm.fbck = srv;
151 } else {
152 /* we may have restored a backup server prior to fbck,
153 * in which case it should replace it.
154 */
155 struct server *srv2 = srv;
156 do {
157 srv2 = srv2->next;
158 } while (srv2 && (srv2 != p->lbprm.fbck));
159 if (srv2)
160 p->lbprm.fbck = srv;
161 }
162 }
163 } else {
164 srv->lb_tree = &p->lbprm.fas.act;
165 p->lbprm.tot_wact += srv->eweight;
166 p->srv_act++;
167 }
168
169 /* note that eweight cannot be 0 here */
170 fas_queue_srv(srv);
171
172 out_update_backend:
173 /* check/update tot_used, tot_weight */
174 update_backend_weight(p);
175 out_update_state:
Willy Tarreauc5150da2014-05-13 19:27:31 +0200176 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100177}
178
179/* This function must be called after an update to server <srv>'s effective
180 * weight. It may be called after a state change too.
181 */
182static void fas_update_server_weight(struct server *srv)
183{
184 int old_state, new_state;
185 struct proxy *p = srv->proxy;
186
Willy Tarreauc5150da2014-05-13 19:27:31 +0200187 if (!srv_lb_status_changed(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100188 return;
189
190 /* If changing the server's weight changes its state, we simply apply
191 * the procedures we already have for status change. If the state
192 * remains down, the server is not in any tree, so it's as easy as
193 * updating its values. If the state remains up with different weights,
194 * there are some computations to perform to find a new place and
195 * possibly a new tree for this server.
196 */
197
Willy Tarreau87eb1d62014-05-13 18:51:40 +0200198 old_state = srv_was_usable(srv);
199 new_state = srv_is_usable(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100200
201 if (!old_state && !new_state) {
Willy Tarreauc5150da2014-05-13 19:27:31 +0200202 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100203 return;
204 }
205 else if (!old_state && new_state) {
206 fas_set_server_status_up(srv);
207 return;
208 }
209 else if (old_state && !new_state) {
210 fas_set_server_status_down(srv);
211 return;
212 }
213
214 if (srv->lb_tree)
215 fas_dequeue_srv(srv);
216
Willy Tarreauc93cd162014-05-13 15:54:22 +0200217 if (srv->flags & SRV_F_BACKUP) {
Willy Tarreauf09c6602012-02-13 17:12:08 +0100218 p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
219 srv->lb_tree = &p->lbprm.fas.bck;
220 } else {
221 p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
222 srv->lb_tree = &p->lbprm.fas.act;
223 }
224
225 fas_queue_srv(srv);
226
227 update_backend_weight(p);
Willy Tarreauc5150da2014-05-13 19:27:31 +0200228 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100229}
230
231/* This function is responsible for building the trees in case of fast
232 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
233 * uweight ratio. Both active and backup groups are initialized.
234 */
235void fas_init_server_tree(struct proxy *p)
236{
237 struct server *srv;
238 struct eb_root init_head = EB_ROOT;
239
240 p->lbprm.set_server_status_up = fas_set_server_status_up;
241 p->lbprm.set_server_status_down = fas_set_server_status_down;
242 p->lbprm.update_server_eweight = fas_update_server_weight;
243 p->lbprm.server_take_conn = fas_srv_reposition;
244 p->lbprm.server_drop_conn = fas_srv_reposition;
245
246 p->lbprm.wdiv = BE_WEIGHT_SCALE;
247 for (srv = p->srv; srv; srv = srv->next) {
Willy Tarreau004e0452013-11-21 11:22:01 +0100248 srv->eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
Willy Tarreauc5150da2014-05-13 19:27:31 +0200249 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100250 }
251
252 recount_servers(p);
253 update_backend_weight(p);
254
255 p->lbprm.fas.act = init_head;
256 p->lbprm.fas.bck = init_head;
257
258 /* queue active and backup servers in two distinct groups */
259 for (srv = p->srv; srv; srv = srv->next) {
Willy Tarreau87eb1d62014-05-13 18:51:40 +0200260 if (!srv_is_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100261 continue;
Willy Tarreauc93cd162014-05-13 15:54:22 +0200262 srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fas.bck : &p->lbprm.fas.act;
Willy Tarreauf09c6602012-02-13 17:12:08 +0100263 fas_queue_srv(srv);
264 }
265}
266
267/* Return next server from the FS tree in backend <p>. If the tree is empty,
268 * return NULL. Saturated servers are skipped.
269 */
270struct server *fas_get_next_server(struct proxy *p, struct server *srvtoavoid)
271{
272 struct server *srv, *avoided;
273 struct eb32_node *node;
274
275 srv = avoided = NULL;
276
277 if (p->srv_act)
278 node = eb32_first(&p->lbprm.fas.act);
279 else if (p->lbprm.fbck)
280 return p->lbprm.fbck;
281 else if (p->srv_bck)
282 node = eb32_first(&p->lbprm.fas.bck);
283 else
284 return NULL;
285
286 while (node) {
287 /* OK, we have a server. However, it may be saturated, in which
288 * case we don't want to reconsider it for now, so we'll simply
289 * skip it. Same if it's the server we try to avoid, in which
290 * case we simply remember it for later use if needed.
291 */
292 struct server *s;
293
294 s = eb32_entry(node, struct server, lb_node);
295 if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
296 if (s != srvtoavoid) {
297 srv = s;
298 break;
299 }
300 avoided = s;
301 }
302 node = eb32_next(node);
303 }
304
305 if (!srv)
306 srv = avoided;
307
308 return srv;
309}
310
311
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */