blob: d301143693319caf51237fe1d9d453da87c53473 [file] [log] [blame]
/*
 * First Available Server load balancing algorithm.
 *
 * This file implements an algorithm which emerged during a discussion with
 * Steen Larsen, initially inspired by Anshul Gandhi et al.'s work, now
 * described as "packing" in section 3.5:
 *
 *    http://reports-archive.adm.cs.cmu.edu/anon/2012/CMU-CS-12-109.pdf
 *
 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
18
19#include <common/compat.h>
20#include <common/config.h>
21#include <common/debug.h>
22#include <eb32tree.h>
23
24#include <types/global.h>
25#include <types/server.h>
26
27#include <proto/backend.h>
28#include <proto/queue.h>
29
30
31/* Remove a server from a tree. It must have previously been dequeued. This
32 * function is meant to be called when a server is going down or has its
33 * weight disabled.
34 */
35static inline void fas_remove_from_tree(struct server *s)
36{
37 s->lb_tree = NULL;
38}
39
40/* simply removes a server from a tree */
41static inline void fas_dequeue_srv(struct server *s)
42{
43 eb32_delete(&s->lb_node);
44}
45
46/* Queue a server in its associated tree, assuming the weight is >0.
47 * Servers are sorted by unique ID so that we send all connections to the first
48 * available server in declaration order (or ID order) until its maxconn is
49 * reached. It is important to understand that the server weight is not used
50 * here.
51 */
52static inline void fas_queue_srv(struct server *s)
53{
54 s->lb_node.key = s->puid;
55 eb32_insert(s->lb_tree, &s->lb_node);
56}
57
58/* Re-position the server in the FS tree after it has been assigned one
59 * connection or after it has released one. Note that it is possible that
60 * the server has been moved out of the tree due to failed health-checks.
61 */
62static void fas_srv_reposition(struct server *s)
63{
64 if (!s->lb_tree)
65 return;
Christopher Faulet5b517552017-06-09 14:17:53 +020066
Christopher Faulet2a944ee2017-11-07 10:42:54 +010067 HA_SPIN_LOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
Willy Tarreauf09c6602012-02-13 17:12:08 +010068 fas_dequeue_srv(s);
69 fas_queue_srv(s);
Christopher Faulet2a944ee2017-11-07 10:42:54 +010070 HA_SPIN_UNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
Willy Tarreauf09c6602012-02-13 17:12:08 +010071}
72
73/* This function updates the server trees according to server <srv>'s new
74 * state. It should be called when server <srv>'s status changes to down.
75 * It is not important whether the server was already down or not. It is not
76 * important either that the new state is completely down (the caller may not
77 * know all the variables of a server's state).
78 */
79static void fas_set_server_status_down(struct server *srv)
80{
81 struct proxy *p = srv->proxy;
82
Willy Tarreauc5150da2014-05-13 19:27:31 +020083 if (!srv_lb_status_changed(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +010084 return;
85
Emeric Brun52a91d32017-08-31 14:41:55 +020086 if (srv_willbe_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +010087 goto out_update_state;
88
Emeric Brun52a91d32017-08-31 14:41:55 +020089 if (!srv_currently_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +010090 /* server was already down */
91 goto out_update_backend;
92
Willy Tarreauc93cd162014-05-13 15:54:22 +020093 if (srv->flags & SRV_F_BACKUP) {
Emeric Brun52a91d32017-08-31 14:41:55 +020094 p->lbprm.tot_wbck -= srv->cur_eweight;
Willy Tarreauf09c6602012-02-13 17:12:08 +010095 p->srv_bck--;
96
97 if (srv == p->lbprm.fbck) {
98 /* we lost the first backup server in a single-backup
99 * configuration, we must search another one.
100 */
101 struct server *srv2 = p->lbprm.fbck;
102 do {
103 srv2 = srv2->next;
104 } while (srv2 &&
Willy Tarreauc93cd162014-05-13 15:54:22 +0200105 !((srv2->flags & SRV_F_BACKUP) &&
Emeric Brun52a91d32017-08-31 14:41:55 +0200106 srv_willbe_usable(srv2)));
Willy Tarreauf09c6602012-02-13 17:12:08 +0100107 p->lbprm.fbck = srv2;
108 }
109 } else {
Emeric Brun52a91d32017-08-31 14:41:55 +0200110 p->lbprm.tot_wact -= srv->cur_eweight;
Willy Tarreauf09c6602012-02-13 17:12:08 +0100111 p->srv_act--;
112 }
113
114 fas_dequeue_srv(srv);
115 fas_remove_from_tree(srv);
116
Christopher Faulet5b517552017-06-09 14:17:53 +0200117 out_update_backend:
Willy Tarreauf09c6602012-02-13 17:12:08 +0100118 /* check/update tot_used, tot_weight */
119 update_backend_weight(p);
120 out_update_state:
Willy Tarreauc5150da2014-05-13 19:27:31 +0200121 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100122}
123
124/* This function updates the server trees according to server <srv>'s new
125 * state. It should be called when server <srv>'s status changes to up.
126 * It is not important whether the server was already down or not. It is not
127 * important either that the new state is completely UP (the caller may not
128 * know all the variables of a server's state). This function will not change
129 * the weight of a server which was already up.
130 */
131static void fas_set_server_status_up(struct server *srv)
132{
133 struct proxy *p = srv->proxy;
134
Willy Tarreauc5150da2014-05-13 19:27:31 +0200135 if (!srv_lb_status_changed(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100136 return;
137
Emeric Brun52a91d32017-08-31 14:41:55 +0200138 if (!srv_willbe_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100139 goto out_update_state;
140
Emeric Brun52a91d32017-08-31 14:41:55 +0200141 if (srv_currently_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100142 /* server was already up */
143 goto out_update_backend;
144
Willy Tarreauc93cd162014-05-13 15:54:22 +0200145 if (srv->flags & SRV_F_BACKUP) {
Willy Tarreauf09c6602012-02-13 17:12:08 +0100146 srv->lb_tree = &p->lbprm.fas.bck;
Emeric Brun52a91d32017-08-31 14:41:55 +0200147 p->lbprm.tot_wbck += srv->next_eweight;
Willy Tarreauf09c6602012-02-13 17:12:08 +0100148 p->srv_bck++;
149
150 if (!(p->options & PR_O_USE_ALL_BK)) {
151 if (!p->lbprm.fbck) {
152 /* there was no backup server anymore */
153 p->lbprm.fbck = srv;
154 } else {
155 /* we may have restored a backup server prior to fbck,
156 * in which case it should replace it.
157 */
158 struct server *srv2 = srv;
159 do {
160 srv2 = srv2->next;
161 } while (srv2 && (srv2 != p->lbprm.fbck));
162 if (srv2)
163 p->lbprm.fbck = srv;
164 }
165 }
166 } else {
167 srv->lb_tree = &p->lbprm.fas.act;
Emeric Brun52a91d32017-08-31 14:41:55 +0200168 p->lbprm.tot_wact += srv->next_eweight;
Willy Tarreauf09c6602012-02-13 17:12:08 +0100169 p->srv_act++;
170 }
171
172 /* note that eweight cannot be 0 here */
173 fas_queue_srv(srv);
174
175 out_update_backend:
176 /* check/update tot_used, tot_weight */
177 update_backend_weight(p);
178 out_update_state:
Willy Tarreauc5150da2014-05-13 19:27:31 +0200179 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100180}
181
182/* This function must be called after an update to server <srv>'s effective
183 * weight. It may be called after a state change too.
184 */
185static void fas_update_server_weight(struct server *srv)
186{
187 int old_state, new_state;
188 struct proxy *p = srv->proxy;
189
Willy Tarreauc5150da2014-05-13 19:27:31 +0200190 if (!srv_lb_status_changed(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100191 return;
192
193 /* If changing the server's weight changes its state, we simply apply
194 * the procedures we already have for status change. If the state
195 * remains down, the server is not in any tree, so it's as easy as
196 * updating its values. If the state remains up with different weights,
197 * there are some computations to perform to find a new place and
198 * possibly a new tree for this server.
199 */
200
Emeric Brun52a91d32017-08-31 14:41:55 +0200201 old_state = srv_currently_usable(srv);
202 new_state = srv_willbe_usable(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100203
204 if (!old_state && !new_state) {
Willy Tarreauc5150da2014-05-13 19:27:31 +0200205 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100206 return;
207 }
208 else if (!old_state && new_state) {
209 fas_set_server_status_up(srv);
210 return;
211 }
212 else if (old_state && !new_state) {
213 fas_set_server_status_down(srv);
214 return;
215 }
216
217 if (srv->lb_tree)
218 fas_dequeue_srv(srv);
219
Willy Tarreauc93cd162014-05-13 15:54:22 +0200220 if (srv->flags & SRV_F_BACKUP) {
Emeric Brun52a91d32017-08-31 14:41:55 +0200221 p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight;
Willy Tarreauf09c6602012-02-13 17:12:08 +0100222 srv->lb_tree = &p->lbprm.fas.bck;
223 } else {
Emeric Brun52a91d32017-08-31 14:41:55 +0200224 p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight;
Willy Tarreauf09c6602012-02-13 17:12:08 +0100225 srv->lb_tree = &p->lbprm.fas.act;
226 }
227
228 fas_queue_srv(srv);
229
230 update_backend_weight(p);
Willy Tarreauc5150da2014-05-13 19:27:31 +0200231 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100232}
233
234/* This function is responsible for building the trees in case of fast
235 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
236 * uweight ratio. Both active and backup groups are initialized.
237 */
238void fas_init_server_tree(struct proxy *p)
239{
240 struct server *srv;
241 struct eb_root init_head = EB_ROOT;
242
243 p->lbprm.set_server_status_up = fas_set_server_status_up;
244 p->lbprm.set_server_status_down = fas_set_server_status_down;
245 p->lbprm.update_server_eweight = fas_update_server_weight;
246 p->lbprm.server_take_conn = fas_srv_reposition;
247 p->lbprm.server_drop_conn = fas_srv_reposition;
248
249 p->lbprm.wdiv = BE_WEIGHT_SCALE;
250 for (srv = p->srv; srv; srv = srv->next) {
Emeric Brun52a91d32017-08-31 14:41:55 +0200251 srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
Willy Tarreauc5150da2014-05-13 19:27:31 +0200252 srv_lb_commit_status(srv);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100253 }
254
255 recount_servers(p);
256 update_backend_weight(p);
257
258 p->lbprm.fas.act = init_head;
259 p->lbprm.fas.bck = init_head;
260
261 /* queue active and backup servers in two distinct groups */
262 for (srv = p->srv; srv; srv = srv->next) {
Emeric Brun52a91d32017-08-31 14:41:55 +0200263 if (!srv_currently_usable(srv))
Willy Tarreauf09c6602012-02-13 17:12:08 +0100264 continue;
Willy Tarreauc93cd162014-05-13 15:54:22 +0200265 srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fas.bck : &p->lbprm.fas.act;
Willy Tarreauf09c6602012-02-13 17:12:08 +0100266 fas_queue_srv(srv);
267 }
268}
269
270/* Return next server from the FS tree in backend <p>. If the tree is empty,
271 * return NULL. Saturated servers are skipped.
272 */
273struct server *fas_get_next_server(struct proxy *p, struct server *srvtoavoid)
274{
275 struct server *srv, *avoided;
276 struct eb32_node *node;
277
278 srv = avoided = NULL;
279
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100280 HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100281 if (p->srv_act)
282 node = eb32_first(&p->lbprm.fas.act);
Christopher Faulet5b517552017-06-09 14:17:53 +0200283 else if (p->lbprm.fbck) {
284 srv = p->lbprm.fbck;
285 goto out;
286 }
Willy Tarreauf09c6602012-02-13 17:12:08 +0100287 else if (p->srv_bck)
288 node = eb32_first(&p->lbprm.fas.bck);
Christopher Faulet5b517552017-06-09 14:17:53 +0200289 else {
290 srv = NULL;
291 goto out;
292 }
Willy Tarreauf09c6602012-02-13 17:12:08 +0100293
294 while (node) {
295 /* OK, we have a server. However, it may be saturated, in which
296 * case we don't want to reconsider it for now, so we'll simply
297 * skip it. Same if it's the server we try to avoid, in which
298 * case we simply remember it for later use if needed.
299 */
300 struct server *s;
301
302 s = eb32_entry(node, struct server, lb_node);
303 if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
304 if (s != srvtoavoid) {
305 srv = s;
306 break;
307 }
308 avoided = s;
309 }
310 node = eb32_next(node);
311 }
312
313 if (!srv)
314 srv = avoided;
Christopher Faulet5b517552017-06-09 14:17:53 +0200315 out:
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100316 HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
Willy Tarreauf09c6602012-02-13 17:12:08 +0100317 return srv;
318}
319
320
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */