blob: 7eafe489a746267e20cd781bc6166935c7981cce [file] [log] [blame]
Willy Tarreauf89c1872009-10-01 11:19:37 +02001/*
2 * Fast Weighted Round Robin load balancing algorithm.
3 *
4 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020013#include <haproxy/api.h>
Willy Tarreau8d2b7772020-05-27 10:58:19 +020014#include <import/eb32tree.h>
Willy Tarreauf89c1872009-10-01 11:19:37 +020015
Willy Tarreauf89c1872009-10-01 11:19:37 +020016#include <types/server.h>
17
18#include <proto/backend.h>
19#include <proto/queue.h>
20
21static inline void fwrr_remove_from_tree(struct server *s);
22static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
23static inline void fwrr_dequeue_srv(struct server *s);
24static void fwrr_get_srv(struct server *s);
25static void fwrr_queue_srv(struct server *s);
26
27
28/* This function updates the server trees according to server <srv>'s new
29 * state. It should be called when server <srv>'s status changes to down.
30 * It is not important whether the server was already down or not. It is not
31 * important either that the new state is completely down (the caller may not
32 * know all the variables of a server's state).
Willy Tarreau1b877482018-08-21 19:44:53 +020033 *
34 * The server's lock must be held. The lbprm's lock will be used.
Willy Tarreauf89c1872009-10-01 11:19:37 +020035 */
36static void fwrr_set_server_status_down(struct server *srv)
37{
38 struct proxy *p = srv->proxy;
39 struct fwrr_group *grp;
40
Willy Tarreauc5150da2014-05-13 19:27:31 +020041 if (!srv_lb_status_changed(srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +020042 return;
43
Emeric Brun52a91d32017-08-31 14:41:55 +020044 if (srv_willbe_usable(srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +020045 goto out_update_state;
46
Willy Tarreau1b877482018-08-21 19:44:53 +020047 HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);
48
Emeric Brun52a91d32017-08-31 14:41:55 +020049 if (!srv_currently_usable(srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +020050 /* server was already down */
51 goto out_update_backend;
52
Willy Tarreauc93cd162014-05-13 15:54:22 +020053 grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
Emeric Brun52a91d32017-08-31 14:41:55 +020054 grp->next_weight -= srv->cur_eweight;
Willy Tarreauf89c1872009-10-01 11:19:37 +020055
Willy Tarreauc93cd162014-05-13 15:54:22 +020056 if (srv->flags & SRV_F_BACKUP) {
Willy Tarreauf89c1872009-10-01 11:19:37 +020057 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
58 p->srv_bck--;
59
60 if (srv == p->lbprm.fbck) {
61 /* we lost the first backup server in a single-backup
62 * configuration, we must search another one.
63 */
64 struct server *srv2 = p->lbprm.fbck;
65 do {
66 srv2 = srv2->next;
67 } while (srv2 &&
Willy Tarreauc93cd162014-05-13 15:54:22 +020068 !((srv2->flags & SRV_F_BACKUP) &&
Emeric Brun52a91d32017-08-31 14:41:55 +020069 srv_willbe_usable(srv2)));
Willy Tarreauf89c1872009-10-01 11:19:37 +020070 p->lbprm.fbck = srv2;
71 }
72 } else {
73 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
74 p->srv_act--;
75 }
76
77 fwrr_dequeue_srv(srv);
78 fwrr_remove_from_tree(srv);
79
80out_update_backend:
81 /* check/update tot_used, tot_weight */
82 update_backend_weight(p);
Willy Tarreau1b877482018-08-21 19:44:53 +020083 HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
84
Willy Tarreauf89c1872009-10-01 11:19:37 +020085 out_update_state:
Willy Tarreauc5150da2014-05-13 19:27:31 +020086 srv_lb_commit_status(srv);
Willy Tarreauf89c1872009-10-01 11:19:37 +020087}
88
89/* This function updates the server trees according to server <srv>'s new
90 * state. It should be called when server <srv>'s status changes to up.
91 * It is not important whether the server was already down or not. It is not
92 * important either that the new state is completely UP (the caller may not
93 * know all the variables of a server's state). This function will not change
94 * the weight of a server which was already up.
Willy Tarreau1b877482018-08-21 19:44:53 +020095 *
96 * The server's lock must be held. The lbprm's lock will be used.
Willy Tarreauf89c1872009-10-01 11:19:37 +020097 */
98static void fwrr_set_server_status_up(struct server *srv)
99{
100 struct proxy *p = srv->proxy;
101 struct fwrr_group *grp;
102
Willy Tarreauc5150da2014-05-13 19:27:31 +0200103 if (!srv_lb_status_changed(srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +0200104 return;
105
Emeric Brun52a91d32017-08-31 14:41:55 +0200106 if (!srv_willbe_usable(srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +0200107 goto out_update_state;
108
Willy Tarreau1b877482018-08-21 19:44:53 +0200109 HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);
110
Emeric Brun52a91d32017-08-31 14:41:55 +0200111 if (srv_currently_usable(srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +0200112 /* server was already up */
113 goto out_update_backend;
114
Willy Tarreauc93cd162014-05-13 15:54:22 +0200115 grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
Emeric Brun52a91d32017-08-31 14:41:55 +0200116 grp->next_weight += srv->next_eweight;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200117
Willy Tarreauc93cd162014-05-13 15:54:22 +0200118 if (srv->flags & SRV_F_BACKUP) {
Willy Tarreauf89c1872009-10-01 11:19:37 +0200119 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
120 p->srv_bck++;
121
122 if (!(p->options & PR_O_USE_ALL_BK)) {
123 if (!p->lbprm.fbck) {
124 /* there was no backup server anymore */
125 p->lbprm.fbck = srv;
126 } else {
127 /* we may have restored a backup server prior to fbck,
128 * in which case it should replace it.
129 */
130 struct server *srv2 = srv;
131 do {
132 srv2 = srv2->next;
133 } while (srv2 && (srv2 != p->lbprm.fbck));
134 if (srv2)
135 p->lbprm.fbck = srv;
136 }
137 }
138 } else {
139 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
140 p->srv_act++;
141 }
142
143 /* note that eweight cannot be 0 here */
144 fwrr_get_srv(srv);
Emeric Brun52a91d32017-08-31 14:41:55 +0200145 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200146 fwrr_queue_srv(srv);
147
148out_update_backend:
149 /* check/update tot_used, tot_weight */
150 update_backend_weight(p);
Willy Tarreau1b877482018-08-21 19:44:53 +0200151 HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
152
Willy Tarreauf89c1872009-10-01 11:19:37 +0200153 out_update_state:
Willy Tarreauc5150da2014-05-13 19:27:31 +0200154 srv_lb_commit_status(srv);
Willy Tarreauf89c1872009-10-01 11:19:37 +0200155}
156
157/* This function must be called after an update to server <srv>'s effective
158 * weight. It may be called after a state change too.
Willy Tarreau1b877482018-08-21 19:44:53 +0200159 *
160 * The server's lock must be held. The lbprm's lock will be used.
Willy Tarreauf89c1872009-10-01 11:19:37 +0200161 */
162static void fwrr_update_server_weight(struct server *srv)
163{
164 int old_state, new_state;
165 struct proxy *p = srv->proxy;
166 struct fwrr_group *grp;
167
Willy Tarreauc5150da2014-05-13 19:27:31 +0200168 if (!srv_lb_status_changed(srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +0200169 return;
170
171 /* If changing the server's weight changes its state, we simply apply
172 * the procedures we already have for status change. If the state
173 * remains down, the server is not in any tree, so it's as easy as
174 * updating its values. If the state remains up with different weights,
175 * there are some computations to perform to find a new place and
176 * possibly a new tree for this server.
177 */
178
Emeric Brun52a91d32017-08-31 14:41:55 +0200179 old_state = srv_currently_usable(srv);
180 new_state = srv_willbe_usable(srv);
Willy Tarreauf89c1872009-10-01 11:19:37 +0200181
182 if (!old_state && !new_state) {
Willy Tarreauc5150da2014-05-13 19:27:31 +0200183 srv_lb_commit_status(srv);
Willy Tarreauf89c1872009-10-01 11:19:37 +0200184 return;
185 }
186 else if (!old_state && new_state) {
187 fwrr_set_server_status_up(srv);
188 return;
189 }
190 else if (old_state && !new_state) {
191 fwrr_set_server_status_down(srv);
192 return;
193 }
194
Willy Tarreau1b877482018-08-21 19:44:53 +0200195 HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);
196
Willy Tarreauc93cd162014-05-13 15:54:22 +0200197 grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
Emeric Brun52a91d32017-08-31 14:41:55 +0200198 grp->next_weight = grp->next_weight - srv->cur_eweight + srv->next_eweight;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200199
200 p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
201 p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
202
203 if (srv->lb_tree == grp->init) {
204 fwrr_dequeue_srv(srv);
205 fwrr_queue_by_weight(grp->init, srv);
206 }
207 else if (!srv->lb_tree) {
208 /* FIXME: server was down. This is not possible right now but
209 * may be needed soon for slowstart or graceful shutdown.
210 */
211 fwrr_dequeue_srv(srv);
212 fwrr_get_srv(srv);
Emeric Brun52a91d32017-08-31 14:41:55 +0200213 srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200214 fwrr_queue_srv(srv);
215 } else {
216 /* The server is either active or in the next queue. If it's
217 * still in the active queue and it has not consumed all of its
218 * places, let's adjust its next position.
219 */
220 fwrr_get_srv(srv);
221
Emeric Brun52a91d32017-08-31 14:41:55 +0200222 if (srv->next_eweight > 0) {
Willy Tarreauf89c1872009-10-01 11:19:37 +0200223 int prev_next = srv->npos;
Emeric Brun52a91d32017-08-31 14:41:55 +0200224 int step = grp->next_weight / srv->next_eweight;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200225
226 srv->npos = srv->lpos + step;
227 srv->rweight = 0;
228
229 if (srv->npos > prev_next)
230 srv->npos = prev_next;
231 if (srv->npos < grp->curr_pos + 2)
232 srv->npos = grp->curr_pos + step;
233 } else {
234 /* push it into the next tree */
235 srv->npos = grp->curr_pos + grp->curr_weight;
236 }
237
238 fwrr_dequeue_srv(srv);
239 fwrr_queue_srv(srv);
240 }
241
242 update_backend_weight(p);
Willy Tarreau1b877482018-08-21 19:44:53 +0200243 HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
244
Willy Tarreauc5150da2014-05-13 19:27:31 +0200245 srv_lb_commit_status(srv);
Willy Tarreauf89c1872009-10-01 11:19:37 +0200246}
247
248/* Remove a server from a tree. It must have previously been dequeued. This
249 * function is meant to be called when a server is going down or has its
250 * weight disabled.
Willy Tarreau1b877482018-08-21 19:44:53 +0200251 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200252 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreauf89c1872009-10-01 11:19:37 +0200253 */
254static inline void fwrr_remove_from_tree(struct server *s)
255{
256 s->lb_tree = NULL;
257}
258
259/* Queue a server in the weight tree <root>, assuming the weight is >0.
260 * We want to sort them by inverted weights, because we need to place
261 * heavy servers first in order to get a smooth distribution.
Willy Tarreau1b877482018-08-21 19:44:53 +0200262 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200263 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreauf89c1872009-10-01 11:19:37 +0200264 */
265static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
266{
Emeric Brun52a91d32017-08-31 14:41:55 +0200267 s->lb_node.key = SRV_EWGHT_MAX - s->next_eweight;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200268 eb32_insert(root, &s->lb_node);
269 s->lb_tree = root;
270}
271
272/* This function is responsible for building the weight trees in case of fast
273 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
274 * ratio. Both active and backup groups are initialized.
275 */
276void fwrr_init_server_groups(struct proxy *p)
277{
278 struct server *srv;
279 struct eb_root init_head = EB_ROOT;
280
281 p->lbprm.set_server_status_up = fwrr_set_server_status_up;
282 p->lbprm.set_server_status_down = fwrr_set_server_status_down;
283 p->lbprm.update_server_eweight = fwrr_update_server_weight;
284
285 p->lbprm.wdiv = BE_WEIGHT_SCALE;
286 for (srv = p->srv; srv; srv = srv->next) {
Emeric Brun52a91d32017-08-31 14:41:55 +0200287 srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
Willy Tarreauc5150da2014-05-13 19:27:31 +0200288 srv_lb_commit_status(srv);
Willy Tarreauf89c1872009-10-01 11:19:37 +0200289 }
290
291 recount_servers(p);
292 update_backend_weight(p);
293
294 /* prepare the active servers group */
295 p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
296 p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
297 p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
298 p->lbprm.fwrr.act.t1 = init_head;
299 p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
300 p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
301
302 /* prepare the backup servers group */
303 p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
304 p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
305 p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
306 p->lbprm.fwrr.bck.t1 = init_head;
307 p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
308 p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
309
310 /* queue active and backup servers in two distinct groups */
311 for (srv = p->srv; srv; srv = srv->next) {
Emeric Brun52a91d32017-08-31 14:41:55 +0200312 if (!srv_currently_usable(srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +0200313 continue;
Willy Tarreauc93cd162014-05-13 15:54:22 +0200314 fwrr_queue_by_weight((srv->flags & SRV_F_BACKUP) ?
Willy Tarreauf89c1872009-10-01 11:19:37 +0200315 p->lbprm.fwrr.bck.init :
316 p->lbprm.fwrr.act.init,
317 srv);
318 }
319}
320
Willy Tarreau1b877482018-08-21 19:44:53 +0200321/* simply removes a server from a weight tree.
322 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200323 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreau1b877482018-08-21 19:44:53 +0200324 */
Willy Tarreauf89c1872009-10-01 11:19:37 +0200325static inline void fwrr_dequeue_srv(struct server *s)
326{
327 eb32_delete(&s->lb_node);
328}
329
330/* queues a server into the appropriate group and tree depending on its
331 * backup status, and ->npos. If the server is disabled, simply assign
332 * it to the NULL tree.
Willy Tarreau1b877482018-08-21 19:44:53 +0200333 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200334 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreauf89c1872009-10-01 11:19:37 +0200335 */
336static void fwrr_queue_srv(struct server *s)
337{
338 struct proxy *p = s->proxy;
339 struct fwrr_group *grp;
340
Willy Tarreauc93cd162014-05-13 15:54:22 +0200341 grp = (s->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
Christopher Faulet5b517552017-06-09 14:17:53 +0200342
Willy Tarreauf89c1872009-10-01 11:19:37 +0200343 /* Delay everything which does not fit into the window and everything
344 * which does not fit into the theorical new window.
345 */
Emeric Brun52a91d32017-08-31 14:41:55 +0200346 if (!srv_willbe_usable(s)) {
Willy Tarreauf89c1872009-10-01 11:19:37 +0200347 fwrr_remove_from_tree(s);
348 }
Emeric Brun52a91d32017-08-31 14:41:55 +0200349 else if (s->next_eweight <= 0 ||
Willy Tarreauf89c1872009-10-01 11:19:37 +0200350 s->npos >= 2 * grp->curr_weight ||
351 s->npos >= grp->curr_weight + grp->next_weight) {
352 /* put into next tree, and readjust npos in case we could
353 * finally take this back to current. */
Willy Tarreau274ba672019-04-24 10:48:00 +0200354 s->npos -= grp->curr_weight;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200355 fwrr_queue_by_weight(grp->next, s);
356 }
357 else {
358 /* The sorting key is stored in units of s->npos * user_weight
359 * in order to avoid overflows. As stated in backend.h, the
360 * lower the scale, the rougher the weights modulation, and the
361 * higher the scale, the lower the number of servers without
362 * overflow. With this formula, the result is always positive,
Godbacha34bdc02013-07-22 07:44:53 +0800363 * so we can use eb32_insert().
Willy Tarreauf89c1872009-10-01 11:19:37 +0200364 */
365 s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
Emeric Brun52a91d32017-08-31 14:41:55 +0200366 (unsigned)(SRV_EWGHT_MAX + s->rweight - s->next_eweight) / BE_WEIGHT_SCALE;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200367
368 eb32_insert(&grp->curr, &s->lb_node);
369 s->lb_tree = &grp->curr;
370 }
371}
372
Willy Tarreau1b877482018-08-21 19:44:53 +0200373/* prepares a server when extracting it from the "init" tree.
374 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200375 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreau1b877482018-08-21 19:44:53 +0200376 */
Willy Tarreauf89c1872009-10-01 11:19:37 +0200377static inline void fwrr_get_srv_init(struct server *s)
378{
379 s->npos = s->rweight = 0;
380}
381
Willy Tarreau1b877482018-08-21 19:44:53 +0200382/* prepares a server when extracting it from the "next" tree.
383 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200384 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreau1b877482018-08-21 19:44:53 +0200385 */
Willy Tarreauf89c1872009-10-01 11:19:37 +0200386static inline void fwrr_get_srv_next(struct server *s)
387{
Willy Tarreauc93cd162014-05-13 15:54:22 +0200388 struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
Willy Tarreauf89c1872009-10-01 11:19:37 +0200389 &s->proxy->lbprm.fwrr.bck :
390 &s->proxy->lbprm.fwrr.act;
391
Willy Tarreau274ba672019-04-24 10:48:00 +0200392 s->npos += grp->curr_weight;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200393}
394
Willy Tarreau1b877482018-08-21 19:44:53 +0200395/* prepares a server when it was marked down.
396 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200397 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreau1b877482018-08-21 19:44:53 +0200398 */
Willy Tarreauf89c1872009-10-01 11:19:37 +0200399static inline void fwrr_get_srv_down(struct server *s)
400{
Willy Tarreauc93cd162014-05-13 15:54:22 +0200401 struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
Willy Tarreauf89c1872009-10-01 11:19:37 +0200402 &s->proxy->lbprm.fwrr.bck :
403 &s->proxy->lbprm.fwrr.act;
404
405 s->npos = grp->curr_pos;
406}
407
Willy Tarreau1b877482018-08-21 19:44:53 +0200408/* prepares a server when extracting it from its tree.
409 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200410 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreau1b877482018-08-21 19:44:53 +0200411 */
Willy Tarreauf89c1872009-10-01 11:19:37 +0200412static void fwrr_get_srv(struct server *s)
413{
414 struct proxy *p = s->proxy;
Willy Tarreauc93cd162014-05-13 15:54:22 +0200415 struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
Willy Tarreauf89c1872009-10-01 11:19:37 +0200416 &p->lbprm.fwrr.bck :
417 &p->lbprm.fwrr.act;
418
419 if (s->lb_tree == grp->init) {
420 fwrr_get_srv_init(s);
421 }
422 else if (s->lb_tree == grp->next) {
423 fwrr_get_srv_next(s);
424 }
425 else if (s->lb_tree == NULL) {
426 fwrr_get_srv_down(s);
427 }
428}
429
430/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
431 * when this happens, and "next" filled with servers sorted by weights.
Willy Tarreau1b877482018-08-21 19:44:53 +0200432 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200433 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreauf89c1872009-10-01 11:19:37 +0200434 */
435static inline void fwrr_switch_trees(struct fwrr_group *grp)
436{
437 struct eb_root *swap;
438 swap = grp->init;
439 grp->init = grp->next;
440 grp->next = swap;
441 grp->curr_weight = grp->next_weight;
442 grp->curr_pos = grp->curr_weight;
443}
444
445/* return next server from the current tree in FWRR group <grp>, or a server
446 * from the "init" tree if appropriate. If both trees are empty, return NULL.
Willy Tarreau1b877482018-08-21 19:44:53 +0200447 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200448 * The lbprm's lock must be held. The server's lock is not used.
Willy Tarreauf89c1872009-10-01 11:19:37 +0200449 */
450static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
451{
Willy Tarreau9df86f92019-04-16 11:21:14 +0200452 struct eb32_node *node1;
453 struct eb32_node *node2;
454 struct server *s1 = NULL;
455 struct server *s2 = NULL;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200456
Willy Tarreau9df86f92019-04-16 11:21:14 +0200457 node1 = eb32_first(&grp->curr);
458 if (node1) {
459 s1 = eb32_entry(node1, struct server, lb_node);
Willy Tarreau9df86f92019-04-16 11:21:14 +0200460 if (s1->cur_eweight && s1->npos <= grp->curr_pos)
461 return s1;
462 }
Christopher Faulet5b517552017-06-09 14:17:53 +0200463
Willy Tarreau9df86f92019-04-16 11:21:14 +0200464 /* Either we have no server left, or we have a hole. We'll look in the
465 * init tree or a better proposal. At this point, if <s1> is non-null,
Willy Tarreau274ba672019-04-24 10:48:00 +0200466 * it is guaranteed to remain available as the tree is locked.
Willy Tarreau9df86f92019-04-16 11:21:14 +0200467 */
468 node2 = eb32_first(grp->init);
469 if (node2) {
470 s2 = eb32_entry(node2, struct server, lb_node);
Willy Tarreau9df86f92019-04-16 11:21:14 +0200471 if (s2->cur_eweight) {
Willy Tarreau9df86f92019-04-16 11:21:14 +0200472 fwrr_get_srv_init(s2);
473 return s2;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200474 }
475 }
Willy Tarreau9df86f92019-04-16 11:21:14 +0200476 return s1;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200477}
478
Willy Tarreau274ba672019-04-24 10:48:00 +0200479/* Computes next position of server <s> in the group. Nothing is done if <s>
480 * has a zero weight.
Willy Tarreau1b877482018-08-21 19:44:53 +0200481 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200482 * The lbprm's lock must be held to protect lpos/npos/rweight.
Willy Tarreau1b877482018-08-21 19:44:53 +0200483 */
Willy Tarreauf89c1872009-10-01 11:19:37 +0200484static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
485{
Willy Tarreau274ba672019-04-24 10:48:00 +0200486 unsigned int eweight = *(volatile unsigned int *)&s->cur_eweight;
487
488 if (!eweight)
489 return;
490
Willy Tarreauf89c1872009-10-01 11:19:37 +0200491 if (!s->npos) {
492 /* first time ever for this server */
Willy Tarreau274ba672019-04-24 10:48:00 +0200493 s->npos = grp->curr_pos;
494 }
Willy Tarreauf89c1872009-10-01 11:19:37 +0200495
Willy Tarreau274ba672019-04-24 10:48:00 +0200496 s->lpos = s->npos;
497 s->npos += grp->next_weight / eweight;
498 s->rweight += grp->next_weight % eweight;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200499
Willy Tarreau274ba672019-04-24 10:48:00 +0200500 if (s->rweight >= eweight) {
501 s->rweight -= eweight;
502 s->npos++;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200503 }
504}
505
506/* Return next server from the current tree in backend <p>, or a server from
507 * the init tree if appropriate. If both trees are empty, return NULL.
508 * Saturated servers are skipped and requeued.
Willy Tarreau1b877482018-08-21 19:44:53 +0200509 *
Willy Tarreau274ba672019-04-24 10:48:00 +0200510 * The lbprm's lock will be used. The server's lock is not used.
Willy Tarreauf89c1872009-10-01 11:19:37 +0200511 */
512struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
513{
514 struct server *srv, *full, *avoided;
515 struct fwrr_group *grp;
516 int switched;
517
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100518 HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);
Willy Tarreauf89c1872009-10-01 11:19:37 +0200519 if (p->srv_act)
520 grp = &p->lbprm.fwrr.act;
Christopher Faulet5b517552017-06-09 14:17:53 +0200521 else if (p->lbprm.fbck) {
522 srv = p->lbprm.fbck;
523 goto out;
524 }
Willy Tarreauf89c1872009-10-01 11:19:37 +0200525 else if (p->srv_bck)
526 grp = &p->lbprm.fwrr.bck;
Christopher Faulet5b517552017-06-09 14:17:53 +0200527 else {
528 srv = NULL;
529 goto out;
530 }
Willy Tarreauf89c1872009-10-01 11:19:37 +0200531
532 switched = 0;
533 avoided = NULL;
534 full = NULL; /* NULL-terminated list of saturated servers */
535 while (1) {
536 /* if we see an empty group, let's first try to collect weights
537 * which might have recently changed.
538 */
539 if (!grp->curr_weight)
540 grp->curr_pos = grp->curr_weight = grp->next_weight;
541
542 /* get first server from the "current" tree. When the end of
543 * the tree is reached, we may have to switch, but only once.
544 */
545 while (1) {
546 srv = fwrr_get_server_from_group(grp);
547 if (srv)
548 break;
549 if (switched) {
550 if (avoided) {
551 srv = avoided;
Willy Tarreaub6195ef2019-05-27 10:17:05 +0200552 goto take_this_one;
Willy Tarreauf89c1872009-10-01 11:19:37 +0200553 }
554 goto requeue_servers;
555 }
556 switched = 1;
557 fwrr_switch_trees(grp);
Willy Tarreauf89c1872009-10-01 11:19:37 +0200558 }
559
Willy Tarreau274ba672019-04-24 10:48:00 +0200560 /* OK, we have a server. However, it may be saturated, in which
561 * case we don't want to reconsider it for now. We'll update
562 * its position and dequeue it anyway, so that we can move it
563 * to a better place afterwards.
Willy Tarreauf89c1872009-10-01 11:19:37 +0200564 */
565 fwrr_update_position(grp, srv);
566 fwrr_dequeue_srv(srv);
567 grp->curr_pos++;
568 if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) {
569 /* make sure it is not the server we are trying to exclude... */
570 if (srv != srvtoavoid || avoided)
571 break;
572
573 avoided = srv; /* ...but remember that is was selected yet avoided */
574 }
575
Willy Tarreau9df86f92019-04-16 11:21:14 +0200576 /* the server is saturated or avoided, let's chain it for later reinsertion.
Willy Tarreau9df86f92019-04-16 11:21:14 +0200577 */
Willy Tarreauf89c1872009-10-01 11:19:37 +0200578 srv->next_full = full;
579 full = srv;
580 }
581
Willy Tarreaub6195ef2019-05-27 10:17:05 +0200582 take_this_one:
Willy Tarreauf89c1872009-10-01 11:19:37 +0200583 /* OK, we got the best server, let's update it */
584 fwrr_queue_srv(srv);
585
586 requeue_servers:
587 /* Requeue all extracted servers. If full==srv then it was
Willy Tarreau9df86f92019-04-16 11:21:14 +0200588 * avoided (unsuccessfully) and chained, omit it now. The
589 * only way to get there is by having <avoided>==NULL or
590 * <avoided>==<srv>.
Willy Tarreauf89c1872009-10-01 11:19:37 +0200591 */
592 if (unlikely(full != NULL)) {
593 if (switched) {
594 /* the tree has switched, requeue all extracted servers
595 * into "init", because their place was lost, and only
596 * their weight matters.
597 */
598 do {
Willy Tarreau274ba672019-04-24 10:48:00 +0200599 if (likely(full != srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +0200600 fwrr_queue_by_weight(grp->init, full);
601 full = full->next_full;
602 } while (full);
603 } else {
604 /* requeue all extracted servers just as if they were consumed
605 * so that they regain their expected place.
606 */
607 do {
Willy Tarreau274ba672019-04-24 10:48:00 +0200608 if (likely(full != srv))
Willy Tarreauf89c1872009-10-01 11:19:37 +0200609 fwrr_queue_srv(full);
610 full = full->next_full;
611 } while (full);
612 }
613 }
Christopher Faulet5b517552017-06-09 14:17:53 +0200614 out:
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100615 HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
Willy Tarreauf89c1872009-10-01 11:19:37 +0200616 return srv;
617}
618
619/*
620 * Local variables:
621 * c-indent-level: 8
622 * c-basic-offset: 8
623 * End:
624 */