/*
 * Consistent Hash implementation
 * Please consult this very detailed article for more information:
 * http://www.spiteful.com/2008/03/17/programmers-toolbox-part-3-consistent-hashing/
 *
 * Our implementation has to support both weighted hashing and weighted round
 * robin because we'll use it to replace the previous map-based implementation
 * which offered both algorithms.
 *
 * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <haproxy/api.h>
#include <common/debug.h>
#include <common/standard.h>
#include <import/eb32tree.h>

#include <types/global.h>
#include <types/server.h>

#include <proto/backend.h>
#include <proto/queue.h>

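/* Quick illustration of the ring layout (assuming the usual value
 * BE_WEIGHT_SCALE == 16): a server with user weight 1 contributes 16 nodes
 * to the tree, one keyed full_hash(puid * SRV_EWGHT_RANGE + j) for each
 * j in [0..15], so a weight-2 server owns twice as many points on the ring
 * and thus roughly twice the key space (see chash_init_server_tree() at the
 * bottom of this file).
 */
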
/* Return next tree node after <node> which must still be in the tree, or be
 * NULL. Lookup wraps around the end to the beginning. If the next node is the
 * same node, return NULL. This is designed to find a valid next node before
 * deleting one from the tree.
 */
static inline struct eb32_node *chash_skip_node(struct eb_root *root, struct eb32_node *node)
{
	struct eb32_node *stop = node;

	if (!node)
		return NULL;
	node = eb32_next(node);
	if (!node)
		node = eb32_first(root);
	if (node == stop)
		return NULL;
	return node;
}

/* Remove all of a server's entries from its tree. This may be used when
 * setting a server down.
 */
static inline void chash_dequeue_srv(struct server *s)
{
	while (s->lb_nodes_now > 0) {
		if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway
			s->lb_nodes_now = s->lb_nodes_tot;
		s->lb_nodes_now--;
		if (s->proxy->lbprm.chash.last == &s->lb_nodes[s->lb_nodes_now].node)
			s->proxy->lbprm.chash.last = chash_skip_node(s->lb_tree, s->proxy->lbprm.chash.last);
		eb32_delete(&s->lb_nodes[s->lb_nodes_now].node);
	}
}

/* Adjust the number of entries of a server in its tree. The server must
 * appear in the tree as many times as its weight indicates. If it appears
 * too often, we remove the last occurrences. If it does not appear often
 * enough, we add more occurrences. To remove a server from the tree,
 * normally call this with eweight=0.
 *
 * The server's lock and the lbprm's lock must be held.
 */
static inline void chash_queue_dequeue_srv(struct server *s)
{
	while (s->lb_nodes_now > s->next_eweight) {
		if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway
			s->lb_nodes_now = s->lb_nodes_tot;
		s->lb_nodes_now--;
		if (s->proxy->lbprm.chash.last == &s->lb_nodes[s->lb_nodes_now].node)
			s->proxy->lbprm.chash.last = chash_skip_node(s->lb_tree, s->proxy->lbprm.chash.last);
		eb32_delete(&s->lb_nodes[s->lb_nodes_now].node);
	}

	/* Attempt to increase the total number of nodes, if the user
	 * increased the weight beyond the original weight.
	 */
	if (s->lb_nodes_tot < s->next_eweight) {
		struct tree_occ *new_nodes;

		/* First we need to remove all of the server's entries from
		 * its tree because the realloc will change all node pointers.
		 */
		chash_dequeue_srv(s);

		new_nodes = realloc(s->lb_nodes, s->next_eweight * sizeof(*new_nodes));
		if (new_nodes) {
			unsigned int j;

			s->lb_nodes = new_nodes;
			memset(&s->lb_nodes[s->lb_nodes_tot], 0,
			       (s->next_eweight - s->lb_nodes_tot) * sizeof(*s->lb_nodes));
			for (j = s->lb_nodes_tot; j < s->next_eweight; j++) {
				s->lb_nodes[j].server = s;
				s->lb_nodes[j].node.key = full_hash(s->puid * SRV_EWGHT_RANGE + j);
			}
			s->lb_nodes_tot = s->next_eweight;
		}
	}
	while (s->lb_nodes_now < s->next_eweight) {
		if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway
			break;
		if (s->proxy->lbprm.chash.last == &s->lb_nodes[s->lb_nodes_now].node)
			s->proxy->lbprm.chash.last = chash_skip_node(s->lb_tree, s->proxy->lbprm.chash.last);
		eb32_insert(s->lb_tree, &s->lb_nodes[s->lb_nodes_now].node);
		s->lb_nodes_now++;
	}
}

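/* Worked example for the function above (assuming BE_WEIGHT_SCALE == 16 and
 * wmult == 1): raising a server's user weight from 2 to 3 moves next_eweight
 * from 32 to 48. lb_nodes_tot is still 32, so the server is dequeued,
 * lb_nodes is realloc'ed to 48 entries and keys are hashed for the 16 new
 * ones, then the last loop inserts nodes until lb_nodes_now reaches 48.
 */
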
/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to down.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely down (the caller may not
 * know all the variables of a server's state).
 *
 * The server's lock must be held. The lbprm lock will be used.
 */
static void chash_set_server_status_down(struct server *srv)
{
	struct proxy *p = srv->proxy;

	if (!srv_lb_status_changed(srv))
		return;

	HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);

	if (srv_willbe_usable(srv))
		goto out_update_state;

	if (!srv_currently_usable(srv))
		/* server was already down */
		goto out_update_backend;

	if (srv->flags & SRV_F_BACKUP) {
		p->lbprm.tot_wbck -= srv->cur_eweight;
		p->srv_bck--;

		if (srv == p->lbprm.fbck) {
			/* we lost the first backup server in a single-backup
			 * configuration, we must search another one.
			 */
			struct server *srv2 = p->lbprm.fbck;
			do {
				srv2 = srv2->next;
			} while (srv2 &&
				 !((srv2->flags & SRV_F_BACKUP) &&
				   srv_willbe_usable(srv2)));
			p->lbprm.fbck = srv2;
		}
	} else {
		p->lbprm.tot_wact -= srv->cur_eweight;
		p->srv_act--;
	}

	chash_dequeue_srv(srv);

 out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
 out_update_state:
	srv_lb_commit_status(srv);

	HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
}

/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to up.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely UP (the caller may not
 * know all the variables of a server's state). This function will not change
 * the weight of a server which was already up.
 *
 * The server's lock must be held. The lbprm lock will be used.
 */
static void chash_set_server_status_up(struct server *srv)
{
	struct proxy *p = srv->proxy;

	if (!srv_lb_status_changed(srv))
		return;

	HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);

	if (!srv_willbe_usable(srv))
		goto out_update_state;

	if (srv_currently_usable(srv))
		/* server was already up */
		goto out_update_backend;

	if (srv->flags & SRV_F_BACKUP) {
		p->lbprm.tot_wbck += srv->next_eweight;
		p->srv_bck++;

		if (!(p->options & PR_O_USE_ALL_BK)) {
			if (!p->lbprm.fbck) {
				/* there was no backup server anymore */
				p->lbprm.fbck = srv;
			} else {
				/* we may have restored a backup server prior to fbck,
				 * in which case it should replace it.
				 */
				struct server *srv2 = srv;
				do {
					srv2 = srv2->next;
				} while (srv2 && (srv2 != p->lbprm.fbck));
				if (srv2)
					p->lbprm.fbck = srv;
			}
		}
	} else {
		p->lbprm.tot_wact += srv->next_eweight;
		p->srv_act++;
	}

	/* note that eweight cannot be 0 here */
	chash_queue_dequeue_srv(srv);

 out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
 out_update_state:
	srv_lb_commit_status(srv);

	HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
}

/* This function must be called after an update to server <srv>'s effective
 * weight. It may be called after a state change too.
 *
 * The server's lock must be held. The lbprm lock may be used.
 */
static void chash_update_server_weight(struct server *srv)
{
	int old_state, new_state;
	struct proxy *p = srv->proxy;

	if (!srv_lb_status_changed(srv))
		return;

	/* If changing the server's weight changes its state, we simply apply
	 * the procedures we already have for status change. If the state
	 * remains down, the server is not in any tree, so it's as easy as
	 * updating its values. If the state remains up with different weights,
	 * there are some computations to perform to find a new place and
	 * possibly a new tree for this server.
	 */

	old_state = srv_currently_usable(srv);
	new_state = srv_willbe_usable(srv);

	if (!old_state && !new_state) {
		srv_lb_commit_status(srv);
		return;
	}
	else if (!old_state && new_state) {
		chash_set_server_status_up(srv);
		return;
	}
	else if (old_state && !new_state) {
		chash_set_server_status_down(srv);
		return;
	}

	HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);

	/* only adjust the server's presence in the tree */
	chash_queue_dequeue_srv(srv);

	if (srv->flags & SRV_F_BACKUP)
		p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight;
	else
		p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight;

	update_backend_weight(p);
	srv_lb_commit_status(srv);

	HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
}

/*
 * This function implements the "Consistent Hashing with Bounded Loads" algorithm
 * of Mirrokni, Thorup, and Zadimoghaddam (arxiv:1608.01350), adapted for use with
 * unequal server weights.
 */
int chash_server_is_eligible(struct server *s)
{
	/* The total number of slots to allocate is the total number of outstanding requests
	 * (including the one we're about to make) times the load-balance-factor, rounded up.
	 */
	unsigned tot_slots = ((s->proxy->served + 1) * s->proxy->lbprm.hash_balance_factor + 99) / 100;
	unsigned slots_per_weight = tot_slots / s->proxy->lbprm.tot_weight;
	unsigned remainder = tot_slots % s->proxy->lbprm.tot_weight;

	/* Allocate a whole number of slots per weight unit... */
	unsigned slots = s->cur_eweight * slots_per_weight;

	/* And then distribute the rest among servers proportionally to their weight. */
	slots += ((s->cumulative_weight + s->cur_eweight) * remainder) / s->proxy->lbprm.tot_weight
	       - (s->cumulative_weight * remainder) / s->proxy->lbprm.tot_weight;

	/* But never leave a server with 0. */
	if (slots == 0)
		slots = 1;

	return s->served < slots;
}

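/* Worked example (taking eweight == 1 per server for simplicity): with
 * hash-balance-factor 150, three servers of weight 1 (tot_weight == 3) and
 * served == 9, tot_slots = (10 * 150 + 99) / 100 = 15, slots_per_weight = 5,
 * remainder = 0, so each server gets 5 slots and stays eligible only while
 * it has fewer than 5 outstanding requests.
 */
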
/*
 * This function returns the running server from the CHASH tree, which is at
 * the closest distance from the value of <hash>. Doing so ensures that even
 * with a badly imbalanced hash, if some servers are close to each other, they
 * will still both receive traffic. If any server is found, it will be returned.
 * It will also skip server <avoid> if the hash result ends on this one.
 * If no valid server is found, NULL is returned.
 */
struct server *chash_get_server_hash(struct proxy *p, unsigned int hash, const struct server *avoid)
{
	struct eb32_node *next, *prev;
	struct server *nsrv, *psrv;
	struct eb_root *root;
	unsigned int dn, dp;
	int loop;

	HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);

	if (p->srv_act)
		root = &p->lbprm.chash.act;
	else if (p->lbprm.fbck) {
		nsrv = p->lbprm.fbck;
		goto out;
	}
	else if (p->srv_bck)
		root = &p->lbprm.chash.bck;
	else {
		nsrv = NULL;
		goto out;
	}

	/* find the node after and the node before */
	next = eb32_lookup_ge(root, hash);
	if (!next)
		next = eb32_first(root);
	if (!next) {
		nsrv = NULL; /* tree is empty */
		goto out;
	}

	prev = eb32_prev(next);
	if (!prev)
		prev = eb32_last(root);

	nsrv = eb32_entry(next, struct tree_occ, node)->server;
	psrv = eb32_entry(prev, struct tree_occ, node)->server;

	/* OK we're located between two servers, let's
	 * compare distances between hash and the two servers
	 * and select the closest server.
	 */
	dp = hash - prev->key;
	dn = next->key - hash;

	if (dp <= dn) {
		next = prev;
		nsrv = psrv;
	}

	loop = 0;
	while (nsrv == avoid || (p->lbprm.hash_balance_factor && !chash_server_is_eligible(nsrv))) {
		next = eb32_next(next);
		if (!next) {
			next = eb32_first(root);
			if (++loop > 1) // protection against accidental loop
				break;
		}
		nsrv = eb32_entry(next, struct tree_occ, node)->server;
	}

 out:
	HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
	return nsrv;
}

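/* Worked example of the distance test above: with node keys 4294000000 and
 * 10 and hash == 4294600000, <next> wraps to the first node (key 10); the
 * unsigned subtractions yield dp = 600000 and dn = 367306 (10 - 4294600000
 * modulo 2^32), so the node just past zero correctly wins: the 32-bit key
 * space behaves as a ring with no seam at UINT_MAX.
 */
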
/* Return next server from the CHASH tree in backend <p>. If the tree is empty,
 * return NULL. Saturated servers are skipped.
 */
struct server *chash_get_next_server(struct proxy *p, struct server *srvtoavoid)
{
	struct server *srv, *avoided;
	struct eb32_node *node, *stop, *avoided_node;
	struct eb_root *root;

	srv = avoided = NULL;
	avoided_node = NULL;

	HA_SPIN_LOCK(LBPRM_LOCK, &p->lbprm.lock);
	if (p->srv_act)
		root = &p->lbprm.chash.act;
	else if (p->lbprm.fbck) {
		srv = p->lbprm.fbck;
		goto out;
	}
	else if (p->srv_bck)
		root = &p->lbprm.chash.bck;
	else {
		srv = NULL;
		goto out;
	}

	stop = node = p->lbprm.chash.last;
	do {
		struct server *s;

		if (node)
			node = eb32_next(node);
		if (!node)
			node = eb32_first(root);

		p->lbprm.chash.last = node;
		if (!node) {
			/* no node is available */
			srv = NULL;
			goto out;
		}

		/* Note: if we came here after a down/up cycle with no last
		 * pointer, and after a redispatch (srvtoavoid is set), we
		 * must set stop to non-null otherwise we can loop forever.
		 */
		if (!stop)
			stop = node;

		/* OK, we have a server. However, it may be saturated, in which
		 * case we don't want to reconsider it for now, so we'll simply
		 * skip it. Same if it's the server we try to avoid, in which
		 * case we simply remember it for later use if needed.
		 */
		s = eb32_entry(node, struct tree_occ, node)->server;
		if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
			if (s != srvtoavoid) {
				srv = s;
				break;
			}
			avoided = s;
			avoided_node = node;
		}
	} while (node != stop);

	if (!srv) {
		srv = avoided;
		p->lbprm.chash.last = avoided_node;
	}

 out:
	HA_SPIN_UNLOCK(LBPRM_LOCK, &p->lbprm.lock);
	return srv;
}

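/* Note: chash_get_server_hash() above is for requests carrying a hashable
 * key, while chash_get_next_server() resumes from chash.last and walks the
 * ring in order, which yields a round-robin-like distribution for requests
 * from which no key can be derived.
 */
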
/* This function is responsible for building the active and backup trees for
 * consistent hashing. The servers receive an array of initialized nodes
 * with their assigned keys. It also sets p->lbprm.wdiv to the eweight to
 * uweight ratio.
 */
void chash_init_server_tree(struct proxy *p)
{
	struct server *srv;
	struct eb_root init_head = EB_ROOT;
	int node;

	p->lbprm.set_server_status_up = chash_set_server_status_up;
	p->lbprm.set_server_status_down = chash_set_server_status_down;
	p->lbprm.update_server_eweight = chash_update_server_weight;
	p->lbprm.server_take_conn = NULL;
	p->lbprm.server_drop_conn = NULL;

	p->lbprm.wdiv = BE_WEIGHT_SCALE;
	for (srv = p->srv; srv; srv = srv->next) {
		srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
		srv_lb_commit_status(srv);
	}

	recount_servers(p);
	update_backend_weight(p);

	p->lbprm.chash.act = init_head;
	p->lbprm.chash.bck = init_head;
	p->lbprm.chash.last = NULL;

	/* queue active and backup servers in two distinct groups */
	for (srv = p->srv; srv; srv = srv->next) {
		srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.chash.bck : &p->lbprm.chash.act;
		srv->lb_nodes_tot = srv->uweight * BE_WEIGHT_SCALE;
		srv->lb_nodes_now = 0;
		srv->lb_nodes = calloc(srv->lb_nodes_tot, sizeof(struct tree_occ));
		for (node = 0; node < srv->lb_nodes_tot; node++) {
			srv->lb_nodes[node].server = srv;
			srv->lb_nodes[node].node.key = full_hash(srv->puid * SRV_EWGHT_RANGE + node);
		}

		if (srv_currently_usable(srv))
			chash_queue_dequeue_srv(srv);
	}
}
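
#if 0
/* Minimal usage sketch, illustrative only (hence not compiled in). The
 * helper name and the way the key is obtained are assumptions; real callers
 * live in backend.c and derive the hash from the configured source (URI,
 * header, source address...). When no key can be derived at all, they fall
 * back to walking the ring with chash_get_next_server(p, NULL).
 */
static struct server *example_pick_server(struct proxy *p, unsigned int key)
{
	/* smooth the raw key the same way node keys are smoothed at init time */
	unsigned int hash = full_hash(key);

	/* closest-node lookup on the ring; NULL means no server is usable */
	return chash_get_server_hash(p, hash, NULL);
}
#endif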