blob: 02802bdb8bc2e2ce9aa3c1f5f97f112c89f4b6cd [file] [log] [blame]
Willy Tarreau18baa9d2007-11-26 01:24:50 +01001/*
2 * experimental weighted round robin scheduler - (c) 2007 willy tarreau.
3 *
4 * This filling algorithm is excellent at spreading the servers, as it also
5 * takes care of keeping the most uniform distance between occurences of each
6 * server, by maximizing this distance. It reduces the number of variables
7 * and expensive operations.
8 */
9
10#include <stdio.h>
11#include <stdlib.h>
12#include "eb32tree.h"
13
14struct srv {
15 struct eb32_node node;
16 struct eb_root *tree; // we want to know where the server is
17 int num;
18 int w; /* weight */
19 int next, last;
20 int rem;
21} *srv;
22
23/* those trees represent a sliding window of 3 time frames */
24struct eb_root tree_0 = EB_ROOT;
25struct eb_root tree_1 = EB_ROOT;
26struct eb_root tree_2 = EB_ROOT;
27
28struct eb_root *init_tree; /* receives positions 0..sw-1 */
29struct eb_root *next_tree; /* receives positions >= 2sw */
30
31int nsrv; /* # of servers */
32int nsw, sw; /* sum of weights */
33int p; /* current position, between sw..2sw-1 */
34
35/* queue a server in the weights tree */
36void queue_by_weight(struct eb_root *root, struct srv *s) {
37 s->node.key = 255 - s->w;
Willy Tarreaub698f0f2007-12-02 11:01:23 +010038 eb32_insert(root, &s->node);
Willy Tarreau18baa9d2007-11-26 01:24:50 +010039 s->tree = root;
40}
41
42/* queue a server in the weight tree <root>, except if its weight is 0 */
43void queue_by_weight_0(struct eb_root *root, struct srv *s) {
44 if (s->w) {
45 s->node.key = 255 - s->w;
Willy Tarreaub698f0f2007-12-02 11:01:23 +010046 eb32_insert(root, &s->node);
Willy Tarreau18baa9d2007-11-26 01:24:50 +010047 s->tree = root;
48 } else {
49 s->tree = NULL;
50 }
51}
52
53static inline void dequeue_srv(struct srv *s) {
54 eb32_delete(&s->node);
55}
56
57/* queues a server into the correct tree depending on ->next */
58void put_srv(struct srv *s) {
59 if (s->w <= 0 ||
60 s->next >= 2*sw || /* delay everything which does not fit into the window */
61 s->next >= sw+nsw) { /* and everything which does not fit into the theorical new window */
62 /* put into next tree */
63 s->next -= sw; // readjust next in case we could finally take this back to current.
64 queue_by_weight_0(next_tree, s);
Willy Tarreaub698f0f2007-12-02 11:01:23 +010065 } else {
66 // The overflow problem is caused by the scale we want to apply to user weight
67 // to turn it into effective weight. Since this is only used to provide a smooth
68 // slowstart on very low weights (1), it is a pure waste. Thus, we just have to
69 // apply a small scaling factor and warn the user that slowstart is not very smooth
70 // on low weights.
71 // The max key is about ((scale*maxw)*(scale*maxw)*nbsrv)/ratio (where the ratio is
72 // the arbitrary divide we perform in the examples above). Assuming that ratio==scale,
73 // this translates to maxkey=scale*maxw^2*nbsrv, so
74 // max_nbsrv=2^32/255^2/scale ~= 66051/scale
75 // Using a scale of 16 is enough to support 4000 servers without overflow, providing
76 // 6% steps during slowstart.
77
78 s->node.key = 256 * s->next + (16*255 + s->rem - s->w) / 16;
79
80 /* check for overflows */
81 if ((int)s->node.key < 0)
82 printf(" OV: srv=%p w=%d rem=%d next=%d key=%d", s, s->w, s->rem, s->next, s->node.key);
83 eb32_insert(&tree_0, &s->node);
Willy Tarreau18baa9d2007-11-26 01:24:50 +010084 s->tree = &tree_0;
85 }
86}
87
88/* prepares a server when extracting it from the init tree */
89static inline void get_srv_init(struct srv *s) {
90 s->next = s->rem = 0;
91}
92
93/* prepares a server when extracting it from the next tree */
94static inline void get_srv_next(struct srv *s) {
95 s->next += sw;
96}
97
98/* prepares a server when extracting it from the next tree */
99static inline void get_srv_down(struct srv *s) {
100 s->next = p;
101}
102
103/* prepares a server when extracting it from its tree */
104void get_srv(struct srv *s) {
105 if (s->tree == init_tree) {
106 get_srv_init(s);
107 }
108 else if (s->tree == next_tree) {
109 get_srv_next(s);
110 }
111 else if (s->tree == NULL) {
112 get_srv_down(s);
113 }
114}
115
116
117/* return next server from the current tree, or a server from the init tree
118 * if appropriate. If both trees are empty, return NULL.
119 */
120struct srv *get_next_server() {
121 struct eb32_node *node;
122 struct srv *s;
123
124 node = eb32_first(&tree_0);
125 s = eb32_entry(node, struct srv, node);
126
127 if (!node || s->next > p) {
128 /* either we have no server left, or we have a hole */
129 struct eb32_node *node2;
130 node2 = eb32_first(init_tree);
131 if (node2) {
132 node = node2;
133 s = eb32_entry(node, struct srv, node);
134 get_srv_init(s);
135 if (s->w == 0)
136 node = NULL;
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100137 s->node.key = 0; // do not display random values
Willy Tarreau18baa9d2007-11-26 01:24:50 +0100138 }
139 }
140 if (node)
141 return s;
142 else
143 return NULL;
144}
145
146void update_position(struct srv *s) {
147 //if (s->tree == init_tree) {
148 if (!s->next) {
149 // first time ever for this server
150 s->last = p;
151 s->next = p + nsw / s->w;
152 s->rem += nsw % s->w;
153
154 if (s->rem >= s->w) {
155 s->rem -= s->w;
156 s->next++;
157 }
158 } else {
159 s->last = s->next; // or p ?
160 //s->next += sw / s->w;
161 //s->rem += sw % s->w;
162 s->next += nsw / s->w;
163 s->rem += nsw % s->w;
164
165 if (s->rem >= s->w) {
166 s->rem -= s->w;
167 s->next++;
168 }
169 }
170}
171
172
173/* switches trees init_tree and next_tree. init_tree should be empty when
174 * this happens, and next_tree filled with servers sorted by weights.
175 */
176void switch_trees() {
177 struct eb_root *swap;
178 swap = init_tree;
179 init_tree = next_tree;
180 next_tree = swap;
181 sw = nsw;
182 p = sw;
183}
184
185main(int argc, char **argv) {
186 int conns;
187 int i;
188
189 struct srv *s;
190
191 argc--; argv++;
192 nsrv = argc;
193
194 if (!nsrv)
195 exit(1);
196
197 srv = (struct srv *)calloc(nsrv, sizeof(struct srv));
198
199 sw = 0;
200 for (i = 0; i < nsrv; i++) {
201 s = &srv[i];
202 s->num = i;
203 s->w = atol(argv[i]);
204 sw += s->w;
205 }
206
207 nsw = sw;
208
209 init_tree = &tree_1;
210 next_tree = &tree_2;
211
212 /* and insert all the servers in the PREV tree */
213 /* note that it is required to insert them according to
214 * the reverse order of their weights.
215 */
216 printf("---------------:");
217 for (i = 0; i < nsrv; i++) {
218 s = &srv[i];
219 queue_by_weight_0(init_tree, s);
220 printf("%2d", s->w);
221 }
222 printf("\n");
223
224 p = sw; // time base of current tree
225 conns = 0;
226 while (1) {
227 struct eb32_node *node;
228
229 printf("%08d|%06d: ", conns, p);
230
231 /* if we have en empty tree, let's first try to collect weights
232 * which might have changed.
233 */
234 if (!sw) {
235 if (nsw) {
236 sw = nsw;
237 p = sw;
238 /* do not switch trees, otherwise new servers (from init)
239 * would end up in next.
240 */
241 //switch_trees();
242 //printf("bla\n");
243 }
244 else
245 goto next_iteration;
246 }
247
248 s = get_next_server();
249 if (!s) {
250 printf("----------- switch (empty) -- sw=%d -> %d ---------\n", sw, nsw);
251 switch_trees();
252 s = get_next_server();
253 printf("%08d|%06d: ", conns, p);
254
255 if (!s)
256 goto next_iteration;
257 }
258 else if (s->next >= 2*sw) {
259 printf("ARGGGGG! s[%d].next=%d, max=%d\n", s->num, s->next, 2*sw-1);
260 }
261
262 /* now we have THE server we want to put at this position */
263 for (i = 0; i < s->num; i++) {
264 if (srv[i].w > 0)
265 printf(". ");
266 else
267 printf("_ ");
268 }
269 printf("# ");
270 for (i = s->num + 1; i < nsrv; i++) {
271 if (srv[i].w > 0)
272 printf(". ");
273 else
274 printf("_ ");
275 }
276 printf(" : ");
277
278 printf("s=%02d v=%04d w=%03d n=%03d r=%03d ",
279 s->num, s->node.key, s->w, s->next, s->rem);
280
281 update_position(s);
282 printf(" | next=%03d, rem=%03d ", s->next, s->rem);
283
284 if (s->next >= sw * 2) {
285 dequeue_srv(s);
286 //queue_by_weight(next_tree, s);
287 put_srv(s);
288 printf(" => next (w=%d, n=%d) ", s->w, s->next);
289 }
290 else {
291 printf(" => curr ");
292
293 //s->node.key = s->next;
294 /* we want to ensure that in case of conflicts, servers with
295 * the highest weights will get served first. Also, we still
296 * have the remainder to see where the entry expected to be
297 * inserted.
298 */
299 //s->node.key = 256 * s->next + 255 - s->w;
300 //s->node.key = sw * s->next + sw / s->w;
301 //s->node.key = sw * s->next + s->rem; /// seems best (check with filltab15) !
302
303 //s->node.key = (2 * sw * s->next) + s->rem + sw / s->w;
304
305 /* FIXME: must be optimized */
306 dequeue_srv(s);
307 put_srv(s);
308 //eb32i_insert(&tree_0, &s->node);
309 //s->tree = &tree_0;
310 }
311
312 next_iteration:
313 p++;
314 conns++;
Willy Tarreaub698f0f2007-12-02 11:01:23 +0100315 if (/*conns == 30*/ /**/random()%100 == 0/**/) {
316 int w = /*20*//**/random()%4096/**/;
Willy Tarreau18baa9d2007-11-26 01:24:50 +0100317 int num = /*1*//**/random()%nsrv/**/;
318 struct srv *s = &srv[num];
319
320 nsw = nsw - s->w + w;
321 //sw=nsw;
322
323 if (s->tree == init_tree) {
324 printf(" -- chgwght1(%d): %d->%d, n=%d --", s->num, s->w, w, s->next);
325 printf("(init)");
326 s->w = w;
327 dequeue_srv(s);
328 queue_by_weight_0(s->tree, s);
329 }
330 else if (s->tree == NULL) {
331 printf(" -- chgwght2(%d): %d->%d, n=%d --", s->num, s->w, w, s->next);
332 printf("(down)");
333 s->w = w;
334 dequeue_srv(s);
335 //queue_by_weight_0(init_tree, s);
336 get_srv(s);
337 s->next = p + (nsw + sw - p) / s->w;
338 put_srv(s);
339 }
340 else {
341 int oldnext;
342
343 /* the server is either active or in the next queue */
344 get_srv(s);
345 printf(" -- chgwght3(%d): %d->%d, n=%d, sw=%d, nsw=%d --", s->num, s->w, w, s->next, sw, nsw);
346
347 oldnext = s->next;
348 s->w = w;
349
350 /* we must measure how far we are from the end of the current window
351 * and try to fit their as many entries as should theorically be.
352 */
353
354 //s->w = s->w * (2*sw - p) / sw;
355 if (s->w > 0) {
356 int step = (nsw /*+ sw - p*/) / s->w;
357 s->next = s->last + step;
358 s->rem = 0;
359 if (s->next > oldnext) {
360 s->next = oldnext;
361 printf(" aaaaaaa ");
362 }
363
364 if (s->next < p + 2) {
365 s->next = p + step;
366 printf(" bbbbbb ");
367 }
368 } else {
369 printf(" push -- ");
370 /* push it into the next tree */
371 s->w = 0;
372 s->next = p + sw;
373 }
374
375
376 dequeue_srv(s);
377 printf(" n=%d", s->next);
378 put_srv(s);
379 }
380 }
381
382 printf("\n");
383
384 if (0 && conns % 50000 == 0) {
385 printf("-------- %-5d : changing all weights ----\n", conns);
386
387 for (i = 0; i < nsrv; i++) {
388 int w = i + 1;
389 s = &srv[i];
390 nsw = nsw - s->w + w;
391 s->w = w;
392 dequeue_srv(s);
393 queue_by_weight_0(next_tree, s); // or init_tree ?
394 }
395 }
396
397 }
398}
399