[CLEANUP] fwrr: ensure that we never overflow in placements Now we can compute the max place depending on the number of servers, maximum weight and weight scale. The formula has been stored as a comment so that it's easy to choose between smooth weight ramp up and high number of servers. The default scale has been set to 16, which permits 4000 servers with a granularity of 6% in the worst case (weight=1).

commit: b698f0f4a2a0a182dd4ca647d82f08379bd95b7e [log] [tgz]
author: Willy Tarreau <w@1wt.eu> Sun Dec 02 11:01:23 2007 +0100
committer: Willy Tarreau <w@1wt.eu> Sun Dec 02 11:01:23 2007 +0100
tree: 6a30b98626a9789c61c9f6c277e9a4cb3c195067
parent: d1cd2764563dabb1e71cfce88b553f79fd16e76d [diff]
diff --git a/include/types/backend.h b/include/types/backend.h
index e392a7b..2b35860 100644
--- a/include/types/backend.h
+++ b/include/types/backend.h

@@ -44,7 +44,16 @@
 #define BE_LB_ALGO_PH	(BE_LB_PROP_L7  | 0x04) /* balance on URL parameter hash */
 
 /* various constants */
-#define BE_WEIGHT_SCALE 256             /* scale between user weight and effective weight */
+
+/* The scale factor between user weight an effective weight allows smooth
+ * weight modulation even with small weights (eg: 1). It should not be too high
+ * though because it limits the number of servers in FWRR mode in order to
+ * prevent any integer overflow. The max number of servers per backend is
+ * limited to about 2^32/255^2/scale ~= 66051/scale. A scale of 16 looks like
+ * a good value, as it allows more than 4000 servers per backend while leaving
+ * modulation steps of about 6% for servers with the lowest weight (1).
+ */
+#define BE_WEIGHT_SCALE 16
 
 #endif /* _TYPES_BACKEND_H */
 

diff --git a/include/types/server.h b/include/types/server.h
index 36ae7cb..614775b 100644
--- a/include/types/server.h
+++ b/include/types/server.h

@@ -62,6 +62,12 @@
 #define SRV_CHK_RUNNING 0x0002   /* server seen as running */
 #define SRV_CHK_DISABLE 0x0004   /* server returned a "disable" code */
 
+/* various constants */
+#define SRV_UWGHT_RANGE 256
+#define SRV_UWGHT_MAX   (SRV_UWGHT_RANGE - 1)
+#define SRV_EWGHT_RANGE (SRV_UWGHT_RANGE * BE_WEIGHT_SCALE)
+#define SRV_EWGHT_MAX   (SRV_UWGHT_MAX   * BE_WEIGHT_SCALE)
+
 struct server {
 	struct server *next;
 	int state;				/* server state (SRV_*) */

diff --git a/src/backend.c b/src/backend.c
index 4dc2b9c..1eed377 100644
--- a/src/backend.c
+++ b/src/backend.c

@@ -511,8 +511,7 @@
  */
 static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
 {
-	/* eweight can be as high as 256*255 */
-	s->lb_node.key = BE_WEIGHT_SCALE*255 - s->eweight;
+	s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
 	eb32_insert(root, &s->lb_node);
 	s->lb_tree = root;
 }
@@ -598,17 +597,17 @@
 		fwrr_queue_by_weight(grp->next, s);
 	}
 	else {
-		/* FIXME: we want to multiply by a constant to avoid overrides
-		 * after weight changes, but this can easily overflow on 32-bit
-		 * values. We need to change this for a 64-bit tree, and keep
-		 * the 65536 factor for optimal smoothness (both rweight and
-		 * eweight are 16 bit entities). s->npos is bound by the number
-		 * of servers times the maximum eweight (~= nsrv << 16).
+		/* The sorting key is stored in units of s->npos * user_weight
+		 * in order to avoid overflows. As stated in backend.h, the
+		 * lower the scale, the rougher the weights modulation, and the
+		 * higher the scale, the lower the number of servers without
+		 * overflow. With this formula, the result is always positive,
+		 * so we can use eb3é_insert().
 		 */
-		//s->lb_node.key = grp->curr_weight * s->npos + s->rweight - s->eweight;
-		//s->lb_node.key = 65536 * s->npos + s->rweight - s->eweight;
-		s->lb_node.key = 16 * s->npos + (s->rweight - s->eweight) / 4096;
-		eb32i_insert(&grp->curr, &s->lb_node);
+		s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
+			(unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
+
+		eb32_insert(&grp->curr, &s->lb_node);
 		s->lb_tree = &grp->curr;
 	}
 }

diff --git a/tests/filltab25.c b/tests/filltab25.c
index 8b983f4..02802bd 100644
--- a/tests/filltab25.c
+++ b/tests/filltab25.c

@@ -35,7 +35,7 @@
 /* queue a server in the weights tree */
 void queue_by_weight(struct eb_root *root, struct srv *s) {
 	s->node.key = 255 - s->w;
-	eb32i_insert(root, &s->node);
+	eb32_insert(root, &s->node);
 	s->tree = root;
 }
 
@@ -43,7 +43,7 @@
 void queue_by_weight_0(struct eb_root *root, struct srv *s) {
 	if (s->w) {
 		s->node.key = 255 - s->w;
-		eb32i_insert(root, &s->node);
+		eb32_insert(root, &s->node);
 		s->tree = root;
 	} else {
 		s->tree = NULL;
@@ -62,13 +62,25 @@
 		/* put into next tree */
 		s->next -= sw; // readjust next in case we could finally take this back to current.
 		queue_by_weight_0(next_tree, s);
-	} else { /* FIXME: sw can be smaller than s->rem! */
-		//s->node.key = sw * s->next + s->rem - s->w;
-		//s->node.key = 65536 * s->next + s->rem - s->w;
-		//s->node.key = 256*s->next + (s->rem / 256);
-		//s->node.key = 256*s->next + ((s->rem - s->w) / 256);
-		s->node.key = 16*s->next + ((s->rem - s->w) / 4096);
-		eb32i_insert(&tree_0, &s->node);
+	} else {
+		// The overflow problem is caused by the scale we want to apply to user weight
+		// to turn it into effective weight. Since this is only used to provide a smooth
+		// slowstart on very low weights (1), it is a pure waste. Thus, we just have to
+		// apply a small scaling factor and warn the user that slowstart is not very smooth
+		// on low weights.
+		// The max key is about ((scale*maxw)*(scale*maxw)*nbsrv)/ratio  (where the ratio is
+		// the arbitrary divide we perform in the examples above). Assuming that ratio==scale,
+		// this translates to maxkey=scale*maxw^2*nbsrv, so
+		//    max_nbsrv=2^32/255^2/scale ~= 66051/scale
+		// Using a scale of 16 is enough to support 4000 servers without overflow, providing
+		// 6% steps during slowstart.
+
+		s->node.key = 256 * s->next + (16*255 + s->rem - s->w) / 16;
+
+		/* check for overflows */
+		if ((int)s->node.key < 0)
+			printf(" OV: srv=%p w=%d rem=%d next=%d key=%d", s, s->w, s->rem, s->next, s->node.key);
+		eb32_insert(&tree_0, &s->node);
 		s->tree = &tree_0;
 	}
 }
@@ -122,6 +134,7 @@
 			get_srv_init(s);
 			if (s->w == 0)
 				node = NULL;
+			s->node.key = 0; // do not display random values
 		}
 	}
 	if (node)
@@ -299,8 +312,8 @@
 	next_iteration:
 		p++;
 		conns++;
-		if (/*conns == 30*/ /**/random()%1000 == 0/**/) {
-			int w = /*20*//**/random()%10000/**/;
+		if (/*conns == 30*/ /**/random()%100 == 0/**/) {
+			int w = /*20*//**/random()%4096/**/;
 			int num = /*1*//**/random()%nsrv/**/;
 			struct srv *s = &srv[num];
commit	b698f0f4a2a0a182dd4ca647d82f08379bd95b7e	[log] [tgz]
author	Willy Tarreau <w@1wt.eu>	Sun Dec 02 11:01:23 2007 +0100
committer	Willy Tarreau <w@1wt.eu>	Sun Dec 02 11:01:23 2007 +0100
tree	6a30b98626a9789c61c9f6c277e9a4cb3c195067
parent	d1cd2764563dabb1e71cfce88b553f79fd16e76d [diff]