MEDIUM: backend: add the 'first' balancing algorithm

The principle behind this load balancing algorithm was first imagined
and modeled by Steen Larsen, then iteratively refined through several
work sessions until it fully addressed its original goal.

The purpose of this algorithm is to always use the smallest number of
servers so that extra servers can be powered off during non-intensive
hours. External tools may be used to do that work, possibly by
monitoring the servers' activity locally.

The first server with available connection slots receives the connection.
The servers are chosen from the lowest numeric identifier to the highest
(see server parameter "id"), which defaults to the server's position in
the farm. Once a server reaches its maxconn value, the next server is used.
It does not make sense to use this algorithm without setting maxconn. Note
that it can however make sense to use minconn so that servers are not used
at full load before starting new servers, and so that introduction of new
servers requires a progressively increasing load (the number of servers
would more or less follow the square root of the load until maxconn is
reached). This algorithm ignores the server weight, and is more beneficial
to long sessions such as RDP or IMAP than HTTP, though it can be useful
there too.
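
As an illustration, a backend could be set up as follows ("balance
first" selects the algorithm added by this patch; the backend name,
addresses and numbers are only hypothetical examples):

    backend dyn_pool
        balance first
        fullconn 1000
        server srv1 10.0.0.1:80 maxconn 100 minconn 10
        server srv2 10.0.0.2:80 maxconn 100 minconn 10
        server srv3 10.0.0.3:80 maxconn 100 minconn 10

Here srv1 receives all connections until its dynamic maxconn is
reached, then srv2 starts being used, and so on, so that at low load
the last servers remain idle and may be powered off by external tools.
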
diff --git a/src/lb_fas.c b/src/lb_fas.c
new file mode 100644
index 0000000..07baf5d
--- /dev/null
+++ b/src/lb_fas.c
@@ -0,0 +1,321 @@
+/*
+ * First Available Server load balancing algorithm.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <common/compat.h>
+#include <common/config.h>
+#include <common/debug.h>
+#include <eb32tree.h>
+
+#include <types/global.h>
+#include <types/server.h>
+
+#include <proto/backend.h>
+#include <proto/queue.h>
+
+
+/* Remove a server from a tree. It must have previously been dequeued. This
+ * function is meant to be called when a server is going down or has its
+ * weight disabled.
+ */
+static inline void fas_remove_from_tree(struct server *s)
+{
+	s->lb_tree = NULL;
+}
+
+/* simply removes a server from a tree */
+static inline void fas_dequeue_srv(struct server *s)
+{
+	eb32_delete(&s->lb_node);
+}
+
+/* Queue a server in its associated tree, assuming the weight is >0.
+ * Servers are sorted by unique ID so that we send all connections to the first
+ * available server in declaration order (or ID order) until its maxconn is
+ * reached. It is important to understand that the server weight is not used
+ * here.
+ */
+static inline void fas_queue_srv(struct server *s)
+{
+	s->lb_node.key = s->puid;
+	eb32_insert(s->lb_tree, &s->lb_node);
+}
+
+/* Re-position the server in the FAS tree after it has been assigned one
+ * connection or after it has released one. Note that it is possible that
+ * the server has been moved out of the tree due to failed health-checks.
+ */
+static void fas_srv_reposition(struct server *s)
+{
+	if (!s->lb_tree)
+		return;
+	fas_dequeue_srv(s);
+	fas_queue_srv(s);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ */
+static void fas_set_server_status_down(struct server *srv)
+{
+	struct proxy *p = srv->proxy;
+
+	if (srv->state == srv->prev_state &&
+	    srv->eweight == srv->prev_eweight)
+		return;
+
+	if (srv_is_usable(srv->state, srv->eweight))
+		goto out_update_state;
+
+	if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
+		/* server was already down */
+		goto out_update_backend;
+
+	if (srv->state & SRV_BACKUP) {
+		p->lbprm.tot_wbck -= srv->prev_eweight;
+		p->srv_bck--;
+
+		if (srv == p->lbprm.fbck) {
+			/* we lost the first backup server in a single-backup
+			 * configuration, we must search another one.
+			 */
+			struct server *srv2 = p->lbprm.fbck;
+			do {
+				srv2 = srv2->next;
+			} while (srv2 &&
+				 !((srv2->state & SRV_BACKUP) &&
+				   srv_is_usable(srv2->state, srv2->eweight)));
+			p->lbprm.fbck = srv2;
+		}
+	} else {
+		p->lbprm.tot_wact -= srv->prev_eweight;
+		p->srv_act--;
+	}
+
+	fas_dequeue_srv(srv);
+	fas_remove_from_tree(srv);
+
+ out_update_backend:
+	/* check/update tot_used, tot_weight */
+	update_backend_weight(p);
+ out_update_state:
+	srv->prev_state = srv->state;
+	srv->prev_eweight = srv->eweight;
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ */
+static void fas_set_server_status_up(struct server *srv)
+{
+	struct proxy *p = srv->proxy;
+
+	if (srv->state == srv->prev_state &&
+	    srv->eweight == srv->prev_eweight)
+		return;
+
+	if (!srv_is_usable(srv->state, srv->eweight))
+		goto out_update_state;
+
+	if (srv_is_usable(srv->prev_state, srv->prev_eweight))
+		/* server was already up */
+		goto out_update_backend;
+
+	if (srv->state & SRV_BACKUP) {
+		srv->lb_tree = &p->lbprm.fas.bck;
+		p->lbprm.tot_wbck += srv->eweight;
+		p->srv_bck++;
+
+		if (!(p->options & PR_O_USE_ALL_BK)) {
+			if (!p->lbprm.fbck) {
+				/* there was no backup server anymore */
+				p->lbprm.fbck = srv;
+			} else {
+				/* we may have restored a backup server prior to fbck,
+				 * in which case it should replace it.
+				 */
+				struct server *srv2 = srv;
+				do {
+					srv2 = srv2->next;
+				} while (srv2 && (srv2 != p->lbprm.fbck));
+				if (srv2)
+					p->lbprm.fbck = srv;
+			}
+		}
+	} else {
+		srv->lb_tree = &p->lbprm.fas.act;
+		p->lbprm.tot_wact += srv->eweight;
+		p->srv_act++;
+	}
+
+	/* note that eweight cannot be 0 here */
+	fas_queue_srv(srv);
+
+ out_update_backend:
+	/* check/update tot_used, tot_weight */
+	update_backend_weight(p);
+ out_update_state:
+	srv->prev_state = srv->state;
+	srv->prev_eweight = srv->eweight;
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ */
+static void fas_update_server_weight(struct server *srv)
+{
+	int old_state, new_state;
+	struct proxy *p = srv->proxy;
+
+	if (srv->state == srv->prev_state &&
+	    srv->eweight == srv->prev_eweight)
+		return;
+
+	/* If changing the server's weight changes its state, we simply apply
+	 * the procedures we already have for status change. If the state
+	 * remains down, the server is not in any tree, so it's as easy as
+	 * updating its values. If the state remains up with different weights,
+	 * there are some computations to perform to find a new place and
+	 * possibly a new tree for this server.
+	 */
+
+	old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
+	new_state = srv_is_usable(srv->state, srv->eweight);
+
+	if (!old_state && !new_state) {
+		srv->prev_state = srv->state;
+		srv->prev_eweight = srv->eweight;
+		return;
+	}
+	else if (!old_state && new_state) {
+		fas_set_server_status_up(srv);
+		return;
+	}
+	else if (old_state && !new_state) {
+		fas_set_server_status_down(srv);
+		return;
+	}
+
+	if (srv->lb_tree)
+		fas_dequeue_srv(srv);
+
+	if (srv->state & SRV_BACKUP) {
+		p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
+		srv->lb_tree = &p->lbprm.fas.bck;
+	} else {
+		p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
+		srv->lb_tree = &p->lbprm.fas.act;
+	}
+
+	fas_queue_srv(srv);
+
+	update_backend_weight(p);
+	srv->prev_state = srv->state;
+	srv->prev_eweight = srv->eweight;
+}
+
+/* This function is responsible for building the trees for the first
+ * available server algorithm. It also sets p->lbprm.wdiv to the eweight to
+ * uweight ratio. Both active and backup groups are initialized.
+ */
+void fas_init_server_tree(struct proxy *p)
+{
+	struct server *srv;
+	struct eb_root init_head = EB_ROOT;
+
+	p->lbprm.set_server_status_up   = fas_set_server_status_up;
+	p->lbprm.set_server_status_down = fas_set_server_status_down;
+	p->lbprm.update_server_eweight  = fas_update_server_weight;
+	p->lbprm.server_take_conn = fas_srv_reposition;
+	p->lbprm.server_drop_conn = fas_srv_reposition;
+
+	p->lbprm.wdiv = BE_WEIGHT_SCALE;
+	for (srv = p->srv; srv; srv = srv->next) {
+		srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
+		srv->prev_state = srv->state;
+	}
+
+	recount_servers(p);
+	update_backend_weight(p);
+
+	p->lbprm.fas.act = init_head;
+	p->lbprm.fas.bck = init_head;
+
+	/* queue active and backup servers in two distinct groups */
+	for (srv = p->srv; srv; srv = srv->next) {
+		if (!srv_is_usable(srv->state, srv->eweight))
+			continue;
+		srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fas.bck : &p->lbprm.fas.act;
+		fas_queue_srv(srv);
+	}
+}
+
+/* Return next server from the FAS tree in backend <p>. If the tree is empty,
+ * return NULL. Saturated servers are skipped.
+ */
+struct server *fas_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+	struct server *srv, *avoided;
+	struct eb32_node *node;
+
+	srv = avoided = NULL;
+
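+	/* use the active tree when at least one active server is usable;
+	 * otherwise fall back to the designated first backup server in
+	 * single-backup mode, or to the backup tree */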
+	if (p->srv_act)
+		node = eb32_first(&p->lbprm.fas.act);
+	else if (p->lbprm.fbck)
+		return p->lbprm.fbck;
+	else if (p->srv_bck)
+		node = eb32_first(&p->lbprm.fas.bck);
+	else
+		return NULL;
+
+	while (node) {
+		/* OK, we have a server. However, it may be saturated, in which
+		 * case we don't want to reconsider it for now, so we'll simply
+		 * skip it. Same if it's the server we try to avoid, in which
+		 * case we simply remember it for later use if needed.
+		 */
+		struct server *s;
+
+		s = eb32_entry(node, struct server, lb_node);
+		if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
+			if (s != srvtoavoid) {
+				srv = s;
+				break;
+			}
+			avoided = s;
+		}
+		node = eb32_next(node);
+	}
+
+	if (!srv)
+		srv = avoided;
+
+	return srv;
+}
+
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */