[MEDIUM] implement the URI hash algorithm
Guillaume Dallaire contributed the URI hashing algorithm for
use with proxy-caches. It provides the advantage of optimizing
the cache hit rate.
diff --git a/doc/haproxy-en.txt b/doc/haproxy-en.txt
index d04a719..76d54c4 100644
--- a/doc/haproxy-en.txt
+++ b/doc/haproxy-en.txt
@@ -861,8 +861,9 @@
port binding and port mapping. To use this mode, the 'balance' keyword is used,
followed by the selected algorithm. Up to version 1.2.11, only 'roundrobin' was
available, which is also the default value if unspecified. Starting with
-version 1.2.12, a new 'source' keyword appeared. In this mode, there will be no
-dispatch address, but the proxy needs at least one server.
+version 1.2.12, a new 'source' keyword appeared. A new 'uri' keyword was added
+in version 1.3.10. With the internal load balancer, there is no dispatch
+address, but the proxy needs at least one server.
Example : same as the last one, with internal load balancer
---------
@@ -959,6 +960,24 @@
server web1 192.168.1.1 cookie server01
server web2 192.168.1.2 cookie server02
+As indicated above, the 'uri' keyword was introduced in version 1.3.10. It is
+useful when load-balancing between reverse proxy-caches, because it will hash
+the URI and use the hash result to select a server, thus optimizing the hit
+rate on the caches, because the same URI will always reach the same cache. This
+keyword is only allowed in HTTP mode.
+
+Example :
+---------
+
+# Always send a given URI to the same server
+
+ listen http_proxy
+ bind :3128
+ mode http
+ balance uri
+ server squid1 192.168.1.1
+ server squid2 192.168.1.2
+
3.1) Server monitoring
----------------------
diff --git a/doc/haproxy-fr.txt b/doc/haproxy-fr.txt
index 6da1e03..8e83eab 100644
--- a/doc/haproxy-fr.txt
+++ b/doc/haproxy-fr.txt
@@ -863,9 +863,10 @@
cela, on précise le mot clé 'balance' dans la définition du service,
éventuellement suivi du nom d'un algorithme de répartition. Jusqu'à la version
1.2.11, seul 'roundrobin' était géré, et c'est aussi la valeur implicite par
-défaut. Avec la version 1.2.12, le nouveau mot clé 'source' est apparu. Il est
-évident qu'en cas d'utilisation du répartiteur interne, il ne faudra pas
-spécifier d'adresse de dispatch, et qu'il faudra au moins un serveur.
+défaut. Avec la version 1.2.12, le nouveau mot clé 'source' est apparu. La
+version 1.3.10 a également apporté le mot clé 'uri'. Il est évident qu'en cas
+d'utilisation du répartiteur interne, il ne faudra pas spécifier d'adresse de
+dispatch, et qu'il faudra au moins un serveur.
Exemple : même que précédemment en répartition interne
---------
@@ -965,6 +966,25 @@
server web1 192.168.1.1 cookie server01
server web2 192.168.1.2 cookie server02
+De plus, tel qu'indiqué ci-dessus, la version 1.3.10 a introduit le mot clé
+'uri'. Il est très pratique dans le cas de répartition de charge entre des
+reverse-proxy-caches, parce qu'il utilisera le résultat d'un hachage de l'URI
+pour choisir un serveur, ce qui aura pour effet d'optimiser le taux de 'hit'
+du fait que la même URI sera toujours envoyée au même cache. Ce mot-clé n'est
+autorisé qu'en mode HTTP.
+
+Exemple :
+---------
+
+# Envoie toujours une URI donnée au même serveur
+
+ listen http_proxy
+ bind :3128
+ mode http
+ balance uri
+ server squid1 192.168.1.1
+ server squid2 192.168.1.2
+
3.1) Surveillance des serveurs
------------------------------
diff --git a/include/proto/backend.h b/include/proto/backend.h
index b4c641e..f0295b0 100644
--- a/include/proto/backend.h
+++ b/include/proto/backend.h
@@ -2,7 +2,7 @@
include/proto/backend.h
Functions prototypes for the backend.
- Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu
+ Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -116,6 +116,36 @@
return px->srv_map[h];
}
+/*
+ * This function tries to find a running server for the proxy <px> following
+ * the URI hash method. It hashes up to <uri_len> bytes starting at <uri>; in
+ * order to optimize cache hits, the hash computation ends at the question
+ * mark. Depending on the number of active/backup servers, it will either look
+ * for active servers, or for backup servers (presumably via the contents of
+ * <px->srv_map>, which is filled elsewhere - TODO confirm). The map entry
+ * selected by the hash is returned, or NULL if the map is empty.
+ *
+ * This code was contributed by Guillaume Dallaire, who also selected this hash
+ * algorithm out of about ten candidates because it gave him the best results.
+ */
+static inline struct server *get_server_uh(struct proxy *px, char *uri, int uri_len)
+{
+ unsigned long hash = 0;
+ int c;
+
+ if (px->srv_map_sz == 0) /* empty map: nothing to pick, and avoids a modulo by zero below */
+ return NULL;
+
+ while (uri_len--) {
+ c = *uri++;
+ if (c == '?') /* the query string is left out of the hash */
+ break;
+ hash = c + (hash << 6) + (hash << 16) - hash; /* i.e. hash * 65599 + c, with unsigned wrap-around */
+ }
+
+ return px->srv_map[hash % px->srv_map_sz];
+}
+
#endif /* _PROTO_BACKEND_H */
diff --git a/include/types/backend.h b/include/types/backend.h
index d6079ad..e2d4efc 100644
--- a/include/types/backend.h
+++ b/include/types/backend.h
@@ -49,7 +49,6 @@
#define PR_O_USE_ALL_BK 0x00100000 /* load-balance between backup servers */
#define PR_O_FORCE_CLO 0x00200000 /* enforce the connection close immediately after server response */
#define PR_O_BALANCE_SH 0x00400000 /* balance on source IP hash */
-#define PR_O_BALANCE (PR_O_BALANCE_RR | PR_O_BALANCE_SH)
#define PR_O_ABRT_CLOSE 0x00800000 /* immediately abort request when client closes */
#define PR_O_SSL3_CHK 0x01000000 /* use SSLv3 CLIENT_HELLO packets for server health */
@@ -58,6 +57,8 @@
#define PR_O_TPXY_CLI 0x06000000 /* bind to the client's IP+port when connect()ing */
#define PR_O_TPXY_MASK 0x06000000 /* bind to a non-local address when connect()ing */
#define PR_O_TCPSPLICE 0x08000000 /* delegate data transfer to linux kernel's tcp_splice */
+#define PR_O_BALANCE_UH 0x10000000 /* balance on URI hash */
+#define PR_O_BALANCE (PR_O_BALANCE_RR | PR_O_BALANCE_SH | PR_O_BALANCE_UH)
#endif /* _TYPES_BACKEND_H */
diff --git a/src/backend.c b/src/backend.c
index b357646..bb57050 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -187,6 +187,12 @@
(void *)&((struct sockaddr_in *)&s->cli_addr)->sin_addr,
len);
}
+ else if (s->be->options & PR_O_BALANCE_UH) {
+ /* URI hashing */
+ s->srv = get_server_uh(s->be,
+ s->txn.req.sol + s->txn.req.sl.rq.u,
+ s->txn.req.sl.rq.u_l);
+ }
else /* unknown balancing algorithm */
return SRV_STATUS_INTERNAL;
}
diff --git a/src/cfgparse.c b/src/cfgparse.c
index 00b4908..feb7b60 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -1129,18 +1129,26 @@
if (*(args[1])) {
if (!strcmp(args[1], "roundrobin")) {
+ curproxy->options &= ~PR_O_BALANCE;
curproxy->options |= PR_O_BALANCE_RR;
}
else if (!strcmp(args[1], "source")) {
+ curproxy->options &= ~PR_O_BALANCE;
curproxy->options |= PR_O_BALANCE_SH;
}
+ else if (!strcmp(args[1], "uri")) {
+ curproxy->options &= ~PR_O_BALANCE;
+ curproxy->options |= PR_O_BALANCE_UH;
+ }
else {
- Alert("parsing [%s:%d] : '%s' only supports 'roundrobin' and 'source' options.\n", file, linenum, args[0]);
+ Alert("parsing [%s:%d] : '%s' only supports 'roundrobin', 'source' and 'uri' options.\n", file, linenum, args[0]);
return -1;
}
}
- else /* if no option is set, use round-robin by default */
+ else {/* if no option is set, use round-robin by default */
+ curproxy->options &= ~PR_O_BALANCE;
curproxy->options |= PR_O_BALANCE_RR;
+ }
}
else if (!strcmp(args[0], "server")) { /* server address */
int cur_arg;
@@ -2236,6 +2244,13 @@
Warning("parsing %s : monitor-uri will be ignored for %s '%s'.\n",
file, proxy_type_str(curproxy), curproxy->id);
}
+ if (curproxy->options & PR_O_BALANCE_UH) {
+ curproxy->options &= ~PR_O_BALANCE;
+ curproxy->options |= PR_O_BALANCE_RR;
+
+ Warning("parsing %s : URI hash will be ignored for %s '%s'. Falling back to round robin.\n",
+ file, proxy_type_str(curproxy), curproxy->id);
+ }
}
else if (curproxy->mode == PR_MODE_HTTP) { /* HTTP PROXY */
if ((curproxy->cookie_name != NULL) && ((newsrv = curproxy->srv) == NULL)) {