MAJOR: pattern: add LRU-based cache on pattern matching

The principle of this cache is to have a global cache for all pattern
matching operations which rely on lists (reg, sub, dir, dom, ...). The
input data, the expression and a random seed are used as a hashing key.
The cached entries contains a pointer to the expression and a revision
number for that expression so that we don't accidently used obsolete
data after a pattern update or a very unlikely hash collision.

Regarding the risk of collisions, 10k entries at 10k req/s mean 1% risk
of a collision after 60 years, that's already much less than the memory's
reliability in most machines and more durable than most admin's life
expectancy. A collision will result in a valid result to be returned
for a different entry from the same list. If this is not acceptable,
the cache can be disabled using tune.pattern.cache-size.

A test on a file containing 10k small regex showed that the regex
matching was limited to 6k/s instead of 70k with regular strings.
When enabling the LRU cache, the performance was back to 70k/s.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index b6b3f08..f72339a 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -493,6 +493,7 @@
    - tune.maxaccept
    - tune.maxpollevents
    - tune.maxrewrite
+   - tune.pattern.cache-size
    - tune.pipesize
    - tune.rcvbuf.client
    - tune.rcvbuf.server
@@ -1050,6 +1051,25 @@
   larger than that. This means you don't have to worry about it when changing
   bufsize.
 
+tune.pattern.cache-size <number>
+  Sets the size of the pattern lookup cache to <number> entries. This is an LRU
+  cache which reminds previous lookups and their results. It is used by ACLs
+  and maps on slow pattern lookups, namely the ones using the "sub", "reg",
+  "dir", "dom", "end", "bin" match methods as well as the case-insensitive
+  strings. It applies to pattern expressions which means that it will be able
+  to memorize the result of a lookup among all the patterns specified on a
+  configuration line (including all those loaded from files). It automatically
+  invalidates entries which are updated using HTTP actions or on the CLI. The
+  default cache size is set to 10000 entries, which limits its footprint to
+  about 5 MB on 32-bit systems and 8 MB on 64-bit systems. There is a very low
+  risk of collision in this cache, which is in the order of the size of the
+  cache divided by 2^64. Typically, at 10000 requests per second with the
+  default cache size of 10000 entries, there's 1% chance that a brute force
+  attack could cause a single collision after 60 years, or 0.1% after 6 years.
+  This is considered much lower than the risk of a memory corruption caused by
+  aging components. If this is not acceptable, the cache can be disabled by
+  setting this parameter to 0.
+
 tune.pipesize <number>
   Sets the kernel pipe buffer size to this size (in bytes). By default, pipes
   are the default size for the system. But sometimes when using TCP splicing,
diff --git a/include/common/defaults.h b/include/common/defaults.h
index 63b2b89..6193bdc 100644
--- a/include/common/defaults.h
+++ b/include/common/defaults.h
@@ -295,4 +295,15 @@
 #ifndef TLS_TICKETS_NO
 #define TLS_TICKETS_NO 3
 #endif
+
+/* pattern lookup default cache size, in number of entries :
+ * 10k entries at 10k req/s mean 1% risk of a collision after 60 years, that's
+ * already much less than the memory's reliability in most machines and more
+ * durable than most admin's life expectancy. A collision will result in a
+ * valid result to be returned for a different entry from the same list.
+ */
+#ifndef DEFAULT_PAT_LRU_SIZE
+#define DEFAULT_PAT_LRU_SIZE 10000
+#endif
+
 #endif /* _COMMON_DEFAULTS_H */
diff --git a/include/types/global.h b/include/types/global.h
index afd7aef..7533bb0 100644
--- a/include/types/global.h
+++ b/include/types/global.h
@@ -142,6 +142,7 @@
 		int pipesize;      /* pipe size in bytes, system defaults if zero */
 		int max_http_hdr;  /* max number of HTTP headers, use MAX_HTTP_HDR if zero */
 		int cookie_len;    /* max length of cookie captures */
+		int pattern_cache; /* max number of entries in the pattern cache. */
 		int sslcachesize;  /* SSL cache size in session, defaults to 20000 */
 #ifdef USE_OPENSSL
 		int sslprivatecache; /* Force to use a private session cache even if nbproc > 1 */
diff --git a/src/cfgparse.c b/src/cfgparse.c
index edaee56..4f75256 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -907,6 +907,22 @@
 			goto out;
 		}
 	}
+	else if (!strcmp(args[0], "tune.pattern.cache-size")) {
+		if (*args[1]) {
+			global.tune.pattern_cache = atoi(args[1]);
+			if (global.tune.pattern_cache < 0) {
+				Alert("parsing [%s:%d] : '%s' expects a positive numeric value\n",
+				      file, linenum, args[0]);
+				err_code |= ERR_ALERT | ERR_FATAL;
+				goto out;
+			}
+		} else {
+			Alert("parsing [%s:%d] : '%s' expects a positive numeric value\n",
+			      file, linenum, args[0]);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+		}
+	}
 	else if (!strcmp(args[0], "uid")) {
 		if (global.uid != 0) {
 			Alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum);
diff --git a/src/haproxy.c b/src/haproxy.c
index 474179c..5822a8b 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -146,6 +146,7 @@
 		.maxrewrite = MAXREWRITE,
 		.chksize = BUFSIZE,
 		.reserved_bufs = RESERVED_BUFS,
+		.pattern_cache = DEFAULT_PAT_LRU_SIZE,
 #ifdef USE_OPENSSL
 		.sslcachesize = SSLCACHESIZE,
 		.ssl_default_dh_param = SSL_DEFAULT_DH_PARAM,
diff --git a/src/pattern.c b/src/pattern.c
index ebae85d..cbfa20d 100644
--- a/src/pattern.c
+++ b/src/pattern.c
@@ -24,6 +24,8 @@
 #include <proto/sample.h>
 
 #include <ebsttree.h>
+#include <import/lru.h>
+#include <import/xxhash.h>
 
 char *pat_match_names[PAT_MATCH_NUM] = {
 	[PAT_MATCH_FOUND] = "found",
@@ -144,6 +146,9 @@
 /* This is the root of the list of all pattern_ref avalaibles. */
 struct list pattern_reference = LIST_HEAD_INIT(pattern_reference);
 
+static struct lru64_head *pat_lru_tree;
+static unsigned long long pat_lru_seed;
+
 /*
  *
  * The following functions are not exported and are used by internals process
@@ -443,6 +448,8 @@
 	struct pattern_tree *elt;
 	struct pattern_list *lst;
 	struct pattern *pattern;
+	struct pattern *ret = NULL;
+	struct lru64 *lru = NULL;
 
 	/* Lookup a string in the expression's pattern tree. */
 	if (!eb_is_empty(&expr->pattern_tree)) {
@@ -468,6 +475,15 @@
 	}
 
 	/* look in the list */
+	if (pat_lru_tree) {
+		unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
+
+		lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
+				pat_lru_tree, expr, expr->revision);
+		if (lru && lru->domain)
+			return lru->data;
+	}
+
 	list_for_each_entry(lst, &expr->patterns, list) {
 		pattern = &lst->pat;
 
@@ -476,11 +492,16 @@
 
 		icase = expr->mflags & PAT_MF_IGNORE_CASE;
 		if ((icase && strncasecmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0) ||
-		    (!icase && strncmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0))
-			return pattern;
+		    (!icase && strncmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0)) {
+			ret = pattern;
+			break;
+		}
 	}
 
-	return NULL;
+	if (lru)
+		lru64_commit(lru, ret, expr, expr->revision);
+
+	return ret;
 }
 
 /* NB: For two binaries buf to be identical, it is required that their lengths match */
@@ -488,19 +509,34 @@
 {
 	struct pattern_list *lst;
 	struct pattern *pattern;
+	struct pattern *ret = NULL;
+	struct lru64 *lru = NULL;
+
+	if (pat_lru_tree) {
+		unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
 
-	/* Look in the list. */
+		lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
+				pat_lru_tree, expr, expr->revision);
+		if (lru && lru->domain)
+			return lru->data;
+	}
+
 	list_for_each_entry(lst, &expr->patterns, list) {
 		pattern = &lst->pat;
 
 		if (pattern->len != smp->data.str.len)
 			continue;
 
-		if (memcmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0)
-			return pattern;
+		if (memcmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0) {
+			ret = pattern;
+			break;
+		}
 	}
 
-	return NULL;
+	if (lru)
+		lru64_commit(lru, ret, expr, expr->revision);
+
+	return ret;
 }
 
 /* Executes a regex. It temporarily changes the data to add a trailing zero,
@@ -510,15 +546,31 @@
 {
 	struct pattern_list *lst;
 	struct pattern *pattern;
+	struct pattern *ret = NULL;
+	struct lru64 *lru = NULL;
 
-	/* look in the list */
+	if (pat_lru_tree) {
+		unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
+
+		lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
+				pat_lru_tree, expr, expr->revision);
+		if (lru && lru->domain)
+			return lru->data;
+	}
+
 	list_for_each_entry(lst, &expr->patterns, list) {
 		pattern = &lst->pat;
 
-		if (regex_exec2(pattern->ptr.reg, smp->data.str.str, smp->data.str.len))
-			return pattern;
+		if (regex_exec2(pattern->ptr.reg, smp->data.str.str, smp->data.str.len)) {
+			ret = pattern;
+			break;
+		}
 	}
-	return NULL;
+
+	if (lru)
+		lru64_commit(lru, ret, expr, expr->revision);
+
+	return ret;
 }
 
 /* Checks that the pattern matches the beginning of the tested string. */
@@ -530,6 +582,8 @@
 	struct pattern_tree *elt;
 	struct pattern_list *lst;
 	struct pattern *pattern;
+	struct pattern *ret = NULL;
+	struct lru64 *lru = NULL;
 
 	/* Lookup a string in the expression's pattern tree. */
 	if (!eb_is_empty(&expr->pattern_tree)) {
@@ -555,6 +609,15 @@
 	}
 
 	/* look in the list */
+	if (pat_lru_tree) {
+		unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
+
+		lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
+				pat_lru_tree, expr, expr->revision);
+		if (lru && lru->domain)
+			return lru->data;
+	}
+
 	list_for_each_entry(lst, &expr->patterns, list) {
 		pattern = &lst->pat;
 
@@ -566,9 +629,14 @@
 		    (!icase && strncmp(pattern->ptr.str, smp->data.str.str, pattern->len) != 0))
 			continue;
 
-		return pattern;
+		ret = pattern;
+		break;
 	}
-	return NULL;
+
+	if (lru)
+		lru64_commit(lru, ret, expr, expr->revision);
+
+	return ret;
 }
 
 /* Checks that the pattern matches the end of the tested string. */
@@ -577,6 +645,17 @@
 	int icase;
 	struct pattern_list *lst;
 	struct pattern *pattern;
+	struct pattern *ret = NULL;
+	struct lru64 *lru = NULL;
+
+	if (pat_lru_tree) {
+		unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
+
+		lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
+				pat_lru_tree, expr, expr->revision);
+		if (lru && lru->domain)
+			return lru->data;
+	}
 
 	list_for_each_entry(lst, &expr->patterns, list) {
 		pattern = &lst->pat;
@@ -589,9 +668,14 @@
 		    (!icase && strncmp(pattern->ptr.str, smp->data.str.str + smp->data.str.len - pattern->len, pattern->len) != 0))
 			continue;
 
-		return pattern;
+		ret = pattern;
+		break;
 	}
-	return  NULL;
+
+	if (lru)
+		lru64_commit(lru, ret, expr, expr->revision);
+
+	return ret;
 }
 
 /* Checks that the pattern is included inside the tested string.
@@ -604,6 +688,17 @@
 	char *c;
 	struct pattern_list *lst;
 	struct pattern *pattern;
+	struct pattern *ret = NULL;
+	struct lru64 *lru = NULL;
+
+	if (pat_lru_tree) {
+		unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr;
+
+		lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed),
+				pat_lru_tree, expr, expr->revision);
+		if (lru && lru->domain)
+			return lru->data;
+	}
 
 	list_for_each_entry(lst, &expr->patterns, list) {
 		pattern = &lst->pat;
@@ -617,19 +712,27 @@
 			for (c = smp->data.str.str; c <= end; c++) {
 				if (tolower(*c) != tolower(*pattern->ptr.str))
 					continue;
-				if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0)
-					return pattern;
+				if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0) {
+					ret = pattern;
+					goto leave;
+				}
 			}
 		} else {
 			for (c = smp->data.str.str; c <= end; c++) {
 				if (*c != *pattern->ptr.str)
 					continue;
-				if (strncmp(pattern->ptr.str, c, pattern->len) == 0)
-					return pattern;
+				if (strncmp(pattern->ptr.str, c, pattern->len) == 0) {
+					ret = pattern;
+					goto leave;
+				}
 			}
 		}
 	}
-	return NULL;
+ leave:
+	if (lru)
+		lru64_commit(lru, ret, expr, expr->revision);
+
+	return ret;
 }
 
 /* This one is used by other real functions. It checks that the pattern is
@@ -2321,6 +2424,10 @@
 	struct pat_ref *ref, *ref2, *ref3;
 	struct list pr = LIST_HEAD_INIT(pr);
 
+	pat_lru_seed = random();
+	if (global.tune.pattern_cache)
+		pat_lru_tree = lru64_new(global.tune.pattern_cache);
+
 	list_for_each_entry(ref, &pattern_reference, list) {
 		if (ref->unique_id == -1) {
 			/* Look for the first free id. */