MEDIUM: vars: replace the global name index with a hash

The global table of known variables names can only grow and was designed
for static names that are registered at boot. Nowadays it's possible to
set dynamic variable names from Lua or from the CLI, which causes a real
problem that was partially addressed in 2.2 with commit 4e172c93f
("MEDIUM: lua: Add `ifexist` parameter to `set_var`"). Please see github
issue #624 for more context.

This patch simplifies all this by removing the need for a central
registry of known names, and storing 64-bit hashes instead. This is
highly sufficient given the low number of variables in each context.
The hash is calculated using XXH64() which is bijective over the 64-bit
space thus is guaranteed collision-free for 1..8 chars. Above that the
risk remains around 1/2^64 per extra 8 chars so in practice this is
highly sufficient for our usage. A random seed is used at boot to seed
the hash so that it's not attackable from Lua for example.

There's one particular nit though. The "ifexist" hack mentioned above
is now limited to variables of scope "proc" only, and will only match
variables that were already created or declared, but will now verify
the scope as well. This may affect some bogus Lua scripts and SPOE
agents which used to accidentally work because a similarly named
variable used to exist in a different scope. These ones may need to be
fixed to comply with the doc.

Now we can sum up the situation as this one:
  - ephemeral variables (scopes sess, txn, req, res) will always be
    usable, regardless of any prior declaration. This effectively
    addresses the most problematic change from the commit above that
    in order to work well could have required some script auditing ;

  - process-wide variables (scope proc) that are mentioned in the
    configuration, referenced in a "register-var-names" SPOE directive,
    or created via "set-var" in the global section or the CLI, are
    permanent and will always accept to be set, with or without the
    "ifexist" restriction (SPOE uses this internally as well).

  - process-wide variables (scope proc) that are only created via a
    set-var() tcp/http action, via Lua's set_var() calls, or via an
    SPOE with the "force-set-var" directive), will not be permanent
    but will always accept to be replaced once they are created, even
    if "ifexist" is present

  - process-wide variables (scope proc) that do not exist will only
    support being created via the set-var() tcp/http action, Lua's
    set_var() calls without "ifexist", or an SPOE declared with
    "force-set-var".

This means that non-proc variables do not care about "ifexist" nor
prior declaration, and that using "ifexist" should most often be
reliable in Lua and that SPOE should most often work without any
prior declaration. It may be doable to turn "ifexist" to 1 by default
in Lua to further ease the transition. Note: regtests were adjusted.

Cc: Tim Düsterhus <tim@bastelstu.be>
diff --git a/src/vars.c b/src/vars.c
index cefd02a..98e1cfe 100644
--- a/src/vars.c
+++ b/src/vars.c
@@ -214,29 +214,19 @@
 	HA_RWLOCK_INIT(&vars->rwlock);
 }
 
-/* This function declares a new variable name. It returns a pointer
- * on the string identifying the name. This function assures that
- * the same name exists only once.
- *
- * This function check if the variable name is acceptable.
- *
- * The function returns NULL if an error occurs, and <err> is filled.
- * In this case, the HAProxy must be stopped because the structs are
- * left inconsistent. Otherwise, it returns the pointer on the global
- * name.
+/* This function returns a hash value and a scope for a variable name of a
+ * specified length. It makes sure that the scope is valid. It returns non-zero
+ * on success, 0 on failure. Neither hash nor scope may be NULL.
  */
-static char *register_name(const char *name, int len, enum vars_scope *scope,
-			   int alloc, char **err)
+static int vars_hash_name(const char *name, int len, enum vars_scope *scope,
+                         uint64_t *hash, char **err)
 {
-	int i;
-	char **var_names2;
 	const char *tmp;
-	char *res = NULL;
 
 	/* Check length. */
 	if (len == 0) {
 		memprintf(err, "Empty variable name cannot be accepted");
-		return res;
+		return 0;
 	}
 
 	/* Check scope. */
@@ -273,73 +263,32 @@
 	else {
 		memprintf(err, "invalid variable name '%.*s'. A variable name must be start by its scope. "
 		               "The scope can be 'proc', 'sess', 'txn', 'req', 'res' or 'check'", len, name);
-		return res;
-	}
-
-	if (alloc)
-		HA_RWLOCK_WRLOCK(VARS_LOCK, &var_names_rwlock);
-	else
-		HA_RWLOCK_RDLOCK(VARS_LOCK, &var_names_rwlock);
-
-
-	/* Look for existing variable name. */
-	for (i = 0; i < var_names_nb; i++)
-		if (strncmp(var_names[i], name, len) == 0 && var_names[i][len] == '\0') {
-			res = var_names[i];
-			goto end;
-		}
-
-	if (!alloc) {
-		res = NULL;
-		goto end;
-	}
-
-	/* Store variable name. If realloc fails, var_names remains valid */
-	var_names2 = realloc(var_names, (var_names_nb + 1) * sizeof(*var_names));
-	if (!var_names2) {
-		memprintf(err, "out of memory error");
-		res = NULL;
-		goto end;
-	}
-	var_names_nb++;
-	var_names = var_names2;
-	var_names[var_names_nb - 1] = malloc(len + 1);
-	if (!var_names[var_names_nb - 1]) {
-		memprintf(err, "out of memory error");
-		res = NULL;
-		goto end;
+		return 0;
 	}
-	memcpy(var_names[var_names_nb - 1], name, len);
-	var_names[var_names_nb - 1][len] = '\0';
 
 	/* Check variable name syntax. */
-	tmp = var_names[var_names_nb - 1];
-	while (*tmp) {
+	for (tmp = name; tmp < name + len; tmp++) {
 		if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
 			memprintf(err, "invalid syntax at char '%s'", tmp);
-			res = NULL;
-			goto end;
+			return 0;
 		}
-		tmp++;
 	}
-	res = var_names[var_names_nb - 1];
-
-  end:
-	if (alloc)
-		HA_RWLOCK_WRUNLOCK(VARS_LOCK, &var_names_rwlock);
-	else
-		HA_RWLOCK_RDUNLOCK(VARS_LOCK, &var_names_rwlock);
 
-	return res;
+	*hash = XXH3(name, len, var_name_hash_seed);
+	return 1;
 }
 
-/* This function returns an existing variable or returns NULL. */
-static inline struct var *var_get(struct vars *vars, const char *name)
+/* This function returns the variable from the given list that matches
+ * <name_hash> or returns NULL if not found. It's only a linked list since it
+ * is not expected to have many variables per scope (a few tens at best).
+ * The caller is responsible for ensuring that <vars> is properly locked.
+ */
+static struct var *var_get(struct vars *vars, uint64_t name_hash)
 {
 	struct var *var;
 
 	list_for_each_entry(var, &vars->head, l)
-		if (var->name == name)
+		if (var->name_hash == name_hash)
 			return var;
 	return NULL;
 }
@@ -356,11 +305,13 @@
 	return vars_get_by_desc(var_desc, smp, def);
 }
 
-/* This function tries to create variable <name> in scope <scope> and store
- * sample <smp> as its value. The stream and session are extracted from <smp>,
- * and the stream may be NULL when scope is SCOPE_SESS. In case there wouldn't
- * be enough memory to store the sample while the variable was already created,
- * it would be changed to a bool (which is memory-less).
+/* This function tries to create a variable whose name hash is <name_hash> in
+ * scope <scope> and store sample <smp> as its value.
+ *
+ * The stream and session are extracted from <smp>, whose stream may be NULL
+ * when scope is SCOPE_SESS. In case there wouldn't be enough memory to store
+ * the sample while the variable was already created, it would be changed to
+ * a bool (which is memory-less).
  *
  * Flags is a bitfield that may contain one of the following flags:
  *   - VF_UPDATEONLY: if the scope is SCOPE_PROC, the variable may only be
@@ -370,7 +321,7 @@
  *
  * It returns 0 on failure, non-zero on success.
  */
-static int var_set(const char *name, enum vars_scope scope, struct sample *smp, uint flags)
+static int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp, uint flags)
 {
 	struct vars *vars;
 	struct var *var;
@@ -383,7 +334,7 @@
 	HA_RWLOCK_WRLOCK(VARS_LOCK, &vars->rwlock);
 
 	/* Look for existing variable name. */
-	var = var_get(vars, name);
+	var = var_get(vars, name_hash);
 
 	if (var) {
 		if (flags & VF_CREATEONLY) {
@@ -417,7 +368,7 @@
 		if (!var)
 			goto unlock;
 		LIST_APPEND(&vars->head, &var->l);
-		var->name = name;
+		var->name_hash = name_hash;
 		var->flags = flags & VF_PERMANENT;
 	}
 
@@ -485,11 +436,11 @@
 	return ret;
 }
 
-/* This unsets variable <name> from scope <scope>, using the session and stream
- * found in <smp>. Note that stream may be null for SCOPE_SESS. Returns 0 if
- * the scope was not found otherwise 1.
+/* Deletes a variable matching name hash <name_hash> and scope <scope> for the
+ * session and stream found in <smp>. Note that stream may be null for
+ * SCOPE_SESS. Returns 0 if the scope was not found otherwise 1.
  */
-static int var_unset(const char *name, enum vars_scope scope, struct sample *smp)
+static int var_unset(uint64_t name_hash, enum vars_scope scope, struct sample *smp)
 {
 	struct vars *vars;
 	struct var  *var;
@@ -501,7 +452,7 @@
 
 	/* Look for existing variable name. */
 	HA_RWLOCK_WRLOCK(VARS_LOCK, &vars->rwlock);
-	var = var_get(vars, name);
+	var = var_get(vars, name_hash);
 	if (var) {
 		size = var_clear(var, 0);
 		var_accounting_diff(vars, smp->sess, smp->strm, -size);
@@ -513,13 +464,19 @@
 /* Returns 0 if fails, else returns 1. */
 static int smp_conv_store(const struct arg *args, struct sample *smp, void *private)
 {
-	return var_set(args[0].data.var.name, args[0].data.var.scope, smp, 0);
+	uint64_t seed = var_name_hash_seed;
+	uint64_t name_hash = XXH3(smp->data.u.str.area, smp->data.u.str.data, seed);
+
+	return var_set(name_hash, args[0].data.var.scope, smp, 0);
 }
 
 /* Returns 0 if fails, else returns 1. */
 static int smp_conv_clear(const struct arg *args, struct sample *smp, void *private)
 {
-	return var_unset(args[0].data.var.name, args[0].data.var.scope, smp);
+	uint64_t seed = var_name_hash_seed;
+	uint64_t name_hash = XXH3(smp->data.u.str.area, smp->data.u.str.data, seed);
+
+	return var_unset(name_hash, args[0].data.var.scope, smp);
 }
 
 /* This functions check an argument entry and fill it with a variable
@@ -528,9 +485,9 @@
  */
 int vars_check_arg(struct arg *arg, char **err)
 {
-	char *name;
 	enum vars_scope scope;
 	struct sample empty_smp = { };
+	uint64_t hash;
 
 	/* Check arg type. */
 	if (arg->type != ARGT_STR) {
@@ -539,13 +496,10 @@
 	}
 
 	/* Register new variable name. */
-	name = register_name(arg->data.str.area, arg->data.str.data, &scope,
-			     1,
-			     err);
-	if (!name)
+	if (!vars_hash_name(arg->data.str.area, arg->data.str.data, &scope, &hash, err))
 		return 0;
 
-	if (scope == SCOPE_PROC && !var_set(name, scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT))
+	if (scope == SCOPE_PROC && !var_set(hash, scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT))
 		return 0;
 
 	/* properly destroy the chunk */
@@ -553,75 +507,76 @@
 
 	/* Use the global variable name pointer. */
 	arg->type = ARGT_VAR;
-	arg->data.var.name = name;
+	arg->data.var.name_hash = hash;
 	arg->data.var.scope = scope;
 	return 1;
 }
 
-/* This function store a sample in a variable if it was already defined.
+/* This function stores a sample in a variable if it was already defined.
  * Returns zero on failure and non-zero otherwise. The variable not being
  * defined is treated as a failure.
  */
 int vars_set_by_name_ifexist(const char *name, size_t len, struct sample *smp)
 {
 	enum vars_scope scope;
+	uint64_t hash;
 
 	/* Resolve name and scope. */
-	name = register_name(name, len, &scope, 0, NULL);
-	if (!name)
+	if (!vars_hash_name(name, len, &scope, &hash, NULL))
 		return 0;
 
-	return var_set(name, scope, smp, VF_UPDATEONLY);
+	return var_set(hash, scope, smp, VF_UPDATEONLY);
 }
 
 
-/* This function store a sample in a variable.
+/* This function stores a sample in a variable.
  * Returns zero on failure and non-zero otherwise.
  */
 int vars_set_by_name(const char *name, size_t len, struct sample *smp)
 {
 	enum vars_scope scope;
+	uint64_t hash;
 
 	/* Resolve name and scope. */
-	name = register_name(name, len, &scope, 1, NULL);
-	if (!name)
+	if (!vars_hash_name(name, len, &scope, &hash, NULL))
 		return 0;
 
-	return var_set(name, scope, smp, 0);
+	return var_set(hash, scope, smp, 0);
 }
 
-/* This function unset a variable if it was already defined.
+/* This function unsets a variable if it was already defined.
  * Returns zero on failure and non-zero otherwise.
  */
 int vars_unset_by_name_ifexist(const char *name, size_t len, struct sample *smp)
 {
 	enum vars_scope scope;
+	uint64_t hash;
 
 	/* Resolve name and scope. */
-	name = register_name(name, len, &scope, 0, NULL);
-	if (!name)
+	if (!vars_hash_name(name, len, &scope, &hash, NULL))
 		return 0;
 
-	return var_unset(name, scope, smp);
+	return var_unset(hash, scope, smp);
 }
 
 
-/* This retrieves variable <name> from variables <vars>, and if found and not
- * empty, duplicates the result into sample <smp>. smp_dup() is used in order
- * to release the variables lock ASAP (so a pre-allocated chunk is obtained
- * via get_trash_shunk()). The variables' lock is used for reads.
+/* This retrieves variable whose hash matches <name_hash> from variables <vars>,
+ * and if found and not empty, duplicates the result into sample <smp>.
+ * smp_dup() is used in order to release the variables lock ASAP (so a pre-
+ * allocated chunk is obtained via get_trash_shunk()). The variables' lock is
+ * used for reads.
  *
  * The function returns 0 if the variable was not found and no default
  * value was provided in <def>, otherwise 1 with the sample filled.
  * Default values are always returned as strings.
  */
-static int var_to_smp(struct vars *vars, const char *name, struct sample *smp, const struct buffer *def)
+static int var_to_smp(struct vars *vars, uint64_t name_hash, struct sample *smp, const struct buffer *def)
 {
 	struct var *var;
 
 	/* Get the variable entry. */
 	HA_RWLOCK_RDLOCK(VARS_LOCK, &vars->rwlock);
-	var = var_get(vars, name);
+	var = var_get(vars, name_hash);
 	if (!var || !var->data.type) {
 		if (!def) {
 			HA_RWLOCK_RDUNLOCK(VARS_LOCK, &vars->rwlock);
@@ -658,10 +613,10 @@
 {
 	struct vars *vars;
 	enum vars_scope scope;
+	uint64_t hash;
 
 	/* Resolve name and scope. */
-	name = register_name(name, len, &scope, 0, NULL);
-	if (!name)
+	if (!vars_hash_name(name, len, &scope, &hash, NULL))
 		return 0;
 
 	/* Select "vars" pool according with the scope. */
@@ -669,8 +624,7 @@
 	if (!vars || vars->scope != scope)
 		return 0;
 
-
-	return var_to_smp(vars, name, smp, def);
+	return var_to_smp(vars, hash, smp, def);
 }
 
 /* This function fills a sample with the content of the variable described
@@ -697,7 +651,7 @@
 	if (!vars || vars->scope != var_desc->scope)
 		return 0;
 
-	return var_to_smp(vars, var_desc->name, smp, def);
+	return var_to_smp(vars, var_desc->name_hash, smp, def);
 }
 
 /* Always returns ACT_RET_CONT even if an error occurs. */
@@ -747,7 +701,7 @@
 		smp_set_owner(&smp, px, sess, s, 0);
 		smp.data.type = SMP_T_STR;
 		smp.data.u.str = *fmtstr;
-		var_set(rule->arg.vars.name, rule->arg.vars.scope, &smp, 0);
+		var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp, 0);
 	}
 	else {
 		/* an expression is used */
@@ -757,7 +711,7 @@
 	}
 
 	/* Store the sample, and ignore errors. */
-	var_set(rule->arg.vars.name, rule->arg.vars.scope, &smp, 0);
+	var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp, 0);
 	free_trash_chunk(fmtstr);
 	return ACT_RET_CONT;
 }
@@ -772,7 +726,7 @@
 	smp_set_owner(&smp, px, sess, s, SMP_OPT_FINAL);
 
 	/* Clear the variable using the sample context, and ignore errors. */
-	var_unset(rule->arg.vars.name, rule->arg.vars.scope, &smp);
+	var_unset(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp);
 	return ACT_RET_CONT;
 }
 
@@ -858,12 +812,11 @@
 	}
 
 	LIST_INIT(&rule->arg.vars.fmt);
-	rule->arg.vars.name = register_name(var_name, var_len, &rule->arg.vars.scope, 1, err);
-	if (!rule->arg.vars.name)
+	if (!vars_hash_name(var_name, var_len, &rule->arg.vars.scope, &rule->arg.vars.name_hash, err))
 		return ACT_RET_PRS_ERR;
 
 	if (rule->arg.vars.scope == SCOPE_PROC &&
-	    !var_set(rule->arg.vars.name, rule->arg.vars.scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT))
+	    !var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT))
 		return 0;
 
 	/* There is no fetch method when variable is unset. Just set the right