MINOR: cache: Improve accept_encoding_normalizer Turn the "Accept-Encoding" value to lower case before processing it. Calculate the CRC on every token instead of a sorted concatenation of them all (in order to avoir copying them) then XOR all the CRCs into a single hash (while ignoring duplicates).

commit: 8bb72aa82fd68822fc116f58a062e97c924b4fe5 [log] [tgz]
author: Remi Tricot-Le Breton <rlebreton@haproxy.com> Mon Nov 30 17:06:03 2020 +0100
committer: William Lallemand <wlallemand@haproxy.org> Wed Dec 02 16:32:54 2020 +0100
tree: 7ff3eda277a072ab5ae9a7619fecfcada5f99c9c
parent: f67442efdb509d5d15f530a536b13f29fa7f48b7 [diff]
diff --git a/reg-tests/cache/vary.vtc b/reg-tests/cache/vary.vtc
index 9690490..001018f 100644
--- a/reg-tests/cache/vary.vtc
+++ b/reg-tests/cache/vary.vtc

@@ -187,8 +187,9 @@
        expect resp.http.content-type == "text/plain"
        expect resp.http.X-Cache-Hit == 1
 
-       # The accept-encoding normalizer function sorts alphabeticaly the values
-       # before  calculating the secondary key
+       # The accept-encoding normalizer function converts the header values
+       # to lower case then calculates the hash of every sub part before
+       # sorting the hashes and xor'ing them (while removing duplicates).
        txreq -url "/accept-encoding-multiple" -hdr "Accept-Encoding: first,second"
        rxresp
        expect resp.status == 200
@@ -207,6 +208,12 @@
        expect resp.bodylen == 51
        expect resp.http.X-Cache-Hit == 1
 
+       txreq -url "/accept-encoding-multiple" -hdr "Accept-Encoding: FirsT,SECOND,first"
+       rxresp
+       expect resp.status == 200
+       expect resp.bodylen == 51
+       expect resp.http.X-Cache-Hit == 1
+
        # Unmanaged vary
        txreq -url "/unmanaged" -hdr "Accept-Encoding: first_value"
        rxresp
@@ -270,7 +277,7 @@
        expect resp.bodylen == 57
        expect resp.http.X-Cache-Hit == 1
 
-       # The following requests are trated by a backend that does not cache
+       # The following requests are treated by a backend that does not cache
        # responses containing a Vary header
        txreq -url "/no_vary_support"
        rxresp

diff --git a/src/cache.c b/src/cache.c
index 6489f16..1b86ccd 100644
--- a/src/cache.c
+++ b/src/cache.c

@@ -1791,10 +1791,14 @@
 
 int accept_encoding_cmp(const void *a, const void *b)
 {
-	const struct ist ist_a = *(const struct ist*)a;
-	const struct ist ist_b = *(const struct ist*)b;
+	unsigned int int_a = *(unsigned int*)a;
+	unsigned int int_b = *(unsigned int*)b;
 
-	return istdiff(ist_a, ist_b);
+	if (int_a < int_b)
+		return -1;
+	if (int_a > int_b)
+		return 1;
+	return 0;
 }
 
 #define ACCEPT_ENCODING_MAX_ENTRIES 16
@@ -1804,33 +1808,38 @@
  * for the newly constructed buffer.
  * Returns 0 in case of success.
  */
-static int accept_encoding_normalizer(struct ist value, char *buf, unsigned int *buf_len)
+static int accept_encoding_normalizer(struct ist full_value, char *buf, unsigned int *buf_len)
 {
-	struct ist values[ACCEPT_ENCODING_MAX_ENTRIES] = {{}};
+	unsigned int values[ACCEPT_ENCODING_MAX_ENTRIES] = {};
 	size_t count = 0;
 	char *comma = NULL;
-	struct buffer *trash = get_trash_chunk();
-	int hash_value = 0;
+	unsigned int hash_value = 0;
+	unsigned int prev = 0, curr = 0;
+
+	/* Turn accept-encoding value to lower case */
+	full_value = ist2bin_lc(istptr(full_value), full_value);
 
 	/* The hash will be built out of a sorted list of accepted encodings. */
-	while (count < (ACCEPT_ENCODING_MAX_ENTRIES - 1) && (comma = istchr(value, ',')) != NULL) {
-		size_t length = comma - istptr(value);
+	while (count < (ACCEPT_ENCODING_MAX_ENTRIES - 1) && (comma = istchr(full_value, ',')) != NULL) {
+		size_t length = comma - istptr(full_value);
 
-		values[count++] = isttrim(value, length);
-		value = istadv(value, length + 1);
-	}
-	values[count++] = value;
+		values[count++] = hash_crc32(istptr(full_value), length);
 
-	if (count == ACCEPT_ENCODING_MAX_ENTRIES)
-		return 1;
+		full_value = istadv(full_value, length + 1);
 
-	/* Sort the values alphabetically. */
-	qsort(values, count, sizeof(struct ist), &accept_encoding_cmp);
+	}
+	values[count++] = hash_crc32(istptr(full_value), istlen(full_value));
 
-	while (count)
-		chunk_istcat(trash, values[--count]);
+	/* Sort the values alphabetically. */
+	qsort(values, count, sizeof(*values), &accept_encoding_cmp);
 
-	hash_value = hash_crc32(b_orig(trash), b_data(trash));
+	while (count) {
+		curr = values[--count];
+		if (curr != prev) {
+			hash_value ^= curr;
+		}
+		prev = curr;
+	}
 
 	memcpy(buf, &hash_value, sizeof(hash_value));
 	*buf_len = sizeof(hash_value);
commit	8bb72aa82fd68822fc116f58a062e97c924b4fe5	[log] [tgz]
author	Remi Tricot-Le Breton <rlebreton@haproxy.com>	Mon Nov 30 17:06:03 2020 +0100
committer	William Lallemand <wlallemand@haproxy.org>	Wed Dec 02 16:32:54 2020 +0100
tree	7ff3eda277a072ab5ae9a7619fecfcada5f99c9c
parent	f67442efdb509d5d15f530a536b13f29fa7f48b7 [diff]