BUG/MEDIUM: cache: Fix hash collision in `accept-encoding` handling for `Vary`
This patch fixes GitHub Issue #988. Commit ce9e7b25217c46db1ac636b2c885a05bf91ae57e
was not sufficient, because it fell back to a hash comparison if the bitmap
of known encodings was not acceptable instead of directly returning the the
cached response is not compatible.
This patch also extends the reg-test to test the hash collision that was
mentioned in #988.
Vary handling is 2.4, no backport needed.
diff --git a/reg-tests/cache/vary.vtc b/reg-tests/cache/vary.vtc
index a840799..be79b6a 100644
--- a/reg-tests/cache/vary.vtc
+++ b/reg-tests/cache/vary.vtc
@@ -5,7 +5,8 @@
feature ignore_unknown_macro
server s1 {
- # Response varying on "accept-encoding"
+ # Response varying on "accept-encoding" with
+ # an unacceptable content-encoding
rxreq
expect req.url == "/accept-encoding"
txresp -hdr "Content-Encoding: gzip" \
@@ -16,6 +17,15 @@
# Response varying on "accept-encoding"
rxreq
expect req.url == "/accept-encoding"
+ txresp -hdr "Content-Encoding: gzip" \
+ -hdr "Vary: accept-encoding" \
+ -hdr "Cache-Control: max-age=5" \
+ -bodylen 45
+
+ # Response varying on "accept-encoding" with
+ # no content-encoding
+ rxreq
+ expect req.url == "/accept-encoding"
txresp -hdr "Content-Type: text/plain" \
-hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5" \
@@ -57,7 +67,7 @@
expect req.url == "/referer-accept-encoding"
txresp -hdr "Vary: referer,accept-encoding" \
-hdr "Cache-Control: max-age=5" \
- -hdr "Content-Encoding: gzip" \
+ -hdr "Content-Encoding: br" \
-bodylen 54
rxreq
@@ -72,14 +82,14 @@
expect req.url == "/multiple_headers"
txresp -hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5" \
- -hdr "Content-Encoding: gzip" \
+ -hdr "Content-Encoding: br" \
-bodylen 155
rxreq
expect req.url == "/multiple_headers"
txresp -hdr "Vary: accept-encoding" \
-hdr "Cache-Control: max-age=5" \
- -hdr "Content-Encoding: gzip" \
+ -hdr "Content-Encoding: br" \
-bodylen 166
@@ -163,6 +173,16 @@
expect resp.http.content-encoding == "gzip"
expect resp.bodylen == 45
+ # The response for the first request had an unacceptable `content-encoding`
+ # which might happen if that's the only thing the server supports, but
+ # we must not cache that and instead defer to the server.
+ txreq -url "/accept-encoding" -hdr "Accept-Encoding: first_value"
+ rxresp
+ expect resp.status == 200
+ expect resp.http.content-encoding == "gzip"
+ expect resp.bodylen == 45
+ expect resp.http.X-Cache-Hit == 0
+
txreq -url "/accept-encoding" -hdr "Accept-Encoding: second_value"
rxresp
expect resp.status == 200
@@ -170,11 +190,15 @@
expect resp.http.content-type == "text/plain"
expect resp.http.X-Cache-Hit == 0
+ # This request matches the cache entry for the request above, despite
+ # matching the `accept-encoding` of the first request because the
+ # request above only has the `identity` encoding which is implicitly
+ # added, unless explicitely forbidden.
txreq -url "/accept-encoding" -hdr "Accept-Encoding: first_value"
rxresp
expect resp.status == 200
- expect resp.bodylen == 45
- expect resp.http.content-encoding == "gzip"
+ expect resp.bodylen == 48
+ expect resp.http.content-type == "text/plain"
expect resp.http.X-Cache-Hit == 1
txreq -url "/accept-encoding" -hdr "Accept-Encoding: second_value"
@@ -227,7 +251,7 @@
# Mixed Vary (Accept-Encoding + Referer)
txreq -url "/referer-accept-encoding" \
- -hdr "Accept-Encoding: first_value,second_value" \
+ -hdr "Accept-Encoding: br, gzip" \
-hdr "Referer: referer"
rxresp
expect resp.status == 200
@@ -235,7 +259,7 @@
expect resp.http.X-Cache-Hit == 0
txreq -url "/referer-accept-encoding" \
- -hdr "Accept-Encoding: first_value" \
+ -hdr "Accept-Encoding: br" \
-hdr "Referer: other-referer"
rxresp
expect resp.status == 200
@@ -243,7 +267,7 @@
expect resp.http.X-Cache-Hit == 0
txreq -url "/referer-accept-encoding" \
- -hdr "Accept-Encoding: second_value" \
+ -hdr "Accept-Encoding: gzip" \
-hdr "Referer: other-referer"
rxresp
expect resp.status == 200
@@ -252,14 +276,14 @@
txreq -url "/referer-accept-encoding" \
-hdr "Referer: referer" \
- -hdr "Accept-Encoding: second_value,first_value"
+ -hdr "Accept-Encoding: gzip, br"
rxresp
expect resp.status == 200
expect resp.bodylen == 51
expect resp.http.X-Cache-Hit == 1
txreq -url "/referer-accept-encoding" \
- -hdr "Accept-Encoding: first_value" \
+ -hdr "Accept-Encoding: br" \
-hdr "Referer: other-referer"
rxresp
expect resp.status == 200
@@ -267,7 +291,7 @@
expect resp.http.X-Cache-Hit == 1
txreq -url "/referer-accept-encoding" \
- -hdr "Accept-Encoding: second_value" \
+ -hdr "Accept-Encoding: gzip" \
-hdr "Referer: other-referer"
rxresp
expect resp.status == 200
@@ -277,16 +301,16 @@
# Multiple Accept-encoding headers
txreq -url "/multiple_headers" \
- -hdr "Accept-Encoding: first_encoding" \
- -hdr "Accept-Encoding: second_encoding,third_encoding"
+ -hdr "Accept-Encoding: gzip" \
+ -hdr "Accept-Encoding: br, deflate"
rxresp
expect resp.status == 200
expect resp.bodylen == 155
expect resp.http.X-Cache-Hit == 0
txreq -url "/multiple_headers" \
- -hdr "Accept-Encoding: third_encoding" \
- -hdr "Accept-Encoding: second_encoding,first_encoding"
+ -hdr "Accept-Encoding: deflate" \
+ -hdr "Accept-Encoding: br,gzip"
rxresp
expect resp.status == 200
expect resp.bodylen == 155
diff --git a/reg-tests/cache/vary_accept_encoding.vtc b/reg-tests/cache/vary_accept_encoding.vtc
index afd2cfb..59e819a 100644
--- a/reg-tests/cache/vary_accept_encoding.vtc
+++ b/reg-tests/cache/vary_accept_encoding.vtc
@@ -73,6 +73,21 @@
-hdr "Cache-Control: max-age=5" \
-hdr "Content-Encoding: unknown_encoding" \
-bodylen 119
+
+
+ rxreq
+ expect req.url == "/hash-collision"
+ txresp -hdr "Vary: accept-encoding" \
+ -hdr "Cache-Control: max-age=5" \
+ -hdr "Content-Encoding: br" \
+ -bodylen 129
+
+ rxreq
+ expect req.url == "/hash-collision"
+ txresp -hdr "Vary: accept-encoding" \
+ -hdr "Cache-Control: max-age=5" \
+ -hdr "Content-Encoding: gzip" \
+ -bodylen 139
} -start
@@ -300,4 +315,21 @@
expect resp.bodylen == 119
expect resp.http.X-Cache-Hit == 0
+ #
+ # Hash collision (https://github.com/haproxy/haproxy/issues/988)
+ #
+ # crc32(gzip) ^ crc32(br) ^ crc32(xxx) ^ crc32(jdcqiab) == crc32(gzip)
+ txreq -url "/hash-collision" -hdr "Accept-Encoding: br,gzip,xxx,jdcqiab"
+ rxresp
+ expect resp.status == 200
+ expect resp.http.content-encoding == "br"
+ expect resp.bodylen == 129
+ expect resp.http.X-Cache-Hit == 0
+
+ txreq -url "/hash-collision" -hdr "Accept-Encoding: gzip"
+ rxresp
+ expect resp.status == 200
+ expect resp.http.content-encoding == "gzip"
+ expect resp.bodylen == 139
+ expect resp.http.X-Cache-Hit == 0
} -run
diff --git a/src/cache.c b/src/cache.c
index c740818..23883df 100644
--- a/src/cache.c
+++ b/src/cache.c
@@ -2398,19 +2398,28 @@
/* All the bits set in the reference bitmap correspond to the
* stored response' encoding and should all be set in the new
* encoding bitmap in order for the client to be able to manage
- * the response. */
- if ((ref.encoding_bitmap & new.encoding_bitmap) == ref.encoding_bitmap) {
- /* The cached response has encodings that are accepted
- * by the client, it can be served directly by the cache
- * (as far as the accept-encoding part is concerned). */
- return 0;
- }
+ * the response.
+ *
+ * If this is the case the cached response has encodings that
+ * are accepted by the client. It can be served directly by
+ * the cache (as far as the accept-encoding part is concerned).
+ */
+
+ return (ref.encoding_bitmap & new.encoding_bitmap) != ref.encoding_bitmap;
}
+ else {
+ /* We must compare hashes only when the the response contains
+ * unknown encodings.
+ * Otherwise we might serve unacceptable responses if the hash
+ * of a client's `accept-encoding` header collides with a
+ * known encoding.
+ */
- ref.hash = read_u32(ref_hash+sizeof(ref.encoding_bitmap));
- new.hash = read_u32(new_hash+sizeof(new.encoding_bitmap));
+ ref.hash = read_u32(ref_hash+sizeof(ref.encoding_bitmap));
+ new.hash = read_u32(new_hash+sizeof(new.encoding_bitmap));
- return ref.hash != new.hash;
+ return ref.hash != new.hash;
+ }
}