MEDIUM: cache: Change caching conditions

Do not cache responses that do not have an explicit expiration time
(s-maxage or max-age Cache-Control directives or Expires header) or a
validator (ETag or Last-Modified headers) anymore, as suggested in
RFC 7234#3.
The TX_CACHE_IGNORE flag is used instead of the TX_CACHEABLE one so as
not to change the behavior of the checkcache option.

(cherry picked from commit cc9bf2e5fe1fe6f15de9e78b6aaea2cd6be5ca4f)
Signed-off-by: William Lallemand <wlallemand@haproxy.org>
diff --git a/include/haproxy/http_ana-t.h b/include/haproxy/http_ana-t.h
index bab438e..9449b79 100644
--- a/include/haproxy/http_ana-t.h
+++ b/include/haproxy/http_ana-t.h
@@ -59,7 +59,7 @@
 /* cacheability management, bits values 0x1000 to 0x3000 (0-3 shift 12) */
 #define TX_CACHEABLE	0x00001000	/* at least part of the response is cacheable */
 #define TX_CACHE_COOK	0x00002000	/* a cookie in the response is cacheable */
-#define TX_CACHE_IGNORE 0x00004000	/* do not retrieve object from cache */
+#define TX_CACHE_IGNORE 0x00004000	/* do not retrieve object from cache, or avoid caching response */
 #define TX_CACHE_SHIFT	12		/* bit shift */
 
 #define TX_CON_WANT_TUN 0x00008000	/* Will be a tunnel (CONNECT or 101-Switching-Protocol) */
diff --git a/reg-tests/cache/basic.vtc b/reg-tests/cache/basic.vtc
index e8255af..849057d 100644
--- a/reg-tests/cache/basic.vtc
+++ b/reg-tests/cache/basic.vtc
@@ -6,7 +6,8 @@
 
 server s1 {
     rxreq
-    txresp -nolen -hdr "Transfer-Encoding: chunked"
+    txresp -nolen -hdr "Transfer-Encoding: chunked" \
+        -hdr "Cache-Control: max-age=5"
     chunkedlen 1
     chunkedlen 1
     chunkedlen 2
diff --git a/reg-tests/cache/caching_rules.vtc b/reg-tests/cache/caching_rules.vtc
new file mode 100644
index 0000000..1abd924
--- /dev/null
+++ b/reg-tests/cache/caching_rules.vtc
@@ -0,0 +1,150 @@
+varnishtest "Caching rules test"
+# A response will not be cached unless it has an explicit expiration time (Cache-Control max-age or s-maxage, or an Expires header) or a validator (Last-Modified or ETag header)
+
+#REQUIRE_VERSION=1.9
+
+feature ignore_unknown_macro
+
+server s1 {
+    rxreq
+    expect req.url == "/max-age"
+    txresp -hdr "Cache-Control: max-age=5" \
+        -bodylen 150
+
+    rxreq
+    expect req.url == "/s-maxage"
+    txresp -hdr "Cache-Control: s-maxage=5" \
+        -bodylen 160
+
+    rxreq
+    expect req.url == "/last-modified"
+    txresp -hdr "Last-Modified: Thu, 22 Oct 2020 16:51:12 GMT" \
+        -bodylen 180
+
+    rxreq
+    expect req.url == "/etag"
+    txresp -hdr "ETag: \"etag\"" \
+        -bodylen 190
+
+    rxreq
+    expect req.url == "/uncacheable"
+    txresp \
+        -bodylen 200
+
+    rxreq
+    expect req.url == "/uncacheable"
+    txresp \
+        -bodylen 210
+} -start
+
+server s2 {
+    rxreq
+    expect req.url == "/expires"
+    # Expires header is filled directly by the expires_be backend
+    txresp \
+        -bodylen 170
+} -start
+
+haproxy h1 -conf {
+    defaults
+        mode http
+        ${no-htx} option http-use-htx
+        timeout connect 1s
+        timeout client  1s
+        timeout server  1s
+
+    frontend fe
+        bind "fd@${fe}"
+        use_backend expires_be if { path_beg /expires }
+        default_backend test
+
+    backend expires_be
+        http-request cache-use my_cache
+        server www ${s2_addr}:${s2_port}
+        http-response set-header X-Cache-Hit %[res.cache_hit]
+        # Expires value set in the future (current_time+5s)
+        http-response set-header Expires %[date(5),http_date]
+        http-response cache-store my_cache
+
+    backend test
+        http-request cache-use my_cache
+        server www ${s1_addr}:${s1_port}
+        http-response cache-store my_cache
+        http-response set-header X-Cache-Hit %[res.cache_hit]
+
+    cache my_cache
+        total-max-size 3
+        max-age 20
+        max-object-size 3072
+} -start
+
+
+client c1 -connect ${h1_fe_sock} {
+        txreq -url "/max-age"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 150
+
+        txreq -url "/max-age"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 150
+        expect resp.http.X-Cache-Hit == 1
+
+        txreq -url "/s-maxage"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 160
+
+        txreq -url "/s-maxage"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 160
+        expect resp.http.X-Cache-Hit == 1
+
+        txreq -url "/expires"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 170
+
+        txreq -url "/expires"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 170
+        expect resp.http.X-Cache-Hit == 1
+
+        txreq -url "/last-modified"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 180
+
+        txreq -url "/last-modified"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 180
+        expect resp.http.X-Cache-Hit == 1
+
+        txreq -url "/etag"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 190
+
+        txreq -url "/etag"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 190
+        expect resp.http.X-Cache-Hit == 1
+
+        # The next response should not be cached
+        txreq -url "/uncacheable"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 200
+
+        txreq -url "/uncacheable"
+        rxresp
+        expect resp.status == 200
+        expect resp.bodylen == 210
+        expect resp.http.X-Cache-Hit == 0
+
+} -run
diff --git a/reg-tests/cache/if-modified-since.vtc b/reg-tests/cache/if-modified-since.vtc
index af8cbf0..e491e46 100644
--- a/reg-tests/cache/if-modified-since.vtc
+++ b/reg-tests/cache/if-modified-since.vtc
@@ -19,7 +19,8 @@
        rxreq
        expect req.url == "/date"
        txresp -nolen -hdr "Transfer-Encoding: chunked" \
-               -hdr "Date: Thu, 22 Oct 2020 16:51:12 GMT"
+               -hdr "Date: Thu, 22 Oct 2020 16:51:12 GMT" \
+               -hdr "Cache-Control: max-age=5"
        chunkedlen 16
        chunkedlen 16
        chunkedlen 16
diff --git a/reg-tests/cache/sample_fetches.vtc b/reg-tests/cache/sample_fetches.vtc
index 1ba0690..73e6e1b 100644
--- a/reg-tests/cache/sample_fetches.vtc
+++ b/reg-tests/cache/sample_fetches.vtc
@@ -7,7 +7,8 @@
 
 server s1 {
        rxreq
-       txresp -nolen -hdr "Transfer-Encoding: chunked"
+       txresp -nolen -hdr "Transfer-Encoding: chunked" \
+            -hdr "Cache-Control: max-age=5"
        chunkedlen 15
        chunkedlen 15
        chunkedlen 15
@@ -16,7 +17,8 @@
 
 server s2 {
        rxreq
-       txresp -nolen -hdr "Transfer-Encoding: chunked"
+       txresp -nolen -hdr "Transfer-Encoding: chunked" \
+            -hdr "Cache-Control: max-age=5"
        chunkedlen 16
        chunkedlen 16
        chunkedlen 16
@@ -25,14 +27,16 @@
 
 server s3 {
        rxreq
-       txresp -nolen -hdr "Transfer-Encoding: chunked"
+       txresp -nolen -hdr "Transfer-Encoding: chunked" \
+            -hdr "Cache-Control: max-age=5"
        chunkedlen 17
        chunkedlen 17
        chunkedlen 17
        chunkedlen 0
 
        rxreq
-       txresp -nolen -hdr "Transfer-Encoding: chunked"
+       txresp -nolen -hdr "Transfer-Encoding: chunked" \
+            -hdr "Cache-Control: max-age=5"
        chunkedlen 17
        chunkedlen 17
        chunkedlen 17
diff --git a/src/cache.c b/src/cache.c
index f86c96f..4f8fad9 100644
--- a/src/cache.c
+++ b/src/cache.c
@@ -664,7 +664,7 @@
 
 	http_check_response_for_cacheability(s, &s->res);
 
-	if (!(txn->flags & TX_CACHEABLE) || !(txn->flags & TX_CACHE_COOK))
+	if (!(txn->flags & TX_CACHEABLE) || !(txn->flags & TX_CACHE_COOK) || (txn->flags & TX_CACHE_IGNORE))
 		goto out;
 
 	age = 0;
diff --git a/src/http_ana.c b/src/http_ana.c
index deec220..7a9cd0b 100644
--- a/src/http_ana.c
+++ b/src/http_ana.c
@@ -3916,6 +3916,8 @@
 	struct http_txn *txn = s->txn;
 	struct http_hdr_ctx ctx = { .blk = NULL };
 	struct htx *htx;
+	int has_freshness_info = 0;
+	int has_validator = 0;
 
 	if (txn->status < 200) {
 		/* do not try to cache interim responses! */
@@ -3953,7 +3955,37 @@
 			txn->flags &= ~TX_CACHE_COOK;
 			continue;
 		}
+
+		if (istmatchi(ctx.value, ist("s-maxage")) ||
+		    istmatchi(ctx.value, ist("max-age"))) {
+			has_freshness_info = 1;
+			continue;
+		}
+	}
+
+	/* If no freshness information could be found in Cache-Control values,
+	 * look for an Expires header. */
+	if (!has_freshness_info) {
+		ctx.blk = NULL;
+		has_freshness_info = http_find_header(htx, ist("expires"), &ctx, 0);
 	}
+
+	/* If no freshness information could be found in Cache-Control or Expires
+	 * values, look for an explicit validator. */
+	if (!has_freshness_info) {
+		ctx.blk = NULL;
+		has_validator = 1;
+		if (!http_find_header(htx, ist("etag"), &ctx, 0)) {
+			ctx.blk = NULL;
+			if (!http_find_header(htx, ist("last-modified"), &ctx, 0))
+				has_validator = 0;
+		}
+	}
+
+	/* We won't store an entry that has neither a cache validator nor an
+	 * explicit expiration time, as suggested in RFC 7234#3. */
+	if (!has_freshness_info && !has_validator)
+		txn->flags |= TX_CACHE_IGNORE;
 }
 
 /*