blob: a5fa3c3f06edba13359904b5935a3c232b50860e [file] [log] [blame]
William Lallemand41db4602017-10-30 11:15:51 +01001/*
2 * Cache management
3 *
4 * Copyright 2017 HAProxy Technologies
5 * William Lallemand <wlallemand@haproxy.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
#include <stddef.h>

#include <import/eb32tree.h>
#include <import/sha1.h>

#include <haproxy/action-t.h>
#include <haproxy/api.h>
#include <haproxy/cfgparse.h>
#include <haproxy/channel.h>
#include <haproxy/cli.h>
#include <haproxy/errors.h>
#include <haproxy/filters.h>
#include <haproxy/hash.h>
#include <haproxy/http.h>
#include <haproxy/http_ana.h>
#include <haproxy/http_htx.h>
#include <haproxy/http_rules.h>
#include <haproxy/htx.h>
#include <haproxy/net_helper.h>
#include <haproxy/proxy.h>
#include <haproxy/sample.h>
#include <haproxy/shctx.h>
#include <haproxy/stream.h>
#include <haproxy/stream_interface.h>
#include <haproxy/tools.h>
William Lallemand41db4602017-10-30 11:15:51 +010036
/* Flags stored in the <flags> field of struct cache_flt_conf. */
Christopher Faulet27d93c32018-12-15 22:32:02 +010037#define CACHE_FLT_F_IMPLICIT_DECL 0x00000001 /* The cache filter was implicitly declared (i.e. without
Christopher Faulet99a17a22018-12-11 09:18:27 +010038 * the filter keyword) */
Tim Duesterhusd7c6e6a2020-09-14 18:01:33 +020039#define CACHE_FLT_INIT 0x00000002 /* Whether the cache name was freed. */
Christopher Fauletafd819c2018-12-11 08:57:45 +010040
/* Identifier string of the cache store filter. */
Christopher Fauletf4a4ef72018-12-07 17:39:53 +010041const char *cache_store_flt_id = "cache store filter";
William Lallemand41db4602017-10-30 11:15:51 +010042
/* Applet declared here but defined elsewhere. */
Willy Tarreau2231b632019-03-29 18:26:52 +010043extern struct applet http_cache_applet;
William Lallemand41db4602017-10-30 11:15:51 +010044
/* Filter operations of the cache filter.
 * NOTE(review): only declared at this point; presumably initialized further
 * down in the file - confirm. */
45struct flt_ops cache_ops;
46
/* Description of one cache: its settings and the tree of its entries. */
47struct cache {
Willy Tarreaufd5efb52017-11-26 08:54:31 +010048 struct list list; /* cache linked list */
William Lallemand41db4602017-10-30 11:15:51 +010049 struct eb_root entries; /* head of cache entries based on keys */
Willy Tarreaufd5efb52017-11-26 08:54:31 +010050 unsigned int maxage; /* max-age */
51 unsigned int maxblocks; /* NOTE(review): presumably the number of blocks of the cache - confirm */
Frédéric Lécaille4eba5442018-10-25 20:29:31 +020052 unsigned int maxobjsz; /* max-object-size (in bytes) */
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +010053 unsigned int max_secondary_entries; /* maximum number of secondary entries with the same primary hash */
Remi Tricot-Le Breton754b2422020-11-16 15:56:10 +010054 uint8_t vary_processing_enabled; /* boolean : manage Vary header (disabled by default) */
Willy Tarreaufd5efb52017-11-26 08:54:31 +010055 char id[33]; /* cache name */
William Lallemand41db4602017-10-30 11:15:51 +010056};
57
Christopher Faulet95220e22018-12-07 17:34:39 +010058/* cache config for filters */
/* Per-filter configuration. The union <c> holds either the cache name or the
 * cache pointer: cache_store_check() reads <c.name> while the stream callbacks
 * read <c.cache>. */
59struct cache_flt_conf {
60 union {
61 struct cache *cache; /* cache used by the filter */
62 char *name; /* cache name used during conf parsing */
63 } c;
64 unsigned int flags; /* CACHE_FLT_F_* and CACHE_FLT_INIT */
65};
66
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +010067
68/*
69 * Vary-related structures and functions
70 */
/* One bit per header that can take part in a vary signature. */
71enum vary_header_bit {
72 VARY_ACCEPT_ENCODING = (1 << 0),
73 VARY_REFERER = (1 << 1),
/* Not a header bit: as an unvalued enumerator it is the previous value + 1. */
74 VARY_LAST /* should always be last */
75};
76
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +010077/*
78 * Encoding list extracted from
79 * https://www.iana.org/assignments/http-parameters/http-parameters.xhtml
80 * and RFC7231#5.3.4.
81 */
/* One bit per encoding, so that several encodings can be combined in a single
 * integer.
 * NOTE(review): presumably this is the bitmap handled by
 * accept_encoding_normalizer() / accept_encoding_bitmap_cmp() - confirm. */
82enum vary_encoding {
83 VARY_ENCODING_GZIP = (1 << 0),
84 VARY_ENCODING_DEFLATE = (1 << 1),
85 VARY_ENCODING_BR = (1 << 2),
86 VARY_ENCODING_COMPRESS = (1 << 3),
87 VARY_ENCODING_AES128GCM = (1 << 4),
88 VARY_ENCODING_EXI = (1 << 5),
89 VARY_ENCODING_PACK200_GZIP = (1 << 6),
90 VARY_ENCODING_ZSTD = (1 << 7),
91 VARY_ENCODING_IDENTITY = (1 << 8),
92 VARY_ENCODING_STAR = (1 << 9),
93 VARY_ENCODING_OTHER = (1 << 10)
94};
95
/* Describes how one header contributes to a secondary key: which signature
 * bit it owns, how many bytes its sub hash occupies in the key and which
 * functions normalize and compare it. */
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +010096struct vary_hashing_information {
97 struct ist hdr_name; /* Header name */
Ilya Shipitsinf38a0182020-12-21 01:16:17 +050098 enum vary_header_bit value; /* Bit representing the header in a vary signature */
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +010099 unsigned int hash_length; /* Size of the sub hash for this header's value */
Remi Tricot-Le Breton6a34b2b2020-12-23 18:13:47 +0100100 int(*norm_fn)(struct htx*,struct ist hdr_name,char* buf,unsigned int* buf_len); /* Normalization function */
Tim Duesterhused84d842021-01-18 13:41:17 +0100101 int(*cmp_fn)(const void *ref, const void *new, unsigned int len); /* Comparison function, should return 0 if the hashes are alike. May be NULL, in which case secondary_key_cmp() falls back to memcmp() */
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100102};
103
/* Forward declarations of the secondary key helpers. */
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100104static int http_request_prebuild_full_secondary_key(struct stream *s);
105static int http_request_build_secondary_key(struct stream *s, int vary_signature);
106static int http_request_reduce_secondary_key(unsigned int vary_signature,
107 char prebuilt_key[HTTP_CACHE_SEC_KEY_LEN]);
108
109static int parse_encoding_value(struct ist value, unsigned int *encoding_value,
110 unsigned int *has_null_weight);
111
/* Normalization functions referenced by the vary_information array. */
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +0100112static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
113 char *buf, unsigned int *buf_len);
114static int default_normalizer(struct htx *htx, struct ist hdr_name,
115 char *buf, unsigned int *buf_len);
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100116
/* Comparison function referenced by the vary_information array. */
Tim Duesterhused84d842021-01-18 13:41:17 +0100117static int accept_encoding_bitmap_cmp(const void *ref, const void *new, unsigned int len);
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100118
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100119/* Warning : do not forget to update HTTP_CACHE_SEC_KEY_LEN when new items are
120 * added to this array. */
/* Each row is { header name, signature bit, sub hash length, normalization
 * function, comparison function }. secondary_key_cmp() walks the rows in
 * array order and advances in the key by each row's sub hash length; a NULL
 * comparison function makes it use memcmp(). */
121const struct vary_hashing_information vary_information[] = {
Tim Duesterhused84d842021-01-18 13:41:17 +0100122 { IST("accept-encoding"), VARY_ACCEPT_ENCODING, sizeof(uint32_t), &accept_encoding_normalizer, &accept_encoding_bitmap_cmp },
Remi Tricot-Le Breton6a34b2b2020-12-23 18:13:47 +0100123 { IST("referer"), VARY_REFERER, sizeof(int), &default_normalizer, NULL },
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100124};
125
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100126
William Lallemand41db4602017-10-30 11:15:51 +0100127/*
128 * cache ctx for filters
129 */
130struct cache_st {
/* First shared block of the row whose data holds the cache_entry being
 * stored. Set to NULL by cache_store_strm_init(). */
William Lallemand41db4602017-10-30 11:15:51 +0100131 struct shared_block *first_block;
132};
133
/* NOTE(review): presumably the default value of the <max_secondary_entries>
 * field of struct cache - confirm against the configuration parser. */
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +0100134#define DEFAULT_MAX_SECONDARY_ENTRY 10
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100135
/* Header of a cached object. The filter callbacks obtain it by casting the
 * <data> area of the first shared block of a row. */
William Lallemand41db4602017-10-30 11:15:51 +0100136struct cache_entry {
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +0100137 unsigned int complete; /* An entry won't be valid until complete is not null. */
William Lallemand41db4602017-10-30 11:15:51 +0100138 unsigned int latest_validation; /* latest validation date */
Willy Tarreau5e842192023-02-07 15:22:41 +0100139 unsigned int expire; /* expiration date (wall clock time) */
Frédéric Lécaillee7a770c2018-10-26 14:29:22 +0200140 unsigned int age; /* Origin server "Age" header value */
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100141
William Lallemand41db4602017-10-30 11:15:51 +0100142 struct eb32_node eb; /* ebtree node used to hold the cache object */
William Lallemandf528fff2017-11-23 19:43:17 +0100143 char hash[20]; /* full primary hash; entry_exist() looks the entry up using its first 32 bits, then compares all of it */
Remi Tricot-Le Bretondbb65b52020-10-22 10:40:04 +0200144
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +0100145 char secondary_key[HTTP_CACHE_SEC_KEY_LEN]; /* Optional secondary key. */
146 unsigned int secondary_key_signature; /* Bitfield of the HTTP headers that should be used
147 * to build secondary keys for this cache entry. */
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100148 unsigned int secondary_entries_count; /* Should only be filled in the last entry of a list of dup entries */
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100149 unsigned int last_clear_ts; /* Timestamp of the last call to clear_expired_duplicates. */
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +0100150
Remi Tricot-Le Bretondbb65b52020-10-22 10:40:04 +0200151 unsigned int etag_length; /* Length of the ETag value (if one was found in the response). */
152 unsigned int etag_offset; /* Offset of the ETag value in the data buffer. */
153
Remi Tricot Le Breton27091b42020-10-23 10:51:27 +0200154 time_t last_modified; /* Origin server "Last-Modified" header value converted in
155 * seconds since epoch. If no "Last-Modified"
156 * header is found, use "Date" header value,
157 * otherwise use reception time. This field will
158 * be used in case of an "If-Modified-Since"-based
159 * conditional request. */
160
/* Zero-length trailing array marking the start of what follows the header. */
William Lallemand41db4602017-10-30 11:15:51 +0100161 unsigned char data[0];
162};
163
/* NOTE(review): presumably the size in bytes of one shared block - confirm. */
164#define CACHE_BLOCKSIZE 1024
/* 2^31 */
Willy Tarreau96062a12018-11-11 14:00:28 +0100165#define CACHE_ENTRY_MAX_AGE 2147483648U
William Lallemand41db4602017-10-30 11:15:51 +0100166
167static struct list caches = LIST_HEAD_INIT(caches);
/* List searched by name in cache_store_check(). */
William Lallemandd1d1e222019-08-28 15:22:49 +0200168static struct list caches_config = LIST_HEAD_INIT(caches_config); /* cache config to init */
/* NOTE(review): presumably the cache section currently being parsed - confirm. */
William Lallemand41db4602017-10-30 11:15:51 +0100169static struct cache *tmp_cache_config = NULL;
170
/* Pool of per-stream filter contexts (struct cache_st). */
Willy Tarreau8ceae722018-11-26 11:58:30 +0100171DECLARE_STATIC_POOL(pool_head_cache_st, "cache_st", sizeof(struct cache_st));
172
/* Forward declarations: both functions are used before their definition. */
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100173static struct eb32_node *insert_entry(struct cache *cache, struct cache_entry *new_entry);
174static void delete_entry(struct cache_entry *del_entry);
175
William Lallemandf528fff2017-11-23 19:43:17 +0100176struct cache_entry *entry_exist(struct cache *cache, char *hash)
William Lallemand4da3f8a2017-10-31 14:33:34 +0100177{
178 struct eb32_node *node;
179 struct cache_entry *entry;
180
Willy Tarreau8b507582020-02-25 09:35:07 +0100181 node = eb32_lookup(&cache->entries, read_u32(hash));
William Lallemand4da3f8a2017-10-31 14:33:34 +0100182 if (!node)
183 return NULL;
184
185 entry = eb32_entry(node, struct cache_entry, eb);
William Lallemandf528fff2017-11-23 19:43:17 +0100186
187 /* if that's not the right node */
188 if (memcmp(entry->hash, hash, sizeof(entry->hash)))
189 return NULL;
190
Willy Tarreau5e842192023-02-07 15:22:41 +0100191 if (entry->expire > date.tv_sec) {
William Lallemand4da3f8a2017-10-31 14:33:34 +0100192 return entry;
William Lallemand08727662017-11-21 20:01:27 +0100193 } else {
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100194 delete_entry(entry);
William Lallemand08727662017-11-21 20:01:27 +0100195 entry->eb.key = 0;
196 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100197 return NULL;
198
199}
200
Remi Tricot-Le Breton6a34b2b2020-12-23 18:13:47 +0100201
202/*
203 * Compare a newly built secondary key to the one found in a cache_entry.
204 * Every sub-part of the key is compared to the reference through the dedicated
205 * comparison function of the sub-part (that might do more than a simple
206 * memcmp).
207 * Returns 0 if the keys are alike.
208 */
209static int secondary_key_cmp(const char *ref_key, const char *new_key)
210{
211 int retval = 0;
Tim Duesterhus5897cfe2021-01-18 13:41:18 +0100212 size_t idx = 0;
213 unsigned int offset = 0;
Remi Tricot-Le Breton6a34b2b2020-12-23 18:13:47 +0100214 const struct vary_hashing_information *info;
215
216 for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && !retval; ++idx) {
217 info = &vary_information[idx];
218
219 if (info->cmp_fn)
220 retval = info->cmp_fn(&ref_key[offset], &new_key[offset], info->hash_length);
221 else
222 retval = memcmp(&ref_key[offset], &new_key[offset], info->hash_length);
223
224 offset += info->hash_length;
225 }
226
227 return retval;
228}
229
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +0100230/*
231 * There can be multiple entries with the same primary key in the ebtree so in
232 * order to get the proper one out of the list, we use a secondary_key.
233 * This function simply iterates over all the entries with the same primary_key
234 * until it finds the right one.
235 * Returns the cache_entry in case of success, NULL otherwise.
236 */
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100237struct cache_entry *secondary_entry_exist(struct cache *cache, struct cache_entry *entry,
Remi Tricot-Le Breton6a34b2b2020-12-23 18:13:47 +0100238 const char *secondary_key)
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100239{
240 struct eb32_node *node = &entry->eb;
241
242 if (!entry->secondary_key_signature)
243 return NULL;
244
Remi Tricot-Le Breton6a34b2b2020-12-23 18:13:47 +0100245 while (entry && secondary_key_cmp(entry->secondary_key, secondary_key) != 0) {
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100246 node = eb32_next_dup(node);
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100247
248 /* Make the best use of this iteration and clear expired entries
249 * when we find them. Calling delete_entry would be too costly
250 * so we simply call eb32_delete. The secondary_entry count will
251 * be updated when we try to insert a new entry to this list. */
Willy Tarreau5e842192023-02-07 15:22:41 +0100252 if (entry->expire <= date.tv_sec) {
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100253 eb32_delete(&entry->eb);
254 entry->eb.key = 0;
255 }
256
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100257 entry = node ? eb32_entry(node, struct cache_entry, eb) : NULL;
258 }
259
260 /* Expired entry */
Willy Tarreau5e842192023-02-07 15:22:41 +0100261 if (entry && entry->expire <= date.tv_sec) {
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100262 eb32_delete(&entry->eb);
263 entry->eb.key = 0;
264 entry = NULL;
265 }
266
267 return entry;
268}
269
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100270
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100271/*
272 * Remove all expired entries from a list of duplicates.
273 * Return the number of alive entries in the list and sets dup_tail to the
274 * current last item of the list.
275 */
276static unsigned int clear_expired_duplicates(struct eb32_node **dup_tail)
277{
278 unsigned int entry_count = 0;
279 struct cache_entry *entry = NULL;
280 struct eb32_node *prev = *dup_tail;
281 struct eb32_node *tail = NULL;
282
283 while (prev) {
284 entry = container_of(prev, struct cache_entry, eb);
285 prev = eb32_prev_dup(prev);
Willy Tarreau5e842192023-02-07 15:22:41 +0100286 if (entry->expire <= date.tv_sec) {
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100287 eb32_delete(&entry->eb);
288 entry->eb.key = 0;
289 }
290 else {
291 if (!tail)
292 tail = &entry->eb;
293 ++entry_count;
294 }
295 }
296
297 *dup_tail = tail;
298
299 return entry_count;
300}
301
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100302
303/*
304 * This function inserts a cache_entry in the cache's ebtree. In case of
305 * duplicate entries (vary), it then checks that the number of entries did not
306 * reach the max number of secondary entries. If this entry should not have been
307 * created, remove it.
308 * In the regular case (unique entries), this function does not do more than a
309 * simple insert. In case of secondary entries, it will at most cost an
310 * insertion+max_sec_entries time checks and entry deletion.
311 * Returns the newly inserted node in case of success, NULL otherwise.
312 */
313static struct eb32_node *insert_entry(struct cache *cache, struct cache_entry *new_entry)
314{
315 struct eb32_node *prev = NULL;
316 struct cache_entry *entry = NULL;
317 unsigned int entry_count = 0;
Willy Tarreau5e842192023-02-07 15:22:41 +0100318 unsigned int last_clear_ts = date.tv_sec;
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100319
320 struct eb32_node *node = eb32_insert(&cache->entries, &new_entry->eb);
321
322 /* We should not have multiple entries with the same primary key unless
323 * the entry has a non null vary signature. */
324 if (!new_entry->secondary_key_signature)
325 return node;
326
327 prev = eb32_prev_dup(node);
328 if (prev != NULL) {
329 /* The last entry of a duplicate list should contain the current
330 * number of entries in the list. */
331 entry = container_of(prev, struct cache_entry, eb);
332 entry_count = entry->secondary_entries_count;
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100333 last_clear_ts = entry->last_clear_ts;
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100334
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +0100335 if (entry_count >= cache->max_secondary_entries) {
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100336 /* Some entries of the duplicate list might be expired so
337 * we will iterate over all the items in order to free some
338 * space. In order to avoid going over the same list too
339 * often, we first check the timestamp of the last check
340 * performed. */
Willy Tarreau5e842192023-02-07 15:22:41 +0100341 if (last_clear_ts == date.tv_sec) {
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100342 /* Too many entries for this primary key, clear the
343 * one that was inserted. */
344 eb32_delete(node);
345 node->key = 0;
346 return NULL;
347 }
348
349 entry_count = clear_expired_duplicates(&prev);
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +0100350 if (entry_count >= cache->max_secondary_entries) {
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100351 /* Still too many entries for this primary key, delete
352 * the newly inserted one. */
353 entry = container_of(prev, struct cache_entry, eb);
Willy Tarreau5e842192023-02-07 15:22:41 +0100354 entry->last_clear_ts = date.tv_sec;
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100355 eb32_delete(node);
356 node->key = 0;
357 return NULL;
358 }
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100359 }
360 }
361
362 new_entry->secondary_entries_count = entry_count + 1;
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100363 new_entry->last_clear_ts = last_clear_ts;
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100364
365 return node;
366}
367
368
369/*
370 * This function removes an entry from the ebtree. If the entry was a duplicate
371 * (in case of Vary), it updates the secondary entry counter in another
372 * duplicate entry (the last entry of the dup list).
373 */
374static void delete_entry(struct cache_entry *del_entry)
375{
376 struct eb32_node *prev = NULL, *next = NULL;
377 struct cache_entry *entry = NULL;
378 struct eb32_node *last = NULL;
379
380 if (del_entry->secondary_key_signature) {
381 next = &del_entry->eb;
382
383 /* Look for last entry of the duplicates list. */
384 while ((next = eb32_next_dup(next))) {
385 last = next;
386 }
387
388 if (last) {
389 entry = container_of(last, struct cache_entry, eb);
390 --entry->secondary_entries_count;
391 }
392 else {
393 /* The current entry is the last one, look for the
394 * previous one to update its counter. */
395 prev = eb32_prev_dup(&del_entry->eb);
396 if (prev) {
397 entry = container_of(prev, struct cache_entry, eb);
398 entry->secondary_entries_count = del_entry->secondary_entries_count - 1;
399 }
400 }
401 }
402 eb32_delete(&del_entry->eb);
403 del_entry->eb.key = 0;
404}
405
406
William Lallemand4da3f8a2017-10-31 14:33:34 +0100407static inline struct shared_context *shctx_ptr(struct cache *cache)
408{
409 return (struct shared_context *)((unsigned char *)cache - ((struct shared_context *)NULL)->data);
410}
411
William Lallemand77c11972017-10-31 20:43:01 +0100412static inline struct shared_block *block_ptr(struct cache_entry *entry)
413{
414 return (struct shared_block *)((unsigned char *)entry - ((struct shared_block *)NULL)->data);
415}
416
417
418
William Lallemand41db4602017-10-30 11:15:51 +0100419static int
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100420cache_store_init(struct proxy *px, struct flt_conf *fconf)
William Lallemand41db4602017-10-30 11:15:51 +0100421{
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100422 fconf->flags |= FLT_CFG_FL_HTX;
William Lallemand41db4602017-10-30 11:15:51 +0100423 return 0;
424}
425
Christopher Faulet95220e22018-12-07 17:34:39 +0100426static void
427cache_store_deinit(struct proxy *px, struct flt_conf *fconf)
428{
429 struct cache_flt_conf *cconf = fconf->conf;
430
Tim Duesterhusd7c6e6a2020-09-14 18:01:33 +0200431 if (!(cconf->flags & CACHE_FLT_INIT))
432 free(cconf->c.name);
Christopher Faulet95220e22018-12-07 17:34:39 +0100433 free(cconf);
434}
435
William Lallemand4da3f8a2017-10-31 14:33:34 +0100436static int
Christopher Faulet95220e22018-12-07 17:34:39 +0100437cache_store_check(struct proxy *px, struct flt_conf *fconf)
438{
439 struct cache_flt_conf *cconf = fconf->conf;
Christopher Fauletafd819c2018-12-11 08:57:45 +0100440 struct flt_conf *f;
Christopher Faulet95220e22018-12-07 17:34:39 +0100441 struct cache *cache;
Christopher Faulet27d93c32018-12-15 22:32:02 +0100442 int comp = 0;
Christopher Faulet95220e22018-12-07 17:34:39 +0100443
William Lallemandd1d1e222019-08-28 15:22:49 +0200444 /* Find the cache corresponding to the name in the filter config. The
445 * cache will not be referenced now in the filter config because it is
446 * not fully allocated. This step will be performed during the cache
447 * post_check.
448 */
449 list_for_each_entry(cache, &caches_config, list) {
Tim Duesterhuse5ff1412021-01-02 22:31:53 +0100450 if (strcmp(cache->id, cconf->c.name) == 0)
Christopher Faulet95220e22018-12-07 17:34:39 +0100451 goto found;
Christopher Faulet95220e22018-12-07 17:34:39 +0100452 }
453
454 ha_alert("config: %s '%s': unable to find the cache '%s' referenced by the filter 'cache'.\n",
455 proxy_type_str(px), px->id, (char *)cconf->c.name);
456 return 1;
457
458 found:
Christopher Fauletafd819c2018-12-11 08:57:45 +0100459 /* Here <cache> points on the cache the filter must use and <cconf>
460 * points on the cache filter configuration. */
461
462 /* Check all filters for proxy <px> to know if the compression is
Christopher Faulet27d93c32018-12-15 22:32:02 +0100463 * enabled and if it is after the cache. When the compression is before
464 * the cache, an error is returned. Also check if the cache filter must
465 * be explicitly declaired or not. */
Christopher Fauletafd819c2018-12-11 08:57:45 +0100466 list_for_each_entry(f, &px->filter_configs, list) {
467 if (f == fconf) {
Christopher Faulet27d93c32018-12-15 22:32:02 +0100468 /* The compression filter must be evaluated after the cache. */
469 if (comp) {
470 ha_alert("config: %s '%s': unable to enable the compression filter before "
471 "the cache '%s'.\n", proxy_type_str(px), px->id, cache->id);
472 return 1;
473 }
Christopher Faulet99a17a22018-12-11 09:18:27 +0100474 }
Christopher Faulet8f7fe1c2019-07-15 15:08:25 +0200475 else if (f->id == http_comp_flt_id)
Christopher Faulet27d93c32018-12-15 22:32:02 +0100476 comp = 1;
Christopher Faulet78fbb9f2019-08-11 23:11:03 +0200477 else if (f->id == fcgi_flt_id)
478 continue;
Christopher Faulet27d93c32018-12-15 22:32:02 +0100479 else if ((f->id != fconf->id) && (cconf->flags & CACHE_FLT_F_IMPLICIT_DECL)) {
480 /* Implicit declaration is only allowed with the
Christopher Faulet78fbb9f2019-08-11 23:11:03 +0200481 * compression and fcgi. For other filters, an implicit
Christopher Faulet27d93c32018-12-15 22:32:02 +0100482 * declaration is required. */
483 ha_alert("config: %s '%s': require an explicit filter declaration "
484 "to use the cache '%s'.\n", proxy_type_str(px), px->id, cache->id);
485 return 1;
486 }
487
Christopher Fauletafd819c2018-12-11 08:57:45 +0100488 }
Christopher Faulet95220e22018-12-07 17:34:39 +0100489 return 0;
490}
491
492static int
Christopher Faulet65554e12020-03-06 14:52:06 +0100493cache_store_strm_init(struct stream *s, struct filter *filter)
William Lallemand4da3f8a2017-10-31 14:33:34 +0100494{
Christopher Faulet65554e12020-03-06 14:52:06 +0100495 struct cache_st *st;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100496
Willy Tarreauacc5b012021-03-22 15:00:49 +0100497 st = pool_alloc(pool_head_cache_st);
Christopher Faulet65554e12020-03-06 14:52:06 +0100498 if (st == NULL)
499 return -1;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100500
Christopher Faulet65554e12020-03-06 14:52:06 +0100501 st->first_block = NULL;
502 filter->ctx = st;
Christopher Faulet839791a2019-01-07 16:12:07 +0100503
Christopher Faulet65554e12020-03-06 14:52:06 +0100504 /* Register post-analyzer on AN_RES_WAIT_HTTP */
505 filter->post_analyzers |= AN_RES_WAIT_HTTP;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100506 return 1;
507}
508
Christopher Faulet65554e12020-03-06 14:52:06 +0100509static void
510cache_store_strm_deinit(struct stream *s, struct filter *filter)
William Lallemand49dc0482017-11-24 14:33:54 +0100511{
512 struct cache_st *st = filter->ctx;
Christopher Faulet95220e22018-12-07 17:34:39 +0100513 struct cache_flt_conf *cconf = FLT_CONF(filter);
514 struct cache *cache = cconf->c.cache;
William Lallemand49dc0482017-11-24 14:33:54 +0100515 struct shared_context *shctx = shctx_ptr(cache);
516
William Lallemand49dc0482017-11-24 14:33:54 +0100517 /* Everything should be released in the http_end filter, but we need to do it
518 * there too, in case of errors */
William Lallemand49dc0482017-11-24 14:33:54 +0100519 if (st && st->first_block) {
Remi Tricot-Le Breton5c595362023-11-28 17:08:56 +0100520 struct cache_entry *object = (struct cache_entry *)st->first_block->data;
521
William Lallemand49dc0482017-11-24 14:33:54 +0100522 shctx_lock(shctx);
Remi Tricot-Le Breton5c595362023-11-28 17:08:56 +0100523 if (!object->complete) {
524 /* The stream was closed but the 'complete' flag was not
525 * set which means that cache_store_http_end was not
526 * called. The stream must have been closed before we
527 * could store the full answer in the cache.
528 */
529 delete_entry(object);
530 }
William Lallemand49dc0482017-11-24 14:33:54 +0100531 shctx_row_dec_hot(shctx, st->first_block);
532 shctx_unlock(shctx);
William Lallemand49dc0482017-11-24 14:33:54 +0100533 }
534 if (st) {
Willy Tarreaubafbe012017-11-24 17:34:44 +0100535 pool_free(pool_head_cache_st, st);
William Lallemand49dc0482017-11-24 14:33:54 +0100536 filter->ctx = NULL;
537 }
William Lallemand49dc0482017-11-24 14:33:54 +0100538}
539
Christopher Faulet839791a2019-01-07 16:12:07 +0100540static int
541cache_store_post_analyze(struct stream *s, struct filter *filter, struct channel *chn,
542 unsigned an_bit)
543{
544 struct http_txn *txn = s->txn;
545 struct http_msg *msg = &txn->rsp;
546 struct cache_st *st = filter->ctx;
547
548 if (an_bit != AN_RES_WAIT_HTTP)
549 goto end;
550
551 /* Here we need to check if any compression filter precedes the cache
552 * filter. This is only possible when the compression is configured in
553 * the frontend while the cache filter is configured on the
554 * backend. This case cannot be detected during HAProxy startup. So in
555 * such cases, the cache is disabled.
556 */
557 if (st && (msg->flags & HTTP_MSGF_COMPRESSING)) {
558 pool_free(pool_head_cache_st, st);
559 filter->ctx = NULL;
560 }
561
562 end:
563 return 1;
564}
William Lallemand49dc0482017-11-24 14:33:54 +0100565
566static int
William Lallemand4da3f8a2017-10-31 14:33:34 +0100567cache_store_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
568{
569 struct cache_st *st = filter->ctx;
570
William Lallemand4da3f8a2017-10-31 14:33:34 +0100571 if (!(msg->chn->flags & CF_ISRESP) || !st)
572 return 1;
573
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200574 if (st->first_block)
Christopher Faulet67658c92018-12-06 21:59:39 +0100575 register_data_filter(s, msg->chn, filter);
William Lallemand4da3f8a2017-10-31 14:33:34 +0100576 return 1;
577}
578
Frédéric Lécaille8df65ae2018-10-22 18:01:48 +0200579static inline void disable_cache_entry(struct cache_st *st,
580 struct filter *filter, struct shared_context *shctx)
581{
582 struct cache_entry *object;
583
584 object = (struct cache_entry *)st->first_block->data;
585 filter->ctx = NULL; /* disable cache */
586 shctx_lock(shctx);
587 shctx_row_dec_hot(shctx, st->first_block);
Remi Tricot-Le Breton964caaf2020-12-15 14:30:12 +0100588 eb32_delete(&object->eb);
Frédéric Lécaille8df65ae2018-10-22 18:01:48 +0200589 object->eb.key = 0;
590 shctx_unlock(shctx);
591 pool_free(pool_head_cache_st, st);
592}
593
William Lallemand4da3f8a2017-10-31 14:33:34 +0100594static int
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100595cache_store_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
596 unsigned int offset, unsigned int len)
597{
Christopher Faulet95220e22018-12-07 17:34:39 +0100598 struct cache_flt_conf *cconf = FLT_CONF(filter);
599 struct shared_context *shctx = shctx_ptr(cconf->c.cache);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100600 struct cache_st *st = filter->ctx;
601 struct htx *htx = htxbuf(&msg->chn->buf);
602 struct htx_blk *blk;
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200603 struct shared_block *fb;
Christopher Faulet497c7592020-03-02 16:19:50 +0100604 struct htx_ret htxret;
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200605 unsigned int orig_len, to_forward;
606 int ret;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100607
608 if (!len)
609 return len;
610
611 if (!st->first_block) {
612 unregister_data_filter(s, msg->chn, filter);
613 return len;
614 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100615
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200616 chunk_reset(&trash);
617 orig_len = len;
618 to_forward = 0;
Christopher Faulet497c7592020-03-02 16:19:50 +0100619
620 htxret = htx_find_offset(htx, offset);
621 blk = htxret.blk;
622 offset = htxret.ret;
623 for (; blk && len; blk = htx_get_next_blk(htx, blk)) {
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100624 enum htx_blk_type type = htx_get_blk_type(blk);
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200625 uint32_t info, sz = htx_get_blksz(blk);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100626 struct ist v;
627
628 switch (type) {
629 case HTX_BLK_UNUSED:
630 break;
631
632 case HTX_BLK_DATA:
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100633 v = htx_get_blk_value(htx, blk);
Tim Duesterhus154374c2021-03-02 18:57:27 +0100634 v = istadv(v, offset);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100635 if (v.len > len)
636 v.len = len;
637
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200638 info = (type << 28) + v.len;
639 chunk_memcat(&trash, (char *)&info, sizeof(info));
640 chunk_memcat(&trash, v.ptr, v.len);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100641 to_forward += v.len;
642 len -= v.len;
643 break;
644
645 default:
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200646 /* Here offset must always be 0 because only
647 * DATA blocks can be partially transferred. */
648 if (offset)
649 goto no_cache;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100650 if (sz > len)
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200651 goto end;
652
653 chunk_memcat(&trash, (char *)&blk->info, sizeof(blk->info));
654 chunk_memcat(&trash, htx_get_blk_ptr(htx, blk), sz);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100655 to_forward += sz;
656 len -= sz;
657 break;
658 }
659
660 offset = 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100661 }
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200662
663 end:
664 shctx_lock(shctx);
665 fb = shctx_row_reserve_hot(shctx, st->first_block, trash.data);
666 if (!fb) {
667 shctx_unlock(shctx);
668 goto no_cache;
669 }
670 shctx_unlock(shctx);
671
672 ret = shctx_row_data_append(shctx, st->first_block, st->first_block->last_append,
673 (unsigned char *)b_head(&trash), b_data(&trash));
674 if (ret < 0)
675 goto no_cache;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100676
677 return to_forward;
678
679 no_cache:
680 disable_cache_entry(st, filter, shctx);
681 unregister_data_filter(s, msg->chn, filter);
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200682 return orig_len;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100683}
684
685static int
William Lallemand4da3f8a2017-10-31 14:33:34 +0100686cache_store_http_end(struct stream *s, struct filter *filter,
687 struct http_msg *msg)
688{
689 struct cache_st *st = filter->ctx;
Christopher Faulet95220e22018-12-07 17:34:39 +0100690 struct cache_flt_conf *cconf = FLT_CONF(filter);
691 struct cache *cache = cconf->c.cache;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100692 struct shared_context *shctx = shctx_ptr(cache);
693 struct cache_entry *object;
694
695 if (!(msg->chn->flags & CF_ISRESP))
696 return 1;
697
698 if (st && st->first_block) {
699
700 object = (struct cache_entry *)st->first_block->data;
701
William Lallemand4da3f8a2017-10-31 14:33:34 +0100702 shctx_lock(shctx);
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +0100703 /* The whole payload was cached, the entry can now be used. */
704 object->complete = 1;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100705 /* remove from the hotlist */
William Lallemand4da3f8a2017-10-31 14:33:34 +0100706 shctx_row_dec_hot(shctx, st->first_block);
707 shctx_unlock(shctx);
708
709 }
710 if (st) {
Willy Tarreaubafbe012017-11-24 17:34:44 +0100711 pool_free(pool_head_cache_st, st);
William Lallemand4da3f8a2017-10-31 14:33:34 +0100712 filter->ctx = NULL;
713 }
714
715 return 1;
716}
717
/*
 * This intends to be used when checking HTTP headers for some
 * word=value directive.
 *
 * <sample> (<slen> bytes, not necessarily NUL-terminated) must start with
 * <word> (<wlen> bytes), compared case-insensitively for ASCII letters only,
 * immediately followed by '=' and at least one more character.
 *
 * Returns a pointer to the first character of the value, or NULL if the word
 * was not found or if there wasn't any value assigned to it.
 */
char *directive_value(const char *sample, int slen, const char *word, int wlen)
{
	int i;

	if (wlen < 0 || slen < wlen)
		return NULL;

	for (i = 0; i < wlen; i++) {
		unsigned char a = (unsigned char)sample[i];
		unsigned char b = (unsigned char)word[i];

		if (a == b)
			continue;

		/* Bytes may only differ by the ASCII case bit... */
		if ((a ^ b) != ('A' ^ 'a'))
			return NULL;

		/* ...and only when they are letters. Without this check
		 * unrelated bytes such as '-' (0x2d) and CR (0x0d) would be
		 * considered equal. */
		if ((a | 0x20) < 'a' || (a | 0x20) > 'z')
			return NULL;
	}

	sample += wlen;
	slen -= wlen;

	/* We need the '=' sign and at least one character of value. */
	if (slen < 2 || *sample != '=')
		return NULL;

	return (char *)(sample + 1);
}
755
756/*
757 * Return the maxage in seconds of an HTTP response.
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100758 * The returned value will always take the cache's configuration into account
759 * (cache->maxage) but the actual max age of the response will be set in the
760 * true_maxage parameter. It will be used to determine if a response is already
761 * stale or not.
William Lallemand4da3f8a2017-10-31 14:33:34 +0100762 * Compute the maxage using either:
763 * - the assigned max-age of the cache
764 * - the s-maxage directive
765 * - the max-age directive
766 * - (Expires - Data) headers
767 * - the default-max-age of the cache
768 *
769 */
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100770int http_calc_maxage(struct stream *s, struct cache *cache, int *true_maxage)
William Lallemand4da3f8a2017-10-31 14:33:34 +0100771{
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200772 struct htx *htx = htxbuf(&s->res.buf);
773 struct http_hdr_ctx ctx = { .blk = NULL };
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100774 long smaxage = -1;
775 long maxage = -1;
Remi Tricot-Le Bretona6476112020-10-28 17:52:53 +0100776 int expires = -1;
777 struct tm tm = {};
778 time_t expires_val = 0;
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100779 char *endptr = NULL;
780 int offset = 0;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100781
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100782 /* The Cache-Control max-age and s-maxage directives should be followed by
783 * a positive numerical value (see RFC 7234#5.2.1.1). According to the
784 * specs, a sender "should not" generate a quoted-string value but we will
785 * still accept this format since it isn't strictly forbidden. */
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200786 while (http_find_header(htx, ist("cache-control"), &ctx, 0)) {
787 char *value;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100788
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200789 value = directive_value(ctx.value.ptr, ctx.value.len, "s-maxage", 8);
790 if (value) {
791 struct buffer *chk = get_trash_chunk();
William Lallemand4da3f8a2017-10-31 14:33:34 +0100792
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200793 chunk_strncat(chk, value, ctx.value.len - 8 + 1);
794 chunk_strncat(chk, "", 1);
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100795 offset = (*chk->area == '"') ? 1 : 0;
796 smaxage = strtol(chk->area + offset, &endptr, 10);
Willy Tarreau79a63b12021-11-08 12:09:27 +0100797 if (unlikely(smaxage < 0 || endptr == chk->area + offset))
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100798 return -1;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100799 }
800
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200801 value = directive_value(ctx.value.ptr, ctx.value.len, "max-age", 7);
802 if (value) {
803 struct buffer *chk = get_trash_chunk();
Christopher Faulet5f2c49f2019-07-15 20:49:46 +0200804
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200805 chunk_strncat(chk, value, ctx.value.len - 7 + 1);
806 chunk_strncat(chk, "", 1);
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100807 offset = (*chk->area == '"') ? 1 : 0;
808 maxage = strtol(chk->area + offset, &endptr, 10);
Willy Tarreau79a63b12021-11-08 12:09:27 +0100809 if (unlikely(maxage < 0 || endptr == chk->area + offset))
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100810 return -1;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100811 }
812 }
813
Remi Tricot-Le Bretona6476112020-10-28 17:52:53 +0100814 /* Look for Expires header if no s-maxage or max-age Cache-Control data
815 * was found. */
816 if (maxage == -1 && smaxage == -1) {
817 ctx.blk = NULL;
818 if (http_find_header(htx, ist("expires"), &ctx, 1)) {
819 if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
820 expires_val = my_timegm(&tm);
821 /* A request having an expiring date earlier
822 * than the current date should be considered as
823 * stale. */
Willy Tarreau5e842192023-02-07 15:22:41 +0100824 expires = (expires_val >= date.tv_sec) ?
825 (expires_val - date.tv_sec) : 0;
Remi Tricot-Le Bretona6476112020-10-28 17:52:53 +0100826 }
827 else {
828 /* Following RFC 7234#5.3, an invalid date
829 * format must be treated as a date in the past
830 * so the cache entry must be seen as already
831 * expired. */
832 expires = 0;
833 }
834 }
835 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100836
837
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100838 if (smaxage > 0) {
839 if (true_maxage)
840 *true_maxage = smaxage;
William Lallemand49b44532017-11-24 18:53:43 +0100841 return MIN(smaxage, cache->maxage);
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100842 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100843
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100844 if (maxage > 0) {
845 if (true_maxage)
846 *true_maxage = maxage;
William Lallemand49b44532017-11-24 18:53:43 +0100847 return MIN(maxage, cache->maxage);
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100848 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100849
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100850 if (expires >= 0) {
851 if (true_maxage)
852 *true_maxage = expires;
Remi Tricot-Le Bretona6476112020-10-28 17:52:53 +0100853 return MIN(expires, cache->maxage);
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100854 }
Remi Tricot-Le Bretona6476112020-10-28 17:52:53 +0100855
William Lallemand49b44532017-11-24 18:53:43 +0100856 return cache->maxage;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100857
858}
859
860
William Lallemanda400a3a2017-11-20 19:13:12 +0100861static void cache_free_blocks(struct shared_block *first, struct shared_block *block)
862{
Willy Tarreau5bd37fa2018-04-04 20:17:03 +0200863 struct cache_entry *object = (struct cache_entry *)block->data;
864
865 if (first == block && object->eb.key)
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100866 delete_entry(object);
Willy Tarreau5bd37fa2018-04-04 20:17:03 +0200867 object->eb.key = 0;
William Lallemanda400a3a2017-11-20 19:13:12 +0100868}
869
Remi Tricot Le Breton27091b42020-10-23 10:51:27 +0200870
871/* As per RFC 7234#4.3.2, in case of "If-Modified-Since" conditional request, the
872 * date value should be compared to a date determined by in a previous response (for
873 * the same entity). This date could either be the "Last-Modified" value, or the "Date"
874 * value of the response's reception time (by decreasing order of priority). */
875static time_t get_last_modified_time(struct htx *htx)
876{
877 time_t last_modified = 0;
878 struct http_hdr_ctx ctx = { .blk = NULL };
879 struct tm tm = {};
880
881 if (http_find_header(htx, ist("last-modified"), &ctx, 1)) {
882 if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
883 last_modified = my_timegm(&tm);
884 }
885 }
886
887 if (!last_modified) {
888 ctx.blk = NULL;
889 if (http_find_header(htx, ist("date"), &ctx, 1)) {
890 if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
891 last_modified = my_timegm(&tm);
892 }
893 }
894 }
895
896 /* Fallback on the current time if no "Last-Modified" or "Date" header
897 * was found. */
898 if (!last_modified)
Willy Tarreau5e842192023-02-07 15:22:41 +0100899 last_modified = date.tv_sec;
Remi Tricot Le Breton27091b42020-10-23 10:51:27 +0200900
901 return last_modified;
902}
903
William Lallemand41db4602017-10-30 11:15:51 +0100904/*
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100905 * Checks the vary header's value. The headers on which vary should be applied
Ilya Shipitsinf38a0182020-12-21 01:16:17 +0500906 * must be explicitly supported in the vary_information array (see cache.c). If
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100907 * any other header is mentioned, we won't store the response.
908 * Returns 1 if Vary-based storage can work, 0 otherwise.
909 */
910static int http_check_vary_header(struct htx *htx, unsigned int *vary_signature)
911{
912 unsigned int vary_idx;
913 unsigned int vary_info_count;
914 const struct vary_hashing_information *vary_info;
915 struct http_hdr_ctx ctx = { .blk = NULL };
916
917 int retval = 1;
918
919 *vary_signature = 0;
920
921 vary_info_count = sizeof(vary_information)/sizeof(*vary_information);
922 while (retval && http_find_header(htx, ist("Vary"), &ctx, 0)) {
923 for (vary_idx = 0; vary_idx < vary_info_count; ++vary_idx) {
924 vary_info = &vary_information[vary_idx];
925 if (isteqi(ctx.value, vary_info->hdr_name)) {
926 *vary_signature |= vary_info->value;
927 break;
928 }
929 }
930 retval = (vary_idx < vary_info_count);
931 }
932
933 return retval;
934}
935
936
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100937/*
938 * Look for the accept-encoding part of the secondary_key and replace the
939 * encoding bitmap part of the hash with the actual encoding of the response,
940 * extracted from the content-encoding header value.
Remi Tricot-Le Breton6ca89162021-01-07 14:50:51 +0100941 * Responses that have an unknown encoding will not be cached if they also
942 * "vary" on the accept-encoding value.
943 * Returns 0 if we found a known encoding in the response, -1 otherwise.
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100944 */
Remi Tricot-Le Breton02c32272024-04-24 14:32:19 +0200945static int set_secondary_key_encoding(struct htx *htx, unsigned int vary_signature, char *secondary_key)
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100946{
947 unsigned int resp_encoding_bitmap = 0;
948 const struct vary_hashing_information *info = vary_information;
949 unsigned int offset = 0;
950 unsigned int count = 0;
951 unsigned int hash_info_count = sizeof(vary_information)/sizeof(*vary_information);
952 unsigned int encoding_value;
953 struct http_hdr_ctx ctx = { .blk = NULL };
954
Remi Tricot-Le Breton02c32272024-04-24 14:32:19 +0200955 /* We must not set the accept encoding part of the secondary signature
956 * if the response does not vary on 'Accept Encoding'. */
957 if (!(vary_signature & VARY_ACCEPT_ENCODING))
958 return 0;
959
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100960 /* Look for the accept-encoding part of the secondary_key. */
961 while (count < hash_info_count && info->value != VARY_ACCEPT_ENCODING) {
962 offset += info->hash_length;
963 ++info;
964 ++count;
965 }
966
967 if (count == hash_info_count)
Remi Tricot-Le Breton6ca89162021-01-07 14:50:51 +0100968 return -1;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100969
970 while (http_find_header(htx, ist("content-encoding"), &ctx, 0)) {
Remi Tricot-Le Breton6ca89162021-01-07 14:50:51 +0100971 if (parse_encoding_value(ctx.value, &encoding_value, NULL))
972 return -1; /* Do not store responses with an unknown encoding */
973 resp_encoding_bitmap |= encoding_value;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100974 }
975
976 if (!resp_encoding_bitmap)
977 resp_encoding_bitmap |= VARY_ENCODING_IDENTITY;
978
979 /* Rewrite the bitmap part of the hash with the new bitmap that only
Ilya Shipitsinb8888ab2021-01-06 21:20:16 +0500980 * corresponds the the response's encoding. */
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100981 write_u32(secondary_key + offset, resp_encoding_bitmap);
Remi Tricot-Le Breton6ca89162021-01-07 14:50:51 +0100982
983 return 0;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +0100984}
985
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100986
/*
 * This function will store the headers of the response in a buffer and then
 * register a filter to store the data.
 *
 * The response is only considered when it does not come from the cache applet
 * itself, when the request is an HTTP/1.1 GET, when a cache key was computed
 * and when the status is 200. A successful (2xx/3xx) response to an unsafe
 * method removes the entry matching the primary key instead. Once all checks
 * have passed, a row is reserved in the shared context, the entry is inserted
 * in the tree (still flagged incomplete), the HTX blocks up to and including
 * EOH are copied, and the row is handed to the storage filter through its
 * context.
 *
 * Whatever happens, ACT_RET_CONT is returned: failing to cache never
 * interrupts the processing of the response. On failure after the row was
 * reserved, the "out" label undoes the reservation.
 */
enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px,
                                        struct session *sess, struct stream *s, int flags)
{
	int effective_maxage = 0;
	int true_maxage = 0;
	struct http_txn *txn = s->txn;
	struct http_msg *msg = &txn->rsp;
	struct filter *filter;
	struct shared_block *first = NULL;
	struct cache_flt_conf *cconf = rule->arg.act.p[0];
	struct cache *cache = cconf->c.cache;
	struct shared_context *shctx = shctx_ptr(cache);
	struct cache_st *cache_ctx = NULL;
	struct cache_entry *object, *old;
	unsigned int key = read_u32(txn->cache_hash);
	struct htx *htx;
	struct http_hdr_ctx ctx;
	size_t hdrs_len = 0;
	int32_t pos;
	unsigned int vary_signature = 0;

	/* Don't cache if the response came from a cache */
	if ((obj_type(s->target) == OBJ_TYPE_APPLET) &&
	    s->target == &http_cache_applet.obj_type) {
		goto out;
	}

	/* cache only HTTP/1.1 */
	if (!(txn->req.flags & HTTP_MSGF_VER_11))
		goto out;

	/* cache only GET method */
	if (txn->meth != HTTP_METH_GET) {
		/* In case of successful unsafe method on a stored resource, the
		 * cached entry must be invalidated (see RFC7234#4.4).
		 * A "non-error response" is one with a 2xx (Successful) or 3xx
		 * (Redirection) status code. */
		if (txn->status >= 200 && txn->status < 400) {
			switch (txn->meth) {
			case HTTP_METH_OPTIONS:
			case HTTP_METH_GET:
			case HTTP_METH_HEAD:
			case HTTP_METH_TRACE:
				/* safe methods: nothing to invalidate */
				break;

			default: /* Any unsafe method */
				/* Discard any corresponding entry in case of successful
				 * unsafe request (such as PUT, POST or DELETE). */
				shctx_lock(shctx);

				old = entry_exist(cconf->c.cache, txn->cache_hash);
				if (old) {
					eb32_delete(&old->eb);
					old->eb.key = 0;
				}
				shctx_unlock(shctx);
			}
		}
		goto out;
	}

	/* cache key was not computed */
	if (!key)
		goto out;

	/* cache only 200 status code */
	if (txn->status != 200)
		goto out;

	/* Find the corresponding filter instance for the current stream */
	list_for_each_entry(filter, &s->strm_flt.filters, list) {
		if (FLT_ID(filter) == cache_store_flt_id && FLT_CONF(filter) == cconf) {
			/* No filter ctx, don't cache anything */
			if (!filter->ctx)
				goto out;
			cache_ctx = filter->ctx;
			break;
		}
	}

	/* NOTE(review): cache_ctx is only set when a matching filter was found
	 * above; it stays NULL otherwise, which is why it is tested again
	 * before being used at the end of this function. */
	htx = htxbuf(&s->res.buf);

	/* Do not cache too big objects. */
	if ((msg->flags & HTTP_MSGF_CNT_LEN) && shctx->max_obj_size > 0 &&
	    htx->data + htx->extra > shctx->max_obj_size)
		goto out;

	/* Only a subset of headers are supported in our Vary implementation. If
	 * any other header is present in the Vary header value, we won't be
	 * able to use the cache. Likewise, if Vary header support is disabled,
	 * avoid caching responses that contain such a header. */
	ctx.blk = NULL;
	if (cache->vary_processing_enabled) {
		if (!http_check_vary_header(htx, &vary_signature))
			goto out;
		if (vary_signature) {
			/* If something went wrong during the secondary key
			 * building, do not store the response. */
			if (!(txn->flags & TX_CACHE_HAS_SEC_KEY))
				goto out;
			http_request_reduce_secondary_key(vary_signature, txn->cache_secondary_hash);
		}
	}
	else if (http_find_header(htx, ist("Vary"), &ctx, 0)) {
		goto out;
	}

	http_check_response_for_cacheability(s, &s->res);

	/* Both flags are required to go further. */
	if (!(txn->flags & TX_CACHEABLE) || !(txn->flags & TX_CACHE_COOK))
		goto out;

	shctx_lock(shctx);
	old = entry_exist(cache, txn->cache_hash);
	if (old) {
		if (vary_signature)
			old = secondary_entry_exist(cconf->c.cache, old,
			                            txn->cache_secondary_hash);
		if (old) {
			if (!old->complete) {
				/* An entry with the same primary key is already being
				 * created, we should not try to store the current
				 * response because it will waste space in the cache. */
				shctx_unlock(shctx);
				goto out;
			}
			/* A complete entry already exists: it gets replaced. */
			delete_entry(old);
			old->eb.key = 0;
		}
	}
	first = shctx_row_reserve_hot(shctx, NULL, sizeof(struct cache_entry));
	if (!first) {
		shctx_unlock(shctx);
		goto out;
	}
	/* the received memory is not initialized, we need at least to mark
	 * the object as not indexed yet.
	 */
	object = (struct cache_entry *)first->data;
	memset(object, 0, sizeof(*object));
	object->eb.key = key;
	object->secondary_key_signature = vary_signature;
	/* We need to temporarily set a valid expiring time until the actual one
	 * is set by the end of this function (in case of concurrent accesses to
	 * the same resource). This way the second access will find an existing
	 * but not yet usable entry in the tree and will avoid storing its data. */
	object->expire = date.tv_sec + 2;

	memcpy(object->hash, txn->cache_hash, sizeof(object->hash));
	if (vary_signature)
		memcpy(object->secondary_key, txn->cache_secondary_hash, HTTP_CACHE_SEC_KEY_LEN);

	/* Insert the entry in the tree even if the payload is not cached yet. */
	if (insert_entry(cache, object) != &object->eb) {
		/* Insertion did not return our own node: reset the key so that
		 * the cleanup at "out" does not try to delete the entry. */
		object->eb.key = 0;
		shctx_unlock(shctx);
		goto out;
	}
	shctx_unlock(shctx);

	/* reserve space for the cache_entry structure */
	first->len = sizeof(struct cache_entry);
	first->last_append = NULL;

	/* Determine the entry's maximum age (taking into account the cache's
	 * configuration) as well as the response's explicit max age (extracted
	 * from cache-control directives or the expires header). */
	effective_maxage = http_calc_maxage(s, cconf->c.cache, &true_maxage);

	ctx.blk = NULL;
	if (http_find_header(htx, ist("Age"), &ctx, 0)) {
		long long hdr_age;
		if (!strl2llrc(ctx.value.ptr, ctx.value.len, &hdr_age) && hdr_age > 0) {
			if (unlikely(hdr_age > CACHE_ENTRY_MAX_AGE))
				hdr_age = CACHE_ENTRY_MAX_AGE;
			/* A response with an Age value greater than its
			 * announced max age is stale and should not be stored. */
			object->age = hdr_age;
			if (unlikely(object->age > true_maxage))
				goto out;
		}
		else
			goto out; /* unparsable or non-positive Age: do not store */
		/* The Age header is removed from the message before the
		 * headers are copied below. */
		http_remove_header(htx, &ctx);
	}

	/* Build a last-modified time that will be stored in the cache_entry and
	 * compared to a future If-Modified-Since client header. */
	object->last_modified = get_last_modified_time(htx);

	/* Serialize every block up to and including EOH into the trash chunk,
	 * each one as its info word followed by its payload. */
	chunk_reset(&trash);
	for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
		struct htx_blk *blk = htx_get_blk(htx, pos);
		enum htx_blk_type type = htx_get_blk_type(blk);
		uint32_t sz = htx_get_blksz(blk);

		hdrs_len += sizeof(*blk) + sz;
		chunk_memcat(&trash, (char *)&blk->info, sizeof(blk->info));
		chunk_memcat(&trash, htx_get_blk_ptr(htx, blk), sz);

		/* Look for optional ETag header.
		 * We need to store the offset of the ETag value in order for
		 * future conditional requests to be able to perform ETag
		 * comparisons. */
		if (type == HTX_BLK_HDR) {
			struct ist header_name = htx_get_blk_name(htx, blk);
			if (isteq(header_name, ist("etag"))) {
				/* the value follows the name inside the block payload */
				object->etag_length = sz - istlen(header_name);
				object->etag_offset = sizeof(struct cache_entry) + b_data(&trash) - sz + istlen(header_name);
			}
		}
		if (type == HTX_BLK_EOH)
			break;
	}

	/* Do not cache objects if the headers are too big. */
	if (hdrs_len > htx->size - global.tune.maxrewrite)
		goto out;

	/* If the response has a secondary_key, fill its key part related to
	 * encodings with the actual encoding of the response. This way any
	 * subsequent request having the same primary key will have its accepted
	 * encodings tested upon the cached response's one.
	 * We will not cache a response that has an unknown encoding (not
	 * explicitly supported in parse_encoding_value function). */
	if (cache->vary_processing_enabled && vary_signature)
		if (set_secondary_key_encoding(htx, vary_signature, object->secondary_key))
			goto out;

	shctx_lock(shctx);
	if (!shctx_row_reserve_hot(shctx, first, trash.data)) {
		shctx_unlock(shctx);
		goto out;
	}
	shctx_unlock(shctx);

	/* cache the headers in a http action because it allows to chose what
	 * to cache, for example you might want to cache a response before
	 * modifying some HTTP headers, or on the contrary after modifying
	 * those headers.
	 */
	/* does not need to be locked because it's in the "hot" list,
	 * copy the headers */
	if (shctx_row_data_append(shctx, first, NULL, (unsigned char *)trash.area, trash.data) < 0)
		goto out;

	/* register the buffer in the filter ctx for filling it with data*/
	if (cache_ctx) {
		cache_ctx->first_block = first;
		/* store latest value and expiration time */
		object->latest_validation = date.tv_sec;
		object->expire = date.tv_sec + effective_maxage;
		return ACT_RET_CONT;
	}

out:
	/* if does not cache */
	if (first) {
		/* <object> was set right after <first> was reserved, so it is
		 * valid whenever <first> is not NULL. */
		shctx_lock(shctx);
		first->len = 0;
		if (object->eb.key)
			delete_entry(object);
		object->eb.key = 0;
		shctx_row_dec_hot(shctx, first);
		shctx_unlock(shctx);
	}

	return ACT_RET_CONT;
}
1261
/* Successive states used while forwarding a cache entry as an HTX message
 * (NOTE(review): presumably held by the cache applet's I/O handler, which is
 * not visible from here). */
#define HTX_CACHE_INIT   0      /* Initial state. */
#define HTX_CACHE_HEADER 1      /* Cache entry headers forwarding */
#define HTX_CACHE_DATA   2      /* Cache entry data forwarding */
#define HTX_CACHE_EOM    3      /* Cache entry completely forwarded. Finish the HTX message */
#define HTX_CACHE_END    4      /* Cache entry treatment terminated */
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001267
William Lallemandecb73b12017-11-24 14:33:55 +01001268static void http_cache_applet_release(struct appctx *appctx)
1269{
Christopher Faulet95220e22018-12-07 17:34:39 +01001270 struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
William Lallemandecb73b12017-11-24 14:33:55 +01001271 struct cache_entry *cache_ptr = appctx->ctx.cache.entry;
Christopher Faulet95220e22018-12-07 17:34:39 +01001272 struct cache *cache = cconf->c.cache;
William Lallemandecb73b12017-11-24 14:33:55 +01001273 struct shared_block *first = block_ptr(cache_ptr);
1274
1275 shctx_lock(shctx_ptr(cache));
1276 shctx_row_dec_hot(shctx_ptr(cache), first);
1277 shctx_unlock(shctx_ptr(cache));
1278}
1279
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001280
/*
 * Adds to <htx> one block of type <type> whose info word is <info>, filling
 * its payload from the cached data starting at <offset> in <shblk> and
 * continuing over the following shared blocks when needed.
 *
 * On success, the position reached in the cache (block and offset) and the
 * number of consumed bytes are saved in the applet context, and the number of
 * consumed bytes is returned. It returns 0, leaving the applet context
 * untouched, when there is no room left in <htx>, when the block is larger
 * than the available room, or when the block cannot be added.
 */
static unsigned int htx_cache_dump_blk(struct appctx *appctx, struct htx *htx, enum htx_blk_type type,
                                       uint32_t info, struct shared_block *shblk, unsigned int offset)
{
	struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
	struct shared_context *shctx = shctx_ptr(cconf->c.cache);
	struct htx_blk *blk;
	char *ptr;
	unsigned int max, total;
	uint32_t blksz;

	max = htx_get_max_blksz(htx, channel_htx_recv_max(si_ic(appctx->owner), htx));
	if (!max)
		return 0;
	/* Headers and trailers: low 8 bits plus the next 20 bits of <info>
	 * (NOTE(review): presumably the name and value lengths). Other block
	 * types: the low 28 bits of <info>. */
	blksz = ((type == HTX_BLK_HDR || type == HTX_BLK_TLR)
	         ? (info & 0xff) + ((info >> 8) & 0xfffff)
	         : info & 0xfffffff);
	if (blksz > max)
		return 0;

	blk = htx_add_blk(htx, type, blksz);
	if (!blk)
		return 0;

	blk->info = info;
	/* NOTE(review): 4 presumably accounts for the 32-bit info word stored
	 * in the cache ahead of the payload - confirm against the reader. */
	total = 4;
	ptr = htx_get_blk_ptr(htx, blk);
	while (blksz) {
		/* copy at most what is left in the current shared block */
		max = MIN(blksz, shctx->block_size - offset);
		memcpy(ptr, (const char *)shblk->data + offset, max);
		offset += max;
		blksz  -= max;
		total  += max;
		ptr    += max;
		/* move to the next shared block when more data is expected or
		 * when the current one was read up to its end */
		if (blksz || offset == shctx->block_size) {
			shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
			offset = 0;
		}
	}
	appctx->ctx.cache.offset = offset;
	appctx->ctx.cache.next = shblk;
	appctx->ctx.cache.sent += total;
	return total;
}
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001324
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001325static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *htx,
1326 uint32_t info, struct shared_block *shblk, unsigned int offset)
1327{
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001328
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001329 struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
1330 struct shared_context *shctx = shctx_ptr(cconf->c.cache);
1331 unsigned int max, total, rem_data;
1332 uint32_t blksz;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001333
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001334 max = htx_get_max_blksz(htx, channel_htx_recv_max(si_ic(appctx->owner), htx));
1335 if (!max)
1336 return 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001337
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001338 rem_data = 0;
Christopher Fauletbda83972019-06-11 09:58:09 +02001339 if (appctx->ctx.cache.rem_data) {
1340 blksz = appctx->ctx.cache.rem_data;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001341 total = 0;
Christopher Fauletbda83972019-06-11 09:58:09 +02001342 }
1343 else {
1344 blksz = (info & 0xfffffff);
1345 total = 4;
1346 }
1347 if (blksz > max) {
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001348 rem_data = blksz - max;
1349 blksz = max;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001350 }
1351
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001352 while (blksz) {
1353 size_t sz;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001354
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001355 max = MIN(blksz, shctx->block_size - offset);
1356 sz = htx_add_data(htx, ist2(shblk->data + offset, max));
1357 offset += sz;
1358 blksz -= sz;
1359 total += sz;
1360 if (sz < max)
1361 break;
1362 if (blksz || offset == shctx->block_size) {
1363 shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
1364 offset = 0;
1365 }
1366 }
1367
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001368 appctx->ctx.cache.offset = offset;
1369 appctx->ctx.cache.next = shblk;
1370 appctx->ctx.cache.sent += total;
1371 appctx->ctx.cache.rem_data = rem_data + blksz;
1372 return total;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001373}
1374
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001375static size_t htx_cache_dump_msg(struct appctx *appctx, struct htx *htx, unsigned int len,
1376 enum htx_blk_type mark)
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001377{
Christopher Faulet95220e22018-12-07 17:34:39 +01001378 struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
1379 struct shared_context *shctx = shctx_ptr(cconf->c.cache);
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001380 struct shared_block *shblk;
1381 unsigned int offset, sz;
1382 unsigned int ret, total = 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001383
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001384 while (len) {
1385 enum htx_blk_type type;
1386 uint32_t info;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001387
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001388 shblk = appctx->ctx.cache.next;
1389 offset = appctx->ctx.cache.offset;
1390 if (appctx->ctx.cache.rem_data) {
1391 type = HTX_BLK_DATA;
1392 info = 0;
1393 goto add_data_blk;
1394 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001395
Ilya Shipitsin6fb0f212020-04-02 15:25:26 +05001396 /* Get info of the next HTX block. May be split on 2 shblk */
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001397 sz = MIN(4, shctx->block_size - offset);
1398 memcpy((char *)&info, (const char *)shblk->data + offset, sz);
1399 offset += sz;
1400 if (sz < 4) {
1401 shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
1402 memcpy(((char *)&info)+sz, (const char *)shblk->data, 4 - sz);
1403 offset = (4 - sz);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001404 }
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001405
1406 /* Get payload of the next HTX block and insert it. */
1407 type = (info >> 28);
1408 if (type != HTX_BLK_DATA)
1409 ret = htx_cache_dump_blk(appctx, htx, type, info, shblk, offset);
1410 else {
1411 add_data_blk:
1412 ret = htx_cache_dump_data_blk(appctx, htx, info, shblk, offset);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001413 }
1414
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001415 if (!ret)
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001416 break;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001417 total += ret;
1418 len -= ret;
1419
1420 if (appctx->ctx.cache.rem_data || type == mark)
1421 break;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001422 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001423
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001424 return total;
1425}
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001426
1427static int htx_cache_add_age_hdr(struct appctx *appctx, struct htx *htx)
1428{
1429 struct cache_entry *cache_ptr = appctx->ctx.cache.entry;
1430 unsigned int age;
1431 char *end;
1432
1433 chunk_reset(&trash);
Willy Tarreau5e842192023-02-07 15:22:41 +01001434 age = MAX(0, (int)(date.tv_sec - cache_ptr->latest_validation)) + cache_ptr->age;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001435 if (unlikely(age > CACHE_ENTRY_MAX_AGE))
1436 age = CACHE_ENTRY_MAX_AGE;
1437 end = ultoa_o(age, b_head(&trash), b_size(&trash));
1438 b_set_data(&trash, end - b_head(&trash));
1439 if (!http_add_header(htx, ist("Age"), ist2(b_head(&trash), b_data(&trash))))
1440 return 0;
1441 return 1;
1442}
1443
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001444static void http_cache_io_handler(struct appctx *appctx)
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001445{
1446 struct cache_entry *cache_ptr = appctx->ctx.cache.entry;
1447 struct shared_block *first = block_ptr(cache_ptr);
1448 struct stream_interface *si = appctx->owner;
1449 struct channel *req = si_oc(si);
1450 struct channel *res = si_ic(si);
1451 struct htx *req_htx, *res_htx;
1452 struct buffer *errmsg;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001453 unsigned int len;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001454 size_t ret, total = 0;
1455
Christopher Fauleta9169322022-03-07 16:44:30 +01001456 res_htx = htx_from_buf(&res->buf);
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001457 total = res_htx->data;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001458
1459 if (unlikely(si->state == SI_ST_DIS || si->state == SI_ST_CLO))
1460 goto out;
1461
Ilya Shipitsin6fb0f212020-04-02 15:25:26 +05001462 /* Check if the input buffer is available. */
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001463 if (!b_size(&res->buf)) {
1464 si_rx_room_blk(si);
1465 goto out;
1466 }
1467
Willy Tarreauefef3232018-12-16 00:37:45 +01001468 if (res->flags & (CF_SHUTW|CF_SHUTR|CF_SHUTW_NOW))
Willy Tarreau273e9642018-12-16 00:35:15 +01001469 appctx->st0 = HTX_CACHE_END;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001470
1471 if (appctx->st0 == HTX_CACHE_INIT) {
1472 appctx->ctx.cache.next = block_ptr(cache_ptr);
1473 appctx->ctx.cache.offset = sizeof(*cache_ptr);
1474 appctx->ctx.cache.sent = 0;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001475 appctx->ctx.cache.rem_data = 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001476 appctx->st0 = HTX_CACHE_HEADER;
1477 }
1478
1479 if (appctx->st0 == HTX_CACHE_HEADER) {
1480 /* Headers must be dump at once. Otherwise it is an error */
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001481 len = first->len - sizeof(*cache_ptr) - appctx->ctx.cache.sent;
1482 ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_EOH);
1483 if (!ret || (htx_get_tail_type(res_htx) != HTX_BLK_EOH) ||
1484 !htx_cache_add_age_hdr(appctx, res_htx))
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001485 goto error;
1486
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001487 /* In case of a conditional request, we might want to send a
1488 * "304 Not Modified" response instead of the stored data. */
Tim Duesterhuse0142342020-10-22 21:15:06 +02001489 if (appctx->ctx.cache.send_notmodified) {
1490 if (!http_replace_res_status(res_htx, ist("304"), ist("Not Modified"))) {
1491 /* If replacing the status code fails we need to send the full response. */
1492 appctx->ctx.cache.send_notmodified = 0;
1493 }
1494 }
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001495
1496 /* Skip response body for HEAD requests or in case of "304 Not
1497 * Modified" response. */
1498 if (si_strm(si)->txn->meth == HTTP_METH_HEAD || appctx->ctx.cache.send_notmodified)
Christopher Fauletf0dd0372019-02-25 11:08:34 +01001499 appctx->st0 = HTX_CACHE_EOM;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001500 else
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001501 appctx->st0 = HTX_CACHE_DATA;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001502 }
1503
1504 if (appctx->st0 == HTX_CACHE_DATA) {
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001505 len = first->len - sizeof(*cache_ptr) - appctx->ctx.cache.sent;
1506 if (len) {
Christopher Fauletd1ac2b92020-12-02 19:12:22 +01001507 ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_UNUSED);
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001508 if (ret < len) {
1509 si_rx_room_blk(si);
1510 goto out;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001511 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001512 }
Christopher Fauletd1ac2b92020-12-02 19:12:22 +01001513 appctx->st0 = HTX_CACHE_EOM;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001514 }
1515
1516 if (appctx->st0 == HTX_CACHE_EOM) {
Christopher Fauletd1ac2b92020-12-02 19:12:22 +01001517 /* no more data are expected. */
1518 res_htx->flags |= HTX_FL_EOM;
Christopher Fauletba476122022-03-07 15:53:57 +01001519 res->flags |= CF_EOI;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001520 appctx->st0 = HTX_CACHE_END;
1521 }
1522
1523 end:
Christopher Fauletadb36312019-02-25 11:40:49 +01001524 if (!(res->flags & CF_SHUTR) && appctx->st0 == HTX_CACHE_END) {
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001525 res->flags |= CF_READ_NULL;
1526 si_shutr(si);
1527 }
1528
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001529 out:
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001530 total = res_htx->data - total;
Christopher Faulet61123912019-01-02 14:10:01 +01001531 if (total)
1532 channel_add_input(res, total);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001533 htx_to_buf(res_htx, &res->buf);
Christopher Fauletadb36312019-02-25 11:40:49 +01001534
1535 /* eat the whole request */
1536 if (co_data(req)) {
1537 req_htx = htx_from_buf(&req->buf);
1538 co_htx_skip(req, req_htx, co_data(req));
1539 htx_to_buf(req_htx, &req->buf);
1540 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001541 return;
1542
1543 error:
1544 /* Sent and HTTP error 500 */
1545 b_reset(&res->buf);
Christopher Fauletf7346382019-07-17 22:02:08 +02001546 errmsg = &http_err_chunks[HTTP_ERR_500];
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001547 res->buf.data = b_data(errmsg);
1548 memcpy(res->buf.area, b_head(errmsg), b_data(errmsg));
1549 res_htx = htx_from_buf(&res->buf);
1550
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001551 total = 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001552 appctx->st0 = HTX_CACHE_END;
1553 goto end;
1554}
1555
1556
Christopher Faulet95220e22018-12-07 17:34:39 +01001557static int parse_cache_rule(struct proxy *proxy, const char *name, struct act_rule *rule, char **err)
William Lallemand41db4602017-10-30 11:15:51 +01001558{
1559 struct flt_conf *fconf;
Christopher Faulet95220e22018-12-07 17:34:39 +01001560 struct cache_flt_conf *cconf = NULL;
William Lallemand41db4602017-10-30 11:15:51 +01001561
Christopher Faulet95220e22018-12-07 17:34:39 +01001562 if (!*name || strcmp(name, "if") == 0 || strcmp(name, "unless") == 0) {
William Lallemand41db4602017-10-30 11:15:51 +01001563 memprintf(err, "expects a cache name");
Christopher Faulet95220e22018-12-07 17:34:39 +01001564 goto err;
William Lallemand41db4602017-10-30 11:15:51 +01001565 }
1566
1567 /* check if a cache filter was already registered with this cache
1568 * name, if that's the case, must use it. */
1569 list_for_each_entry(fconf, &proxy->filter_configs, list) {
Christopher Faulet95220e22018-12-07 17:34:39 +01001570 if (fconf->id == cache_store_flt_id) {
1571 cconf = fconf->conf;
Tim Duesterhuse5ff1412021-01-02 22:31:53 +01001572 if (cconf && strcmp((char *)cconf->c.name, name) == 0) {
Christopher Faulet95220e22018-12-07 17:34:39 +01001573 rule->arg.act.p[0] = cconf;
1574 return 1;
1575 }
William Lallemand41db4602017-10-30 11:15:51 +01001576 }
1577 }
1578
Christopher Faulet95220e22018-12-07 17:34:39 +01001579 /* Create the filter cache config */
1580 cconf = calloc(1, sizeof(*cconf));
1581 if (!cconf) {
1582 memprintf(err, "out of memory\n");
1583 goto err;
1584 }
Christopher Faulet99a17a22018-12-11 09:18:27 +01001585 cconf->flags = CACHE_FLT_F_IMPLICIT_DECL;
Christopher Faulet95220e22018-12-07 17:34:39 +01001586 cconf->c.name = strdup(name);
1587 if (!cconf->c.name) {
1588 memprintf(err, "out of memory\n");
William Lallemand41db4602017-10-30 11:15:51 +01001589 goto err;
1590 }
Christopher Faulet95220e22018-12-07 17:34:39 +01001591
William Lallemand41db4602017-10-30 11:15:51 +01001592 /* register a filter to fill the cache buffer */
1593 fconf = calloc(1, sizeof(*fconf));
1594 if (!fconf) {
Christopher Faulet95220e22018-12-07 17:34:39 +01001595 memprintf(err, "out of memory\n");
William Lallemand41db4602017-10-30 11:15:51 +01001596 goto err;
1597 }
Christopher Faulet95220e22018-12-07 17:34:39 +01001598 fconf->id = cache_store_flt_id;
1599 fconf->conf = cconf;
William Lallemand41db4602017-10-30 11:15:51 +01001600 fconf->ops = &cache_ops;
Willy Tarreau2b718102021-04-21 07:32:39 +02001601 LIST_APPEND(&proxy->filter_configs, &fconf->list);
William Lallemand41db4602017-10-30 11:15:51 +01001602
Christopher Faulet95220e22018-12-07 17:34:39 +01001603 rule->arg.act.p[0] = cconf;
1604 return 1;
William Lallemand41db4602017-10-30 11:15:51 +01001605
Christopher Faulet95220e22018-12-07 17:34:39 +01001606 err:
1607 free(cconf);
1608 return 0;
1609}
1610
1611enum act_parse_ret parse_cache_store(const char **args, int *orig_arg, struct proxy *proxy,
1612 struct act_rule *rule, char **err)
1613{
1614 rule->action = ACT_CUSTOM;
1615 rule->action_ptr = http_action_store_cache;
1616
1617 if (!parse_cache_rule(proxy, args[*orig_arg], rule, err))
1618 return ACT_RET_PRS_ERR;
William Lallemand41db4602017-10-30 11:15:51 +01001619
Christopher Faulet95220e22018-12-07 17:34:39 +01001620 (*orig_arg)++;
1621 return ACT_RET_PRS_OK;
William Lallemand41db4602017-10-30 11:15:51 +01001622}
1623
Baptiste Assmanndb92a832019-08-05 16:55:32 +02001624/* This produces a sha1 hash of the concatenation of the HTTP method,
1625 * the first occurrence of the Host header followed by the path component
1626 * if it begins with a slash ('/'). */
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001627int sha1_hosturi(struct stream *s)
William Lallemandf528fff2017-11-23 19:43:17 +01001628{
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001629 struct http_txn *txn = s->txn;
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001630 struct htx *htx = htxbuf(&s->req.buf);
1631 struct htx_sl *sl;
1632 struct http_hdr_ctx ctx;
Willy Tarreauccc61d82019-10-17 09:28:28 +02001633 struct ist uri;
William Lallemandf528fff2017-11-23 19:43:17 +01001634 blk_SHA_CTX sha1_ctx;
Willy Tarreau83061a82018-07-13 11:56:34 +02001635 struct buffer *trash;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001636
William Lallemandf528fff2017-11-23 19:43:17 +01001637 trash = get_trash_chunk();
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001638 ctx.blk = NULL;
Baptiste Assmanndb92a832019-08-05 16:55:32 +02001639
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001640 sl = http_get_stline(htx);
Willy Tarreauccc61d82019-10-17 09:28:28 +02001641 uri = htx_sl_req_uri(sl); // whole uri
1642 if (!uri.len)
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001643 return 0;
Willy Tarreauccc61d82019-10-17 09:28:28 +02001644
1645 /* In HTTP/1, most URIs are seen in origin form ('/path/to/resource'),
1646 * unless haproxy is deployed in front of an outbound cache. In HTTP/2,
1647 * URIs are almost always sent in absolute form with their scheme. In
1648 * this case, the scheme is almost always "https". In order to support
1649 * sharing of cache objects between H1 and H2, we'll hash the absolute
1650 * URI whenever known, or prepend "https://" + the Host header for
1651 * relative URIs. The difference will only appear on absolute HTTP/1
1652 * requests sent to an origin server, which practically is never met in
1653 * the real world so we don't care about the ability to share the same
1654 * key here.URIs are normalized from the absolute URI to an origin form as
1655 * well.
1656 */
1657 if (!(sl->flags & HTX_SL_F_HAS_AUTHORITY)) {
Willy Tarreau20020ae2019-10-29 13:02:15 +01001658 chunk_istcat(trash, ist("https://"));
Willy Tarreauccc61d82019-10-17 09:28:28 +02001659 if (!http_find_header(htx, ist("Host"), &ctx, 0))
1660 return 0;
Willy Tarreau20020ae2019-10-29 13:02:15 +01001661 chunk_istcat(trash, ctx.value);
Willy Tarreauccc61d82019-10-17 09:28:28 +02001662 }
1663
1664 chunk_memcat(trash, uri.ptr, uri.len);
William Lallemandf528fff2017-11-23 19:43:17 +01001665
1666 /* hash everything */
1667 blk_SHA1_Init(&sha1_ctx);
Willy Tarreau843b7cb2018-07-13 10:54:26 +02001668 blk_SHA1_Update(&sha1_ctx, trash->area, trash->data);
William Lallemandf528fff2017-11-23 19:43:17 +01001669 blk_SHA1_Final((unsigned char *)txn->cache_hash, &sha1_ctx);
1670
1671 return 1;
1672}
1673
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001674/* Looks for "If-None-Match" headers in the request and compares their value
1675 * with the one that might have been stored in the cache_entry. If any of them
1676 * matches, a "304 Not Modified" response should be sent instead of the cached
1677 * data.
1678 * Although unlikely in a GET/HEAD request, the "If-None-Match: *" syntax is
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001679 * valid and should receive a "304 Not Modified" response (RFC 7234#4.3.2).
1680 *
1681 * If no "If-None-Match" header was found, look for an "If-Modified-Since"
1682 * header and compare its value (date) to the one stored in the cache_entry.
1683 * If the request's date is later than the cached one, we also send a
1684 * "304 Not Modified" response (see RFCs 7232#3.3 and 7234#4.3.2).
1685 *
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001686 * Returns 1 if "304 Not Modified" should be sent, 0 otherwise.
1687 */
1688static int should_send_notmodified_response(struct cache *cache, struct htx *htx,
1689 struct cache_entry *entry)
1690{
1691 int retval = 0;
1692
1693 struct http_hdr_ctx ctx = { .blk = NULL };
1694 struct ist cache_entry_etag = IST_NULL;
1695 struct buffer *etag_buffer = NULL;
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001696 int if_none_match_found = 0;
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001697
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001698 struct tm tm = {};
1699 time_t if_modified_since = 0;
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001700
1701 /* If we find a "If-None-Match" header in the request, rebuild the
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001702 * cache_entry's ETag in order to perform comparisons.
1703 * There could be multiple "if-none-match" header lines. */
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001704 while (http_find_header(htx, ist("if-none-match"), &ctx, 0)) {
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001705 if_none_match_found = 1;
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001706
1707 /* A '*' matches everything. */
1708 if (isteq(ctx.value, ist("*")) != 0) {
1709 retval = 1;
1710 break;
1711 }
1712
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001713 /* No need to rebuild an etag if none was stored in the cache. */
1714 if (entry->etag_length == 0)
1715 break;
1716
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001717 /* Rebuild the stored ETag. */
1718 if (etag_buffer == NULL) {
1719 etag_buffer = get_trash_chunk();
1720
1721 if (shctx_row_data_get(shctx_ptr(cache), block_ptr(entry),
1722 (unsigned char*)b_orig(etag_buffer),
1723 entry->etag_offset, entry->etag_length) == 0) {
1724 cache_entry_etag = ist2(b_orig(etag_buffer), entry->etag_length);
1725 } else {
1726 /* We could not rebuild the ETag in one go, we
1727 * won't send a "304 Not Modified" response. */
1728 break;
1729 }
1730 }
1731
1732 if (http_compare_etags(cache_entry_etag, ctx.value) == 1) {
1733 retval = 1;
1734 break;
1735 }
1736 }
1737
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001738 /* If the request did not contain an "If-None-Match" header, we look for
1739 * an "If-Modified-Since" header (see RFC 7232#3.3). */
1740 if (retval == 0 && if_none_match_found == 0) {
1741 ctx.blk = NULL;
1742 if (http_find_header(htx, ist("if-modified-since"), &ctx, 1)) {
1743 if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
1744 if_modified_since = my_timegm(&tm);
1745
1746 /* We send a "304 Not Modified" response if the
1747 * entry's last modified date is earlier than
1748 * the one found in the "If-Modified-Since"
1749 * header. */
1750 retval = (entry->last_modified <= if_modified_since);
1751 }
1752 }
1753 }
1754
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001755 return retval;
1756}
1757
William Lallemand41db4602017-10-30 11:15:51 +01001758enum act_return http_action_req_cache_use(struct act_rule *rule, struct proxy *px,
1759 struct session *sess, struct stream *s, int flags)
1760{
William Lallemand77c11972017-10-31 20:43:01 +01001761
Christopher Fauletb3d4bca2019-02-25 10:59:33 +01001762 struct http_txn *txn = s->txn;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001763 struct cache_entry *res, *sec_entry = NULL;
Christopher Faulet95220e22018-12-07 17:34:39 +01001764 struct cache_flt_conf *cconf = rule->arg.act.p[0];
1765 struct cache *cache = cconf->c.cache;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001766 struct shared_block *entry_block;
1767
William Lallemand77c11972017-10-31 20:43:01 +01001768
Willy Tarreau6905d182019-10-01 17:59:17 +02001769 /* Ignore cache for HTTP/1.0 requests and for requests other than GET
1770 * and HEAD */
Christopher Fauletb3d4bca2019-02-25 10:59:33 +01001771 if (!(txn->req.flags & HTTP_MSGF_VER_11) ||
Willy Tarreau6905d182019-10-01 17:59:17 +02001772 (txn->meth != HTTP_METH_GET && txn->meth != HTTP_METH_HEAD))
Christopher Fauletb3d4bca2019-02-25 10:59:33 +01001773 txn->flags |= TX_CACHE_IGNORE;
1774
Christopher Fauletfc9cfe42019-07-16 14:54:53 +02001775 http_check_request_for_cacheability(s, &s->req);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001776
Remi Tricot-Le Breton72cffaf2020-12-03 18:19:31 +01001777 /* The request's hash has to be calculated for all requests, even POSTs
Ilya Shipitsinf38a0182020-12-21 01:16:17 +05001778 * or PUTs for instance because RFC7234 specifies that a successful
Remi Tricot-Le Breton72cffaf2020-12-03 18:19:31 +01001779 * "unsafe" method on a stored resource must invalidate it
1780 * (see RFC7234#4.4). */
1781 if (!sha1_hosturi(s))
Willy Tarreau504455c2017-12-22 17:47:35 +01001782 return ACT_RET_CONT;
1783
Willy Tarreau504455c2017-12-22 17:47:35 +01001784 if (s->txn->flags & TX_CACHE_IGNORE)
1785 return ACT_RET_CONT;
1786
Willy Tarreaua1214a52018-12-14 14:00:25 +01001787 if (px == strm_fe(s))
Willy Tarreau4781b152021-04-06 13:53:36 +02001788 _HA_ATOMIC_INC(&px->fe_counters.p.http.cache_lookups);
Willy Tarreaua1214a52018-12-14 14:00:25 +01001789 else
Willy Tarreau4781b152021-04-06 13:53:36 +02001790 _HA_ATOMIC_INC(&px->be_counters.p.http.cache_lookups);
Willy Tarreaua1214a52018-12-14 14:00:25 +01001791
William Lallemanda400a3a2017-11-20 19:13:12 +01001792 shctx_lock(shctx_ptr(cache));
William Lallemandf528fff2017-11-23 19:43:17 +01001793 res = entry_exist(cache, s->txn->cache_hash);
Remi Tricot-Le Bretona8ba4382023-02-21 17:42:04 +01001794 /* We must not use an entry that is not complete but the check will be
1795 * performed after we look for a potential secondary entry (in case of
1796 * Vary). */
1797 if (res) {
William Lallemand77c11972017-10-31 20:43:01 +01001798 struct appctx *appctx;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001799 entry_block = block_ptr(res);
1800 shctx_row_inc_hot(shctx_ptr(cache), entry_block);
William Lallemanda400a3a2017-11-20 19:13:12 +01001801 shctx_unlock(shctx_ptr(cache));
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001802
1803 /* In case of Vary, we could have multiple entries with the same
Remi Tricot-Le Breton2b5c5cb2020-12-23 18:13:45 +01001804 * primary hash. We need to calculate the secondary hash in order
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001805 * to find the actual entry we want (if it exists). */
1806 if (res->secondary_key_signature) {
1807 if (!http_request_build_secondary_key(s, res->secondary_key_signature)) {
1808 shctx_lock(shctx_ptr(cache));
1809 sec_entry = secondary_entry_exist(cache, res,
1810 s->txn->cache_secondary_hash);
1811 if (sec_entry && sec_entry != res) {
1812 /* The wrong row was added to the hot list. */
1813 shctx_row_dec_hot(shctx_ptr(cache), entry_block);
1814 entry_block = block_ptr(sec_entry);
1815 shctx_row_inc_hot(shctx_ptr(cache), entry_block);
1816 }
1817 res = sec_entry;
1818 shctx_unlock(shctx_ptr(cache));
1819 }
1820 else
1821 res = NULL;
1822 }
1823
Remi Tricot-Le Bretona8ba4382023-02-21 17:42:04 +01001824 /* We either looked for a valid secondary entry and could not
1825 * find one, or the entry we want to use is not complete. We
1826 * can't use the cache's entry and must forward the request to
1827 * the server. */
1828 if (!res || !res->complete) {
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001829 shctx_lock(shctx_ptr(cache));
1830 shctx_row_dec_hot(shctx_ptr(cache), entry_block);
1831 shctx_unlock(shctx_ptr(cache));
1832 return ACT_RET_CONT;
1833 }
1834
William Lallemand77c11972017-10-31 20:43:01 +01001835 s->target = &http_cache_applet.obj_type;
Willy Tarreau14bfe9a2018-12-19 15:19:27 +01001836 if ((appctx = si_register_handler(&s->si[1], objt_applet(s->target)))) {
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001837 appctx->st0 = HTX_CACHE_INIT;
William Lallemand77c11972017-10-31 20:43:01 +01001838 appctx->rule = rule;
1839 appctx->ctx.cache.entry = res;
Frédéric Lécaille8df65ae2018-10-22 18:01:48 +02001840 appctx->ctx.cache.next = NULL;
1841 appctx->ctx.cache.sent = 0;
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001842 appctx->ctx.cache.send_notmodified =
1843 should_send_notmodified_response(cache, htxbuf(&s->req.buf), res);
Willy Tarreaua1214a52018-12-14 14:00:25 +01001844
1845 if (px == strm_fe(s))
Willy Tarreau4781b152021-04-06 13:53:36 +02001846 _HA_ATOMIC_INC(&px->fe_counters.p.http.cache_hits);
Willy Tarreaua1214a52018-12-14 14:00:25 +01001847 else
Willy Tarreau4781b152021-04-06 13:53:36 +02001848 _HA_ATOMIC_INC(&px->be_counters.p.http.cache_hits);
Olivier Houchardfccf8402017-11-01 14:04:02 +01001849 return ACT_RET_CONT;
William Lallemand77c11972017-10-31 20:43:01 +01001850 } else {
Christopher Faulet95d7bc52022-04-21 11:30:43 +02001851 s->target = NULL;
William Lallemand55e76742017-11-21 20:01:28 +01001852 shctx_lock(shctx_ptr(cache));
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001853 shctx_row_dec_hot(shctx_ptr(cache), entry_block);
William Lallemand55e76742017-11-21 20:01:28 +01001854 shctx_unlock(shctx_ptr(cache));
Christopher Faulet95d7bc52022-04-21 11:30:43 +02001855 return ACT_RET_CONT;
William Lallemand77c11972017-10-31 20:43:01 +01001856 }
1857 }
William Lallemanda400a3a2017-11-20 19:13:12 +01001858 shctx_unlock(shctx_ptr(cache));
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001859
1860 /* Shared context does not need to be locked while we calculate the
1861 * secondary hash. */
Remi Tricot-Le Breton754b2422020-11-16 15:56:10 +01001862 if (!res && cache->vary_processing_enabled) {
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001863 /* Build a complete secondary hash until the server response
1864 * tells us which fields should be kept (if any). */
1865 http_request_prebuild_full_secondary_key(s);
1866 }
Olivier Houchardfccf8402017-11-01 14:04:02 +01001867 return ACT_RET_CONT;
William Lallemand41db4602017-10-30 11:15:51 +01001868}
1869
1870
1871enum act_parse_ret parse_cache_use(const char **args, int *orig_arg, struct proxy *proxy,
1872 struct act_rule *rule, char **err)
1873{
William Lallemand41db4602017-10-30 11:15:51 +01001874 rule->action = ACT_CUSTOM;
1875 rule->action_ptr = http_action_req_cache_use;
1876
Christopher Faulet95220e22018-12-07 17:34:39 +01001877 if (!parse_cache_rule(proxy, args[*orig_arg], rule, err))
William Lallemand41db4602017-10-30 11:15:51 +01001878 return ACT_RET_PRS_ERR;
William Lallemand41db4602017-10-30 11:15:51 +01001879
1880 (*orig_arg)++;
1881 return ACT_RET_PRS_OK;
William Lallemand41db4602017-10-30 11:15:51 +01001882}
1883
1884int cfg_parse_cache(const char *file, int linenum, char **args, int kwm)
1885{
1886 int err_code = 0;
1887
1888 if (strcmp(args[0], "cache") == 0) { /* new cache section */
1889
1890 if (!*args[1]) {
Tim Duesterhusff4d86b2020-08-18 22:20:27 +02001891 ha_alert("parsing [%s:%d] : '%s' expects a <name> argument\n",
Christopher Faulet767a84b2017-11-24 16:50:31 +01001892 file, linenum, args[0]);
William Lallemand41db4602017-10-30 11:15:51 +01001893 err_code |= ERR_ALERT | ERR_ABORT;
1894 goto out;
1895 }
1896
1897 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1898 err_code |= ERR_ABORT;
1899 goto out;
1900 }
1901
1902 if (tmp_cache_config == NULL) {
Tim Duesterhusff4d86b2020-08-18 22:20:27 +02001903 struct cache *cache_config;
1904
William Lallemand41db4602017-10-30 11:15:51 +01001905 tmp_cache_config = calloc(1, sizeof(*tmp_cache_config));
1906 if (!tmp_cache_config) {
Christopher Faulet767a84b2017-11-24 16:50:31 +01001907 ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
William Lallemand41db4602017-10-30 11:15:51 +01001908 err_code |= ERR_ALERT | ERR_ABORT;
1909 goto out;
1910 }
1911
1912 strlcpy2(tmp_cache_config->id, args[1], 33);
1913 if (strlen(args[1]) > 32) {
Tim Duesterhusff4d86b2020-08-18 22:20:27 +02001914 ha_warning("parsing [%s:%d]: cache name is limited to 32 characters, truncate to '%s'.\n",
Christopher Faulet767a84b2017-11-24 16:50:31 +01001915 file, linenum, tmp_cache_config->id);
William Lallemand41db4602017-10-30 11:15:51 +01001916 err_code |= ERR_WARN;
1917 }
Tim Duesterhusff4d86b2020-08-18 22:20:27 +02001918
1919 list_for_each_entry(cache_config, &caches_config, list) {
1920 if (strcmp(tmp_cache_config->id, cache_config->id) == 0) {
1921 ha_alert("parsing [%s:%d]: Duplicate cache name '%s'.\n",
1922 file, linenum, tmp_cache_config->id);
1923 err_code |= ERR_ALERT | ERR_ABORT;
1924 goto out;
1925 }
1926 }
1927
William Lallemand49b44532017-11-24 18:53:43 +01001928 tmp_cache_config->maxage = 60;
William Lallemand41db4602017-10-30 11:15:51 +01001929 tmp_cache_config->maxblocks = 0;
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02001930 tmp_cache_config->maxobjsz = 0;
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +01001931 tmp_cache_config->max_secondary_entries = DEFAULT_MAX_SECONDARY_ENTRY;
William Lallemand41db4602017-10-30 11:15:51 +01001932 }
1933 } else if (strcmp(args[0], "total-max-size") == 0) {
Frédéric Lécailleb9b8b6b2018-10-25 20:17:45 +02001934 unsigned long int maxsize;
1935 char *err;
William Lallemand41db4602017-10-30 11:15:51 +01001936
1937 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1938 err_code |= ERR_ABORT;
1939 goto out;
1940 }
1941
Frédéric Lécailleb9b8b6b2018-10-25 20:17:45 +02001942 maxsize = strtoul(args[1], &err, 10);
1943 if (err == args[1] || *err != '\0') {
1944 ha_warning("parsing [%s:%d]: total-max-size wrong value '%s'\n",
1945 file, linenum, args[1]);
1946 err_code |= ERR_ABORT;
1947 goto out;
1948 }
1949
1950 if (maxsize > (UINT_MAX >> 20)) {
1951 ha_warning("parsing [%s:%d]: \"total-max-size\" (%s) must not be greater than %u\n",
1952 file, linenum, args[1], UINT_MAX >> 20);
1953 err_code |= ERR_ABORT;
1954 goto out;
1955 }
1956
William Lallemand41db4602017-10-30 11:15:51 +01001957 /* size in megabytes */
Frédéric Lécailleb9b8b6b2018-10-25 20:17:45 +02001958 maxsize *= 1024 * 1024 / CACHE_BLOCKSIZE;
William Lallemand41db4602017-10-30 11:15:51 +01001959 tmp_cache_config->maxblocks = maxsize;
William Lallemand49b44532017-11-24 18:53:43 +01001960 } else if (strcmp(args[0], "max-age") == 0) {
1961 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1962 err_code |= ERR_ABORT;
1963 goto out;
1964 }
1965
1966 if (!*args[1]) {
1967 ha_warning("parsing [%s:%d]: '%s' expects an age parameter in seconds.\n",
1968 file, linenum, args[0]);
1969 err_code |= ERR_WARN;
1970 }
1971
1972 tmp_cache_config->maxage = atoi(args[1]);
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02001973 } else if (strcmp(args[0], "max-object-size") == 0) {
Frédéric Lécaille4eba5442018-10-25 20:29:31 +02001974 unsigned int maxobjsz;
1975 char *err;
1976
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02001977 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1978 err_code |= ERR_ABORT;
1979 goto out;
1980 }
1981
1982 if (!*args[1]) {
1983 ha_warning("parsing [%s:%d]: '%s' expects a maximum file size parameter in bytes.\n",
1984 file, linenum, args[0]);
1985 err_code |= ERR_WARN;
1986 }
1987
Frédéric Lécaille4eba5442018-10-25 20:29:31 +02001988 maxobjsz = strtoul(args[1], &err, 10);
1989 if (err == args[1] || *err != '\0') {
1990 ha_warning("parsing [%s:%d]: max-object-size wrong value '%s'\n",
1991 file, linenum, args[1]);
1992 err_code |= ERR_ABORT;
1993 goto out;
1994 }
1995 tmp_cache_config->maxobjsz = maxobjsz;
Remi Tricot-Le Breton754b2422020-11-16 15:56:10 +01001996 } else if (strcmp(args[0], "process-vary") == 0) {
1997 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1998 err_code |= ERR_ABORT;
1999 goto out;
2000 }
2001
2002 if (!*args[1]) {
Remi Tricot-Le Bretone6cc5b52020-12-23 18:13:53 +01002003 ha_warning("parsing [%s:%d]: '%s' expects \"on\" or \"off\" (enable or disable vary processing).\n",
Remi Tricot-Le Breton754b2422020-11-16 15:56:10 +01002004 file, linenum, args[0]);
2005 err_code |= ERR_WARN;
2006 }
Remi Tricot-Le Bretone6cc5b52020-12-23 18:13:53 +01002007 if (strcmp(args[1], "on") == 0)
2008 tmp_cache_config->vary_processing_enabled = 1;
2009 else if (strcmp(args[1], "off") == 0)
2010 tmp_cache_config->vary_processing_enabled = 0;
2011 else {
2012 ha_warning("parsing [%s:%d]: '%s' expects \"on\" or \"off\" (enable or disable vary processing).\n",
2013 file, linenum, args[0]);
2014 err_code |= ERR_WARN;
2015 }
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +01002016 } else if (strcmp(args[0], "max-secondary-entries") == 0) {
2017 unsigned int max_sec_entries;
2018 char *err;
2019
2020 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
2021 err_code |= ERR_ABORT;
2022 goto out;
2023 }
2024
2025 if (!*args[1]) {
2026 ha_warning("parsing [%s:%d]: '%s' expects a strictly positive number.\n",
2027 file, linenum, args[0]);
2028 err_code |= ERR_WARN;
2029 }
2030
2031 max_sec_entries = strtoul(args[1], &err, 10);
2032 if (err == args[1] || *err != '\0' || max_sec_entries == 0) {
2033 ha_warning("parsing [%s:%d]: max-secondary-entries wrong value '%s'\n",
2034 file, linenum, args[1]);
2035 err_code |= ERR_ABORT;
2036 goto out;
2037 }
2038 tmp_cache_config->max_secondary_entries = max_sec_entries;
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02002039 }
2040 else if (*args[0] != 0) {
Christopher Faulet767a84b2017-11-24 16:50:31 +01002041 ha_alert("parsing [%s:%d] : unknown keyword '%s' in 'cache' section\n", file, linenum, args[0]);
William Lallemand41db4602017-10-30 11:15:51 +01002042 err_code |= ERR_ALERT | ERR_FATAL;
2043 goto out;
2044 }
2045out:
2046 return err_code;
2047}
2048
2049/* once the cache section is parsed */
2050
2051int cfg_post_parse_section_cache()
2052{
William Lallemand41db4602017-10-30 11:15:51 +01002053 int err_code = 0;
William Lallemand41db4602017-10-30 11:15:51 +01002054
2055 if (tmp_cache_config) {
William Lallemand41db4602017-10-30 11:15:51 +01002056
2057 if (tmp_cache_config->maxblocks <= 0) {
Christopher Faulet767a84b2017-11-24 16:50:31 +01002058 ha_alert("Size not specified for cache '%s'\n", tmp_cache_config->id);
William Lallemand41db4602017-10-30 11:15:51 +01002059 err_code |= ERR_FATAL | ERR_ALERT;
2060 goto out;
2061 }
2062
Frédéric Lécaille4eba5442018-10-25 20:29:31 +02002063 if (!tmp_cache_config->maxobjsz) {
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02002064 /* Default max. file size is a 256th of the cache size. */
2065 tmp_cache_config->maxobjsz =
2066 (tmp_cache_config->maxblocks * CACHE_BLOCKSIZE) >> 8;
Frédéric Lécaille4eba5442018-10-25 20:29:31 +02002067 }
2068 else if (tmp_cache_config->maxobjsz > tmp_cache_config->maxblocks * CACHE_BLOCKSIZE / 2) {
2069 ha_alert("\"max-object-size\" is limited to an half of \"total-max-size\" => %u\n", tmp_cache_config->maxblocks * CACHE_BLOCKSIZE / 2);
2070 err_code |= ERR_FATAL | ERR_ALERT;
2071 goto out;
2072 }
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02002073
William Lallemandd1d1e222019-08-28 15:22:49 +02002074 /* add to the list of cache to init and reinit tmp_cache_config
2075 * for next cache section, if any.
2076 */
Willy Tarreau2b718102021-04-21 07:32:39 +02002077 LIST_APPEND(&caches_config, &tmp_cache_config->list);
William Lallemandd1d1e222019-08-28 15:22:49 +02002078 tmp_cache_config = NULL;
2079 return err_code;
2080 }
2081out:
Willy Tarreau61cfdf42021-02-20 10:46:51 +01002082 ha_free(&tmp_cache_config);
William Lallemandd1d1e222019-08-28 15:22:49 +02002083 return err_code;
2084
2085}
2086
2087int post_check_cache()
2088{
2089 struct proxy *px;
2090 struct cache *back, *cache_config, *cache;
2091 struct shared_context *shctx;
2092 int ret_shctx;
Christopher Fauletfc633b62020-11-06 15:24:23 +01002093 int err_code = ERR_NONE;
William Lallemandd1d1e222019-08-28 15:22:49 +02002094
2095 list_for_each_entry_safe(cache_config, back, &caches_config, list) {
2096
2097 ret_shctx = shctx_init(&shctx, cache_config->maxblocks, CACHE_BLOCKSIZE,
2098 cache_config->maxobjsz, sizeof(struct cache), 1);
William Lallemand4da3f8a2017-10-31 14:33:34 +01002099
Frédéric Lécaillebc584492018-10-25 20:18:59 +02002100 if (ret_shctx <= 0) {
William Lallemand41db4602017-10-30 11:15:51 +01002101 if (ret_shctx == SHCTX_E_INIT_LOCK)
Christopher Faulet767a84b2017-11-24 16:50:31 +01002102 ha_alert("Unable to initialize the lock for the cache.\n");
William Lallemand41db4602017-10-30 11:15:51 +01002103 else
Christopher Faulet767a84b2017-11-24 16:50:31 +01002104 ha_alert("Unable to allocate cache.\n");
William Lallemand41db4602017-10-30 11:15:51 +01002105
2106 err_code |= ERR_FATAL | ERR_ALERT;
2107 goto out;
2108 }
William Lallemanda400a3a2017-11-20 19:13:12 +01002109 shctx->free_block = cache_free_blocks;
William Lallemandd1d1e222019-08-28 15:22:49 +02002110 /* the cache structure is stored in the shctx and added to the
2111 * caches list, we can remove the entry from the caches_config
2112 * list */
2113 memcpy(shctx->data, cache_config, sizeof(struct cache));
William Lallemand41db4602017-10-30 11:15:51 +01002114 cache = (struct cache *)shctx->data;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01002115 cache->entries = EB_ROOT;
Willy Tarreau2b718102021-04-21 07:32:39 +02002116 LIST_APPEND(&caches, &cache->list);
2117 LIST_DELETE(&cache_config->list);
William Lallemandd1d1e222019-08-28 15:22:49 +02002118 free(cache_config);
2119
2120 /* Find all references for this cache in the existing filters
2121 * (over all proxies) and reference it in matching filters.
2122 */
2123 for (px = proxies_list; px; px = px->next) {
2124 struct flt_conf *fconf;
2125 struct cache_flt_conf *cconf;
2126
2127 list_for_each_entry(fconf, &px->filter_configs, list) {
2128 if (fconf->id != cache_store_flt_id)
2129 continue;
2130
2131 cconf = fconf->conf;
Tim Duesterhuse5ff1412021-01-02 22:31:53 +01002132 if (strcmp(cache->id, cconf->c.name) == 0) {
William Lallemandd1d1e222019-08-28 15:22:49 +02002133 free(cconf->c.name);
Tim Duesterhusd7c6e6a2020-09-14 18:01:33 +02002134 cconf->flags |= CACHE_FLT_INIT;
William Lallemandd1d1e222019-08-28 15:22:49 +02002135 cconf->c.cache = cache;
2136 break;
2137 }
2138 }
2139 }
William Lallemand41db4602017-10-30 11:15:51 +01002140 }
William Lallemandd1d1e222019-08-28 15:22:49 +02002141
William Lallemand41db4602017-10-30 11:15:51 +01002142out:
William Lallemand41db4602017-10-30 11:15:51 +01002143 return err_code;
2144
William Lallemand41db4602017-10-30 11:15:51 +01002145}
2146
William Lallemand41db4602017-10-30 11:15:51 +01002147struct flt_ops cache_ops = {
2148 .init = cache_store_init,
Christopher Faulet95220e22018-12-07 17:34:39 +01002149 .check = cache_store_check,
2150 .deinit = cache_store_deinit,
William Lallemand41db4602017-10-30 11:15:51 +01002151
Christopher Faulet65554e12020-03-06 14:52:06 +01002152 /* Handle stream init/deinit */
2153 .attach = cache_store_strm_init,
2154 .detach = cache_store_strm_deinit,
2155
William Lallemand4da3f8a2017-10-31 14:33:34 +01002156 /* Handle channels activity */
Christopher Faulet839791a2019-01-07 16:12:07 +01002157 .channel_post_analyze = cache_store_post_analyze,
William Lallemand4da3f8a2017-10-31 14:33:34 +01002158
2159 /* Filter HTTP requests and responses */
2160 .http_headers = cache_store_http_headers,
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01002161 .http_payload = cache_store_http_payload,
William Lallemand4da3f8a2017-10-31 14:33:34 +01002162 .http_end = cache_store_http_end,
William Lallemand41db4602017-10-30 11:15:51 +01002163};
2164
Christopher Faulet99a17a22018-12-11 09:18:27 +01002165
/*
 * Helper for parse_encoding_value(): match the remainder of an encoding token
 * against a known encoding name.
 *
 * <str>            must be a modifiable lvalue of type struct ist whose first
 *                  character has ALREADY been consumed by the caller. It is
 *                  evaluated several times, so it must be free of side effects.
 * <encoding_name>  must be a string literal holding the full encoding name;
 *                  only the part after its first character is compared, hence
 *                  the "+ 1" and the "sizeof() - 2" (first character and
 *                  trailing NUL excluded).
 * <encoding_value> is the value to report on a match.
 *
 * When istmatch() accepts the comparison, <str> is advanced past the compared
 * part and the macro evaluates to <encoding_value>; otherwise <str> is left
 * untouched and the macro evaluates to 0.
 *
 * The macro only acts on its <str> argument (it used to silently modify a
 * caller variable named "encoding"), and its internal variable is named so as
 * not to shadow a caller's "retval".
 */
#define CHECK_ENCODING(str, encoding_name, encoding_value)                          \
	({                                                                          \
		int enc_match_ = 0;                                                 \
		if (istmatch((str), (struct ist){ .ptr = (encoding_name) + 1,       \
		                                  .len = sizeof(encoding_name) - 2 })) { \
			enc_match_ = (encoding_value);                              \
			(str) = istadv((str), sizeof(encoding_name) - 2);           \
		}                                                                   \
		(enc_match_);                                                       \
	})
2175
2176/*
2177 * Parse the encoding <encoding> and try to match the encoding part upon an
2178 * encoding list of explicitly supported encodings (which all have a specific
2179 * bit in an encoding bitmap). If a weight is included in the value, find out if
2180 * it is null or not. The bit value will be set in the <encoding_value>
2181 * parameter and the <has_null_weight> will be set to 1 if the weight is strictly
2182 * 0, 1 otherwise.
2183 * The encodings list is extracted from
2184 * https://www.iana.org/assignments/http-parameters/http-parameters.xhtml.
2185 * Returns 0 in case of success and -1 in case of error.
2186 */
2187static int parse_encoding_value(struct ist encoding, unsigned int *encoding_value,
2188 unsigned int *has_null_weight)
2189{
2190 int retval = 0;
2191
2192 if (!encoding_value)
2193 return -1;
2194
2195 if (!istlen(encoding))
2196 return -1; /* Invalid encoding */
2197
2198 *encoding_value = 0;
2199 if (has_null_weight)
2200 *has_null_weight = 0;
2201
2202 switch (*encoding.ptr) {
2203 case 'a':
2204 encoding = istadv(encoding, 1);
2205 *encoding_value = CHECK_ENCODING(encoding, "aes128gcm", VARY_ENCODING_AES128GCM);
2206 break;
2207 case 'b':
2208 encoding = istadv(encoding, 1);
2209 *encoding_value = CHECK_ENCODING(encoding, "br", VARY_ENCODING_BR);
2210 break;
2211 case 'c':
2212 encoding = istadv(encoding, 1);
2213 *encoding_value = CHECK_ENCODING(encoding, "compress", VARY_ENCODING_COMPRESS);
2214 break;
2215 case 'd':
2216 encoding = istadv(encoding, 1);
2217 *encoding_value = CHECK_ENCODING(encoding, "deflate", VARY_ENCODING_DEFLATE);
2218 break;
2219 case 'e':
2220 encoding = istadv(encoding, 1);
2221 *encoding_value = CHECK_ENCODING(encoding, "exi", VARY_ENCODING_EXI);
2222 break;
2223 case 'g':
2224 encoding = istadv(encoding, 1);
2225 *encoding_value = CHECK_ENCODING(encoding, "gzip", VARY_ENCODING_GZIP);
2226 break;
2227 case 'i':
2228 encoding = istadv(encoding, 1);
2229 *encoding_value = CHECK_ENCODING(encoding, "identity", VARY_ENCODING_IDENTITY);
2230 break;
2231 case 'p':
2232 encoding = istadv(encoding, 1);
2233 *encoding_value = CHECK_ENCODING(encoding, "pack200-gzip", VARY_ENCODING_PACK200_GZIP);
2234 break;
2235 case 'x':
2236 encoding = istadv(encoding, 1);
2237 *encoding_value = CHECK_ENCODING(encoding, "x-gzip", VARY_ENCODING_GZIP);
2238 if (!*encoding_value)
2239 *encoding_value = CHECK_ENCODING(encoding, "x-compress", VARY_ENCODING_COMPRESS);
2240 break;
2241 case 'z':
2242 encoding = istadv(encoding, 1);
2243 *encoding_value = CHECK_ENCODING(encoding, "zstd", VARY_ENCODING_ZSTD);
2244 break;
2245 case '*':
2246 encoding = istadv(encoding, 1);
2247 *encoding_value = VARY_ENCODING_STAR;
2248 break;
2249 default:
2250 retval = -1; /* Unmanaged encoding */
2251 break;
2252 }
2253
2254 /* Process the optional weight part of the encoding. */
2255 if (*encoding_value) {
2256 encoding = http_trim_leading_spht(encoding);
2257 if (istlen(encoding)) {
2258 if (*encoding.ptr != ';')
2259 return -1;
2260
2261 if (has_null_weight) {
2262 encoding = istadv(encoding, 1);
2263
2264 encoding = http_trim_leading_spht(encoding);
2265
2266 *has_null_weight = isteq(encoding, ist("q=0"));
2267 }
2268 }
2269 }
2270
2271 return retval;
2272}
2273
Tim Duesterhus23b29452020-11-24 22:22:56 +01002274#define ACCEPT_ENCODING_MAX_ENTRIES 16
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002275/*
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002276 * Build a bitmap of the accept-encoding header.
2277 *
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002278 * The bitmap is built by matching every sub-part of the accept-encoding value
2279 * with a subset of explicitly supported encodings, which all have their own bit
2280 * in the bitmap. This bitmap will be used to determine if a response can be
2281 * served to a client (that is if it has an encoding that is accepted by the
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002282 * client). Any unknown encodings will be indicated by the VARY_ENCODING_OTHER
2283 * bit.
2284 *
2285 * Returns 0 in case of success and -1 in case of error.
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002286 */
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002287static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
2288 char *buf, unsigned int *buf_len)
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002289{
Tim Duesterhus23b29452020-11-24 22:22:56 +01002290 size_t count = 0;
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002291 uint32_t encoding_bitmap = 0;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002292 unsigned int encoding_bmp_bl = -1;
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002293 struct http_hdr_ctx ctx = { .blk = NULL };
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002294 unsigned int encoding_value;
2295 unsigned int rejected_encoding;
2296
Ilya Shipitsinb8888ab2021-01-06 21:20:16 +05002297 /* A user agent always accepts an unencoded value unless it explicitly
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002298 * refuses it through an "identity;q=0" accept-encoding value. */
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002299 encoding_bitmap |= VARY_ENCODING_IDENTITY;
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002300
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002301 /* Iterate over all the ACCEPT_ENCODING_MAX_ENTRIES first accept-encoding
2302 * values that might span acrosse multiple accept-encoding headers. */
2303 while (http_find_header(htx, hdr_name, &ctx, 0) && count < ACCEPT_ENCODING_MAX_ENTRIES) {
Tim Duesterhusbeec7ea2021-06-18 15:09:28 +02002304 count++;
2305
2306 /* As per RFC7231#5.3.4, "An Accept-Encoding header field with a
2307 * combined field-value that is empty implies that the user agent
2308 * does not want any content-coding in response."
2309 *
2310 * We must (and did) count the existence of this empty header to not
2311 * hit the `count == 0` case below, but must ignore the value to not
2312 * include VARY_ENCODING_OTHER into the final bitmap.
2313 */
2314 if (istlen(ctx.value) == 0)
2315 continue;
2316
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002317 /* Turn accept-encoding value to lower case */
2318 ist2bin_lc(istptr(ctx.value), ctx.value);
Tim Duesterhus23b29452020-11-24 22:22:56 +01002319
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002320 /* Try to identify a known encoding and to manage null weights. */
2321 if (!parse_encoding_value(ctx.value, &encoding_value, &rejected_encoding)) {
2322 if (rejected_encoding)
2323 encoding_bmp_bl &= ~encoding_value;
2324 else
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002325 encoding_bitmap |= encoding_value;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002326 }
2327 else {
2328 /* Unknown encoding */
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002329 encoding_bitmap |= VARY_ENCODING_OTHER;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002330 }
Remi Tricot-Le Breton8bb72aa2020-11-30 17:06:03 +01002331 }
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002332
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002333 /* If a "*" was found in the accepted encodings (without a null weight),
Ilya Shipitsinb8888ab2021-01-06 21:20:16 +05002334 * all the encoding are accepted except the ones explicitly rejected. */
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002335 if (encoding_bitmap & VARY_ENCODING_STAR) {
2336 encoding_bitmap = ~0;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002337 }
2338
Ilya Shipitsinb8888ab2021-01-06 21:20:16 +05002339 /* Clear explicitly rejected encodings from the bitmap */
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002340 encoding_bitmap &= encoding_bmp_bl;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002341
2342 /* As per RFC7231#5.3.4, "If no Accept-Encoding field is in the request,
2343 * any content-coding is considered acceptable by the user agent". */
2344 if (count == 0)
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002345 encoding_bitmap = ~0;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002346
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002347 /* A request with more than ACCEPT_ENCODING_MAX_ENTRIES accepted
2348 * encodings might be illegitimate so we will not use it. */
2349 if (count == ACCEPT_ENCODING_MAX_ENTRIES)
2350 return -1;
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002351
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002352 write_u32(buf, encoding_bitmap);
2353 *buf_len = sizeof(encoding_bitmap);
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002354
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002355 /* This function fills the hash buffer correctly even if no header was
2356 * found, hence the 0 return value (success). */
Tim Duesterhus23b29452020-11-24 22:22:56 +01002357 return 0;
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002358}
Tim Duesterhus23b29452020-11-24 22:22:56 +01002359#undef ACCEPT_ENCODING_MAX_ENTRIES
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002360
2361/*
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002362 * Normalizer used by default for the Referer header. It only
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002363 * calculates a simple crc of the whole value.
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002364 * Only the first occurrence of the header will be taken into account in the
2365 * hash.
2366 * Returns 0 in case of success, 1 if the hash buffer should be filled with 0s
2367 * and -1 in case of error.
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002368 */
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002369static int default_normalizer(struct htx *htx, struct ist hdr_name,
2370 char *buf, unsigned int *buf_len)
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002371{
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002372 int retval = 1;
2373 struct http_hdr_ctx ctx = { .blk = NULL };
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002374
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002375 if (http_find_header(htx, hdr_name, &ctx, 1)) {
2376 retval = 0;
2377 write_u32(buf, hash_crc32(istptr(ctx.value), istlen(ctx.value)));
2378 *buf_len = sizeof(int);
2379 }
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002380
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002381 return retval;
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002382}
2383
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002384/*
Tim Duesterhused84d842021-01-18 13:41:17 +01002385 * Accept-Encoding bitmap comparison function.
2386 * Returns 0 if the bitmaps are compatible.
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002387 */
Tim Duesterhused84d842021-01-18 13:41:17 +01002388static int accept_encoding_bitmap_cmp(const void *ref, const void *new, unsigned int len)
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002389{
Tim Duesterhused84d842021-01-18 13:41:17 +01002390 uint32_t ref_bitmap = read_u32(ref);
2391 uint32_t new_bitmap = read_u32(new);
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002392
Tim Duesterhused84d842021-01-18 13:41:17 +01002393 if (!(ref_bitmap & VARY_ENCODING_OTHER)) {
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002394 /* All the bits set in the reference bitmap correspond to the
2395 * stored response' encoding and should all be set in the new
2396 * encoding bitmap in order for the client to be able to manage
Tim Duesterhusdc38bc42020-12-29 12:43:53 +01002397 * the response.
2398 *
2399 * If this is the case the cached response has encodings that
2400 * are accepted by the client. It can be served directly by
2401 * the cache (as far as the accept-encoding part is concerned).
2402 */
2403
Tim Duesterhused84d842021-01-18 13:41:17 +01002404 return (ref_bitmap & new_bitmap) != ref_bitmap;
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002405 }
Tim Duesterhusdc38bc42020-12-29 12:43:53 +01002406 else {
Tim Duesterhus1d66e392021-01-18 13:41:16 +01002407 return 1;
Tim Duesterhusdc38bc42020-12-29 12:43:53 +01002408 }
Remi Tricot-Le Bretonce9e7b22020-12-23 18:13:49 +01002409}
2410
2411
/*
 * Pre-calculate the hashes of all the headers supported by our Vary
 * implementation for a given request. They all have to be calculated in
 * advance because the actual Vary signature won't be known until the first
 * response.
 *
 * This simply calls http_request_build_secondary_key() with a signature that
 * has every bit set.
 * Returns 0 in case of success.
 */
static int http_request_prebuild_full_secondary_key(struct stream *s)
{
	/* Fake signature selecting every part of the secondary key. */
	const int every_header = ~0;

	return http_request_build_secondary_key(s, every_header);
}
2429
2430
2431/*
2432 * Calculate the secondary key for a request for which we already have a known
2433 * vary signature. The key is made by aggregating hashes calculated for every
2434 * header mentioned in the vary signature.
2435 * Only the first occurrence of every header will be taken into account in the
2436 * hash.
2437 * If the header is not present, the hash portion of the given header will be
2438 * filled with zeros.
2439 * Returns 0 in case of success.
2440 */
2441static int http_request_build_secondary_key(struct stream *s, int vary_signature)
2442{
2443 struct http_txn *txn = s->txn;
2444 struct htx *htx = htxbuf(&s->req.buf);
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002445
2446 unsigned int idx;
2447 const struct vary_hashing_information *info = NULL;
2448 unsigned int hash_length = 0;
2449 int retval = 0;
2450 int offset = 0;
2451
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002452 for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && retval >= 0; ++idx) {
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002453 info = &vary_information[idx];
2454
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002455 /* The normalizing functions will be in charge of getting the
2456 * header values from the htx. This way they can manage multiple
2457 * occurrences of their processed header. */
2458 if ((vary_signature & info->value) && info->norm_fn != NULL &&
2459 !(retval = info->norm_fn(htx, info->hdr_name, &txn->cache_secondary_hash[offset], &hash_length))) {
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002460 offset += hash_length;
2461 }
2462 else {
2463 /* Fill hash with 0s. */
2464 hash_length = info->hash_length;
2465 memset(&txn->cache_secondary_hash[offset], 0, hash_length);
2466 offset += hash_length;
2467 }
2468 }
2469
Remi Tricot-Le Breton2b5c5cb2020-12-23 18:13:45 +01002470 if (retval >= 0)
2471 txn->flags |= TX_CACHE_HAS_SEC_KEY;
2472
2473 return (retval < 0);
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002474}
2475
2476/*
2477 * Build the actual secondary key of a given request out of the prebuilt key and
2478 * the actual vary signature (extracted from the response).
2479 * Returns 0 in case of success.
2480 */
2481static int http_request_reduce_secondary_key(unsigned int vary_signature,
2482 char prebuilt_key[HTTP_CACHE_SEC_KEY_LEN])
2483{
2484 int offset = 0;
2485 int global_offset = 0;
2486 int vary_info_count = 0;
2487 int keep = 0;
2488 unsigned int vary_idx;
2489 const struct vary_hashing_information *vary_info;
2490
2491 vary_info_count = sizeof(vary_information)/sizeof(*vary_information);
2492 for (vary_idx = 0; vary_idx < vary_info_count; ++vary_idx) {
2493 vary_info = &vary_information[vary_idx];
2494 keep = (vary_signature & vary_info->value) ? 0xff : 0;
2495
2496 for (offset = 0; offset < vary_info->hash_length; ++offset,++global_offset) {
2497 prebuilt_key[global_offset] &= keep;
2498 }
2499 }
2500
2501 return 0;
2502}
2503
2504
Christopher Faulet99a17a22018-12-11 09:18:27 +01002505
2506static int
2507parse_cache_flt(char **args, int *cur_arg, struct proxy *px,
2508 struct flt_conf *fconf, char **err, void *private)
2509{
2510 struct flt_conf *f, *back;
Willy Tarreaua73da1e2018-12-14 10:19:28 +01002511 struct cache_flt_conf *cconf = NULL;
Christopher Faulet99a17a22018-12-11 09:18:27 +01002512 char *name = NULL;
2513 int pos = *cur_arg;
2514
Christopher Faulet2a37cdb2020-05-18 11:58:16 +02002515 /* Get the cache filter name. <pos> point on "cache" keyword */
2516 if (!*args[pos + 1]) {
Tim Duesterhusea969f62020-08-18 22:06:51 +02002517 memprintf(err, "%s : expects a <name> argument", args[pos]);
Christopher Faulet2a37cdb2020-05-18 11:58:16 +02002518 goto error;
2519 }
2520 name = strdup(args[pos + 1]);
2521 if (!name) {
2522 memprintf(err, "%s '%s' : out of memory", args[pos], args[pos + 1]);
2523 goto error;
Christopher Faulet99a17a22018-12-11 09:18:27 +01002524 }
Christopher Faulet2a37cdb2020-05-18 11:58:16 +02002525 pos += 2;
Christopher Faulet99a17a22018-12-11 09:18:27 +01002526
2527 /* Check if an implicit filter with the same name already exists. If so,
2528 * we remove the implicit filter to use the explicit one. */
2529 list_for_each_entry_safe(f, back, &px->filter_configs, list) {
2530 if (f->id != cache_store_flt_id)
2531 continue;
2532
2533 cconf = f->conf;
Tim Duesterhuse5ff1412021-01-02 22:31:53 +01002534 if (strcmp(name, cconf->c.name) != 0) {
Christopher Faulet99a17a22018-12-11 09:18:27 +01002535 cconf = NULL;
2536 continue;
2537 }
2538
2539 if (!(cconf->flags & CACHE_FLT_F_IMPLICIT_DECL)) {
2540 cconf = NULL;
2541 memprintf(err, "%s: multiple explicit declarations of the cache filter '%s'",
2542 px->id, name);
Tim Duesterhusd34b1ce2020-01-18 01:46:18 +01002543 goto error;
Christopher Faulet99a17a22018-12-11 09:18:27 +01002544 }
2545
2546 /* Remove the implicit filter. <cconf> is kept for the explicit one */
Willy Tarreau2b718102021-04-21 07:32:39 +02002547 LIST_DELETE(&f->list);
Christopher Faulet99a17a22018-12-11 09:18:27 +01002548 free(f);
2549 free(name);
2550 break;
2551 }
2552
2553 /* No implicit cache filter found, create configuration for the explicit one */
2554 if (!cconf) {
2555 cconf = calloc(1, sizeof(*cconf));
2556 if (!cconf) {
2557 memprintf(err, "%s: out of memory", args[*cur_arg]);
2558 goto error;
2559 }
2560 cconf->c.name = name;
2561 }
2562
2563 cconf->flags = 0;
2564 fconf->id = cache_store_flt_id;
2565 fconf->conf = cconf;
2566 fconf->ops = &cache_ops;
2567
2568 *cur_arg = pos;
2569 return 0;
2570
2571 error:
2572 free(name);
2573 free(cconf);
2574 return -1;
2575}
2576
Aurélien Nephtaliabbf6072018-04-18 13:26:46 +02002577static int cli_parse_show_cache(char **args, char *payload, struct appctx *appctx, void *private)
William Lallemand1f49a362017-11-21 20:01:26 +01002578{
2579 if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
2580 return 1;
2581
2582 return 0;
2583}
2584
2585static int cli_io_handler_show_cache(struct appctx *appctx)
2586{
2587 struct cache* cache = appctx->ctx.cli.p0;
2588 struct stream_interface *si = appctx->owner;
2589
William Lallemand1f49a362017-11-21 20:01:26 +01002590 if (cache == NULL) {
2591 cache = LIST_ELEM((caches).n, typeof(struct cache *), list);
2592 }
2593
2594 list_for_each_entry_from(cache, &caches, list) {
2595 struct eb32_node *node = NULL;
2596 unsigned int next_key;
2597 struct cache_entry *entry;
Remi Tricot-Le Bretone3e1e5f2020-11-27 15:48:40 +01002598 unsigned int i;
William Lallemand1f49a362017-11-21 20:01:26 +01002599
William Lallemand1f49a362017-11-21 20:01:26 +01002600 next_key = appctx->ctx.cli.i0;
Willy Tarreauafe1de52018-04-04 11:56:43 +02002601 if (!next_key) {
2602 chunk_printf(&trash, "%p: %s (shctx:%p, available blocks:%d)\n", cache, cache->id, shctx_ptr(cache), shctx_ptr(cache)->nbav);
2603 if (ci_putchk(si_ic(si), &trash) == -1) {
Willy Tarreaudb398432018-11-15 11:08:52 +01002604 si_rx_room_blk(si);
Willy Tarreauafe1de52018-04-04 11:56:43 +02002605 return 0;
2606 }
2607 }
William Lallemand1f49a362017-11-21 20:01:26 +01002608
2609 appctx->ctx.cli.p0 = cache;
2610
2611 while (1) {
2612
2613 shctx_lock(shctx_ptr(cache));
Christopher Fauleteba18d52021-11-23 16:03:05 +01002614 node = eb32_lookup_ge(&cache->entries, next_key);
William Lallemand1f49a362017-11-21 20:01:26 +01002615 if (!node) {
2616 shctx_unlock(shctx_ptr(cache));
Willy Tarreauafe1de52018-04-04 11:56:43 +02002617 appctx->ctx.cli.i0 = 0;
William Lallemand1f49a362017-11-21 20:01:26 +01002618 break;
2619 }
2620
2621 entry = container_of(node, struct cache_entry, eb);
William Lallemand1f49a362017-11-21 20:01:26 +01002622 next_key = node->key + 1;
Willy Tarreaudcce27f2022-04-13 11:21:39 +02002623
Willy Tarreau5e842192023-02-07 15:22:41 +01002624 if (entry->expire > date.tv_sec) {
Willy Tarreaudcce27f2022-04-13 11:21:39 +02002625 chunk_printf(&trash, "%p hash:%u vary:0x", entry, read_u32(entry->hash));
2626 for (i = 0; i < HTTP_CACHE_SEC_KEY_LEN; ++i)
2627 chunk_appendf(&trash, "%02x", (unsigned char)entry->secondary_key[i]);
2628 chunk_appendf(&trash, " size:%u (%u blocks), refcount:%u, expire:%d\n",
2629 block_ptr(entry)->len, block_ptr(entry)->block_count,
Willy Tarreau5e842192023-02-07 15:22:41 +01002630 block_ptr(entry)->refcount, entry->expire - (int)date.tv_sec);
Willy Tarreaudcce27f2022-04-13 11:21:39 +02002631 } else {
2632 /* time to remove that one */
2633 delete_entry(entry);
2634 entry->eb.key = 0;
2635 }
2636
William Lallemand1f49a362017-11-21 20:01:26 +01002637 appctx->ctx.cli.i0 = next_key;
2638
2639 shctx_unlock(shctx_ptr(cache));
2640
2641 if (ci_putchk(si_ic(si), &trash) == -1) {
Willy Tarreaudb398432018-11-15 11:08:52 +01002642 si_rx_room_blk(si);
William Lallemand1f49a362017-11-21 20:01:26 +01002643 return 0;
2644 }
2645 }
2646
2647 }
2648
2649 return 1;
2650
2651}
2652
Remi Tricot-Le Bretonbf971212020-10-27 11:55:57 +01002653
2654/*
2655 * boolean, returns true if response was built out of a cache entry.
2656 */
2657static int
2658smp_fetch_res_cache_hit(const struct arg *args, struct sample *smp,
2659 const char *kw, void *private)
2660{
2661 smp->data.type = SMP_T_BOOL;
2662 smp->data.u.sint = (smp->strm ? (smp->strm->target == &http_cache_applet.obj_type) : 0);
2663
2664 return 1;
2665}
2666
2667/*
2668 * string, returns cache name (if response came from a cache).
2669 */
2670static int
2671smp_fetch_res_cache_name(const struct arg *args, struct sample *smp,
2672 const char *kw, void *private)
2673{
2674 struct appctx *appctx = NULL;
2675
2676 struct cache_flt_conf *cconf = NULL;
2677 struct cache *cache = NULL;
2678
2679 if (!smp->strm || smp->strm->target != &http_cache_applet.obj_type)
2680 return 0;
2681
2682 /* Get appctx from the stream_interface. */
2683 appctx = si_appctx(&smp->strm->si[1]);
2684 if (appctx && appctx->rule) {
2685 cconf = appctx->rule->arg.act.p[0];
2686 if (cconf) {
2687 cache = cconf->c.cache;
2688
2689 smp->data.type = SMP_T_STR;
2690 smp->flags = SMP_F_CONST;
2691 smp->data.u.str.area = cache->id;
2692 smp->data.u.str.data = strlen(cache->id);
2693 return 1;
2694 }
2695 }
2696
2697 return 0;
2698}
2699
Christopher Faulet99a17a22018-12-11 09:18:27 +01002700/* Declare the filter parser for "cache" keyword */
2701static struct flt_kw_list filter_kws = { "CACHE", { }, {
2702 { "cache", parse_cache_flt, NULL },
2703 { NULL, NULL, NULL },
2704 }
2705};
2706
2707INITCALL1(STG_REGISTER, flt_register_keywords, &filter_kws);
2708
William Lallemand1f49a362017-11-21 20:01:26 +01002709static struct cli_kw_list cli_kws = {{},{
Willy Tarreaub205bfd2021-05-07 11:38:37 +02002710 { { "show", "cache", NULL }, "show cache : show cache status", cli_parse_show_cache, cli_io_handler_show_cache, NULL, NULL },
William Lallemande899af82017-11-22 16:41:26 +01002711 {{},}
William Lallemand1f49a362017-11-21 20:01:26 +01002712}};
2713
Willy Tarreau0108d902018-11-25 19:14:37 +01002714INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
William Lallemand1f49a362017-11-21 20:01:26 +01002715
William Lallemand41db4602017-10-30 11:15:51 +01002716static struct action_kw_list http_res_actions = {
2717 .kw = {
2718 { "cache-store", parse_cache_store },
2719 { NULL, NULL }
2720 }
2721};
2722
Willy Tarreau0108d902018-11-25 19:14:37 +01002723INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
2724
William Lallemand41db4602017-10-30 11:15:51 +01002725static struct action_kw_list http_req_actions = {
2726 .kw = {
2727 { "cache-use", parse_cache_use },
2728 { NULL, NULL }
2729 }
2730};
2731
Willy Tarreau0108d902018-11-25 19:14:37 +01002732INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
2733
Willy Tarreau2231b632019-03-29 18:26:52 +01002734struct applet http_cache_applet = {
William Lallemand41db4602017-10-30 11:15:51 +01002735 .obj_type = OBJ_TYPE_APPLET,
2736 .name = "<CACHE>", /* used for logging */
William Lallemand77c11972017-10-31 20:43:01 +01002737 .fct = http_cache_io_handler,
William Lallemandecb73b12017-11-24 14:33:55 +01002738 .release = http_cache_applet_release,
William Lallemand41db4602017-10-30 11:15:51 +01002739};
2740
Willy Tarreaue6552512018-11-26 11:33:13 +01002741/* config parsers for this section */
2742REGISTER_CONFIG_SECTION("cache", cfg_parse_cache, cfg_post_parse_section_cache);
William Lallemandd1d1e222019-08-28 15:22:49 +02002743REGISTER_POST_CHECK(post_check_cache);
Remi Tricot-Le Bretonbf971212020-10-27 11:55:57 +01002744
2745
2746/* Note: must not be declared <const> as its list will be overwritten */
2747static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
2748 { "res.cache_hit", smp_fetch_res_cache_hit, 0, NULL, SMP_T_BOOL, SMP_USE_HRSHP, SMP_VAL_RESPONSE },
2749 { "res.cache_name", smp_fetch_res_cache_name, 0, NULL, SMP_T_STR, SMP_USE_HRSHP, SMP_VAL_RESPONSE },
2750 { /* END */ },
2751 }
2752};
2753
2754INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);