blob: 94ab7bc46beec4768d56558d731e907f454765af [file] [log] [blame]
William Lallemand41db4602017-10-30 11:15:51 +01001/*
2 * Cache management
3 *
4 * Copyright 2017 HAProxy Technologies
5 * William Lallemand <wlallemand@haproxy.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
Willy Tarreaub2551052020-06-09 09:07:15 +020013#include <import/eb32tree.h>
14#include <import/sha1.h>
15
Willy Tarreau122eba92020-06-04 10:15:32 +020016#include <haproxy/action-t.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020017#include <haproxy/api.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020018#include <haproxy/cfgparse.h>
Willy Tarreauf1d32c42020-06-04 21:07:02 +020019#include <haproxy/channel.h>
Willy Tarreau83487a82020-06-04 20:19:54 +020020#include <haproxy/cli.h>
Willy Tarreau36979d92020-06-05 17:27:29 +020021#include <haproxy/errors.h>
Willy Tarreauc7babd82020-06-04 21:29:29 +020022#include <haproxy/filters.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020023#include <haproxy/hash.h>
Remi Tricot-Le Bretondbb65b52020-10-22 10:40:04 +020024#include <haproxy/http.h>
Willy Tarreauc2b1ff02020-06-04 21:21:03 +020025#include <haproxy/http_ana.h>
Willy Tarreau87735332020-06-04 09:08:41 +020026#include <haproxy/http_htx.h>
Willy Tarreauc761f842020-06-04 11:40:28 +020027#include <haproxy/http_rules.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020028#include <haproxy/htx.h>
29#include <haproxy/net_helper.h>
Willy Tarreaua264d962020-06-04 22:29:18 +020030#include <haproxy/proxy.h>
Remi Tricot-Le Bretonbf971212020-10-27 11:55:57 +010031#include <haproxy/sample.h>
Willy Tarreau334099c2020-06-03 18:38:48 +020032#include <haproxy/shctx.h>
Willy Tarreaudfd3de82020-06-04 23:46:14 +020033#include <haproxy/stream.h>
Willy Tarreau5e539c92020-06-04 20:45:39 +020034#include <haproxy/stream_interface.h>
William Lallemand41db4602017-10-30 11:15:51 +010035
/* Flags stored in cache_flt_conf.flags */
#define CACHE_FLT_F_IMPLICIT_DECL  0x00000001 /* The cache filter was implicitly declared (ie without
                                               * the filter keyword) */
#define CACHE_FLT_INIT             0x00000002 /* Whether the cache name was freed. When this flag is
                                               * set, cache_store_deinit() does not free <c.name>. */

/* Identifier string of the cache store filter. */
const char *cache_store_flt_id = "cache store filter";

/* Applet declared here but defined elsewhere. */
extern struct applet http_cache_applet;

/* Filter operations of the cache filter. */
struct flt_ops cache_ops;
45
/* Configuration and runtime state of one named cache. */
struct cache {
	struct list list;       /* cache linked list */
	struct eb_root entries; /* head of cache entries based on keys */
	unsigned int maxage;    /* max-age */
	unsigned int maxblocks; /* NOTE(review): presumably the maximum number of blocks
	                         * usable by this cache - confirm against the parser */
	unsigned int maxobjsz;  /* max-object-size (in bytes) */
	unsigned int max_secondary_entries;  /* maximum number of secondary entries with the same primary hash */
	uint8_t vary_processing_enabled;     /* boolean : manage Vary header (disabled by default) */
	char id[33];            /* cache name, compared with strcmp() when a filter is checked */
};
56
/* cache config for filters */
struct cache_flt_conf {
	union {
		struct cache *cache; /* cache used by the filter */
		char *name;          /* cache name used during conf parsing; this is the
		                      * member read by cache_store_check() */
	} c;
	unsigned int flags;    /* CACHE_FLT_F_* */
};
65
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +010066
67/*
68 * Vary-related structures and functions
69 */
/* One bit per header that may take part in a vary signature. */
enum vary_header_bit {
	VARY_ACCEPT_ENCODING = (1 << 0),
	VARY_REFERER = (1 << 1),
	VARY_LAST /* should always be last */
};
75
/* Describes how one header contributes to the secondary (vary) key. */
struct vary_hashing_information {
	struct ist hdr_name;                 /* Header name */
	enum vary_header_bit value;          /* Bit representing the header in a vary signature */
	unsigned int hash_length;            /* Size of the sub hash for this header's value */
	int(*norm_fn)(struct htx*,struct ist hdr_name,char* buf,unsigned int* buf_len); /* Normalization function */
	int(*cmp_fn)(const char *ref_hash, const char *new_hash, unsigned int hash_len); /* Comparison function, should return 0 if the hashes are alike.
	                                                                                  * When NULL, secondary_key_cmp() uses memcmp() instead. */
};
83
/* Normalization functions referenced by the vary_information table below. */
static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
				      char *buf, unsigned int *buf_len);
static int default_normalizer(struct htx *htx, struct ist hdr_name,
			      char *buf, unsigned int *buf_len);

/* Warning : do not forget to update HTTP_CACHE_SEC_KEY_LEN when new items are
 * added to this array.
 * Both entries leave <cmp_fn> NULL, so secondary_key_cmp() compares their
 * sub-hashes with memcmp(). */
const struct vary_hashing_information vary_information[] = {
	{ IST("accept-encoding"), VARY_ACCEPT_ENCODING, sizeof(int), &accept_encoding_normalizer, NULL },
	{ IST("referer"), VARY_REFERER, sizeof(int), &default_normalizer, NULL },
};

/* Secondary key helpers, defined later in the file. */
static int http_request_prebuild_full_secondary_key(struct stream *s);
static int http_request_build_secondary_key(struct stream *s, int vary_signature);
static int http_request_reduce_secondary_key(unsigned int vary_signature,
					     char prebuilt_key[HTTP_CACHE_SEC_KEY_LEN]);
100
101
/*
 * cache ctx for filters
 */
struct cache_st {
	struct shared_block *first_block; /* first block of the row being stored; set to NULL by
	                                   * cache_store_strm_init(). Its data area is read as a
	                                   * struct cache_entry by the store callbacks. */
};
108
/* NOTE(review): presumably the default value of cache->max_secondary_entries -
 * confirm against the configuration parser. */
#define DEFAULT_MAX_SECONDARY_ENTRY 10

/* Header of a cached object; the object's data follows in <data>. */
struct cache_entry {
	unsigned int complete;          /* An entry won't be valid until complete is not null. */
	unsigned int latest_validation; /* latest validation date */
	unsigned int expire;            /* expiration date, compared against now.tv_sec */
	unsigned int age;               /* Origin server "Age" header value */

	struct eb32_node eb;            /* ebtree node used to hold the cache object */
	char hash[20];                  /* full primary hash; entry_exist() uses its first 32 bits
	                                 * as the tree key and memcmp()s the whole array */

	char secondary_key[HTTP_CACHE_SEC_KEY_LEN];  /* Optional secondary key. */
	unsigned int secondary_key_signature;        /* Bitfield of the HTTP headers that should be used
	                                              * to build secondary keys for this cache entry. */
	unsigned int secondary_entries_count;        /* Should only be filled in the last entry of a list of dup entries */
	unsigned int last_clear_ts;                  /* Timestamp of the last call to clear_expired_duplicates. */

	unsigned int etag_length;       /* Length of the ETag value (if one was found in the response). */
	unsigned int etag_offset;       /* Offset of the ETag value in the data buffer. */

	time_t last_modified;           /* Origin server "Last-Modified" header value converted in
	                                 * seconds since epoch. If no "Last-Modified"
	                                 * header is found, use "Date" header value,
	                                 * otherwise use reception time. This field will
	                                 * be used in case of an "If-Modified-Since"-based
	                                 * conditional request. */

	unsigned char data[0];          /* start of the data stored after this header */
};
138
#define CACHE_BLOCKSIZE 1024
#define CACHE_ENTRY_MAX_AGE 2147483648U /* 2^31 */

static struct list caches = LIST_HEAD_INIT(caches);
static struct list caches_config = LIST_HEAD_INIT(caches_config); /* cache config to init; this is the
                                                                   * list searched by cache_store_check() */
static struct cache *tmp_cache_config = NULL;

/* Pool for the per-stream filter contexts (struct cache_st). */
DECLARE_STATIC_POOL(pool_head_cache_st, "cache_st", sizeof(struct cache_st));

/* Forward declarations: both are used before their definition. */
static struct eb32_node *insert_entry(struct cache *cache, struct cache_entry *new_entry);
static void delete_entry(struct cache_entry *del_entry);
150
William Lallemandf528fff2017-11-23 19:43:17 +0100151struct cache_entry *entry_exist(struct cache *cache, char *hash)
William Lallemand4da3f8a2017-10-31 14:33:34 +0100152{
153 struct eb32_node *node;
154 struct cache_entry *entry;
155
Willy Tarreau8b507582020-02-25 09:35:07 +0100156 node = eb32_lookup(&cache->entries, read_u32(hash));
William Lallemand4da3f8a2017-10-31 14:33:34 +0100157 if (!node)
158 return NULL;
159
160 entry = eb32_entry(node, struct cache_entry, eb);
William Lallemandf528fff2017-11-23 19:43:17 +0100161
162 /* if that's not the right node */
163 if (memcmp(entry->hash, hash, sizeof(entry->hash)))
164 return NULL;
165
William Lallemand08727662017-11-21 20:01:27 +0100166 if (entry->expire > now.tv_sec) {
William Lallemand4da3f8a2017-10-31 14:33:34 +0100167 return entry;
William Lallemand08727662017-11-21 20:01:27 +0100168 } else {
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100169 delete_entry(entry);
William Lallemand08727662017-11-21 20:01:27 +0100170 entry->eb.key = 0;
171 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100172 return NULL;
173
174}
175
Remi Tricot-Le Breton6a34b2b2020-12-23 18:13:47 +0100176
177/*
178 * Compare a newly built secondary key to the one found in a cache_entry.
179 * Every sub-part of the key is compared to the reference through the dedicated
180 * comparison function of the sub-part (that might do more than a simple
181 * memcmp).
182 * Returns 0 if the keys are alike.
183 */
184static int secondary_key_cmp(const char *ref_key, const char *new_key)
185{
186 int retval = 0;
187 int idx = 0;
188 int offset = 0;
189 const struct vary_hashing_information *info;
190
191 for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && !retval; ++idx) {
192 info = &vary_information[idx];
193
194 if (info->cmp_fn)
195 retval = info->cmp_fn(&ref_key[offset], &new_key[offset], info->hash_length);
196 else
197 retval = memcmp(&ref_key[offset], &new_key[offset], info->hash_length);
198
199 offset += info->hash_length;
200 }
201
202 return retval;
203}
204
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +0100205/*
206 * There can be multiple entries with the same primary key in the ebtree so in
207 * order to get the proper one out of the list, we use a secondary_key.
208 * This function simply iterates over all the entries with the same primary_key
209 * until it finds the right one.
210 * Returns the cache_entry in case of success, NULL otherwise.
211 */
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100212struct cache_entry *secondary_entry_exist(struct cache *cache, struct cache_entry *entry,
Remi Tricot-Le Breton6a34b2b2020-12-23 18:13:47 +0100213 const char *secondary_key)
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100214{
215 struct eb32_node *node = &entry->eb;
216
217 if (!entry->secondary_key_signature)
218 return NULL;
219
Remi Tricot-Le Breton6a34b2b2020-12-23 18:13:47 +0100220 while (entry && secondary_key_cmp(entry->secondary_key, secondary_key) != 0) {
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100221 node = eb32_next_dup(node);
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100222
223 /* Make the best use of this iteration and clear expired entries
224 * when we find them. Calling delete_entry would be too costly
225 * so we simply call eb32_delete. The secondary_entry count will
226 * be updated when we try to insert a new entry to this list. */
227 if (entry->expire <= now.tv_sec) {
228 eb32_delete(&entry->eb);
229 entry->eb.key = 0;
230 }
231
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100232 entry = node ? eb32_entry(node, struct cache_entry, eb) : NULL;
233 }
234
235 /* Expired entry */
236 if (entry && entry->expire <= now.tv_sec) {
237 eb32_delete(&entry->eb);
238 entry->eb.key = 0;
239 entry = NULL;
240 }
241
242 return entry;
243}
244
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100245
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100246/*
247 * Remove all expired entries from a list of duplicates.
248 * Return the number of alive entries in the list and sets dup_tail to the
249 * current last item of the list.
250 */
251static unsigned int clear_expired_duplicates(struct eb32_node **dup_tail)
252{
253 unsigned int entry_count = 0;
254 struct cache_entry *entry = NULL;
255 struct eb32_node *prev = *dup_tail;
256 struct eb32_node *tail = NULL;
257
258 while (prev) {
259 entry = container_of(prev, struct cache_entry, eb);
260 prev = eb32_prev_dup(prev);
261 if (entry->expire <= now.tv_sec) {
262 eb32_delete(&entry->eb);
263 entry->eb.key = 0;
264 }
265 else {
266 if (!tail)
267 tail = &entry->eb;
268 ++entry_count;
269 }
270 }
271
272 *dup_tail = tail;
273
274 return entry_count;
275}
276
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100277
278/*
279 * This function inserts a cache_entry in the cache's ebtree. In case of
280 * duplicate entries (vary), it then checks that the number of entries did not
281 * reach the max number of secondary entries. If this entry should not have been
282 * created, remove it.
283 * In the regular case (unique entries), this function does not do more than a
284 * simple insert. In case of secondary entries, it will at most cost an
285 * insertion+max_sec_entries time checks and entry deletion.
286 * Returns the newly inserted node in case of success, NULL otherwise.
287 */
288static struct eb32_node *insert_entry(struct cache *cache, struct cache_entry *new_entry)
289{
290 struct eb32_node *prev = NULL;
291 struct cache_entry *entry = NULL;
292 unsigned int entry_count = 0;
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100293 unsigned int last_clear_ts = now.tv_sec;
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100294
295 struct eb32_node *node = eb32_insert(&cache->entries, &new_entry->eb);
296
297 /* We should not have multiple entries with the same primary key unless
298 * the entry has a non null vary signature. */
299 if (!new_entry->secondary_key_signature)
300 return node;
301
302 prev = eb32_prev_dup(node);
303 if (prev != NULL) {
304 /* The last entry of a duplicate list should contain the current
305 * number of entries in the list. */
306 entry = container_of(prev, struct cache_entry, eb);
307 entry_count = entry->secondary_entries_count;
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100308 last_clear_ts = entry->last_clear_ts;
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100309
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +0100310 if (entry_count >= cache->max_secondary_entries) {
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100311 /* Some entries of the duplicate list might be expired so
312 * we will iterate over all the items in order to free some
313 * space. In order to avoid going over the same list too
314 * often, we first check the timestamp of the last check
315 * performed. */
316 if (last_clear_ts == now.tv_sec) {
317 /* Too many entries for this primary key, clear the
318 * one that was inserted. */
319 eb32_delete(node);
320 node->key = 0;
321 return NULL;
322 }
323
324 entry_count = clear_expired_duplicates(&prev);
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +0100325 if (entry_count >= cache->max_secondary_entries) {
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100326 /* Still too many entries for this primary key, delete
327 * the newly inserted one. */
328 entry = container_of(prev, struct cache_entry, eb);
329 entry->last_clear_ts = now.tv_sec;
330 eb32_delete(node);
331 node->key = 0;
332 return NULL;
333 }
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100334 }
335 }
336
337 new_entry->secondary_entries_count = entry_count + 1;
Remi Tricot-Le Breton73be7962020-12-10 17:58:42 +0100338 new_entry->last_clear_ts = last_clear_ts;
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100339
340 return node;
341}
342
343
344/*
345 * This function removes an entry from the ebtree. If the entry was a duplicate
346 * (in case of Vary), it updates the secondary entry counter in another
347 * duplicate entry (the last entry of the dup list).
348 */
349static void delete_entry(struct cache_entry *del_entry)
350{
351 struct eb32_node *prev = NULL, *next = NULL;
352 struct cache_entry *entry = NULL;
353 struct eb32_node *last = NULL;
354
355 if (del_entry->secondary_key_signature) {
356 next = &del_entry->eb;
357
358 /* Look for last entry of the duplicates list. */
359 while ((next = eb32_next_dup(next))) {
360 last = next;
361 }
362
363 if (last) {
364 entry = container_of(last, struct cache_entry, eb);
365 --entry->secondary_entries_count;
366 }
367 else {
368 /* The current entry is the last one, look for the
369 * previous one to update its counter. */
370 prev = eb32_prev_dup(&del_entry->eb);
371 if (prev) {
372 entry = container_of(prev, struct cache_entry, eb);
373 entry->secondary_entries_count = del_entry->secondary_entries_count - 1;
374 }
375 }
376 }
377 eb32_delete(&del_entry->eb);
378 del_entry->eb.key = 0;
379}
380
381
William Lallemand4da3f8a2017-10-31 14:33:34 +0100382static inline struct shared_context *shctx_ptr(struct cache *cache)
383{
384 return (struct shared_context *)((unsigned char *)cache - ((struct shared_context *)NULL)->data);
385}
386
William Lallemand77c11972017-10-31 20:43:01 +0100387static inline struct shared_block *block_ptr(struct cache_entry *entry)
388{
389 return (struct shared_block *)((unsigned char *)entry - ((struct shared_block *)NULL)->data);
390}
391
392
393
William Lallemand41db4602017-10-30 11:15:51 +0100394static int
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100395cache_store_init(struct proxy *px, struct flt_conf *fconf)
William Lallemand41db4602017-10-30 11:15:51 +0100396{
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100397 fconf->flags |= FLT_CFG_FL_HTX;
William Lallemand41db4602017-10-30 11:15:51 +0100398 return 0;
399}
400
Christopher Faulet95220e22018-12-07 17:34:39 +0100401static void
402cache_store_deinit(struct proxy *px, struct flt_conf *fconf)
403{
404 struct cache_flt_conf *cconf = fconf->conf;
405
Tim Duesterhusd7c6e6a2020-09-14 18:01:33 +0200406 if (!(cconf->flags & CACHE_FLT_INIT))
407 free(cconf->c.name);
Christopher Faulet95220e22018-12-07 17:34:39 +0100408 free(cconf);
409}
410
William Lallemand4da3f8a2017-10-31 14:33:34 +0100411static int
Christopher Faulet95220e22018-12-07 17:34:39 +0100412cache_store_check(struct proxy *px, struct flt_conf *fconf)
413{
414 struct cache_flt_conf *cconf = fconf->conf;
Christopher Fauletafd819c2018-12-11 08:57:45 +0100415 struct flt_conf *f;
Christopher Faulet95220e22018-12-07 17:34:39 +0100416 struct cache *cache;
Christopher Faulet27d93c32018-12-15 22:32:02 +0100417 int comp = 0;
Christopher Faulet95220e22018-12-07 17:34:39 +0100418
William Lallemandd1d1e222019-08-28 15:22:49 +0200419 /* Find the cache corresponding to the name in the filter config. The
420 * cache will not be referenced now in the filter config because it is
421 * not fully allocated. This step will be performed during the cache
422 * post_check.
423 */
424 list_for_each_entry(cache, &caches_config, list) {
425 if (!strcmp(cache->id, cconf->c.name))
Christopher Faulet95220e22018-12-07 17:34:39 +0100426 goto found;
Christopher Faulet95220e22018-12-07 17:34:39 +0100427 }
428
429 ha_alert("config: %s '%s': unable to find the cache '%s' referenced by the filter 'cache'.\n",
430 proxy_type_str(px), px->id, (char *)cconf->c.name);
431 return 1;
432
433 found:
Christopher Fauletafd819c2018-12-11 08:57:45 +0100434 /* Here <cache> points on the cache the filter must use and <cconf>
435 * points on the cache filter configuration. */
436
437 /* Check all filters for proxy <px> to know if the compression is
Christopher Faulet27d93c32018-12-15 22:32:02 +0100438 * enabled and if it is after the cache. When the compression is before
439 * the cache, an error is returned. Also check if the cache filter must
440 * be explicitly declaired or not. */
Christopher Fauletafd819c2018-12-11 08:57:45 +0100441 list_for_each_entry(f, &px->filter_configs, list) {
442 if (f == fconf) {
Christopher Faulet27d93c32018-12-15 22:32:02 +0100443 /* The compression filter must be evaluated after the cache. */
444 if (comp) {
445 ha_alert("config: %s '%s': unable to enable the compression filter before "
446 "the cache '%s'.\n", proxy_type_str(px), px->id, cache->id);
447 return 1;
448 }
Christopher Faulet99a17a22018-12-11 09:18:27 +0100449 }
Christopher Faulet8f7fe1c2019-07-15 15:08:25 +0200450 else if (f->id == http_comp_flt_id)
Christopher Faulet27d93c32018-12-15 22:32:02 +0100451 comp = 1;
Christopher Faulet78fbb9f2019-08-11 23:11:03 +0200452 else if (f->id == fcgi_flt_id)
453 continue;
Christopher Faulet27d93c32018-12-15 22:32:02 +0100454 else if ((f->id != fconf->id) && (cconf->flags & CACHE_FLT_F_IMPLICIT_DECL)) {
455 /* Implicit declaration is only allowed with the
Christopher Faulet78fbb9f2019-08-11 23:11:03 +0200456 * compression and fcgi. For other filters, an implicit
Christopher Faulet27d93c32018-12-15 22:32:02 +0100457 * declaration is required. */
458 ha_alert("config: %s '%s': require an explicit filter declaration "
459 "to use the cache '%s'.\n", proxy_type_str(px), px->id, cache->id);
460 return 1;
461 }
462
Christopher Fauletafd819c2018-12-11 08:57:45 +0100463 }
Christopher Faulet95220e22018-12-07 17:34:39 +0100464 return 0;
465}
466
467static int
Christopher Faulet65554e12020-03-06 14:52:06 +0100468cache_store_strm_init(struct stream *s, struct filter *filter)
William Lallemand4da3f8a2017-10-31 14:33:34 +0100469{
Christopher Faulet65554e12020-03-06 14:52:06 +0100470 struct cache_st *st;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100471
Christopher Faulet65554e12020-03-06 14:52:06 +0100472 st = pool_alloc_dirty(pool_head_cache_st);
473 if (st == NULL)
474 return -1;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100475
Christopher Faulet65554e12020-03-06 14:52:06 +0100476 st->first_block = NULL;
477 filter->ctx = st;
Christopher Faulet839791a2019-01-07 16:12:07 +0100478
Christopher Faulet65554e12020-03-06 14:52:06 +0100479 /* Register post-analyzer on AN_RES_WAIT_HTTP */
480 filter->post_analyzers |= AN_RES_WAIT_HTTP;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100481 return 1;
482}
483
Christopher Faulet65554e12020-03-06 14:52:06 +0100484static void
485cache_store_strm_deinit(struct stream *s, struct filter *filter)
William Lallemand49dc0482017-11-24 14:33:54 +0100486{
487 struct cache_st *st = filter->ctx;
Christopher Faulet95220e22018-12-07 17:34:39 +0100488 struct cache_flt_conf *cconf = FLT_CONF(filter);
489 struct cache *cache = cconf->c.cache;
William Lallemand49dc0482017-11-24 14:33:54 +0100490 struct shared_context *shctx = shctx_ptr(cache);
491
William Lallemand49dc0482017-11-24 14:33:54 +0100492 /* Everything should be released in the http_end filter, but we need to do it
493 * there too, in case of errors */
William Lallemand49dc0482017-11-24 14:33:54 +0100494 if (st && st->first_block) {
William Lallemand49dc0482017-11-24 14:33:54 +0100495 shctx_lock(shctx);
496 shctx_row_dec_hot(shctx, st->first_block);
497 shctx_unlock(shctx);
William Lallemand49dc0482017-11-24 14:33:54 +0100498 }
499 if (st) {
Willy Tarreaubafbe012017-11-24 17:34:44 +0100500 pool_free(pool_head_cache_st, st);
William Lallemand49dc0482017-11-24 14:33:54 +0100501 filter->ctx = NULL;
502 }
William Lallemand49dc0482017-11-24 14:33:54 +0100503}
504
Christopher Faulet839791a2019-01-07 16:12:07 +0100505static int
506cache_store_post_analyze(struct stream *s, struct filter *filter, struct channel *chn,
507 unsigned an_bit)
508{
509 struct http_txn *txn = s->txn;
510 struct http_msg *msg = &txn->rsp;
511 struct cache_st *st = filter->ctx;
512
513 if (an_bit != AN_RES_WAIT_HTTP)
514 goto end;
515
516 /* Here we need to check if any compression filter precedes the cache
517 * filter. This is only possible when the compression is configured in
518 * the frontend while the cache filter is configured on the
519 * backend. This case cannot be detected during HAProxy startup. So in
520 * such cases, the cache is disabled.
521 */
522 if (st && (msg->flags & HTTP_MSGF_COMPRESSING)) {
523 pool_free(pool_head_cache_st, st);
524 filter->ctx = NULL;
525 }
526
527 end:
528 return 1;
529}
William Lallemand49dc0482017-11-24 14:33:54 +0100530
531static int
William Lallemand4da3f8a2017-10-31 14:33:34 +0100532cache_store_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
533{
534 struct cache_st *st = filter->ctx;
535
William Lallemand4da3f8a2017-10-31 14:33:34 +0100536 if (!(msg->chn->flags & CF_ISRESP) || !st)
537 return 1;
538
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200539 if (st->first_block)
Christopher Faulet67658c92018-12-06 21:59:39 +0100540 register_data_filter(s, msg->chn, filter);
William Lallemand4da3f8a2017-10-31 14:33:34 +0100541 return 1;
542}
543
Frédéric Lécaille8df65ae2018-10-22 18:01:48 +0200544static inline void disable_cache_entry(struct cache_st *st,
545 struct filter *filter, struct shared_context *shctx)
546{
547 struct cache_entry *object;
548
549 object = (struct cache_entry *)st->first_block->data;
550 filter->ctx = NULL; /* disable cache */
551 shctx_lock(shctx);
552 shctx_row_dec_hot(shctx, st->first_block);
Remi Tricot-Le Breton964caaf2020-12-15 14:30:12 +0100553 eb32_delete(&object->eb);
Frédéric Lécaille8df65ae2018-10-22 18:01:48 +0200554 object->eb.key = 0;
555 shctx_unlock(shctx);
556 pool_free(pool_head_cache_st, st);
557}
558
William Lallemand4da3f8a2017-10-31 14:33:34 +0100559static int
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100560cache_store_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
561 unsigned int offset, unsigned int len)
562{
Christopher Faulet95220e22018-12-07 17:34:39 +0100563 struct cache_flt_conf *cconf = FLT_CONF(filter);
564 struct shared_context *shctx = shctx_ptr(cconf->c.cache);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100565 struct cache_st *st = filter->ctx;
566 struct htx *htx = htxbuf(&msg->chn->buf);
567 struct htx_blk *blk;
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200568 struct shared_block *fb;
Christopher Faulet497c7592020-03-02 16:19:50 +0100569 struct htx_ret htxret;
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200570 unsigned int orig_len, to_forward;
571 int ret;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100572
573 if (!len)
574 return len;
575
576 if (!st->first_block) {
577 unregister_data_filter(s, msg->chn, filter);
578 return len;
579 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100580
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200581 chunk_reset(&trash);
582 orig_len = len;
583 to_forward = 0;
Christopher Faulet497c7592020-03-02 16:19:50 +0100584
585 htxret = htx_find_offset(htx, offset);
586 blk = htxret.blk;
587 offset = htxret.ret;
588 for (; blk && len; blk = htx_get_next_blk(htx, blk)) {
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100589 enum htx_blk_type type = htx_get_blk_type(blk);
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200590 uint32_t info, sz = htx_get_blksz(blk);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100591 struct ist v;
592
593 switch (type) {
594 case HTX_BLK_UNUSED:
595 break;
596
597 case HTX_BLK_DATA:
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100598 v = htx_get_blk_value(htx, blk);
599 v.ptr += offset;
600 v.len -= offset;
601 if (v.len > len)
602 v.len = len;
603
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200604 info = (type << 28) + v.len;
605 chunk_memcat(&trash, (char *)&info, sizeof(info));
606 chunk_memcat(&trash, v.ptr, v.len);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100607 to_forward += v.len;
608 len -= v.len;
609 break;
610
611 default:
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200612 /* Here offset must always be 0 because only
613 * DATA blocks can be partially transferred. */
614 if (offset)
615 goto no_cache;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100616 if (sz > len)
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200617 goto end;
618
619 chunk_memcat(&trash, (char *)&blk->info, sizeof(blk->info));
620 chunk_memcat(&trash, htx_get_blk_ptr(htx, blk), sz);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100621 to_forward += sz;
622 len -= sz;
623 break;
624 }
625
626 offset = 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100627 }
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200628
629 end:
630 shctx_lock(shctx);
631 fb = shctx_row_reserve_hot(shctx, st->first_block, trash.data);
632 if (!fb) {
633 shctx_unlock(shctx);
634 goto no_cache;
635 }
636 shctx_unlock(shctx);
637
638 ret = shctx_row_data_append(shctx, st->first_block, st->first_block->last_append,
639 (unsigned char *)b_head(&trash), b_data(&trash));
640 if (ret < 0)
641 goto no_cache;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100642
643 return to_forward;
644
645 no_cache:
646 disable_cache_entry(st, filter, shctx);
647 unregister_data_filter(s, msg->chn, filter);
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200648 return orig_len;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100649}
650
651static int
William Lallemand4da3f8a2017-10-31 14:33:34 +0100652cache_store_http_end(struct stream *s, struct filter *filter,
653 struct http_msg *msg)
654{
655 struct cache_st *st = filter->ctx;
Christopher Faulet95220e22018-12-07 17:34:39 +0100656 struct cache_flt_conf *cconf = FLT_CONF(filter);
657 struct cache *cache = cconf->c.cache;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100658 struct shared_context *shctx = shctx_ptr(cache);
659 struct cache_entry *object;
660
661 if (!(msg->chn->flags & CF_ISRESP))
662 return 1;
663
664 if (st && st->first_block) {
665
666 object = (struct cache_entry *)st->first_block->data;
667
William Lallemand4da3f8a2017-10-31 14:33:34 +0100668 shctx_lock(shctx);
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +0100669 /* The whole payload was cached, the entry can now be used. */
670 object->complete = 1;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100671 /* remove from the hotlist */
William Lallemand4da3f8a2017-10-31 14:33:34 +0100672 shctx_row_dec_hot(shctx, st->first_block);
673 shctx_unlock(shctx);
674
675 }
676 if (st) {
Willy Tarreaubafbe012017-11-24 17:34:44 +0100677 pool_free(pool_head_cache_st, st);
William Lallemand4da3f8a2017-10-31 14:33:34 +0100678 filter->ctx = NULL;
679 }
680
681 return 1;
682}
683
/*
 * This intends to be used when checking HTTP headers for some
 * word=value directive.
 *
 * <sample> is the text to inspect (<slen> bytes, not necessarily
 * NUL-terminated) and <word> the directive name to look for (<wlen> bytes).
 * The name is matched at the very beginning of <sample>, ignoring the case of
 * ASCII letters only; any other byte must match exactly.
 *
 * Returns a pointer to the first character of the value, i.e. the byte right
 * after the '=' that follows the name. Returns NULL if the word was not found,
 * if it is not immediately followed by '=', or if there wasn't any value
 * assigned to it.
 */
char *directive_value(const char *sample, int slen, const char *word, int wlen)
{
	if (slen < wlen)
		return NULL;

	while (wlen) {
		char diff = *sample ^ *word;

		if (diff) {
			char lower = *word | ('A' ^ 'a');

			/* A difference limited to the case bit is only tolerated
			 * for letters: otherwise unrelated bytes such as '-' and
			 * '\r', which differ by that same bit, would match. */
			if (diff != ('A' ^ 'a') || lower < 'a' || lower > 'z')
				return NULL;
		}
		sample++;
		word++;
		slen--;
		wlen--;
	}

	/* we need at least "=" followed by one byte of value */
	if (slen < 2 || *sample != '=')
		return NULL;

	return (char *)(sample + 1);
}
721
722/*
723 * Return the maxage in seconds of an HTTP response.
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100724 * The returned value will always take the cache's configuration into account
725 * (cache->maxage) but the actual max age of the response will be set in the
726 * true_maxage parameter. It will be used to determine if a response is already
727 * stale or not.
William Lallemand4da3f8a2017-10-31 14:33:34 +0100728 * Compute the maxage using either:
729 * - the assigned max-age of the cache
730 * - the s-maxage directive
731 * - the max-age directive
732 * - (Expires - Data) headers
733 * - the default-max-age of the cache
734 *
735 */
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100736int http_calc_maxage(struct stream *s, struct cache *cache, int *true_maxage)
William Lallemand4da3f8a2017-10-31 14:33:34 +0100737{
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200738 struct htx *htx = htxbuf(&s->res.buf);
739 struct http_hdr_ctx ctx = { .blk = NULL };
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100740 long smaxage = -1;
741 long maxage = -1;
Remi Tricot-Le Bretona6476112020-10-28 17:52:53 +0100742 int expires = -1;
743 struct tm tm = {};
744 time_t expires_val = 0;
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100745 char *endptr = NULL;
746 int offset = 0;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100747
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100748 /* The Cache-Control max-age and s-maxage directives should be followed by
749 * a positive numerical value (see RFC 7234#5.2.1.1). According to the
750 * specs, a sender "should not" generate a quoted-string value but we will
751 * still accept this format since it isn't strictly forbidden. */
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200752 while (http_find_header(htx, ist("cache-control"), &ctx, 0)) {
753 char *value;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100754
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200755 value = directive_value(ctx.value.ptr, ctx.value.len, "s-maxage", 8);
756 if (value) {
757 struct buffer *chk = get_trash_chunk();
William Lallemand4da3f8a2017-10-31 14:33:34 +0100758
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200759 chunk_strncat(chk, value, ctx.value.len - 8 + 1);
760 chunk_strncat(chk, "", 1);
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100761 offset = (*chk->area == '"') ? 1 : 0;
762 smaxage = strtol(chk->area + offset, &endptr, 10);
763 if (unlikely(smaxage < 0 || endptr == chk->area))
764 return -1;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100765 }
766
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200767 value = directive_value(ctx.value.ptr, ctx.value.len, "max-age", 7);
768 if (value) {
769 struct buffer *chk = get_trash_chunk();
Christopher Faulet5f2c49f2019-07-15 20:49:46 +0200770
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200771 chunk_strncat(chk, value, ctx.value.len - 7 + 1);
772 chunk_strncat(chk, "", 1);
Remi Tricot-Le Bretonfcea3742020-12-03 18:19:30 +0100773 offset = (*chk->area == '"') ? 1 : 0;
774 maxage = strtol(chk->area + offset, &endptr, 10);
775 if (unlikely(maxage < 0 || endptr == chk->area))
776 return -1;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100777 }
778 }
779
Remi Tricot-Le Bretona6476112020-10-28 17:52:53 +0100780 /* Look for Expires header if no s-maxage or max-age Cache-Control data
781 * was found. */
782 if (maxage == -1 && smaxage == -1) {
783 ctx.blk = NULL;
784 if (http_find_header(htx, ist("expires"), &ctx, 1)) {
785 if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
786 expires_val = my_timegm(&tm);
787 /* A request having an expiring date earlier
788 * than the current date should be considered as
789 * stale. */
790 expires = (expires_val >= now.tv_sec) ?
791 (expires_val - now.tv_sec) : 0;
792 }
793 else {
794 /* Following RFC 7234#5.3, an invalid date
795 * format must be treated as a date in the past
796 * so the cache entry must be seen as already
797 * expired. */
798 expires = 0;
799 }
800 }
801 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100802
803
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100804 if (smaxage > 0) {
805 if (true_maxage)
806 *true_maxage = smaxage;
William Lallemand49b44532017-11-24 18:53:43 +0100807 return MIN(smaxage, cache->maxage);
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100808 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100809
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100810 if (maxage > 0) {
811 if (true_maxage)
812 *true_maxage = maxage;
William Lallemand49b44532017-11-24 18:53:43 +0100813 return MIN(maxage, cache->maxage);
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100814 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100815
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100816 if (expires >= 0) {
817 if (true_maxage)
818 *true_maxage = expires;
Remi Tricot-Le Bretona6476112020-10-28 17:52:53 +0100819 return MIN(expires, cache->maxage);
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100820 }
Remi Tricot-Le Bretona6476112020-10-28 17:52:53 +0100821
William Lallemand49b44532017-11-24 18:53:43 +0100822 return cache->maxage;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100823
824}
825
826
William Lallemanda400a3a2017-11-20 19:13:12 +0100827static void cache_free_blocks(struct shared_block *first, struct shared_block *block)
828{
Willy Tarreau5bd37fa2018-04-04 20:17:03 +0200829 struct cache_entry *object = (struct cache_entry *)block->data;
830
831 if (first == block && object->eb.key)
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +0100832 delete_entry(object);
Willy Tarreau5bd37fa2018-04-04 20:17:03 +0200833 object->eb.key = 0;
William Lallemanda400a3a2017-11-20 19:13:12 +0100834}
835
Remi Tricot Le Breton27091b42020-10-23 10:51:27 +0200836
837/* As per RFC 7234#4.3.2, in case of "If-Modified-Since" conditional request, the
838 * date value should be compared to a date determined by in a previous response (for
839 * the same entity). This date could either be the "Last-Modified" value, or the "Date"
840 * value of the response's reception time (by decreasing order of priority). */
841static time_t get_last_modified_time(struct htx *htx)
842{
843 time_t last_modified = 0;
844 struct http_hdr_ctx ctx = { .blk = NULL };
845 struct tm tm = {};
846
847 if (http_find_header(htx, ist("last-modified"), &ctx, 1)) {
848 if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
849 last_modified = my_timegm(&tm);
850 }
851 }
852
853 if (!last_modified) {
854 ctx.blk = NULL;
855 if (http_find_header(htx, ist("date"), &ctx, 1)) {
856 if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
857 last_modified = my_timegm(&tm);
858 }
859 }
860 }
861
862 /* Fallback on the current time if no "Last-Modified" or "Date" header
863 * was found. */
864 if (!last_modified)
865 last_modified = now.tv_sec;
866
867 return last_modified;
868}
869
William Lallemand41db4602017-10-30 11:15:51 +0100870/*
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100871 * Checks the vary header's value. The headers on which vary should be applied
Ilya Shipitsinf38a0182020-12-21 01:16:17 +0500872 * must be explicitly supported in the vary_information array (see cache.c). If
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +0100873 * any other header is mentioned, we won't store the response.
874 * Returns 1 if Vary-based storage can work, 0 otherwise.
875 */
876static int http_check_vary_header(struct htx *htx, unsigned int *vary_signature)
877{
878 unsigned int vary_idx;
879 unsigned int vary_info_count;
880 const struct vary_hashing_information *vary_info;
881 struct http_hdr_ctx ctx = { .blk = NULL };
882
883 int retval = 1;
884
885 *vary_signature = 0;
886
887 vary_info_count = sizeof(vary_information)/sizeof(*vary_information);
888 while (retval && http_find_header(htx, ist("Vary"), &ctx, 0)) {
889 for (vary_idx = 0; vary_idx < vary_info_count; ++vary_idx) {
890 vary_info = &vary_information[vary_idx];
891 if (isteqi(ctx.value, vary_info->hdr_name)) {
892 *vary_signature |= vary_info->value;
893 break;
894 }
895 }
896 retval = (vary_idx < vary_info_count);
897 }
898
899 return retval;
900}
901
902
903
904/*
Ilya Shipitsin6fb0f212020-04-02 15:25:26 +0500905 * This function will store the headers of the response in a buffer and then
William Lallemand41db4602017-10-30 11:15:51 +0100906 * register a filter to store the data
907 */
908enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px,
Christopher Faulet8f3c2562019-06-03 22:19:18 +0200909 struct session *sess, struct stream *s, int flags)
William Lallemand41db4602017-10-30 11:15:51 +0100910{
Frédéric Lécaillee7a770c2018-10-26 14:29:22 +0200911 long long hdr_age;
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +0100912 int effective_maxage = 0;
913 int true_maxage = 0;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100914 struct http_txn *txn = s->txn;
915 struct http_msg *msg = &txn->rsp;
916 struct filter *filter;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100917 struct shared_block *first = NULL;
Christopher Faulet95220e22018-12-07 17:34:39 +0100918 struct cache_flt_conf *cconf = rule->arg.act.p[0];
Remi Tricot-Le Breton754b2422020-11-16 15:56:10 +0100919 struct cache *cache = cconf->c.cache;
920 struct shared_context *shctx = shctx_ptr(cache);
Christopher Faulet839791a2019-01-07 16:12:07 +0100921 struct cache_st *cache_ctx = NULL;
922 struct cache_entry *object, *old;
Willy Tarreau8b507582020-02-25 09:35:07 +0100923 unsigned int key = read_u32(txn->cache_hash);
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200924 struct htx *htx;
925 struct http_hdr_ctx ctx;
Christopher Fauletb0667472019-09-03 22:22:12 +0200926 size_t hdrs_len = 0;
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200927 int32_t pos;
Remi Tricot-Le Bretondbb65b52020-10-22 10:40:04 +0200928 struct ist header_name = IST_NULL;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +0100929 unsigned int vary_signature = 0;
William Lallemand4da3f8a2017-10-31 14:33:34 +0100930
William Lallemand4da3f8a2017-10-31 14:33:34 +0100931 /* Don't cache if the response came from a cache */
932 if ((obj_type(s->target) == OBJ_TYPE_APPLET) &&
933 s->target == &http_cache_applet.obj_type) {
934 goto out;
935 }
936
937 /* cache only HTTP/1.1 */
938 if (!(txn->req.flags & HTTP_MSGF_VER_11))
939 goto out;
940
Willy Tarreau6905d182019-10-01 17:59:17 +0200941 /* cache only GET method */
Remi Tricot-Le Breton72cffaf2020-12-03 18:19:31 +0100942 if (txn->meth != HTTP_METH_GET) {
943 /* In case of successful unsafe method on a stored resource, the
944 * cached entry must be invalidated (see RFC7234#4.4).
945 * A "non-error response" is one with a 2xx (Successful) or 3xx
946 * (Redirection) status code. */
947 if (txn->status >= 200 && txn->status < 400) {
948 switch (txn->meth) {
949 case HTTP_METH_OPTIONS:
950 case HTTP_METH_GET:
951 case HTTP_METH_HEAD:
952 case HTTP_METH_TRACE:
953 break;
954
955 default: /* Any unsafe method */
Ilya Shipitsinf38a0182020-12-21 01:16:17 +0500956 /* Discard any corresponding entry in case of successful
Remi Tricot-Le Breton72cffaf2020-12-03 18:19:31 +0100957 * unsafe request (such as PUT, POST or DELETE). */
958 shctx_lock(shctx);
959
960 old = entry_exist(cconf->c.cache, txn->cache_hash);
961 if (old) {
962 eb32_delete(&old->eb);
963 old->eb.key = 0;
964 }
965 shctx_unlock(shctx);
966 }
967 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100968 goto out;
Remi Tricot-Le Breton72cffaf2020-12-03 18:19:31 +0100969 }
William Lallemand4da3f8a2017-10-31 14:33:34 +0100970
Willy Tarreauc9036c02019-01-11 19:38:25 +0100971 /* cache key was not computed */
972 if (!key)
973 goto out;
974
William Lallemand4da3f8a2017-10-31 14:33:34 +0100975 /* cache only 200 status code */
976 if (txn->status != 200)
977 goto out;
978
Christopher Faulet839791a2019-01-07 16:12:07 +0100979 /* Find the corresponding filter instance for the current stream */
980 list_for_each_entry(filter, &s->strm_flt.filters, list) {
981 if (FLT_ID(filter) == cache_store_flt_id && FLT_CONF(filter) == cconf) {
982 /* No filter ctx, don't cache anything */
983 if (!filter->ctx)
984 goto out;
985 cache_ctx = filter->ctx;
986 break;
987 }
988 }
989
990 /* from there, cache_ctx is always defined */
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200991 htx = htxbuf(&s->res.buf);
William Lallemand4da3f8a2017-10-31 14:33:34 +0100992
Christopher Faulet95e7ea32019-07-15 21:01:29 +0200993 /* Do not cache too big objects. */
994 if ((msg->flags & HTTP_MSGF_CNT_LEN) && shctx->max_obj_size > 0 &&
995 htx->data + htx->extra > shctx->max_obj_size)
996 goto out;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +0100997
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +0100998 /* Only a subset of headers are supported in our Vary implementation. If
999 * any other header is present in the Vary header value, we won't be
Remi Tricot-Le Breton754b2422020-11-16 15:56:10 +01001000 * able to use the cache. Likewise, if Vary header support is disabled,
1001 * avoid caching responses that contain such a header. */
1002 ctx.blk = NULL;
1003 if (cache->vary_processing_enabled) {
1004 if (!http_check_vary_header(htx, &vary_signature))
1005 goto out;
Remi Tricot-Le Breton2b5c5cb2020-12-23 18:13:45 +01001006 if (vary_signature) {
1007 /* If something went wrong during the secondary key
1008 * building, do not store the response. */
1009 if (!(txn->flags & TX_CACHE_HAS_SEC_KEY))
1010 goto out;
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001011 http_request_reduce_secondary_key(vary_signature, txn->cache_secondary_hash);
Remi Tricot-Le Breton2b5c5cb2020-12-23 18:13:45 +01001012 }
Remi Tricot-Le Breton754b2422020-11-16 15:56:10 +01001013 }
1014 else if (http_find_header(htx, ist("Vary"), &ctx, 0)) {
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001015 goto out;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001016 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001017
Christopher Fauletfc9cfe42019-07-16 14:54:53 +02001018 http_check_response_for_cacheability(s, &s->res);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001019
Remi Tricot-Le Bretoncc9bf2e2020-11-12 11:14:41 +01001020 if (!(txn->flags & TX_CACHEABLE) || !(txn->flags & TX_CACHE_COOK) || (txn->flags & TX_CACHE_IGNORE))
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001021 goto out;
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001022
1023 shctx_lock(shctx);
1024 old = entry_exist(cache, txn->cache_hash);
1025 if (old) {
1026 if (vary_signature)
1027 old = secondary_entry_exist(cconf->c.cache, old,
1028 txn->cache_secondary_hash);
1029 if (old) {
1030 if (!old->complete) {
1031 /* An entry with the same primary key is already being
1032 * created, we should not try to store the current
1033 * response because it will waste space in the cache. */
1034 shctx_unlock(shctx);
1035 goto out;
1036 }
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +01001037 delete_entry(old);
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001038 old->eb.key = 0;
1039 }
1040 }
1041 first = shctx_row_reserve_hot(shctx, NULL, sizeof(struct cache_entry));
1042 if (!first) {
1043 shctx_unlock(shctx);
1044 goto out;
1045 }
1046 /* the received memory is not initialized, we need at least to mark
1047 * the object as not indexed yet.
1048 */
1049 object = (struct cache_entry *)first->data;
1050 memset(object, 0, sizeof(*object));
1051 object->eb.key = key;
1052 object->secondary_key_signature = vary_signature;
1053 /* We need to temporarily set a valid expiring time until the actual one
1054 * is set by the end of this function (in case of concurrent accesses to
1055 * the same resource). This way the second access will find an existing
1056 * but not yet usable entry in the tree and will avoid storing its data. */
1057 object->expire = now.tv_sec + 2;
1058
1059 memcpy(object->hash, txn->cache_hash, sizeof(object->hash));
1060 if (vary_signature)
1061 memcpy(object->secondary_key, txn->cache_secondary_hash, HTTP_CACHE_SEC_KEY_LEN);
1062
1063 /* Insert the entry in the tree even if the payload is not cached yet. */
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +01001064 if (insert_entry(cache, object) != &object->eb) {
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001065 object->eb.key = 0;
1066 shctx_unlock(shctx);
1067 goto out;
1068 }
1069 shctx_unlock(shctx);
1070
1071 /* reserve space for the cache_entry structure */
1072 first->len = sizeof(struct cache_entry);
1073 first->last_append = NULL;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001074
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +01001075 /* Determine the entry's maximum age (taking into account the cache's
1076 * configuration) as well as the response's explicit max age (extracted
1077 * from cache-control directives or the expires header). */
1078 effective_maxage = http_calc_maxage(s, cconf->c.cache, &true_maxage);
1079
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001080 ctx.blk = NULL;
1081 if (http_find_header(htx, ist("Age"), &ctx, 0)) {
1082 if (!strl2llrc(ctx.value.ptr, ctx.value.len, &hdr_age) && hdr_age > 0) {
1083 if (unlikely(hdr_age > CACHE_ENTRY_MAX_AGE))
1084 hdr_age = CACHE_ENTRY_MAX_AGE;
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +01001085 /* A response with an Age value greater than its
1086 * announced max age is stale and should not be stored. */
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001087 object->age = hdr_age;
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +01001088 if (unlikely(object->age > true_maxage))
1089 goto out;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001090 }
Remi Tricot-Le Breton51058d62020-12-03 18:19:32 +01001091 else
1092 goto out;
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001093 http_remove_header(htx, &ctx);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001094 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001095
Remi Tricot Le Breton27091b42020-10-23 10:51:27 +02001096 /* Build a last-modified time that will be stored in the cache_entry and
1097 * compared to a future If-Modified-Since client header. */
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001098 object->last_modified = get_last_modified_time(htx);
Remi Tricot Le Breton27091b42020-10-23 10:51:27 +02001099
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001100 chunk_reset(&trash);
1101 for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
1102 struct htx_blk *blk = htx_get_blk(htx, pos);
1103 enum htx_blk_type type = htx_get_blk_type(blk);
1104 uint32_t sz = htx_get_blksz(blk);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001105
Christopher Fauletb0667472019-09-03 22:22:12 +02001106 hdrs_len += sizeof(*blk) + sz;
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001107 chunk_memcat(&trash, (char *)&blk->info, sizeof(blk->info));
1108 chunk_memcat(&trash, htx_get_blk_ptr(htx, blk), sz);
Remi Tricot-Le Bretondbb65b52020-10-22 10:40:04 +02001109
1110 /* Look for optional ETag header.
1111 * We need to store the offset of the ETag value in order for
1112 * future conditional requests to be able to perform ETag
1113 * comparisons. */
1114 if (type == HTX_BLK_HDR) {
1115 header_name = htx_get_blk_name(htx, blk);
1116 if (isteq(header_name, ist("etag"))) {
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001117 object->etag_length = sz - istlen(header_name);
1118 object->etag_offset = sizeof(struct cache_entry) + b_data(&trash) - sz + istlen(header_name);
Remi Tricot-Le Bretondbb65b52020-10-22 10:40:04 +02001119 }
1120 }
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001121 if (type == HTX_BLK_EOH)
1122 break;
Frédéric Lécaillee7a770c2018-10-26 14:29:22 +02001123 }
1124
Christopher Fauletb0667472019-09-03 22:22:12 +02001125 /* Do not cache objects if the headers are too big. */
1126 if (hdrs_len > htx->size - global.tune.maxrewrite)
1127 goto out;
1128
William Lallemand4da3f8a2017-10-31 14:33:34 +01001129 shctx_lock(shctx);
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001130 if (!shctx_row_reserve_hot(shctx, first, trash.data)) {
William Lallemand4da3f8a2017-10-31 14:33:34 +01001131 shctx_unlock(shctx);
1132 goto out;
1133 }
1134 shctx_unlock(shctx);
1135
William Lallemand4da3f8a2017-10-31 14:33:34 +01001136 /* cache the headers in a http action because it allows to chose what
1137 * to cache, for example you might want to cache a response before
1138 * modifying some HTTP headers, or on the contrary after modifying
1139 * those headers.
1140 */
William Lallemand4da3f8a2017-10-31 14:33:34 +01001141 /* does not need to be locked because it's in the "hot" list,
1142 * copy the headers */
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001143 if (shctx_row_data_append(shctx, first, NULL, (unsigned char *)trash.area, trash.data) < 0)
1144 goto out;
William Lallemand4da3f8a2017-10-31 14:33:34 +01001145
1146 /* register the buffer in the filter ctx for filling it with data*/
Christopher Faulet839791a2019-01-07 16:12:07 +01001147 if (cache_ctx) {
1148 cache_ctx->first_block = first;
Christopher Faulet839791a2019-01-07 16:12:07 +01001149 /* store latest value and expiration time */
1150 object->latest_validation = now.tv_sec;
Remi Tricot-Le Breton795e1412020-12-03 18:19:29 +01001151 object->expire = now.tv_sec + effective_maxage;
Christopher Faulet839791a2019-01-07 16:12:07 +01001152 return ACT_RET_CONT;
William Lallemand4da3f8a2017-10-31 14:33:34 +01001153 }
1154
1155out:
1156 /* if does not cache */
1157 if (first) {
1158 shctx_lock(shctx);
William Lallemand08727662017-11-21 20:01:27 +01001159 first->len = 0;
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001160 if (object->eb.key)
Remi Tricot-Le Breton65904e42020-12-10 17:58:41 +01001161 delete_entry(object);
William Lallemand08727662017-11-21 20:01:27 +01001162 object->eb.key = 0;
William Lallemand4da3f8a2017-10-31 14:33:34 +01001163 shctx_row_dec_hot(shctx, first);
1164 shctx_unlock(shctx);
1165 }
1166
William Lallemand41db4602017-10-30 11:15:51 +01001167 return ACT_RET_CONT;
1168}
1169
/* States of the cache applet, held in appctx->st0. */
#define HTX_CACHE_INIT   0   /* Initial state. */
#define HTX_CACHE_HEADER 1   /* Cache entry headers forwarding */
#define HTX_CACHE_DATA   2   /* Cache entry data forwarding */
#define HTX_CACHE_EOM    3   /* Cache entry completely forwarded. Finish the HTX message */
#define HTX_CACHE_END    4   /* Cache entry treatment terminated */
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001175
William Lallemandecb73b12017-11-24 14:33:55 +01001176static void http_cache_applet_release(struct appctx *appctx)
1177{
Christopher Faulet95220e22018-12-07 17:34:39 +01001178 struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
William Lallemandecb73b12017-11-24 14:33:55 +01001179 struct cache_entry *cache_ptr = appctx->ctx.cache.entry;
Christopher Faulet95220e22018-12-07 17:34:39 +01001180 struct cache *cache = cconf->c.cache;
William Lallemandecb73b12017-11-24 14:33:55 +01001181 struct shared_block *first = block_ptr(cache_ptr);
1182
1183 shctx_lock(shctx_ptr(cache));
1184 shctx_row_dec_hot(shctx_ptr(cache), first);
1185 shctx_unlock(shctx_ptr(cache));
1186}
1187
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001188
1189static unsigned int htx_cache_dump_blk(struct appctx *appctx, struct htx *htx, enum htx_blk_type type,
1190 uint32_t info, struct shared_block *shblk, unsigned int offset)
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001191{
Christopher Faulet95220e22018-12-07 17:34:39 +01001192 struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
1193 struct shared_context *shctx = shctx_ptr(cconf->c.cache);
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001194 struct htx_blk *blk;
Christopher Faulet15a4ce82019-09-03 22:11:52 +02001195 char *ptr;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001196 unsigned int max, total;
1197 uint32_t blksz;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001198
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001199 max = htx_get_max_blksz(htx, channel_htx_recv_max(si_ic(appctx->owner), htx));
1200 if (!max)
1201 return 0;
Christopher Faulet2d7c5392019-06-03 10:41:26 +02001202 blksz = ((type == HTX_BLK_HDR || type == HTX_BLK_TLR)
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001203 ? (info & 0xff) + ((info >> 8) & 0xfffff)
1204 : info & 0xfffffff);
1205 if (blksz > max)
1206 return 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001207
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001208 blk = htx_add_blk(htx, type, blksz);
1209 if (!blk)
1210 return 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001211
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001212 blk->info = info;
1213 total = 4;
Christopher Faulet15a4ce82019-09-03 22:11:52 +02001214 ptr = htx_get_blk_ptr(htx, blk);
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001215 while (blksz) {
1216 max = MIN(blksz, shctx->block_size - offset);
Christopher Faulet15a4ce82019-09-03 22:11:52 +02001217 memcpy(ptr, (const char *)shblk->data + offset, max);
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001218 offset += max;
1219 blksz -= max;
1220 total += max;
Christopher Faulet15a4ce82019-09-03 22:11:52 +02001221 ptr += max;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001222 if (blksz || offset == shctx->block_size) {
1223 shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
1224 offset = 0;
1225 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001226 }
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001227 appctx->ctx.cache.offset = offset;
1228 appctx->ctx.cache.next = shblk;
1229 appctx->ctx.cache.sent += total;
1230 return total;
1231}
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001232
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001233static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *htx,
1234 uint32_t info, struct shared_block *shblk, unsigned int offset)
1235{
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001236
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001237 struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
1238 struct shared_context *shctx = shctx_ptr(cconf->c.cache);
1239 unsigned int max, total, rem_data;
1240 uint32_t blksz;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001241
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001242 max = htx_get_max_blksz(htx, channel_htx_recv_max(si_ic(appctx->owner), htx));
1243 if (!max)
1244 return 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001245
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001246 rem_data = 0;
Christopher Fauletbda83972019-06-11 09:58:09 +02001247 if (appctx->ctx.cache.rem_data) {
1248 blksz = appctx->ctx.cache.rem_data;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001249 total = 0;
Christopher Fauletbda83972019-06-11 09:58:09 +02001250 }
1251 else {
1252 blksz = (info & 0xfffffff);
1253 total = 4;
1254 }
1255 if (blksz > max) {
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001256 rem_data = blksz - max;
1257 blksz = max;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001258 }
1259
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001260 while (blksz) {
1261 size_t sz;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001262
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001263 max = MIN(blksz, shctx->block_size - offset);
1264 sz = htx_add_data(htx, ist2(shblk->data + offset, max));
1265 offset += sz;
1266 blksz -= sz;
1267 total += sz;
1268 if (sz < max)
1269 break;
1270 if (blksz || offset == shctx->block_size) {
1271 shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
1272 offset = 0;
1273 }
1274 }
1275
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001276 appctx->ctx.cache.offset = offset;
1277 appctx->ctx.cache.next = shblk;
1278 appctx->ctx.cache.sent += total;
1279 appctx->ctx.cache.rem_data = rem_data + blksz;
1280 return total;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001281}
1282
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001283static size_t htx_cache_dump_msg(struct appctx *appctx, struct htx *htx, unsigned int len,
1284 enum htx_blk_type mark)
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001285{
Christopher Faulet95220e22018-12-07 17:34:39 +01001286 struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
1287 struct shared_context *shctx = shctx_ptr(cconf->c.cache);
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001288 struct shared_block *shblk;
1289 unsigned int offset, sz;
1290 unsigned int ret, total = 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001291
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001292 while (len) {
1293 enum htx_blk_type type;
1294 uint32_t info;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001295
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001296 shblk = appctx->ctx.cache.next;
1297 offset = appctx->ctx.cache.offset;
1298 if (appctx->ctx.cache.rem_data) {
1299 type = HTX_BLK_DATA;
1300 info = 0;
1301 goto add_data_blk;
1302 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001303
Ilya Shipitsin6fb0f212020-04-02 15:25:26 +05001304 /* Get info of the next HTX block. May be split on 2 shblk */
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001305 sz = MIN(4, shctx->block_size - offset);
1306 memcpy((char *)&info, (const char *)shblk->data + offset, sz);
1307 offset += sz;
1308 if (sz < 4) {
1309 shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
1310 memcpy(((char *)&info)+sz, (const char *)shblk->data, 4 - sz);
1311 offset = (4 - sz);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001312 }
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001313
1314 /* Get payload of the next HTX block and insert it. */
1315 type = (info >> 28);
1316 if (type != HTX_BLK_DATA)
1317 ret = htx_cache_dump_blk(appctx, htx, type, info, shblk, offset);
1318 else {
1319 add_data_blk:
1320 ret = htx_cache_dump_data_blk(appctx, htx, info, shblk, offset);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001321 }
1322
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001323 if (!ret)
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001324 break;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001325 total += ret;
1326 len -= ret;
1327
1328 if (appctx->ctx.cache.rem_data || type == mark)
1329 break;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001330 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001331
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001332 return total;
1333}
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001334
1335static int htx_cache_add_age_hdr(struct appctx *appctx, struct htx *htx)
1336{
1337 struct cache_entry *cache_ptr = appctx->ctx.cache.entry;
1338 unsigned int age;
1339 char *end;
1340
1341 chunk_reset(&trash);
1342 age = MAX(0, (int)(now.tv_sec - cache_ptr->latest_validation)) + cache_ptr->age;
1343 if (unlikely(age > CACHE_ENTRY_MAX_AGE))
1344 age = CACHE_ENTRY_MAX_AGE;
1345 end = ultoa_o(age, b_head(&trash), b_size(&trash));
1346 b_set_data(&trash, end - b_head(&trash));
1347 if (!http_add_header(htx, ist("Age"), ist2(b_head(&trash), b_data(&trash))))
1348 return 0;
1349 return 1;
1350}
1351
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001352static void http_cache_io_handler(struct appctx *appctx)
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001353{
1354 struct cache_entry *cache_ptr = appctx->ctx.cache.entry;
1355 struct shared_block *first = block_ptr(cache_ptr);
1356 struct stream_interface *si = appctx->owner;
1357 struct channel *req = si_oc(si);
1358 struct channel *res = si_ic(si);
1359 struct htx *req_htx, *res_htx;
1360 struct buffer *errmsg;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001361 unsigned int len;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001362 size_t ret, total = 0;
1363
1364 res_htx = htxbuf(&res->buf);
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001365 total = res_htx->data;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001366
1367 if (unlikely(si->state == SI_ST_DIS || si->state == SI_ST_CLO))
1368 goto out;
1369
Ilya Shipitsin6fb0f212020-04-02 15:25:26 +05001370 /* Check if the input buffer is available. */
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001371 if (!b_size(&res->buf)) {
1372 si_rx_room_blk(si);
1373 goto out;
1374 }
1375
Willy Tarreauefef3232018-12-16 00:37:45 +01001376 if (res->flags & (CF_SHUTW|CF_SHUTR|CF_SHUTW_NOW))
Willy Tarreau273e9642018-12-16 00:35:15 +01001377 appctx->st0 = HTX_CACHE_END;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001378
1379 if (appctx->st0 == HTX_CACHE_INIT) {
1380 appctx->ctx.cache.next = block_ptr(cache_ptr);
1381 appctx->ctx.cache.offset = sizeof(*cache_ptr);
1382 appctx->ctx.cache.sent = 0;
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001383 appctx->ctx.cache.rem_data = 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001384 appctx->st0 = HTX_CACHE_HEADER;
1385 }
1386
1387 if (appctx->st0 == HTX_CACHE_HEADER) {
1388 /* Headers must be dump at once. Otherwise it is an error */
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001389 len = first->len - sizeof(*cache_ptr) - appctx->ctx.cache.sent;
1390 ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_EOH);
1391 if (!ret || (htx_get_tail_type(res_htx) != HTX_BLK_EOH) ||
1392 !htx_cache_add_age_hdr(appctx, res_htx))
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001393 goto error;
1394
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001395 /* In case of a conditional request, we might want to send a
1396 * "304 Not Modified" response instead of the stored data. */
Tim Duesterhuse0142342020-10-22 21:15:06 +02001397 if (appctx->ctx.cache.send_notmodified) {
1398 if (!http_replace_res_status(res_htx, ist("304"), ist("Not Modified"))) {
1399 /* If replacing the status code fails we need to send the full response. */
1400 appctx->ctx.cache.send_notmodified = 0;
1401 }
1402 }
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001403
1404 /* Skip response body for HEAD requests or in case of "304 Not
1405 * Modified" response. */
1406 if (si_strm(si)->txn->meth == HTTP_METH_HEAD || appctx->ctx.cache.send_notmodified)
Christopher Fauletf0dd0372019-02-25 11:08:34 +01001407 appctx->st0 = HTX_CACHE_EOM;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001408 else
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001409 appctx->st0 = HTX_CACHE_DATA;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001410 }
1411
1412 if (appctx->st0 == HTX_CACHE_DATA) {
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001413 len = first->len - sizeof(*cache_ptr) - appctx->ctx.cache.sent;
1414 if (len) {
1415 ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_EOM);
1416 if (ret < len) {
1417 si_rx_room_blk(si);
1418 goto out;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001419 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001420 }
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001421 appctx->st0 = HTX_CACHE_END;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001422 }
1423
1424 if (appctx->st0 == HTX_CACHE_EOM) {
Christopher Faulet810df062020-07-22 16:20:34 +02001425 res_htx->flags |= HTX_FL_EOI; /* no more data are expected. Only EOM remains to add now */
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001426 if (!htx_add_endof(res_htx, HTX_BLK_EOM)) {
1427 si_rx_room_blk(si);
1428 goto out;
1429 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001430 appctx->st0 = HTX_CACHE_END;
1431 }
1432
1433 end:
Christopher Fauletadb36312019-02-25 11:40:49 +01001434 if (!(res->flags & CF_SHUTR) && appctx->st0 == HTX_CACHE_END) {
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001435 res->flags |= CF_READ_NULL;
1436 si_shutr(si);
1437 }
1438
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001439 out:
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001440 total = res_htx->data - total;
Christopher Faulet61123912019-01-02 14:10:01 +01001441 if (total)
1442 channel_add_input(res, total);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001443 htx_to_buf(res_htx, &res->buf);
Christopher Fauletadb36312019-02-25 11:40:49 +01001444
1445 /* eat the whole request */
1446 if (co_data(req)) {
1447 req_htx = htx_from_buf(&req->buf);
1448 co_htx_skip(req, req_htx, co_data(req));
1449 htx_to_buf(req_htx, &req->buf);
1450 }
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001451 return;
1452
1453 error:
1454 /* Sent and HTTP error 500 */
1455 b_reset(&res->buf);
Christopher Fauletf7346382019-07-17 22:02:08 +02001456 errmsg = &http_err_chunks[HTTP_ERR_500];
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001457 res->buf.data = b_data(errmsg);
1458 memcpy(res->buf.area, b_head(errmsg), b_data(errmsg));
1459 res_htx = htx_from_buf(&res->buf);
1460
Christopher Faulet8f3c2562019-06-03 22:19:18 +02001461 total = 0;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001462 appctx->st0 = HTX_CACHE_END;
1463 goto end;
1464}
1465
1466
Christopher Faulet95220e22018-12-07 17:34:39 +01001467static int parse_cache_rule(struct proxy *proxy, const char *name, struct act_rule *rule, char **err)
William Lallemand41db4602017-10-30 11:15:51 +01001468{
1469 struct flt_conf *fconf;
Christopher Faulet95220e22018-12-07 17:34:39 +01001470 struct cache_flt_conf *cconf = NULL;
William Lallemand41db4602017-10-30 11:15:51 +01001471
Christopher Faulet95220e22018-12-07 17:34:39 +01001472 if (!*name || strcmp(name, "if") == 0 || strcmp(name, "unless") == 0) {
William Lallemand41db4602017-10-30 11:15:51 +01001473 memprintf(err, "expects a cache name");
Christopher Faulet95220e22018-12-07 17:34:39 +01001474 goto err;
William Lallemand41db4602017-10-30 11:15:51 +01001475 }
1476
1477 /* check if a cache filter was already registered with this cache
1478 * name, if that's the case, must use it. */
1479 list_for_each_entry(fconf, &proxy->filter_configs, list) {
Christopher Faulet95220e22018-12-07 17:34:39 +01001480 if (fconf->id == cache_store_flt_id) {
1481 cconf = fconf->conf;
1482 if (cconf && !strcmp((char *)cconf->c.name, name)) {
1483 rule->arg.act.p[0] = cconf;
1484 return 1;
1485 }
William Lallemand41db4602017-10-30 11:15:51 +01001486 }
1487 }
1488
Christopher Faulet95220e22018-12-07 17:34:39 +01001489 /* Create the filter cache config */
1490 cconf = calloc(1, sizeof(*cconf));
1491 if (!cconf) {
1492 memprintf(err, "out of memory\n");
1493 goto err;
1494 }
Christopher Faulet99a17a22018-12-11 09:18:27 +01001495 cconf->flags = CACHE_FLT_F_IMPLICIT_DECL;
Christopher Faulet95220e22018-12-07 17:34:39 +01001496 cconf->c.name = strdup(name);
1497 if (!cconf->c.name) {
1498 memprintf(err, "out of memory\n");
William Lallemand41db4602017-10-30 11:15:51 +01001499 goto err;
1500 }
Christopher Faulet95220e22018-12-07 17:34:39 +01001501
William Lallemand41db4602017-10-30 11:15:51 +01001502 /* register a filter to fill the cache buffer */
1503 fconf = calloc(1, sizeof(*fconf));
1504 if (!fconf) {
Christopher Faulet95220e22018-12-07 17:34:39 +01001505 memprintf(err, "out of memory\n");
William Lallemand41db4602017-10-30 11:15:51 +01001506 goto err;
1507 }
Christopher Faulet95220e22018-12-07 17:34:39 +01001508 fconf->id = cache_store_flt_id;
1509 fconf->conf = cconf;
William Lallemand41db4602017-10-30 11:15:51 +01001510 fconf->ops = &cache_ops;
1511 LIST_ADDQ(&proxy->filter_configs, &fconf->list);
1512
Christopher Faulet95220e22018-12-07 17:34:39 +01001513 rule->arg.act.p[0] = cconf;
1514 return 1;
William Lallemand41db4602017-10-30 11:15:51 +01001515
Christopher Faulet95220e22018-12-07 17:34:39 +01001516 err:
1517 free(cconf);
1518 return 0;
1519}
1520
1521enum act_parse_ret parse_cache_store(const char **args, int *orig_arg, struct proxy *proxy,
1522 struct act_rule *rule, char **err)
1523{
1524 rule->action = ACT_CUSTOM;
1525 rule->action_ptr = http_action_store_cache;
1526
1527 if (!parse_cache_rule(proxy, args[*orig_arg], rule, err))
1528 return ACT_RET_PRS_ERR;
William Lallemand41db4602017-10-30 11:15:51 +01001529
Christopher Faulet95220e22018-12-07 17:34:39 +01001530 (*orig_arg)++;
1531 return ACT_RET_PRS_OK;
William Lallemand41db4602017-10-30 11:15:51 +01001532}
1533
Baptiste Assmanndb92a832019-08-05 16:55:32 +02001534/* This produces a sha1 hash of the concatenation of the HTTP method,
1535 * the first occurrence of the Host header followed by the path component
1536 * if it begins with a slash ('/'). */
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001537int sha1_hosturi(struct stream *s)
William Lallemandf528fff2017-11-23 19:43:17 +01001538{
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001539 struct http_txn *txn = s->txn;
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001540 struct htx *htx = htxbuf(&s->req.buf);
1541 struct htx_sl *sl;
1542 struct http_hdr_ctx ctx;
Willy Tarreauccc61d82019-10-17 09:28:28 +02001543 struct ist uri;
William Lallemandf528fff2017-11-23 19:43:17 +01001544 blk_SHA_CTX sha1_ctx;
Willy Tarreau83061a82018-07-13 11:56:34 +02001545 struct buffer *trash;
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001546
William Lallemandf528fff2017-11-23 19:43:17 +01001547 trash = get_trash_chunk();
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001548 ctx.blk = NULL;
Baptiste Assmanndb92a832019-08-05 16:55:32 +02001549
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001550 sl = http_get_stline(htx);
Willy Tarreauccc61d82019-10-17 09:28:28 +02001551 uri = htx_sl_req_uri(sl); // whole uri
1552 if (!uri.len)
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001553 return 0;
Willy Tarreauccc61d82019-10-17 09:28:28 +02001554
1555 /* In HTTP/1, most URIs are seen in origin form ('/path/to/resource'),
1556 * unless haproxy is deployed in front of an outbound cache. In HTTP/2,
1557 * URIs are almost always sent in absolute form with their scheme. In
1558 * this case, the scheme is almost always "https". In order to support
1559 * sharing of cache objects between H1 and H2, we'll hash the absolute
1560 * URI whenever known, or prepend "https://" + the Host header for
1561 * relative URIs. The difference will only appear on absolute HTTP/1
1562 * requests sent to an origin server, which practically is never met in
1563 * the real world so we don't care about the ability to share the same
1564 * key here.URIs are normalized from the absolute URI to an origin form as
1565 * well.
1566 */
1567 if (!(sl->flags & HTX_SL_F_HAS_AUTHORITY)) {
Willy Tarreau20020ae2019-10-29 13:02:15 +01001568 chunk_istcat(trash, ist("https://"));
Willy Tarreauccc61d82019-10-17 09:28:28 +02001569 if (!http_find_header(htx, ist("Host"), &ctx, 0))
1570 return 0;
Willy Tarreau20020ae2019-10-29 13:02:15 +01001571 chunk_istcat(trash, ctx.value);
Willy Tarreauccc61d82019-10-17 09:28:28 +02001572 }
1573
1574 chunk_memcat(trash, uri.ptr, uri.len);
William Lallemandf528fff2017-11-23 19:43:17 +01001575
1576 /* hash everything */
1577 blk_SHA1_Init(&sha1_ctx);
Willy Tarreau843b7cb2018-07-13 10:54:26 +02001578 blk_SHA1_Update(&sha1_ctx, trash->area, trash->data);
William Lallemandf528fff2017-11-23 19:43:17 +01001579 blk_SHA1_Final((unsigned char *)txn->cache_hash, &sha1_ctx);
1580
1581 return 1;
1582}
1583
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001584/* Looks for "If-None-Match" headers in the request and compares their value
1585 * with the one that might have been stored in the cache_entry. If any of them
1586 * matches, a "304 Not Modified" response should be sent instead of the cached
1587 * data.
1588 * Although unlikely in a GET/HEAD request, the "If-None-Match: *" syntax is
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001589 * valid and should receive a "304 Not Modified" response (RFC 7234#4.3.2).
1590 *
1591 * If no "If-None-Match" header was found, look for an "If-Modified-Since"
1592 * header and compare its value (date) to the one stored in the cache_entry.
1593 * If the request's date is later than the cached one, we also send a
1594 * "304 Not Modified" response (see RFCs 7232#3.3 and 7234#4.3.2).
1595 *
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001596 * Returns 1 if "304 Not Modified" should be sent, 0 otherwise.
1597 */
1598static int should_send_notmodified_response(struct cache *cache, struct htx *htx,
1599 struct cache_entry *entry)
1600{
1601 int retval = 0;
1602
1603 struct http_hdr_ctx ctx = { .blk = NULL };
1604 struct ist cache_entry_etag = IST_NULL;
1605 struct buffer *etag_buffer = NULL;
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001606 int if_none_match_found = 0;
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001607
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001608 struct tm tm = {};
1609 time_t if_modified_since = 0;
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001610
1611 /* If we find a "If-None-Match" header in the request, rebuild the
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001612 * cache_entry's ETag in order to perform comparisons.
1613 * There could be multiple "if-none-match" header lines. */
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001614 while (http_find_header(htx, ist("if-none-match"), &ctx, 0)) {
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001615 if_none_match_found = 1;
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001616
1617 /* A '*' matches everything. */
1618 if (isteq(ctx.value, ist("*")) != 0) {
1619 retval = 1;
1620 break;
1621 }
1622
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001623 /* No need to rebuild an etag if none was stored in the cache. */
1624 if (entry->etag_length == 0)
1625 break;
1626
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001627 /* Rebuild the stored ETag. */
1628 if (etag_buffer == NULL) {
1629 etag_buffer = get_trash_chunk();
1630
1631 if (shctx_row_data_get(shctx_ptr(cache), block_ptr(entry),
1632 (unsigned char*)b_orig(etag_buffer),
1633 entry->etag_offset, entry->etag_length) == 0) {
1634 cache_entry_etag = ist2(b_orig(etag_buffer), entry->etag_length);
1635 } else {
1636 /* We could not rebuild the ETag in one go, we
1637 * won't send a "304 Not Modified" response. */
1638 break;
1639 }
1640 }
1641
1642 if (http_compare_etags(cache_entry_etag, ctx.value) == 1) {
1643 retval = 1;
1644 break;
1645 }
1646 }
1647
Remi Tricot-Le Breton53161d82020-10-23 10:51:28 +02001648 /* If the request did not contain an "If-None-Match" header, we look for
1649 * an "If-Modified-Since" header (see RFC 7232#3.3). */
1650 if (retval == 0 && if_none_match_found == 0) {
1651 ctx.blk = NULL;
1652 if (http_find_header(htx, ist("if-modified-since"), &ctx, 1)) {
1653 if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
1654 if_modified_since = my_timegm(&tm);
1655
1656 /* We send a "304 Not Modified" response if the
1657 * entry's last modified date is earlier than
1658 * the one found in the "If-Modified-Since"
1659 * header. */
1660 retval = (entry->last_modified <= if_modified_since);
1661 }
1662 }
1663 }
1664
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001665 return retval;
1666}
1667
William Lallemand41db4602017-10-30 11:15:51 +01001668enum act_return http_action_req_cache_use(struct act_rule *rule, struct proxy *px,
1669 struct session *sess, struct stream *s, int flags)
1670{
William Lallemand77c11972017-10-31 20:43:01 +01001671
Christopher Fauletb3d4bca2019-02-25 10:59:33 +01001672 struct http_txn *txn = s->txn;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001673 struct cache_entry *res, *sec_entry = NULL;
Christopher Faulet95220e22018-12-07 17:34:39 +01001674 struct cache_flt_conf *cconf = rule->arg.act.p[0];
1675 struct cache *cache = cconf->c.cache;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001676 struct shared_block *entry_block;
1677
William Lallemand77c11972017-10-31 20:43:01 +01001678
Willy Tarreau6905d182019-10-01 17:59:17 +02001679 /* Ignore cache for HTTP/1.0 requests and for requests other than GET
1680 * and HEAD */
Christopher Fauletb3d4bca2019-02-25 10:59:33 +01001681 if (!(txn->req.flags & HTTP_MSGF_VER_11) ||
Willy Tarreau6905d182019-10-01 17:59:17 +02001682 (txn->meth != HTTP_METH_GET && txn->meth != HTTP_METH_HEAD))
Christopher Fauletb3d4bca2019-02-25 10:59:33 +01001683 txn->flags |= TX_CACHE_IGNORE;
1684
Christopher Fauletfc9cfe42019-07-16 14:54:53 +02001685 http_check_request_for_cacheability(s, &s->req);
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01001686
Remi Tricot-Le Breton72cffaf2020-12-03 18:19:31 +01001687 /* The request's hash has to be calculated for all requests, even POSTs
Ilya Shipitsinf38a0182020-12-21 01:16:17 +05001688 * or PUTs for instance because RFC7234 specifies that a successful
Remi Tricot-Le Breton72cffaf2020-12-03 18:19:31 +01001689 * "unsafe" method on a stored resource must invalidate it
1690 * (see RFC7234#4.4). */
1691 if (!sha1_hosturi(s))
Willy Tarreau504455c2017-12-22 17:47:35 +01001692 return ACT_RET_CONT;
1693
Remi Tricot-Le Breton72cffaf2020-12-03 18:19:31 +01001694 if ((s->txn->flags & (TX_CACHE_IGNORE|TX_CACHEABLE)) == TX_CACHE_IGNORE)
Willy Tarreau7704b1e2017-12-22 16:32:43 +01001695 return ACT_RET_CONT;
William Lallemandf528fff2017-11-23 19:43:17 +01001696
Willy Tarreau504455c2017-12-22 17:47:35 +01001697 if (s->txn->flags & TX_CACHE_IGNORE)
1698 return ACT_RET_CONT;
1699
Willy Tarreaua1214a52018-12-14 14:00:25 +01001700 if (px == strm_fe(s))
Olivier Houchardaa090d42019-03-08 18:49:24 +01001701 _HA_ATOMIC_ADD(&px->fe_counters.p.http.cache_lookups, 1);
Willy Tarreaua1214a52018-12-14 14:00:25 +01001702 else
Olivier Houchardaa090d42019-03-08 18:49:24 +01001703 _HA_ATOMIC_ADD(&px->be_counters.p.http.cache_lookups, 1);
Willy Tarreaua1214a52018-12-14 14:00:25 +01001704
William Lallemanda400a3a2017-11-20 19:13:12 +01001705 shctx_lock(shctx_ptr(cache));
William Lallemandf528fff2017-11-23 19:43:17 +01001706 res = entry_exist(cache, s->txn->cache_hash);
Remi Tricot-Le Breton32434472020-11-25 10:09:43 +01001707 /* We must not use an entry that is not complete. */
1708 if (res && res->complete) {
William Lallemand77c11972017-10-31 20:43:01 +01001709 struct appctx *appctx;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001710 entry_block = block_ptr(res);
1711 shctx_row_inc_hot(shctx_ptr(cache), entry_block);
William Lallemanda400a3a2017-11-20 19:13:12 +01001712 shctx_unlock(shctx_ptr(cache));
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001713
1714 /* In case of Vary, we could have multiple entries with the same
Remi Tricot-Le Breton2b5c5cb2020-12-23 18:13:45 +01001715 * primary hash. We need to calculate the secondary hash in order
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001716 * to find the actual entry we want (if it exists). */
1717 if (res->secondary_key_signature) {
1718 if (!http_request_build_secondary_key(s, res->secondary_key_signature)) {
1719 shctx_lock(shctx_ptr(cache));
1720 sec_entry = secondary_entry_exist(cache, res,
1721 s->txn->cache_secondary_hash);
1722 if (sec_entry && sec_entry != res) {
1723 /* The wrong row was added to the hot list. */
1724 shctx_row_dec_hot(shctx_ptr(cache), entry_block);
1725 entry_block = block_ptr(sec_entry);
1726 shctx_row_inc_hot(shctx_ptr(cache), entry_block);
1727 }
1728 res = sec_entry;
1729 shctx_unlock(shctx_ptr(cache));
1730 }
1731 else
1732 res = NULL;
1733 }
1734
1735 /* We looked for a valid secondary entry and could not find one,
1736 * the request must be forwarded to the server. */
1737 if (!res) {
1738 shctx_lock(shctx_ptr(cache));
1739 shctx_row_dec_hot(shctx_ptr(cache), entry_block);
1740 shctx_unlock(shctx_ptr(cache));
1741 return ACT_RET_CONT;
1742 }
1743
William Lallemand77c11972017-10-31 20:43:01 +01001744 s->target = &http_cache_applet.obj_type;
Willy Tarreau14bfe9a2018-12-19 15:19:27 +01001745 if ((appctx = si_register_handler(&s->si[1], objt_applet(s->target)))) {
Christopher Faulet95e7ea32019-07-15 21:01:29 +02001746 appctx->st0 = HTX_CACHE_INIT;
William Lallemand77c11972017-10-31 20:43:01 +01001747 appctx->rule = rule;
1748 appctx->ctx.cache.entry = res;
Frédéric Lécaille8df65ae2018-10-22 18:01:48 +02001749 appctx->ctx.cache.next = NULL;
1750 appctx->ctx.cache.sent = 0;
Remi Tricot-Le Breton6cb10382020-10-22 10:40:05 +02001751 appctx->ctx.cache.send_notmodified =
1752 should_send_notmodified_response(cache, htxbuf(&s->req.buf), res);
Willy Tarreaua1214a52018-12-14 14:00:25 +01001753
1754 if (px == strm_fe(s))
Olivier Houchardaa090d42019-03-08 18:49:24 +01001755 _HA_ATOMIC_ADD(&px->fe_counters.p.http.cache_hits, 1);
Willy Tarreaua1214a52018-12-14 14:00:25 +01001756 else
Olivier Houchardaa090d42019-03-08 18:49:24 +01001757 _HA_ATOMIC_ADD(&px->be_counters.p.http.cache_hits, 1);
Olivier Houchardfccf8402017-11-01 14:04:02 +01001758 return ACT_RET_CONT;
William Lallemand77c11972017-10-31 20:43:01 +01001759 } else {
William Lallemand55e76742017-11-21 20:01:28 +01001760 shctx_lock(shctx_ptr(cache));
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001761 shctx_row_dec_hot(shctx_ptr(cache), entry_block);
William Lallemand55e76742017-11-21 20:01:28 +01001762 shctx_unlock(shctx_ptr(cache));
Olivier Houchardfccf8402017-11-01 14:04:02 +01001763 return ACT_RET_YIELD;
William Lallemand77c11972017-10-31 20:43:01 +01001764 }
1765 }
William Lallemanda400a3a2017-11-20 19:13:12 +01001766 shctx_unlock(shctx_ptr(cache));
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001767
1768 /* Shared context does not need to be locked while we calculate the
1769 * secondary hash. */
Remi Tricot-Le Breton754b2422020-11-16 15:56:10 +01001770 if (!res && cache->vary_processing_enabled) {
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01001771 /* Build a complete secondary hash until the server response
1772 * tells us which fields should be kept (if any). */
1773 http_request_prebuild_full_secondary_key(s);
1774 }
Olivier Houchardfccf8402017-11-01 14:04:02 +01001775 return ACT_RET_CONT;
William Lallemand41db4602017-10-30 11:15:51 +01001776}
1777
1778
1779enum act_parse_ret parse_cache_use(const char **args, int *orig_arg, struct proxy *proxy,
1780 struct act_rule *rule, char **err)
1781{
William Lallemand41db4602017-10-30 11:15:51 +01001782 rule->action = ACT_CUSTOM;
1783 rule->action_ptr = http_action_req_cache_use;
1784
Christopher Faulet95220e22018-12-07 17:34:39 +01001785 if (!parse_cache_rule(proxy, args[*orig_arg], rule, err))
William Lallemand41db4602017-10-30 11:15:51 +01001786 return ACT_RET_PRS_ERR;
William Lallemand41db4602017-10-30 11:15:51 +01001787
1788 (*orig_arg)++;
1789 return ACT_RET_PRS_OK;
William Lallemand41db4602017-10-30 11:15:51 +01001790}
1791
1792int cfg_parse_cache(const char *file, int linenum, char **args, int kwm)
1793{
1794 int err_code = 0;
1795
1796 if (strcmp(args[0], "cache") == 0) { /* new cache section */
1797
1798 if (!*args[1]) {
Tim Duesterhusff4d86b2020-08-18 22:20:27 +02001799 ha_alert("parsing [%s:%d] : '%s' expects a <name> argument\n",
Christopher Faulet767a84b2017-11-24 16:50:31 +01001800 file, linenum, args[0]);
William Lallemand41db4602017-10-30 11:15:51 +01001801 err_code |= ERR_ALERT | ERR_ABORT;
1802 goto out;
1803 }
1804
1805 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1806 err_code |= ERR_ABORT;
1807 goto out;
1808 }
1809
1810 if (tmp_cache_config == NULL) {
Tim Duesterhusff4d86b2020-08-18 22:20:27 +02001811 struct cache *cache_config;
1812
William Lallemand41db4602017-10-30 11:15:51 +01001813 tmp_cache_config = calloc(1, sizeof(*tmp_cache_config));
1814 if (!tmp_cache_config) {
Christopher Faulet767a84b2017-11-24 16:50:31 +01001815 ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
William Lallemand41db4602017-10-30 11:15:51 +01001816 err_code |= ERR_ALERT | ERR_ABORT;
1817 goto out;
1818 }
1819
1820 strlcpy2(tmp_cache_config->id, args[1], 33);
1821 if (strlen(args[1]) > 32) {
Tim Duesterhusff4d86b2020-08-18 22:20:27 +02001822 ha_warning("parsing [%s:%d]: cache name is limited to 32 characters, truncate to '%s'.\n",
Christopher Faulet767a84b2017-11-24 16:50:31 +01001823 file, linenum, tmp_cache_config->id);
William Lallemand41db4602017-10-30 11:15:51 +01001824 err_code |= ERR_WARN;
1825 }
Tim Duesterhusff4d86b2020-08-18 22:20:27 +02001826
1827 list_for_each_entry(cache_config, &caches_config, list) {
1828 if (strcmp(tmp_cache_config->id, cache_config->id) == 0) {
1829 ha_alert("parsing [%s:%d]: Duplicate cache name '%s'.\n",
1830 file, linenum, tmp_cache_config->id);
1831 err_code |= ERR_ALERT | ERR_ABORT;
1832 goto out;
1833 }
1834 }
1835
William Lallemand49b44532017-11-24 18:53:43 +01001836 tmp_cache_config->maxage = 60;
William Lallemand41db4602017-10-30 11:15:51 +01001837 tmp_cache_config->maxblocks = 0;
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02001838 tmp_cache_config->maxobjsz = 0;
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +01001839 tmp_cache_config->max_secondary_entries = DEFAULT_MAX_SECONDARY_ENTRY;
William Lallemand41db4602017-10-30 11:15:51 +01001840 }
1841 } else if (strcmp(args[0], "total-max-size") == 0) {
Frédéric Lécailleb9b8b6b2018-10-25 20:17:45 +02001842 unsigned long int maxsize;
1843 char *err;
William Lallemand41db4602017-10-30 11:15:51 +01001844
1845 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1846 err_code |= ERR_ABORT;
1847 goto out;
1848 }
1849
Frédéric Lécailleb9b8b6b2018-10-25 20:17:45 +02001850 maxsize = strtoul(args[1], &err, 10);
1851 if (err == args[1] || *err != '\0') {
1852 ha_warning("parsing [%s:%d]: total-max-size wrong value '%s'\n",
1853 file, linenum, args[1]);
1854 err_code |= ERR_ABORT;
1855 goto out;
1856 }
1857
1858 if (maxsize > (UINT_MAX >> 20)) {
1859 ha_warning("parsing [%s:%d]: \"total-max-size\" (%s) must not be greater than %u\n",
1860 file, linenum, args[1], UINT_MAX >> 20);
1861 err_code |= ERR_ABORT;
1862 goto out;
1863 }
1864
William Lallemand41db4602017-10-30 11:15:51 +01001865 /* size in megabytes */
Frédéric Lécailleb9b8b6b2018-10-25 20:17:45 +02001866 maxsize *= 1024 * 1024 / CACHE_BLOCKSIZE;
William Lallemand41db4602017-10-30 11:15:51 +01001867 tmp_cache_config->maxblocks = maxsize;
William Lallemand49b44532017-11-24 18:53:43 +01001868 } else if (strcmp(args[0], "max-age") == 0) {
1869 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1870 err_code |= ERR_ABORT;
1871 goto out;
1872 }
1873
1874 if (!*args[1]) {
1875 ha_warning("parsing [%s:%d]: '%s' expects an age parameter in seconds.\n",
1876 file, linenum, args[0]);
1877 err_code |= ERR_WARN;
1878 }
1879
1880 tmp_cache_config->maxage = atoi(args[1]);
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02001881 } else if (strcmp(args[0], "max-object-size") == 0) {
Frédéric Lécaille4eba5442018-10-25 20:29:31 +02001882 unsigned int maxobjsz;
1883 char *err;
1884
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02001885 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1886 err_code |= ERR_ABORT;
1887 goto out;
1888 }
1889
1890 if (!*args[1]) {
1891 ha_warning("parsing [%s:%d]: '%s' expects a maximum file size parameter in bytes.\n",
1892 file, linenum, args[0]);
1893 err_code |= ERR_WARN;
1894 }
1895
Frédéric Lécaille4eba5442018-10-25 20:29:31 +02001896 maxobjsz = strtoul(args[1], &err, 10);
1897 if (err == args[1] || *err != '\0') {
1898 ha_warning("parsing [%s:%d]: max-object-size wrong value '%s'\n",
1899 file, linenum, args[1]);
1900 err_code |= ERR_ABORT;
1901 goto out;
1902 }
1903 tmp_cache_config->maxobjsz = maxobjsz;
Remi Tricot-Le Breton754b2422020-11-16 15:56:10 +01001904 } else if (strcmp(args[0], "process-vary") == 0) {
1905 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1906 err_code |= ERR_ABORT;
1907 goto out;
1908 }
1909
1910 if (!*args[1]) {
1911 ha_warning("parsing [%s:%d]: '%s' expects 0 or 1 (disable or enable vary processing).\n",
1912 file, linenum, args[0]);
1913 err_code |= ERR_WARN;
1914 }
1915
1916 tmp_cache_config->vary_processing_enabled = atoi(args[1]);
Remi Tricot-Le Breton5853c0c2020-12-10 17:58:43 +01001917 } else if (strcmp(args[0], "max-secondary-entries") == 0) {
1918 unsigned int max_sec_entries;
1919 char *err;
1920
1921 if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
1922 err_code |= ERR_ABORT;
1923 goto out;
1924 }
1925
1926 if (!*args[1]) {
1927 ha_warning("parsing [%s:%d]: '%s' expects a strictly positive number.\n",
1928 file, linenum, args[0]);
1929 err_code |= ERR_WARN;
1930 }
1931
1932 max_sec_entries = strtoul(args[1], &err, 10);
1933 if (err == args[1] || *err != '\0' || max_sec_entries == 0) {
1934 ha_warning("parsing [%s:%d]: max-secondary-entries wrong value '%s'\n",
1935 file, linenum, args[1]);
1936 err_code |= ERR_ABORT;
1937 goto out;
1938 }
1939 tmp_cache_config->max_secondary_entries = max_sec_entries;
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02001940 }
1941 else if (*args[0] != 0) {
Christopher Faulet767a84b2017-11-24 16:50:31 +01001942 ha_alert("parsing [%s:%d] : unknown keyword '%s' in 'cache' section\n", file, linenum, args[0]);
William Lallemand41db4602017-10-30 11:15:51 +01001943 err_code |= ERR_ALERT | ERR_FATAL;
1944 goto out;
1945 }
1946out:
1947 return err_code;
1948}
1949
1950/* once the cache section is parsed */
1951
1952int cfg_post_parse_section_cache()
1953{
William Lallemand41db4602017-10-30 11:15:51 +01001954 int err_code = 0;
William Lallemand41db4602017-10-30 11:15:51 +01001955
1956 if (tmp_cache_config) {
William Lallemand41db4602017-10-30 11:15:51 +01001957
1958 if (tmp_cache_config->maxblocks <= 0) {
Christopher Faulet767a84b2017-11-24 16:50:31 +01001959 ha_alert("Size not specified for cache '%s'\n", tmp_cache_config->id);
William Lallemand41db4602017-10-30 11:15:51 +01001960 err_code |= ERR_FATAL | ERR_ALERT;
1961 goto out;
1962 }
1963
Frédéric Lécaille4eba5442018-10-25 20:29:31 +02001964 if (!tmp_cache_config->maxobjsz) {
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02001965 /* Default max. file size is a 256th of the cache size. */
1966 tmp_cache_config->maxobjsz =
1967 (tmp_cache_config->maxblocks * CACHE_BLOCKSIZE) >> 8;
Frédéric Lécaille4eba5442018-10-25 20:29:31 +02001968 }
1969 else if (tmp_cache_config->maxobjsz > tmp_cache_config->maxblocks * CACHE_BLOCKSIZE / 2) {
1970 ha_alert("\"max-object-size\" is limited to an half of \"total-max-size\" => %u\n", tmp_cache_config->maxblocks * CACHE_BLOCKSIZE / 2);
1971 err_code |= ERR_FATAL | ERR_ALERT;
1972 goto out;
1973 }
Frédéric Lécaillea2219f52018-10-22 16:59:13 +02001974
William Lallemandd1d1e222019-08-28 15:22:49 +02001975 /* add to the list of cache to init and reinit tmp_cache_config
1976 * for next cache section, if any.
1977 */
1978 LIST_ADDQ(&caches_config, &tmp_cache_config->list);
1979 tmp_cache_config = NULL;
1980 return err_code;
1981 }
1982out:
1983 free(tmp_cache_config);
1984 tmp_cache_config = NULL;
1985 return err_code;
1986
1987}
1988
1989int post_check_cache()
1990{
1991 struct proxy *px;
1992 struct cache *back, *cache_config, *cache;
1993 struct shared_context *shctx;
1994 int ret_shctx;
Christopher Fauletfc633b62020-11-06 15:24:23 +01001995 int err_code = ERR_NONE;
William Lallemandd1d1e222019-08-28 15:22:49 +02001996
1997 list_for_each_entry_safe(cache_config, back, &caches_config, list) {
1998
1999 ret_shctx = shctx_init(&shctx, cache_config->maxblocks, CACHE_BLOCKSIZE,
2000 cache_config->maxobjsz, sizeof(struct cache), 1);
William Lallemand4da3f8a2017-10-31 14:33:34 +01002001
Frédéric Lécaillebc584492018-10-25 20:18:59 +02002002 if (ret_shctx <= 0) {
William Lallemand41db4602017-10-30 11:15:51 +01002003 if (ret_shctx == SHCTX_E_INIT_LOCK)
Christopher Faulet767a84b2017-11-24 16:50:31 +01002004 ha_alert("Unable to initialize the lock for the cache.\n");
William Lallemand41db4602017-10-30 11:15:51 +01002005 else
Christopher Faulet767a84b2017-11-24 16:50:31 +01002006 ha_alert("Unable to allocate cache.\n");
William Lallemand41db4602017-10-30 11:15:51 +01002007
2008 err_code |= ERR_FATAL | ERR_ALERT;
2009 goto out;
2010 }
William Lallemanda400a3a2017-11-20 19:13:12 +01002011 shctx->free_block = cache_free_blocks;
William Lallemandd1d1e222019-08-28 15:22:49 +02002012 /* the cache structure is stored in the shctx and added to the
2013 * caches list, we can remove the entry from the caches_config
2014 * list */
2015 memcpy(shctx->data, cache_config, sizeof(struct cache));
William Lallemand41db4602017-10-30 11:15:51 +01002016 cache = (struct cache *)shctx->data;
Remi Tricot-Le Breton1785f3d2020-11-16 15:56:09 +01002017 cache->entries = EB_ROOT;
William Lallemand41db4602017-10-30 11:15:51 +01002018 LIST_ADDQ(&caches, &cache->list);
William Lallemandd1d1e222019-08-28 15:22:49 +02002019 LIST_DEL(&cache_config->list);
2020 free(cache_config);
2021
2022 /* Find all references for this cache in the existing filters
2023 * (over all proxies) and reference it in matching filters.
2024 */
2025 for (px = proxies_list; px; px = px->next) {
2026 struct flt_conf *fconf;
2027 struct cache_flt_conf *cconf;
2028
2029 list_for_each_entry(fconf, &px->filter_configs, list) {
2030 if (fconf->id != cache_store_flt_id)
2031 continue;
2032
2033 cconf = fconf->conf;
2034 if (!strcmp(cache->id, cconf->c.name)) {
2035 free(cconf->c.name);
Tim Duesterhusd7c6e6a2020-09-14 18:01:33 +02002036 cconf->flags |= CACHE_FLT_INIT;
William Lallemandd1d1e222019-08-28 15:22:49 +02002037 cconf->c.cache = cache;
2038 break;
2039 }
2040 }
2041 }
William Lallemand41db4602017-10-30 11:15:51 +01002042 }
William Lallemandd1d1e222019-08-28 15:22:49 +02002043
William Lallemand41db4602017-10-30 11:15:51 +01002044out:
William Lallemand41db4602017-10-30 11:15:51 +01002045 return err_code;
2046
William Lallemand41db4602017-10-30 11:15:51 +01002047}
2048
William Lallemand41db4602017-10-30 11:15:51 +01002049struct flt_ops cache_ops = {
2050 .init = cache_store_init,
Christopher Faulet95220e22018-12-07 17:34:39 +01002051 .check = cache_store_check,
2052 .deinit = cache_store_deinit,
William Lallemand41db4602017-10-30 11:15:51 +01002053
Christopher Faulet65554e12020-03-06 14:52:06 +01002054 /* Handle stream init/deinit */
2055 .attach = cache_store_strm_init,
2056 .detach = cache_store_strm_deinit,
2057
William Lallemand4da3f8a2017-10-31 14:33:34 +01002058 /* Handle channels activity */
Christopher Faulet839791a2019-01-07 16:12:07 +01002059 .channel_post_analyze = cache_store_post_analyze,
William Lallemand4da3f8a2017-10-31 14:33:34 +01002060
2061 /* Filter HTTP requests and responses */
2062 .http_headers = cache_store_http_headers,
Christopher Faulet54a8d5a2018-12-07 12:21:11 +01002063 .http_payload = cache_store_http_payload,
William Lallemand4da3f8a2017-10-31 14:33:34 +01002064 .http_end = cache_store_http_end,
William Lallemand41db4602017-10-30 11:15:51 +01002065};
2066
Christopher Faulet99a17a22018-12-11 09:18:27 +01002067
/*
 * qsort()-compatible comparison of two unsigned int values.
 * <a> and <b> each point to an unsigned int. Returns -1, 0 or 1 when the
 * first value is respectively lower than, equal to or greater than the
 * second one. An explicit comparison is used rather than a subtraction so
 * that the result stays correct over the whole unsigned range.
 */
int accept_encoding_cmp(const void *a, const void *b)
{
	/* keep the const qualifier of the arguments when reading them */
	const unsigned int *lhs = a;
	const unsigned int *rhs = b;

	if (*lhs < *rhs)
		return -1;
	if (*lhs > *rhs)
		return 1;
	return 0;
}
2079
Tim Duesterhus23b29452020-11-24 22:22:56 +01002080#define ACCEPT_ENCODING_MAX_ENTRIES 16
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002081/*
2082 * Build a hash of the accept-encoding header. The different parts of the
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002083 * header value are converted to lower case, hashed, sorted and then all
2084 * the unique sub-hashes are merged into a single hash that is copied into
2085 * the buffer.
2086 * Returns 0 in case of success, -1 in case of error.
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002087 */
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002088static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
2089 char *buf, unsigned int *buf_len)
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002090{
Remi Tricot-Le Breton8bb72aa2020-11-30 17:06:03 +01002091 unsigned int values[ACCEPT_ENCODING_MAX_ENTRIES] = {};
Tim Duesterhus23b29452020-11-24 22:22:56 +01002092 size_t count = 0;
Remi Tricot-Le Breton8bb72aa2020-11-30 17:06:03 +01002093 unsigned int hash_value = 0;
2094 unsigned int prev = 0, curr = 0;
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002095 struct http_hdr_ctx ctx = { .blk = NULL };
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002096
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002097 /* Iterate over all the ACCEPT_ENCODING_MAX_ENTRIES first accept-encoding
2098 * values that might span acrosse multiple accept-encoding headers. */
2099 while (http_find_header(htx, hdr_name, &ctx, 0) && count < ACCEPT_ENCODING_MAX_ENTRIES) {
2100 /* Turn accept-encoding value to lower case */
2101 ist2bin_lc(istptr(ctx.value), ctx.value);
Tim Duesterhus23b29452020-11-24 22:22:56 +01002102
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002103 values[count++] = hash_crc32(istptr(ctx.value), istlen(ctx.value));
Remi Tricot-Le Breton8bb72aa2020-11-30 17:06:03 +01002104 }
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002105
2106 /* A request with more than ACCEPT_ENCODING_MAX_ENTRIES accepted
2107 * encodings might be illegitimate so we will not use it. */
2108 if (count == ACCEPT_ENCODING_MAX_ENTRIES)
2109 return -1;
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002110
Remi Tricot-Le Breton8bb72aa2020-11-30 17:06:03 +01002111 /* Sort the values alphabetically. */
2112 qsort(values, count, sizeof(*values), &accept_encoding_cmp);
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002113
Remi Tricot-Le Breton8bb72aa2020-11-30 17:06:03 +01002114 while (count) {
2115 curr = values[--count];
2116 if (curr != prev) {
2117 hash_value ^= curr;
2118 }
2119 prev = curr;
2120 }
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002121
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002122 write_u32(buf, hash_value);
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002123 *buf_len = sizeof(hash_value);
2124
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002125 /* This function fills the hash buffer correctly even if no header was
2126 * found, hence the 0 return value (success). */
Tim Duesterhus23b29452020-11-24 22:22:56 +01002127 return 0;
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002128}
Tim Duesterhus23b29452020-11-24 22:22:56 +01002129#undef ACCEPT_ENCODING_MAX_ENTRIES
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002130
2131/*
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002132 * Normalizer used by default for the Referer header. It only
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002133 * calculates a simple crc of the whole value.
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002134 * Only the first occurrence of the header will be taken into account in the
2135 * hash.
2136 * Returns 0 in case of success, 1 if the hash buffer should be filled with 0s
2137 * and -1 in case of error.
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002138 */
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002139static int default_normalizer(struct htx *htx, struct ist hdr_name,
2140 char *buf, unsigned int *buf_len)
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002141{
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002142 int retval = 1;
2143 struct http_hdr_ctx ctx = { .blk = NULL };
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002144
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002145 if (http_find_header(htx, hdr_name, &ctx, 1)) {
2146 retval = 0;
2147 write_u32(buf, hash_crc32(istptr(ctx.value), istlen(ctx.value)));
2148 *buf_len = sizeof(int);
2149 }
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002150
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002151 return retval;
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002152}
2153
/*
 * Pre-calculate the hashes of all the headers supported by our Vary
 * implementation for a given request. Everything has to be hashed in advance
 * because the actual Vary signature won't be known until the first response.
 * Only the first occurrence of every header is taken into account in the
 * hash.
 * If a header is not present, its portion of the hash is filled with zeros.
 * Returns 0 in case of success.
 */
static int http_request_prebuild_full_secondary_key(struct stream *s)
{
	/* Fake signature with every bit set, so that no part of the
	 * secondary key is skipped. */
	const int every_header = ~0;

	return http_request_build_secondary_key(s, every_header);
}
2171
2172
2173/*
2174 * Calculate the secondary key for a request for which we already have a known
2175 * vary signature. The key is made by aggregating hashes calculated for every
2176 * header mentioned in the vary signature.
2177 * Only the first occurrence of every header will be taken into account in the
2178 * hash.
2179 * If the header is not present, the hash portion of the given header will be
2180 * filled with zeros.
2181 * Returns 0 in case of success.
2182 */
2183static int http_request_build_secondary_key(struct stream *s, int vary_signature)
2184{
2185 struct http_txn *txn = s->txn;
2186 struct htx *htx = htxbuf(&s->req.buf);
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002187
2188 unsigned int idx;
2189 const struct vary_hashing_information *info = NULL;
2190 unsigned int hash_length = 0;
2191 int retval = 0;
2192 int offset = 0;
2193
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002194 for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && retval >= 0; ++idx) {
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002195 info = &vary_information[idx];
2196
Remi Tricot-Le Bretone4421de2020-12-23 18:13:46 +01002197 /* The normalizing functions will be in charge of getting the
2198 * header values from the htx. This way they can manage multiple
2199 * occurrences of their processed header. */
2200 if ((vary_signature & info->value) && info->norm_fn != NULL &&
2201 !(retval = info->norm_fn(htx, info->hdr_name, &txn->cache_secondary_hash[offset], &hash_length))) {
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002202 offset += hash_length;
2203 }
2204 else {
2205 /* Fill hash with 0s. */
2206 hash_length = info->hash_length;
2207 memset(&txn->cache_secondary_hash[offset], 0, hash_length);
2208 offset += hash_length;
2209 }
2210 }
2211
Remi Tricot-Le Breton2b5c5cb2020-12-23 18:13:45 +01002212 if (retval >= 0)
2213 txn->flags |= TX_CACHE_HAS_SEC_KEY;
2214
2215 return (retval < 0);
Remi Tricot-Le Breton3d082362020-11-16 15:56:08 +01002216}
2217
2218/*
2219 * Build the actual secondary key of a given request out of the prebuilt key and
2220 * the actual vary signature (extracted from the response).
2221 * Returns 0 in case of success.
2222 */
2223static int http_request_reduce_secondary_key(unsigned int vary_signature,
2224 char prebuilt_key[HTTP_CACHE_SEC_KEY_LEN])
2225{
2226 int offset = 0;
2227 int global_offset = 0;
2228 int vary_info_count = 0;
2229 int keep = 0;
2230 unsigned int vary_idx;
2231 const struct vary_hashing_information *vary_info;
2232
2233 vary_info_count = sizeof(vary_information)/sizeof(*vary_information);
2234 for (vary_idx = 0; vary_idx < vary_info_count; ++vary_idx) {
2235 vary_info = &vary_information[vary_idx];
2236 keep = (vary_signature & vary_info->value) ? 0xff : 0;
2237
2238 for (offset = 0; offset < vary_info->hash_length; ++offset,++global_offset) {
2239 prebuilt_key[global_offset] &= keep;
2240 }
2241 }
2242
2243 return 0;
2244}
2245
2246
Christopher Faulet99a17a22018-12-11 09:18:27 +01002247
2248static int
2249parse_cache_flt(char **args, int *cur_arg, struct proxy *px,
2250 struct flt_conf *fconf, char **err, void *private)
2251{
2252 struct flt_conf *f, *back;
Willy Tarreaua73da1e2018-12-14 10:19:28 +01002253 struct cache_flt_conf *cconf = NULL;
Christopher Faulet99a17a22018-12-11 09:18:27 +01002254 char *name = NULL;
2255 int pos = *cur_arg;
2256
Christopher Faulet2a37cdb2020-05-18 11:58:16 +02002257 /* Get the cache filter name. <pos> point on "cache" keyword */
2258 if (!*args[pos + 1]) {
Tim Duesterhusea969f62020-08-18 22:06:51 +02002259 memprintf(err, "%s : expects a <name> argument", args[pos]);
Christopher Faulet2a37cdb2020-05-18 11:58:16 +02002260 goto error;
2261 }
2262 name = strdup(args[pos + 1]);
2263 if (!name) {
2264 memprintf(err, "%s '%s' : out of memory", args[pos], args[pos + 1]);
2265 goto error;
Christopher Faulet99a17a22018-12-11 09:18:27 +01002266 }
Christopher Faulet2a37cdb2020-05-18 11:58:16 +02002267 pos += 2;
Christopher Faulet99a17a22018-12-11 09:18:27 +01002268
2269 /* Check if an implicit filter with the same name already exists. If so,
2270 * we remove the implicit filter to use the explicit one. */
2271 list_for_each_entry_safe(f, back, &px->filter_configs, list) {
2272 if (f->id != cache_store_flt_id)
2273 continue;
2274
2275 cconf = f->conf;
2276 if (strcmp(name, cconf->c.name)) {
2277 cconf = NULL;
2278 continue;
2279 }
2280
2281 if (!(cconf->flags & CACHE_FLT_F_IMPLICIT_DECL)) {
2282 cconf = NULL;
2283 memprintf(err, "%s: multiple explicit declarations of the cache filter '%s'",
2284 px->id, name);
Tim Duesterhusd34b1ce2020-01-18 01:46:18 +01002285 goto error;
Christopher Faulet99a17a22018-12-11 09:18:27 +01002286 }
2287
2288 /* Remove the implicit filter. <cconf> is kept for the explicit one */
2289 LIST_DEL(&f->list);
2290 free(f);
2291 free(name);
2292 break;
2293 }
2294
2295 /* No implicit cache filter found, create configuration for the explicit one */
2296 if (!cconf) {
2297 cconf = calloc(1, sizeof(*cconf));
2298 if (!cconf) {
2299 memprintf(err, "%s: out of memory", args[*cur_arg]);
2300 goto error;
2301 }
2302 cconf->c.name = name;
2303 }
2304
2305 cconf->flags = 0;
2306 fconf->id = cache_store_flt_id;
2307 fconf->conf = cconf;
2308 fconf->ops = &cache_ops;
2309
2310 *cur_arg = pos;
2311 return 0;
2312
2313 error:
2314 free(name);
2315 free(cconf);
2316 return -1;
2317}
2318
Aurélien Nephtaliabbf6072018-04-18 13:26:46 +02002319static int cli_parse_show_cache(char **args, char *payload, struct appctx *appctx, void *private)
William Lallemand1f49a362017-11-21 20:01:26 +01002320{
2321 if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
2322 return 1;
2323
2324 return 0;
2325}
2326
2327static int cli_io_handler_show_cache(struct appctx *appctx)
2328{
2329 struct cache* cache = appctx->ctx.cli.p0;
2330 struct stream_interface *si = appctx->owner;
2331
William Lallemand1f49a362017-11-21 20:01:26 +01002332 if (cache == NULL) {
2333 cache = LIST_ELEM((caches).n, typeof(struct cache *), list);
2334 }
2335
2336 list_for_each_entry_from(cache, &caches, list) {
2337 struct eb32_node *node = NULL;
2338 unsigned int next_key;
2339 struct cache_entry *entry;
Remi Tricot-Le Bretone3e1e5f2020-11-27 15:48:40 +01002340 unsigned int i;
William Lallemand1f49a362017-11-21 20:01:26 +01002341
William Lallemand1f49a362017-11-21 20:01:26 +01002342 next_key = appctx->ctx.cli.i0;
Willy Tarreauafe1de52018-04-04 11:56:43 +02002343 if (!next_key) {
2344 chunk_printf(&trash, "%p: %s (shctx:%p, available blocks:%d)\n", cache, cache->id, shctx_ptr(cache), shctx_ptr(cache)->nbav);
2345 if (ci_putchk(si_ic(si), &trash) == -1) {
Willy Tarreaudb398432018-11-15 11:08:52 +01002346 si_rx_room_blk(si);
Willy Tarreauafe1de52018-04-04 11:56:43 +02002347 return 0;
2348 }
2349 }
William Lallemand1f49a362017-11-21 20:01:26 +01002350
2351 appctx->ctx.cli.p0 = cache;
2352
2353 while (1) {
2354
2355 shctx_lock(shctx_ptr(cache));
Remi Tricot-Le Bretone3e1e5f2020-11-27 15:48:40 +01002356 if (!node || (node = eb32_next_dup(node)) == NULL)
2357 node = eb32_lookup_ge(&cache->entries, next_key);
William Lallemand1f49a362017-11-21 20:01:26 +01002358 if (!node) {
2359 shctx_unlock(shctx_ptr(cache));
Willy Tarreauafe1de52018-04-04 11:56:43 +02002360 appctx->ctx.cli.i0 = 0;
William Lallemand1f49a362017-11-21 20:01:26 +01002361 break;
2362 }
2363
2364 entry = container_of(node, struct cache_entry, eb);
Remi Tricot-Le Bretone3e1e5f2020-11-27 15:48:40 +01002365 chunk_printf(&trash, "%p hash:%u vary:0x", entry, read_u32(entry->hash));
2366 for (i = 0; i < HTTP_CACHE_SEC_KEY_LEN; ++i)
2367 chunk_appendf(&trash, "%02x", (unsigned char)entry->secondary_key[i]);
2368 chunk_appendf(&trash, " size:%u (%u blocks), refcount:%u, expire:%d\n", block_ptr(entry)->len, block_ptr(entry)->block_count, block_ptr(entry)->refcount, entry->expire - (int)now.tv_sec);
William Lallemand1f49a362017-11-21 20:01:26 +01002369
2370 next_key = node->key + 1;
2371 appctx->ctx.cli.i0 = next_key;
2372
2373 shctx_unlock(shctx_ptr(cache));
2374
2375 if (ci_putchk(si_ic(si), &trash) == -1) {
Willy Tarreaudb398432018-11-15 11:08:52 +01002376 si_rx_room_blk(si);
William Lallemand1f49a362017-11-21 20:01:26 +01002377 return 0;
2378 }
2379 }
2380
2381 }
2382
2383 return 1;
2384
2385}
2386
Remi Tricot-Le Bretonbf971212020-10-27 11:55:57 +01002387
2388/*
2389 * boolean, returns true if response was built out of a cache entry.
2390 */
2391static int
2392smp_fetch_res_cache_hit(const struct arg *args, struct sample *smp,
2393 const char *kw, void *private)
2394{
2395 smp->data.type = SMP_T_BOOL;
2396 smp->data.u.sint = (smp->strm ? (smp->strm->target == &http_cache_applet.obj_type) : 0);
2397
2398 return 1;
2399}
2400
2401/*
2402 * string, returns cache name (if response came from a cache).
2403 */
2404static int
2405smp_fetch_res_cache_name(const struct arg *args, struct sample *smp,
2406 const char *kw, void *private)
2407{
2408 struct appctx *appctx = NULL;
2409
2410 struct cache_flt_conf *cconf = NULL;
2411 struct cache *cache = NULL;
2412
2413 if (!smp->strm || smp->strm->target != &http_cache_applet.obj_type)
2414 return 0;
2415
2416 /* Get appctx from the stream_interface. */
2417 appctx = si_appctx(&smp->strm->si[1]);
2418 if (appctx && appctx->rule) {
2419 cconf = appctx->rule->arg.act.p[0];
2420 if (cconf) {
2421 cache = cconf->c.cache;
2422
2423 smp->data.type = SMP_T_STR;
2424 smp->flags = SMP_F_CONST;
2425 smp->data.u.str.area = cache->id;
2426 smp->data.u.str.data = strlen(cache->id);
2427 return 1;
2428 }
2429 }
2430
2431 return 0;
2432}
2433
Christopher Faulet99a17a22018-12-11 09:18:27 +01002434/* Declare the filter parser for "cache" keyword */
2435static struct flt_kw_list filter_kws = { "CACHE", { }, {
2436 { "cache", parse_cache_flt, NULL },
2437 { NULL, NULL, NULL },
2438 }
2439};
2440
2441INITCALL1(STG_REGISTER, flt_register_keywords, &filter_kws);
2442
William Lallemand1f49a362017-11-21 20:01:26 +01002443static struct cli_kw_list cli_kws = {{},{
William Lallemande899af82017-11-22 16:41:26 +01002444 { { "show", "cache", NULL }, "show cache : show cache status", cli_parse_show_cache, cli_io_handler_show_cache, NULL, NULL },
2445 {{},}
William Lallemand1f49a362017-11-21 20:01:26 +01002446}};
2447
Willy Tarreau0108d902018-11-25 19:14:37 +01002448INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
William Lallemand1f49a362017-11-21 20:01:26 +01002449
William Lallemand41db4602017-10-30 11:15:51 +01002450static struct action_kw_list http_res_actions = {
2451 .kw = {
2452 { "cache-store", parse_cache_store },
2453 { NULL, NULL }
2454 }
2455};
2456
Willy Tarreau0108d902018-11-25 19:14:37 +01002457INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
2458
William Lallemand41db4602017-10-30 11:15:51 +01002459static struct action_kw_list http_req_actions = {
2460 .kw = {
2461 { "cache-use", parse_cache_use },
2462 { NULL, NULL }
2463 }
2464};
2465
Willy Tarreau0108d902018-11-25 19:14:37 +01002466INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
2467
Willy Tarreau2231b632019-03-29 18:26:52 +01002468struct applet http_cache_applet = {
William Lallemand41db4602017-10-30 11:15:51 +01002469 .obj_type = OBJ_TYPE_APPLET,
2470 .name = "<CACHE>", /* used for logging */
William Lallemand77c11972017-10-31 20:43:01 +01002471 .fct = http_cache_io_handler,
William Lallemandecb73b12017-11-24 14:33:55 +01002472 .release = http_cache_applet_release,
William Lallemand41db4602017-10-30 11:15:51 +01002473};
2474
Willy Tarreaue6552512018-11-26 11:33:13 +01002475/* config parsers for this section */
2476REGISTER_CONFIG_SECTION("cache", cfg_parse_cache, cfg_post_parse_section_cache);
William Lallemandd1d1e222019-08-28 15:22:49 +02002477REGISTER_POST_CHECK(post_check_cache);
Remi Tricot-Le Bretonbf971212020-10-27 11:55:57 +01002478
2479
2480/* Note: must not be declared <const> as its list will be overwritten */
2481static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
2482 { "res.cache_hit", smp_fetch_res_cache_hit, 0, NULL, SMP_T_BOOL, SMP_USE_HRSHP, SMP_VAL_RESPONSE },
2483 { "res.cache_name", smp_fetch_res_cache_name, 0, NULL, SMP_T_STR, SMP_USE_HRSHP, SMP_VAL_RESPONSE },
2484 { /* END */ },
2485 }
2486};
2487
2488INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);