/*
 * Stream filters related variables and functions.
 *
 * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <haproxy/api.h>
#include <haproxy/cfgparse.h>
#include <haproxy/compression.h>
#include <haproxy/dynbuf.h>
#include <haproxy/filters.h>
#include <haproxy/http.h>
#include <haproxy/http_ana-t.h>
#include <haproxy/http_htx.h>
#include <haproxy/htx.h>
#include <haproxy/list.h>
#include <haproxy/proxy-t.h>
#include <haproxy/sample.h>
#include <haproxy/stream.h>
#include <haproxy/tools.h>

const char *http_comp_flt_id = "compression filter";

struct flt_ops comp_ops;

struct comp_state {
	struct comp_ctx  *comp_ctx;   /* compression context */
	struct comp_algo *comp_algo;  /* compression algorithm if not NULL */
};

/* Pools used to allocate comp_state structs */
DECLARE_STATIC_POOL(pool_head_comp_state, "comp_state", sizeof(struct comp_state));

static THREAD_LOCAL struct buffer tmpbuf;
static THREAD_LOCAL struct buffer zbuf;

static int select_compression_request_header(struct comp_state *st,
					      struct stream *s,
					      struct http_msg *msg);
static int select_compression_response_header(struct comp_state *st,
					       struct stream *s,
					       struct http_msg *msg);
static int set_compression_response_header(struct comp_state *st,
					    struct stream *s,
					    struct http_msg *msg);

static int htx_compression_buffer_init(struct htx *htx, struct buffer *out);
static int htx_compression_buffer_add_data(struct comp_state *st, const char *data, size_t len,
					    struct buffer *out);
static int htx_compression_buffer_end(struct comp_state *st, struct buffer *out, int end);

/***********************************************************************/
static int
comp_flt_init(struct proxy *px, struct flt_conf *fconf)
{
	fconf->flags |= FLT_CFG_FL_HTX;
	return 0;
}

static int
comp_flt_init_per_thread(struct proxy *px, struct flt_conf *fconf)
{
	if (b_alloc(&tmpbuf) == NULL)
		return -1;
	if (b_alloc(&zbuf) == NULL)
		return -1;
	return 0;
}

static void
comp_flt_deinit_per_thread(struct proxy *px, struct flt_conf *fconf)
{
	if (tmpbuf.size)
		b_free(&tmpbuf);
	if (zbuf.size)
		b_free(&zbuf);
}

static int
comp_strm_init(struct stream *s, struct filter *filter)
{
	struct comp_state *st;

	st = pool_alloc(pool_head_comp_state);
	if (st == NULL)
		return -1;

	st->comp_algo = NULL;
	st->comp_ctx  = NULL;
	filter->ctx   = st;

	/* Register post-analyzer on AN_RES_WAIT_HTTP because we need to
	 * analyze response headers before http-response rules execution
	 * to be sure we can use res.comp and res.comp_algo sample
	 * fetches */
	filter->post_analyzers |= AN_RES_WAIT_HTTP;
	return 1;
}

static void
comp_strm_deinit(struct stream *s, struct filter *filter)
{
	struct comp_state *st = filter->ctx;

	if (!st)
		return;

	/* release any possible compression context */
	if (st->comp_algo)
		st->comp_algo->end(&st->comp_ctx);
	pool_free(pool_head_comp_state, st);
	filter->ctx = NULL;
}

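/* Filter callback called when all headers of an HTTP message have been
 * parsed. On the request side it selects a compression algorithm from the
 * Accept-Encoding header; on the response side it adds the compression
 * response headers and registers the data filter when an algorithm was
 * retained by comp_http_post_analyze.
 */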
static int
comp_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
{
	struct comp_state *st = filter->ctx;

	if (!strm_fe(s)->comp && !s->be->comp)
		goto end;

	if (!(msg->chn->flags & CF_ISRESP))
		select_compression_request_header(st, s, msg);
	else {
		/* Response headers have already been checked in
		 * comp_http_post_analyze callback. */
		if (st->comp_algo) {
			if (!set_compression_response_header(st, s, msg))
				goto end;
			register_data_filter(s, msg->chn, filter);
		}
	}

  end:
	return 1;
}

static int
comp_http_post_analyze(struct stream *s, struct filter *filter,
		       struct channel *chn, unsigned an_bit)
{
	struct http_txn *txn = s->txn;
	struct http_msg *msg = &txn->rsp;
	struct comp_state *st = filter->ctx;

	if (an_bit != AN_RES_WAIT_HTTP)
		goto end;

	if (!strm_fe(s)->comp && !s->be->comp)
		goto end;

	select_compression_response_header(st, s, msg);

  end:
	return 1;
}

static int
comp_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
		  unsigned int offset, unsigned int len)
{
	struct comp_state *st = filter->ctx;
	struct htx *htx = htxbuf(&msg->chn->buf);
	struct htx_ret htxret = htx_find_offset(htx, offset);
	struct htx_blk *blk, *next;
	int ret, consumed = 0, to_forward = 0, last = 0;

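	/* Loop over the HTX blocks of the payload: DATA blocks are compressed
	 * into the trash buffer and replaced in place, trailers and the
	 * end-of-trailers marker finish the compression stream, and all other
	 * blocks are simply accounted for so they can be forwarded as-is.
	 */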
	blk = htxret.blk;
	offset = htxret.ret;
	for (next = NULL; blk && len; blk = next) {
		enum htx_blk_type type = htx_get_blk_type(blk);
		uint32_t sz = htx_get_blksz(blk);
		struct ist v;

		next = htx_get_next_blk(htx, blk);
		while (next && htx_get_blk_type(next) == HTX_BLK_UNUSED)
			next = htx_get_next_blk(htx, next);

		if (!(msg->flags & HTTP_MSGF_COMPRESSING))
			goto consume;

		if (htx_compression_buffer_init(htx, &trash) < 0) {
			msg->chn->flags |= CF_WAKE_WRITE;
			goto end;
		}

		switch (type) {
		case HTX_BLK_DATA:
			/* it is the last data block */
			last = ((!next && (htx->flags & HTX_FL_EOM)) || (next && htx_get_blk_type(next) != HTX_BLK_DATA));
			v = htx_get_blk_value(htx, blk);
			v = istadv(v, offset);
			if (v.len > len) {
				last = 0;
				v.len = len;
			}

			ret = htx_compression_buffer_add_data(st, v.ptr, v.len, &trash);
			if (ret < 0 || htx_compression_buffer_end(st, &trash, last) < 0)
				goto error;
			BUG_ON(v.len != ret);

			if (ret == sz && !b_data(&trash))
				next = htx_remove_blk(htx, blk);
			else
				blk = htx_replace_blk_value(htx, blk, v, ist2(b_head(&trash), b_data(&trash)));

			len -= ret;
			consumed += ret;
			to_forward += b_data(&trash);
			if (last)
				msg->flags &= ~HTTP_MSGF_COMPRESSING;
			break;

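		/* end of message: finish the compression stream and append the
		 * remaining compressed data before the trailers */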
		case HTX_BLK_TLR:
		case HTX_BLK_EOT:
			if (htx_compression_buffer_end(st, &trash, 1) < 0)
				goto error;
			if (b_data(&trash)) {
				struct htx_blk *last = htx_add_last_data(htx, ist2(b_head(&trash), b_data(&trash)));
				if (!last)
					goto error;
				blk = htx_get_next_blk(htx, last);
				if (!blk)
					goto error;
				to_forward += b_data(&trash);
			}
			msg->flags &= ~HTTP_MSGF_COMPRESSING;
			/* fall through */

		default:
		  consume:
			sz -= offset;
			if (sz > len)
				sz = len;
			consumed += sz;
			to_forward += sz;
			len -= sz;
			break;
		}

		offset = 0;
	}

  end:
	if (to_forward != consumed)
		flt_update_offsets(filter, msg->chn, to_forward - consumed);

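	/* account the processed bytes either as compressed traffic (global
	 * bandwidth rate counters and per-proxy counters) or as bypassed
	 * traffic when no compression was performed */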
	if (st->comp_ctx && st->comp_ctx->cur_lvl > 0) {
		update_freq_ctr(&global.comp_bps_in, consumed);
		_HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.comp_in, consumed);
		_HA_ATOMIC_ADD(&s->be->be_counters.comp_in, consumed);
		update_freq_ctr(&global.comp_bps_out, to_forward);
		_HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.comp_out, to_forward);
		_HA_ATOMIC_ADD(&s->be->be_counters.comp_out, to_forward);
	} else {
		_HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.comp_byp, consumed);
		_HA_ATOMIC_ADD(&s->be->be_counters.comp_byp, consumed);
	}
	return to_forward;

  error:
	return -1;
}


static int
comp_http_end(struct stream *s, struct filter *filter,
	      struct http_msg *msg)
{
	struct comp_state *st = filter->ctx;

	if (!(msg->chn->flags & CF_ISRESP) || !st || !st->comp_algo)
		goto end;

	if (strm_fe(s)->mode == PR_MODE_HTTP)
		_HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.p.http.comp_rsp, 1);
	if ((s->flags & SF_BE_ASSIGNED) && (s->be->mode == PR_MODE_HTTP))
		_HA_ATOMIC_ADD(&s->be->be_counters.p.http.comp_rsp, 1);
  end:
	return 1;
}

/***********************************************************************/
Christopher Faulet27d93c32018-12-15 22:32:02 +0100292static int
Christopher Faulet89f2b162019-07-15 21:16:04 +0200293set_compression_response_header(struct comp_state *st, struct stream *s, struct http_msg *msg)
Christopher Faulet27d93c32018-12-15 22:32:02 +0100294{
295 struct htx *htx = htxbuf(&msg->chn->buf);
Tim Duesterhusb229f012019-01-29 16:38:56 +0100296 struct http_hdr_ctx ctx;
Christopher Faulet27d93c32018-12-15 22:32:02 +0100297
298 /*
299 * Add Content-Encoding header when it's not identity encoding.
300 * RFC 2616 : Identity encoding: This content-coding is used only in the
301 * Accept-Encoding header, and SHOULD NOT be used in the Content-Encoding
302 * header.
303 */
304 if (st->comp_algo->cfg_name_len != 8 || memcmp(st->comp_algo->cfg_name, "identity", 8) != 0) {
305 struct ist v = ist2(st->comp_algo->ua_name, st->comp_algo->ua_name_len);
306
307 if (!http_add_header(htx, ist("Content-Encoding"), v))
308 goto error;
309 }
310
311 /* remove Content-Length header */
312 if (msg->flags & HTTP_MSGF_CNT_LEN) {
Christopher Faulet27d93c32018-12-15 22:32:02 +0100313 ctx.blk = NULL;
314 while (http_find_header(htx, ist("Content-Length"), &ctx, 1))
315 http_remove_header(htx, &ctx);
316 }
317
318 /* add "Transfer-Encoding: chunked" header */
319 if (!(msg->flags & HTTP_MSGF_TE_CHNK)) {
320 if (!http_add_header(htx, ist("Transfer-Encoding"), ist("chunked")))
321 goto error;
322 }
323
Tim Duesterhusb229f012019-01-29 16:38:56 +0100324 /* convert "ETag" header to a weak ETag */
325 ctx.blk = NULL;
326 if (http_find_header(htx, ist("ETag"), &ctx, 1)) {
327 if (ctx.value.ptr[0] == '"') {
			/* This is a strong ETag. Convert it to a weak one. */
			struct ist v = ist2(trash.area, 0);
			if (istcat(&v, ist("W/"), trash.size) == -1 || istcat(&v, ctx.value, trash.size) == -1)
				goto error;

			if (!http_replace_header_value(htx, &ctx, v))
				goto error;
		}
	}

	if (!http_add_header(htx, ist("Vary"), ist("Accept-Encoding")))
		goto error;

	return 1;

  error:
	st->comp_algo->end(&st->comp_ctx);
	st->comp_algo = NULL;
	return 0;
}

/*
 * Selects a compression algorithm depending on the client request.
 */
static int
select_compression_request_header(struct comp_state *st, struct stream *s, struct http_msg *msg)
{
	struct htx *htx = htxbuf(&msg->chn->buf);
	struct http_hdr_ctx ctx;
	struct comp_algo *comp_algo = NULL;
	struct comp_algo *comp_algo_back = NULL;

	/* Disable compression for older user agents announcing themselves as "Mozilla/4"
	 * unless they are known good (MSIE 6 with XP SP2, or MSIE 7 and later).
	 * See http://zoompf.com/2012/02/lose-the-wait-http-compression for more details.
	 */
	ctx.blk = NULL;
	if (http_find_header(htx, ist("User-Agent"), &ctx, 1) &&
	    ctx.value.len >= 9 &&
	    memcmp(ctx.value.ptr, "Mozilla/4", 9) == 0 &&
	    (ctx.value.len < 31 ||
	     memcmp(ctx.value.ptr + 25, "MSIE ", 5) != 0 ||
	     *(ctx.value.ptr + 30) < '6' ||
	     (*(ctx.value.ptr + 30) == '6' &&
	      (ctx.value.len < 54 || memcmp(ctx.value.ptr + 51, "SV1", 3) != 0)))) {
		st->comp_algo = NULL;
		return 0;
	}

	/* search for the algo in the backend in priority or the frontend */
	if ((s->be->comp && (comp_algo_back = s->be->comp->algos)) ||
	    (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos))) {
		int best_q = 0;

		ctx.blk = NULL;
		while (http_find_header(htx, ist("Accept-Encoding"), &ctx, 0)) {
			const char *qval;
			int q;
			int toklen;

			/* try to isolate the token from the optional q-value */
			toklen = 0;
			while (toklen < ctx.value.len && HTTP_IS_TOKEN(*(ctx.value.ptr + toklen)))
				toklen++;

			qval = ctx.value.ptr + toklen;
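			/* look for an optional ";q=" parameter after the
			 * encoding token, skipping any linear whitespace */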
			while (1) {
				while (qval < ctx.value.ptr + ctx.value.len && HTTP_IS_LWS(*qval))
					qval++;

				if (qval >= ctx.value.ptr + ctx.value.len || *qval != ';') {
					qval = NULL;
					break;
				}
				qval++;

				while (qval < ctx.value.ptr + ctx.value.len && HTTP_IS_LWS(*qval))
					qval++;

				if (qval >= ctx.value.ptr + ctx.value.len) {
					qval = NULL;
					break;
				}
				if (strncmp(qval, "q=", MIN(ctx.value.ptr + ctx.value.len - qval, 2)) == 0)
					break;

				while (qval < ctx.value.ptr + ctx.value.len && *qval != ';')
					qval++;
			}

			/* here we have qval pointing to the first "q=" attribute or NULL if not found */
			q = qval ? http_parse_qvalue(qval + 2, NULL) : 1000;

			if (q <= best_q)
				continue;

			for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) {
				if (*(ctx.value.ptr) == '*' ||
				    word_match(ctx.value.ptr, toklen, comp_algo->ua_name, comp_algo->ua_name_len)) {
					st->comp_algo = comp_algo;
					best_q = q;
					break;
				}
			}
		}
	}

	/* remove all occurrences of the header when "compression offload" is set */
	if (st->comp_algo) {
		if ((s->be->comp && s->be->comp->offload) ||
		    (strm_fe(s)->comp && strm_fe(s)->comp->offload)) {
			http_remove_header(htx, &ctx);
			ctx.blk = NULL;
			while (http_find_header(htx, ist("Accept-Encoding"), &ctx, 1))
				http_remove_header(htx, &ctx);
		}
		return 1;
	}

	/* identity is implicit and does not require headers */
	if ((s->be->comp && (comp_algo_back = s->be->comp->algos)) ||
	    (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos))) {
		for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) {
			if (comp_algo->cfg_name_len == 8 && memcmp(comp_algo->cfg_name, "identity", 8) == 0) {
				st->comp_algo = comp_algo;
				return 1;
			}
		}
	}

	st->comp_algo = NULL;
	return 0;
}

/*
 * Selects a compression algorithm depending on the server response.
 */
static int
select_compression_response_header(struct comp_state *st, struct stream *s, struct http_msg *msg)
{
	struct htx *htx = htxbuf(&msg->chn->buf);
	struct http_txn *txn = s->txn;
	struct http_hdr_ctx ctx;
	struct comp_type *comp_type;

	/* no common compression algorithm was found in request header */
	if (st->comp_algo == NULL)
		goto fail;

	/* compression already in progress */
	if (msg->flags & HTTP_MSGF_COMPRESSING)
		goto fail;

	/* HTTP < 1.1 should not be compressed */
	if (!(msg->flags & HTTP_MSGF_VER_11) || !(txn->req.flags & HTTP_MSGF_VER_11))
		goto fail;

	if (txn->meth == HTTP_METH_HEAD)
		goto fail;

	/* compress 200,201,202,203 responses only */
	if ((txn->status != 200) &&
	    (txn->status != 201) &&
	    (txn->status != 202) &&
	    (txn->status != 203))
		goto fail;

	if (!(msg->flags & HTTP_MSGF_XFER_LEN) || msg->flags & HTTP_MSGF_BODYLESS)
		goto fail;

	/* content is already compressed */
	ctx.blk = NULL;
	if (http_find_header(htx, ist("Content-Encoding"), &ctx, 1))
		goto fail;

	/* no compression when Cache-Control: no-transform is present in the message */
	ctx.blk = NULL;
	while (http_find_header(htx, ist("Cache-Control"), &ctx, 0)) {
		if (word_match(ctx.value.ptr, ctx.value.len, "no-transform", 12))
			goto fail;
	}

	/* no compression when ETag is malformed */
	ctx.blk = NULL;
	if (http_find_header(htx, ist("ETag"), &ctx, 1)) {
		if (http_get_etag_type(ctx.value) == ETAG_INVALID)
			goto fail;
	}
	/* no compression when multiple ETags are present
	 * Note: Do not reset ctx.blk!
	 */
	if (http_find_header(htx, ist("ETag"), &ctx, 1))
		goto fail;

	comp_type = NULL;

	/* we don't want to compress multipart content-types, nor content-types that are
	 * not listed in the "compression type" directive if any. If no content-type was
	 * found but configuration requires one, we don't compress either. Backend has
	 * the priority.
	 */
	ctx.blk = NULL;
	if (http_find_header(htx, ist("Content-Type"), &ctx, 1)) {
		if (ctx.value.len >= 9 && strncasecmp("multipart", ctx.value.ptr, 9) == 0)
			goto fail;

		if ((s->be->comp && (comp_type = s->be->comp->types)) ||
		    (strm_fe(s)->comp && (comp_type = strm_fe(s)->comp->types))) {
			for (; comp_type; comp_type = comp_type->next) {
				if (ctx.value.len >= comp_type->name_len &&
				    strncasecmp(ctx.value.ptr, comp_type->name, comp_type->name_len) == 0)
					/* this Content-Type should be compressed */
					break;
			}
			/* this Content-Type should not be compressed */
			if (comp_type == NULL)
				goto fail;
		}
	}
	else { /* no content-type header */
		if ((s->be->comp && s->be->comp->types) ||
		    (strm_fe(s)->comp && strm_fe(s)->comp->types))
			goto fail; /* a content-type was required */
	}

	/* limit compression rate */
	if (global.comp_rate_lim > 0)
		if (read_freq_ctr(&global.comp_bps_in) > global.comp_rate_lim)
			goto fail;

	/* limit cpu usage */
	if (ti->idle_pct < compress_min_idle)
		goto fail;

	/* initialize compression */
	if (st->comp_algo->init(&st->comp_ctx, global.tune.comp_maxlevel) < 0)
		goto fail;
	msg->flags |= HTTP_MSGF_COMPRESSING;
	return 1;

  fail:
	st->comp_algo = NULL;
	return 0;
}

/***********************************************************************/
static int
htx_compression_buffer_init(struct htx *htx, struct buffer *out)
{
	/* output stream requires at least 10 bytes for the gzip header, plus
	 * at least 8 bytes for the gzip trailer (crc+len), plus at most
	 * 5 bytes per 32kB block and 2 bytes to close the stream.
	 */
	if (htx_free_space(htx) < 20 + 5 * ((htx->data + 32767) >> 15))
		return -1;
	b_reset(out);
	return 0;
}

static int
htx_compression_buffer_add_data(struct comp_state *st, const char *data, size_t len,
				struct buffer *out)
{
	return st->comp_algo->add_data(st->comp_ctx, data, len, out);
}

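/* Flushes the compression stream, or finishes it when <end> is non-zero. */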
static int
htx_compression_buffer_end(struct comp_state *st, struct buffer *out, int end)
{
	if (end)
		return st->comp_algo->finish(st->comp_ctx, out);
	else
		return st->comp_algo->flush(st->comp_ctx, out);
}


/***********************************************************************/
struct flt_ops comp_ops = {
	.init              = comp_flt_init,
	.init_per_thread   = comp_flt_init_per_thread,
	.deinit_per_thread = comp_flt_deinit_per_thread,

	.attach = comp_strm_init,
	.detach = comp_strm_deinit,

	.channel_post_analyze = comp_http_post_analyze,

	.http_headers = comp_http_headers,
	.http_payload = comp_http_payload,
	.http_end     = comp_http_end,
};

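/* Parses the "compression" proxy keyword: "compression algo", "compression
 * type" and "compression offload". A minimal configuration sketch
 * (illustrative only, not taken from this file):
 *
 *     compression algo gzip
 *     compression type text/html text/plain
 *     compression offload
 */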
static int
parse_compression_options(char **args, int section, struct proxy *proxy,
			  const struct proxy *defpx, const char *file, int line,
			  char **err)
{
	struct comp *comp;

	if (proxy->comp == NULL) {
		comp = calloc(1, sizeof(*comp));
		proxy->comp = comp;
	}
	else
		comp = proxy->comp;

	if (strcmp(args[1], "algo") == 0) {
		struct comp_ctx *ctx;
		int cur_arg = 2;

		if (!*args[cur_arg]) {
			memprintf(err, "parsing [%s:%d] : '%s' expects <algorithm>\n",
				  file, line, args[0]);
			return -1;
		}
		while (*(args[cur_arg])) {
			if (comp_append_algo(comp, args[cur_arg]) < 0) {
				memprintf(err, "'%s' : '%s' is not a supported algorithm.\n",
					  args[0], args[cur_arg]);
				return -1;
			}
			if (proxy->comp->algos->init(&ctx, 9) == 0)
				proxy->comp->algos->end(&ctx);
			else {
				memprintf(err, "'%s' : Can't init '%s' algorithm.\n",
					  args[0], args[cur_arg]);
				return -1;
			}
			cur_arg++;
			continue;
		}
	}
	else if (strcmp(args[1], "offload") == 0)
		comp->offload = 1;
	else if (strcmp(args[1], "type") == 0) {
		int cur_arg = 2;

		if (!*args[cur_arg]) {
			memprintf(err, "'%s' expects <type>\n", args[0]);
			return -1;
		}
		while (*(args[cur_arg])) {
			comp_append_type(comp, args[cur_arg]);
			cur_arg++;
			continue;
		}
	}
	else {
		memprintf(err, "'%s' expects 'algo', 'type' or 'offload'\n",
			  args[0]);
		return -1;
	}

	return 0;
}

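/* Parses an explicit "compression" filter declaration ("filter compression").
 * Only one compression filter may be declared per proxy. */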
static int
parse_http_comp_flt(char **args, int *cur_arg, struct proxy *px,
		    struct flt_conf *fconf, char **err, void *private)
{
	struct flt_conf *fc, *back;

	list_for_each_entry_safe(fc, back, &px->filter_configs, list) {
		if (fc->id == http_comp_flt_id) {
			memprintf(err, "%s: Proxy supports only one compression filter\n", px->id);
			return -1;
		}
	}

	fconf->id   = http_comp_flt_id;
	fconf->conf = NULL;
	fconf->ops  = &comp_ops;
	(*cur_arg)++;

	return 0;
}


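/* Implicitly registers the compression filter on proxies using the
 * "compression" directive without an explicit filter declaration. Returns the
 * number of configuration errors encountered. */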
int
check_implicit_http_comp_flt(struct proxy *proxy)
{
	struct flt_conf *fconf;
	int explicit = 0;
	int comp = 0;
	int err = 0;

	if (proxy->comp == NULL)
		goto end;
	if (!LIST_ISEMPTY(&proxy->filter_configs)) {
		list_for_each_entry(fconf, &proxy->filter_configs, list) {
			if (fconf->id == http_comp_flt_id)
				comp = 1;
			else if (fconf->id == cache_store_flt_id) {
				if (comp) {
					ha_alert("config: %s '%s': unable to enable the compression filter "
						 "before any cache filter.\n",
						 proxy_type_str(proxy), proxy->id);
					err++;
					goto end;
				}
			}
			else if (fconf->id == fcgi_flt_id)
				continue;
			else
				explicit = 1;
		}
	}
	if (comp)
		goto end;
	else if (explicit) {
		ha_alert("config: %s '%s': require an explicit filter declaration to use "
			 "HTTP compression\n", proxy_type_str(proxy), proxy->id);
		err++;
		goto end;
	}

	/* Implicit declaration of the compression filter is always the last
	 * one */
	fconf = calloc(1, sizeof(*fconf));
	if (!fconf) {
		ha_alert("config: %s '%s': out of memory\n",
			 proxy_type_str(proxy), proxy->id);
		err++;
		goto end;
	}
	fconf->id   = http_comp_flt_id;
	fconf->conf = NULL;
	fconf->ops  = &comp_ops;
	LIST_ADDQ(&proxy->filter_configs, &fconf->list);
  end:
	return err;
}

/*
 * boolean, returns true if compression is used (either gzip or deflate) in the
 * response.
 */
static int
smp_fetch_res_comp(const struct arg *args, struct sample *smp, const char *kw,
		   void *private)
{
	struct http_txn *txn = smp->strm ? smp->strm->txn : NULL;

	smp->data.type = SMP_T_BOOL;
	smp->data.u.sint = (txn && (txn->rsp.flags & HTTP_MSGF_COMPRESSING));
	return 1;
}

/*
 * string, returns algo
 */
static int
smp_fetch_res_comp_algo(const struct arg *args, struct sample *smp,
			const char *kw, void *private)
{
	struct http_txn *txn = smp->strm ? smp->strm->txn : NULL;
	struct filter *filter;
	struct comp_state *st;

	if (!txn || !(txn->rsp.flags & HTTP_MSGF_COMPRESSING))
		return 0;

	list_for_each_entry(filter, &strm_flt(smp->strm)->filters, list) {
		if (FLT_ID(filter) != http_comp_flt_id)
			continue;

		if (!(st = filter->ctx))
			break;

		smp->data.type = SMP_T_STR;
		smp->flags = SMP_F_CONST;
		smp->data.u.str.area = st->comp_algo->cfg_name;
		smp->data.u.str.data = st->comp_algo->cfg_name_len;
		return 1;
	}
	return 0;
}

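/* Example use of the sample fetches above in a configuration (illustrative
 * only, not taken from this file):
 *
 *     http-response set-header X-Comp-Algo %[res.comp_algo] if { res.comp }
 */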
/* Declare the config parser for "compression" keyword */
static struct cfg_kw_list cfg_kws = {ILH, {
		{ CFG_LISTEN, "compression", parse_compression_options },
		{ 0, NULL, NULL },
	}
};

INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);

/* Declare the filter parser for "compression" keyword */
static struct flt_kw_list filter_kws = { "COMP", { }, {
		{ "compression", parse_http_comp_flt, NULL },
		{ NULL, NULL, NULL },
	}
};

INITCALL1(STG_REGISTER, flt_register_keywords, &filter_kws);

/* Note: must not be declared <const> as its list will be overwritten */
static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
		{ "res.comp",      smp_fetch_res_comp,      0, NULL, SMP_T_BOOL, SMP_USE_HRSHP },
		{ "res.comp_algo", smp_fetch_res_comp_algo, 0, NULL, SMP_T_STR,  SMP_USE_HRSHP },
		{ /* END */ },
	}
};

INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);