blob: f33336aae453b70c4d67e2a451d57a8c1d9fdd39 [file] [log] [blame]
/*
 * HTTP sample conversion
 *
 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <sys/types.h>
14
15#include <ctype.h>
16#include <string.h>
17#include <time.h>
18
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020019#include <haproxy/api.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020020#include <haproxy/arg.h>
Willy Tarreaudfd3de82020-06-04 23:46:14 +020021#include <haproxy/capture-t.h>
Willy Tarreauc13ed532020-06-02 10:22:45 +020022#include <haproxy/chunk.h>
Willy Tarreaucd72d8c2020-06-02 19:11:26 +020023#include <haproxy/http.h>
Willy Tarreaud0ef4392020-06-02 09:38:52 +020024#include <haproxy/pool.h>
Willy Tarreaue6ce10b2020-06-04 15:33:47 +020025#include <haproxy/sample.h>
Willy Tarreaudfd3de82020-06-04 23:46:14 +020026#include <haproxy/stream.h>
Willy Tarreau48fbcae2020-06-03 18:09:46 +020027#include <haproxy/tools.h>
Willy Tarreaud6788052020-05-27 15:59:00 +020028#include <haproxy/version.h>
Willy Tarreau79e57332018-10-02 16:01:16 +020029
/* Argument checker registered for the "http_date" converter. It only
 * forwards <args> and <err> to smp_check_date_unit() and returns that
 * function's result unchanged; <conv>, <file> and <line> are not used.
 */
static int smp_check_http_date_unit(struct arg *args, struct sample_conv *conv,
                                    const char *file, int line, char **err)
{
	int ret;

	ret = smp_check_date_unit(args, err);
	return ret;
}
Willy Tarreau79e57332018-10-02 16:01:16 +020035
36/* takes an UINT value on input supposed to represent the time since EPOCH,
37 * adds an optional offset found in args[0] and emits a string representing
Damien Claisseae6f1252019-10-30 15:57:28 +000038 * the date in RFC-1123/5322 format. If optional unit param in args[1] is
39 * provided, decode timestamp in milliseconds ("ms") or microseconds("us"),
40 * and use relevant output date format.
Willy Tarreau79e57332018-10-02 16:01:16 +020041 */
42static int sample_conv_http_date(const struct arg *args, struct sample *smp, void *private)
43{
44 const char day[7][4] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
45 const char mon[12][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
46 struct buffer *temp;
Tim Duesterhus9da4b1f2021-08-28 23:57:01 +020047 struct tm tm;
Damien Claisseae6f1252019-10-30 15:57:28 +000048 int sec_frac = 0;
49 time_t curr_date;
Willy Tarreau79e57332018-10-02 16:01:16 +020050
51 /* add offset */
Christopher Faulet72dbcfe2021-01-29 11:25:02 +010052 if (args[0].type == ARGT_SINT)
Damien Claisseae6f1252019-10-30 15:57:28 +000053 smp->data.u.sint += args[0].data.sint;
54
55 /* report in milliseconds */
Christopher Faulet72dbcfe2021-01-29 11:25:02 +010056 if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_MS) {
Damien Claisseae6f1252019-10-30 15:57:28 +000057 sec_frac = smp->data.u.sint % 1000;
58 smp->data.u.sint /= 1000;
59 }
60 /* report in microseconds */
Christopher Faulet72dbcfe2021-01-29 11:25:02 +010061 else if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_US) {
Damien Claisseae6f1252019-10-30 15:57:28 +000062 sec_frac = smp->data.u.sint % 1000000;
63 smp->data.u.sint /= 1000000;
64 }
65
66 /* With high numbers, the date returned can be negative, the 55 bits mask prevent this. */
67 curr_date = smp->data.u.sint & 0x007fffffffffffffLL;
Willy Tarreau79e57332018-10-02 16:01:16 +020068
Tim Duesterhus9da4b1f2021-08-28 23:57:01 +020069 get_gmtime(curr_date, &tm);
Willy Tarreau79e57332018-10-02 16:01:16 +020070
71 temp = get_trash_chunk();
Christopher Faulet72dbcfe2021-01-29 11:25:02 +010072 if (args[1].type == ARGT_SINT && args[1].data.sint != TIME_UNIT_S) {
Damien Claisseae6f1252019-10-30 15:57:28 +000073 temp->data = snprintf(temp->area, temp->size - temp->data,
74 "%s, %02d %s %04d %02d:%02d:%02d.%d GMT",
Tim Duesterhus9da4b1f2021-08-28 23:57:01 +020075 day[tm.tm_wday], tm.tm_mday, mon[tm.tm_mon],
76 1900+tm.tm_year,
77 tm.tm_hour, tm.tm_min, tm.tm_sec, sec_frac);
Damien Claisseae6f1252019-10-30 15:57:28 +000078 } else {
79 temp->data = snprintf(temp->area, temp->size - temp->data,
80 "%s, %02d %s %04d %02d:%02d:%02d GMT",
Tim Duesterhus9da4b1f2021-08-28 23:57:01 +020081 day[tm.tm_wday], tm.tm_mday, mon[tm.tm_mon],
82 1900+tm.tm_year,
83 tm.tm_hour, tm.tm_min, tm.tm_sec);
Damien Claisseae6f1252019-10-30 15:57:28 +000084 }
Willy Tarreau79e57332018-10-02 16:01:16 +020085
86 smp->data.u.str = *temp;
87 smp->data.type = SMP_T_STR;
88 return 1;
89}
90
91/* Arguments: The list of expected value, the number of parts returned and the separator */
92static int sample_conv_q_preferred(const struct arg *args, struct sample *smp, void *private)
93{
94 const char *al = smp->data.u.str.area;
95 const char *end = al + smp->data.u.str.data;
96 const char *token;
97 int toklen;
98 int qvalue;
99 const char *str;
100 const char *w;
101 int best_q = 0;
102
103 /* Set the constant to the sample, because the output of the
104 * function will be peek in the constant configuration string.
105 */
106 smp->flags |= SMP_F_CONST;
107 smp->data.u.str.size = 0;
108 smp->data.u.str.area = "";
109 smp->data.u.str.data = 0;
110
111 /* Parse the accept language */
112 while (1) {
113
114 /* Jump spaces, quit if the end is detected. */
115 while (al < end && isspace((unsigned char)*al))
116 al++;
117 if (al >= end)
118 break;
119
Ilya Shipitsin46a030c2020-07-05 16:36:08 +0500120 /* Start of the first word. */
Willy Tarreau79e57332018-10-02 16:01:16 +0200121 token = al;
122
123 /* Look for separator: isspace(), ',' or ';'. Next value if 0 length word. */
124 while (al < end && *al != ';' && *al != ',' && !isspace((unsigned char)*al))
125 al++;
126 if (al == token)
127 goto expect_comma;
128
129 /* Length of the token. */
130 toklen = al - token;
131 qvalue = 1000;
132
133 /* Check if the token exists in the list. If the token not exists,
134 * jump to the next token.
135 */
136 str = args[0].data.str.area;
137 w = str;
138 while (1) {
139 if (*str == ';' || *str == '\0') {
140 if (http_language_range_match(token, toklen, w, str - w))
141 goto look_for_q;
142 if (*str == '\0')
143 goto expect_comma;
144 w = str + 1;
145 }
146 str++;
147 }
148 goto expect_comma;
149
150look_for_q:
151
152 /* Jump spaces, quit if the end is detected. */
153 while (al < end && isspace((unsigned char)*al))
154 al++;
155 if (al >= end)
156 goto process_value;
157
158 /* If ',' is found, process the result */
159 if (*al == ',')
160 goto process_value;
161
162 /* If the character is different from ';', look
163 * for the end of the header part in best effort.
164 */
165 if (*al != ';')
166 goto expect_comma;
167
168 /* Assumes that the char is ';', now expect "q=". */
169 al++;
170
171 /* Jump spaces, process value if the end is detected. */
172 while (al < end && isspace((unsigned char)*al))
173 al++;
174 if (al >= end)
175 goto process_value;
176
177 /* Expect 'q'. If no 'q', continue in best effort */
178 if (*al != 'q')
179 goto process_value;
180 al++;
181
182 /* Jump spaces, process value if the end is detected. */
183 while (al < end && isspace((unsigned char)*al))
184 al++;
185 if (al >= end)
186 goto process_value;
187
188 /* Expect '='. If no '=', continue in best effort */
189 if (*al != '=')
190 goto process_value;
191 al++;
192
193 /* Jump spaces, process value if the end is detected. */
194 while (al < end && isspace((unsigned char)*al))
195 al++;
196 if (al >= end)
197 goto process_value;
198
199 /* Parse the q value. */
200 qvalue = http_parse_qvalue(al, &al);
201
202process_value:
203
204 /* If the new q value is the best q value, then store the associated
205 * language in the response. If qvalue is the biggest value (1000),
206 * break the process.
207 */
208 if (qvalue > best_q) {
209 smp->data.u.str.area = (char *)w;
210 smp->data.u.str.data = str - w;
211 if (qvalue >= 1000)
212 break;
213 best_q = qvalue;
214 }
215
216expect_comma:
217
218 /* Expect comma or end. If the end is detected, quit the loop. */
219 while (al < end && *al != ',')
220 al++;
221 if (al >= end)
222 break;
223
224 /* Comma is found, jump it and restart the analyzer. */
225 al++;
226 }
227
228 /* Set default value if required. */
229 if (smp->data.u.str.data == 0 && args[1].type == ARGT_STR) {
230 smp->data.u.str.area = args[1].data.str.area;
231 smp->data.u.str.data = args[1].data.str.data;
232 }
233
234 /* Return true only if a matching language was found. */
235 return smp->data.u.str.data != 0;
236}
237
238/* This fetch url-decode any input string. */
239static int sample_conv_url_dec(const struct arg *args, struct sample *smp, void *private)
240{
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200241 int in_form = 0;
Willy Tarreau79e57332018-10-02 16:01:16 +0200242 int len;
243
Joseph Herlant942eea32018-11-15 13:57:22 -0800244 /* If the constant flag is set or if not size is available at
Willy Tarreau79e57332018-10-02 16:01:16 +0200245 * the end of the buffer, copy the string in other buffer
246 * before decoding.
247 */
248 if (smp->flags & SMP_F_CONST || smp->data.u.str.size <= smp->data.u.str.data) {
249 struct buffer *str = get_trash_chunk();
250 memcpy(str->area, smp->data.u.str.area, smp->data.u.str.data);
251 smp->data.u.str.area = str->area;
252 smp->data.u.str.size = str->size;
253 smp->flags &= ~SMP_F_CONST;
254 }
255
256 /* Add final \0 required by url_decode(), and convert the input string. */
257 smp->data.u.str.area[smp->data.u.str.data] = '\0';
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200258
Christopher Faulet72dbcfe2021-01-29 11:25:02 +0100259 if (args[0].type == ARGT_SINT)
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200260 in_form = !!args[0].data.sint;
261
262 len = url_decode(smp->data.u.str.area, in_form);
Willy Tarreau79e57332018-10-02 16:01:16 +0200263 if (len < 0)
264 return 0;
265 smp->data.u.str.data = len;
266 return 1;
267}
268
/* url-encode types and encode maps */
enum encode_type {
	ENC_QUERY = 0,	/* encoding suited to a query string */
};

/* Bitmap holding one bit per possible byte value (256 bits), stored as longs */
long query_encode_map[(256 / 8) / sizeof(long)];
274
275/* Check url-encode type */
276static int sample_conv_url_enc_check(struct arg *arg, struct sample_conv *conv,
277 const char *file, int line, char **err)
278{
279 enum encode_type enc_type;
280
281 if (strcmp(arg->data.str.area, "") == 0)
282 enc_type = ENC_QUERY;
283 else if (strcmp(arg->data.str.area, "query") == 0)
284 enc_type = ENC_QUERY;
285 else {
286 memprintf(err, "Unexpected encode type. "
287 "Allowed value is 'query'");
288 return 0;
289 }
290
291 chunk_destroy(&arg->data.str);
292 arg->type = ARGT_SINT;
293 arg->data.sint = enc_type;
294 return 1;
295}
296
297/* Initializes some url encode data at boot */
298static void sample_conf_url_enc_init()
299{
300 int i;
301
302 memset(query_encode_map, 0, sizeof(query_encode_map));
303 /* use rfc3986 to determine list of characters to keep unchanged for
304 * query string */
305 for (i = 0; i < 256; i++) {
306 if (!((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')
307 || (i >= '0' && i <= '9') ||
308 i == '-' || i == '.' || i == '_' || i == '~'))
309 ha_bit_set(i, query_encode_map);
310 }
311}
312
313INITCALL0(STG_PREPARE, sample_conf_url_enc_init);
314
315/* This fetch url-encode any input string. Only support query string for now */
316static int sample_conv_url_enc(const struct arg *args, struct sample *smp, void
317 *private)
318{
319 enum encode_type enc_type;
320 struct buffer *trash = get_trash_chunk();
321 long *encode_map;
322 char *ret;
323
324 enc_type = ENC_QUERY;
Christopher Faulet72dbcfe2021-01-29 11:25:02 +0100325 enc_type = args->data.sint;
William Dauchy888b0ae2021-01-06 23:39:50 +0100326
William Dauchy888b0ae2021-01-06 23:39:50 +0100327 if (enc_type == ENC_QUERY)
328 encode_map = query_encode_map;
329 else
330 return 0;
331
Christopher Faulet7c6b03e2022-04-08 10:04:05 +0200332 ret = encode_chunk(trash->area, trash->area + trash->size, '%',
333 encode_map, &smp->data.u.str);
William Dauchy888b0ae2021-01-06 23:39:50 +0100334 if (ret == NULL || *ret != '\0')
335 return 0;
336 trash->data = ret - trash->area;
337 smp->data.u.str = *trash;
338 return 1;
339}
340
Willy Tarreau79e57332018-10-02 16:01:16 +0200341static int smp_conv_req_capture(const struct arg *args, struct sample *smp, void *private)
342{
Willy Tarreau55758962020-04-29 11:22:08 +0200343 struct proxy *fe;
Willy Tarreau79e57332018-10-02 16:01:16 +0200344 int idx, i;
345 struct cap_hdr *hdr;
346 int len;
347
Christopher Faulet72dbcfe2021-01-29 11:25:02 +0100348 if (args->type != ARGT_SINT)
Willy Tarreau79e57332018-10-02 16:01:16 +0200349 return 0;
350
Willy Tarreau55758962020-04-29 11:22:08 +0200351 if (!smp->strm)
352 return 0;
353
354 fe = strm_fe(smp->strm);
Willy Tarreau79e57332018-10-02 16:01:16 +0200355 idx = args->data.sint;
356
357 /* Check the availibity of the capture id. */
358 if (idx > fe->nb_req_cap - 1)
359 return 0;
360
361 /* Look for the original configuration. */
362 for (hdr = fe->req_cap, i = fe->nb_req_cap - 1;
363 hdr != NULL && i != idx ;
364 i--, hdr = hdr->next);
365 if (!hdr)
366 return 0;
367
368 /* check for the memory allocation */
369 if (smp->strm->req_cap[hdr->index] == NULL)
370 smp->strm->req_cap[hdr->index] = pool_alloc(hdr->pool);
371 if (smp->strm->req_cap[hdr->index] == NULL)
372 return 0;
373
374 /* Check length. */
375 len = smp->data.u.str.data;
376 if (len > hdr->len)
377 len = hdr->len;
378
379 /* Capture input data. */
380 memcpy(smp->strm->req_cap[idx], smp->data.u.str.area, len);
381 smp->strm->req_cap[idx][len] = '\0';
382
383 return 1;
384}
385
386static int smp_conv_res_capture(const struct arg *args, struct sample *smp, void *private)
387{
Willy Tarreau55758962020-04-29 11:22:08 +0200388 struct proxy *fe;
Willy Tarreau79e57332018-10-02 16:01:16 +0200389 int idx, i;
390 struct cap_hdr *hdr;
391 int len;
392
Christopher Faulet72dbcfe2021-01-29 11:25:02 +0100393 if (args->type != ARGT_SINT)
Willy Tarreau79e57332018-10-02 16:01:16 +0200394 return 0;
395
Willy Tarreau55758962020-04-29 11:22:08 +0200396 if (!smp->strm)
397 return 0;
398
399 fe = strm_fe(smp->strm);
Willy Tarreau79e57332018-10-02 16:01:16 +0200400 idx = args->data.sint;
401
402 /* Check the availibity of the capture id. */
403 if (idx > fe->nb_rsp_cap - 1)
404 return 0;
405
406 /* Look for the original configuration. */
407 for (hdr = fe->rsp_cap, i = fe->nb_rsp_cap - 1;
408 hdr != NULL && i != idx ;
409 i--, hdr = hdr->next);
410 if (!hdr)
411 return 0;
412
413 /* check for the memory allocation */
414 if (smp->strm->res_cap[hdr->index] == NULL)
415 smp->strm->res_cap[hdr->index] = pool_alloc(hdr->pool);
416 if (smp->strm->res_cap[hdr->index] == NULL)
417 return 0;
418
419 /* Check length. */
420 len = smp->data.u.str.data;
421 if (len > hdr->len)
422 len = hdr->len;
423
424 /* Capture input data. */
425 memcpy(smp->strm->res_cap[idx], smp->data.u.str.area, len);
426 smp->strm->res_cap[idx][len] = '\0';
427
428 return 1;
429}
430
/************************************************************************/
/*       All supported converter keywords must be declared here.       */
/************************************************************************/
434
435/* Note: must not be declared <const> as its list will be overwritten */
436static struct sample_conv_kw_list sample_conv_kws = {ILH, {
Damien Claisseae6f1252019-10-30 15:57:28 +0000437 { "http_date", sample_conv_http_date, ARG2(0,SINT,STR), smp_check_http_date_unit, SMP_T_SINT, SMP_T_STR},
Willy Tarreau79e57332018-10-02 16:01:16 +0200438 { "language", sample_conv_q_preferred, ARG2(1,STR,STR), NULL, SMP_T_STR, SMP_T_STR},
439 { "capture-req", smp_conv_req_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
440 { "capture-res", smp_conv_res_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200441 { "url_dec", sample_conv_url_dec, ARG1(0,SINT), NULL, SMP_T_STR, SMP_T_STR},
William Dauchy888b0ae2021-01-06 23:39:50 +0100442 { "url_enc", sample_conv_url_enc, ARG1(1,STR), sample_conv_url_enc_check, SMP_T_STR, SMP_T_STR},
Willy Tarreau79e57332018-10-02 16:01:16 +0200443 { NULL, NULL, 0, 0, 0 },
444}};
445
Willy Tarreau0108d902018-11-25 19:14:37 +0100446INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
Willy Tarreau79e57332018-10-02 16:01:16 +0200447
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */