/*
 * HTTP sample conversion
 *
 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <sys/types.h>
14
15#include <ctype.h>
16#include <string.h>
17#include <time.h>
18
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020019#include <haproxy/api.h>
Willy Tarreauc13ed532020-06-02 10:22:45 +020020#include <haproxy/chunk.h>
Willy Tarreaucd72d8c2020-06-02 19:11:26 +020021#include <haproxy/http.h>
Willy Tarreaud0ef4392020-06-02 09:38:52 +020022#include <haproxy/pool.h>
Willy Tarreaue6ce10b2020-06-04 15:33:47 +020023#include <haproxy/sample.h>
Willy Tarreau48fbcae2020-06-03 18:09:46 +020024#include <haproxy/tools.h>
Willy Tarreaud6788052020-05-27 15:59:00 +020025#include <haproxy/version.h>
Willy Tarreau79e57332018-10-02 16:01:16 +020026
Willy Tarreau278161c2020-06-04 11:18:28 +020027#include <haproxy/capture-t.h>
Willy Tarreau79e57332018-10-02 16:01:16 +020028#include <types/global.h>
29
Willy Tarreauaa74c4e2020-06-04 10:19:23 +020030#include <haproxy/arg.h>
Willy Tarreau79e57332018-10-02 16:01:16 +020031#include <proto/stream.h>
32
/* Argument checker registered for the "http_date" converter. It only forwards
 * <args> and <err> to smp_check_date_unit() and returns its result unchanged;
 * <conv>, <file> and <line> are not used here.
 */
static int smp_check_http_date_unit(struct arg *args, struct sample_conv *conv,
                                    const char *file, int line, char **err)
{
	int ret = smp_check_date_unit(args, err);

	return ret;
}
Willy Tarreau79e57332018-10-02 16:01:16 +020038
39/* takes an UINT value on input supposed to represent the time since EPOCH,
40 * adds an optional offset found in args[0] and emits a string representing
Damien Claisseae6f1252019-10-30 15:57:28 +000041 * the date in RFC-1123/5322 format. If optional unit param in args[1] is
42 * provided, decode timestamp in milliseconds ("ms") or microseconds("us"),
43 * and use relevant output date format.
Willy Tarreau79e57332018-10-02 16:01:16 +020044 */
45static int sample_conv_http_date(const struct arg *args, struct sample *smp, void *private)
46{
47 const char day[7][4] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
48 const char mon[12][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
49 struct buffer *temp;
50 struct tm *tm;
Damien Claisseae6f1252019-10-30 15:57:28 +000051 int sec_frac = 0;
52 time_t curr_date;
Willy Tarreau79e57332018-10-02 16:01:16 +020053
54 /* add offset */
55 if (args && (args[0].type == ARGT_SINT))
Damien Claisseae6f1252019-10-30 15:57:28 +000056 smp->data.u.sint += args[0].data.sint;
57
58 /* report in milliseconds */
59 if (args && args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_MS) {
60 sec_frac = smp->data.u.sint % 1000;
61 smp->data.u.sint /= 1000;
62 }
63 /* report in microseconds */
64 else if (args && args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_US) {
65 sec_frac = smp->data.u.sint % 1000000;
66 smp->data.u.sint /= 1000000;
67 }
68
69 /* With high numbers, the date returned can be negative, the 55 bits mask prevent this. */
70 curr_date = smp->data.u.sint & 0x007fffffffffffffLL;
Willy Tarreau79e57332018-10-02 16:01:16 +020071
72 tm = gmtime(&curr_date);
73 if (!tm)
74 return 0;
75
76 temp = get_trash_chunk();
Damien Claisseae6f1252019-10-30 15:57:28 +000077 if (args && args[1].type == ARGT_SINT && args[1].data.sint != TIME_UNIT_S) {
78 temp->data = snprintf(temp->area, temp->size - temp->data,
79 "%s, %02d %s %04d %02d:%02d:%02d.%d GMT",
80 day[tm->tm_wday], tm->tm_mday, mon[tm->tm_mon],
81 1900+tm->tm_year,
82 tm->tm_hour, tm->tm_min, tm->tm_sec, sec_frac);
83 } else {
84 temp->data = snprintf(temp->area, temp->size - temp->data,
85 "%s, %02d %s %04d %02d:%02d:%02d GMT",
86 day[tm->tm_wday], tm->tm_mday, mon[tm->tm_mon],
87 1900+tm->tm_year,
88 tm->tm_hour, tm->tm_min, tm->tm_sec);
89 }
Willy Tarreau79e57332018-10-02 16:01:16 +020090
91 smp->data.u.str = *temp;
92 smp->data.type = SMP_T_STR;
93 return 1;
94}
95
96/* Arguments: The list of expected value, the number of parts returned and the separator */
97static int sample_conv_q_preferred(const struct arg *args, struct sample *smp, void *private)
98{
99 const char *al = smp->data.u.str.area;
100 const char *end = al + smp->data.u.str.data;
101 const char *token;
102 int toklen;
103 int qvalue;
104 const char *str;
105 const char *w;
106 int best_q = 0;
107
108 /* Set the constant to the sample, because the output of the
109 * function will be peek in the constant configuration string.
110 */
111 smp->flags |= SMP_F_CONST;
112 smp->data.u.str.size = 0;
113 smp->data.u.str.area = "";
114 smp->data.u.str.data = 0;
115
116 /* Parse the accept language */
117 while (1) {
118
119 /* Jump spaces, quit if the end is detected. */
120 while (al < end && isspace((unsigned char)*al))
121 al++;
122 if (al >= end)
123 break;
124
125 /* Start of the fisrt word. */
126 token = al;
127
128 /* Look for separator: isspace(), ',' or ';'. Next value if 0 length word. */
129 while (al < end && *al != ';' && *al != ',' && !isspace((unsigned char)*al))
130 al++;
131 if (al == token)
132 goto expect_comma;
133
134 /* Length of the token. */
135 toklen = al - token;
136 qvalue = 1000;
137
138 /* Check if the token exists in the list. If the token not exists,
139 * jump to the next token.
140 */
141 str = args[0].data.str.area;
142 w = str;
143 while (1) {
144 if (*str == ';' || *str == '\0') {
145 if (http_language_range_match(token, toklen, w, str - w))
146 goto look_for_q;
147 if (*str == '\0')
148 goto expect_comma;
149 w = str + 1;
150 }
151 str++;
152 }
153 goto expect_comma;
154
155look_for_q:
156
157 /* Jump spaces, quit if the end is detected. */
158 while (al < end && isspace((unsigned char)*al))
159 al++;
160 if (al >= end)
161 goto process_value;
162
163 /* If ',' is found, process the result */
164 if (*al == ',')
165 goto process_value;
166
167 /* If the character is different from ';', look
168 * for the end of the header part in best effort.
169 */
170 if (*al != ';')
171 goto expect_comma;
172
173 /* Assumes that the char is ';', now expect "q=". */
174 al++;
175
176 /* Jump spaces, process value if the end is detected. */
177 while (al < end && isspace((unsigned char)*al))
178 al++;
179 if (al >= end)
180 goto process_value;
181
182 /* Expect 'q'. If no 'q', continue in best effort */
183 if (*al != 'q')
184 goto process_value;
185 al++;
186
187 /* Jump spaces, process value if the end is detected. */
188 while (al < end && isspace((unsigned char)*al))
189 al++;
190 if (al >= end)
191 goto process_value;
192
193 /* Expect '='. If no '=', continue in best effort */
194 if (*al != '=')
195 goto process_value;
196 al++;
197
198 /* Jump spaces, process value if the end is detected. */
199 while (al < end && isspace((unsigned char)*al))
200 al++;
201 if (al >= end)
202 goto process_value;
203
204 /* Parse the q value. */
205 qvalue = http_parse_qvalue(al, &al);
206
207process_value:
208
209 /* If the new q value is the best q value, then store the associated
210 * language in the response. If qvalue is the biggest value (1000),
211 * break the process.
212 */
213 if (qvalue > best_q) {
214 smp->data.u.str.area = (char *)w;
215 smp->data.u.str.data = str - w;
216 if (qvalue >= 1000)
217 break;
218 best_q = qvalue;
219 }
220
221expect_comma:
222
223 /* Expect comma or end. If the end is detected, quit the loop. */
224 while (al < end && *al != ',')
225 al++;
226 if (al >= end)
227 break;
228
229 /* Comma is found, jump it and restart the analyzer. */
230 al++;
231 }
232
233 /* Set default value if required. */
234 if (smp->data.u.str.data == 0 && args[1].type == ARGT_STR) {
235 smp->data.u.str.area = args[1].data.str.area;
236 smp->data.u.str.data = args[1].data.str.data;
237 }
238
239 /* Return true only if a matching language was found. */
240 return smp->data.u.str.data != 0;
241}
242
243/* This fetch url-decode any input string. */
244static int sample_conv_url_dec(const struct arg *args, struct sample *smp, void *private)
245{
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200246 int in_form = 0;
Willy Tarreau79e57332018-10-02 16:01:16 +0200247 int len;
248
Joseph Herlant942eea32018-11-15 13:57:22 -0800249 /* If the constant flag is set or if not size is available at
Willy Tarreau79e57332018-10-02 16:01:16 +0200250 * the end of the buffer, copy the string in other buffer
251 * before decoding.
252 */
253 if (smp->flags & SMP_F_CONST || smp->data.u.str.size <= smp->data.u.str.data) {
254 struct buffer *str = get_trash_chunk();
255 memcpy(str->area, smp->data.u.str.area, smp->data.u.str.data);
256 smp->data.u.str.area = str->area;
257 smp->data.u.str.size = str->size;
258 smp->flags &= ~SMP_F_CONST;
259 }
260
261 /* Add final \0 required by url_decode(), and convert the input string. */
262 smp->data.u.str.area[smp->data.u.str.data] = '\0';
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200263
264 if (args && (args[0].type == ARGT_SINT))
265 in_form = !!args[0].data.sint;
266
267 len = url_decode(smp->data.u.str.area, in_form);
Willy Tarreau79e57332018-10-02 16:01:16 +0200268 if (len < 0)
269 return 0;
270 smp->data.u.str.data = len;
271 return 1;
272}
273
274static int smp_conv_req_capture(const struct arg *args, struct sample *smp, void *private)
275{
Willy Tarreau55758962020-04-29 11:22:08 +0200276 struct proxy *fe;
Willy Tarreau79e57332018-10-02 16:01:16 +0200277 int idx, i;
278 struct cap_hdr *hdr;
279 int len;
280
281 if (!args || args->type != ARGT_SINT)
282 return 0;
283
Willy Tarreau55758962020-04-29 11:22:08 +0200284 if (!smp->strm)
285 return 0;
286
287 fe = strm_fe(smp->strm);
Willy Tarreau79e57332018-10-02 16:01:16 +0200288 idx = args->data.sint;
289
290 /* Check the availibity of the capture id. */
291 if (idx > fe->nb_req_cap - 1)
292 return 0;
293
294 /* Look for the original configuration. */
295 for (hdr = fe->req_cap, i = fe->nb_req_cap - 1;
296 hdr != NULL && i != idx ;
297 i--, hdr = hdr->next);
298 if (!hdr)
299 return 0;
300
301 /* check for the memory allocation */
302 if (smp->strm->req_cap[hdr->index] == NULL)
303 smp->strm->req_cap[hdr->index] = pool_alloc(hdr->pool);
304 if (smp->strm->req_cap[hdr->index] == NULL)
305 return 0;
306
307 /* Check length. */
308 len = smp->data.u.str.data;
309 if (len > hdr->len)
310 len = hdr->len;
311
312 /* Capture input data. */
313 memcpy(smp->strm->req_cap[idx], smp->data.u.str.area, len);
314 smp->strm->req_cap[idx][len] = '\0';
315
316 return 1;
317}
318
319static int smp_conv_res_capture(const struct arg *args, struct sample *smp, void *private)
320{
Willy Tarreau55758962020-04-29 11:22:08 +0200321 struct proxy *fe;
Willy Tarreau79e57332018-10-02 16:01:16 +0200322 int idx, i;
323 struct cap_hdr *hdr;
324 int len;
325
326 if (!args || args->type != ARGT_SINT)
327 return 0;
328
Willy Tarreau55758962020-04-29 11:22:08 +0200329 if (!smp->strm)
330 return 0;
331
332 fe = strm_fe(smp->strm);
Willy Tarreau79e57332018-10-02 16:01:16 +0200333 idx = args->data.sint;
334
335 /* Check the availibity of the capture id. */
336 if (idx > fe->nb_rsp_cap - 1)
337 return 0;
338
339 /* Look for the original configuration. */
340 for (hdr = fe->rsp_cap, i = fe->nb_rsp_cap - 1;
341 hdr != NULL && i != idx ;
342 i--, hdr = hdr->next);
343 if (!hdr)
344 return 0;
345
346 /* check for the memory allocation */
347 if (smp->strm->res_cap[hdr->index] == NULL)
348 smp->strm->res_cap[hdr->index] = pool_alloc(hdr->pool);
349 if (smp->strm->res_cap[hdr->index] == NULL)
350 return 0;
351
352 /* Check length. */
353 len = smp->data.u.str.data;
354 if (len > hdr->len)
355 len = hdr->len;
356
357 /* Capture input data. */
358 memcpy(smp->strm->res_cap[idx], smp->data.u.str.area, len);
359 smp->strm->res_cap[idx][len] = '\0';
360
361 return 1;
362}
363
364/************************************************************************/
365/* All supported converter keywords must be declared here. */
366/************************************************************************/
367
368/* Note: must not be declared <const> as its list will be overwritten */
369static struct sample_conv_kw_list sample_conv_kws = {ILH, {
Damien Claisseae6f1252019-10-30 15:57:28 +0000370 { "http_date", sample_conv_http_date, ARG2(0,SINT,STR), smp_check_http_date_unit, SMP_T_SINT, SMP_T_STR},
Willy Tarreau79e57332018-10-02 16:01:16 +0200371 { "language", sample_conv_q_preferred, ARG2(1,STR,STR), NULL, SMP_T_STR, SMP_T_STR},
372 { "capture-req", smp_conv_req_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
373 { "capture-res", smp_conv_res_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200374 { "url_dec", sample_conv_url_dec, ARG1(0,SINT), NULL, SMP_T_STR, SMP_T_STR},
Willy Tarreau79e57332018-10-02 16:01:16 +0200375 { NULL, NULL, 0, 0, 0 },
376}};
377
Willy Tarreau0108d902018-11-25 19:14:37 +0100378INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
Willy Tarreau79e57332018-10-02 16:01:16 +0200379
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */