blob: 4afa6a2fd696a426215bf06c1ecc8a22b5d44e04 [file] [log] [blame]
/*
 * HTTP sample conversion
 *
 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <sys/types.h>
14
15#include <ctype.h>
16#include <string.h>
17#include <time.h>
18
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020019#include <haproxy/api.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020020#include <haproxy/arg.h>
Willy Tarreaudfd3de82020-06-04 23:46:14 +020021#include <haproxy/capture-t.h>
Willy Tarreauc13ed532020-06-02 10:22:45 +020022#include <haproxy/chunk.h>
Willy Tarreaucd72d8c2020-06-02 19:11:26 +020023#include <haproxy/http.h>
Willy Tarreaud0ef4392020-06-02 09:38:52 +020024#include <haproxy/pool.h>
Willy Tarreaue6ce10b2020-06-04 15:33:47 +020025#include <haproxy/sample.h>
Willy Tarreaudfd3de82020-06-04 23:46:14 +020026#include <haproxy/stream.h>
Willy Tarreau48fbcae2020-06-03 18:09:46 +020027#include <haproxy/tools.h>
Willy Tarreaud6788052020-05-27 15:59:00 +020028#include <haproxy/version.h>
Willy Tarreau79e57332018-10-02 16:01:16 +020029
/* Argument checker attached to the "http_date" converter keyword. The
 * <conv>, <file> and <line> parameters are not used here: the validation of
 * <args> is fully delegated to smp_check_date_unit(), and its return value
 * is passed back to the caller unchanged. Any error message is reported by
 * that function through <err>.
 */
static int smp_check_http_date_unit(struct arg *args, struct sample_conv *conv,
                                    const char *file, int line, char **err)
{
	int ret;

	ret = smp_check_date_unit(args, err);
	return ret;
}
Willy Tarreau79e57332018-10-02 16:01:16 +020035
36/* takes an UINT value on input supposed to represent the time since EPOCH,
37 * adds an optional offset found in args[0] and emits a string representing
Damien Claisseae6f1252019-10-30 15:57:28 +000038 * the date in RFC-1123/5322 format. If optional unit param in args[1] is
39 * provided, decode timestamp in milliseconds ("ms") or microseconds("us"),
40 * and use relevant output date format.
Willy Tarreau79e57332018-10-02 16:01:16 +020041 */
42static int sample_conv_http_date(const struct arg *args, struct sample *smp, void *private)
43{
44 const char day[7][4] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
45 const char mon[12][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
46 struct buffer *temp;
47 struct tm *tm;
Damien Claisseae6f1252019-10-30 15:57:28 +000048 int sec_frac = 0;
49 time_t curr_date;
Willy Tarreau79e57332018-10-02 16:01:16 +020050
51 /* add offset */
52 if (args && (args[0].type == ARGT_SINT))
Damien Claisseae6f1252019-10-30 15:57:28 +000053 smp->data.u.sint += args[0].data.sint;
54
55 /* report in milliseconds */
56 if (args && args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_MS) {
57 sec_frac = smp->data.u.sint % 1000;
58 smp->data.u.sint /= 1000;
59 }
60 /* report in microseconds */
61 else if (args && args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_US) {
62 sec_frac = smp->data.u.sint % 1000000;
63 smp->data.u.sint /= 1000000;
64 }
65
66 /* With high numbers, the date returned can be negative, the 55 bits mask prevent this. */
67 curr_date = smp->data.u.sint & 0x007fffffffffffffLL;
Willy Tarreau79e57332018-10-02 16:01:16 +020068
69 tm = gmtime(&curr_date);
70 if (!tm)
71 return 0;
72
73 temp = get_trash_chunk();
Damien Claisseae6f1252019-10-30 15:57:28 +000074 if (args && args[1].type == ARGT_SINT && args[1].data.sint != TIME_UNIT_S) {
75 temp->data = snprintf(temp->area, temp->size - temp->data,
76 "%s, %02d %s %04d %02d:%02d:%02d.%d GMT",
77 day[tm->tm_wday], tm->tm_mday, mon[tm->tm_mon],
78 1900+tm->tm_year,
79 tm->tm_hour, tm->tm_min, tm->tm_sec, sec_frac);
80 } else {
81 temp->data = snprintf(temp->area, temp->size - temp->data,
82 "%s, %02d %s %04d %02d:%02d:%02d GMT",
83 day[tm->tm_wday], tm->tm_mday, mon[tm->tm_mon],
84 1900+tm->tm_year,
85 tm->tm_hour, tm->tm_min, tm->tm_sec);
86 }
Willy Tarreau79e57332018-10-02 16:01:16 +020087
88 smp->data.u.str = *temp;
89 smp->data.type = SMP_T_STR;
90 return 1;
91}
92
93/* Arguments: The list of expected value, the number of parts returned and the separator */
94static int sample_conv_q_preferred(const struct arg *args, struct sample *smp, void *private)
95{
96 const char *al = smp->data.u.str.area;
97 const char *end = al + smp->data.u.str.data;
98 const char *token;
99 int toklen;
100 int qvalue;
101 const char *str;
102 const char *w;
103 int best_q = 0;
104
105 /* Set the constant to the sample, because the output of the
106 * function will be peek in the constant configuration string.
107 */
108 smp->flags |= SMP_F_CONST;
109 smp->data.u.str.size = 0;
110 smp->data.u.str.area = "";
111 smp->data.u.str.data = 0;
112
113 /* Parse the accept language */
114 while (1) {
115
116 /* Jump spaces, quit if the end is detected. */
117 while (al < end && isspace((unsigned char)*al))
118 al++;
119 if (al >= end)
120 break;
121
Ilya Shipitsin46a030c2020-07-05 16:36:08 +0500122 /* Start of the first word. */
Willy Tarreau79e57332018-10-02 16:01:16 +0200123 token = al;
124
125 /* Look for separator: isspace(), ',' or ';'. Next value if 0 length word. */
126 while (al < end && *al != ';' && *al != ',' && !isspace((unsigned char)*al))
127 al++;
128 if (al == token)
129 goto expect_comma;
130
131 /* Length of the token. */
132 toklen = al - token;
133 qvalue = 1000;
134
135 /* Check if the token exists in the list. If the token not exists,
136 * jump to the next token.
137 */
138 str = args[0].data.str.area;
139 w = str;
140 while (1) {
141 if (*str == ';' || *str == '\0') {
142 if (http_language_range_match(token, toklen, w, str - w))
143 goto look_for_q;
144 if (*str == '\0')
145 goto expect_comma;
146 w = str + 1;
147 }
148 str++;
149 }
150 goto expect_comma;
151
152look_for_q:
153
154 /* Jump spaces, quit if the end is detected. */
155 while (al < end && isspace((unsigned char)*al))
156 al++;
157 if (al >= end)
158 goto process_value;
159
160 /* If ',' is found, process the result */
161 if (*al == ',')
162 goto process_value;
163
164 /* If the character is different from ';', look
165 * for the end of the header part in best effort.
166 */
167 if (*al != ';')
168 goto expect_comma;
169
170 /* Assumes that the char is ';', now expect "q=". */
171 al++;
172
173 /* Jump spaces, process value if the end is detected. */
174 while (al < end && isspace((unsigned char)*al))
175 al++;
176 if (al >= end)
177 goto process_value;
178
179 /* Expect 'q'. If no 'q', continue in best effort */
180 if (*al != 'q')
181 goto process_value;
182 al++;
183
184 /* Jump spaces, process value if the end is detected. */
185 while (al < end && isspace((unsigned char)*al))
186 al++;
187 if (al >= end)
188 goto process_value;
189
190 /* Expect '='. If no '=', continue in best effort */
191 if (*al != '=')
192 goto process_value;
193 al++;
194
195 /* Jump spaces, process value if the end is detected. */
196 while (al < end && isspace((unsigned char)*al))
197 al++;
198 if (al >= end)
199 goto process_value;
200
201 /* Parse the q value. */
202 qvalue = http_parse_qvalue(al, &al);
203
204process_value:
205
206 /* If the new q value is the best q value, then store the associated
207 * language in the response. If qvalue is the biggest value (1000),
208 * break the process.
209 */
210 if (qvalue > best_q) {
211 smp->data.u.str.area = (char *)w;
212 smp->data.u.str.data = str - w;
213 if (qvalue >= 1000)
214 break;
215 best_q = qvalue;
216 }
217
218expect_comma:
219
220 /* Expect comma or end. If the end is detected, quit the loop. */
221 while (al < end && *al != ',')
222 al++;
223 if (al >= end)
224 break;
225
226 /* Comma is found, jump it and restart the analyzer. */
227 al++;
228 }
229
230 /* Set default value if required. */
231 if (smp->data.u.str.data == 0 && args[1].type == ARGT_STR) {
232 smp->data.u.str.area = args[1].data.str.area;
233 smp->data.u.str.data = args[1].data.str.data;
234 }
235
236 /* Return true only if a matching language was found. */
237 return smp->data.u.str.data != 0;
238}
239
240/* This fetch url-decode any input string. */
241static int sample_conv_url_dec(const struct arg *args, struct sample *smp, void *private)
242{
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200243 int in_form = 0;
Willy Tarreau79e57332018-10-02 16:01:16 +0200244 int len;
245
Joseph Herlant942eea32018-11-15 13:57:22 -0800246 /* If the constant flag is set or if not size is available at
Willy Tarreau79e57332018-10-02 16:01:16 +0200247 * the end of the buffer, copy the string in other buffer
248 * before decoding.
249 */
250 if (smp->flags & SMP_F_CONST || smp->data.u.str.size <= smp->data.u.str.data) {
251 struct buffer *str = get_trash_chunk();
252 memcpy(str->area, smp->data.u.str.area, smp->data.u.str.data);
253 smp->data.u.str.area = str->area;
254 smp->data.u.str.size = str->size;
255 smp->flags &= ~SMP_F_CONST;
256 }
257
258 /* Add final \0 required by url_decode(), and convert the input string. */
259 smp->data.u.str.area[smp->data.u.str.data] = '\0';
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200260
261 if (args && (args[0].type == ARGT_SINT))
262 in_form = !!args[0].data.sint;
263
264 len = url_decode(smp->data.u.str.area, in_form);
Willy Tarreau79e57332018-10-02 16:01:16 +0200265 if (len < 0)
266 return 0;
267 smp->data.u.str.data = len;
268 return 1;
269}
270
271static int smp_conv_req_capture(const struct arg *args, struct sample *smp, void *private)
272{
Willy Tarreau55758962020-04-29 11:22:08 +0200273 struct proxy *fe;
Willy Tarreau79e57332018-10-02 16:01:16 +0200274 int idx, i;
275 struct cap_hdr *hdr;
276 int len;
277
278 if (!args || args->type != ARGT_SINT)
279 return 0;
280
Willy Tarreau55758962020-04-29 11:22:08 +0200281 if (!smp->strm)
282 return 0;
283
284 fe = strm_fe(smp->strm);
Willy Tarreau79e57332018-10-02 16:01:16 +0200285 idx = args->data.sint;
286
287 /* Check the availibity of the capture id. */
288 if (idx > fe->nb_req_cap - 1)
289 return 0;
290
291 /* Look for the original configuration. */
292 for (hdr = fe->req_cap, i = fe->nb_req_cap - 1;
293 hdr != NULL && i != idx ;
294 i--, hdr = hdr->next);
295 if (!hdr)
296 return 0;
297
298 /* check for the memory allocation */
299 if (smp->strm->req_cap[hdr->index] == NULL)
300 smp->strm->req_cap[hdr->index] = pool_alloc(hdr->pool);
301 if (smp->strm->req_cap[hdr->index] == NULL)
302 return 0;
303
304 /* Check length. */
305 len = smp->data.u.str.data;
306 if (len > hdr->len)
307 len = hdr->len;
308
309 /* Capture input data. */
310 memcpy(smp->strm->req_cap[idx], smp->data.u.str.area, len);
311 smp->strm->req_cap[idx][len] = '\0';
312
313 return 1;
314}
315
316static int smp_conv_res_capture(const struct arg *args, struct sample *smp, void *private)
317{
Willy Tarreau55758962020-04-29 11:22:08 +0200318 struct proxy *fe;
Willy Tarreau79e57332018-10-02 16:01:16 +0200319 int idx, i;
320 struct cap_hdr *hdr;
321 int len;
322
323 if (!args || args->type != ARGT_SINT)
324 return 0;
325
Willy Tarreau55758962020-04-29 11:22:08 +0200326 if (!smp->strm)
327 return 0;
328
329 fe = strm_fe(smp->strm);
Willy Tarreau79e57332018-10-02 16:01:16 +0200330 idx = args->data.sint;
331
332 /* Check the availibity of the capture id. */
333 if (idx > fe->nb_rsp_cap - 1)
334 return 0;
335
336 /* Look for the original configuration. */
337 for (hdr = fe->rsp_cap, i = fe->nb_rsp_cap - 1;
338 hdr != NULL && i != idx ;
339 i--, hdr = hdr->next);
340 if (!hdr)
341 return 0;
342
343 /* check for the memory allocation */
344 if (smp->strm->res_cap[hdr->index] == NULL)
345 smp->strm->res_cap[hdr->index] = pool_alloc(hdr->pool);
346 if (smp->strm->res_cap[hdr->index] == NULL)
347 return 0;
348
349 /* Check length. */
350 len = smp->data.u.str.data;
351 if (len > hdr->len)
352 len = hdr->len;
353
354 /* Capture input data. */
355 memcpy(smp->strm->res_cap[idx], smp->data.u.str.area, len);
356 smp->strm->res_cap[idx][len] = '\0';
357
358 return 1;
359}
360
361/************************************************************************/
362/* All supported converter keywords must be declared here. */
363/************************************************************************/
364
365/* Note: must not be declared <const> as its list will be overwritten */
366static struct sample_conv_kw_list sample_conv_kws = {ILH, {
Damien Claisseae6f1252019-10-30 15:57:28 +0000367 { "http_date", sample_conv_http_date, ARG2(0,SINT,STR), smp_check_http_date_unit, SMP_T_SINT, SMP_T_STR},
Willy Tarreau79e57332018-10-02 16:01:16 +0200368 { "language", sample_conv_q_preferred, ARG2(1,STR,STR), NULL, SMP_T_STR, SMP_T_STR},
369 { "capture-req", smp_conv_req_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
370 { "capture-res", smp_conv_res_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
Willy Tarreau62ba9ba2020-04-23 17:54:47 +0200371 { "url_dec", sample_conv_url_dec, ARG1(0,SINT), NULL, SMP_T_STR, SMP_T_STR},
Willy Tarreau79e57332018-10-02 16:01:16 +0200372 { NULL, NULL, 0, 0, 0 },
373}};
374
Willy Tarreau0108d902018-11-25 19:14:37 +0100375INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
Willy Tarreau79e57332018-10-02 16:01:16 +0200376
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */