Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 1 | /* |
| 2 | * HTTP sample conversion |
| 3 | * |
| 4 | * Copyright 2000-2018 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
| 13 | #include <sys/types.h> |
| 14 | |
| 15 | #include <ctype.h> |
| 16 | #include <string.h> |
| 17 | #include <time.h> |
| 18 | |
Willy Tarreau | 4c7e4b7 | 2020-05-27 12:58:42 +0200 | [diff] [blame] | 19 | #include <haproxy/api.h> |
Willy Tarreau | b255105 | 2020-06-09 09:07:15 +0200 | [diff] [blame] | 20 | #include <haproxy/arg.h> |
Willy Tarreau | dfd3de8 | 2020-06-04 23:46:14 +0200 | [diff] [blame] | 21 | #include <haproxy/capture-t.h> |
Willy Tarreau | c13ed53 | 2020-06-02 10:22:45 +0200 | [diff] [blame] | 22 | #include <haproxy/chunk.h> |
Willy Tarreau | cd72d8c | 2020-06-02 19:11:26 +0200 | [diff] [blame] | 23 | #include <haproxy/http.h> |
Willy Tarreau | d0ef439 | 2020-06-02 09:38:52 +0200 | [diff] [blame] | 24 | #include <haproxy/pool.h> |
Willy Tarreau | e6ce10b | 2020-06-04 15:33:47 +0200 | [diff] [blame] | 25 | #include <haproxy/sample.h> |
Willy Tarreau | dfd3de8 | 2020-06-04 23:46:14 +0200 | [diff] [blame] | 26 | #include <haproxy/stream.h> |
Willy Tarreau | 48fbcae | 2020-06-03 18:09:46 +0200 | [diff] [blame] | 27 | #include <haproxy/tools.h> |
Willy Tarreau | d678805 | 2020-05-27 15:59:00 +0200 | [diff] [blame] | 28 | #include <haproxy/version.h> |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 29 | |
/* Argument checker attached to the "http_date" converter. The converter
 * descriptor <conv> and the <file>/<line> location are not used here: <args>
 * and <err> are simply forwarded to smp_check_date_unit(), whose return value
 * is handed back unchanged.
 */
static int smp_check_http_date_unit(struct arg *args, struct sample_conv *conv,
                                    const char *file, int line, char **err)
{
	int ret;

	ret = smp_check_date_unit(args, err);
	return ret;
}
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 35 | |
| 36 | /* takes an UINT value on input supposed to represent the time since EPOCH, |
| 37 | * adds an optional offset found in args[0] and emits a string representing |
Damien Claisse | ae6f125 | 2019-10-30 15:57:28 +0000 | [diff] [blame] | 38 | * the date in RFC-1123/5322 format. If optional unit param in args[1] is |
| 39 | * provided, decode timestamp in milliseconds ("ms") or microseconds("us"), |
| 40 | * and use relevant output date format. |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 41 | */ |
| 42 | static int sample_conv_http_date(const struct arg *args, struct sample *smp, void *private) |
| 43 | { |
| 44 | const char day[7][4] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; |
| 45 | const char mon[12][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; |
| 46 | struct buffer *temp; |
| 47 | struct tm *tm; |
Damien Claisse | ae6f125 | 2019-10-30 15:57:28 +0000 | [diff] [blame] | 48 | int sec_frac = 0; |
| 49 | time_t curr_date; |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 50 | |
| 51 | /* add offset */ |
| 52 | if (args && (args[0].type == ARGT_SINT)) |
Damien Claisse | ae6f125 | 2019-10-30 15:57:28 +0000 | [diff] [blame] | 53 | smp->data.u.sint += args[0].data.sint; |
| 54 | |
| 55 | /* report in milliseconds */ |
| 56 | if (args && args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_MS) { |
| 57 | sec_frac = smp->data.u.sint % 1000; |
| 58 | smp->data.u.sint /= 1000; |
| 59 | } |
| 60 | /* report in microseconds */ |
| 61 | else if (args && args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_US) { |
| 62 | sec_frac = smp->data.u.sint % 1000000; |
| 63 | smp->data.u.sint /= 1000000; |
| 64 | } |
| 65 | |
| 66 | /* With high numbers, the date returned can be negative, the 55 bits mask prevent this. */ |
| 67 | curr_date = smp->data.u.sint & 0x007fffffffffffffLL; |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 68 | |
| 69 | tm = gmtime(&curr_date); |
| 70 | if (!tm) |
| 71 | return 0; |
| 72 | |
| 73 | temp = get_trash_chunk(); |
Damien Claisse | ae6f125 | 2019-10-30 15:57:28 +0000 | [diff] [blame] | 74 | if (args && args[1].type == ARGT_SINT && args[1].data.sint != TIME_UNIT_S) { |
| 75 | temp->data = snprintf(temp->area, temp->size - temp->data, |
| 76 | "%s, %02d %s %04d %02d:%02d:%02d.%d GMT", |
| 77 | day[tm->tm_wday], tm->tm_mday, mon[tm->tm_mon], |
| 78 | 1900+tm->tm_year, |
| 79 | tm->tm_hour, tm->tm_min, tm->tm_sec, sec_frac); |
| 80 | } else { |
| 81 | temp->data = snprintf(temp->area, temp->size - temp->data, |
| 82 | "%s, %02d %s %04d %02d:%02d:%02d GMT", |
| 83 | day[tm->tm_wday], tm->tm_mday, mon[tm->tm_mon], |
| 84 | 1900+tm->tm_year, |
| 85 | tm->tm_hour, tm->tm_min, tm->tm_sec); |
| 86 | } |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 87 | |
| 88 | smp->data.u.str = *temp; |
| 89 | smp->data.type = SMP_T_STR; |
| 90 | return 1; |
| 91 | } |
| 92 | |
| 93 | /* Arguments: The list of expected value, the number of parts returned and the separator */ |
| 94 | static int sample_conv_q_preferred(const struct arg *args, struct sample *smp, void *private) |
| 95 | { |
| 96 | const char *al = smp->data.u.str.area; |
| 97 | const char *end = al + smp->data.u.str.data; |
| 98 | const char *token; |
| 99 | int toklen; |
| 100 | int qvalue; |
| 101 | const char *str; |
| 102 | const char *w; |
| 103 | int best_q = 0; |
| 104 | |
| 105 | /* Set the constant to the sample, because the output of the |
| 106 | * function will be peek in the constant configuration string. |
| 107 | */ |
| 108 | smp->flags |= SMP_F_CONST; |
| 109 | smp->data.u.str.size = 0; |
| 110 | smp->data.u.str.area = ""; |
| 111 | smp->data.u.str.data = 0; |
| 112 | |
| 113 | /* Parse the accept language */ |
| 114 | while (1) { |
| 115 | |
| 116 | /* Jump spaces, quit if the end is detected. */ |
| 117 | while (al < end && isspace((unsigned char)*al)) |
| 118 | al++; |
| 119 | if (al >= end) |
| 120 | break; |
| 121 | |
Ilya Shipitsin | 46a030c | 2020-07-05 16:36:08 +0500 | [diff] [blame] | 122 | /* Start of the first word. */ |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 123 | token = al; |
| 124 | |
| 125 | /* Look for separator: isspace(), ',' or ';'. Next value if 0 length word. */ |
| 126 | while (al < end && *al != ';' && *al != ',' && !isspace((unsigned char)*al)) |
| 127 | al++; |
| 128 | if (al == token) |
| 129 | goto expect_comma; |
| 130 | |
| 131 | /* Length of the token. */ |
| 132 | toklen = al - token; |
| 133 | qvalue = 1000; |
| 134 | |
| 135 | /* Check if the token exists in the list. If the token not exists, |
| 136 | * jump to the next token. |
| 137 | */ |
| 138 | str = args[0].data.str.area; |
| 139 | w = str; |
| 140 | while (1) { |
| 141 | if (*str == ';' || *str == '\0') { |
| 142 | if (http_language_range_match(token, toklen, w, str - w)) |
| 143 | goto look_for_q; |
| 144 | if (*str == '\0') |
| 145 | goto expect_comma; |
| 146 | w = str + 1; |
| 147 | } |
| 148 | str++; |
| 149 | } |
| 150 | goto expect_comma; |
| 151 | |
| 152 | look_for_q: |
| 153 | |
| 154 | /* Jump spaces, quit if the end is detected. */ |
| 155 | while (al < end && isspace((unsigned char)*al)) |
| 156 | al++; |
| 157 | if (al >= end) |
| 158 | goto process_value; |
| 159 | |
| 160 | /* If ',' is found, process the result */ |
| 161 | if (*al == ',') |
| 162 | goto process_value; |
| 163 | |
| 164 | /* If the character is different from ';', look |
| 165 | * for the end of the header part in best effort. |
| 166 | */ |
| 167 | if (*al != ';') |
| 168 | goto expect_comma; |
| 169 | |
| 170 | /* Assumes that the char is ';', now expect "q=". */ |
| 171 | al++; |
| 172 | |
| 173 | /* Jump spaces, process value if the end is detected. */ |
| 174 | while (al < end && isspace((unsigned char)*al)) |
| 175 | al++; |
| 176 | if (al >= end) |
| 177 | goto process_value; |
| 178 | |
| 179 | /* Expect 'q'. If no 'q', continue in best effort */ |
| 180 | if (*al != 'q') |
| 181 | goto process_value; |
| 182 | al++; |
| 183 | |
| 184 | /* Jump spaces, process value if the end is detected. */ |
| 185 | while (al < end && isspace((unsigned char)*al)) |
| 186 | al++; |
| 187 | if (al >= end) |
| 188 | goto process_value; |
| 189 | |
| 190 | /* Expect '='. If no '=', continue in best effort */ |
| 191 | if (*al != '=') |
| 192 | goto process_value; |
| 193 | al++; |
| 194 | |
| 195 | /* Jump spaces, process value if the end is detected. */ |
| 196 | while (al < end && isspace((unsigned char)*al)) |
| 197 | al++; |
| 198 | if (al >= end) |
| 199 | goto process_value; |
| 200 | |
| 201 | /* Parse the q value. */ |
| 202 | qvalue = http_parse_qvalue(al, &al); |
| 203 | |
| 204 | process_value: |
| 205 | |
| 206 | /* If the new q value is the best q value, then store the associated |
| 207 | * language in the response. If qvalue is the biggest value (1000), |
| 208 | * break the process. |
| 209 | */ |
| 210 | if (qvalue > best_q) { |
| 211 | smp->data.u.str.area = (char *)w; |
| 212 | smp->data.u.str.data = str - w; |
| 213 | if (qvalue >= 1000) |
| 214 | break; |
| 215 | best_q = qvalue; |
| 216 | } |
| 217 | |
| 218 | expect_comma: |
| 219 | |
| 220 | /* Expect comma or end. If the end is detected, quit the loop. */ |
| 221 | while (al < end && *al != ',') |
| 222 | al++; |
| 223 | if (al >= end) |
| 224 | break; |
| 225 | |
| 226 | /* Comma is found, jump it and restart the analyzer. */ |
| 227 | al++; |
| 228 | } |
| 229 | |
| 230 | /* Set default value if required. */ |
| 231 | if (smp->data.u.str.data == 0 && args[1].type == ARGT_STR) { |
| 232 | smp->data.u.str.area = args[1].data.str.area; |
| 233 | smp->data.u.str.data = args[1].data.str.data; |
| 234 | } |
| 235 | |
| 236 | /* Return true only if a matching language was found. */ |
| 237 | return smp->data.u.str.data != 0; |
| 238 | } |
| 239 | |
| 240 | /* This fetch url-decode any input string. */ |
| 241 | static int sample_conv_url_dec(const struct arg *args, struct sample *smp, void *private) |
| 242 | { |
Willy Tarreau | 62ba9ba | 2020-04-23 17:54:47 +0200 | [diff] [blame] | 243 | int in_form = 0; |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 244 | int len; |
| 245 | |
Joseph Herlant | 942eea3 | 2018-11-15 13:57:22 -0800 | [diff] [blame] | 246 | /* If the constant flag is set or if not size is available at |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 247 | * the end of the buffer, copy the string in other buffer |
| 248 | * before decoding. |
| 249 | */ |
| 250 | if (smp->flags & SMP_F_CONST || smp->data.u.str.size <= smp->data.u.str.data) { |
| 251 | struct buffer *str = get_trash_chunk(); |
| 252 | memcpy(str->area, smp->data.u.str.area, smp->data.u.str.data); |
| 253 | smp->data.u.str.area = str->area; |
| 254 | smp->data.u.str.size = str->size; |
| 255 | smp->flags &= ~SMP_F_CONST; |
| 256 | } |
| 257 | |
| 258 | /* Add final \0 required by url_decode(), and convert the input string. */ |
| 259 | smp->data.u.str.area[smp->data.u.str.data] = '\0'; |
Willy Tarreau | 62ba9ba | 2020-04-23 17:54:47 +0200 | [diff] [blame] | 260 | |
| 261 | if (args && (args[0].type == ARGT_SINT)) |
| 262 | in_form = !!args[0].data.sint; |
| 263 | |
| 264 | len = url_decode(smp->data.u.str.area, in_form); |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 265 | if (len < 0) |
| 266 | return 0; |
| 267 | smp->data.u.str.data = len; |
| 268 | return 1; |
| 269 | } |
| 270 | |
| 271 | static int smp_conv_req_capture(const struct arg *args, struct sample *smp, void *private) |
| 272 | { |
Willy Tarreau | 5575896 | 2020-04-29 11:22:08 +0200 | [diff] [blame] | 273 | struct proxy *fe; |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 274 | int idx, i; |
| 275 | struct cap_hdr *hdr; |
| 276 | int len; |
| 277 | |
| 278 | if (!args || args->type != ARGT_SINT) |
| 279 | return 0; |
| 280 | |
Willy Tarreau | 5575896 | 2020-04-29 11:22:08 +0200 | [diff] [blame] | 281 | if (!smp->strm) |
| 282 | return 0; |
| 283 | |
| 284 | fe = strm_fe(smp->strm); |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 285 | idx = args->data.sint; |
| 286 | |
| 287 | /* Check the availibity of the capture id. */ |
| 288 | if (idx > fe->nb_req_cap - 1) |
| 289 | return 0; |
| 290 | |
| 291 | /* Look for the original configuration. */ |
| 292 | for (hdr = fe->req_cap, i = fe->nb_req_cap - 1; |
| 293 | hdr != NULL && i != idx ; |
| 294 | i--, hdr = hdr->next); |
| 295 | if (!hdr) |
| 296 | return 0; |
| 297 | |
| 298 | /* check for the memory allocation */ |
| 299 | if (smp->strm->req_cap[hdr->index] == NULL) |
| 300 | smp->strm->req_cap[hdr->index] = pool_alloc(hdr->pool); |
| 301 | if (smp->strm->req_cap[hdr->index] == NULL) |
| 302 | return 0; |
| 303 | |
| 304 | /* Check length. */ |
| 305 | len = smp->data.u.str.data; |
| 306 | if (len > hdr->len) |
| 307 | len = hdr->len; |
| 308 | |
| 309 | /* Capture input data. */ |
| 310 | memcpy(smp->strm->req_cap[idx], smp->data.u.str.area, len); |
| 311 | smp->strm->req_cap[idx][len] = '\0'; |
| 312 | |
| 313 | return 1; |
| 314 | } |
| 315 | |
| 316 | static int smp_conv_res_capture(const struct arg *args, struct sample *smp, void *private) |
| 317 | { |
Willy Tarreau | 5575896 | 2020-04-29 11:22:08 +0200 | [diff] [blame] | 318 | struct proxy *fe; |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 319 | int idx, i; |
| 320 | struct cap_hdr *hdr; |
| 321 | int len; |
| 322 | |
| 323 | if (!args || args->type != ARGT_SINT) |
| 324 | return 0; |
| 325 | |
Willy Tarreau | 5575896 | 2020-04-29 11:22:08 +0200 | [diff] [blame] | 326 | if (!smp->strm) |
| 327 | return 0; |
| 328 | |
| 329 | fe = strm_fe(smp->strm); |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 330 | idx = args->data.sint; |
| 331 | |
| 332 | /* Check the availibity of the capture id. */ |
| 333 | if (idx > fe->nb_rsp_cap - 1) |
| 334 | return 0; |
| 335 | |
| 336 | /* Look for the original configuration. */ |
| 337 | for (hdr = fe->rsp_cap, i = fe->nb_rsp_cap - 1; |
| 338 | hdr != NULL && i != idx ; |
| 339 | i--, hdr = hdr->next); |
| 340 | if (!hdr) |
| 341 | return 0; |
| 342 | |
| 343 | /* check for the memory allocation */ |
| 344 | if (smp->strm->res_cap[hdr->index] == NULL) |
| 345 | smp->strm->res_cap[hdr->index] = pool_alloc(hdr->pool); |
| 346 | if (smp->strm->res_cap[hdr->index] == NULL) |
| 347 | return 0; |
| 348 | |
| 349 | /* Check length. */ |
| 350 | len = smp->data.u.str.data; |
| 351 | if (len > hdr->len) |
| 352 | len = hdr->len; |
| 353 | |
| 354 | /* Capture input data. */ |
| 355 | memcpy(smp->strm->res_cap[idx], smp->data.u.str.area, len); |
| 356 | smp->strm->res_cap[idx][len] = '\0'; |
| 357 | |
| 358 | return 1; |
| 359 | } |
| 360 | |
| 361 | /************************************************************************/ |
| 362 | /* All supported converter keywords must be declared here. */ |
| 363 | /************************************************************************/ |
| 364 | |
| 365 | /* Note: must not be declared <const> as its list will be overwritten */ |
| 366 | static struct sample_conv_kw_list sample_conv_kws = {ILH, { |
Damien Claisse | ae6f125 | 2019-10-30 15:57:28 +0000 | [diff] [blame] | 367 | { "http_date", sample_conv_http_date, ARG2(0,SINT,STR), smp_check_http_date_unit, SMP_T_SINT, SMP_T_STR}, |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 368 | { "language", sample_conv_q_preferred, ARG2(1,STR,STR), NULL, SMP_T_STR, SMP_T_STR}, |
| 369 | { "capture-req", smp_conv_req_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR}, |
| 370 | { "capture-res", smp_conv_res_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR}, |
Willy Tarreau | 62ba9ba | 2020-04-23 17:54:47 +0200 | [diff] [blame] | 371 | { "url_dec", sample_conv_url_dec, ARG1(0,SINT), NULL, SMP_T_STR, SMP_T_STR}, |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 372 | { NULL, NULL, 0, 0, 0 }, |
| 373 | }}; |
| 374 | |
Willy Tarreau | 0108d90 | 2018-11-25 19:14:37 +0100 | [diff] [blame] | 375 | INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws); |
Willy Tarreau | 79e5733 | 2018-10-02 16:01:16 +0200 | [diff] [blame] | 376 | |
| 377 | /* |
| 378 | * Local variables: |
| 379 | * c-indent-level: 8 |
| 380 | * c-basic-offset: 8 |
| 381 | * End: |
| 382 | */ |