/*
 * HTTP semantics
 *
 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <ctype.h>
#include <string.h>
#include <common/config.h>
#include <common/http.h>
#include <common/standard.h>
Willy Tarreau35b51c62018-09-10 15:38:55 +020017
18/* It is about twice as fast on recent architectures to lookup a byte in a
19 * table than to perform a boolean AND or OR between two tests. Refer to
20 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
21 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
22 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
23 * digit. Note: please do not overwrite values in assignment since gcc-2.95
24 * will not handle them correctly. It's worth noting that chars 128..255 are
25 * nothing, not even control chars.
26 */
27const unsigned char http_char_classes[256] = {
28 [ 0] = HTTP_FLG_CTL,
29 [ 1] = HTTP_FLG_CTL,
30 [ 2] = HTTP_FLG_CTL,
31 [ 3] = HTTP_FLG_CTL,
32 [ 4] = HTTP_FLG_CTL,
33 [ 5] = HTTP_FLG_CTL,
34 [ 6] = HTTP_FLG_CTL,
35 [ 7] = HTTP_FLG_CTL,
36 [ 8] = HTTP_FLG_CTL,
37 [ 9] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP | HTTP_FLG_CTL,
38 [ 10] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
39 [ 11] = HTTP_FLG_CTL,
40 [ 12] = HTTP_FLG_CTL,
41 [ 13] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
42 [ 14] = HTTP_FLG_CTL,
43 [ 15] = HTTP_FLG_CTL,
44 [ 16] = HTTP_FLG_CTL,
45 [ 17] = HTTP_FLG_CTL,
46 [ 18] = HTTP_FLG_CTL,
47 [ 19] = HTTP_FLG_CTL,
48 [ 20] = HTTP_FLG_CTL,
49 [ 21] = HTTP_FLG_CTL,
50 [ 22] = HTTP_FLG_CTL,
51 [ 23] = HTTP_FLG_CTL,
52 [ 24] = HTTP_FLG_CTL,
53 [ 25] = HTTP_FLG_CTL,
54 [ 26] = HTTP_FLG_CTL,
55 [ 27] = HTTP_FLG_CTL,
56 [ 28] = HTTP_FLG_CTL,
57 [ 29] = HTTP_FLG_CTL,
58 [ 30] = HTTP_FLG_CTL,
59 [ 31] = HTTP_FLG_CTL,
60 [' '] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP,
61 ['!'] = HTTP_FLG_TOK,
62 ['"'] = HTTP_FLG_SEP,
63 ['#'] = HTTP_FLG_TOK,
64 ['$'] = HTTP_FLG_TOK,
65 ['%'] = HTTP_FLG_TOK,
66 ['&'] = HTTP_FLG_TOK,
67 [ 39] = HTTP_FLG_TOK,
68 ['('] = HTTP_FLG_SEP,
69 [')'] = HTTP_FLG_SEP,
70 ['*'] = HTTP_FLG_TOK,
71 ['+'] = HTTP_FLG_TOK,
72 [','] = HTTP_FLG_SEP,
73 ['-'] = HTTP_FLG_TOK,
74 ['.'] = HTTP_FLG_TOK | HTTP_FLG_VER,
75 ['/'] = HTTP_FLG_SEP | HTTP_FLG_VER,
76 ['0'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
77 ['1'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
78 ['2'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
79 ['3'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
80 ['4'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
81 ['5'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
82 ['6'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
83 ['7'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
84 ['8'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
85 ['9'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
86 [':'] = HTTP_FLG_SEP,
87 [';'] = HTTP_FLG_SEP,
88 ['<'] = HTTP_FLG_SEP,
89 ['='] = HTTP_FLG_SEP,
90 ['>'] = HTTP_FLG_SEP,
91 ['?'] = HTTP_FLG_SEP,
92 ['@'] = HTTP_FLG_SEP,
93 ['A'] = HTTP_FLG_TOK,
94 ['B'] = HTTP_FLG_TOK,
95 ['C'] = HTTP_FLG_TOK,
96 ['D'] = HTTP_FLG_TOK,
97 ['E'] = HTTP_FLG_TOK,
98 ['F'] = HTTP_FLG_TOK,
99 ['G'] = HTTP_FLG_TOK,
100 ['H'] = HTTP_FLG_TOK | HTTP_FLG_VER,
101 ['I'] = HTTP_FLG_TOK,
102 ['J'] = HTTP_FLG_TOK,
103 ['K'] = HTTP_FLG_TOK,
104 ['L'] = HTTP_FLG_TOK,
105 ['M'] = HTTP_FLG_TOK,
106 ['N'] = HTTP_FLG_TOK,
107 ['O'] = HTTP_FLG_TOK,
108 ['P'] = HTTP_FLG_TOK | HTTP_FLG_VER,
109 ['Q'] = HTTP_FLG_TOK,
110 ['R'] = HTTP_FLG_TOK | HTTP_FLG_VER,
111 ['S'] = HTTP_FLG_TOK | HTTP_FLG_VER,
112 ['T'] = HTTP_FLG_TOK | HTTP_FLG_VER,
113 ['U'] = HTTP_FLG_TOK,
114 ['V'] = HTTP_FLG_TOK,
115 ['W'] = HTTP_FLG_TOK,
116 ['X'] = HTTP_FLG_TOK,
117 ['Y'] = HTTP_FLG_TOK,
118 ['Z'] = HTTP_FLG_TOK,
119 ['['] = HTTP_FLG_SEP,
120 [ 92] = HTTP_FLG_SEP,
121 [']'] = HTTP_FLG_SEP,
122 ['^'] = HTTP_FLG_TOK,
123 ['_'] = HTTP_FLG_TOK,
124 ['`'] = HTTP_FLG_TOK,
125 ['a'] = HTTP_FLG_TOK,
126 ['b'] = HTTP_FLG_TOK,
127 ['c'] = HTTP_FLG_TOK,
128 ['d'] = HTTP_FLG_TOK,
129 ['e'] = HTTP_FLG_TOK,
130 ['f'] = HTTP_FLG_TOK,
131 ['g'] = HTTP_FLG_TOK,
132 ['h'] = HTTP_FLG_TOK,
133 ['i'] = HTTP_FLG_TOK,
134 ['j'] = HTTP_FLG_TOK,
135 ['k'] = HTTP_FLG_TOK,
136 ['l'] = HTTP_FLG_TOK,
137 ['m'] = HTTP_FLG_TOK,
138 ['n'] = HTTP_FLG_TOK,
139 ['o'] = HTTP_FLG_TOK,
140 ['p'] = HTTP_FLG_TOK,
141 ['q'] = HTTP_FLG_TOK,
142 ['r'] = HTTP_FLG_TOK,
143 ['s'] = HTTP_FLG_TOK,
144 ['t'] = HTTP_FLG_TOK,
145 ['u'] = HTTP_FLG_TOK,
146 ['v'] = HTTP_FLG_TOK,
147 ['w'] = HTTP_FLG_TOK,
148 ['x'] = HTTP_FLG_TOK,
149 ['y'] = HTTP_FLG_TOK,
150 ['z'] = HTTP_FLG_TOK,
151 ['{'] = HTTP_FLG_SEP,
152 ['|'] = HTTP_FLG_TOK,
153 ['}'] = HTTP_FLG_SEP,
154 ['~'] = HTTP_FLG_TOK,
155 [127] = HTTP_FLG_CTL,
156};
157
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200158/* We must put the messages here since GCC cannot initialize consts depending
159 * on strlen().
160 */
161struct buffer http_err_chunks[HTTP_ERR_SIZE];
162
163const struct ist HTTP_100 = IST("HTTP/1.1 100 Continue\r\n\r\n");
164
165/* Warning: no "connection" header is provided with the 3xx messages below */
166const char *HTTP_301 =
167 "HTTP/1.1 301 Moved Permanently\r\n"
168 "Content-length: 0\r\n"
169 "Location: "; /* not terminated since it will be concatenated with the URL */
170
171const char *HTTP_302 =
172 "HTTP/1.1 302 Found\r\n"
173 "Cache-Control: no-cache\r\n"
174 "Content-length: 0\r\n"
175 "Location: "; /* not terminated since it will be concatenated with the URL */
176
177/* same as 302 except that the browser MUST retry with the GET method */
178const char *HTTP_303 =
179 "HTTP/1.1 303 See Other\r\n"
180 "Cache-Control: no-cache\r\n"
181 "Content-length: 0\r\n"
182 "Location: "; /* not terminated since it will be concatenated with the URL */
183
184/* same as 302 except that the browser MUST retry with the same method */
185const char *HTTP_307 =
186 "HTTP/1.1 307 Temporary Redirect\r\n"
187 "Cache-Control: no-cache\r\n"
188 "Content-length: 0\r\n"
189 "Location: "; /* not terminated since it will be concatenated with the URL */
190
191/* same as 301 except that the browser MUST retry with the same method */
192const char *HTTP_308 =
193 "HTTP/1.1 308 Permanent Redirect\r\n"
194 "Content-length: 0\r\n"
195 "Location: "; /* not terminated since it will be concatenated with the URL */
196
197/* Warning: this one is an sprintf() fmt string, with <realm> as its only argument */
198const char *HTTP_401_fmt =
199 "HTTP/1.0 401 Unauthorized\r\n"
200 "Cache-Control: no-cache\r\n"
201 "Connection: close\r\n"
202 "Content-Type: text/html\r\n"
203 "WWW-Authenticate: Basic realm=\"%s\"\r\n"
204 "\r\n"
205 "<html><body><h1>401 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n";
206
207const char *HTTP_407_fmt =
208 "HTTP/1.0 407 Unauthorized\r\n"
209 "Cache-Control: no-cache\r\n"
210 "Connection: close\r\n"
211 "Content-Type: text/html\r\n"
212 "Proxy-Authenticate: Basic realm=\"%s\"\r\n"
213 "\r\n"
214 "<html><body><h1>407 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n";
215
216const int http_err_codes[HTTP_ERR_SIZE] = {
217 [HTTP_ERR_200] = 200, /* used by "monitor-uri" */
218 [HTTP_ERR_400] = 400,
219 [HTTP_ERR_403] = 403,
220 [HTTP_ERR_405] = 405,
221 [HTTP_ERR_408] = 408,
222 [HTTP_ERR_421] = 421,
223 [HTTP_ERR_425] = 425,
224 [HTTP_ERR_429] = 429,
225 [HTTP_ERR_500] = 500,
226 [HTTP_ERR_502] = 502,
227 [HTTP_ERR_503] = 503,
228 [HTTP_ERR_504] = 504,
229};
230
231static const char *http_err_msgs[HTTP_ERR_SIZE] = {
232 [HTTP_ERR_200] =
233 "HTTP/1.0 200 OK\r\n"
234 "Cache-Control: no-cache\r\n"
235 "Connection: close\r\n"
236 "Content-Type: text/html\r\n"
237 "\r\n"
238 "<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
239
240 [HTTP_ERR_400] =
241 "HTTP/1.0 400 Bad request\r\n"
242 "Cache-Control: no-cache\r\n"
243 "Connection: close\r\n"
244 "Content-Type: text/html\r\n"
245 "\r\n"
246 "<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
247
248 [HTTP_ERR_403] =
249 "HTTP/1.0 403 Forbidden\r\n"
250 "Cache-Control: no-cache\r\n"
251 "Connection: close\r\n"
252 "Content-Type: text/html\r\n"
253 "\r\n"
254 "<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
255
256 [HTTP_ERR_405] =
257 "HTTP/1.0 405 Method Not Allowed\r\n"
258 "Cache-Control: no-cache\r\n"
259 "Connection: close\r\n"
260 "Content-Type: text/html\r\n"
261 "\r\n"
262 "<html><body><h1>405 Method Not Allowed</h1>\nA request was made of a resource using a request method not supported by that resource\n</body></html>\n",
263
264 [HTTP_ERR_408] =
265 "HTTP/1.0 408 Request Time-out\r\n"
266 "Cache-Control: no-cache\r\n"
267 "Connection: close\r\n"
268 "Content-Type: text/html\r\n"
269 "\r\n"
270 "<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
271
272 [HTTP_ERR_421] =
273 "HTTP/1.0 421 Misdirected Request\r\n"
274 "Cache-Control: no-cache\r\n"
275 "Connection: close\r\n"
276 "Content-Type: text/html\r\n"
277 "\r\n"
278 "<html><body><h1>421 Misdirected Request</h1>\nRequest sent to a non-authoritative server.\n</body></html>\n",
279
280 [HTTP_ERR_425] =
281 "HTTP/1.0 425 Too Early\r\n"
282 "Cache-Control: no-cache\r\n"
283 "Connection: close\r\n"
284 "Content-Type: text/html\r\n"
285 "\r\n"
286 "<html><body><h1>425 Too Early</h1>\nYour browser sent early data.\n</body></html>\n",
287
288 [HTTP_ERR_429] =
289 "HTTP/1.0 429 Too Many Requests\r\n"
290 "Cache-Control: no-cache\r\n"
291 "Connection: close\r\n"
292 "Content-Type: text/html\r\n"
293 "\r\n"
294 "<html><body><h1>429 Too Many Requests</h1>\nYou have sent too many requests in a given amount of time.\n</body></html>\n",
295
296 [HTTP_ERR_500] =
297 "HTTP/1.0 500 Internal Server Error\r\n"
298 "Cache-Control: no-cache\r\n"
299 "Connection: close\r\n"
300 "Content-Type: text/html\r\n"
301 "\r\n"
302 "<html><body><h1>500 Internal Server Error</h1>\nAn internal server error occured.\n</body></html>\n",
303
304 [HTTP_ERR_502] =
305 "HTTP/1.0 502 Bad Gateway\r\n"
306 "Cache-Control: no-cache\r\n"
307 "Connection: close\r\n"
308 "Content-Type: text/html\r\n"
309 "\r\n"
310 "<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
311
312 [HTTP_ERR_503] =
313 "HTTP/1.0 503 Service Unavailable\r\n"
314 "Cache-Control: no-cache\r\n"
315 "Connection: close\r\n"
316 "Content-Type: text/html\r\n"
317 "\r\n"
318 "<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
319
320 [HTTP_ERR_504] =
321 "HTTP/1.0 504 Gateway Time-out\r\n"
322 "Cache-Control: no-cache\r\n"
323 "Connection: close\r\n"
324 "Content-Type: text/html\r\n"
325 "\r\n"
326 "<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
327
328};
329
Willy Tarreau35b51c62018-09-10 15:38:55 +0200330const struct ist http_known_methods[HTTP_METH_OTHER] = {
331 [HTTP_METH_OPTIONS] = IST("OPTIONS"),
332 [HTTP_METH_GET] = IST("GET"),
333 [HTTP_METH_HEAD] = IST("HEAD"),
334 [HTTP_METH_POST] = IST("POST"),
335 [HTTP_METH_PUT] = IST("PUT"),
336 [HTTP_METH_DELETE] = IST("DELETE"),
337 [HTTP_METH_TRACE] = IST("TRACE"),
338 [HTTP_METH_CONNECT] = IST("CONNECT"),
339};
340
341/*
342 * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown
343 * ones.
344 */
345enum http_meth_t find_http_meth(const char *str, const int len)
346{
347 const struct ist m = ist2(str, len);
348
349 if (isteq(m, ist("GET"))) return HTTP_METH_GET;
350 else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD;
351 else if (isteq(m, ist("POST"))) return HTTP_METH_POST;
352 else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT;
353 else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT;
354 else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS;
355 else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE;
356 else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE;
357 else return HTTP_METH_OTHER;
358}
Willy Tarreau6b952c82018-09-10 17:45:34 +0200359
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200360/* This function returns HTTP_ERR_<num> (enum) matching http status code.
361 * Returned value should match codes from http_err_codes.
362 */
363const int http_get_status_idx(unsigned int status)
364{
365 switch (status) {
366 case 200: return HTTP_ERR_200;
367 case 400: return HTTP_ERR_400;
368 case 403: return HTTP_ERR_403;
369 case 405: return HTTP_ERR_405;
370 case 408: return HTTP_ERR_408;
371 case 421: return HTTP_ERR_421;
372 case 425: return HTTP_ERR_425;
373 case 429: return HTTP_ERR_429;
374 case 500: return HTTP_ERR_500;
375 case 502: return HTTP_ERR_502;
376 case 503: return HTTP_ERR_503;
377 case 504: return HTTP_ERR_504;
378 default: return HTTP_ERR_500;
379 }
380}
381
/* Returns the reason phrase associated with the HTTP <status> code. This
 * function never fails: a status without a dedicated phrase gets the generic
 * name of its class ("Informational", "Success", "Redirection", "Client
 * Error", "Server Error"), and anything outside 100..599 gets "Other". The
 * returned pointer designates a constant string which must not be freed.
 */
const char *http_get_reason(unsigned int status)
{
	switch (status) {
	case 100: return "Continue";
	case 101: return "Switching Protocols";
	case 102: return "Processing";
	case 200: return "OK";
	case 201: return "Created";
	case 202: return "Accepted";
	case 203: return "Non-Authoritative Information";
	case 204: return "No Content";
	case 205: return "Reset Content";
	case 206: return "Partial Content";
	case 207: return "Multi-Status";
	case 210: return "Content Different";
	case 226: return "IM Used";
	case 300: return "Multiple Choices";
	case 301: return "Moved Permanently";
	case 302: return "Moved Temporarily";
	case 303: return "See Other";
	case 304: return "Not Modified";
	case 305: return "Use Proxy";
	case 307: return "Temporary Redirect";
	case 308: return "Permanent Redirect";
	case 310: return "Too many Redirects";
	case 400: return "Bad Request";
	case 401: return "Unauthorized";
	case 402: return "Payment Required";
	case 403: return "Forbidden";
	case 404: return "Not Found";
	case 405: return "Method Not Allowed";
	case 406: return "Not Acceptable";
	case 407: return "Proxy Authentication Required";
	case 408: return "Request Time-out";
	case 409: return "Conflict";
	case 410: return "Gone";
	case 411: return "Length Required";
	case 412: return "Precondition Failed";
	case 413: return "Request Entity Too Large";
	case 414: return "Request-URI Too Long";
	case 415: return "Unsupported Media Type";
	case 416: return "Requested range unsatisfiable";
	case 417: return "Expectation failed";
	case 418: return "I'm a teapot";
	case 421: return "Misdirected Request";
	case 422: return "Unprocessable entity";
	case 423: return "Locked";
	case 424: return "Method failure";
	case 425: return "Too Early";
	case 426: return "Upgrade Required";
	case 428: return "Precondition Required";
	case 429: return "Too Many Requests";
	case 431: return "Request Header Fields Too Large";
	case 449: return "Retry With";
	case 450: return "Blocked by Windows Parental Controls";
	case 451: return "Unavailable For Legal Reasons";
	case 456: return "Unrecoverable Error";
	case 499: return "client has closed connection";
	case 500: return "Internal Server Error";
	case 501: return "Not Implemented";
	case 502: return "Bad Gateway or Proxy Error";
	case 503: return "Service Unavailable";
	case 504: return "Gateway Time-out";
	case 505: return "HTTP Version not supported";
	case 506: return "Variant Also Negotiates"; /* was misspelled "negociate" */
	case 507: return "Insufficient storage";
	case 508: return "Loop detected";
	case 509: return "Bandwidth Limit Exceeded";
	case 510: return "Not extended";
	case 511: return "Network authentication required";
	case 520: return "Web server is returning an unknown error";
	default:
		break;
	}

	/* No dedicated phrase: fall back to the class name. Dividing by 100
	 * selects the class without relying on the GNU case-range extension.
	 */
	switch (status / 100) {
	case 1:  return "Informational";
	case 2:  return "Success";
	case 3:  return "Redirection";
	case 4:  return "Client Error";
	case 5:  return "Server Error";
	default: return "Other";
	}
}
467
Willy Tarreau6b952c82018-09-10 17:45:34 +0200468/* Parse the URI from the given transaction (which is assumed to be in request
469 * phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is
470 * returned. Otherwise the pointer and length are returned.
471 */
472struct ist http_get_path(const struct ist uri)
473{
474 const char *ptr, *end;
475
476 if (!uri.len)
477 goto not_found;
478
479 ptr = uri.ptr;
480 end = ptr + uri.len;
481
482 /* RFC7230, par. 2.7 :
483 * Request-URI = "*" | absuri | abspath | authority
484 */
485
486 if (*ptr == '*')
487 goto not_found;
488
489 if (isalpha((unsigned char)*ptr)) {
490 /* this is a scheme as described by RFC3986, par. 3.1 */
491 ptr++;
492 while (ptr < end &&
493 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
494 ptr++;
495 /* skip '://' */
496 if (ptr == end || *ptr++ != ':')
497 goto not_found;
498 if (ptr == end || *ptr++ != '/')
499 goto not_found;
500 if (ptr == end || *ptr++ != '/')
501 goto not_found;
502 }
503 /* skip [user[:passwd]@]host[:[port]] */
504
505 while (ptr < end && *ptr != '/')
506 ptr++;
507
508 if (ptr == end)
509 goto not_found;
510
511 /* OK, we got the '/' ! */
512 return ist2(ptr, end - ptr);
513
514 not_found:
515 return ist2(NULL, 0);
516}
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200517
/*
 * Tells whether the header line starting at <hdr> and ending before <end>
 * bears the name <name> of <len> chars immediately followed by a colon. The
 * name comparison is case-insensitive. Returns 0 if it does not match.
 * Otherwise returns the offset, relative to <hdr>, of the first character
 * after the colon which is neither a space nor a tab. When only spaces/tabs
 * (or nothing) follow the colon, <len>+2 is returned if that offset still
 * fits in the line, so that a value may be written after the first space;
 * failing that, <end>-<hdr> is returned.
 */
int http_header_match2(const char *hdr, const char *end,
                       const char *name, int len)
{
	const char *p;

	/* need room for the name and the colon, and both must match */
	if (hdr + len >= end || hdr[len] != ':' || strncasecmp(hdr, name, len) != 0)
		return 0;

	/* skip spaces and tabs after the colon */
	for (p = hdr + len + 1; p < end && HTTP_IS_SPHT(*p); p++)
		;

	if (p >= end && len + 2 <= end - hdr)
		return len + 2; /* we may replace starting from second space */

	return p - hdr;
}
543
/* Find the end of the header value contained between <s> and <e>. See RFC7230,
 * par 3.2 for more information. Note that it requires a valid header to return
 * a valid result. This works for headers defined as comma-separated lists.
 * Returns a pointer to the first comma found outside of a quoted string, or
 * <e> when there is none. Inside a quoted string, a backslash escapes the
 * character which follows it.
 */
char *http_find_hdr_value_end(char *s, const char *e)
{
	int quoted, qdpair;

	quoted = qdpair = 0;

#if defined(__x86_64__) || \
    defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
    defined(__ARM_ARCH_7A__)
	/* speedup: skip everything not a comma nor a double quote, one word at
	 * a time. The word is loaded with memcpy() so that the access is valid
	 * whatever the alignment of <s> and does not break aliasing rules, and
	 * the bound is checked on the remaining length so that no pointer is
	 * ever formed before the start of a short buffer.
	 */
	for (; s < e && (size_t)(e - s) >= sizeof(unsigned int); s += sizeof(unsigned int)) {
		unsigned int c;  // comma
		unsigned int q;  // quote

		memcpy(&c, s, sizeof(c));
		q = c;

		c ^= 0x2c2c2c2c; // contains one zero on a comma
		q ^= 0x22222222; // contains one zero on a quote

		c = (c - 0x01010101) & ~c; // contains 0x80 below a comma
		q = (q - 0x01010101) & ~q; // contains 0x80 below a quote

		if ((c | q) & 0x80808080)
			break;    // found a comma or a quote
	}
#endif
	/* byte-wise scan, restarting from the word which stopped the loop above */
	for (; s < e; s++) {
		if (qdpair)                    qdpair = 0;
		else if (quoted) {
			if (*s == '\\')        qdpair = 1;
			else if (*s == '"')    quoted = 0;
		}
		else if (*s == '"')            quoted = 1;
		else if (*s == ',')            return s;
	}
	return s;
}
583
/* Find the end of a cookie value contained between <s> and <e>. The scan is
 * the same as for regular header values except that a semi-colon ends the
 * value just like a comma does. See RFC2965 for more information. Delimiters
 * found inside a quoted string are ignored, and inside such a string a
 * backslash escapes the next character. Returns a pointer to the delimiter,
 * or <e> if none was found. Note that it requires a valid header to return a
 * valid result.
 */
char *http_find_cookie_value_end(char *s, const char *e)
{
	int in_quotes = 0;
	int escaped = 0;

	while (s < e) {
		const char c = *s;

		if (escaped) {
			/* this char was protected by a backslash */
			escaped = 0;
		}
		else if (in_quotes) {
			if (c == '\\')
				escaped = 1;
			else if (c == '"')
				in_quotes = 0;
		}
		else if (c == '"') {
			in_quotes = 1;
		}
		else if (c == ',' || c == ';') {
			break;
		}
		s++;
	}
	return s;
}
605
/* Try to find the next occurrence of a cookie name in a cookie header value.
 * The lookup begins at <hdr>. The pointer and size of the next occurrence of
 * the cookie value is returned into *value and *value_l, and the function
 * returns a pointer to the next pointer to search from if the value was found.
 * Otherwise if the cookie was not found, NULL is returned and neither value
 * nor value_l are touched. The input <hdr> string should first point to the
 * header's value, and the <hdr_end> pointer must point to the first character
 * not part of the value. <list> must be non-zero if value may represent a list
 * of values (cookie headers). This makes it faster to abort parsing when no
 * list is expected. The cookie name comparison is case-sensitive.
 */
char *http_extract_cookie_value(char *hdr, const char *hdr_end,
                                char *cookie_name, size_t cookie_name_l,
                                int list, char **value, size_t *value_l)
{
	char *equal, *att_end, *att_beg, *val_beg, *val_end;
	char *next;

	/* we search at least a cookie name followed by an equal, and more
	 * generally something like this :
	 * Cookie:    NAME1  =  VALUE 1  ; NAME2 = VALUE2 ; NAME3 = VALUE3\r\n
	 */
	for (att_beg = hdr; att_beg + cookie_name_l + 1 < hdr_end; att_beg = next + 1) {
		/* Iterate through all cookies on this line */

		while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
			att_beg++;

		/* find att_end : this is the first character after the last non
		 * space before the equal. It may be equal to hdr_end.
		 */
		equal = att_end = att_beg;

		while (equal < hdr_end) {
			if (*equal == '=' || *equal == ';' || (list && *equal == ','))
				break;
			if (HTTP_IS_SPHT(*equal++))
				continue;
			att_end = equal;
		}

		/* here, <equal> points to '=', a delimitor or the end. <att_end>
		 * is between <att_beg> and <equal>, both may be identical.
		 */

		/* look for end of cookie if there is an equal sign */
		if (equal < hdr_end && *equal == '=') {
			/* look for the beginning of the value */
			val_beg = equal + 1;
			while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
				val_beg++;

			/* find the end of the value, respecting quotes */
			next = http_find_cookie_value_end(val_beg, hdr_end);

			/* make val_end point to the first white space or delimitor after the value */
			val_end = next;
			while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
				val_end--;
		} else {
			val_beg = val_end = next = equal;
		}

		/* Ignore cookies with no equal sign. This is tested before
		 * looking at *att_beg because it is the only case where
		 * <att_beg> may have reached <hdr_end>, which must not be
		 * dereferenced. Both tests lead to the same "continue", so
		 * their order does not change the result.
		 */
		if (equal == next)
			continue;

		/* We have nothing to do with attributes beginning with '$'. However,
		 * they will automatically be removed if a header before them is removed,
		 * since they're supposed to be linked together.
		 */
		if (*att_beg == '$')
			continue;

		/* Now we have the cookie name between att_beg and att_end, and
		 * its value between val_beg and val_end.
		 */

		if (att_end - att_beg == cookie_name_l &&
		    memcmp(att_beg, cookie_name, cookie_name_l) == 0) {
			/* let's return this value and indicate where to go on from */
			*value = val_beg;
			*value_l = val_end - val_beg;
			return next + 1;
		}

		/* Set-Cookie headers only have the name in the first attr=value part */
		if (!list)
			break;
	}

	return NULL;
}
699
/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
 * value is larger than 1000, it is bound to 1000. The parser consumes up to
 * 1 digit, one dot and 3 digits and stops on the first invalid character.
 * Unparsable qvalues return 1000 as "q=1.000". If <end> is not NULL, *end is
 * set to the first character which was not consumed, which is <qvalue> itself
 * when nothing could be parsed. <qvalue> must be terminated by a character
 * which is neither a digit nor a dot (e.g. a NUL).
 */
int http_parse_qvalue(const char *qvalue, const char **end)
{
	int q = 1000;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q = (*qvalue++ - '0') * 1000;

	/* Only step over the dot when there is one: the character found in
	 * its place is not part of the qvalue and must be left to the caller
	 * (it may be the next delimiter or the end of the string).
	 */
	if (*qvalue != '.')
		goto out;
	qvalue++;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 100;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 10;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 1;
 out:
	if (q > 1000)
		q = 1000;
	if (end)
		*end = qvalue;
	return q;
}
734
/*
 * Looks up the URL parameter named <url_param_name> (<url_param_name_l> chars)
 * and returns the position where its first occurrence starts, or NULL if the
 * parameter is not found. Parameters are separated by whatever
 * http_is_param_delimiter() accepts for <delim>.
 *
 * <chunks> holds two (start, end) pointer pairs: chunks[0]..chunks[1] is the
 * first chunk and chunks[2]..chunks[3] the optional second one (chunks[2] is
 * NULL when absent).
 *
 * Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
 * the function will return query_string+8.
 *
 * Warning: the returned pointer may belong to either the first or the second
 * chunk. The caller must check the position before using the result.
 */
const char *http_find_url_param_pos(const char **chunks,
                                    const char* url_param_name, size_t url_param_name_l,
                                    char delim)
{
	const char **seg = chunks;   /* pair of pointers currently being scanned */
	const char *cur = seg[0];
	const char *limit = seg[1];
	const char *eq;
	const char *eq_limit;
	int head_len, tail_len;

	while (cur < limit) {
		/* Locate where the '=' has to be for the name to match here. */
		eq = cur + url_param_name_l;
		eq_limit = fix_pointer_if_wrap(chunks, &eq) ? chunks[3] : chunks[1];
		if (eq >= eq_limit)
			return NULL;

		if (*eq == '=') {
			if (cur + url_param_name_l > limit) {
				/* The name would cross the end of the current
				 * chunk: compare it in two parts.
				 */

				/* No further chunk to continue into. */
				if (seg != chunks || !seg[2])
					return NULL;

				/* Length of the part in each chunk. */
				head_len = limit - cur;
				tail_len = url_param_name_l - head_len;

				/* The second chunk is too short for its part. */
				if (seg[2] + tail_len > seg[3])
					return NULL;

				if (memcmp(cur, url_param_name, head_len) == 0 &&
				    memcmp(seg[2], url_param_name + head_len, tail_len) == 0)
					return cur;

				/* No match: move to the second chunk, past the
				 * bytes which were just compared.
				 */
				seg += 2;
				cur = seg[0] + tail_len;
				limit = seg[1];
			}
			else {
				/* The name fits in the current chunk. */
				if (memcmp(cur, url_param_name, url_param_name_l) == 0)
					return cur;
				cur += url_param_name_l + 1;
				/* NOTE(review): on wrap the limit becomes seg[2],
				 * i.e. the start of the second chunk rather than
				 * its end; kept as-is, intent to be confirmed.
				 */
				if (fix_pointer_if_wrap(chunks, &cur))
					limit = seg[2];
			}
		}

		for (;;) {
			/* Look for the next delimiter. */
			while (cur < limit && !http_is_param_delimiter(*cur, delim))
				cur++;
			if (cur < limit)
				break;
			/* End of chunk: continue in the second one if any. */
			if (seg != chunks || !seg[2])
				return NULL;
			seg += 2;
			cur = seg[0];
			limit = seg[1];
		}
		/* skip the delimiter itself */
		cur++;
	}
	return NULL;
}
821
/*
 * Given a url parameter name and a query string, find the next value.
 * The query string is described by <chunks>: chunks[0]..chunks[1] is the first
 * chunk and chunks[2]..chunks[3] the optional second one (chunks[2] is NULL
 * when absent). An empty url_param_name matches the first available parameter.
 * If the parameter is found, 1 is returned and *vstart / *vend are updated to
 * respectively provide a pointer to the value and its end.
 * Otherwise, 0 is returned and vstart/vend are not modified.
 */
int http_find_next_url_param(const char **chunks,
                             const char* url_param_name, size_t url_param_name_l,
                             const char **vstart, const char **vend, char delim)
{
	const char *arg_start, *qs_end;
	const char *value_start, *value_end;

	arg_start = chunks[0];
	qs_end = chunks[1];
	if (url_param_name_l) {
		/* Looks for an argument name. */
		arg_start = http_find_url_param_pos(chunks,
		                                    url_param_name, url_param_name_l,
		                                    delim);
		/* Not found: leave before comparing a NULL pointer below. */
		if (!arg_start)
			return 0;
		/* Check for wrapping. */
		if (arg_start >= qs_end)
			qs_end = chunks[3];
	}
	if (!arg_start)
		return 0;

	if (!url_param_name_l) {
		while (1) {
			/* looks for the first argument. */
			value_start = memchr(arg_start, '=', qs_end - arg_start);
			if (!value_start) {
				/* Check for wrapping. */
				if (arg_start >= chunks[0] &&
				    arg_start < chunks[1] &&
				    chunks[2]) {
					arg_start = chunks[2];
					qs_end = chunks[3];
					continue;
				}
				return 0;
			}
			break;
		}
		/* the value starts right after the '=' */
		value_start++;
	}
	else {
		/* Jump the argument length. */
		value_start = arg_start + url_param_name_l + 1;

		/* Check for pointer wrapping. */
		if (fix_pointer_if_wrap(chunks, &value_start)) {
			/* Update the end pointer. */
			qs_end = chunks[3];

			/* Check for overflow. */
			if (value_start >= qs_end)
				return 0;
		}
	}

	value_end = value_start;

	while (1) {
		while ((value_end < qs_end) && !http_is_param_delimiter(*value_end, delim))
			value_end++;
		if (value_end < qs_end)
			break;
		/* process buffer wrapping.
		 * NOTE(review): when the scan above stops exactly on chunks[1],
		 * "value_end < chunks[1]" is false, so the wrap into the second
		 * chunk does not appear to be taken in that case; kept as-is,
		 * to be confirmed.
		 */
		if (value_end >= chunks[0] &&
		    value_end < chunks[1] &&
		    chunks[2]) {
			value_end = chunks[2];
			qs_end = chunks[3];
			continue;
		}
		break;
	}

	*vstart = value_start;
	*vend = value_end;
	return 1;
}
906
907
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200908/* post-initializes the HTTP parts. Returns non-zero on error, with <err>
909 * pointing to the error message.
910 */
911int init_http(char **err)
912{
913 int msg;
914
915 for (msg = 0; msg < HTTP_ERR_SIZE; msg++) {
916 if (!http_err_msgs[msg]) {
917 memprintf(err, "Internal error: no message defined for HTTP return code %d", msg);
918 return 0;
919 }
920
921 http_err_chunks[msg].area = (char *)http_err_msgs[msg];
922 http_err_chunks[msg].data = strlen(http_err_msgs[msg]);
923 }
924 return 1;
925}