/*
 * HTTP semantics
 *
 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
#include <ctype.h>
#include <string.h>
#include <common/config.h>
#include <common/http.h>
#include <common/standard.h>
Willy Tarreau35b51c62018-09-10 15:38:55 +020017
18/* It is about twice as fast on recent architectures to lookup a byte in a
19 * table than to perform a boolean AND or OR between two tests. Refer to
20 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
21 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
22 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
23 * digit. Note: please do not overwrite values in assignment since gcc-2.95
24 * will not handle them correctly. It's worth noting that chars 128..255 are
25 * nothing, not even control chars.
26 */
27const unsigned char http_char_classes[256] = {
28 [ 0] = HTTP_FLG_CTL,
29 [ 1] = HTTP_FLG_CTL,
30 [ 2] = HTTP_FLG_CTL,
31 [ 3] = HTTP_FLG_CTL,
32 [ 4] = HTTP_FLG_CTL,
33 [ 5] = HTTP_FLG_CTL,
34 [ 6] = HTTP_FLG_CTL,
35 [ 7] = HTTP_FLG_CTL,
36 [ 8] = HTTP_FLG_CTL,
37 [ 9] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP | HTTP_FLG_CTL,
38 [ 10] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
39 [ 11] = HTTP_FLG_CTL,
40 [ 12] = HTTP_FLG_CTL,
41 [ 13] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
42 [ 14] = HTTP_FLG_CTL,
43 [ 15] = HTTP_FLG_CTL,
44 [ 16] = HTTP_FLG_CTL,
45 [ 17] = HTTP_FLG_CTL,
46 [ 18] = HTTP_FLG_CTL,
47 [ 19] = HTTP_FLG_CTL,
48 [ 20] = HTTP_FLG_CTL,
49 [ 21] = HTTP_FLG_CTL,
50 [ 22] = HTTP_FLG_CTL,
51 [ 23] = HTTP_FLG_CTL,
52 [ 24] = HTTP_FLG_CTL,
53 [ 25] = HTTP_FLG_CTL,
54 [ 26] = HTTP_FLG_CTL,
55 [ 27] = HTTP_FLG_CTL,
56 [ 28] = HTTP_FLG_CTL,
57 [ 29] = HTTP_FLG_CTL,
58 [ 30] = HTTP_FLG_CTL,
59 [ 31] = HTTP_FLG_CTL,
60 [' '] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP,
61 ['!'] = HTTP_FLG_TOK,
62 ['"'] = HTTP_FLG_SEP,
63 ['#'] = HTTP_FLG_TOK,
64 ['$'] = HTTP_FLG_TOK,
65 ['%'] = HTTP_FLG_TOK,
66 ['&'] = HTTP_FLG_TOK,
67 [ 39] = HTTP_FLG_TOK,
68 ['('] = HTTP_FLG_SEP,
69 [')'] = HTTP_FLG_SEP,
70 ['*'] = HTTP_FLG_TOK,
71 ['+'] = HTTP_FLG_TOK,
72 [','] = HTTP_FLG_SEP,
73 ['-'] = HTTP_FLG_TOK,
74 ['.'] = HTTP_FLG_TOK | HTTP_FLG_VER,
75 ['/'] = HTTP_FLG_SEP | HTTP_FLG_VER,
76 ['0'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
77 ['1'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
78 ['2'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
79 ['3'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
80 ['4'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
81 ['5'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
82 ['6'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
83 ['7'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
84 ['8'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
85 ['9'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
86 [':'] = HTTP_FLG_SEP,
87 [';'] = HTTP_FLG_SEP,
88 ['<'] = HTTP_FLG_SEP,
89 ['='] = HTTP_FLG_SEP,
90 ['>'] = HTTP_FLG_SEP,
91 ['?'] = HTTP_FLG_SEP,
92 ['@'] = HTTP_FLG_SEP,
93 ['A'] = HTTP_FLG_TOK,
94 ['B'] = HTTP_FLG_TOK,
95 ['C'] = HTTP_FLG_TOK,
96 ['D'] = HTTP_FLG_TOK,
97 ['E'] = HTTP_FLG_TOK,
98 ['F'] = HTTP_FLG_TOK,
99 ['G'] = HTTP_FLG_TOK,
100 ['H'] = HTTP_FLG_TOK | HTTP_FLG_VER,
101 ['I'] = HTTP_FLG_TOK,
102 ['J'] = HTTP_FLG_TOK,
103 ['K'] = HTTP_FLG_TOK,
104 ['L'] = HTTP_FLG_TOK,
105 ['M'] = HTTP_FLG_TOK,
106 ['N'] = HTTP_FLG_TOK,
107 ['O'] = HTTP_FLG_TOK,
108 ['P'] = HTTP_FLG_TOK | HTTP_FLG_VER,
109 ['Q'] = HTTP_FLG_TOK,
110 ['R'] = HTTP_FLG_TOK | HTTP_FLG_VER,
111 ['S'] = HTTP_FLG_TOK | HTTP_FLG_VER,
112 ['T'] = HTTP_FLG_TOK | HTTP_FLG_VER,
113 ['U'] = HTTP_FLG_TOK,
114 ['V'] = HTTP_FLG_TOK,
115 ['W'] = HTTP_FLG_TOK,
116 ['X'] = HTTP_FLG_TOK,
117 ['Y'] = HTTP_FLG_TOK,
118 ['Z'] = HTTP_FLG_TOK,
119 ['['] = HTTP_FLG_SEP,
120 [ 92] = HTTP_FLG_SEP,
121 [']'] = HTTP_FLG_SEP,
122 ['^'] = HTTP_FLG_TOK,
123 ['_'] = HTTP_FLG_TOK,
124 ['`'] = HTTP_FLG_TOK,
125 ['a'] = HTTP_FLG_TOK,
126 ['b'] = HTTP_FLG_TOK,
127 ['c'] = HTTP_FLG_TOK,
128 ['d'] = HTTP_FLG_TOK,
129 ['e'] = HTTP_FLG_TOK,
130 ['f'] = HTTP_FLG_TOK,
131 ['g'] = HTTP_FLG_TOK,
132 ['h'] = HTTP_FLG_TOK,
133 ['i'] = HTTP_FLG_TOK,
134 ['j'] = HTTP_FLG_TOK,
135 ['k'] = HTTP_FLG_TOK,
136 ['l'] = HTTP_FLG_TOK,
137 ['m'] = HTTP_FLG_TOK,
138 ['n'] = HTTP_FLG_TOK,
139 ['o'] = HTTP_FLG_TOK,
140 ['p'] = HTTP_FLG_TOK,
141 ['q'] = HTTP_FLG_TOK,
142 ['r'] = HTTP_FLG_TOK,
143 ['s'] = HTTP_FLG_TOK,
144 ['t'] = HTTP_FLG_TOK,
145 ['u'] = HTTP_FLG_TOK,
146 ['v'] = HTTP_FLG_TOK,
147 ['w'] = HTTP_FLG_TOK,
148 ['x'] = HTTP_FLG_TOK,
149 ['y'] = HTTP_FLG_TOK,
150 ['z'] = HTTP_FLG_TOK,
151 ['{'] = HTTP_FLG_SEP,
152 ['|'] = HTTP_FLG_TOK,
153 ['}'] = HTTP_FLG_SEP,
154 ['~'] = HTTP_FLG_TOK,
155 [127] = HTTP_FLG_CTL,
156};
157
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200158const struct ist HTTP_100 = IST("HTTP/1.1 100 Continue\r\n\r\n");
159
Frédéric Lécaille9ca51aa2018-11-12 10:06:54 +0100160const struct ist HTTP_103 = IST("HTTP/1.1 103 Early Hints\r\n");
161
/* Redirect message prefixes. Each one deliberately stops right after
 * "Location: " so that the target URL (and the end of headers) can be
 * appended by the code which uses it.
 * Warning: no "connection" header is provided with the 3xx messages below.
 */
const char *HTTP_301 =
	"HTTP/1.1 301 Moved Permanently\r\n"
	"Content-length: 0\r\n"
	"Location: "; /* not terminated since it will be concatenated with the URL */

const char *HTTP_302 =
	"HTTP/1.1 302 Found\r\n"
	"Cache-Control: no-cache\r\n"
	"Content-length: 0\r\n"
	"Location: "; /* not terminated since it will be concatenated with the URL */

/* same as 302 except that the browser MUST retry with the GET method */
const char *HTTP_303 =
	"HTTP/1.1 303 See Other\r\n"
	"Cache-Control: no-cache\r\n"
	"Content-length: 0\r\n"
	"Location: "; /* not terminated since it will be concatenated with the URL */

/* same as 302 except that the browser MUST retry with the same method */
const char *HTTP_307 =
	"HTTP/1.1 307 Temporary Redirect\r\n"
	"Cache-Control: no-cache\r\n"
	"Content-length: 0\r\n"
	"Location: "; /* not terminated since it will be concatenated with the URL */

/* same as 301 except that the browser MUST retry with the same method */
const char *HTTP_308 =
	"HTTP/1.1 308 Permanent Redirect\r\n"
	"Content-length: 0\r\n"
	"Location: "; /* not terminated since it will be concatenated with the URL */
193
/* Warning: this one is an sprintf() fmt string, with <realm> as its only
 * argument. The "%s" only appears in the headers, so the advertised
 * Content-length (112) always matches the fixed HTML body.
 */
const char *HTTP_401_fmt =
	"HTTP/1.1 401 Unauthorized\r\n"
	"Content-length: 112\r\n"
	"Cache-Control: no-cache\r\n"
	"Connection: close\r\n"
	"Content-Type: text/html\r\n"
	"WWW-Authenticate: Basic realm=\"%s\"\r\n"
	"\r\n"
	"<html><body><h1>401 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n";

/* Same as above for proxy authentication: also an sprintf() fmt string with
 * <realm> as its only argument.
 */
const char *HTTP_407_fmt =
	"HTTP/1.1 407 Unauthorized\r\n"
	"Content-length: 112\r\n"
	"Cache-Control: no-cache\r\n"
	"Connection: close\r\n"
	"Content-Type: text/html\r\n"
	"Proxy-Authenticate: Basic realm=\"%s\"\r\n"
	"\r\n"
	"<html><body><h1>407 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n";
214
215const int http_err_codes[HTTP_ERR_SIZE] = {
216 [HTTP_ERR_200] = 200, /* used by "monitor-uri" */
217 [HTTP_ERR_400] = 400,
218 [HTTP_ERR_403] = 403,
Florian Tham9205fea2020-01-08 13:35:30 +0100219 [HTTP_ERR_404] = 404,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200220 [HTTP_ERR_405] = 405,
221 [HTTP_ERR_408] = 408,
Florian Tham272e29b2020-01-08 10:19:05 +0100222 [HTTP_ERR_410] = 410,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200223 [HTTP_ERR_421] = 421,
224 [HTTP_ERR_425] = 425,
225 [HTTP_ERR_429] = 429,
226 [HTTP_ERR_500] = 500,
227 [HTTP_ERR_502] = 502,
228 [HTTP_ERR_503] = 503,
229 [HTTP_ERR_504] = 504,
230};
231
Christopher Fauleta7b677c2018-11-29 16:48:49 +0100232const char *http_err_msgs[HTTP_ERR_SIZE] = {
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200233 [HTTP_ERR_200] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200234 "HTTP/1.1 200 OK\r\n"
235 "Content-length: 58\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200236 "Cache-Control: no-cache\r\n"
237 "Connection: close\r\n"
238 "Content-Type: text/html\r\n"
239 "\r\n"
240 "<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
241
242 [HTTP_ERR_400] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200243 "HTTP/1.1 400 Bad request\r\n"
244 "Content-length: 90\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200245 "Cache-Control: no-cache\r\n"
246 "Connection: close\r\n"
247 "Content-Type: text/html\r\n"
248 "\r\n"
249 "<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
250
251 [HTTP_ERR_403] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200252 "HTTP/1.1 403 Forbidden\r\n"
253 "Content-length: 93\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200254 "Cache-Control: no-cache\r\n"
255 "Connection: close\r\n"
256 "Content-Type: text/html\r\n"
257 "\r\n"
258 "<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
259
Florian Tham9205fea2020-01-08 13:35:30 +0100260 [HTTP_ERR_404] =
261 "HTTP/1.1 404 Not Found\r\n"
262 "Content-length: 83\r\n"
263 "Cache-Control: no-cache\r\n"
264 "Connection: close\r\n"
265 "Content-Type: text/html\r\n"
266 "\r\n"
267 "<html><body><h1>404 Not Found</h1>\nThe resource could not be found.\n</body></html>\n",
268
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200269 [HTTP_ERR_405] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200270 "HTTP/1.1 405 Method Not Allowed\r\n"
271 "Content-length: 146\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200272 "Cache-Control: no-cache\r\n"
273 "Connection: close\r\n"
274 "Content-Type: text/html\r\n"
275 "\r\n"
276 "<html><body><h1>405 Method Not Allowed</h1>\nA request was made of a resource using a request method not supported by that resource\n</body></html>\n",
277
278 [HTTP_ERR_408] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200279 "HTTP/1.1 408 Request Time-out\r\n"
280 "Content-length: 110\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200281 "Cache-Control: no-cache\r\n"
282 "Connection: close\r\n"
283 "Content-Type: text/html\r\n"
284 "\r\n"
285 "<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
286
Florian Tham272e29b2020-01-08 10:19:05 +0100287 [HTTP_ERR_410] =
288 "HTTP/1.1 410 Gone\r\n"
289 "Content-length: 114\r\n"
290 "Cache-Control: no-cache\r\n"
291 "Connection: close\r\n"
292 "Content-Type: text/html\r\n"
293 "\r\n"
294 "<html><body><h1>410 Gone</h1>\nThe resource is no longer available and will not be available again.\n</body></html>\n",
295
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200296 [HTTP_ERR_421] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200297 "HTTP/1.1 421 Misdirected Request\r\n"
298 "Content-length: 104\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200299 "Cache-Control: no-cache\r\n"
300 "Connection: close\r\n"
301 "Content-Type: text/html\r\n"
302 "\r\n"
303 "<html><body><h1>421 Misdirected Request</h1>\nRequest sent to a non-authoritative server.\n</body></html>\n",
304
305 [HTTP_ERR_425] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200306 "HTTP/1.1 425 Too Early\r\n"
307 "Content-length: 80\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200308 "Cache-Control: no-cache\r\n"
309 "Connection: close\r\n"
310 "Content-Type: text/html\r\n"
311 "\r\n"
312 "<html><body><h1>425 Too Early</h1>\nYour browser sent early data.\n</body></html>\n",
313
314 [HTTP_ERR_429] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200315 "HTTP/1.1 429 Too Many Requests\r\n"
316 "Content-length: 117\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200317 "Cache-Control: no-cache\r\n"
318 "Connection: close\r\n"
319 "Content-Type: text/html\r\n"
320 "\r\n"
321 "<html><body><h1>429 Too Many Requests</h1>\nYou have sent too many requests in a given amount of time.\n</body></html>\n",
322
323 [HTTP_ERR_500] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200324 "HTTP/1.1 500 Internal Server Error\r\n"
325 "Content-length: 96\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200326 "Cache-Control: no-cache\r\n"
327 "Connection: close\r\n"
328 "Content-Type: text/html\r\n"
329 "\r\n"
330 "<html><body><h1>500 Internal Server Error</h1>\nAn internal server error occured.\n</body></html>\n",
331
332 [HTTP_ERR_502] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200333 "HTTP/1.1 502 Bad Gateway\r\n"
334 "Content-length: 107\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200335 "Cache-Control: no-cache\r\n"
336 "Connection: close\r\n"
337 "Content-Type: text/html\r\n"
338 "\r\n"
339 "<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
340
341 [HTTP_ERR_503] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200342 "HTTP/1.1 503 Service Unavailable\r\n"
343 "Content-length: 107\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200344 "Cache-Control: no-cache\r\n"
345 "Connection: close\r\n"
346 "Content-Type: text/html\r\n"
347 "\r\n"
348 "<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
349
350 [HTTP_ERR_504] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200351 "HTTP/1.1 504 Gateway Time-out\r\n"
352 "Content-length: 92\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200353 "Cache-Control: no-cache\r\n"
354 "Connection: close\r\n"
355 "Content-Type: text/html\r\n"
356 "\r\n"
357 "<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
358
359};
360
Willy Tarreau35b51c62018-09-10 15:38:55 +0200361const struct ist http_known_methods[HTTP_METH_OTHER] = {
362 [HTTP_METH_OPTIONS] = IST("OPTIONS"),
363 [HTTP_METH_GET] = IST("GET"),
364 [HTTP_METH_HEAD] = IST("HEAD"),
365 [HTTP_METH_POST] = IST("POST"),
366 [HTTP_METH_PUT] = IST("PUT"),
367 [HTTP_METH_DELETE] = IST("DELETE"),
368 [HTTP_METH_TRACE] = IST("TRACE"),
369 [HTTP_METH_CONNECT] = IST("CONNECT"),
370};
371
372/*
373 * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown
374 * ones.
375 */
376enum http_meth_t find_http_meth(const char *str, const int len)
377{
378 const struct ist m = ist2(str, len);
379
380 if (isteq(m, ist("GET"))) return HTTP_METH_GET;
381 else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD;
382 else if (isteq(m, ist("POST"))) return HTTP_METH_POST;
383 else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT;
384 else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT;
385 else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS;
386 else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE;
387 else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE;
388 else return HTTP_METH_OTHER;
389}
Willy Tarreau6b952c82018-09-10 17:45:34 +0200390
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200391/* This function returns HTTP_ERR_<num> (enum) matching http status code.
392 * Returned value should match codes from http_err_codes.
393 */
Willy Tarreau8de1df92019-04-15 21:27:18 +0200394int http_get_status_idx(unsigned int status)
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200395{
396 switch (status) {
397 case 200: return HTTP_ERR_200;
398 case 400: return HTTP_ERR_400;
399 case 403: return HTTP_ERR_403;
Florian Tham9205fea2020-01-08 13:35:30 +0100400 case 404: return HTTP_ERR_404;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200401 case 405: return HTTP_ERR_405;
402 case 408: return HTTP_ERR_408;
Florian Tham272e29b2020-01-08 10:19:05 +0100403 case 410: return HTTP_ERR_410;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200404 case 421: return HTTP_ERR_421;
405 case 425: return HTTP_ERR_425;
406 case 429: return HTTP_ERR_429;
407 case 500: return HTTP_ERR_500;
408 case 502: return HTTP_ERR_502;
409 case 503: return HTTP_ERR_503;
410 case 504: return HTTP_ERR_504;
411 default: return HTTP_ERR_500;
412 }
413}
414
/* This function returns a reason associated with the HTTP status.
 * This function never fails, a message is always returned: when the status
 * has no dedicated reason below, a generic one describing its class
 * (1xx..5xx) is returned, or "Other" for anything outside of 100..599.
 */
const char *http_get_reason(unsigned int status)
{
	switch (status) {
	case 100: return "Continue";
	case 101: return "Switching Protocols";
	case 102: return "Processing";
	case 200: return "OK";
	case 201: return "Created";
	case 202: return "Accepted";
	case 203: return "Non-Authoritative Information";
	case 204: return "No Content";
	case 205: return "Reset Content";
	case 206: return "Partial Content";
	case 207: return "Multi-Status";
	case 210: return "Content Different";
	case 226: return "IM Used";
	case 300: return "Multiple Choices";
	case 301: return "Moved Permanently";
	case 302: return "Moved Temporarily";
	case 303: return "See Other";
	case 304: return "Not Modified";
	case 305: return "Use Proxy";
	case 307: return "Temporary Redirect";
	case 308: return "Permanent Redirect";
	case 310: return "Too many Redirects";
	case 400: return "Bad Request";
	case 401: return "Unauthorized";
	case 402: return "Payment Required";
	case 403: return "Forbidden";
	case 404: return "Not Found";
	case 405: return "Method Not Allowed";
	case 406: return "Not Acceptable";
	case 407: return "Proxy Authentication Required";
	case 408: return "Request Time-out";
	case 409: return "Conflict";
	case 410: return "Gone";
	case 411: return "Length Required";
	case 412: return "Precondition Failed";
	case 413: return "Request Entity Too Large";
	case 414: return "Request-URI Too Long";
	case 415: return "Unsupported Media Type";
	case 416: return "Requested range unsatisfiable";
	case 417: return "Expectation failed";
	case 418: return "I'm a teapot";
	case 421: return "Misdirected Request";
	case 422: return "Unprocessable entity";
	case 423: return "Locked";
	case 424: return "Method failure";
	case 425: return "Too Early";
	case 426: return "Upgrade Required";
	case 428: return "Precondition Required";
	case 429: return "Too Many Requests";
	case 431: return "Request Header Fields Too Large";
	case 449: return "Retry With";
	case 450: return "Blocked by Windows Parental Controls";
	case 451: return "Unavailable For Legal Reasons";
	case 456: return "Unrecoverable Error";
	case 499: return "client has closed connection";
	case 500: return "Internal Server Error";
	case 501: return "Not Implemented";
	case 502: return "Bad Gateway or Proxy Error";
	case 503: return "Service Unavailable";
	case 504: return "Gateway Time-out";
	case 505: return "HTTP Version not supported";
	case 506: return "Variant also negociate";
	case 507: return "Insufficient storage";
	case 508: return "Loop detected";
	case 509: return "Bandwidth Limit Exceeded";
	case 510: return "Not extended";
	case 511: return "Network authentication required";
	case 520: return "Web server is returning an unknown error";
	default:
		break;
	}

	/* No dedicated reason: only report the class of the status. The
	 * division replaces the GNU "case 100 ... 199" range extension so that
	 * the function only relies on standard C.
	 */
	switch (status / 100) {
	case 1:  return "Informational";
	case 2:  return "Success";
	case 3:  return "Redirection";
	case 4:  return "Client Error";
	case 5:  return "Server Error";
	default: return "Other";
	}
}
500
Christopher Faulet16fdc552019-10-08 14:56:58 +0200501/* Parse the uri and looks for the authority, between the scheme and the
502 * path. if no_userinfo is not zero, the part before the '@' (including it) is
503 * skipped. If not found, an empty ist is returned. Otherwise, the ist pointing
504 * on the authority is returned.
505 */
506struct ist http_get_authority(const struct ist uri, int no_userinfo)
507{
508 const char *ptr, *start, *end;
509
510 if (!uri.len)
511 goto not_found;
512
513 ptr = uri.ptr;
514 start = ptr;
515 end = ptr + uri.len;
516
517 /* RFC7230, par. 2.7 :
518 * Request-URI = "*" | absuri | abspath | authority
519 */
520
521 if (*ptr == '*' || *ptr == '/')
522 goto not_found;
523
524 if (isalpha((unsigned char)*ptr)) {
525 /* this is a scheme as described by RFC3986, par. 3.1, or only
526 * an authority (in case of a CONNECT method).
527 */
528 ptr++;
529 while (ptr < end &&
530 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
531 ptr++;
532 /* skip '://' or take the whole as authority if not found */
533 if (ptr == end || *ptr++ != ':')
534 goto authority;
535 if (ptr == end || *ptr++ != '/')
536 goto authority;
537 if (ptr == end || *ptr++ != '/')
538 goto authority;
539 }
540
541 start = ptr;
542 while (ptr < end && *ptr != '/') {
543 if (*ptr++ == '@' && no_userinfo)
544 start = ptr;
545 }
546
547 /* OK, ptr point on the '/' or the end */
548 end = ptr;
549
550 authority:
551 return ist2(start, end - start);
552
553 not_found:
Tim Duesterhus241e29e2020-03-05 17:56:30 +0100554 return IST_NULL;
Christopher Faulet16fdc552019-10-08 14:56:58 +0200555}
556
Willy Tarreau6b952c82018-09-10 17:45:34 +0200557/* Parse the URI from the given transaction (which is assumed to be in request
558 * phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is
559 * returned. Otherwise the pointer and length are returned.
560 */
561struct ist http_get_path(const struct ist uri)
562{
563 const char *ptr, *end;
564
565 if (!uri.len)
566 goto not_found;
567
568 ptr = uri.ptr;
569 end = ptr + uri.len;
570
571 /* RFC7230, par. 2.7 :
572 * Request-URI = "*" | absuri | abspath | authority
573 */
574
575 if (*ptr == '*')
576 goto not_found;
577
578 if (isalpha((unsigned char)*ptr)) {
579 /* this is a scheme as described by RFC3986, par. 3.1 */
580 ptr++;
581 while (ptr < end &&
582 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
583 ptr++;
584 /* skip '://' */
585 if (ptr == end || *ptr++ != ':')
586 goto not_found;
587 if (ptr == end || *ptr++ != '/')
588 goto not_found;
589 if (ptr == end || *ptr++ != '/')
590 goto not_found;
591 }
592 /* skip [user[:passwd]@]host[:[port]] */
593
594 while (ptr < end && *ptr != '/')
595 ptr++;
596
597 if (ptr == end)
598 goto not_found;
599
600 /* OK, we got the '/' ! */
601 return ist2(ptr, end - ptr);
602
603 not_found:
Tim Duesterhus241e29e2020-03-05 17:56:30 +0100604 return IST_NULL;
Willy Tarreau6b952c82018-09-10 17:45:34 +0200605}
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200606
/*
 * Checks if <hdr> is exactly <name> for <len> chars, and ends with a colon.
 * If so, returns the position of the first non-space character relative to
 * <hdr>, or <end>-<hdr> if not found before. If no value is found, it tries
 * to return a pointer to the place after the first space. Returns 0 if the
 * header name does not match or if the colon would not fit before <end>.
 * Checks are case-insensitive.
 */
int http_header_match2(const char *hdr, const char *end,
                       const char *name, int len)
{
	const char *val;

	/* the colon must be located strictly before <end> */
	if (hdr + len >= end)
		return 0;
	if (hdr[len] != ':')
		return 0;
	if (strncasecmp(hdr, name, len) != 0)
		return 0;

	/* skip spaces and tabs following the colon */
	val = hdr + len + 1;
	while (val < end && HTTP_IS_SPHT(*val))
		val++;
	if ((val >= end) && (len + 2 <= end - hdr))
		return len + 2; /* we may replace starting from second space */
	return val - hdr;
}
632
/* Find the end of the header value contained between <s> and <e>. See RFC7230,
 * par 3.2 for more information. Note that it requires a valid header to return
 * a valid result. This works for headers defined as comma-separated lists.
 * The returned pointer designates the first comma found outside of a quoted
 * string, or <e> if there is none. Inside a quoted string, a backslash
 * escapes the next character.
 */
char *http_find_hdr_value_end(char *s, const char *e)
{
	int quoted, qdpair;

	quoted = qdpair = 0;

#ifdef HA_UNALIGNED_LE
	/* speedup: skip everything not a comma nor a double quote, one word
	 * at a time. The remaining length is compared instead of computing
	 * "e - sizeof(int)", which may point before the buffer when it is
	 * short, and the word is loaded using memcpy() so that the char
	 * buffer is never accessed through an int pointer.
	 */
	while (s < e && (size_t)(e - s) >= sizeof(unsigned int)) {
		unsigned int w, c, q;

		memcpy(&w, s, sizeof(w));

		c = w ^ 0x2c2c2c2c; // contains one zero on a comma
		q = w ^ 0x22222222; // contains one zero on a quote

		c = (c - 0x01010101) & ~c; // contains 0x80 below a comma
		q = (q - 0x01010101) & ~q; // contains 0x80 below a quote

		if ((c | q) & 0x80808080)
			break; // found a comma or a quote

		s += sizeof(w);
	}
#endif
	for (; s < e; s++) {
		if (qdpair)
			qdpair = 0; /* escaped char, ignore it */
		else if (quoted) {
			if (*s == '\\')
				qdpair = 1;
			else if (*s == '"')
				quoted = 0;
		}
		else if (*s == '"')
			quoted = 1;
		else if (*s == ',')
			return s;
	}
	return s;
}
670
/* Find the end of a cookie value contained between <s> and <e>. It works the
 * same way as with headers above except that the semi-colon also ends a token.
 * See RFC2965 for more information. Note that it requires a valid header to
 * return a valid result. The returned pointer designates the first comma or
 * semi-colon found outside of a quoted string, or <e> if there is none.
 */
char *http_find_cookie_value_end(char *s, const char *e)
{
	int quoted, qdpair;

	quoted = qdpair = 0;
	for (; s < e; s++) {
		if (qdpair)
			qdpair = 0; /* escaped char, ignore it */
		else if (quoted) {
			if (*s == '\\')
				qdpair = 1;
			else if (*s == '"')
				quoted = 0;
		}
		else if (*s == '"')
			quoted = 1;
		else if (*s == ',' || *s == ';')
			return s;
	}
	return s;
}
692
/* Try to find the next occurrence of a cookie name in a cookie header value.
 * The lookup begins at <hdr>. The pointer and size of the next occurrence of
 * the cookie value is returned into *value and *value_l, and the function
 * returns a pointer to the next pointer to search from if the value was found.
 * Otherwise if the cookie was not found, NULL is returned and neither value
 * nor value_l are touched. The input <hdr> string should first point to the
 * header's value, and the <hdr_end> pointer must point to the first character
 * not part of the value. <list> must be non-zero if value may represent a list
 * of values (cookie headers). This makes it faster to abort parsing when no
 * list is expected.
 */
char *http_extract_cookie_value(char *hdr, const char *hdr_end,
                                char *cookie_name, size_t cookie_name_l,
                                int list, char **value, size_t *value_l)
{
	char *equal, *att_end, *att_beg, *val_beg, *val_end;
	char *next;

	/* we search at least a cookie name followed by an equal, and more
	 * generally something like this :
	 * Cookie:    NAME1  =  VALUE 1  ; NAME2 = VALUE2 ; NAME3 = VALUE3\r\n
	 */
	for (att_beg = hdr; att_beg + cookie_name_l + 1 < hdr_end; att_beg = next + 1) {
		/* Iterate through all cookies on this line */

		while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
			att_beg++;

		/* find att_end : this is the first character after the last non
		 * space before the equal. It may be equal to hdr_end.
		 */
		equal = att_end = att_beg;

		while (equal < hdr_end) {
			if (*equal == '=' || *equal == ';' || (list && *equal == ','))
				break;
			if (HTTP_IS_SPHT(*equal++))
				continue;
			att_end = equal;
		}

		/* here, <equal> points to '=', a delimitor or the end. <att_end>
		 * is between <att_beg> and <equal>, both may be identical.
		 */

		/* look for end of cookie if there is an equal sign */
		if (equal < hdr_end && *equal == '=') {
			/* look for the beginning of the value */
			val_beg = equal + 1;
			while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
				val_beg++;

			/* find the end of the value, respecting quotes */
			next = http_find_cookie_value_end(val_beg, hdr_end);

			/* make val_end point to the first white space or delimitor after the value */
			val_end = next;
			while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
				val_end--;
		} else {
			val_beg = val_end = next = equal;
		}

		/* Ignore cookies with no equal sign. This is checked first
		 * because <att_beg> may be equal to <hdr_end> in this case and
		 * must not be dereferenced below. Both tests only skip the
		 * current attribute, so their order does not change the result.
		 */
		if (equal == next)
			continue;

		/* We have nothing to do with attributes beginning with '$'. However,
		 * they will automatically be removed if a header before them is removed,
		 * since they're supposed to be linked together.
		 */
		if (*att_beg == '$')
			continue;

		/* Now we have the cookie name between att_beg and att_end, and
		 * its value between val_beg and val_end.
		 */

		if ((size_t)(att_end - att_beg) == cookie_name_l &&
		    memcmp(att_beg, cookie_name, cookie_name_l) == 0) {
			/* let's return this value and indicate where to go on from */
			*value = val_beg;
			*value_l = val_end - val_beg;
			return next + 1;
		}

		/* Set-Cookie headers only have the name in the first attr=value part */
		if (!list)
			break;
	}

	return NULL;
}
786
/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
 * value is larger than 1000, it is bound to 1000. The parser consumes up to
 * 1 digit, one dot and 3 digits and stops on the first invalid character.
 * Unparsable qvalues return 1000 as "q=1.000". If <end> is not NULL, it is
 * set to point to the first character which was not consumed, which may be
 * the terminating zero but never a location past it.
 */
int http_parse_qvalue(const char *qvalue, const char **end)
{
	int q = 1000;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q = (*qvalue++ - '0') * 1000;

	/* Only step over the character when it really is a dot, otherwise
	 * <end> would point past the first invalid character (typically the
	 * ',' or ';' delimiter, or even the end of the string).
	 */
	if (*qvalue != '.')
		goto out;
	qvalue++;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 100;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 10;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 1;
 out:
	if (q > 1000)
		q = 1000;
	if (end)
		*end = qvalue;
	return q;
}
821
/*
 * Given a url parameter, find the starting position of the first occurrence,
 * or NULL if the parameter is not found.
 *
 * Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
 * the function will return query_string+8.
 *
 * <chunks> is accessed as an array of 4 pointers: chunks[0]/chunks[1] are the
 * beginning and the end of the first chunk, chunks[2]/chunks[3] those of the
 * second one, with chunks[2] being NULL when there is no second chunk.
 * NOTE(review): the exact semantics of fix_pointer_if_wrap() are not visible
 * from here; it appears to move a pointer running past the first chunk into
 * the second one and to report that it did so - to be confirmed.
 *
 * Warning: this function returns a pointer that can point to the first chunk
 * or the second chunk. The caller must check the position before using the
 * result.
 */
const char *http_find_url_param_pos(const char **chunks,
                                    const char* url_param_name, size_t url_param_name_l,
                                    char delim)
{
	const char *pos, *last, *equal;
	const char **bufs = chunks;
	int l1, l2;


	pos  = bufs[0];
	last = bufs[1];
	while (pos < last) {
		/* Check the equal. */
		equal = pos + url_param_name_l;
		if (fix_pointer_if_wrap(chunks, &equal)) {
			if (equal >= chunks[3])
				return NULL;
		} else {
			if (equal >= chunks[1])
				return NULL;
		}
		if (*equal == '=') {
			if (pos + url_param_name_l > last) {
				/* process wrap case, we detect a wrap. In this case, the
				 * comparison is performed in two parts.
				 */

				/* This is the end, we dont have any other chunk. */
				if (bufs != chunks || !bufs[2])
					return NULL;

				/* Compute the length of each part of the comparison. */
				l1 = last - pos;
				l2 = url_param_name_l - l1;

				/* The second buffer is too short to contain the compared string. */
				if (bufs[2] + l2 > bufs[3])
					return NULL;

				if (memcmp(pos,     url_param_name,    l1) == 0 &&
				    memcmp(bufs[2], url_param_name+l1, l2) == 0)
					return pos;

				/* Perform wrapping and jump the string who fail the comparison. */
				bufs += 2;
				pos = bufs[0] + l2;
				last = bufs[1];

			} else {
				/* process a simple comparison. */
				if (memcmp(pos, url_param_name, url_param_name_l) == 0)
					return pos;
				pos += url_param_name_l + 1;
				if (fix_pointer_if_wrap(chunks, &pos))
					last = bufs[2];
			}
		}

		while (1) {
			/* Look for the next delimiter. */
			while (pos < last && !http_is_param_delimiter(*pos, delim))
				pos++;
			if (pos < last)
				break;
			/* process buffer wrapping. */
			if (bufs != chunks || !bufs[2])
				return NULL;
			bufs += 2;
			pos = bufs[0];
			last = bufs[1];
		}
		/* skip the delimiter itself */
		pos++;
	}
	return NULL;
}
908
/*
 * Given a url parameter name and a query string described by <chunks>, find
 * the next value. <chunks> holds two {start, end} pointer pairs: chunks[0..1]
 * for the first chunk and chunks[2..3] for the optional second one (chunks[2]
 * is NULL when there is none).
 * An empty url_param_name matches the first available parameter.
 * If the parameter is found, 1 is returned and *vstart / *vend are updated to
 * respectively provide a pointer to the value and its end.
 * Otherwise, 0 is returned and vstart/vend are not modified.
 */
int http_find_next_url_param(const char **chunks,
                             const char* url_param_name, size_t url_param_name_l,
                             const char **vstart, const char **vend, char delim)
{
	const char *arg_start = chunks[0];
	const char *qs_end = chunks[1];
	const char *value_start, *value_end;

	if (url_param_name_l) {
		/* Looks for an argument name. */
		arg_start = http_find_url_param_pos(chunks,
		                                    url_param_name, url_param_name_l,
		                                    delim);

		/* Test for "not found" before any relational comparison: a
		 * NULL pointer must not be compared with '>='.
		 */
		if (!arg_start)
			return 0;

		/* Check for wrapping. */
		if (arg_start >= qs_end)
			qs_end = chunks[3];

		/* Jump over the argument name and the '='. */
		value_start = arg_start + url_param_name_l + 1;

		/* Check for pointer wrapping. */
		if (fix_pointer_if_wrap(chunks, &value_start)) {
			/* The value lies in the second chunk. */
			qs_end = chunks[3];

			/* Nothing left after the '='. */
			if (value_start >= qs_end)
				return 0;
		}
	}
	else {
		if (!arg_start)
			return 0;

		/* No name given: the value follows the first '=' found, first
		 * in the current chunk, then in the second one if any.
		 */
		for (;;) {
			value_start = memchr(arg_start, '=', qs_end - arg_start);
			if (value_start)
				break;

			/* Only retry when we were still in the first chunk and
			 * a second chunk exists.
			 */
			if (arg_start < chunks[0] || arg_start >= chunks[1] || !chunks[2])
				return 0;
			arg_start = chunks[2];
			qs_end = chunks[3];
		}
		value_start++;
	}

	/* The value runs up to the next delimiter or the end of the data. */
	value_end = value_start;
	for (;;) {
		while ((value_end < qs_end) && !http_is_param_delimiter(*value_end, delim))
			value_end++;
		if (value_end < qs_end)
			break;

		/* process buffer wrapping.
		 * NOTE(review): value_end has reached qs_end here, so when
		 * qs_end is chunks[1] the "< chunks[1]" test below looks false
		 * and the scan would not continue into the second chunk -
		 * confirm whether "<=" was intended.
		 */
		if (value_end >= chunks[0] &&
		    value_end < chunks[1] &&
		    chunks[2]) {
			value_end = chunks[2];
			qs_end = chunks[3];
			continue;
		}
		break;
	}

	*vstart = value_start;
	*vend = value_end;
	return 1;
}
993
Christopher Faulet8277ca72018-10-22 15:12:04 +0200994/* Parses a single header line (without the CRLF) and splits it into its name
995 * and its value. The parsing is pretty naive and just skip spaces.
996 */
997int http_parse_header(const struct ist hdr, struct ist *name, struct ist *value)
998{
999 char *p = hdr.ptr;
1000 char *end = p + hdr.len;
1001
1002 name->len = value->len = 0;
1003
1004 /* Skip leading spaces */
1005 for (; p < end && HTTP_IS_SPHT(*p); p++);
1006
1007 /* Set the header name */
1008 name->ptr = p;
1009 for (; p < end && HTTP_IS_TOKEN(*p); p++);
1010 name->len = p - name->ptr;
1011
1012 /* Skip the ':' and spaces before and after it */
1013 for (; p < end && HTTP_IS_SPHT(*p); p++);
1014 if (p < end && *p == ':') p++;
1015 for (; p < end && HTTP_IS_SPHT(*p); p++);
1016
1017 /* Set the header value */
1018 value->ptr = p;
1019 value->len = end - p;
1020
1021 return 1;
1022}
1023
1024/* Parses a single start line (without the CRLF) and splits it into 3 parts. The
1025 * parsing is pretty naive and just skip spaces.
1026 */
1027int http_parse_stline(const struct ist line, struct ist *p1, struct ist *p2, struct ist *p3)
1028{
1029 char *p = line.ptr;
1030 char *end = p + line.len;
1031
1032 p1->len = p2->len = p3->len = 0;
1033
1034 /* Skip leading spaces */
1035 for (; p < end && HTTP_IS_SPHT(*p); p++);
1036
1037 /* Set the first part */
1038 p1->ptr = p;
1039 for (; p < end && HTTP_IS_TOKEN(*p); p++);
1040 p1->len = p - p1->ptr;
1041
1042 /* Skip spaces between p1 and p2 */
1043 for (; p < end && HTTP_IS_SPHT(*p); p++);
1044
1045 /* Set the second part */
1046 p2->ptr = p;
1047 for (; p < end && !HTTP_IS_SPHT(*p); p++);
1048 p2->len = p - p2->ptr;
1049
1050 /* Skip spaces between p2 and p3 */
1051 for (; p < end && HTTP_IS_SPHT(*p); p++);
1052
1053 /* The remaing is the third value */
1054 p3->ptr = p;
1055 p3->len = end - p;
1056
1057 return 1;
1058}
Christopher Faulet341fac12019-09-16 11:37:05 +02001059
1060/* Parses value of a Status header with the following format: "Status: Code[
1061 * Reason]". The parsing is pretty naive and just skip spaces. It return the
1062 * numeric value of the status code.
1063 */
1064int http_parse_status_val(const struct ist value, struct ist *status, struct ist *reason)
1065{
1066 char *p = value.ptr;
1067 char *end = p + value.len;
1068 uint16_t code;
1069
1070 status->len = reason->len = 0;
1071
1072 /* Skip leading spaces */
1073 for (; p < end && HTTP_IS_SPHT(*p); p++);
1074
1075 /* Set the status part */
1076 status->ptr = p;
1077 for (; p < end && HTTP_IS_TOKEN(*p); p++);
1078 status->len = p - status->ptr;
1079
1080 /* Skip spaces between status and reason */
1081 for (; p < end && HTTP_IS_SPHT(*p); p++);
1082
1083 /* the remaining is the reason */
1084 reason->ptr = p;
1085 reason->len = end - p;
1086
1087 code = strl2ui(status->ptr, status->len);
1088 return code;
1089}