blob: 647f4334a4ad9373f3a31405970920ae02538a1e [file] [log] [blame]
/*
 * HTTP semantics
 *
 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <ctype.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020014#include <haproxy/api.h>
Willy Tarreaucd72d8c2020-06-02 19:11:26 +020015#include <haproxy/http.h>
Willy Tarreau48fbcae2020-06-03 18:09:46 +020016#include <haproxy/tools.h>
Willy Tarreau35b51c62018-09-10 15:38:55 +020017
18/* It is about twice as fast on recent architectures to lookup a byte in a
19 * table than to perform a boolean AND or OR between two tests. Refer to
20 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
21 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
22 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
23 * digit. Note: please do not overwrite values in assignment since gcc-2.95
24 * will not handle them correctly. It's worth noting that chars 128..255 are
25 * nothing, not even control chars.
26 */
27const unsigned char http_char_classes[256] = {
28 [ 0] = HTTP_FLG_CTL,
29 [ 1] = HTTP_FLG_CTL,
30 [ 2] = HTTP_FLG_CTL,
31 [ 3] = HTTP_FLG_CTL,
32 [ 4] = HTTP_FLG_CTL,
33 [ 5] = HTTP_FLG_CTL,
34 [ 6] = HTTP_FLG_CTL,
35 [ 7] = HTTP_FLG_CTL,
36 [ 8] = HTTP_FLG_CTL,
37 [ 9] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP | HTTP_FLG_CTL,
38 [ 10] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
39 [ 11] = HTTP_FLG_CTL,
40 [ 12] = HTTP_FLG_CTL,
41 [ 13] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
42 [ 14] = HTTP_FLG_CTL,
43 [ 15] = HTTP_FLG_CTL,
44 [ 16] = HTTP_FLG_CTL,
45 [ 17] = HTTP_FLG_CTL,
46 [ 18] = HTTP_FLG_CTL,
47 [ 19] = HTTP_FLG_CTL,
48 [ 20] = HTTP_FLG_CTL,
49 [ 21] = HTTP_FLG_CTL,
50 [ 22] = HTTP_FLG_CTL,
51 [ 23] = HTTP_FLG_CTL,
52 [ 24] = HTTP_FLG_CTL,
53 [ 25] = HTTP_FLG_CTL,
54 [ 26] = HTTP_FLG_CTL,
55 [ 27] = HTTP_FLG_CTL,
56 [ 28] = HTTP_FLG_CTL,
57 [ 29] = HTTP_FLG_CTL,
58 [ 30] = HTTP_FLG_CTL,
59 [ 31] = HTTP_FLG_CTL,
60 [' '] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP,
61 ['!'] = HTTP_FLG_TOK,
62 ['"'] = HTTP_FLG_SEP,
63 ['#'] = HTTP_FLG_TOK,
64 ['$'] = HTTP_FLG_TOK,
65 ['%'] = HTTP_FLG_TOK,
66 ['&'] = HTTP_FLG_TOK,
67 [ 39] = HTTP_FLG_TOK,
68 ['('] = HTTP_FLG_SEP,
69 [')'] = HTTP_FLG_SEP,
70 ['*'] = HTTP_FLG_TOK,
71 ['+'] = HTTP_FLG_TOK,
72 [','] = HTTP_FLG_SEP,
73 ['-'] = HTTP_FLG_TOK,
74 ['.'] = HTTP_FLG_TOK | HTTP_FLG_VER,
75 ['/'] = HTTP_FLG_SEP | HTTP_FLG_VER,
76 ['0'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
77 ['1'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
78 ['2'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
79 ['3'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
80 ['4'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
81 ['5'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
82 ['6'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
83 ['7'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
84 ['8'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
85 ['9'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
86 [':'] = HTTP_FLG_SEP,
87 [';'] = HTTP_FLG_SEP,
88 ['<'] = HTTP_FLG_SEP,
89 ['='] = HTTP_FLG_SEP,
90 ['>'] = HTTP_FLG_SEP,
91 ['?'] = HTTP_FLG_SEP,
92 ['@'] = HTTP_FLG_SEP,
93 ['A'] = HTTP_FLG_TOK,
94 ['B'] = HTTP_FLG_TOK,
95 ['C'] = HTTP_FLG_TOK,
96 ['D'] = HTTP_FLG_TOK,
97 ['E'] = HTTP_FLG_TOK,
98 ['F'] = HTTP_FLG_TOK,
99 ['G'] = HTTP_FLG_TOK,
100 ['H'] = HTTP_FLG_TOK | HTTP_FLG_VER,
101 ['I'] = HTTP_FLG_TOK,
102 ['J'] = HTTP_FLG_TOK,
103 ['K'] = HTTP_FLG_TOK,
104 ['L'] = HTTP_FLG_TOK,
105 ['M'] = HTTP_FLG_TOK,
106 ['N'] = HTTP_FLG_TOK,
107 ['O'] = HTTP_FLG_TOK,
108 ['P'] = HTTP_FLG_TOK | HTTP_FLG_VER,
109 ['Q'] = HTTP_FLG_TOK,
110 ['R'] = HTTP_FLG_TOK | HTTP_FLG_VER,
111 ['S'] = HTTP_FLG_TOK | HTTP_FLG_VER,
112 ['T'] = HTTP_FLG_TOK | HTTP_FLG_VER,
113 ['U'] = HTTP_FLG_TOK,
114 ['V'] = HTTP_FLG_TOK,
115 ['W'] = HTTP_FLG_TOK,
116 ['X'] = HTTP_FLG_TOK,
117 ['Y'] = HTTP_FLG_TOK,
118 ['Z'] = HTTP_FLG_TOK,
119 ['['] = HTTP_FLG_SEP,
120 [ 92] = HTTP_FLG_SEP,
121 [']'] = HTTP_FLG_SEP,
122 ['^'] = HTTP_FLG_TOK,
123 ['_'] = HTTP_FLG_TOK,
124 ['`'] = HTTP_FLG_TOK,
125 ['a'] = HTTP_FLG_TOK,
126 ['b'] = HTTP_FLG_TOK,
127 ['c'] = HTTP_FLG_TOK,
128 ['d'] = HTTP_FLG_TOK,
129 ['e'] = HTTP_FLG_TOK,
130 ['f'] = HTTP_FLG_TOK,
131 ['g'] = HTTP_FLG_TOK,
132 ['h'] = HTTP_FLG_TOK,
133 ['i'] = HTTP_FLG_TOK,
134 ['j'] = HTTP_FLG_TOK,
135 ['k'] = HTTP_FLG_TOK,
136 ['l'] = HTTP_FLG_TOK,
137 ['m'] = HTTP_FLG_TOK,
138 ['n'] = HTTP_FLG_TOK,
139 ['o'] = HTTP_FLG_TOK,
140 ['p'] = HTTP_FLG_TOK,
141 ['q'] = HTTP_FLG_TOK,
142 ['r'] = HTTP_FLG_TOK,
143 ['s'] = HTTP_FLG_TOK,
144 ['t'] = HTTP_FLG_TOK,
145 ['u'] = HTTP_FLG_TOK,
146 ['v'] = HTTP_FLG_TOK,
147 ['w'] = HTTP_FLG_TOK,
148 ['x'] = HTTP_FLG_TOK,
149 ['y'] = HTTP_FLG_TOK,
150 ['z'] = HTTP_FLG_TOK,
151 ['{'] = HTTP_FLG_SEP,
152 ['|'] = HTTP_FLG_TOK,
153 ['}'] = HTTP_FLG_SEP,
154 ['~'] = HTTP_FLG_TOK,
155 [127] = HTTP_FLG_CTL,
156};
157
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200158const int http_err_codes[HTTP_ERR_SIZE] = {
159 [HTTP_ERR_200] = 200, /* used by "monitor-uri" */
160 [HTTP_ERR_400] = 400,
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200161 [HTTP_ERR_401] = 401,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200162 [HTTP_ERR_403] = 403,
Florian Tham9205fea2020-01-08 13:35:30 +0100163 [HTTP_ERR_404] = 404,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200164 [HTTP_ERR_405] = 405,
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200165 [HTTP_ERR_407] = 407,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200166 [HTTP_ERR_408] = 408,
Florian Tham272e29b2020-01-08 10:19:05 +0100167 [HTTP_ERR_410] = 410,
Anthonin Bonnefoy85048f82020-06-22 09:17:01 +0200168 [HTTP_ERR_413] = 413,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200169 [HTTP_ERR_421] = 421,
170 [HTTP_ERR_425] = 425,
171 [HTTP_ERR_429] = 429,
172 [HTTP_ERR_500] = 500,
Christopher Faulete095f312020-12-07 11:22:24 +0100173 [HTTP_ERR_501] = 501,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200174 [HTTP_ERR_502] = 502,
175 [HTTP_ERR_503] = 503,
176 [HTTP_ERR_504] = 504,
177};
178
Christopher Fauleta7b677c2018-11-29 16:48:49 +0100179const char *http_err_msgs[HTTP_ERR_SIZE] = {
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200180 [HTTP_ERR_200] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200181 "HTTP/1.1 200 OK\r\n"
182 "Content-length: 58\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200183 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200184 "Content-Type: text/html\r\n"
185 "\r\n"
186 "<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
187
188 [HTTP_ERR_400] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200189 "HTTP/1.1 400 Bad request\r\n"
190 "Content-length: 90\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200191 "Cache-Control: no-cache\r\n"
192 "Connection: close\r\n"
193 "Content-Type: text/html\r\n"
194 "\r\n"
195 "<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
196
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200197 [HTTP_ERR_401] =
198 "HTTP/1.1 401 Unauthorized\r\n"
199 "Content-length: 112\r\n"
200 "Cache-Control: no-cache\r\n"
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200201 "Content-Type: text/html\r\n"
202 "\r\n"
203 "<html><body><h1>401 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
204
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200205 [HTTP_ERR_403] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200206 "HTTP/1.1 403 Forbidden\r\n"
207 "Content-length: 93\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200208 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200209 "Content-Type: text/html\r\n"
210 "\r\n"
211 "<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
212
Florian Tham9205fea2020-01-08 13:35:30 +0100213 [HTTP_ERR_404] =
214 "HTTP/1.1 404 Not Found\r\n"
215 "Content-length: 83\r\n"
216 "Cache-Control: no-cache\r\n"
Florian Tham9205fea2020-01-08 13:35:30 +0100217 "Content-Type: text/html\r\n"
218 "\r\n"
219 "<html><body><h1>404 Not Found</h1>\nThe resource could not be found.\n</body></html>\n",
220
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200221 [HTTP_ERR_405] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200222 "HTTP/1.1 405 Method Not Allowed\r\n"
223 "Content-length: 146\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200224 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200225 "Content-Type: text/html\r\n"
226 "\r\n"
227 "<html><body><h1>405 Method Not Allowed</h1>\nA request was made of a resource using a request method not supported by that resource\n</body></html>\n",
228
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200229 [HTTP_ERR_407] =
230 "HTTP/1.1 407 Unauthorized\r\n"
231 "Content-length: 112\r\n"
232 "Cache-Control: no-cache\r\n"
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200233 "Content-Type: text/html\r\n"
234 "\r\n"
235 "<html><body><h1>407 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
236
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200237 [HTTP_ERR_408] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200238 "HTTP/1.1 408 Request Time-out\r\n"
239 "Content-length: 110\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200240 "Cache-Control: no-cache\r\n"
241 "Connection: close\r\n"
242 "Content-Type: text/html\r\n"
243 "\r\n"
244 "<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
245
Florian Tham272e29b2020-01-08 10:19:05 +0100246 [HTTP_ERR_410] =
247 "HTTP/1.1 410 Gone\r\n"
248 "Content-length: 114\r\n"
249 "Cache-Control: no-cache\r\n"
Florian Tham272e29b2020-01-08 10:19:05 +0100250 "Content-Type: text/html\r\n"
251 "\r\n"
252 "<html><body><h1>410 Gone</h1>\nThe resource is no longer available and will not be available again.\n</body></html>\n",
253
Anthonin Bonnefoy85048f82020-06-22 09:17:01 +0200254 [HTTP_ERR_413] =
255 "HTTP/1.1 413 Payload Too Large\r\n"
256 "Content-length: 106\r\n"
257 "Cache-Control: no-cache\r\n"
258 "Content-Type: text/html\r\n"
259 "\r\n"
260 "<html><body><h1>413 Payload Too Large</h1>\nThe request entity exceeds the maximum allowed.\n</body></html>\n",
261
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200262 [HTTP_ERR_421] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200263 "HTTP/1.1 421 Misdirected Request\r\n"
264 "Content-length: 104\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200265 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200266 "Content-Type: text/html\r\n"
267 "\r\n"
268 "<html><body><h1>421 Misdirected Request</h1>\nRequest sent to a non-authoritative server.\n</body></html>\n",
269
270 [HTTP_ERR_425] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200271 "HTTP/1.1 425 Too Early\r\n"
272 "Content-length: 80\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200273 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200274 "Content-Type: text/html\r\n"
275 "\r\n"
276 "<html><body><h1>425 Too Early</h1>\nYour browser sent early data.\n</body></html>\n",
277
278 [HTTP_ERR_429] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200279 "HTTP/1.1 429 Too Many Requests\r\n"
280 "Content-length: 117\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200281 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200282 "Content-Type: text/html\r\n"
283 "\r\n"
284 "<html><body><h1>429 Too Many Requests</h1>\nYou have sent too many requests in a given amount of time.\n</body></html>\n",
285
286 [HTTP_ERR_500] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200287 "HTTP/1.1 500 Internal Server Error\r\n"
Christopher Faulet55633922020-10-09 08:39:26 +0200288 "Content-length: 97\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200289 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200290 "Content-Type: text/html\r\n"
291 "\r\n"
Ilya Shipitsin46a030c2020-07-05 16:36:08 +0500292 "<html><body><h1>500 Internal Server Error</h1>\nAn internal server error occurred.\n</body></html>\n",
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200293
Christopher Faulete095f312020-12-07 11:22:24 +0100294 [HTTP_ERR_501] =
295 "HTTP/1.1 501 Not Implemented\r\n"
296 "Content-length: 136\r\n"
297 "Cache-Control: no-cache\r\n"
298 "Content-Type: text/html\r\n"
299 "\r\n"
300 "<html><body><h1>501 Not Implemented</h1>\n.The server does not support the functionality required to fulfill the request.\n</body></html>\n",
301
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200302 [HTTP_ERR_502] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200303 "HTTP/1.1 502 Bad Gateway\r\n"
304 "Content-length: 107\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200305 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200306 "Content-Type: text/html\r\n"
307 "\r\n"
308 "<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
309
310 [HTTP_ERR_503] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200311 "HTTP/1.1 503 Service Unavailable\r\n"
312 "Content-length: 107\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200313 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200314 "Content-Type: text/html\r\n"
315 "\r\n"
316 "<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
317
318 [HTTP_ERR_504] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200319 "HTTP/1.1 504 Gateway Time-out\r\n"
320 "Content-length: 92\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200321 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200322 "Content-Type: text/html\r\n"
323 "\r\n"
324 "<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200325};
326
Willy Tarreau35b51c62018-09-10 15:38:55 +0200327const struct ist http_known_methods[HTTP_METH_OTHER] = {
328 [HTTP_METH_OPTIONS] = IST("OPTIONS"),
329 [HTTP_METH_GET] = IST("GET"),
330 [HTTP_METH_HEAD] = IST("HEAD"),
331 [HTTP_METH_POST] = IST("POST"),
332 [HTTP_METH_PUT] = IST("PUT"),
333 [HTTP_METH_DELETE] = IST("DELETE"),
334 [HTTP_METH_TRACE] = IST("TRACE"),
335 [HTTP_METH_CONNECT] = IST("CONNECT"),
336};
337
338/*
339 * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown
340 * ones.
341 */
342enum http_meth_t find_http_meth(const char *str, const int len)
343{
344 const struct ist m = ist2(str, len);
345
346 if (isteq(m, ist("GET"))) return HTTP_METH_GET;
347 else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD;
348 else if (isteq(m, ist("POST"))) return HTTP_METH_POST;
349 else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT;
350 else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT;
351 else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS;
352 else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE;
353 else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE;
354 else return HTTP_METH_OTHER;
355}
Willy Tarreau6b952c82018-09-10 17:45:34 +0200356
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200357/* This function returns HTTP_ERR_<num> (enum) matching http status code.
358 * Returned value should match codes from http_err_codes.
359 */
Willy Tarreau8de1df92019-04-15 21:27:18 +0200360int http_get_status_idx(unsigned int status)
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200361{
362 switch (status) {
363 case 200: return HTTP_ERR_200;
364 case 400: return HTTP_ERR_400;
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200365 case 401: return HTTP_ERR_401;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200366 case 403: return HTTP_ERR_403;
Florian Tham9205fea2020-01-08 13:35:30 +0100367 case 404: return HTTP_ERR_404;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200368 case 405: return HTTP_ERR_405;
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200369 case 407: return HTTP_ERR_407;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200370 case 408: return HTTP_ERR_408;
Florian Tham272e29b2020-01-08 10:19:05 +0100371 case 410: return HTTP_ERR_410;
Anthonin Bonnefoy85048f82020-06-22 09:17:01 +0200372 case 413: return HTTP_ERR_413;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200373 case 421: return HTTP_ERR_421;
374 case 425: return HTTP_ERR_425;
375 case 429: return HTTP_ERR_429;
376 case 500: return HTTP_ERR_500;
Christopher Faulete095f312020-12-07 11:22:24 +0100377 case 501: return HTTP_ERR_501;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200378 case 502: return HTTP_ERR_502;
379 case 503: return HTTP_ERR_503;
380 case 504: return HTTP_ERR_504;
381 default: return HTTP_ERR_500;
382 }
383}
384
/* Returns the reason phrase associated with the HTTP status <status>. This
 * function never fails: when the exact code is not listed, a generic phrase
 * describing the status class (1xx..5xx) is returned, and "Other" is returned
 * for anything outside of 100..599. The returned pointer designates a string
 * literal and must not be modified nor freed.
 */
const char *http_get_reason(unsigned int status)
{
	switch (status) {
	case 100: return "Continue";
	case 101: return "Switching Protocols";
	case 102: return "Processing";
	case 103: return "Early Hints";
	case 200: return "OK";
	case 201: return "Created";
	case 202: return "Accepted";
	case 203: return "Non-Authoritative Information";
	case 204: return "No Content";
	case 205: return "Reset Content";
	case 206: return "Partial Content";
	case 207: return "Multi-Status";
	case 208: return "Already Reported";
	case 210: return "Content Different";
	case 226: return "IM Used";
	case 300: return "Multiple Choices";
	case 301: return "Moved Permanently";
	case 302: return "Found";
	case 303: return "See Other";
	case 304: return "Not Modified";
	case 305: return "Use Proxy";
	case 307: return "Temporary Redirect";
	case 308: return "Permanent Redirect";
	case 310: return "Too many Redirects";
	case 400: return "Bad Request";
	case 401: return "Unauthorized";
	case 402: return "Payment Required";
	case 403: return "Forbidden";
	case 404: return "Not Found";
	case 405: return "Method Not Allowed";
	case 406: return "Not Acceptable";
	case 407: return "Proxy Authentication Required";
	case 408: return "Request Time-out";
	case 409: return "Conflict";
	case 410: return "Gone";
	case 411: return "Length Required";
	case 412: return "Precondition Failed";
	case 413: return "Request Entity Too Large";
	case 414: return "Request-URI Too Long";
	case 415: return "Unsupported Media Type";
	case 416: return "Requested range unsatisfiable";
	case 417: return "Expectation failed";
	case 418: return "I'm a teapot";
	case 421: return "Misdirected Request";
	case 422: return "Unprocessable entity";
	case 423: return "Locked";
	case 424: return "Method failure";
	case 425: return "Too Early";
	case 426: return "Upgrade Required";
	case 428: return "Precondition Required";
	case 429: return "Too Many Requests";
	case 431: return "Request Header Fields Too Large";
	case 449: return "Retry With";
	case 450: return "Blocked by Windows Parental Controls";
	case 451: return "Unavailable For Legal Reasons";
	case 456: return "Unrecoverable Error";
	case 499: return "client has closed connection";
	case 500: return "Internal Server Error";
	case 501: return "Not Implemented";
	case 502: return "Bad Gateway or Proxy Error";
	case 503: return "Service Unavailable";
	case 504: return "Gateway Time-out";
	case 505: return "HTTP Version not supported";
	case 506: return "Variant Also Negotiates";
	case 507: return "Insufficient storage";
	case 508: return "Loop detected";
	case 509: return "Bandwidth Limit Exceeded";
	case 510: return "Not extended";
	case 511: return "Network authentication required";
	case 520: return "Web server is returning an unknown error";
	default:
		break;
	}

	/* unlisted code: fall back to the class given by the hundreds digit */
	switch (status / 100) {
	case 1:  return "Informational";
	case 2:  return "Success";
	case 3:  return "Redirection";
	case 4:  return "Client Error";
	case 5:  return "Server Error";
	default: return "Other";
	}
}
470
Christopher Faulet026ca8a2022-07-05 09:48:39 +0200471/* Returns the ist string corresponding to port part (without ':') in the host
472 * <host> or IST_NULL if not found.
473*/
474struct ist http_get_host_port(const struct ist host)
475{
476 char *start, *end, *ptr;
477
478 start = istptr(host);
479 end = istend(host);
480 for (ptr = end; ptr > start && isdigit((unsigned char)*--ptr););
481
482 /* no port found */
483 if (likely(*ptr != ':' || ptr+1 == end || ptr == start))
484 return IST_NULL;
485
486 return istnext(ist2(ptr, end - ptr));
487}
488
Christopher Faulet2eea0e92022-07-05 09:53:37 +0200489
490/* Return non-zero if the port <port> is a default port. If the scheme <schm> is
491 * set, it is used to detect default ports (HTTP => 80 and HTTPS => 443)
492 * port. Otherwise, both are considered as default ports.
493 */
494int http_is_default_port(const struct ist schm, const struct ist port)
495{
496 if (!isttest(schm))
497 return (isteq(port, ist("443")) || isteq(port, ist("80")));
498 else
499 return (isteq(port, ist("443")) && isteqi(schm, ist("https://"))) ||
500 (isteq(port, ist("80")) && isteqi(schm, ist("http://")));
501}
502
Willy Tarreaua1205492021-08-10 15:35:36 +0200503/* Returns non-zero if the scheme <schm> is syntactically correct according to
504 * RFC3986#3.1, otherwise zero. It expects only the scheme and nothing else
505 * (particularly not the following "://").
506 * Scheme = alpha *(alpha|digit|'+'|'-'|'.')
507 */
508int http_validate_scheme(const struct ist schm)
509{
510 size_t i;
511
512 for (i = 0; i < schm.len; i++) {
513 if (likely((schm.ptr[i] >= 'a' && schm.ptr[i] <= 'z') ||
514 (schm.ptr[i] >= 'A' && schm.ptr[i] <= 'Z')))
515 continue;
516 if (unlikely(!i)) // first char must be alpha
517 return 0;
518 if ((schm.ptr[i] >= '0' && schm.ptr[i] <= '9') ||
519 schm.ptr[i] == '+' || schm.ptr[i] == '-' || schm.ptr[i] == '.')
520 continue;
521 return 0;
522 }
523 return !!i;
524}
525
Amaury Denoyellee09651e2021-07-07 10:49:25 +0200526/* Parse the uri and looks for the scheme. If not found, an empty ist is
527 * returned. Otherwise, the ist pointing to the scheme is returned.
528 */
529struct ist http_get_scheme(const struct ist uri)
530{
531 const char *ptr, *start, *end;
532
533 if (!uri.len)
534 goto not_found;
535
536 ptr = uri.ptr;
537 start = ptr;
538 end = ptr + uri.len;
539
540 /* RFC7230, par. 2.7 :
541 * Request-URI = "*" | absuri | abspath | authority
542 */
543
544 if (*ptr == '*' || *ptr == '/')
545 goto not_found;
546
547 if (isalpha((unsigned char)*ptr)) {
548 /* this is a scheme as described by RFC3986, par. 3.1, or only
549 * an authority (in case of a CONNECT method).
550 */
551 ptr++;
552 /* retrieve the scheme up to the suffix '://'. If the suffix is
553 * not found, this means there is no scheme and it is an
554 * authority-only uri.
555 */
556 while (ptr < end &&
557 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
558 ptr++;
559 if (ptr == end || *ptr++ != ':')
560 goto not_found;
561 if (ptr == end || *ptr++ != '/')
562 goto not_found;
563 if (ptr == end || *ptr++ != '/')
564 goto not_found;
565 }
566 else {
567 goto not_found;
568 }
569
570 return ist2(start, ptr - start);
571
572 not_found:
573 return IST_NULL;
574}
575
Christopher Faulet16fdc552019-10-08 14:56:58 +0200576/* Parse the uri and looks for the authority, between the scheme and the
577 * path. if no_userinfo is not zero, the part before the '@' (including it) is
578 * skipped. If not found, an empty ist is returned. Otherwise, the ist pointing
579 * on the authority is returned.
580 */
581struct ist http_get_authority(const struct ist uri, int no_userinfo)
582{
583 const char *ptr, *start, *end;
584
585 if (!uri.len)
586 goto not_found;
587
588 ptr = uri.ptr;
589 start = ptr;
590 end = ptr + uri.len;
591
592 /* RFC7230, par. 2.7 :
593 * Request-URI = "*" | absuri | abspath | authority
594 */
595
596 if (*ptr == '*' || *ptr == '/')
597 goto not_found;
598
599 if (isalpha((unsigned char)*ptr)) {
600 /* this is a scheme as described by RFC3986, par. 3.1, or only
601 * an authority (in case of a CONNECT method).
602 */
603 ptr++;
604 while (ptr < end &&
605 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
606 ptr++;
607 /* skip '://' or take the whole as authority if not found */
608 if (ptr == end || *ptr++ != ':')
609 goto authority;
610 if (ptr == end || *ptr++ != '/')
611 goto authority;
612 if (ptr == end || *ptr++ != '/')
613 goto authority;
614 }
615
616 start = ptr;
617 while (ptr < end && *ptr != '/') {
618 if (*ptr++ == '@' && no_userinfo)
619 start = ptr;
620 }
621
622 /* OK, ptr point on the '/' or the end */
623 end = ptr;
624
625 authority:
626 return ist2(start, end - start);
627
628 not_found:
Tim Duesterhus241e29e2020-03-05 17:56:30 +0100629 return IST_NULL;
Christopher Faulet16fdc552019-10-08 14:56:58 +0200630}
631
Willy Tarreau6b952c82018-09-10 17:45:34 +0200632/* Parse the URI from the given transaction (which is assumed to be in request
633 * phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is
634 * returned. Otherwise the pointer and length are returned.
635 */
636struct ist http_get_path(const struct ist uri)
637{
638 const char *ptr, *end;
639
640 if (!uri.len)
641 goto not_found;
642
643 ptr = uri.ptr;
644 end = ptr + uri.len;
645
646 /* RFC7230, par. 2.7 :
647 * Request-URI = "*" | absuri | abspath | authority
648 */
649
650 if (*ptr == '*')
651 goto not_found;
652
653 if (isalpha((unsigned char)*ptr)) {
654 /* this is a scheme as described by RFC3986, par. 3.1 */
655 ptr++;
656 while (ptr < end &&
657 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
658 ptr++;
659 /* skip '://' */
660 if (ptr == end || *ptr++ != ':')
661 goto not_found;
662 if (ptr == end || *ptr++ != '/')
663 goto not_found;
664 if (ptr == end || *ptr++ != '/')
665 goto not_found;
666 }
667 /* skip [user[:passwd]@]host[:[port]] */
668
669 while (ptr < end && *ptr != '/')
670 ptr++;
671
672 if (ptr == end)
673 goto not_found;
674
675 /* OK, we got the '/' ! */
676 return ist2(ptr, end - ptr);
677
678 not_found:
Tim Duesterhus241e29e2020-03-05 17:56:30 +0100679 return IST_NULL;
Willy Tarreau6b952c82018-09-10 17:45:34 +0200680}
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200681
/*
 * Checks whether the header starting at <hdr> and ending before <end> is named
 * <name> (<len> chars, compared case-insensitively) and is immediately
 * followed by a colon. Returns 0 if it is not the case. Otherwise returns the
 * offset, relative to <hdr>, of the first character following the colon which
 * is neither a space nor a tab. When no such character exists before <end>,
 * <len>+2 is returned if at least one character follows the colon (so that a
 * value may be written after the first space), otherwise <len>+1 which then
 * equals <end>-<hdr>.
 */
int http_header_match2(const char *hdr, const char *end,
                       const char *name, int len)
{
	const char *p;

	/* the colon must exist right after the name, and the name must match */
	if (hdr + len >= end || hdr[len] != ':' || strncasecmp(hdr, name, len) != 0)
		return 0;

	/* skip the spaces and tabs following the colon */
	for (p = hdr + len + 1; p < end && HTTP_IS_SPHT(*p); p++)
		;

	if (p >= end && len + 2 <= end - hdr)
		return len + 2; /* we may replace starting from second space */

	return p - hdr;
}
707
/* Finds the end of the header value contained between <s> and <e>. See
 * RFC7230, par 3.2 for more information. This works for headers defined as
 * comma-separated lists: the returned pointer designates the first comma found
 * outside of a quoted string, or <e> when there is none. Inside a quoted
 * string, a backslash escapes the next character. Note that it requires a
 * valid header to return a valid result.
 */
char *http_find_hdr_value_end(char *s, const char *e)
{
	int quoted, qdpair;

	quoted = qdpair = 0;

#ifdef HA_UNALIGNED_LE
	/* speedup: skip everything not a comma nor a double quote, one
	 * (possibly unaligned) word at a time. The bound is expressed as a
	 * remaining length so that no pointer is ever computed before <s> when
	 * fewer than sizeof(int) bytes are left.
	 */
	for (; e - s >= (int)sizeof(int); s += sizeof(int)) {
		unsigned int c = *(int *)s; // comma
		unsigned int q = c;         // quote

		c ^= 0x2c2c2c2c; // contains one zero on a comma
		q ^= 0x22222222; // contains one zero on a quote

		c = (c - 0x01010101) & ~c; // contains 0x80 below a comma
		q = (q - 0x01010101) & ~q; // contains 0x80 below a quote

		if ((c | q) & 0x80808080)
			break; // found a comma or a quote
	}
#endif
	/* byte-by-byte scan, tracking quoted strings and quoted pairs */
	for (; s < e; s++) {
		if (qdpair)
			qdpair = 0; /* escaped character, ignore it */
		else if (quoted) {
			if (*s == '\\')
				qdpair = 1;
			else if (*s == '"')
				quoted = 0;
		}
		else if (*s == '"')
			quoted = 1;
		else if (*s == ',')
			return s;
	}
	return s;
}
745
/* Finds the end of a cookie value contained between <s> and <e>. The returned
 * pointer designates the first comma or semi-colon found outside of a quoted
 * string, or <e> when there is none. Inside a quoted string, a backslash
 * escapes the next character. See RFC2965 for more information. Note that it
 * requires a valid header to return a valid result.
 */
char *http_find_cookie_value_end(char *s, const char *e)
{
	int in_quotes = 0;
	int escaped = 0;

	while (s < e) {
		const char c = *s;

		if (escaped) {
			/* this character was preceded by a backslash */
			escaped = 0;
		}
		else if (in_quotes) {
			if (c == '\\')
				escaped = 1;
			else if (c == '"')
				in_quotes = 0;
		}
		else if (c == '"') {
			in_quotes = 1;
		}
		else if (c == ',' || c == ';') {
			/* unquoted delimiter: the value ends here */
			break;
		}
		s++;
	}
	return s;
}
767
/* Try to find the next occurrence of a cookie name in a cookie header value.
 * To match on any cookie name, <cookie_name_l> must be set to 0.
 * The lookup begins at <hdr>. The pointer and size of the next occurrence of
 * the cookie value is returned into *value and *value_l, and the function
 * returns a pointer to the next pointer to search from if the value was found.
 * Otherwise if the cookie was not found, NULL is returned and neither value
 * nor value_l are touched. The input <hdr> string should first point to the
 * header's value, and the <hdr_end> pointer must point to the first character
 * not part of the value. <list> must be non-zero if value may represent a list
 * of values (cookie headers). This makes it faster to abort parsing when no
 * list is expected.
 *
 * Attributes whose name starts with '$' and attributes without an equal sign
 * are skipped. No byte located at or after <hdr_end> is ever read.
 */
char *http_extract_cookie_value(char *hdr, const char *hdr_end,
                                char *cookie_name, size_t cookie_name_l,
                                int list, char **value, size_t *value_l)
{
	char *equal, *att_end, *att_beg, *val_beg, *val_end;
	char *next;

	/* we search at least a cookie name followed by an equal, and more
	 * generally something like this :
	 * Cookie:    NAME1  =  VALUE 1  ; NAME2 = VALUE2 ; NAME3 = VALUE3\r\n
	 */
	for (att_beg = hdr; att_beg + cookie_name_l + 1 < hdr_end; att_beg = next + 1) {
		/* Iterate through all cookies on this line */

		while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
			att_beg++;

		/* find att_end : this is the first character after the last non
		 * space before the equal. It may be equal to hdr_end.
		 */
		equal = att_end = att_beg;

		while (equal < hdr_end) {
			if (*equal == '=' || *equal == ';' || (list && *equal == ','))
				break;
			if (HTTP_IS_SPHT(*equal++))
				continue;
			att_end = equal;
		}

		/* here, <equal> points to '=', a delimiter or the end. <att_end>
		 * is between <att_beg> and <equal>, both may be identical.
		 */

		/* look for end of cookie if there is an equal sign */
		if (equal < hdr_end && *equal == '=') {
			/* look for the beginning of the value */
			val_beg = equal + 1;
			while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
				val_beg++;

			/* find the end of the value, respecting quotes */
			next = http_find_cookie_value_end(val_beg, hdr_end);

			/* make val_end point to the first white space or delimiter after the value */
			val_end = next;
			while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
				val_end--;
		} else {
			val_beg = val_end = next = equal;
		}

		/* Ignore cookies with no equal sign. This is checked before
		 * looking at *att_beg because when only spaces were left,
		 * <att_beg> equals <hdr_end> and must not be dereferenced.
		 * Past this point an equal sign was found at <equal>, which
		 * guarantees that <att_beg> <= <equal> < <hdr_end>.
		 */
		if (equal == next)
			continue;

		/* We have nothing to do with attributes beginning with '$'. However,
		 * they will automatically be removed if a header before them is removed,
		 * since they're supposed to be linked together.
		 */
		if (*att_beg == '$')
			continue;

		/* Now we have the cookie name between att_beg and att_end, and
		 * its value between val_beg and val_end.
		 */

		if (cookie_name_l == 0 || ((size_t)(att_end - att_beg) == cookie_name_l &&
		                           memcmp(att_beg, cookie_name, cookie_name_l) == 0)) {
			/* let's return this value and indicate where to go on from */
			*value = val_beg;
			*value_l = val_end - val_beg;
			return next + 1;
		}

		/* Set-Cookie headers only have the name in the first attr=value part */
		if (!list)
			break;
	}

	return NULL;
}
862
/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
 * value is larger than 1000, it is bound to 1000. The parser consumes up to
 * 1 digit, one dot and 3 digits and stops on the first invalid character,
 * which is never consumed. When <end> is not NULL, it is set to point to the
 * first character that was not consumed. Unparsable qvalues return 1000 as
 * "q=1.000".
 */
int http_parse_qvalue(const char *qvalue, const char **end)
{
	int q = 1000;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q = (*qvalue++ - '0') * 1000;

	/* Only step over the dot when it is really there: whatever follows
	 * the integer part otherwise (a delimiter or the trailing zero) has
	 * to be left in place for the caller.
	 */
	if (*qvalue != '.')
		goto out;
	qvalue++;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 100;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 10;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 1;
 out:
	if (q > 1000)
		q = 1000;
	if (end)
		*end = qvalue;
	return q;
}
897
/*
 * Looks up the first occurrence of the parameter <url_param_name> (of length
 * <url_param_name_l>) followed by '=' in a query string described by <chunks>,
 * and returns a pointer to the first character of the name, or NULL if the
 * parameter is not found.
 *
 * <chunks> holds two (start, end) pointer pairs: chunks[0]..chunks[1] is the
 * first part, and chunks[2]..chunks[3] an optional second part (chunks[2] is
 * NULL when there is none). Parameters are separated according to
 * http_is_param_delimiter() and <delim>.
 *
 * Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
 * the function will return query_string+8.
 *
 * Warning: the returned pointer may point into the first chunk or into the
 * second one. The caller must check the position before using the result.
 */
const char *http_find_url_param_pos(const char **chunks,
                                    const char* url_param_name, size_t url_param_name_l,
                                    char delim)
{
	const char **seg = chunks;     /* (start, end) pair currently scanned */
	const char *cur = seg[0];
	const char *lim = seg[1];
	const char *eq, *eq_lim;
	int head_len, tail_len;

	while (cur < lim) {
		/* The '=' is expected right after the name; its position may
		 * have to be relocated by fix_pointer_if_wrap(), which also
		 * tells which end pointer it has to be checked against.
		 */
		eq = cur + url_param_name_l;
		eq_lim = fix_pointer_if_wrap(chunks, &eq) ? chunks[3] : chunks[1];
		if (eq >= eq_lim)
			return NULL;

		if (*eq == '=') {
			if (cur + url_param_name_l <= lim) {
				/* The name fits before <lim>: single comparison. */
				if (memcmp(cur, url_param_name, url_param_name_l) == 0)
					return cur;

				/* No match, skip the candidate and its '='. */
				cur += url_param_name_l + 1;
				/* NOTE(review): seg[2] is the *start* pointer of the
				 * following pair, not an end pointer; the delimiter
				 * scan below is what finally switches pairs. Confirm
				 * this is the intended limit.
				 */
				if (fix_pointer_if_wrap(chunks, &cur))
					lim = seg[2];
			}
			else {
				/* The candidate crosses <lim>: the comparison has to
				 * be done in two parts, which requires a second pair
				 * that was not entered yet.
				 */
				if (seg != chunks || !seg[2])
					return NULL;

				/* Bytes to compare before and after the wrap. */
				head_len = lim - cur;
				tail_len = url_param_name_l - head_len;

				/* The second part is too short to hold the remainder. */
				if (seg[2] + tail_len > seg[3])
					return NULL;

				if (memcmp(cur, url_param_name, head_len) == 0 &&
				    memcmp(seg[2], url_param_name + head_len, tail_len) == 0)
					return cur;

				/* No match: move to the second pair, right after the
				 * bytes which were just compared.
				 */
				seg += 2;
				cur = seg[0] + tail_len;
				lim = seg[1];
			}
		}

		/* Skip to the next delimiter, switching to the second pair once
		 * if the current one is exhausted.
		 */
		for (;;) {
			while (cur < lim && !http_is_param_delimiter(*cur, delim))
				cur++;
			if (cur < lim)
				break;

			if (seg != chunks || !seg[2])
				return NULL;
			seg += 2;
			cur = seg[0];
			lim = seg[1];
		}

		/* Step over the delimiter. */
		cur++;
	}
	return NULL;
}
984
/*
 * Given a url parameter name and a query string described by <chunks>
 * (chunks[0]..chunks[1] and optionally chunks[2]..chunks[3]), finds the next
 * value. An empty url_param_name (url_param_name_l == 0) matches the first
 * available parameter, i.e. whatever follows the first '='.
 * If the parameter is found, 1 is returned and *vstart / *vend are updated to
 * respectively provide a pointer to the value and its end.
 * Otherwise, 0 is returned and vstart/vend are not modified.
 */
int http_find_next_url_param(const char **chunks,
                             const char* url_param_name, size_t url_param_name_l,
                             const char **vstart, const char **vend, char delim)
{
	const char *name_pos = chunks[0];
	const char *limit = chunks[1];
	const char *val, *val_stop;

	if (url_param_name_l) {
		/* Locate the requested name. */
		name_pos = http_find_url_param_pos(chunks,
		                                   url_param_name, url_param_name_l,
		                                   delim);
		/* A position not below the first end pointer means the
		 * second part's end pointer applies.
		 */
		if (name_pos >= limit)
			limit = chunks[3];
	}
	if (!name_pos)
		return 0;

	if (url_param_name_l) {
		/* The value starts right after the name and its '='. */
		val = name_pos + url_param_name_l + 1;

		if (fix_pointer_if_wrap(chunks, &val)) {
			/* The pointer was relocated: use the second end pointer
			 * and make sure something is left to read.
			 */
			limit = chunks[3];
			if (val >= limit)
				return 0;
		}
	}
	else {
		/* No name given: take whatever follows the first '=', looking
		 * into the second part if the first one holds none.
		 */
		for (;;) {
			val = memchr(name_pos, '=', limit - name_pos);
			if (val)
				break;

			if (name_pos < chunks[0] || name_pos >= chunks[1] || !chunks[2])
				return 0;
			name_pos = chunks[2];
			limit = chunks[3];
		}
		val++;
	}

	/* Look for the end of the value: next delimiter or end of data. */
	val_stop = val;
	for (;;) {
		while (val_stop < limit && !http_is_param_delimiter(*val_stop, delim))
			val_stop++;
		if (val_stop < limit)
			break;

		/* NOTE(review): when the scan stopped at limit == chunks[1],
		 * the "val_stop < chunks[1]" test is false, so the second
		 * part is not entered in that case. Confirm whether values
		 * continuing into the second part are meant to be followed.
		 */
		if (val_stop < chunks[0] || val_stop >= chunks[1] || !chunks[2])
			break;
		val_stop = chunks[2];
		limit = chunks[3];
	}

	*vstart = val;
	*vend = val_stop;
	return 1;
}
1069
Christopher Faulet8277ca72018-10-22 15:12:04 +02001070/* Parses a single header line (without the CRLF) and splits it into its name
1071 * and its value. The parsing is pretty naive and just skip spaces.
1072 */
1073int http_parse_header(const struct ist hdr, struct ist *name, struct ist *value)
1074{
1075 char *p = hdr.ptr;
1076 char *end = p + hdr.len;
1077
1078 name->len = value->len = 0;
1079
1080 /* Skip leading spaces */
1081 for (; p < end && HTTP_IS_SPHT(*p); p++);
1082
1083 /* Set the header name */
1084 name->ptr = p;
1085 for (; p < end && HTTP_IS_TOKEN(*p); p++);
1086 name->len = p - name->ptr;
1087
1088 /* Skip the ':' and spaces before and after it */
1089 for (; p < end && HTTP_IS_SPHT(*p); p++);
1090 if (p < end && *p == ':') p++;
1091 for (; p < end && HTTP_IS_SPHT(*p); p++);
1092
1093 /* Set the header value */
1094 value->ptr = p;
1095 value->len = end - p;
1096
1097 return 1;
1098}
1099
1100/* Parses a single start line (without the CRLF) and splits it into 3 parts. The
1101 * parsing is pretty naive and just skip spaces.
1102 */
1103int http_parse_stline(const struct ist line, struct ist *p1, struct ist *p2, struct ist *p3)
1104{
1105 char *p = line.ptr;
1106 char *end = p + line.len;
1107
1108 p1->len = p2->len = p3->len = 0;
1109
1110 /* Skip leading spaces */
1111 for (; p < end && HTTP_IS_SPHT(*p); p++);
1112
1113 /* Set the first part */
1114 p1->ptr = p;
1115 for (; p < end && HTTP_IS_TOKEN(*p); p++);
1116 p1->len = p - p1->ptr;
1117
1118 /* Skip spaces between p1 and p2 */
1119 for (; p < end && HTTP_IS_SPHT(*p); p++);
1120
1121 /* Set the second part */
1122 p2->ptr = p;
1123 for (; p < end && !HTTP_IS_SPHT(*p); p++);
1124 p2->len = p - p2->ptr;
1125
1126 /* Skip spaces between p2 and p3 */
1127 for (; p < end && HTTP_IS_SPHT(*p); p++);
1128
Ilya Shipitsin46a030c2020-07-05 16:36:08 +05001129 /* The remaining is the third value */
Christopher Faulet8277ca72018-10-22 15:12:04 +02001130 p3->ptr = p;
1131 p3->len = end - p;
1132
1133 return 1;
1134}
Christopher Faulet341fac12019-09-16 11:37:05 +02001135
1136/* Parses value of a Status header with the following format: "Status: Code[
1137 * Reason]". The parsing is pretty naive and just skip spaces. It return the
1138 * numeric value of the status code.
1139 */
1140int http_parse_status_val(const struct ist value, struct ist *status, struct ist *reason)
1141{
1142 char *p = value.ptr;
1143 char *end = p + value.len;
1144 uint16_t code;
1145
1146 status->len = reason->len = 0;
1147
1148 /* Skip leading spaces */
1149 for (; p < end && HTTP_IS_SPHT(*p); p++);
1150
1151 /* Set the status part */
1152 status->ptr = p;
1153 for (; p < end && HTTP_IS_TOKEN(*p); p++);
1154 status->len = p - status->ptr;
1155
1156 /* Skip spaces between status and reason */
1157 for (; p < end && HTTP_IS_SPHT(*p); p++);
1158
1159 /* the remaining is the reason */
1160 reason->ptr = p;
1161 reason->len = end - p;
1162
1163 code = strl2ui(status->ptr, status->len);
1164 return code;
1165}
Remi Tricot-Le Bretonbcced092020-10-22 10:40:03 +02001166
1167
1168/* Returns non-zero if the two ETags are comparable (see RFC 7232#2.3.2).
1169 * If any of them is a weak ETag, we discard the weakness prefix and perform
1170 * a strict string comparison.
1171 * Returns 0 otherwise.
1172 */
1173int http_compare_etags(struct ist etag1, struct ist etag2)
1174{
1175 enum http_etag_type etag_type1;
1176 enum http_etag_type etag_type2;
1177
1178 etag_type1 = http_get_etag_type(etag1);
1179 etag_type2 = http_get_etag_type(etag2);
1180
1181 if (etag_type1 == ETAG_INVALID || etag_type2 == ETAG_INVALID)
1182 return 0;
1183
1184 /* Discard the 'W/' prefix an ETag is a weak one. */
1185 if (etag_type1 == ETAG_WEAK)
1186 etag1 = istadv(etag1, 2);
1187 if (etag_type2 == ETAG_WEAK)
1188 etag2 = istadv(etag2, 2);
1189
1190 return isteq(etag1, etag2);
1191}
Remi Tricot-Le Breton56e46cb2020-12-23 18:13:48 +01001192
1193
1194/*
1195 * Trim leading space or horizontal tab characters from <value> string.
1196 * Returns the trimmed string.
1197 */
1198struct ist http_trim_leading_spht(struct ist value)
1199{
1200 struct ist ret = value;
1201
1202 while (ret.len && HTTP_IS_SPHT(ret.ptr[0])) {
1203 ++ret.ptr;
1204 --ret.len;
1205 }
1206
1207 return ret;
1208}
1209
1210/*
1211 * Trim trailing space or horizontal tab characters from <value> string.
1212 * Returns the trimmed string.
1213 */
1214struct ist http_trim_trailing_spht(struct ist value)
1215{
1216 struct ist ret = value;
1217
1218 while (ret.len && HTTP_IS_SPHT(ret.ptr[-1]))
1219 --ret.len;
1220
1221 return ret;
1222}