blob: 2436292b209f8a3d5a1f35355d1439d7c793d437 [file] [log] [blame]
Willy Tarreau35b51c62018-09-10 15:38:55 +02001/*
2 * HTTP semantics
3 *
4 * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020014#include <haproxy/api.h>
Willy Tarreaucd72d8c2020-06-02 19:11:26 +020015#include <haproxy/http.h>
Willy Tarreau48fbcae2020-06-03 18:09:46 +020016#include <haproxy/tools.h>
Willy Tarreau35b51c62018-09-10 15:38:55 +020017
18/* It is about twice as fast on recent architectures to lookup a byte in a
19 * table than to perform a boolean AND or OR between two tests. Refer to
20 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
21 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
22 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
23 * digit. Note: please do not overwrite values in assignment since gcc-2.95
24 * will not handle them correctly. It's worth noting that chars 128..255 are
25 * nothing, not even control chars.
26 */
27const unsigned char http_char_classes[256] = {
28 [ 0] = HTTP_FLG_CTL,
29 [ 1] = HTTP_FLG_CTL,
30 [ 2] = HTTP_FLG_CTL,
31 [ 3] = HTTP_FLG_CTL,
32 [ 4] = HTTP_FLG_CTL,
33 [ 5] = HTTP_FLG_CTL,
34 [ 6] = HTTP_FLG_CTL,
35 [ 7] = HTTP_FLG_CTL,
36 [ 8] = HTTP_FLG_CTL,
37 [ 9] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP | HTTP_FLG_CTL,
38 [ 10] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
39 [ 11] = HTTP_FLG_CTL,
40 [ 12] = HTTP_FLG_CTL,
41 [ 13] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
42 [ 14] = HTTP_FLG_CTL,
43 [ 15] = HTTP_FLG_CTL,
44 [ 16] = HTTP_FLG_CTL,
45 [ 17] = HTTP_FLG_CTL,
46 [ 18] = HTTP_FLG_CTL,
47 [ 19] = HTTP_FLG_CTL,
48 [ 20] = HTTP_FLG_CTL,
49 [ 21] = HTTP_FLG_CTL,
50 [ 22] = HTTP_FLG_CTL,
51 [ 23] = HTTP_FLG_CTL,
52 [ 24] = HTTP_FLG_CTL,
53 [ 25] = HTTP_FLG_CTL,
54 [ 26] = HTTP_FLG_CTL,
55 [ 27] = HTTP_FLG_CTL,
56 [ 28] = HTTP_FLG_CTL,
57 [ 29] = HTTP_FLG_CTL,
58 [ 30] = HTTP_FLG_CTL,
59 [ 31] = HTTP_FLG_CTL,
60 [' '] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP,
61 ['!'] = HTTP_FLG_TOK,
62 ['"'] = HTTP_FLG_SEP,
63 ['#'] = HTTP_FLG_TOK,
64 ['$'] = HTTP_FLG_TOK,
65 ['%'] = HTTP_FLG_TOK,
66 ['&'] = HTTP_FLG_TOK,
67 [ 39] = HTTP_FLG_TOK,
68 ['('] = HTTP_FLG_SEP,
69 [')'] = HTTP_FLG_SEP,
70 ['*'] = HTTP_FLG_TOK,
71 ['+'] = HTTP_FLG_TOK,
72 [','] = HTTP_FLG_SEP,
73 ['-'] = HTTP_FLG_TOK,
74 ['.'] = HTTP_FLG_TOK | HTTP_FLG_VER,
75 ['/'] = HTTP_FLG_SEP | HTTP_FLG_VER,
76 ['0'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
77 ['1'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
78 ['2'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
79 ['3'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
80 ['4'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
81 ['5'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
82 ['6'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
83 ['7'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
84 ['8'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
85 ['9'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
86 [':'] = HTTP_FLG_SEP,
87 [';'] = HTTP_FLG_SEP,
88 ['<'] = HTTP_FLG_SEP,
89 ['='] = HTTP_FLG_SEP,
90 ['>'] = HTTP_FLG_SEP,
91 ['?'] = HTTP_FLG_SEP,
92 ['@'] = HTTP_FLG_SEP,
Willy Tarreau1ba30162022-05-24 15:34:26 +020093 ['A'] = HTTP_FLG_TOK | HTTP_FLG_VER,
94 ['B'] = HTTP_FLG_TOK | HTTP_FLG_VER,
95 ['C'] = HTTP_FLG_TOK | HTTP_FLG_VER,
96 ['D'] = HTTP_FLG_TOK | HTTP_FLG_VER,
97 ['E'] = HTTP_FLG_TOK | HTTP_FLG_VER,
98 ['F'] = HTTP_FLG_TOK | HTTP_FLG_VER,
99 ['G'] = HTTP_FLG_TOK | HTTP_FLG_VER,
Willy Tarreau35b51c62018-09-10 15:38:55 +0200100 ['H'] = HTTP_FLG_TOK | HTTP_FLG_VER,
Willy Tarreau1ba30162022-05-24 15:34:26 +0200101 ['I'] = HTTP_FLG_TOK | HTTP_FLG_VER,
102 ['J'] = HTTP_FLG_TOK | HTTP_FLG_VER,
103 ['K'] = HTTP_FLG_TOK | HTTP_FLG_VER,
104 ['L'] = HTTP_FLG_TOK | HTTP_FLG_VER,
105 ['M'] = HTTP_FLG_TOK | HTTP_FLG_VER,
106 ['N'] = HTTP_FLG_TOK | HTTP_FLG_VER,
107 ['O'] = HTTP_FLG_TOK | HTTP_FLG_VER,
Willy Tarreau35b51c62018-09-10 15:38:55 +0200108 ['P'] = HTTP_FLG_TOK | HTTP_FLG_VER,
Willy Tarreau1ba30162022-05-24 15:34:26 +0200109 ['Q'] = HTTP_FLG_TOK | HTTP_FLG_VER,
Willy Tarreau35b51c62018-09-10 15:38:55 +0200110 ['R'] = HTTP_FLG_TOK | HTTP_FLG_VER,
111 ['S'] = HTTP_FLG_TOK | HTTP_FLG_VER,
112 ['T'] = HTTP_FLG_TOK | HTTP_FLG_VER,
Willy Tarreau1ba30162022-05-24 15:34:26 +0200113 ['U'] = HTTP_FLG_TOK | HTTP_FLG_VER,
114 ['V'] = HTTP_FLG_TOK | HTTP_FLG_VER,
115 ['W'] = HTTP_FLG_TOK | HTTP_FLG_VER,
116 ['X'] = HTTP_FLG_TOK | HTTP_FLG_VER,
117 ['Y'] = HTTP_FLG_TOK | HTTP_FLG_VER,
118 ['Z'] = HTTP_FLG_TOK | HTTP_FLG_VER,
Willy Tarreau35b51c62018-09-10 15:38:55 +0200119 ['['] = HTTP_FLG_SEP,
120 [ 92] = HTTP_FLG_SEP,
121 [']'] = HTTP_FLG_SEP,
122 ['^'] = HTTP_FLG_TOK,
123 ['_'] = HTTP_FLG_TOK,
124 ['`'] = HTTP_FLG_TOK,
125 ['a'] = HTTP_FLG_TOK,
126 ['b'] = HTTP_FLG_TOK,
127 ['c'] = HTTP_FLG_TOK,
128 ['d'] = HTTP_FLG_TOK,
129 ['e'] = HTTP_FLG_TOK,
130 ['f'] = HTTP_FLG_TOK,
131 ['g'] = HTTP_FLG_TOK,
132 ['h'] = HTTP_FLG_TOK,
133 ['i'] = HTTP_FLG_TOK,
134 ['j'] = HTTP_FLG_TOK,
135 ['k'] = HTTP_FLG_TOK,
136 ['l'] = HTTP_FLG_TOK,
137 ['m'] = HTTP_FLG_TOK,
138 ['n'] = HTTP_FLG_TOK,
139 ['o'] = HTTP_FLG_TOK,
140 ['p'] = HTTP_FLG_TOK,
141 ['q'] = HTTP_FLG_TOK,
142 ['r'] = HTTP_FLG_TOK,
143 ['s'] = HTTP_FLG_TOK,
144 ['t'] = HTTP_FLG_TOK,
145 ['u'] = HTTP_FLG_TOK,
146 ['v'] = HTTP_FLG_TOK,
147 ['w'] = HTTP_FLG_TOK,
148 ['x'] = HTTP_FLG_TOK,
149 ['y'] = HTTP_FLG_TOK,
150 ['z'] = HTTP_FLG_TOK,
151 ['{'] = HTTP_FLG_SEP,
152 ['|'] = HTTP_FLG_TOK,
153 ['}'] = HTTP_FLG_SEP,
154 ['~'] = HTTP_FLG_TOK,
155 [127] = HTTP_FLG_CTL,
156};
157
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200158const int http_err_codes[HTTP_ERR_SIZE] = {
159 [HTTP_ERR_200] = 200, /* used by "monitor-uri" */
160 [HTTP_ERR_400] = 400,
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200161 [HTTP_ERR_401] = 401,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200162 [HTTP_ERR_403] = 403,
Florian Tham9205fea2020-01-08 13:35:30 +0100163 [HTTP_ERR_404] = 404,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200164 [HTTP_ERR_405] = 405,
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200165 [HTTP_ERR_407] = 407,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200166 [HTTP_ERR_408] = 408,
Florian Tham272e29b2020-01-08 10:19:05 +0100167 [HTTP_ERR_410] = 410,
Anthonin Bonnefoy85048f82020-06-22 09:17:01 +0200168 [HTTP_ERR_413] = 413,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200169 [HTTP_ERR_421] = 421,
Christopher Faulet92cafb32021-09-28 08:48:51 +0200170 [HTTP_ERR_422] = 422,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200171 [HTTP_ERR_425] = 425,
172 [HTTP_ERR_429] = 429,
173 [HTTP_ERR_500] = 500,
Christopher Faulete095f312020-12-07 11:22:24 +0100174 [HTTP_ERR_501] = 501,
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200175 [HTTP_ERR_502] = 502,
176 [HTTP_ERR_503] = 503,
177 [HTTP_ERR_504] = 504,
178};
179
Christopher Fauleta7b677c2018-11-29 16:48:49 +0100180const char *http_err_msgs[HTTP_ERR_SIZE] = {
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200181 [HTTP_ERR_200] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200182 "HTTP/1.1 200 OK\r\n"
183 "Content-length: 58\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200184 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200185 "Content-Type: text/html\r\n"
186 "\r\n"
187 "<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
188
189 [HTTP_ERR_400] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200190 "HTTP/1.1 400 Bad request\r\n"
191 "Content-length: 90\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200192 "Cache-Control: no-cache\r\n"
193 "Connection: close\r\n"
194 "Content-Type: text/html\r\n"
195 "\r\n"
196 "<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
197
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200198 [HTTP_ERR_401] =
199 "HTTP/1.1 401 Unauthorized\r\n"
200 "Content-length: 112\r\n"
201 "Cache-Control: no-cache\r\n"
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200202 "Content-Type: text/html\r\n"
203 "\r\n"
204 "<html><body><h1>401 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
205
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200206 [HTTP_ERR_403] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200207 "HTTP/1.1 403 Forbidden\r\n"
208 "Content-length: 93\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200209 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200210 "Content-Type: text/html\r\n"
211 "\r\n"
212 "<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
213
Florian Tham9205fea2020-01-08 13:35:30 +0100214 [HTTP_ERR_404] =
215 "HTTP/1.1 404 Not Found\r\n"
216 "Content-length: 83\r\n"
217 "Cache-Control: no-cache\r\n"
Florian Tham9205fea2020-01-08 13:35:30 +0100218 "Content-Type: text/html\r\n"
219 "\r\n"
220 "<html><body><h1>404 Not Found</h1>\nThe resource could not be found.\n</body></html>\n",
221
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200222 [HTTP_ERR_405] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200223 "HTTP/1.1 405 Method Not Allowed\r\n"
224 "Content-length: 146\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200225 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200226 "Content-Type: text/html\r\n"
227 "\r\n"
228 "<html><body><h1>405 Method Not Allowed</h1>\nA request was made of a resource using a request method not supported by that resource\n</body></html>\n",
229
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200230 [HTTP_ERR_407] =
231 "HTTP/1.1 407 Unauthorized\r\n"
232 "Content-length: 112\r\n"
233 "Cache-Control: no-cache\r\n"
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200234 "Content-Type: text/html\r\n"
235 "\r\n"
236 "<html><body><h1>407 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
237
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200238 [HTTP_ERR_408] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200239 "HTTP/1.1 408 Request Time-out\r\n"
240 "Content-length: 110\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200241 "Cache-Control: no-cache\r\n"
242 "Connection: close\r\n"
243 "Content-Type: text/html\r\n"
244 "\r\n"
245 "<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
246
Florian Tham272e29b2020-01-08 10:19:05 +0100247 [HTTP_ERR_410] =
248 "HTTP/1.1 410 Gone\r\n"
249 "Content-length: 114\r\n"
250 "Cache-Control: no-cache\r\n"
Florian Tham272e29b2020-01-08 10:19:05 +0100251 "Content-Type: text/html\r\n"
252 "\r\n"
253 "<html><body><h1>410 Gone</h1>\nThe resource is no longer available and will not be available again.\n</body></html>\n",
254
Anthonin Bonnefoy85048f82020-06-22 09:17:01 +0200255 [HTTP_ERR_413] =
256 "HTTP/1.1 413 Payload Too Large\r\n"
257 "Content-length: 106\r\n"
258 "Cache-Control: no-cache\r\n"
259 "Content-Type: text/html\r\n"
260 "\r\n"
261 "<html><body><h1>413 Payload Too Large</h1>\nThe request entity exceeds the maximum allowed.\n</body></html>\n",
262
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200263 [HTTP_ERR_421] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200264 "HTTP/1.1 421 Misdirected Request\r\n"
265 "Content-length: 104\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200266 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200267 "Content-Type: text/html\r\n"
268 "\r\n"
269 "<html><body><h1>421 Misdirected Request</h1>\nRequest sent to a non-authoritative server.\n</body></html>\n",
270
Christopher Faulet92cafb32021-09-28 08:48:51 +0200271 [HTTP_ERR_422] =
272 "HTTP/1.1 422 Unprocessable Content\r\n"
273 "Content-length: 116\r\n"
274 "Cache-Control: no-cache\r\n"
275 "Content-Type: text/html\r\n"
276 "\r\n"
277 "<html><body><h1>422 Unprocessable Content</h1>\nThe server cannot process the contained instructions.\n</body></html>\n",
278
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200279 [HTTP_ERR_425] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200280 "HTTP/1.1 425 Too Early\r\n"
281 "Content-length: 80\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200282 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200283 "Content-Type: text/html\r\n"
284 "\r\n"
285 "<html><body><h1>425 Too Early</h1>\nYour browser sent early data.\n</body></html>\n",
286
287 [HTTP_ERR_429] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200288 "HTTP/1.1 429 Too Many Requests\r\n"
289 "Content-length: 117\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200290 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200291 "Content-Type: text/html\r\n"
292 "\r\n"
293 "<html><body><h1>429 Too Many Requests</h1>\nYou have sent too many requests in a given amount of time.\n</body></html>\n",
294
295 [HTTP_ERR_500] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200296 "HTTP/1.1 500 Internal Server Error\r\n"
Christopher Faulet55633922020-10-09 08:39:26 +0200297 "Content-length: 97\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200298 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200299 "Content-Type: text/html\r\n"
300 "\r\n"
Ilya Shipitsin46a030c2020-07-05 16:36:08 +0500301 "<html><body><h1>500 Internal Server Error</h1>\nAn internal server error occurred.\n</body></html>\n",
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200302
Christopher Faulete095f312020-12-07 11:22:24 +0100303 [HTTP_ERR_501] =
304 "HTTP/1.1 501 Not Implemented\r\n"
305 "Content-length: 136\r\n"
306 "Cache-Control: no-cache\r\n"
307 "Content-Type: text/html\r\n"
308 "\r\n"
309 "<html><body><h1>501 Not Implemented</h1>\n.The server does not support the functionality required to fulfill the request.\n</body></html>\n",
310
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200311 [HTTP_ERR_502] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200312 "HTTP/1.1 502 Bad Gateway\r\n"
313 "Content-length: 107\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200314 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200315 "Content-Type: text/html\r\n"
316 "\r\n"
317 "<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
318
319 [HTTP_ERR_503] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200320 "HTTP/1.1 503 Service Unavailable\r\n"
321 "Content-length: 107\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200322 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200323 "Content-Type: text/html\r\n"
324 "\r\n"
325 "<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
326
327 [HTTP_ERR_504] =
Willy Tarreaub5ba2b02019-06-11 16:08:25 +0200328 "HTTP/1.1 504 Gateway Time-out\r\n"
329 "Content-length: 92\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200330 "Cache-Control: no-cache\r\n"
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200331 "Content-Type: text/html\r\n"
332 "\r\n"
333 "<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200334};
335
Willy Tarreau35b51c62018-09-10 15:38:55 +0200336const struct ist http_known_methods[HTTP_METH_OTHER] = {
337 [HTTP_METH_OPTIONS] = IST("OPTIONS"),
338 [HTTP_METH_GET] = IST("GET"),
339 [HTTP_METH_HEAD] = IST("HEAD"),
340 [HTTP_METH_POST] = IST("POST"),
341 [HTTP_METH_PUT] = IST("PUT"),
342 [HTTP_METH_DELETE] = IST("DELETE"),
343 [HTTP_METH_TRACE] = IST("TRACE"),
344 [HTTP_METH_CONNECT] = IST("CONNECT"),
345};
346
347/*
348 * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown
349 * ones.
350 */
351enum http_meth_t find_http_meth(const char *str, const int len)
352{
353 const struct ist m = ist2(str, len);
354
355 if (isteq(m, ist("GET"))) return HTTP_METH_GET;
356 else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD;
357 else if (isteq(m, ist("POST"))) return HTTP_METH_POST;
358 else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT;
359 else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT;
360 else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS;
361 else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE;
362 else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE;
363 else return HTTP_METH_OTHER;
364}
Willy Tarreau6b952c82018-09-10 17:45:34 +0200365
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200366/* This function returns HTTP_ERR_<num> (enum) matching http status code.
367 * Returned value should match codes from http_err_codes.
368 */
Willy Tarreau8de1df92019-04-15 21:27:18 +0200369int http_get_status_idx(unsigned int status)
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200370{
371 switch (status) {
372 case 200: return HTTP_ERR_200;
373 case 400: return HTTP_ERR_400;
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200374 case 401: return HTTP_ERR_401;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200375 case 403: return HTTP_ERR_403;
Florian Tham9205fea2020-01-08 13:35:30 +0100376 case 404: return HTTP_ERR_404;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200377 case 405: return HTTP_ERR_405;
Christopher Faulet612f2ea2020-05-27 09:57:28 +0200378 case 407: return HTTP_ERR_407;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200379 case 408: return HTTP_ERR_408;
Florian Tham272e29b2020-01-08 10:19:05 +0100380 case 410: return HTTP_ERR_410;
Anthonin Bonnefoy85048f82020-06-22 09:17:01 +0200381 case 413: return HTTP_ERR_413;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200382 case 421: return HTTP_ERR_421;
Christopher Faulet92cafb32021-09-28 08:48:51 +0200383 case 422: return HTTP_ERR_422;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200384 case 425: return HTTP_ERR_425;
385 case 429: return HTTP_ERR_429;
386 case 500: return HTTP_ERR_500;
Christopher Faulete095f312020-12-07 11:22:24 +0100387 case 501: return HTTP_ERR_501;
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200388 case 502: return HTTP_ERR_502;
389 case 503: return HTTP_ERR_503;
390 case 504: return HTTP_ERR_504;
391 default: return HTTP_ERR_500;
392 }
393}
394
/* Returns the reason phrase to associate with the HTTP status <status>. The
 * function never fails: a status without a dedicated phrase gets the generic
 * name of its class (1xx to 5xx), and anything outside 100..599 gets "Other".
 * The returned pointer designates a constant string.
 */
const char *http_get_reason(unsigned int status)
{
	switch (status) {
	case 100: return "Continue";
	case 101: return "Switching Protocols";
	case 102: return "Processing";
	case 200: return "OK";
	case 201: return "Created";
	case 202: return "Accepted";
	case 203: return "Non-Authoritative Information";
	case 204: return "No Content";
	case 205: return "Reset Content";
	case 206: return "Partial Content";
	case 207: return "Multi-Status";
	case 210: return "Content Different";
	case 226: return "IM Used";
	case 300: return "Multiple Choices";
	case 301: return "Moved Permanently";
	case 302: return "Found";
	case 303: return "See Other";
	case 304: return "Not Modified";
	case 305: return "Use Proxy";
	case 307: return "Temporary Redirect";
	case 308: return "Permanent Redirect";
	case 310: return "Too many Redirects";
	case 400: return "Bad Request";
	case 401: return "Unauthorized";
	case 402: return "Payment Required";
	case 403: return "Forbidden";
	case 404: return "Not Found";
	case 405: return "Method Not Allowed";
	case 406: return "Not Acceptable";
	case 407: return "Proxy Authentication Required";
	case 408: return "Request Time-out";
	case 409: return "Conflict";
	case 410: return "Gone";
	case 411: return "Length Required";
	case 412: return "Precondition Failed";
	case 413: return "Request Entity Too Large";
	case 414: return "Request-URI Too Long";
	case 415: return "Unsupported Media Type";
	case 416: return "Requested range unsatisfiable";
	case 417: return "Expectation failed";
	case 418: return "I'm a teapot";
	case 421: return "Misdirected Request";
	case 422: return "Unprocessable Content";
	case 423: return "Locked";
	case 424: return "Method failure";
	case 425: return "Too Early";
	case 426: return "Upgrade Required";
	case 428: return "Precondition Required";
	case 429: return "Too Many Requests";
	case 431: return "Request Header Fields Too Large";
	case 449: return "Retry With";
	case 450: return "Blocked by Windows Parental Controls";
	case 451: return "Unavailable For Legal Reasons";
	case 456: return "Unrecoverable Error";
	case 499: return "client has closed connection";
	case 500: return "Internal Server Error";
	case 501: return "Not Implemented";
	case 502: return "Bad Gateway or Proxy Error";
	case 503: return "Service Unavailable";
	case 504: return "Gateway Time-out";
	case 505: return "HTTP Version not supported";
	case 506: return "Variant also negotiate";
	case 507: return "Insufficient storage";
	case 508: return "Loop detected";
	case 509: return "Bandwidth Limit Exceeded";
	case 510: return "Not extended";
	case 511: return "Network authentication required";
	case 520: return "Web server is returning an unknown error";
	default:  break;
	}

	/* no dedicated phrase: fall back to the name of the status class,
	 * which is given by the hundreds digit.
	 */
	switch (status / 100) {
	case 1:  return "Informational";
	case 2:  return "Success";
	case 3:  return "Redirection";
	case 4:  return "Client Error";
	case 5:  return "Server Error";
	default: return "Other";
	}
}
480
Christopher Faulet658f9712022-07-05 09:48:39 +0200481/* Returns the ist string corresponding to port part (without ':') in the host
Christopher Faulet99ade9e2022-11-21 18:57:49 +0100482 * <host>, IST_NULL if no ':' is found or an empty IST if there is no digit. In
Ilya Shipitsin6f86eaa2022-11-30 16:22:42 +0500483 * the last case, the result is the original ist trimmed to 0. So be sure to test
Christopher Faulet99ade9e2022-11-21 18:57:49 +0100484 * the result length before doing any pointer arithmetic.
Christopher Faulet658f9712022-07-05 09:48:39 +0200485*/
486struct ist http_get_host_port(const struct ist host)
487{
488 char *start, *end, *ptr;
489
490 start = istptr(host);
491 end = istend(host);
492 for (ptr = end; ptr > start && isdigit((unsigned char)*--ptr););
493
494 /* no port found */
Christopher Faulet99ade9e2022-11-21 18:57:49 +0100495 if (likely(*ptr != ':'))
Christopher Faulet658f9712022-07-05 09:48:39 +0200496 return IST_NULL;
Christopher Faulet99ade9e2022-11-21 18:57:49 +0100497 if (ptr+1 == end)
498 return isttrim(host, 0);
Christopher Faulet658f9712022-07-05 09:48:39 +0200499
500 return istnext(ist2(ptr, end - ptr));
501}
502
Christopher Fauletca7218a2022-07-05 09:53:37 +0200503
504/* Return non-zero if the port <port> is a default port. If the scheme <schm> is
505 * set, it is used to detect default ports (HTTP => 80 and HTTPS => 443)
506 * port. Otherwise, both are considered as default ports.
507 */
508int http_is_default_port(const struct ist schm, const struct ist port)
509{
Christopher Faulet99ade9e2022-11-21 18:57:49 +0100510 if (!istlen(port))
511 return 1;
512
Christopher Fauletca7218a2022-07-05 09:53:37 +0200513 if (!isttest(schm))
514 return (isteq(port, ist("443")) || isteq(port, ist("80")));
515 else
516 return (isteq(port, ist("443")) && isteqi(schm, ist("https://"))) ||
517 (isteq(port, ist("80")) && isteqi(schm, ist("http://")));
518}
519
Willy Tarreaud3d8d032021-08-10 15:35:36 +0200520/* Returns non-zero if the scheme <schm> is syntactically correct according to
521 * RFC3986#3.1, otherwise zero. It expects only the scheme and nothing else
522 * (particularly not the following "://").
523 * Scheme = alpha *(alpha|digit|'+'|'-'|'.')
524 */
525int http_validate_scheme(const struct ist schm)
526{
527 size_t i;
528
529 for (i = 0; i < schm.len; i++) {
530 if (likely((schm.ptr[i] >= 'a' && schm.ptr[i] <= 'z') ||
531 (schm.ptr[i] >= 'A' && schm.ptr[i] <= 'Z')))
532 continue;
533 if (unlikely(!i)) // first char must be alpha
534 return 0;
535 if ((schm.ptr[i] >= '0' && schm.ptr[i] <= '9') ||
536 schm.ptr[i] == '+' || schm.ptr[i] == '-' || schm.ptr[i] == '.')
537 continue;
538 return 0;
539 }
540 return !!i;
541}
542
Amaury Denoyelleef088112021-07-07 10:49:25 +0200543/* Parse the uri and looks for the scheme. If not found, an empty ist is
544 * returned. Otherwise, the ist pointing to the scheme is returned.
Amaury Denoyelle8ac8cbf2021-07-06 10:52:58 +0200545 *
546 * <parser> must have been initialized via http_uri_parser_init. See the
547 * related http_uri_parser documentation for the specific API usage.
Amaury Denoyelleef088112021-07-07 10:49:25 +0200548 */
Amaury Denoyelle8ac8cbf2021-07-06 10:52:58 +0200549struct ist http_parse_scheme(struct http_uri_parser *parser)
Amaury Denoyelleef088112021-07-07 10:49:25 +0200550{
551 const char *ptr, *start, *end;
552
Amaury Denoyelle8ac8cbf2021-07-06 10:52:58 +0200553 if (parser->state >= URI_PARSER_STATE_SCHEME_DONE)
Amaury Denoyelleef088112021-07-07 10:49:25 +0200554 goto not_found;
555
Amaury Denoyelle8ac8cbf2021-07-06 10:52:58 +0200556 if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY)
Amaury Denoyelleef088112021-07-07 10:49:25 +0200557 goto not_found;
558
Amaury Denoyelle8ac8cbf2021-07-06 10:52:58 +0200559 ptr = start = istptr(parser->uri);
560 end = istend(parser->uri);
561
Amaury Denoyelleef088112021-07-07 10:49:25 +0200562 if (isalpha((unsigned char)*ptr)) {
563 /* this is a scheme as described by RFC3986, par. 3.1, or only
564 * an authority (in case of a CONNECT method).
565 */
566 ptr++;
567 /* retrieve the scheme up to the suffix '://'. If the suffix is
568 * not found, this means there is no scheme and it is an
569 * authority-only uri.
570 */
571 while (ptr < end &&
572 (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
573 ptr++;
574 if (ptr == end || *ptr++ != ':')
575 goto not_found;
576 if (ptr == end || *ptr++ != '/')
577 goto not_found;
578 if (ptr == end || *ptr++ != '/')
579 goto not_found;
580 }
581 else {
582 goto not_found;
583 }
584
Amaury Denoyelle8ac8cbf2021-07-06 10:52:58 +0200585 parser->uri = ist2(ptr, end - ptr);
586 parser->state = URI_PARSER_STATE_SCHEME_DONE;
Amaury Denoyelleef088112021-07-07 10:49:25 +0200587 return ist2(start, ptr - start);
588
589 not_found:
Amaury Denoyelle8ac8cbf2021-07-06 10:52:58 +0200590 parser->state = URI_PARSER_STATE_SCHEME_DONE;
Amaury Denoyelleef088112021-07-07 10:49:25 +0200591 return IST_NULL;
592}
593
Christopher Faulet16fdc552019-10-08 14:56:58 +0200594/* Parse the uri and looks for the authority, between the scheme and the
595 * path. if no_userinfo is not zero, the part before the '@' (including it) is
596 * skipped. If not found, an empty ist is returned. Otherwise, the ist pointing
597 * on the authority is returned.
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200598 *
599 * <parser> must have been initialized via http_uri_parser_init. See the
600 * related http_uri_parser documentation for the specific API usage.
Christopher Faulet16fdc552019-10-08 14:56:58 +0200601 */
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200602struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo)
Christopher Faulet16fdc552019-10-08 14:56:58 +0200603{
604 const char *ptr, *start, *end;
605
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200606 if (parser->state >= URI_PARSER_STATE_AUTHORITY_DONE)
Christopher Faulet16fdc552019-10-08 14:56:58 +0200607 goto not_found;
608
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200609 if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY)
Christopher Faulet16fdc552019-10-08 14:56:58 +0200610 goto not_found;
611
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200612 if (parser->state < URI_PARSER_STATE_SCHEME_DONE)
613 http_parse_scheme(parser);
Christopher Faulet16fdc552019-10-08 14:56:58 +0200614
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200615 ptr = start = istptr(parser->uri);
616 end = istend(parser->uri);
617
Christopher Faulet16fdc552019-10-08 14:56:58 +0200618 while (ptr < end && *ptr != '/') {
619 if (*ptr++ == '@' && no_userinfo)
620 start = ptr;
621 }
622
623 /* OK, ptr point on the '/' or the end */
Christopher Faulet16fdc552019-10-08 14:56:58 +0200624
625 authority:
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200626 parser->uri = ist2(ptr, end - ptr);
627 parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
628 return ist2(start, ptr - start);
Christopher Faulet16fdc552019-10-08 14:56:58 +0200629
630 not_found:
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200631 parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
Tim Duesterhus241e29e2020-03-05 17:56:30 +0100632 return IST_NULL;
Christopher Faulet16fdc552019-10-08 14:56:58 +0200633}
634
Willy Tarreau6b952c82018-09-10 17:45:34 +0200635/* Parse the URI from the given transaction (which is assumed to be in request
636 * phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is
637 * returned. Otherwise the pointer and length are returned.
Amaury Denoyellec453f952021-07-06 11:40:12 +0200638 *
639 * <parser> must have been initialized via http_uri_parser_init. See the
640 * related http_uri_parser documentation for the specific API usage.
Willy Tarreau6b952c82018-09-10 17:45:34 +0200641 */
Amaury Denoyellec453f952021-07-06 11:40:12 +0200642struct ist http_parse_path(struct http_uri_parser *parser)
Willy Tarreau6b952c82018-09-10 17:45:34 +0200643{
644 const char *ptr, *end;
645
Amaury Denoyellec453f952021-07-06 11:40:12 +0200646 if (parser->state >= URI_PARSER_STATE_PATH_DONE)
Willy Tarreau6b952c82018-09-10 17:45:34 +0200647 goto not_found;
648
Amaury Denoyellec453f952021-07-06 11:40:12 +0200649 if (parser->format == URI_PARSER_FORMAT_EMPTY ||
650 parser->format == URI_PARSER_FORMAT_ASTERISK) {
651 goto not_found;
652 }
653
654 ptr = istptr(parser->uri);
655 end = istend(parser->uri);
Willy Tarreau6b952c82018-09-10 17:45:34 +0200656
Amaury Denoyellec453f952021-07-06 11:40:12 +0200657 /* If the uri is in absolute-path format, first skip the scheme and
658 * authority parts. No scheme will be found if the uri is in authority
659 * format, which indicates that the path won't be present.
Willy Tarreau6b952c82018-09-10 17:45:34 +0200660 */
Amaury Denoyellec453f952021-07-06 11:40:12 +0200661 if (parser->format == URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY) {
662 if (parser->state < URI_PARSER_STATE_SCHEME_DONE) {
663 /* If no scheme found, uri is in authority format. No
664 * path is present.
665 */
666 if (!isttest(http_parse_scheme(parser)))
667 goto not_found;
668 }
Willy Tarreau6b952c82018-09-10 17:45:34 +0200669
Amaury Denoyellec453f952021-07-06 11:40:12 +0200670 if (parser->state < URI_PARSER_STATE_AUTHORITY_DONE)
671 http_parse_authority(parser, 1);
Willy Tarreau6b952c82018-09-10 17:45:34 +0200672
Amaury Denoyellec453f952021-07-06 11:40:12 +0200673 ptr = istptr(parser->uri);
674
675 if (ptr == end)
Willy Tarreau6b952c82018-09-10 17:45:34 +0200676 goto not_found;
677 }
Willy Tarreau6b952c82018-09-10 17:45:34 +0200678
Amaury Denoyellec453f952021-07-06 11:40:12 +0200679 parser->state = URI_PARSER_STATE_PATH_DONE;
Willy Tarreau6b952c82018-09-10 17:45:34 +0200680 return ist2(ptr, end - ptr);
681
682 not_found:
Amaury Denoyellec453f952021-07-06 11:40:12 +0200683 parser->state = URI_PARSER_STATE_PATH_DONE;
Tim Duesterhus241e29e2020-03-05 17:56:30 +0100684 return IST_NULL;
Willy Tarreau6b952c82018-09-10 17:45:34 +0200685}
Willy Tarreau04f1e2d2018-09-10 18:04:24 +0200686
Amaury Denoyelle15f3cc42022-12-08 16:53:58 +0100687/* Parse <value> Content-Length header field of an HTTP request. The function
688 * checks all possible occurrences of a comma-delimited value, and verifies if
689 * any of them doesn't match a previous value. <value> is sanitized on return
690 * to contain a single value if several identical values were found.
691 *
692 * <body_len> must be a valid pointer and is used to return the parsed length
693 * unless values differ. Also if <not_first> is true, <body_len> is assumed to
694 * point to previously parsed value and which must be equal to the new length.
695 * This is useful if an HTTP message contains several Content-Length headers.
696 *
697 * Returns <0 if a value differs, 0 if the whole header can be dropped (i.e.
698 * already known), or >0 if the value can be indexed (first one). In the last
699 * case, the value might be adjusted and the caller must only add the updated
700 * value.
701 */
702int http_parse_cont_len_header(struct ist *value, unsigned long long *body_len,
703 int not_first)
704{
705 char *e, *n;
706 unsigned long long cl;
707 struct ist word;
708 int check_prev = not_first;
709
Willy Tarreaua32f99f2023-08-09 08:32:48 +0200710 word.ptr = value->ptr;
Amaury Denoyelle15f3cc42022-12-08 16:53:58 +0100711 e = value->ptr + value->len;
712
Willy Tarreaua32f99f2023-08-09 08:32:48 +0200713 while (1) {
714 if (word.ptr >= e) {
715 /* empty header or empty value */
716 goto fail;
717 }
718
Amaury Denoyelle15f3cc42022-12-08 16:53:58 +0100719 /* skip leading delimiter and blanks */
Willy Tarreaua32f99f2023-08-09 08:32:48 +0200720 if (unlikely(HTTP_IS_LWS(*word.ptr))) {
721 word.ptr++;
Amaury Denoyelle15f3cc42022-12-08 16:53:58 +0100722 continue;
Willy Tarreaua32f99f2023-08-09 08:32:48 +0200723 }
Amaury Denoyelle15f3cc42022-12-08 16:53:58 +0100724
725 /* digits only now */
726 for (cl = 0, n = word.ptr; n < e; n++) {
727 unsigned int c = *n - '0';
728 if (unlikely(c > 9)) {
729 /* non-digit */
730 if (unlikely(n == word.ptr)) // spaces only
731 goto fail;
732 break;
733 }
Willy Tarreauc33738c2023-08-09 11:02:34 +0200734
735 if (unlikely(!cl && n > word.ptr)) {
736 /* There was a leading zero before this digit,
737 * let's trim it.
738 */
739 word.ptr = n;
740 }
741
Amaury Denoyelle15f3cc42022-12-08 16:53:58 +0100742 if (unlikely(cl > ULLONG_MAX / 10ULL))
743 goto fail; /* multiply overflow */
744 cl = cl * 10ULL;
745 if (unlikely(cl + c < cl))
746 goto fail; /* addition overflow */
747 cl = cl + c;
748 }
749
750 /* keep a copy of the exact cleaned value */
751 word.len = n - word.ptr;
752
753 /* skip trailing LWS till next comma or EOL */
754 for (; n < e; n++) {
755 if (!HTTP_IS_LWS(*n)) {
756 if (unlikely(*n != ','))
757 goto fail;
758 break;
759 }
760 }
761
762 /* if duplicate, must be equal */
763 if (check_prev && cl != *body_len)
764 goto fail;
765
766 /* OK, store this result as the one to be indexed */
767 *body_len = cl;
768 *value = word;
Willy Tarreaua32f99f2023-08-09 08:32:48 +0200769
770 /* Now either n==e and we're done, or n points to the comma,
771 * and we skip it and continue.
772 */
773 if (n++ == e)
774 break;
775
Amaury Denoyelle15f3cc42022-12-08 16:53:58 +0100776 word.ptr = n;
777 check_prev = 1;
778 }
779
780 /* here we've reached the end with a single value or a series of
781 * identical values, all matching previous series if any. The last
782 * parsed value was sent back into <value>. We just have to decide
783 * if this occurrence has to be indexed (it's the first one) or
784 * silently skipped (it's not the first one)
785 */
786 return !not_first;
787 fail:
788 return -1;
789}
790
/*
 * Tells whether the header line starting at <hdr> and ending before <end> is
 * named <name> (<len> chars, compared case-insensitively) and is immediately
 * followed by a colon. Returns 0 when it is not the case. Otherwise returns
 * the offset, relative to <hdr>, of the first character after the colon which
 * is neither a space nor a tab. When only blanks follow the colon, the offset
 * of the character located after the first blank (<len>+2) is returned if it
 * still lies within the line, otherwise <end>-<hdr> is returned.
 */
int http_header_match2(const char *hdr, const char *end,
                       const char *name, int len)
{
	const char *cur;

	/* the colon must exist within the line, right after a matching name */
	if (hdr + len >= end || hdr[len] != ':' || strncasecmp(hdr, name, len) != 0)
		return 0;

	/* skip blanks following the colon */
	for (cur = hdr + len + 1; cur < end && HTTP_IS_SPHT(*cur); cur++)
		;

	/* a value was found */
	if (cur < end)
		return cur - hdr;

	/* no value: we may replace starting from the second space if any */
	return (len + 2 <= end - hdr) ? len + 2 : cur - hdr;
}
816
/* Find the end of the header value contained between <s> and <e>. See RFC7230,
 * par 3.2 for more information. Note that it requires a valid header to return
 * a valid result. This works for headers defined as comma-separated lists.
 *
 * Returns a pointer to the first comma found outside of a quoted string, or
 * <e> if there is none. Inside a quoted string, a backslash escapes the next
 * character (quoted-pair), so an escaped double quote does not close the
 * string.
 */
char *http_find_hdr_value_end(char *s, const char *e)
{
	int quoted, qdpair;

	quoted = qdpair = 0;

#ifdef HA_UNALIGNED_LE
	/* speedup: skip everything not a comma nor a double quote, one word at
	 * a time. The remaining length is tested instead of comparing <s> to
	 * <e> minus the word size, so that no pointer is ever formed before
	 * the start of a short buffer, and the word is loaded with memcpy()
	 * so that the char buffer is not accessed through an int lvalue.
	 */
	while (e - s >= (int)sizeof(unsigned int)) {
		unsigned int c; // comma
		unsigned int q; // quote

		memcpy(&c, s, sizeof(c));
		q = c;

		c ^= 0x2c2c2c2c; // contains one zero on a comma
		q ^= 0x22222222; // contains one zero on a quote

		/* Sets bit 7 of each byte which was zero. A borrow may also
		 * flag the byte above a zero one, which only makes us leave
		 * the fast path a bit early.
		 */
		c = (c - 0x01010101) & ~c; // contains 0x80 below a comma
		q = (q - 0x01010101) & ~q; // contains 0x80 below a quote

		if ((c | q) & 0x80808080)
			break; // found a comma or a quote

		s += sizeof(c);
	}
#endif
	/* exact scan, honouring quoted strings and quoted pairs */
	for (; s < e; s++) {
		if (qdpair) qdpair = 0;
		else if (quoted) {
			if (*s == '\\') qdpair = 1;
			else if (*s == '"') quoted = 0;
		}
		else if (*s == '"') quoted = 1;
		else if (*s == ',') return s;
	}
	return s;
}
854
/* Locate the end of a cookie value lying between <s> and <e>. The scan stops
 * on the first comma or semi-colon met outside of a quoted string and returns
 * a pointer to it, or returns <e> when no such delimiter exists. Within a
 * quoted string, the character following a backslash is skipped so that an
 * escaped double quote does not terminate the string. See RFC2965 for more
 * information. A valid header is required to get a valid result.
 */
char *http_find_cookie_value_end(char *s, const char *e)
{
	int in_quotes = 0;
	int escaped = 0;
	char *p;

	for (p = s; p < e; p++) {
		const char ch = *p;

		if (escaped) {
			/* second half of a quoted-pair: taken verbatim */
			escaped = 0;
			continue;
		}

		if (in_quotes) {
			if (ch == '\\')
				escaped = 1;
			else if (ch == '"')
				in_quotes = 0;
			continue;
		}

		if (ch == '"') {
			in_quotes = 1;
			continue;
		}

		/* unquoted delimiter: the value ends here */
		if (ch == ',' || ch == ';')
			break;
	}
	return p;
}
876
/* Try to find the next occurrence of a cookie name in a cookie header value.
 * To match on any cookie name, <cookie_name_l> must be set to 0.
 * The lookup begins at <hdr>. The pointer and size of the next occurrence of
 * the cookie value is returned into *value and *value_l, and the function
 * returns a pointer to the next position to search from if the value was
 * found. Otherwise if the cookie was not found, NULL is returned and neither
 * value nor value_l are touched. The input <hdr> string should first point to
 * the header's value, and the <hdr_end> pointer must point to the first
 * character not part of the value. <list> must be non-zero if value may
 * represent a list of values (cookie headers). This makes it faster to abort
 * parsing when no list is expected.
 *
 * Note that the returned pointer is one past the delimiter which ended the
 * value, hence it may be greater than <hdr_end> when the value was the last
 * one; callers are expected to compare it against <hdr_end>.
 */
char *http_extract_cookie_value(char *hdr, const char *hdr_end,
                                char *cookie_name, size_t cookie_name_l,
                                int list, char **value, size_t *value_l)
{
	char *equal, *att_end, *att_beg, *val_beg, *val_end;
	char *next;

	/* we search at least a cookie name followed by an equal, and more
	 * generally something like this :
	 * Cookie:    NAME1  =  VALUE 1  ; NAME2 = VALUE2 ; NAME3 = VALUE3\r\n
	 */
	for (att_beg = hdr; att_beg + cookie_name_l + 1 < hdr_end; att_beg = next + 1) {
		/* Iterate through all cookies on this line */

		while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
			att_beg++;

		/* find att_end : this is the first character after the last non
		 * space before the equal. It may be equal to hdr_end.
		 */
		equal = att_end = att_beg;

		while (equal < hdr_end) {
			if (*equal == '=' || *equal == ';' || (list && *equal == ','))
				break;
			if (HTTP_IS_SPHT(*equal++))
				continue;
			att_end = equal;
		}

		/* here, <equal> points to '=', a delimiter or the end. <att_end>
		 * is between <att_beg> and <equal>, both may be identical.
		 */

		/* Ignore cookies with no equal sign. This is checked before
		 * anything dereferences <att_beg>, because after skipping
		 * trailing blanks <att_beg> may be equal to <hdr_end>, which
		 * must not be read.
		 */
		if (equal >= hdr_end || *equal != '=') {
			next = equal;
			continue;
		}

		/* From now on att_beg <= equal < hdr_end. */

		/* We have nothing to do with attributes beginning with '$'. However,
		 * they will automatically be removed if a header before them is removed,
		 * since they're supposed to be linked together. Such an
		 * attribute is skipped entirely, so <next> is set to <equal>
		 * like for an attribute without value.
		 */
		if (*att_beg == '$') {
			next = equal;
			continue;
		}

		/* look for the beginning of the value */
		val_beg = equal + 1;
		while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
			val_beg++;

		/* find the end of the value, respecting quotes */
		next = http_find_cookie_value_end(val_beg, hdr_end);

		/* make val_end point to the first white space or delimiter after the value */
		val_end = next;
		while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
			val_end--;

		/* Now we have the cookie name between att_beg and att_end, and
		 * its value between val_beg and val_end.
		 */

		if (cookie_name_l == 0 || ((size_t)(att_end - att_beg) == cookie_name_l &&
		                           memcmp(att_beg, cookie_name, cookie_name_l) == 0)) {
			/* let's return this value and indicate where to go on from */
			*value = val_beg;
			*value_l = val_end - val_beg;
			return next + 1;
		}

		/* Set-Cookie headers only have the name in the first attr=value part */
		if (!list)
			break;
	}

	return NULL;
}
971
/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
 * value is larger than 1000, it is bound to 1000. The parser consumes up to
 * 1 digit, one dot and 3 digits and stops on the first invalid character.
 * Unparsable qvalues return 1000 as "q=1.000".
 *
 * If <end> is not NULL, it is set to point to the first character which was
 * not consumed, i.e. the character which stopped the parsing. That character
 * is never skipped, so a delimiter (or the trailing zero) which directly
 * follows the integer part is left for the caller to process.
 */
int http_parse_qvalue(const char *qvalue, const char **end)
{
	int q = 1000;

	/* integer part: exactly one digit */
	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q = (*qvalue++ - '0') * 1000;

	/* Only step over the next character when it really is the dot,
	 * otherwise it does not belong to the qvalue.
	 */
	if (*qvalue != '.')
		goto out;
	qvalue++;

	/* up to three decimals */
	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 100;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 10;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 1;
 out:
	if (q > 1000)
		q = 1000;
	if (end)
		*end = qvalue;
	return q;
}
1006
/*
 * Looks up the first occurrence of the url parameter <url_param_name>
 * (<url_param_name_l> chars) immediately followed by '=', and returns a
 * pointer to the beginning of its name, or NULL if it is not found.
 *
 * The data to search are described by <chunks>: chunks[0]..chunks[1] delimit
 * the first chunk, and chunks[2]..chunks[3] an optional second one (chunks[2]
 * is NULL when there is none). Parameters are separated by <delim> as decided
 * by http_is_param_delimiter(). If <insensitive> is non-zero the name is
 * compared using strncasecmp(), otherwise using memcmp().
 *
 * Example: if the query string is "yo=mama;ye=daddy" and url_param_name is
 * "ye", the function will return query_string+8.
 *
 * Warning: the returned pointer may belong either to the first or to the
 * second chunk. The caller must check the position before using the result.
 */
const char *http_find_url_param_pos(const char **chunks,
                                    const char* url_param_name, size_t url_param_name_l,
                                    char delim, char insensitive)
{
	const char **seg = chunks; /* the chunk pair currently being scanned */
	const char *cur = seg[0];
	const char *lim = seg[1];
	const char *eq;
	int head_len, tail_len;

	while (cur < lim) {
		const char *bound;
		int found;

		/* Locate where the '=' is expected. fix_pointer_if_wrap() is
		 * presumed to move a pointer which ran past the first chunk
		 * into the second one and to report it; the bound to respect
		 * depends on that.
		 */
		eq = cur + url_param_name_l;
		bound = fix_pointer_if_wrap(chunks, &eq) ? chunks[3] : chunks[1];
		if (eq >= bound)
			return NULL;

		if (*eq == '=') {
			if (cur + url_param_name_l > lim) {
				/* The candidate name straddles both chunks, it
				 * has to be compared in two parts.
				 */

				/* No further chunk available: this is the end. */
				if (seg != chunks || !seg[2])
					return NULL;

				/* Length of each part of the comparison. */
				head_len = lim - cur;
				tail_len = url_param_name_l - head_len;

				/* The second chunk is too short to hold the
				 * rest of the name.
				 */
				if (seg[2] + tail_len > seg[3])
					return NULL;

				if (insensitive)
					found = strncasecmp(cur, url_param_name, head_len) == 0 &&
					        strncasecmp(seg[2], url_param_name + head_len, tail_len) == 0;
				else
					found = memcmp(cur, url_param_name, head_len) == 0 &&
					        memcmp(seg[2], url_param_name + head_len, tail_len) == 0;
				if (found)
					return cur;

				/* Switch to the second chunk, past the part
				 * which failed the comparison.
				 */
				seg += 2;
				cur = seg[0] + tail_len;
				lim = seg[1];
			}
			else {
				/* The candidate name is contiguous. */
				if (insensitive)
					found = strncasecmp(cur, url_param_name, url_param_name_l) == 0;
				else
					found = memcmp(cur, url_param_name, url_param_name_l) == 0;
				if (found)
					return cur;

				/* Step over the name and its '='. If this lands
				 * in the second chunk, <lim> is set to its
				 * start so that the lookup below performs the
				 * actual switch.
				 */
				cur += url_param_name_l + 1;
				if (fix_pointer_if_wrap(chunks, &cur))
					lim = seg[2];
			}
		}

		/* Move to the next delimiter, continuing in the second chunk
		 * if the first one is exhausted.
		 */
		for (;;) {
			while (cur < lim && !http_is_param_delimiter(*cur, delim))
				cur++;
			if (cur < lim)
				break;

			if (seg != chunks || !seg[2])
				return NULL;
			seg += 2;
			cur = seg[0];
			lim = seg[1];
		}

		/* skip the delimiter itself */
		cur++;
	}
	return NULL;
}
1105
/*
 * Finds the value of the next url parameter named <url_param_name> within the
 * query string described by <chunks> (chunks[0]..chunks[1] is the first chunk,
 * chunks[2]..chunks[3] the optional second one). When <url_param_name_l> is
 * zero, the first parameter having an '=' is matched whatever its name.
 * <delim> and <insensitive> have the same meaning as for
 * http_find_url_param_pos().
 *
 * Returns 1 when a parameter is found, after having set *vstart to the first
 * character of its value and *vend to the character following it. Returns 0
 * otherwise, in which case *vstart and *vend are left untouched.
 */
int http_find_next_url_param(const char **chunks,
                             const char* url_param_name, size_t url_param_name_l,
                             const char **vstart, const char **vend, char delim, char insensitive)
{
	const char *limit = chunks[1];
	const char *name_pos;
	const char *val, *val_end;

	if (url_param_name_l) {
		/* A name was given, look it up. */
		name_pos = http_find_url_param_pos(chunks,
		                                   url_param_name, url_param_name_l,
		                                   delim, insensitive);
		/* Not located before the end of the first chunk: the second
		 * chunk's end applies.
		 */
		if (name_pos >= limit)
			limit = chunks[3];

		if (!name_pos)
			return 0;

		/* The value starts past the name and its '='. */
		val = name_pos + url_param_name_l + 1;

		/* This may have carried us into the second chunk. */
		if (fix_pointer_if_wrap(chunks, &val)) {
			limit = chunks[3];

			/* nothing left there */
			if (val >= limit)
				return 0;
		}
	}
	else {
		name_pos = chunks[0];
		if (!name_pos)
			return 0;

		/* No name given: take the first '=' found, trying the second
		 * chunk when the first one has none.
		 */
		for (;;) {
			val = memchr(name_pos, '=', limit - name_pos);
			if (val)
				break;

			if (name_pos < chunks[0] ||
			    name_pos >= chunks[1] ||
			    !chunks[2])
				return 0;

			name_pos = chunks[2];
			limit = chunks[3];
		}
		val++;
	}

	/* The value extends up to the next delimiter or the end of data. */
	val_end = val;
	for (;;) {
		while ((val_end < limit) && !http_is_param_delimiter(*val_end, delim))
			val_end++;
		if (val_end < limit)
			break;

		/* Out of data in this chunk, see if scanning may go on in the
		 * second one.
		 * NOTE(review): when the scan stops exactly on chunks[1], the
		 * "val_end >= chunks[1]" test below is true and the loop ends,
		 * so continuing from the first chunk into the second one looks
		 * unreachable here -- confirm this is intended.
		 */
		if (val_end < chunks[0] ||
		    val_end >= chunks[1] ||
		    !chunks[2])
			break;

		val_end = chunks[2];
		limit = chunks[3];
	}

	*vstart = val;
	*vend = val_end;
	return 1;
}
1190
Christopher Faulet8277ca72018-10-22 15:12:04 +02001191/* Parses a single header line (without the CRLF) and splits it into its name
1192 * and its value. The parsing is pretty naive and just skip spaces.
1193 */
1194int http_parse_header(const struct ist hdr, struct ist *name, struct ist *value)
1195{
1196 char *p = hdr.ptr;
1197 char *end = p + hdr.len;
1198
1199 name->len = value->len = 0;
1200
1201 /* Skip leading spaces */
1202 for (; p < end && HTTP_IS_SPHT(*p); p++);
1203
1204 /* Set the header name */
1205 name->ptr = p;
1206 for (; p < end && HTTP_IS_TOKEN(*p); p++);
1207 name->len = p - name->ptr;
1208
1209 /* Skip the ':' and spaces before and after it */
1210 for (; p < end && HTTP_IS_SPHT(*p); p++);
1211 if (p < end && *p == ':') p++;
1212 for (; p < end && HTTP_IS_SPHT(*p); p++);
1213
1214 /* Set the header value */
1215 value->ptr = p;
1216 value->len = end - p;
1217
1218 return 1;
1219}
1220
1221/* Parses a single start line (without the CRLF) and splits it into 3 parts. The
1222 * parsing is pretty naive and just skip spaces.
1223 */
1224int http_parse_stline(const struct ist line, struct ist *p1, struct ist *p2, struct ist *p3)
1225{
1226 char *p = line.ptr;
1227 char *end = p + line.len;
1228
1229 p1->len = p2->len = p3->len = 0;
1230
1231 /* Skip leading spaces */
1232 for (; p < end && HTTP_IS_SPHT(*p); p++);
1233
1234 /* Set the first part */
1235 p1->ptr = p;
1236 for (; p < end && HTTP_IS_TOKEN(*p); p++);
1237 p1->len = p - p1->ptr;
1238
1239 /* Skip spaces between p1 and p2 */
1240 for (; p < end && HTTP_IS_SPHT(*p); p++);
1241
1242 /* Set the second part */
1243 p2->ptr = p;
1244 for (; p < end && !HTTP_IS_SPHT(*p); p++);
1245 p2->len = p - p2->ptr;
1246
1247 /* Skip spaces between p2 and p3 */
1248 for (; p < end && HTTP_IS_SPHT(*p); p++);
1249
Ilya Shipitsin46a030c2020-07-05 16:36:08 +05001250 /* The remaining is the third value */
Christopher Faulet8277ca72018-10-22 15:12:04 +02001251 p3->ptr = p;
1252 p3->len = end - p;
1253
1254 return 1;
1255}
Christopher Faulet341fac12019-09-16 11:37:05 +02001256
1257/* Parses value of a Status header with the following format: "Status: Code[
1258 * Reason]". The parsing is pretty naive and just skip spaces. It return the
1259 * numeric value of the status code.
1260 */
1261int http_parse_status_val(const struct ist value, struct ist *status, struct ist *reason)
1262{
1263 char *p = value.ptr;
1264 char *end = p + value.len;
1265 uint16_t code;
1266
1267 status->len = reason->len = 0;
1268
1269 /* Skip leading spaces */
1270 for (; p < end && HTTP_IS_SPHT(*p); p++);
1271
1272 /* Set the status part */
1273 status->ptr = p;
1274 for (; p < end && HTTP_IS_TOKEN(*p); p++);
1275 status->len = p - status->ptr;
1276
1277 /* Skip spaces between status and reason */
1278 for (; p < end && HTTP_IS_SPHT(*p); p++);
1279
1280 /* the remaining is the reason */
1281 reason->ptr = p;
1282 reason->len = end - p;
1283
1284 code = strl2ui(status->ptr, status->len);
1285 return code;
1286}
Remi Tricot-Le Bretonbcced092020-10-22 10:40:03 +02001287
1288
1289/* Returns non-zero if the two ETags are comparable (see RFC 7232#2.3.2).
1290 * If any of them is a weak ETag, we discard the weakness prefix and perform
1291 * a strict string comparison.
1292 * Returns 0 otherwise.
1293 */
1294int http_compare_etags(struct ist etag1, struct ist etag2)
1295{
1296 enum http_etag_type etag_type1;
1297 enum http_etag_type etag_type2;
1298
1299 etag_type1 = http_get_etag_type(etag1);
1300 etag_type2 = http_get_etag_type(etag2);
1301
1302 if (etag_type1 == ETAG_INVALID || etag_type2 == ETAG_INVALID)
1303 return 0;
1304
1305 /* Discard the 'W/' prefix an ETag is a weak one. */
1306 if (etag_type1 == ETAG_WEAK)
1307 etag1 = istadv(etag1, 2);
1308 if (etag_type2 == ETAG_WEAK)
1309 etag2 = istadv(etag2, 2);
1310
1311 return isteq(etag1, etag2);
1312}
Remi Tricot-Le Breton56e46cb2020-12-23 18:13:48 +01001313
1314
1315/*
1316 * Trim leading space or horizontal tab characters from <value> string.
1317 * Returns the trimmed string.
1318 */
1319struct ist http_trim_leading_spht(struct ist value)
1320{
1321 struct ist ret = value;
1322
1323 while (ret.len && HTTP_IS_SPHT(ret.ptr[0])) {
1324 ++ret.ptr;
1325 --ret.len;
1326 }
1327
1328 return ret;
1329}
1330
1331/*
1332 * Trim trailing space or horizontal tab characters from <value> string.
1333 * Returns the trimmed string.
1334 */
1335struct ist http_trim_trailing_spht(struct ist value)
1336{
1337 struct ist ret = value;
1338
1339 while (ret.len && HTTP_IS_SPHT(ret.ptr[-1]))
1340 --ret.len;
1341
1342 return ret;
1343}