Blame - src/http.c - haproxy

blob: 0d6e92d911cb3f9fc89792e5442f7928e2c1278b [file] [log] [blame]

Willy Tarreau	35b51c6	2018-09-10 15:38:55 +0200	[diff] [blame]	1	/*
				2	* HTTP semantics
				3	*
				4	* Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	*/
				12
				13	#include <ctype.h>
Willy Tarreau	4c7e4b7	2020-05-27 12:58:42 +0200	[diff] [blame]	14	#include <haproxy/api.h>
Willy Tarreau	cd72d8c	2020-06-02 19:11:26 +0200	[diff] [blame]	15	#include <haproxy/http.h>
Willy Tarreau	48fbcae	2020-06-03 18:09:46 +0200	[diff] [blame]	16	#include <haproxy/tools.h>
Willy Tarreau	35b51c6	2018-09-10 15:38:55 +0200	[diff] [blame]	17
				18	/* It is about twice as fast on recent architectures to lookup a byte in a
				19	* table than to perform a boolean AND or OR between two tests. Refer to
				20	* RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
				21	* neither a separator nor a CTL char. An http ver_token is any ASCII which can
				22	* be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
				23	* digit. Note: please do not overwrite values in assignment since gcc-2.95
				24	* will not handle them correctly. It's worth noting that chars 128..255 are
				25	* nothing, not even control chars.
				26	*/
				27	const unsigned char http_char_classes[256] = {
				28	[ 0] = HTTP_FLG_CTL,
				29	[ 1] = HTTP_FLG_CTL,
				30	[ 2] = HTTP_FLG_CTL,
				31	[ 3] = HTTP_FLG_CTL,
				32	[ 4] = HTTP_FLG_CTL,
				33	[ 5] = HTTP_FLG_CTL,
				34	[ 6] = HTTP_FLG_CTL,
				35	[ 7] = HTTP_FLG_CTL,
				36	[ 8] = HTTP_FLG_CTL,
				37	[ 9] = HTTP_FLG_SPHT \| HTTP_FLG_LWS \| HTTP_FLG_SEP \| HTTP_FLG_CTL,
				38	[ 10] = HTTP_FLG_CRLF \| HTTP_FLG_LWS \| HTTP_FLG_CTL,
				39	[ 11] = HTTP_FLG_CTL,
				40	[ 12] = HTTP_FLG_CTL,
				41	[ 13] = HTTP_FLG_CRLF \| HTTP_FLG_LWS \| HTTP_FLG_CTL,
				42	[ 14] = HTTP_FLG_CTL,
				43	[ 15] = HTTP_FLG_CTL,
				44	[ 16] = HTTP_FLG_CTL,
				45	[ 17] = HTTP_FLG_CTL,
				46	[ 18] = HTTP_FLG_CTL,
				47	[ 19] = HTTP_FLG_CTL,
				48	[ 20] = HTTP_FLG_CTL,
				49	[ 21] = HTTP_FLG_CTL,
				50	[ 22] = HTTP_FLG_CTL,
				51	[ 23] = HTTP_FLG_CTL,
				52	[ 24] = HTTP_FLG_CTL,
				53	[ 25] = HTTP_FLG_CTL,
				54	[ 26] = HTTP_FLG_CTL,
				55	[ 27] = HTTP_FLG_CTL,
				56	[ 28] = HTTP_FLG_CTL,
				57	[ 29] = HTTP_FLG_CTL,
				58	[ 30] = HTTP_FLG_CTL,
				59	[ 31] = HTTP_FLG_CTL,
				60	[' '] = HTTP_FLG_SPHT \| HTTP_FLG_LWS \| HTTP_FLG_SEP,
				61	['!'] = HTTP_FLG_TOK,
				62	['"'] = HTTP_FLG_SEP,
				63	['#'] = HTTP_FLG_TOK,
				64	['$'] = HTTP_FLG_TOK,
				65	['%'] = HTTP_FLG_TOK,
				66	['&'] = HTTP_FLG_TOK,
				67	[ 39] = HTTP_FLG_TOK,
				68	['('] = HTTP_FLG_SEP,
				69	[')'] = HTTP_FLG_SEP,
				70	['*'] = HTTP_FLG_TOK,
				71	['+'] = HTTP_FLG_TOK,
				72	[','] = HTTP_FLG_SEP,
				73	['-'] = HTTP_FLG_TOK,
				74	['.'] = HTTP_FLG_TOK \| HTTP_FLG_VER,
				75	['/'] = HTTP_FLG_SEP \| HTTP_FLG_VER,
				76	['0'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				77	['1'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				78	['2'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				79	['3'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				80	['4'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				81	['5'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				82	['6'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				83	['7'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				84	['8'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				85	['9'] = HTTP_FLG_TOK \| HTTP_FLG_VER \| HTTP_FLG_DIG,
				86	[':'] = HTTP_FLG_SEP,
				87	[';'] = HTTP_FLG_SEP,
				88	['<'] = HTTP_FLG_SEP,
				89	['='] = HTTP_FLG_SEP,
				90	['>'] = HTTP_FLG_SEP,
				91	['?'] = HTTP_FLG_SEP,
				92	['@'] = HTTP_FLG_SEP,
				93	['A'] = HTTP_FLG_TOK,
				94	['B'] = HTTP_FLG_TOK,
				95	['C'] = HTTP_FLG_TOK,
				96	['D'] = HTTP_FLG_TOK,
				97	['E'] = HTTP_FLG_TOK,
				98	['F'] = HTTP_FLG_TOK,
				99	['G'] = HTTP_FLG_TOK,
				100	['H'] = HTTP_FLG_TOK \| HTTP_FLG_VER,
				101	['I'] = HTTP_FLG_TOK,
				102	['J'] = HTTP_FLG_TOK,
				103	['K'] = HTTP_FLG_TOK,
				104	['L'] = HTTP_FLG_TOK,
				105	['M'] = HTTP_FLG_TOK,
				106	['N'] = HTTP_FLG_TOK,
				107	['O'] = HTTP_FLG_TOK,
				108	['P'] = HTTP_FLG_TOK \| HTTP_FLG_VER,
				109	['Q'] = HTTP_FLG_TOK,
				110	['R'] = HTTP_FLG_TOK \| HTTP_FLG_VER,
				111	['S'] = HTTP_FLG_TOK \| HTTP_FLG_VER,
				112	['T'] = HTTP_FLG_TOK \| HTTP_FLG_VER,
				113	['U'] = HTTP_FLG_TOK,
				114	['V'] = HTTP_FLG_TOK,
				115	['W'] = HTTP_FLG_TOK,
				116	['X'] = HTTP_FLG_TOK,
				117	['Y'] = HTTP_FLG_TOK,
				118	['Z'] = HTTP_FLG_TOK,
				119	['['] = HTTP_FLG_SEP,
				120	[ 92] = HTTP_FLG_SEP,
				121	[']'] = HTTP_FLG_SEP,
				122	['^'] = HTTP_FLG_TOK,
				123	['_'] = HTTP_FLG_TOK,
				124	['`'] = HTTP_FLG_TOK,
				125	['a'] = HTTP_FLG_TOK,
				126	['b'] = HTTP_FLG_TOK,
				127	['c'] = HTTP_FLG_TOK,
				128	['d'] = HTTP_FLG_TOK,
				129	['e'] = HTTP_FLG_TOK,
				130	['f'] = HTTP_FLG_TOK,
				131	['g'] = HTTP_FLG_TOK,
				132	['h'] = HTTP_FLG_TOK,
				133	['i'] = HTTP_FLG_TOK,
				134	['j'] = HTTP_FLG_TOK,
				135	['k'] = HTTP_FLG_TOK,
				136	['l'] = HTTP_FLG_TOK,
				137	['m'] = HTTP_FLG_TOK,
				138	['n'] = HTTP_FLG_TOK,
				139	['o'] = HTTP_FLG_TOK,
				140	['p'] = HTTP_FLG_TOK,
				141	['q'] = HTTP_FLG_TOK,
				142	['r'] = HTTP_FLG_TOK,
				143	['s'] = HTTP_FLG_TOK,
				144	['t'] = HTTP_FLG_TOK,
				145	['u'] = HTTP_FLG_TOK,
				146	['v'] = HTTP_FLG_TOK,
				147	['w'] = HTTP_FLG_TOK,
				148	['x'] = HTTP_FLG_TOK,
				149	['y'] = HTTP_FLG_TOK,
				150	['z'] = HTTP_FLG_TOK,
				151	['{'] = HTTP_FLG_SEP,
				152	['\|'] = HTTP_FLG_TOK,
				153	['}'] = HTTP_FLG_SEP,
				154	['~'] = HTTP_FLG_TOK,
				155	[127] = HTTP_FLG_CTL,
				156	};
				157
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	158	const int http_err_codes[HTTP_ERR_SIZE] = {
				159	[HTTP_ERR_200] = 200, /* used by "monitor-uri" */
				160	[HTTP_ERR_400] = 400,
Christopher Faulet	612f2ea	2020-05-27 09:57:28 +0200	[diff] [blame]	161	[HTTP_ERR_401] = 401,
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	162	[HTTP_ERR_403] = 403,
Florian Tham	9205fea	2020-01-08 13:35:30 +0100	[diff] [blame]	163	[HTTP_ERR_404] = 404,
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	164	[HTTP_ERR_405] = 405,
Christopher Faulet	612f2ea	2020-05-27 09:57:28 +0200	[diff] [blame]	165	[HTTP_ERR_407] = 407,
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	166	[HTTP_ERR_408] = 408,
Florian Tham	272e29b	2020-01-08 10:19:05 +0100	[diff] [blame]	167	[HTTP_ERR_410] = 410,
Anthonin Bonnefoy	85048f8	2020-06-22 09:17:01 +0200	[diff] [blame]	168	[HTTP_ERR_413] = 413,
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	169	[HTTP_ERR_421] = 421,
				170	[HTTP_ERR_425] = 425,
				171	[HTTP_ERR_429] = 429,
				172	[HTTP_ERR_500] = 500,
Christopher Faulet	e095f31	2020-12-07 11:22:24 +0100	[diff] [blame]	173	[HTTP_ERR_501] = 501,
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	174	[HTTP_ERR_502] = 502,
				175	[HTTP_ERR_503] = 503,
				176	[HTTP_ERR_504] = 504,
				177	};
				178
Christopher Faulet	a7b677c	2018-11-29 16:48:49 +0100	[diff] [blame]	179	const char *http_err_msgs[HTTP_ERR_SIZE] = {
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	180	[HTTP_ERR_200] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	181	"HTTP/1.1 200 OK\r\n"
				182	"Content-length: 58\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	183	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	184	"Content-Type: text/html\r\n"
				185	"\r\n"
				186	"<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
				187
				188	[HTTP_ERR_400] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	189	"HTTP/1.1 400 Bad request\r\n"
				190	"Content-length: 90\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	191	"Cache-Control: no-cache\r\n"
				192	"Connection: close\r\n"
				193	"Content-Type: text/html\r\n"
				194	"\r\n"
				195	"<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
				196
Christopher Faulet	612f2ea	2020-05-27 09:57:28 +0200	[diff] [blame]	197	[HTTP_ERR_401] =
				198	"HTTP/1.1 401 Unauthorized\r\n"
				199	"Content-length: 112\r\n"
				200	"Cache-Control: no-cache\r\n"
Christopher Faulet	612f2ea	2020-05-27 09:57:28 +0200	[diff] [blame]	201	"Content-Type: text/html\r\n"
				202	"\r\n"
				203	"<html><body><h1>401 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
				204
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	205	[HTTP_ERR_403] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	206	"HTTP/1.1 403 Forbidden\r\n"
				207	"Content-length: 93\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	208	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	209	"Content-Type: text/html\r\n"
				210	"\r\n"
				211	"<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
				212
Florian Tham	9205fea	2020-01-08 13:35:30 +0100	[diff] [blame]	213	[HTTP_ERR_404] =
				214	"HTTP/1.1 404 Not Found\r\n"
				215	"Content-length: 83\r\n"
				216	"Cache-Control: no-cache\r\n"
Florian Tham	9205fea	2020-01-08 13:35:30 +0100	[diff] [blame]	217	"Content-Type: text/html\r\n"
				218	"\r\n"
				219	"<html><body><h1>404 Not Found</h1>\nThe resource could not be found.\n</body></html>\n",
				220
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	221	[HTTP_ERR_405] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	222	"HTTP/1.1 405 Method Not Allowed\r\n"
				223	"Content-length: 146\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	224	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	225	"Content-Type: text/html\r\n"
				226	"\r\n"
				227	"<html><body><h1>405 Method Not Allowed</h1>\nA request was made of a resource using a request method not supported by that resource\n</body></html>\n",
				228
Christopher Faulet	612f2ea	2020-05-27 09:57:28 +0200	[diff] [blame]	229	[HTTP_ERR_407] =
				230	"HTTP/1.1 407 Unauthorized\r\n"
				231	"Content-length: 112\r\n"
				232	"Cache-Control: no-cache\r\n"
Christopher Faulet	612f2ea	2020-05-27 09:57:28 +0200	[diff] [blame]	233	"Content-Type: text/html\r\n"
				234	"\r\n"
				235	"<html><body><h1>407 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
				236
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	237	[HTTP_ERR_408] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	238	"HTTP/1.1 408 Request Time-out\r\n"
				239	"Content-length: 110\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	240	"Cache-Control: no-cache\r\n"
				241	"Connection: close\r\n"
				242	"Content-Type: text/html\r\n"
				243	"\r\n"
				244	"<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
				245
Florian Tham	272e29b	2020-01-08 10:19:05 +0100	[diff] [blame]	246	[HTTP_ERR_410] =
				247	"HTTP/1.1 410 Gone\r\n"
				248	"Content-length: 114\r\n"
				249	"Cache-Control: no-cache\r\n"
Florian Tham	272e29b	2020-01-08 10:19:05 +0100	[diff] [blame]	250	"Content-Type: text/html\r\n"
				251	"\r\n"
				252	"<html><body><h1>410 Gone</h1>\nThe resource is no longer available and will not be available again.\n</body></html>\n",
				253
Anthonin Bonnefoy	85048f8	2020-06-22 09:17:01 +0200	[diff] [blame]	254	[HTTP_ERR_413] =
				255	"HTTP/1.1 413 Payload Too Large\r\n"
				256	"Content-length: 106\r\n"
				257	"Cache-Control: no-cache\r\n"
				258	"Content-Type: text/html\r\n"
				259	"\r\n"
				260	"<html><body><h1>413 Payload Too Large</h1>\nThe request entity exceeds the maximum allowed.\n</body></html>\n",
				261
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	262	[HTTP_ERR_421] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	263	"HTTP/1.1 421 Misdirected Request\r\n"
				264	"Content-length: 104\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	265	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	266	"Content-Type: text/html\r\n"
				267	"\r\n"
				268	"<html><body><h1>421 Misdirected Request</h1>\nRequest sent to a non-authoritative server.\n</body></html>\n",
				269
				270	[HTTP_ERR_425] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	271	"HTTP/1.1 425 Too Early\r\n"
				272	"Content-length: 80\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	273	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	274	"Content-Type: text/html\r\n"
				275	"\r\n"
				276	"<html><body><h1>425 Too Early</h1>\nYour browser sent early data.\n</body></html>\n",
				277
				278	[HTTP_ERR_429] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	279	"HTTP/1.1 429 Too Many Requests\r\n"
				280	"Content-length: 117\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	281	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	282	"Content-Type: text/html\r\n"
				283	"\r\n"
				284	"<html><body><h1>429 Too Many Requests</h1>\nYou have sent too many requests in a given amount of time.\n</body></html>\n",
				285
				286	[HTTP_ERR_500] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	287	"HTTP/1.1 500 Internal Server Error\r\n"
Christopher Faulet	5563392	2020-10-09 08:39:26 +0200	[diff] [blame]	288	"Content-length: 97\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	289	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	290	"Content-Type: text/html\r\n"
				291	"\r\n"
Ilya Shipitsin	46a030c	2020-07-05 16:36:08 +0500	[diff] [blame]	292	"<html><body><h1>500 Internal Server Error</h1>\nAn internal server error occurred.\n</body></html>\n",
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	293
Christopher Faulet	e095f31	2020-12-07 11:22:24 +0100	[diff] [blame]	294	[HTTP_ERR_501] =
				295	"HTTP/1.1 501 Not Implemented\r\n"
				296	"Content-length: 136\r\n"
				297	"Cache-Control: no-cache\r\n"
				298	"Content-Type: text/html\r\n"
				299	"\r\n"
				300	"<html><body><h1>501 Not Implemented</h1>\n.The server does not support the functionality required to fulfill the request.\n</body></html>\n",
				301
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	302	[HTTP_ERR_502] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	303	"HTTP/1.1 502 Bad Gateway\r\n"
				304	"Content-length: 107\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	305	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	306	"Content-Type: text/html\r\n"
				307	"\r\n"
				308	"<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
				309
				310	[HTTP_ERR_503] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	311	"HTTP/1.1 503 Service Unavailable\r\n"
				312	"Content-length: 107\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	313	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	314	"Content-Type: text/html\r\n"
				315	"\r\n"
				316	"<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
				317
				318	[HTTP_ERR_504] =
Willy Tarreau	b5ba2b0	2019-06-11 16:08:25 +0200	[diff] [blame]	319	"HTTP/1.1 504 Gateway Time-out\r\n"
				320	"Content-length: 92\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	321	"Cache-Control: no-cache\r\n"
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	322	"Content-Type: text/html\r\n"
				323	"\r\n"
				324	"<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	325	};
				326
Willy Tarreau	35b51c6	2018-09-10 15:38:55 +0200	[diff] [blame]	327	const struct ist http_known_methods[HTTP_METH_OTHER] = {
				328	[HTTP_METH_OPTIONS] = IST("OPTIONS"),
				329	[HTTP_METH_GET] = IST("GET"),
				330	[HTTP_METH_HEAD] = IST("HEAD"),
				331	[HTTP_METH_POST] = IST("POST"),
				332	[HTTP_METH_PUT] = IST("PUT"),
				333	[HTTP_METH_DELETE] = IST("DELETE"),
				334	[HTTP_METH_TRACE] = IST("TRACE"),
				335	[HTTP_METH_CONNECT] = IST("CONNECT"),
				336	};
				337
				338	/*
				339	* returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown
				340	* ones.
				341	*/
				342	enum http_meth_t find_http_meth(const char *str, const int len)
				343	{
				344	const struct ist m = ist2(str, len);
				345
				346	if (isteq(m, ist("GET"))) return HTTP_METH_GET;
				347	else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD;
				348	else if (isteq(m, ist("POST"))) return HTTP_METH_POST;
				349	else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT;
				350	else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT;
				351	else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS;
				352	else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE;
				353	else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE;
				354	else return HTTP_METH_OTHER;
				355	}
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	356
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	357	/* This function returns HTTP_ERR_<num> (enum) matching http status code.
				358	* Returned value should match codes from http_err_codes.
				359	*/
Willy Tarreau	8de1df9	2019-04-15 21:27:18 +0200	[diff] [blame]	360	int http_get_status_idx(unsigned int status)
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	361	{
				362	switch (status) {
				363	case 200: return HTTP_ERR_200;
				364	case 400: return HTTP_ERR_400;
Christopher Faulet	612f2ea	2020-05-27 09:57:28 +0200	[diff] [blame]	365	case 401: return HTTP_ERR_401;
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	366	case 403: return HTTP_ERR_403;
Florian Tham	9205fea	2020-01-08 13:35:30 +0100	[diff] [blame]	367	case 404: return HTTP_ERR_404;
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	368	case 405: return HTTP_ERR_405;
Christopher Faulet	612f2ea	2020-05-27 09:57:28 +0200	[diff] [blame]	369	case 407: return HTTP_ERR_407;
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	370	case 408: return HTTP_ERR_408;
Florian Tham	272e29b	2020-01-08 10:19:05 +0100	[diff] [blame]	371	case 410: return HTTP_ERR_410;
Anthonin Bonnefoy	85048f8	2020-06-22 09:17:01 +0200	[diff] [blame]	372	case 413: return HTTP_ERR_413;
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	373	case 421: return HTTP_ERR_421;
				374	case 425: return HTTP_ERR_425;
				375	case 429: return HTTP_ERR_429;
				376	case 500: return HTTP_ERR_500;
Christopher Faulet	e095f31	2020-12-07 11:22:24 +0100	[diff] [blame]	377	case 501: return HTTP_ERR_501;
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	378	case 502: return HTTP_ERR_502;
				379	case 503: return HTTP_ERR_503;
				380	case 504: return HTTP_ERR_504;
				381	default: return HTTP_ERR_500;
				382	}
				383	}
				384
				385	/* This function returns a reason associated with the HTTP status.
				386	* This function never fails, a message is always returned.
				387	*/
				388	const char *http_get_reason(unsigned int status)
				389	{
				390	switch (status) {
				391	case 100: return "Continue";
				392	case 101: return "Switching Protocols";
				393	case 102: return "Processing";
				394	case 200: return "OK";
				395	case 201: return "Created";
				396	case 202: return "Accepted";
				397	case 203: return "Non-Authoritative Information";
				398	case 204: return "No Content";
				399	case 205: return "Reset Content";
				400	case 206: return "Partial Content";
				401	case 207: return "Multi-Status";
				402	case 210: return "Content Different";
				403	case 226: return "IM Used";
				404	case 300: return "Multiple Choices";
				405	case 301: return "Moved Permanently";
				406	case 302: return "Moved Temporarily";
				407	case 303: return "See Other";
				408	case 304: return "Not Modified";
				409	case 305: return "Use Proxy";
				410	case 307: return "Temporary Redirect";
				411	case 308: return "Permanent Redirect";
				412	case 310: return "Too many Redirects";
				413	case 400: return "Bad Request";
				414	case 401: return "Unauthorized";
				415	case 402: return "Payment Required";
				416	case 403: return "Forbidden";
				417	case 404: return "Not Found";
				418	case 405: return "Method Not Allowed";
				419	case 406: return "Not Acceptable";
				420	case 407: return "Proxy Authentication Required";
				421	case 408: return "Request Time-out";
				422	case 409: return "Conflict";
				423	case 410: return "Gone";
				424	case 411: return "Length Required";
				425	case 412: return "Precondition Failed";
				426	case 413: return "Request Entity Too Large";
				427	case 414: return "Request-URI Too Long";
				428	case 415: return "Unsupported Media Type";
				429	case 416: return "Requested range unsatisfiable";
				430	case 417: return "Expectation failed";
				431	case 418: return "I'm a teapot";
				432	case 421: return "Misdirected Request";
				433	case 422: return "Unprocessable entity";
				434	case 423: return "Locked";
				435	case 424: return "Method failure";
				436	case 425: return "Too Early";
				437	case 426: return "Upgrade Required";
				438	case 428: return "Precondition Required";
				439	case 429: return "Too Many Requests";
				440	case 431: return "Request Header Fields Too Large";
				441	case 449: return "Retry With";
				442	case 450: return "Blocked by Windows Parental Controls";
				443	case 451: return "Unavailable For Legal Reasons";
				444	case 456: return "Unrecoverable Error";
				445	case 499: return "client has closed connection";
				446	case 500: return "Internal Server Error";
				447	case 501: return "Not Implemented";
				448	case 502: return "Bad Gateway or Proxy Error";
				449	case 503: return "Service Unavailable";
				450	case 504: return "Gateway Time-out";
				451	case 505: return "HTTP Version not supported";
Ilya Shipitsin	46a030c	2020-07-05 16:36:08 +0500	[diff] [blame]	452	case 506: return "Variant also negotiate";
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	453	case 507: return "Insufficient storage";
				454	case 508: return "Loop detected";
				455	case 509: return "Bandwidth Limit Exceeded";
				456	case 510: return "Not extended";
				457	case 511: return "Network authentication required";
				458	case 520: return "Web server is returning an unknown error";
				459	default:
				460	switch (status) {
				461	case 100 ... 199: return "Informational";
				462	case 200 ... 299: return "Success";
				463	case 300 ... 399: return "Redirection";
				464	case 400 ... 499: return "Client Error";
				465	case 500 ... 599: return "Server Error";
				466	default: return "Other";
				467	}
				468	}
				469	}
				470
Willy Tarreau	d3d8d03	2021-08-10 15:35:36 +0200	[diff] [blame]	471	/* Returns non-zero if the scheme <schm> is syntactically correct according to
				472	* RFC3986#3.1, otherwise zero. It expects only the scheme and nothing else
				473	* (particularly not the following "://").
				474	* Scheme = alpha *(alpha\|digit\|'+'\|'-'\|'.')
				475	*/
				476	int http_validate_scheme(const struct ist schm)
				477	{
				478	size_t i;
				479
				480	for (i = 0; i < schm.len; i++) {
				481	if (likely((schm.ptr[i] >= 'a' && schm.ptr[i] <= 'z') \|\|
				482	(schm.ptr[i] >= 'A' && schm.ptr[i] <= 'Z')))
				483	continue;
				484	if (unlikely(!i)) // first char must be alpha
				485	return 0;
				486	if ((schm.ptr[i] >= '0' && schm.ptr[i] <= '9') \|\|
				487	schm.ptr[i] == '+' \|\| schm.ptr[i] == '-' \|\| schm.ptr[i] == '.')
				488	continue;
				489	return 0;
				490	}
				491	return !!i;
				492	}
				493
Amaury Denoyelle	ef08811	2021-07-07 10:49:25 +0200	[diff] [blame]	494	/* Parse the uri and looks for the scheme. If not found, an empty ist is
				495	* returned. Otherwise, the ist pointing to the scheme is returned.
Amaury Denoyelle	8ac8cbf	2021-07-06 10:52:58 +0200	[diff] [blame]	496	*
				497	* <parser> must have been initialized via http_uri_parser_init. See the
				498	* related http_uri_parser documentation for the specific API usage.
Amaury Denoyelle	ef08811	2021-07-07 10:49:25 +0200	[diff] [blame]	499	*/
Amaury Denoyelle	8ac8cbf	2021-07-06 10:52:58 +0200	[diff] [blame]	500	struct ist http_parse_scheme(struct http_uri_parser *parser)
Amaury Denoyelle	ef08811	2021-07-07 10:49:25 +0200	[diff] [blame]	501	{
				502	const char ptr, start, *end;
				503
Amaury Denoyelle	8ac8cbf	2021-07-06 10:52:58 +0200	[diff] [blame]	504	if (parser->state >= URI_PARSER_STATE_SCHEME_DONE)
Amaury Denoyelle	ef08811	2021-07-07 10:49:25 +0200	[diff] [blame]	505	goto not_found;
				506
Amaury Denoyelle	8ac8cbf	2021-07-06 10:52:58 +0200	[diff] [blame]	507	if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY)
Amaury Denoyelle	ef08811	2021-07-07 10:49:25 +0200	[diff] [blame]	508	goto not_found;
				509
Amaury Denoyelle	8ac8cbf	2021-07-06 10:52:58 +0200	[diff] [blame]	510	ptr = start = istptr(parser->uri);
				511	end = istend(parser->uri);
				512
Amaury Denoyelle	ef08811	2021-07-07 10:49:25 +0200	[diff] [blame]	513	if (isalpha((unsigned char)*ptr)) {
				514	/* this is a scheme as described by RFC3986, par. 3.1, or only
				515	* an authority (in case of a CONNECT method).
				516	*/
				517	ptr++;
				518	/* retrieve the scheme up to the suffix '://'. If the suffix is
				519	* not found, this means there is no scheme and it is an
				520	* authority-only uri.
				521	*/
				522	while (ptr < end &&
				523	(isalnum((unsigned char)ptr) \|\| ptr == '+' \|\| ptr == '-' \|\| ptr == '.'))
				524	ptr++;
				525	if (ptr == end \|\| *ptr++ != ':')
				526	goto not_found;
				527	if (ptr == end \|\| *ptr++ != '/')
				528	goto not_found;
				529	if (ptr == end \|\| *ptr++ != '/')
				530	goto not_found;
				531	}
				532	else {
				533	goto not_found;
				534	}
				535
Amaury Denoyelle	8ac8cbf	2021-07-06 10:52:58 +0200	[diff] [blame]	536	parser->uri = ist2(ptr, end - ptr);
				537	parser->state = URI_PARSER_STATE_SCHEME_DONE;
Amaury Denoyelle	ef08811	2021-07-07 10:49:25 +0200	[diff] [blame]	538	return ist2(start, ptr - start);
				539
				540	not_found:
Amaury Denoyelle	8ac8cbf	2021-07-06 10:52:58 +0200	[diff] [blame]	541	parser->state = URI_PARSER_STATE_SCHEME_DONE;
Amaury Denoyelle	ef08811	2021-07-07 10:49:25 +0200	[diff] [blame]	542	return IST_NULL;
				543	}
				544
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	545	/* Parse the uri and looks for the authority, between the scheme and the
				546	* path. if no_userinfo is not zero, the part before the '@' (including it) is
				547	* skipped. If not found, an empty ist is returned. Otherwise, the ist pointing
				548	* on the authority is returned.
Amaury Denoyelle	69294b2	2021-07-06 11:02:22 +0200	[diff] [blame]	549	*
				550	* <parser> must have been initialized via http_uri_parser_init. See the
				551	* related http_uri_parser documentation for the specific API usage.
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	552	*/
Amaury Denoyelle	69294b2	2021-07-06 11:02:22 +0200	[diff] [blame]	553	struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo)
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	554	{
				555	const char ptr, start, *end;
				556
Amaury Denoyelle	69294b2	2021-07-06 11:02:22 +0200	[diff] [blame]	557	if (parser->state >= URI_PARSER_STATE_AUTHORITY_DONE)
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	558	goto not_found;
				559
Amaury Denoyelle	69294b2	2021-07-06 11:02:22 +0200	[diff] [blame]	560	if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY)
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	561	goto not_found;
				562
Amaury Denoyelle	69294b2	2021-07-06 11:02:22 +0200	[diff] [blame]	563	if (parser->state < URI_PARSER_STATE_SCHEME_DONE)
				564	http_parse_scheme(parser);
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	565
Amaury Denoyelle	69294b2	2021-07-06 11:02:22 +0200	[diff] [blame]	566	ptr = start = istptr(parser->uri);
				567	end = istend(parser->uri);
				568
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	569	while (ptr < end && *ptr != '/') {
				570	if (*ptr++ == '@' && no_userinfo)
				571	start = ptr;
				572	}
				573
				574	/* OK, ptr point on the '/' or the end */
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	575
				576	authority:
Amaury Denoyelle	69294b2	2021-07-06 11:02:22 +0200	[diff] [blame]	577	parser->uri = ist2(ptr, end - ptr);
				578	parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
				579	return ist2(start, ptr - start);
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	580
				581	not_found:
Amaury Denoyelle	69294b2	2021-07-06 11:02:22 +0200	[diff] [blame]	582	parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
Tim Duesterhus	241e29e	2020-03-05 17:56:30 +0100	[diff] [blame]	583	return IST_NULL;
Christopher Faulet	16fdc55	2019-10-08 14:56:58 +0200	[diff] [blame]	584	}
				585
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	586	/* Parse the URI from the given transaction (which is assumed to be in request
				587	* phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is
				588	* returned. Otherwise the pointer and length are returned.
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	589	*
				590	* <parser> must have been initialized via http_uri_parser_init. See the
				591	* related http_uri_parser documentation for the specific API usage.
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	592	*/
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	593	struct ist http_parse_path(struct http_uri_parser *parser)
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	594	{
				595	const char ptr, end;
				596
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	597	if (parser->state >= URI_PARSER_STATE_PATH_DONE)
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	598	goto not_found;
				599
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	600	if (parser->format == URI_PARSER_FORMAT_EMPTY \|\|
				601	parser->format == URI_PARSER_FORMAT_ASTERISK) {
				602	goto not_found;
				603	}
				604
				605	ptr = istptr(parser->uri);
				606	end = istend(parser->uri);
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	607
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	608	/* If the uri is in absolute-path format, first skip the scheme and
				609	* authority parts. No scheme will be found if the uri is in authority
				610	* format, which indicates that the path won't be present.
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	611	*/
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	612	if (parser->format == URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY) {
				613	if (parser->state < URI_PARSER_STATE_SCHEME_DONE) {
				614	/* If no scheme found, uri is in authority format. No
				615	* path is present.
				616	*/
				617	if (!isttest(http_parse_scheme(parser)))
				618	goto not_found;
				619	}
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	620
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	621	if (parser->state < URI_PARSER_STATE_AUTHORITY_DONE)
				622	http_parse_authority(parser, 1);
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	623
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	624	ptr = istptr(parser->uri);
				625
				626	if (ptr == end)
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	627	goto not_found;
				628	}
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	629
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	630	parser->state = URI_PARSER_STATE_PATH_DONE;
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	631	return ist2(ptr, end - ptr);
				632
				633	not_found:
Amaury Denoyelle	c453f95	2021-07-06 11:40:12 +0200	[diff] [blame]	634	parser->state = URI_PARSER_STATE_PATH_DONE;
Tim Duesterhus	241e29e	2020-03-05 17:56:30 +0100	[diff] [blame]	635	return IST_NULL;
Willy Tarreau	6b952c8	2018-09-10 17:45:34 +0200	[diff] [blame]	636	}
Willy Tarreau	04f1e2d	2018-09-10 18:04:24 +0200	[diff] [blame]	637
/*
 * Checks if <hdr> is exactly <name> for <len> chars, and ends with a colon.
 * If so, returns the position of the first non-space character relative to
 * <hdr>, or <end>-<hdr> if not found before. If no value is found, it tries
 * to return a pointer to the place after the first space. Returns 0 if the
 * header name does not match. Checks are case-insensitive.
 *
 * <hdr> points to the beginning of the header line and <end> to the first
 * byte past it; <name> is the expected header name and <len> its length.
 */
int http_header_match2(const char *hdr, const char *end,
                       const char *name, int len)
{
	const char *val;

	/* the line must at least hold the name and the colon */
	if (hdr + len >= end)
		return 0;
	if (hdr[len] != ':')
		return 0;
	if (strncasecmp(hdr, name, len) != 0)
		return 0;

	/* skip spaces and tabs following the colon */
	val = hdr + len + 1;
	while (val < end && HTTP_IS_SPHT(*val))
		val++;

	if ((val >= end) && (len + 2 <= end - hdr))
		return len + 2; /* we may replace starting from second space */
	return val - hdr;
}
				663
				664	/* Find the end of the header value contained between <s> and <e>. See RFC7230,
				665	* par 3.2 for more information. Note that it requires a valid header to return
				666	* a valid result. This works for headers defined as comma-separated lists.
				667	*/
				668	char http_find_hdr_value_end(char s, const char *e)
				669	{
				670	int quoted, qdpair;
				671
				672	quoted = qdpair = 0;
				673
Willy Tarreau	02ac950	2020-02-21 16:31:22 +0100	[diff] [blame]	674	#ifdef HA_UNALIGNED_LE
Willy Tarreau	ab813a4	2018-09-10 18:41:28 +0200	[diff] [blame]	675	/* speedup: skip everything not a comma nor a double quote */
				676	for (; s <= e - sizeof(int); s += sizeof(int)) {
				677	unsigned int c = (int )s; // comma
				678	unsigned int q = c; // quote
				679
				680	c ^= 0x2c2c2c2c; // contains one zero on a comma
				681	q ^= 0x22222222; // contains one zero on a quote
				682
				683	c = (c - 0x01010101) & ~c; // contains 0x80 below a comma
				684	q = (q - 0x01010101) & ~q; // contains 0x80 below a quote
				685
				686	if ((c \| q) & 0x80808080)
				687	break; // found a comma or a quote
				688	}
				689	#endif
				690	for (; s < e; s++) {
				691	if (qdpair) qdpair = 0;
				692	else if (quoted) {
				693	if (*s == '\\') qdpair = 1;
				694	else if (*s == '"') quoted = 0;
				695	}
				696	else if (*s == '"') quoted = 1;
				697	else if (*s == ',') return s;
				698	}
				699	return s;
				700	}
				701
				702	/* Find the end of a cookie value contained between <s> and <e>. It works the
				703	* same way as with headers above except that the semi-colon also ends a token.
				704	* See RFC2965 for more information. Note that it requires a valid header to
				705	* return a valid result.
				706	*/
				707	char http_find_cookie_value_end(char s, const char *e)
				708	{
				709	int quoted, qdpair;
				710
				711	quoted = qdpair = 0;
				712	for (; s < e; s++) {
				713	if (qdpair) qdpair = 0;
				714	else if (quoted) {
				715	if (*s == '\\') qdpair = 1;
				716	else if (*s == '"') quoted = 0;
				717	}
				718	else if (*s == '"') quoted = 1;
				719	else if (s == ',' \|\| s == ';') return s;
				720	}
				721	return s;
				722	}
				723
				724	/* Try to find the next occurrence of a cookie name in a cookie header value.
Maciej Zdeb	dea7c20	2020-11-13 09:38:06 +0000	[diff] [blame]	725	* To match on any cookie name, <cookie_name_l> must be set to 0.
Willy Tarreau	ab813a4	2018-09-10 18:41:28 +0200	[diff] [blame]	726	* The lookup begins at <hdr>. The pointer and size of the next occurrence of
				727	* the cookie value is returned into value and value_l, and the function
				728	* returns a pointer to the next pointer to search from if the value was found.
				729	* Otherwise if the cookie was not found, NULL is returned and neither value
				730	* nor value_l are touched. The input <hdr> string should first point to the
				731	* header's value, and the <hdr_end> pointer must point to the first character
				732	* not part of the value. <list> must be non-zero if value may represent a list
				733	* of values (cookie headers). This makes it faster to abort parsing when no
				734	* list is expected.
				735	*/
				736	char http_extract_cookie_value(char hdr, const char *hdr_end,
				737	char *cookie_name, size_t cookie_name_l,
				738	int list, char *value, size_t value_l)
				739	{
				740	char equal, att_end, att_beg, val_beg, *val_end;
				741	char *next;
				742
				743	/* we search at least a cookie name followed by an equal, and more
				744	* generally something like this :
				745	* Cookie: NAME1 = VALUE 1 ; NAME2 = VALUE2 ; NAME3 = VALUE3\r\n
				746	*/
				747	for (att_beg = hdr; att_beg + cookie_name_l + 1 < hdr_end; att_beg = next + 1) {
				748	/* Iterate through all cookies on this line */
				749
				750	while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
				751	att_beg++;
				752
				753	/* find att_end : this is the first character after the last non
				754	* space before the equal. It may be equal to hdr_end.
				755	*/
				756	equal = att_end = att_beg;
				757
				758	while (equal < hdr_end) {
				759	if (equal == '=' \|\| equal == ';' \|\| (list && *equal == ','))
				760	break;
				761	if (HTTP_IS_SPHT(*equal++))
				762	continue;
				763	att_end = equal;
				764	}
				765
Ilya Shipitsin	46a030c	2020-07-05 16:36:08 +0500	[diff] [blame]	766	/* here, <equal> points to '=', a delimiter or the end. <att_end>
Willy Tarreau	ab813a4	2018-09-10 18:41:28 +0200	[diff] [blame]	767	* is between <att_beg> and <equal>, both may be identical.
				768	*/
				769
				770	/* look for end of cookie if there is an equal sign */
				771	if (equal < hdr_end && *equal == '=') {
				772	/* look for the beginning of the value */
				773	val_beg = equal + 1;
				774	while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
				775	val_beg++;
				776
				777	/* find the end of the value, respecting quotes */
				778	next = http_find_cookie_value_end(val_beg, hdr_end);
				779
Ilya Shipitsin	46a030c	2020-07-05 16:36:08 +0500	[diff] [blame]	780	/* make val_end point to the first white space or delimiter after the value */
Willy Tarreau	ab813a4	2018-09-10 18:41:28 +0200	[diff] [blame]	781	val_end = next;
				782	while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
				783	val_end--;
				784	} else {
				785	val_beg = val_end = next = equal;
				786	}
				787
				788	/* We have nothing to do with attributes beginning with '$'. However,
				789	* they will automatically be removed if a header before them is removed,
				790	* since they're supposed to be linked together.
				791	*/
				792	if (*att_beg == '$')
				793	continue;
				794
				795	/* Ignore cookies with no equal sign */
				796	if (equal == next)
				797	continue;
				798
				799	/* Now we have the cookie name between att_beg and att_end, and
				800	* its value between val_beg and val_end.
				801	*/
				802
Maciej Zdeb	dea7c20	2020-11-13 09:38:06 +0000	[diff] [blame]	803	if (cookie_name_l == 0 \|\| (att_end - att_beg == cookie_name_l &&
				804	memcmp(att_beg, cookie_name, cookie_name_l) == 0)) {
Willy Tarreau	ab813a4	2018-09-10 18:41:28 +0200	[diff] [blame]	805	/* let's return this value and indicate where to go on from */
				806	*value = val_beg;
				807	*value_l = val_end - val_beg;
				808	return next + 1;
				809	}
				810
				811	/* Set-Cookie headers only have the name in the first attr=value part */
				812	if (!list)
				813	break;
				814	}
				815
				816	return NULL;
				817	}
				818
/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
 * value is larger than 1000, it is bound to 1000. The parser consumes up to
 * 1 digit, one dot and 3 digits and stops on the first invalid character.
 * Unparsable qvalues return 1000 as "q=1.000".
 *
 * If <end> is not NULL, it is set to point to the first character which was
 * not consumed. The input has no explicit bound: it must be terminated by a
 * character which is neither a digit nor a dot (e.g. NUL or a delimiter).
 */
int http_parse_qvalue(const char *qvalue, const char **end)
{
	int q = 1000;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q = (*qvalue++ - '0') * 1000;

	/* only consume the dot when it really is one, so that <end> stays on
	 * the delimiter which follows a dot-less value such as "1,".
	 */
	if (*qvalue != '.')
		goto out;
	qvalue++;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 100;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 10;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 1;
 out:
	if (q > 1000)
		q = 1000;
	if (end)
		*end = qvalue;
	return q;
}
				853
				854	/*
Joseph Herlant	942eea3	2018-11-15 13:57:22 -0800	[diff] [blame]	855	* Given a url parameter, find the starting position of the first occurrence,
Willy Tarreau	ab813a4	2018-09-10 18:41:28 +0200	[diff] [blame]	856	* or NULL if the parameter is not found.
				857	*
				858	* Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
				859	* the function will return query_string+8.
				860	*
				861	* Warning: this function returns a pointer that can point to the first chunk
				862	* or the second chunk. The caller must be check the position before using the
				863	* result.
				864	*/
				865	const char http_find_url_param_pos(const char *chunks,
				866	const char* url_param_name, size_t url_param_name_l,
				867	char delim)
				868	{
				869	const char pos, last, *equal;
				870	const char **bufs = chunks;
				871	int l1, l2;
				872
				873
				874	pos = bufs[0];
				875	last = bufs[1];
				876	while (pos < last) {
				877	/* Check the equal. */
				878	equal = pos + url_param_name_l;
				879	if (fix_pointer_if_wrap(chunks, &equal)) {
				880	if (equal >= chunks[3])
				881	return NULL;
				882	} else {
				883	if (equal >= chunks[1])
				884	return NULL;
				885	}
				886	if (*equal == '=') {
				887	if (pos + url_param_name_l > last) {
				888	/* process wrap case, we detect a wrap. In this case, the
				889	* comparison is performed in two parts.
				890	*/
				891
Thayne McCombs	8f0cc5c	2021-01-07 21:35:52 -0700	[diff] [blame]	892	/* This is the end, we don't have any other chunk. */
Willy Tarreau	ab813a4	2018-09-10 18:41:28 +0200	[diff] [blame]	893	if (bufs != chunks \|\| !bufs[2])
				894	return NULL;
				895
				896	/* Compute the length of each part of the comparison. */
				897	l1 = last - pos;
				898	l2 = url_param_name_l - l1;
				899
				900	/* The second buffer is too short to contain the compared string. */
				901	if (bufs[2] + l2 > bufs[3])
				902	return NULL;
				903
				904	if (memcmp(pos, url_param_name, l1) == 0 &&
				905	memcmp(bufs[2], url_param_name+l1, l2) == 0)
				906	return pos;
				907
				908	/* Perform wrapping and jump the string who fail the comparison. */
				909	bufs += 2;
				910	pos = bufs[0] + l2;
				911	last = bufs[1];
				912
				913	} else {
				914	/* process a simple comparison. */
				915	if (memcmp(pos, url_param_name, url_param_name_l) == 0)
				916	return pos;
				917	pos += url_param_name_l + 1;
				918	if (fix_pointer_if_wrap(chunks, &pos))
				919	last = bufs[2];
				920	}
				921	}
				922
				923	while (1) {
				924	/* Look for the next delimiter. */
				925	while (pos < last && !http_is_param_delimiter(*pos, delim))
				926	pos++;
				927	if (pos < last)
				928	break;
				929	/* process buffer wrapping. */
				930	if (bufs != chunks \|\| !bufs[2])
				931	return NULL;
				932	bufs += 2;
				933	pos = bufs[0];
				934	last = bufs[1];
				935	}
				936	pos++;
				937	}
				938	return NULL;
				939	}
				940
				941	/*
				942	* Given a url parameter name and a query string, find the next value.
				943	* An empty url_param_name matches the first available parameter.
				944	* If the parameter is found, 1 is returned and vstart / vend are updated to
				945	* respectively provide a pointer to the value and its end.
				946	* Otherwise, 0 is returned and vstart/vend are not modified.
				947	*/
				948	int http_find_next_url_param(const char **chunks,
				949	const char* url_param_name, size_t url_param_name_l,
				950	const char vstart, const char vend, char delim)
				951	{
				952	const char arg_start, qs_end;
				953	const char value_start, value_end;
				954
				955	arg_start = chunks[0];
				956	qs_end = chunks[1];
				957	if (url_param_name_l) {
				958	/* Looks for an argument name. */
				959	arg_start = http_find_url_param_pos(chunks,
				960	url_param_name, url_param_name_l,
				961	delim);
				962	/* Check for wrapping. */
				963	if (arg_start >= qs_end)
				964	qs_end = chunks[3];
				965	}
				966	if (!arg_start)
				967	return 0;
				968
				969	if (!url_param_name_l) {
				970	while (1) {
				971	/* looks for the first argument. */
				972	value_start = memchr(arg_start, '=', qs_end - arg_start);
				973	if (!value_start) {
				974	/* Check for wrapping. */
				975	if (arg_start >= chunks[0] &&
				976	arg_start < chunks[1] &&
				977	chunks[2]) {
				978	arg_start = chunks[2];
				979	qs_end = chunks[3];
				980	continue;
				981	}
				982	return 0;
				983	}
				984	break;
				985	}
				986	value_start++;
				987	}
				988	else {
				989	/* Jump the argument length. */
				990	value_start = arg_start + url_param_name_l + 1;
				991
				992	/* Check for pointer wrapping. */
				993	if (fix_pointer_if_wrap(chunks, &value_start)) {
				994	/* Update the end pointer. */
				995	qs_end = chunks[3];
				996
				997	/* Check for overflow. */
				998	if (value_start >= qs_end)
				999	return 0;
				1000	}
				1001	}
				1002
				1003	value_end = value_start;
				1004
				1005	while (1) {
				1006	while ((value_end < qs_end) && !http_is_param_delimiter(*value_end, delim))
				1007	value_end++;
				1008	if (value_end < qs_end)
				1009	break;
				1010	/* process buffer wrapping. */
				1011	if (value_end >= chunks[0] &&
				1012	value_end < chunks[1] &&
				1013	chunks[2]) {
				1014	value_end = chunks[2];
				1015	qs_end = chunks[3];
				1016	continue;
				1017	}
				1018	break;
				1019	}
				1020
				1021	*vstart = value_start;
				1022	*vend = value_end;
				1023	return 1;
				1024	}
				1025
Christopher Faulet	8277ca7	2018-10-22 15:12:04 +0200	[diff] [blame]	1026	/* Parses a single header line (without the CRLF) and splits it into its name
				1027	* and its value. The parsing is pretty naive and just skip spaces.
				1028	*/
				1029	int http_parse_header(const struct ist hdr, struct ist name, struct ist value)
				1030	{
				1031	char *p = hdr.ptr;
				1032	char *end = p + hdr.len;
				1033
				1034	name->len = value->len = 0;
				1035
				1036	/* Skip leading spaces */
				1037	for (; p < end && HTTP_IS_SPHT(*p); p++);
				1038
				1039	/* Set the header name */
				1040	name->ptr = p;
				1041	for (; p < end && HTTP_IS_TOKEN(*p); p++);
				1042	name->len = p - name->ptr;
				1043
				1044	/* Skip the ':' and spaces before and after it */
				1045	for (; p < end && HTTP_IS_SPHT(*p); p++);
				1046	if (p < end && *p == ':') p++;
				1047	for (; p < end && HTTP_IS_SPHT(*p); p++);
				1048
				1049	/* Set the header value */
				1050	value->ptr = p;
				1051	value->len = end - p;
				1052
				1053	return 1;
				1054	}
				1055
				1056	/* Parses a single start line (without the CRLF) and splits it into 3 parts. The
				1057	* parsing is pretty naive and just skip spaces.
				1058	*/
				1059	int http_parse_stline(const struct ist line, struct ist p1, struct ist p2, struct ist *p3)
				1060	{
				1061	char *p = line.ptr;
				1062	char *end = p + line.len;
				1063
				1064	p1->len = p2->len = p3->len = 0;
				1065
				1066	/* Skip leading spaces */
				1067	for (; p < end && HTTP_IS_SPHT(*p); p++);
				1068
				1069	/* Set the first part */
				1070	p1->ptr = p;
				1071	for (; p < end && HTTP_IS_TOKEN(*p); p++);
				1072	p1->len = p - p1->ptr;
				1073
				1074	/* Skip spaces between p1 and p2 */
				1075	for (; p < end && HTTP_IS_SPHT(*p); p++);
				1076
				1077	/* Set the second part */
				1078	p2->ptr = p;
				1079	for (; p < end && !HTTP_IS_SPHT(*p); p++);
				1080	p2->len = p - p2->ptr;
				1081
				1082	/* Skip spaces between p2 and p3 */
				1083	for (; p < end && HTTP_IS_SPHT(*p); p++);
				1084
Ilya Shipitsin	46a030c	2020-07-05 16:36:08 +0500	[diff] [blame]	1085	/* The remaining is the third value */
Christopher Faulet	8277ca7	2018-10-22 15:12:04 +0200	[diff] [blame]	1086	p3->ptr = p;
				1087	p3->len = end - p;
				1088
				1089	return 1;
				1090	}
Christopher Faulet	341fac1	2019-09-16 11:37:05 +0200	[diff] [blame]	1091
				1092	/* Parses value of a Status header with the following format: "Status: Code[
				1093	* Reason]". The parsing is pretty naive and just skip spaces. It return the
				1094	* numeric value of the status code.
				1095	*/
				1096	int http_parse_status_val(const struct ist value, struct ist status, struct ist reason)
				1097	{
				1098	char *p = value.ptr;
				1099	char *end = p + value.len;
				1100	uint16_t code;
				1101
				1102	status->len = reason->len = 0;
				1103
				1104	/* Skip leading spaces */
				1105	for (; p < end && HTTP_IS_SPHT(*p); p++);
				1106
				1107	/* Set the status part */
				1108	status->ptr = p;
				1109	for (; p < end && HTTP_IS_TOKEN(*p); p++);
				1110	status->len = p - status->ptr;
				1111
				1112	/* Skip spaces between status and reason */
				1113	for (; p < end && HTTP_IS_SPHT(*p); p++);
				1114
				1115	/* the remaining is the reason */
				1116	reason->ptr = p;
				1117	reason->len = end - p;
				1118
				1119	code = strl2ui(status->ptr, status->len);
				1120	return code;
				1121	}
Remi Tricot-Le Breton	bcced09	2020-10-22 10:40:03 +0200	[diff] [blame]	1122
				1123
				1124	/* Returns non-zero if the two ETags are comparable (see RFC 7232#2.3.2).
				1125	* If any of them is a weak ETag, we discard the weakness prefix and perform
				1126	* a strict string comparison.
				1127	* Returns 0 otherwise.
				1128	*/
				1129	int http_compare_etags(struct ist etag1, struct ist etag2)
				1130	{
				1131	enum http_etag_type etag_type1;
				1132	enum http_etag_type etag_type2;
				1133
				1134	etag_type1 = http_get_etag_type(etag1);
				1135	etag_type2 = http_get_etag_type(etag2);
				1136
				1137	if (etag_type1 == ETAG_INVALID \|\| etag_type2 == ETAG_INVALID)
				1138	return 0;
				1139
				1140	/* Discard the 'W/' prefix an ETag is a weak one. */
				1141	if (etag_type1 == ETAG_WEAK)
				1142	etag1 = istadv(etag1, 2);
				1143	if (etag_type2 == ETAG_WEAK)
				1144	etag2 = istadv(etag2, 2);
				1145
				1146	return isteq(etag1, etag2);
				1147	}
Remi Tricot-Le Breton	56e46cb	2020-12-23 18:13:48 +0100	[diff] [blame]	1148
				1149
				1150	/*
				1151	* Trim leading space or horizontal tab characters from <value> string.
				1152	* Returns the trimmed string.
				1153	*/
				1154	struct ist http_trim_leading_spht(struct ist value)
				1155	{
				1156	struct ist ret = value;
				1157
				1158	while (ret.len && HTTP_IS_SPHT(ret.ptr[0])) {
				1159	++ret.ptr;
				1160	--ret.len;
				1161	}
				1162
				1163	return ret;
				1164	}
				1165
				1166	/*
				1167	* Trim trailing space or horizontal tab characters from <value> string.
				1168	* Returns the trimmed string.
				1169	*/
				1170	struct ist http_trim_trailing_spht(struct ist value)
				1171	{
				1172	struct ist ret = value;
				1173
				1174	while (ret.len && HTTP_IS_SPHT(ret.ptr[-1]))
				1175	--ret.len;
				1176
				1177	return ret;
				1178	}