Blame - src/h1.c - haproxy

blob: bca820c20e42eb3af2e99db74e37f40d40ee31c7 [file] [log] [blame]

Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	1	/*
				2	* HTTP/1 protocol analyzer
				3	*
				4	* Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	*/
				12
				13	#include <common/config.h>
				14
				15	#include <proto/h1.h>
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	16	#include <proto/hdr_idx.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	17
				18	/* It is about twice as fast on recent architectures to lookup a byte in a
				19	* table than to perform a boolean AND or OR between two tests. Refer to
				20	* RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
				21	* neither a separator nor a CTL char. An http ver_token is any ASCII which can
				22	* be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
				23	* digit. Note: please do not overwrite values in assignment since gcc-2.95
				24	* will not handle them correctly. It's worth noting that chars 128..255 are
				25	* nothing, not even control chars.
				26	*/
				27	const unsigned char h1_char_classes[256] = {
				28	[ 0] = H1_FLG_CTL,
				29	[ 1] = H1_FLG_CTL,
				30	[ 2] = H1_FLG_CTL,
				31	[ 3] = H1_FLG_CTL,
				32	[ 4] = H1_FLG_CTL,
				33	[ 5] = H1_FLG_CTL,
				34	[ 6] = H1_FLG_CTL,
				35	[ 7] = H1_FLG_CTL,
				36	[ 8] = H1_FLG_CTL,
				37	[ 9] = H1_FLG_SPHT \| H1_FLG_LWS \| H1_FLG_SEP \| H1_FLG_CTL,
				38	[ 10] = H1_FLG_CRLF \| H1_FLG_LWS \| H1_FLG_CTL,
				39	[ 11] = H1_FLG_CTL,
				40	[ 12] = H1_FLG_CTL,
				41	[ 13] = H1_FLG_CRLF \| H1_FLG_LWS \| H1_FLG_CTL,
				42	[ 14] = H1_FLG_CTL,
				43	[ 15] = H1_FLG_CTL,
				44	[ 16] = H1_FLG_CTL,
				45	[ 17] = H1_FLG_CTL,
				46	[ 18] = H1_FLG_CTL,
				47	[ 19] = H1_FLG_CTL,
				48	[ 20] = H1_FLG_CTL,
				49	[ 21] = H1_FLG_CTL,
				50	[ 22] = H1_FLG_CTL,
				51	[ 23] = H1_FLG_CTL,
				52	[ 24] = H1_FLG_CTL,
				53	[ 25] = H1_FLG_CTL,
				54	[ 26] = H1_FLG_CTL,
				55	[ 27] = H1_FLG_CTL,
				56	[ 28] = H1_FLG_CTL,
				57	[ 29] = H1_FLG_CTL,
				58	[ 30] = H1_FLG_CTL,
				59	[ 31] = H1_FLG_CTL,
				60	[' '] = H1_FLG_SPHT \| H1_FLG_LWS \| H1_FLG_SEP,
				61	['!'] = H1_FLG_TOK,
				62	['"'] = H1_FLG_SEP,
				63	['#'] = H1_FLG_TOK,
				64	['$'] = H1_FLG_TOK,
				65	['%'] = H1_FLG_TOK,
				66	['&'] = H1_FLG_TOK,
				67	[ 39] = H1_FLG_TOK,
				68	['('] = H1_FLG_SEP,
				69	[')'] = H1_FLG_SEP,
				70	['*'] = H1_FLG_TOK,
				71	['+'] = H1_FLG_TOK,
				72	[','] = H1_FLG_SEP,
				73	['-'] = H1_FLG_TOK,
				74	['.'] = H1_FLG_TOK \| H1_FLG_VER,
				75	['/'] = H1_FLG_SEP \| H1_FLG_VER,
				76	['0'] = H1_FLG_TOK \| H1_FLG_VER,
				77	['1'] = H1_FLG_TOK \| H1_FLG_VER,
				78	['2'] = H1_FLG_TOK \| H1_FLG_VER,
				79	['3'] = H1_FLG_TOK \| H1_FLG_VER,
				80	['4'] = H1_FLG_TOK \| H1_FLG_VER,
				81	['5'] = H1_FLG_TOK \| H1_FLG_VER,
				82	['6'] = H1_FLG_TOK \| H1_FLG_VER,
				83	['7'] = H1_FLG_TOK \| H1_FLG_VER,
				84	['8'] = H1_FLG_TOK \| H1_FLG_VER,
				85	['9'] = H1_FLG_TOK \| H1_FLG_VER,
				86	[':'] = H1_FLG_SEP,
				87	[';'] = H1_FLG_SEP,
				88	['<'] = H1_FLG_SEP,
				89	['='] = H1_FLG_SEP,
				90	['>'] = H1_FLG_SEP,
				91	['?'] = H1_FLG_SEP,
				92	['@'] = H1_FLG_SEP,
				93	['A'] = H1_FLG_TOK,
				94	['B'] = H1_FLG_TOK,
				95	['C'] = H1_FLG_TOK,
				96	['D'] = H1_FLG_TOK,
				97	['E'] = H1_FLG_TOK,
				98	['F'] = H1_FLG_TOK,
				99	['G'] = H1_FLG_TOK,
				100	['H'] = H1_FLG_TOK \| H1_FLG_VER,
				101	['I'] = H1_FLG_TOK,
				102	['J'] = H1_FLG_TOK,
				103	['K'] = H1_FLG_TOK,
				104	['L'] = H1_FLG_TOK,
				105	['M'] = H1_FLG_TOK,
				106	['N'] = H1_FLG_TOK,
				107	['O'] = H1_FLG_TOK,
				108	['P'] = H1_FLG_TOK \| H1_FLG_VER,
				109	['Q'] = H1_FLG_TOK,
				110	['R'] = H1_FLG_TOK \| H1_FLG_VER,
				111	['S'] = H1_FLG_TOK \| H1_FLG_VER,
				112	['T'] = H1_FLG_TOK \| H1_FLG_VER,
				113	['U'] = H1_FLG_TOK,
				114	['V'] = H1_FLG_TOK,
				115	['W'] = H1_FLG_TOK,
				116	['X'] = H1_FLG_TOK,
				117	['Y'] = H1_FLG_TOK,
				118	['Z'] = H1_FLG_TOK,
				119	['['] = H1_FLG_SEP,
				120	[ 92] = H1_FLG_SEP,
				121	[']'] = H1_FLG_SEP,
				122	['^'] = H1_FLG_TOK,
				123	['_'] = H1_FLG_TOK,
				124	['`'] = H1_FLG_TOK,
				125	['a'] = H1_FLG_TOK,
				126	['b'] = H1_FLG_TOK,
				127	['c'] = H1_FLG_TOK,
				128	['d'] = H1_FLG_TOK,
				129	['e'] = H1_FLG_TOK,
				130	['f'] = H1_FLG_TOK,
				131	['g'] = H1_FLG_TOK,
				132	['h'] = H1_FLG_TOK,
				133	['i'] = H1_FLG_TOK,
				134	['j'] = H1_FLG_TOK,
				135	['k'] = H1_FLG_TOK,
				136	['l'] = H1_FLG_TOK,
				137	['m'] = H1_FLG_TOK,
				138	['n'] = H1_FLG_TOK,
				139	['o'] = H1_FLG_TOK,
				140	['p'] = H1_FLG_TOK,
				141	['q'] = H1_FLG_TOK,
				142	['r'] = H1_FLG_TOK,
				143	['s'] = H1_FLG_TOK,
				144	['t'] = H1_FLG_TOK,
				145	['u'] = H1_FLG_TOK,
				146	['v'] = H1_FLG_TOK,
				147	['w'] = H1_FLG_TOK,
				148	['x'] = H1_FLG_TOK,
				149	['y'] = H1_FLG_TOK,
				150	['z'] = H1_FLG_TOK,
				151	['{'] = H1_FLG_SEP,
				152	['\|'] = H1_FLG_TOK,
				153	['}'] = H1_FLG_SEP,
				154	['~'] = H1_FLG_TOK,
				155	[127] = H1_FLG_CTL,
				156	};
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	157
				158
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	159	/*
				160	* This function parses a status line between <ptr> and <end>, starting with
				161	* parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
				162	* HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
				163	* will give undefined results.
				164	* Note that it is upon the caller's responsibility to ensure that ptr < end,
				165	* and that msg->sol points to the beginning of the response.
				166	* If a complete line is found (which implies that at least one CR or LF is
				167	* found before <end>, the updated <ptr> is returned, otherwise NULL is
				168	* returned indicating an incomplete line (which does not mean that parts have
				169	* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
				170	* non-NULL, they are fed with the new <ptr> and <state> values to be passed
				171	* upon next call.
				172	*
				173	* This function was intentionally designed to be called from
				174	* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
				175	* within its state machine and use the same macros, hence the need for same
				176	* labels and variable names. Note that msg->sol is left unchanged.
				177	*/
				178	const char http_parse_stsline(struct http_msg msg,
				179	enum h1_state state, const char ptr, const char end,
				180	unsigned int ret_ptr, enum h1_state ret_state)
				181	{
				182	const char *msg_start = msg->chn->buf->p;
				183
				184	switch (state) {
				185	case HTTP_MSG_RPVER:
				186	http_msg_rpver:
				187	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				188	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
				189
				190	if (likely(HTTP_IS_SPHT(*ptr))) {
				191	msg->sl.st.v_l = ptr - msg_start;
				192	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				193	}
				194	msg->err_state = HTTP_MSG_RPVER;
				195	state = HTTP_MSG_ERROR;
				196	break;
				197
				198	case HTTP_MSG_RPVER_SP:
				199	http_msg_rpver_sp:
				200	if (likely(!HTTP_IS_LWS(*ptr))) {
				201	msg->sl.st.c = ptr - msg_start;
				202	goto http_msg_rpcode;
				203	}
				204	if (likely(HTTP_IS_SPHT(*ptr)))
				205	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				206	/* so it's a CR/LF, this is invalid */
				207	msg->err_state = HTTP_MSG_RPVER_SP;
				208	state = HTTP_MSG_ERROR;
				209	break;
				210
				211	case HTTP_MSG_RPCODE:
				212	http_msg_rpcode:
				213	if (likely(!HTTP_IS_LWS(*ptr)))
				214	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
				215
				216	if (likely(HTTP_IS_SPHT(*ptr))) {
				217	msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
				218	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				219	}
				220
				221	/* so it's a CR/LF, so there is no reason phrase */
				222	msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
				223	http_msg_rsp_reason:
				224	/* FIXME: should we support HTTP responses without any reason phrase ? */
				225	msg->sl.st.r = ptr - msg_start;
				226	msg->sl.st.r_l = 0;
				227	goto http_msg_rpline_eol;
				228
				229	case HTTP_MSG_RPCODE_SP:
				230	http_msg_rpcode_sp:
				231	if (likely(!HTTP_IS_LWS(*ptr))) {
				232	msg->sl.st.r = ptr - msg_start;
				233	goto http_msg_rpreason;
				234	}
				235	if (likely(HTTP_IS_SPHT(*ptr)))
				236	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				237	/* so it's a CR/LF, so there is no reason phrase */
				238	goto http_msg_rsp_reason;
				239
				240	case HTTP_MSG_RPREASON:
				241	http_msg_rpreason:
				242	if (likely(!HTTP_IS_CRLF(*ptr)))
				243	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
				244	msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
				245	http_msg_rpline_eol:
				246	/* We have seen the end of line. Note that we do not
				247	* necessarily have the \n yet, but at least we know that we
				248	* have EITHER \r OR \n, otherwise the response would not be
				249	* complete. We can then record the response length and return
				250	* to the caller which will be able to register it.
				251	*/
				252	msg->sl.st.l = ptr - msg_start - msg->sol;
				253	return ptr;
				254
				255	default:
				256	#ifdef DEBUG_FULL
				257	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				258	exit(1);
				259	#endif
				260	;
				261	}
				262
				263	http_msg_ood:
				264	/* out of valid data */
				265	if (ret_state)
				266	*ret_state = state;
				267	if (ret_ptr)
				268	*ret_ptr = ptr - msg_start;
				269	return NULL;
				270	}
				271
				272	/*
				273	* This function parses a request line between <ptr> and <end>, starting with
				274	* parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
				275	* HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
				276	* will give undefined results.
				277	* Note that it is upon the caller's responsibility to ensure that ptr < end,
				278	* and that msg->sol points to the beginning of the request.
				279	* If a complete line is found (which implies that at least one CR or LF is
				280	* found before <end>, the updated <ptr> is returned, otherwise NULL is
				281	* returned indicating an incomplete line (which does not mean that parts have
				282	* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
				283	* non-NULL, they are fed with the new <ptr> and <state> values to be passed
				284	* upon next call.
				285	*
				286	* This function was intentionally designed to be called from
				287	* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
				288	* within its state machine and use the same macros, hence the need for same
				289	* labels and variable names. Note that msg->sol is left unchanged.
				290	*/
				291	const char http_parse_reqline(struct http_msg msg,
				292	enum h1_state state, const char ptr, const char end,
				293	unsigned int ret_ptr, enum h1_state ret_state)
				294	{
				295	const char *msg_start = msg->chn->buf->p;
				296
				297	switch (state) {
				298	case HTTP_MSG_RQMETH:
				299	http_msg_rqmeth:
				300	if (likely(HTTP_IS_TOKEN(*ptr)))
				301	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
				302
				303	if (likely(HTTP_IS_SPHT(*ptr))) {
				304	msg->sl.rq.m_l = ptr - msg_start;
				305	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
				306	}
				307
				308	if (likely(HTTP_IS_CRLF(*ptr))) {
				309	/* HTTP 0.9 request */
				310	msg->sl.rq.m_l = ptr - msg_start;
				311	http_msg_req09_uri:
				312	msg->sl.rq.u = ptr - msg_start;
				313	http_msg_req09_uri_e:
				314	msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
				315	http_msg_req09_ver:
				316	msg->sl.rq.v = ptr - msg_start;
				317	msg->sl.rq.v_l = 0;
				318	goto http_msg_rqline_eol;
				319	}
				320	msg->err_state = HTTP_MSG_RQMETH;
				321	state = HTTP_MSG_ERROR;
				322	break;
				323
				324	case HTTP_MSG_RQMETH_SP:
				325	http_msg_rqmeth_sp:
				326	if (likely(!HTTP_IS_LWS(*ptr))) {
				327	msg->sl.rq.u = ptr - msg_start;
				328	goto http_msg_rquri;
				329	}
				330	if (likely(HTTP_IS_SPHT(*ptr)))
				331	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
				332	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				333	goto http_msg_req09_uri;
				334
				335	case HTTP_MSG_RQURI:
				336	http_msg_rquri:
				337	#if defined(__x86_64__) \|\| \
				338	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				339	defined(__ARM_ARCH_7A__)
				340	/* speedup: skip bytes not between 0x21 and 0x7e inclusive */
				341	while (ptr <= end - sizeof(int)) {
				342	int x = (int )ptr - 0x21212121;
				343	if (x & 0x80808080)
				344	break;
				345
				346	x -= 0x5e5e5e5e;
				347	if (!(x & 0x80808080))
				348	break;
				349
				350	ptr += sizeof(int);
				351	}
				352	#endif
				353	if (ptr >= end) {
				354	state = HTTP_MSG_RQURI;
				355	goto http_msg_ood;
				356	}
				357	http_msg_rquri2:
				358	if (likely((unsigned char)(ptr - 33) <= 93)) / 33 to 126 included */
				359	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
				360
				361	if (likely(HTTP_IS_SPHT(*ptr))) {
				362	msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
				363	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
				364	}
				365
				366	if (likely((unsigned char)*ptr >= 128)) {
				367	/* non-ASCII chars are forbidden unless option
				368	* accept-invalid-http-request is enabled in the frontend.
				369	* In any case, we capture the faulty char.
				370	*/
				371	if (msg->err_pos < -1)
				372	goto invalid_char;
				373	if (msg->err_pos == -1)
				374	msg->err_pos = ptr - msg_start;
				375	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
				376	}
				377
				378	if (likely(HTTP_IS_CRLF(*ptr))) {
				379	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				380	goto http_msg_req09_uri_e;
				381	}
				382
				383	/* OK forbidden chars, 0..31 or 127 */
				384	invalid_char:
				385	msg->err_pos = ptr - msg_start;
				386	msg->err_state = HTTP_MSG_RQURI;
				387	state = HTTP_MSG_ERROR;
				388	break;
				389
				390	case HTTP_MSG_RQURI_SP:
				391	http_msg_rquri_sp:
				392	if (likely(!HTTP_IS_LWS(*ptr))) {
				393	msg->sl.rq.v = ptr - msg_start;
				394	goto http_msg_rqver;
				395	}
				396	if (likely(HTTP_IS_SPHT(*ptr)))
				397	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
				398	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				399	goto http_msg_req09_ver;
				400
				401	case HTTP_MSG_RQVER:
				402	http_msg_rqver:
				403	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				404	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
				405
				406	if (likely(HTTP_IS_CRLF(*ptr))) {
				407	msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
				408	http_msg_rqline_eol:
				409	/* We have seen the end of line. Note that we do not
				410	* necessarily have the \n yet, but at least we know that we
				411	* have EITHER \r OR \n, otherwise the request would not be
				412	* complete. We can then record the request length and return
				413	* to the caller which will be able to register it.
				414	*/
				415	msg->sl.rq.l = ptr - msg_start - msg->sol;
				416	return ptr;
				417	}
				418
				419	/* neither an HTTP_VER token nor a CRLF */
				420	msg->err_state = HTTP_MSG_RQVER;
				421	state = HTTP_MSG_ERROR;
				422	break;
				423
				424	default:
				425	#ifdef DEBUG_FULL
				426	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				427	exit(1);
				428	#endif
				429	;
				430	}
				431
				432	http_msg_ood:
				433	/* out of valid data */
				434	if (ret_state)
				435	*ret_state = state;
				436	if (ret_ptr)
				437	*ret_ptr = ptr - msg_start;
				438	return NULL;
				439	}
				440
				441	/*
				442	* This function parses an HTTP message, either a request or a response,
				443	* depending on the initial msg->msg_state. The caller is responsible for
				444	* ensuring that the message does not wrap. The function can be preempted
				445	* everywhere when data are missing and recalled at the exact same location
				446	* with no information loss. The message may even be realigned between two
				447	* calls. The header index is re-initialized when switching from
				448	* MSG_R[PQ]BEFORE to MSG_RPVER\|MSG_RQMETH. It modifies msg->sol among other
				449	* fields. Note that msg->sol will be initialized after completing the first
				450	* state, so that none of the msg pointers has to be initialized prior to the
				451	* first call.
				452	*/
				453	void http_msg_analyzer(struct http_msg msg, struct hdr_idx idx)
				454	{
				455	enum h1_state state; /* updated only when leaving the FSM */
				456	register char ptr, end; /* request pointers, to avoid dereferences */
				457	struct buffer *buf;
				458
				459	state = msg->msg_state;
				460	buf = msg->chn->buf;
				461	ptr = buf->p + msg->next;
				462	end = buf->p + buf->i;
				463
				464	if (unlikely(ptr >= end))
				465	goto http_msg_ood;
				466
				467	switch (state) {
				468	/*
				469	* First, states that are specific to the response only.
				470	* We check them first so that request and headers are
				471	* closer to each other (accessed more often).
				472	*/
				473	case HTTP_MSG_RPBEFORE:
				474	http_msg_rpbefore:
				475	if (likely(HTTP_IS_TOKEN(*ptr))) {
				476	/* we have a start of message, but we have to check
				477	* first if we need to remove some CRLF. We can only
				478	* do this when o=0.
				479	*/
				480	if (unlikely(ptr != buf->p)) {
				481	if (buf->o)
				482	goto http_msg_ood;
				483	/* Remove empty leading lines, as recommended by RFC2616. */
				484	bi_fast_delete(buf, ptr - buf->p);
				485	}
				486	msg->sol = 0;
				487	msg->sl.st.l = 0; /* used in debug mode */
				488	hdr_idx_init(idx);
				489	state = HTTP_MSG_RPVER;
				490	goto http_msg_rpver;
				491	}
				492
				493	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				494	state = HTTP_MSG_RPBEFORE;
				495	goto http_msg_invalid;
				496	}
				497
				498	if (unlikely(*ptr == '\n'))
				499	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				500	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
				501	/* stop here */
				502
				503	case HTTP_MSG_RPBEFORE_CR:
				504	http_msg_rpbefore_cr:
				505	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
				506	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				507	/* stop here */
				508
				509	case HTTP_MSG_RPVER:
				510	http_msg_rpver:
				511	case HTTP_MSG_RPVER_SP:
				512	case HTTP_MSG_RPCODE:
				513	case HTTP_MSG_RPCODE_SP:
				514	case HTTP_MSG_RPREASON:
				515	ptr = (char *)http_parse_stsline(msg,
				516	state, ptr, end,
				517	&msg->next, &msg->msg_state);
				518	if (unlikely(!ptr))
				519	return;
				520
				521	/* we have a full response and we know that we have either a CR
				522	* or an LF at <ptr>.
				523	*/
				524	hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
				525
				526	msg->sol = ptr - buf->p;
				527	if (likely(*ptr == '\r'))
				528	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
				529	goto http_msg_rpline_end;
				530
				531	case HTTP_MSG_RPLINE_END:
				532	http_msg_rpline_end:
				533	/* msg->sol must point to the first of CR or LF. */
				534	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
				535	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				536	/* stop here */
				537
				538	/*
				539	* Second, states that are specific to the request only
				540	*/
				541	case HTTP_MSG_RQBEFORE:
				542	http_msg_rqbefore:
				543	if (likely(HTTP_IS_TOKEN(*ptr))) {
				544	/* we have a start of message, but we have to check
				545	* first if we need to remove some CRLF. We can only
				546	* do this when o=0.
				547	*/
				548	if (likely(ptr != buf->p)) {
				549	if (buf->o)
				550	goto http_msg_ood;
				551	/* Remove empty leading lines, as recommended by RFC2616. */
				552	bi_fast_delete(buf, ptr - buf->p);
				553	}
				554	msg->sol = 0;
				555	msg->sl.rq.l = 0; /* used in debug mode */
				556	state = HTTP_MSG_RQMETH;
				557	goto http_msg_rqmeth;
				558	}
				559
				560	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				561	state = HTTP_MSG_RQBEFORE;
				562	goto http_msg_invalid;
				563	}
				564
				565	if (unlikely(*ptr == '\n'))
				566	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
				567	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
				568	/* stop here */
				569
				570	case HTTP_MSG_RQBEFORE_CR:
				571	http_msg_rqbefore_cr:
				572	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
				573	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
				574	/* stop here */
				575
				576	case HTTP_MSG_RQMETH:
				577	http_msg_rqmeth:
				578	case HTTP_MSG_RQMETH_SP:
				579	case HTTP_MSG_RQURI:
				580	case HTTP_MSG_RQURI_SP:
				581	case HTTP_MSG_RQVER:
				582	ptr = (char *)http_parse_reqline(msg,
				583	state, ptr, end,
				584	&msg->next, &msg->msg_state);
				585	if (unlikely(!ptr))
				586	return;
				587
				588	/* we have a full request and we know that we have either a CR
				589	* or an LF at <ptr>.
				590	*/
				591	hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
				592
				593	msg->sol = ptr - buf->p;
				594	if (likely(*ptr == '\r'))
				595	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
				596	goto http_msg_rqline_end;
				597
				598	case HTTP_MSG_RQLINE_END:
				599	http_msg_rqline_end:
				600	/* check for HTTP/0.9 request : no version information available.
				601	* msg->sol must point to the first of CR or LF.
				602	*/
				603	if (unlikely(msg->sl.rq.v_l == 0))
				604	goto http_msg_last_lf;
				605
				606	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
				607	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				608	/* stop here */
				609
				610	/*
				611	* Common states below
				612	*/
				613	case HTTP_MSG_HDR_FIRST:
				614	http_msg_hdr_first:
				615	msg->sol = ptr - buf->p;
				616	if (likely(!HTTP_IS_CRLF(*ptr))) {
				617	goto http_msg_hdr_name;
				618	}
				619
				620	if (likely(*ptr == '\r'))
				621	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				622	goto http_msg_last_lf;
				623
				624	case HTTP_MSG_HDR_NAME:
				625	http_msg_hdr_name:
				626	/* assumes msg->sol points to the first char */
				627	if (likely(HTTP_IS_TOKEN(*ptr)))
				628	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				629
				630	if (likely(*ptr == ':'))
				631	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				632
				633	if (likely(msg->err_pos < -1) \|\| *ptr == '\n') {
				634	state = HTTP_MSG_HDR_NAME;
				635	goto http_msg_invalid;
				636	}
				637
				638	if (msg->err_pos == -1) /* capture error pointer */
				639	msg->err_pos = ptr - buf->p; /* >= 0 now */
				640
				641	/* and we still accept this non-token character */
				642	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				643
				644	case HTTP_MSG_HDR_L1_SP:
				645	http_msg_hdr_l1_sp:
				646	/* assumes msg->sol points to the first char */
				647	if (likely(HTTP_IS_SPHT(*ptr)))
				648	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				649
				650	/* header value can be basically anything except CR/LF */
				651	msg->sov = ptr - buf->p;
				652
				653	if (likely(!HTTP_IS_CRLF(*ptr))) {
				654	goto http_msg_hdr_val;
				655	}
				656
				657	if (likely(*ptr == '\r'))
				658	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
				659	goto http_msg_hdr_l1_lf;
				660
				661	case HTTP_MSG_HDR_L1_LF:
				662	http_msg_hdr_l1_lf:
				663	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
				664	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
				665
				666	case HTTP_MSG_HDR_L1_LWS:
				667	http_msg_hdr_l1_lws:
				668	if (likely(HTTP_IS_SPHT(*ptr))) {
				669	/* replace HT,CR,LF with spaces */
				670	for (; buf->p + msg->sov < ptr; msg->sov++)
				671	buf->p[msg->sov] = ' ';
				672	goto http_msg_hdr_l1_sp;
				673	}
				674	/* we had a header consisting only in spaces ! */
				675	msg->eol = msg->sov;
				676	goto http_msg_complete_header;
				677
				678	case HTTP_MSG_HDR_VAL:
				679	http_msg_hdr_val:
				680	/* assumes msg->sol points to the first char, and msg->sov
				681	* points to the first character of the value.
				682	*/
				683
				684	/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
				685	* and lower. In fact since most of the time is spent in the loop, we
				686	* also remove the sign bit test so that bytes 0x8e..0x0d break the
				687	* loop, but we don't care since they're very rare in header values.
				688	*/
				689	#if defined(__x86_64__)
				690	while (ptr <= end - sizeof(long)) {
				691	if (((long )ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
				692	goto http_msg_hdr_val2;
				693	ptr += sizeof(long);
				694	}
				695	#endif
				696	#if defined(__x86_64__) \|\| \
				697	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				698	defined(__ARM_ARCH_7A__)
				699	while (ptr <= end - sizeof(int)) {
				700	if (((int)ptr - 0x0e0e0e0e) & 0x80808080)
				701	goto http_msg_hdr_val2;
				702	ptr += sizeof(int);
				703	}
				704	#endif
				705	if (ptr >= end) {
				706	state = HTTP_MSG_HDR_VAL;
				707	goto http_msg_ood;
				708	}
				709	http_msg_hdr_val2:
				710	if (likely(!HTTP_IS_CRLF(*ptr)))
				711	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
				712
				713	msg->eol = ptr - buf->p;
				714	/* Note: we could also copy eol into ->eoh so that we have the
				715	* real header end in case it ends with lots of LWS, but is this
				716	* really needed ?
				717	*/
				718	if (likely(*ptr == '\r'))
				719	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
				720	goto http_msg_hdr_l2_lf;
				721
				722	case HTTP_MSG_HDR_L2_LF:
				723	http_msg_hdr_l2_lf:
				724	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
				725	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
				726
				727	case HTTP_MSG_HDR_L2_LWS:
				728	http_msg_hdr_l2_lws:
				729	if (unlikely(HTTP_IS_SPHT(*ptr))) {
				730	/* LWS: replace HT,CR,LF with spaces */
				731	for (; buf->p + msg->eol < ptr; msg->eol++)
				732	buf->p[msg->eol] = ' ';
				733	goto http_msg_hdr_val;
				734	}
				735	http_msg_complete_header:
				736	/*
				737	* It was a new header, so the last one is finished.
				738	* Assumes msg->sol points to the first char, msg->sov points
				739	* to the first character of the value and msg->eol to the
				740	* first CR or LF so we know how the line ends. We insert last
				741	* header into the index.
				742	*/
				743	if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
				744	idx, idx->tail) < 0)) {
				745	state = HTTP_MSG_HDR_L2_LWS;
				746	goto http_msg_invalid;
				747	}
				748
				749	msg->sol = ptr - buf->p;
				750	if (likely(!HTTP_IS_CRLF(*ptr))) {
				751	goto http_msg_hdr_name;
				752	}
				753
				754	if (likely(*ptr == '\r'))
				755	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				756	goto http_msg_last_lf;
				757
				758	case HTTP_MSG_LAST_LF:
				759	http_msg_last_lf:
				760	/* Assumes msg->sol points to the first of either CR or LF.
				761	* Sets ->sov and ->next to the total header length, ->eoh to
				762	* the last CRLF, and ->eol to the last CRLF length (1 or 2).
				763	*/
				764	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
				765	ptr++;
				766	msg->sov = msg->next = ptr - buf->p;
				767	msg->eoh = msg->sol;
				768	msg->sol = 0;
				769	msg->eol = msg->sov - msg->eoh;
				770	msg->msg_state = HTTP_MSG_BODY;
				771	return;
				772
				773	case HTTP_MSG_ERROR:
				774	/* this may only happen if we call http_msg_analyser() twice with an error */
				775	break;
				776
				777	default:
				778	#ifdef DEBUG_FULL
				779	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				780	exit(1);
				781	#endif
				782	;
				783	}
				784	http_msg_ood:
				785	/* out of data */
				786	msg->msg_state = state;
				787	msg->next = ptr - buf->p;
				788	return;
				789
				790	http_msg_invalid:
				791	/* invalid message */
				792	msg->err_state = state;
				793	msg->msg_state = HTTP_MSG_ERROR;
				794	msg->next = ptr - buf->p;
				795	return;
				796	}
				797
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	798	/* This function skips trailers in the buffer associated with HTTP message
				799	* <msg>. The first visited position is msg->next. If the end of the trailers is
				800	* found, the function returns >0. So, the caller can automatically schedul it
				801	* to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
				802	* data are available, the function does not change anything except maybe
				803	* msg->sol if it could parse some lines, and returns zero. If a parse error
				804	* is encountered, the function returns < 0 and does not change anything except
				805	* maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
				806	* state before calling this function, which implies that all non-trailers data
				807	* have already been scheduled for forwarding, and that msg->next exactly
				808	* matches the length of trailers already parsed and not forwarded. It is also
				809	* important to note that this function is designed to be able to parse wrapped
				810	* headers at end of buffer.
				811	*/
				812	int http_forward_trailers(struct http_msg *msg)
				813	{
				814	const struct buffer *buf = msg->chn->buf;
				815
				816	/* we have msg->next which points to next line. Look for CRLF. But
				817	* first, we reset msg->sol */
				818	msg->sol = 0;
				819	while (1) {
				820	const char p1 = NULL, p2 = NULL;
				821	const char *start = b_ptr(buf, msg->next + msg->sol);
				822	const char *stop = bi_end(buf);
				823	const char *ptr = start;
				824	int bytes = 0;
				825
				826	/* scan current line and stop at LF or CRLF */
				827	while (1) {
				828	if (ptr == stop)
				829	return 0;
				830
				831	if (*ptr == '\n') {
				832	if (!p1)
				833	p1 = ptr;
				834	p2 = ptr;
				835	break;
				836	}
				837
				838	if (*ptr == '\r') {
				839	if (p1) {
				840	msg->err_pos = buffer_count(buf, buf->p, ptr);
				841	return -1;
				842	}
				843	p1 = ptr;
				844	}
				845
				846	ptr++;
				847	if (ptr >= buf->data + buf->size)
				848	ptr = buf->data;
				849	}
				850
				851	/* after LF; point to beginning of next line */
				852	p2++;
				853	if (p2 >= buf->data + buf->size)
				854	p2 = buf->data;
				855
				856	bytes = p2 - start;
				857	if (bytes < 0)
				858	bytes += buf->size;
				859	msg->sol += bytes;
				860
				861	/* LF/CRLF at beginning of line => end of trailers at p2.
				862	* Everything was scheduled for forwarding, there's nothing left
				863	* from this message. */
				864	if (p1 == start)
				865	return 1;
				866
				867	/* OK, next line then */
				868	}
				869	}