Blame - src/h1.c - haproxy

blob: bbfaabcf76000b7979d08044cbef38423b22a9ca [file] [log] [blame]

Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	1	/*
				2	* HTTP/1 protocol analyzer
				3	*
				4	* Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	*/
				12
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	13	#include <ctype.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	14	#include <common/config.h>
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	15	#include <common/http-hdr.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	16
				17	#include <proto/h1.h>
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	18	#include <proto/hdr_idx.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	19
				20	/* It is about twice as fast on recent architectures to lookup a byte in a
				21	* table than to perform a boolean AND or OR between two tests. Refer to
				22	* RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
				23	* neither a separator nor a CTL char. An http ver_token is any ASCII which can
				24	* be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
				25	* digit. Note: please do not overwrite values in assignment since gcc-2.95
				26	* will not handle them correctly. It's worth noting that chars 128..255 are
				27	* nothing, not even control chars.
				28	*/
				29	const unsigned char h1_char_classes[256] = {
				30	[ 0] = H1_FLG_CTL,
				31	[ 1] = H1_FLG_CTL,
				32	[ 2] = H1_FLG_CTL,
				33	[ 3] = H1_FLG_CTL,
				34	[ 4] = H1_FLG_CTL,
				35	[ 5] = H1_FLG_CTL,
				36	[ 6] = H1_FLG_CTL,
				37	[ 7] = H1_FLG_CTL,
				38	[ 8] = H1_FLG_CTL,
				39	[ 9] = H1_FLG_SPHT \| H1_FLG_LWS \| H1_FLG_SEP \| H1_FLG_CTL,
				40	[ 10] = H1_FLG_CRLF \| H1_FLG_LWS \| H1_FLG_CTL,
				41	[ 11] = H1_FLG_CTL,
				42	[ 12] = H1_FLG_CTL,
				43	[ 13] = H1_FLG_CRLF \| H1_FLG_LWS \| H1_FLG_CTL,
				44	[ 14] = H1_FLG_CTL,
				45	[ 15] = H1_FLG_CTL,
				46	[ 16] = H1_FLG_CTL,
				47	[ 17] = H1_FLG_CTL,
				48	[ 18] = H1_FLG_CTL,
				49	[ 19] = H1_FLG_CTL,
				50	[ 20] = H1_FLG_CTL,
				51	[ 21] = H1_FLG_CTL,
				52	[ 22] = H1_FLG_CTL,
				53	[ 23] = H1_FLG_CTL,
				54	[ 24] = H1_FLG_CTL,
				55	[ 25] = H1_FLG_CTL,
				56	[ 26] = H1_FLG_CTL,
				57	[ 27] = H1_FLG_CTL,
				58	[ 28] = H1_FLG_CTL,
				59	[ 29] = H1_FLG_CTL,
				60	[ 30] = H1_FLG_CTL,
				61	[ 31] = H1_FLG_CTL,
				62	[' '] = H1_FLG_SPHT \| H1_FLG_LWS \| H1_FLG_SEP,
				63	['!'] = H1_FLG_TOK,
				64	['"'] = H1_FLG_SEP,
				65	['#'] = H1_FLG_TOK,
				66	['$'] = H1_FLG_TOK,
				67	['%'] = H1_FLG_TOK,
				68	['&'] = H1_FLG_TOK,
				69	[ 39] = H1_FLG_TOK,
				70	['('] = H1_FLG_SEP,
				71	[')'] = H1_FLG_SEP,
				72	['*'] = H1_FLG_TOK,
				73	['+'] = H1_FLG_TOK,
				74	[','] = H1_FLG_SEP,
				75	['-'] = H1_FLG_TOK,
				76	['.'] = H1_FLG_TOK \| H1_FLG_VER,
				77	['/'] = H1_FLG_SEP \| H1_FLG_VER,
				78	['0'] = H1_FLG_TOK \| H1_FLG_VER,
				79	['1'] = H1_FLG_TOK \| H1_FLG_VER,
				80	['2'] = H1_FLG_TOK \| H1_FLG_VER,
				81	['3'] = H1_FLG_TOK \| H1_FLG_VER,
				82	['4'] = H1_FLG_TOK \| H1_FLG_VER,
				83	['5'] = H1_FLG_TOK \| H1_FLG_VER,
				84	['6'] = H1_FLG_TOK \| H1_FLG_VER,
				85	['7'] = H1_FLG_TOK \| H1_FLG_VER,
				86	['8'] = H1_FLG_TOK \| H1_FLG_VER,
				87	['9'] = H1_FLG_TOK \| H1_FLG_VER,
				88	[':'] = H1_FLG_SEP,
				89	[';'] = H1_FLG_SEP,
				90	['<'] = H1_FLG_SEP,
				91	['='] = H1_FLG_SEP,
				92	['>'] = H1_FLG_SEP,
				93	['?'] = H1_FLG_SEP,
				94	['@'] = H1_FLG_SEP,
				95	['A'] = H1_FLG_TOK,
				96	['B'] = H1_FLG_TOK,
				97	['C'] = H1_FLG_TOK,
				98	['D'] = H1_FLG_TOK,
				99	['E'] = H1_FLG_TOK,
				100	['F'] = H1_FLG_TOK,
				101	['G'] = H1_FLG_TOK,
				102	['H'] = H1_FLG_TOK \| H1_FLG_VER,
				103	['I'] = H1_FLG_TOK,
				104	['J'] = H1_FLG_TOK,
				105	['K'] = H1_FLG_TOK,
				106	['L'] = H1_FLG_TOK,
				107	['M'] = H1_FLG_TOK,
				108	['N'] = H1_FLG_TOK,
				109	['O'] = H1_FLG_TOK,
				110	['P'] = H1_FLG_TOK \| H1_FLG_VER,
				111	['Q'] = H1_FLG_TOK,
				112	['R'] = H1_FLG_TOK \| H1_FLG_VER,
				113	['S'] = H1_FLG_TOK \| H1_FLG_VER,
				114	['T'] = H1_FLG_TOK \| H1_FLG_VER,
				115	['U'] = H1_FLG_TOK,
				116	['V'] = H1_FLG_TOK,
				117	['W'] = H1_FLG_TOK,
				118	['X'] = H1_FLG_TOK,
				119	['Y'] = H1_FLG_TOK,
				120	['Z'] = H1_FLG_TOK,
				121	['['] = H1_FLG_SEP,
				122	[ 92] = H1_FLG_SEP,
				123	[']'] = H1_FLG_SEP,
				124	['^'] = H1_FLG_TOK,
				125	['_'] = H1_FLG_TOK,
				126	['`'] = H1_FLG_TOK,
				127	['a'] = H1_FLG_TOK,
				128	['b'] = H1_FLG_TOK,
				129	['c'] = H1_FLG_TOK,
				130	['d'] = H1_FLG_TOK,
				131	['e'] = H1_FLG_TOK,
				132	['f'] = H1_FLG_TOK,
				133	['g'] = H1_FLG_TOK,
				134	['h'] = H1_FLG_TOK,
				135	['i'] = H1_FLG_TOK,
				136	['j'] = H1_FLG_TOK,
				137	['k'] = H1_FLG_TOK,
				138	['l'] = H1_FLG_TOK,
				139	['m'] = H1_FLG_TOK,
				140	['n'] = H1_FLG_TOK,
				141	['o'] = H1_FLG_TOK,
				142	['p'] = H1_FLG_TOK,
				143	['q'] = H1_FLG_TOK,
				144	['r'] = H1_FLG_TOK,
				145	['s'] = H1_FLG_TOK,
				146	['t'] = H1_FLG_TOK,
				147	['u'] = H1_FLG_TOK,
				148	['v'] = H1_FLG_TOK,
				149	['w'] = H1_FLG_TOK,
				150	['x'] = H1_FLG_TOK,
				151	['y'] = H1_FLG_TOK,
				152	['z'] = H1_FLG_TOK,
				153	['{'] = H1_FLG_SEP,
				154	['\|'] = H1_FLG_TOK,
				155	['}'] = H1_FLG_SEP,
				156	['~'] = H1_FLG_TOK,
				157	[127] = H1_FLG_CTL,
				158	};
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	159
				160
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	161	/*
				162	* This function parses a status line between <ptr> and <end>, starting with
				163	* parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
				164	* HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
				165	* will give undefined results.
				166	* Note that it is upon the caller's responsibility to ensure that ptr < end,
				167	* and that msg->sol points to the beginning of the response.
				168	* If a complete line is found (which implies that at least one CR or LF is
				169	* found before <end>, the updated <ptr> is returned, otherwise NULL is
				170	* returned indicating an incomplete line (which does not mean that parts have
				171	* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
				172	* non-NULL, they are fed with the new <ptr> and <state> values to be passed
				173	* upon next call.
				174	*
				175	* This function was intentionally designed to be called from
				176	* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
				177	* within its state machine and use the same macros, hence the need for same
				178	* labels and variable names. Note that msg->sol is left unchanged.
				179	*/
				180	const char http_parse_stsline(struct http_msg msg,
				181	enum h1_state state, const char ptr, const char end,
				182	unsigned int ret_ptr, enum h1_state ret_state)
				183	{
				184	const char *msg_start = msg->chn->buf->p;
				185
				186	switch (state) {
				187	case HTTP_MSG_RPVER:
				188	http_msg_rpver:
				189	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				190	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
				191
				192	if (likely(HTTP_IS_SPHT(*ptr))) {
				193	msg->sl.st.v_l = ptr - msg_start;
				194	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				195	}
				196	msg->err_state = HTTP_MSG_RPVER;
				197	state = HTTP_MSG_ERROR;
				198	break;
				199
				200	case HTTP_MSG_RPVER_SP:
				201	http_msg_rpver_sp:
				202	if (likely(!HTTP_IS_LWS(*ptr))) {
				203	msg->sl.st.c = ptr - msg_start;
				204	goto http_msg_rpcode;
				205	}
				206	if (likely(HTTP_IS_SPHT(*ptr)))
				207	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				208	/* so it's a CR/LF, this is invalid */
				209	msg->err_state = HTTP_MSG_RPVER_SP;
				210	state = HTTP_MSG_ERROR;
				211	break;
				212
				213	case HTTP_MSG_RPCODE:
				214	http_msg_rpcode:
				215	if (likely(!HTTP_IS_LWS(*ptr)))
				216	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
				217
				218	if (likely(HTTP_IS_SPHT(*ptr))) {
				219	msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
				220	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				221	}
				222
				223	/* so it's a CR/LF, so there is no reason phrase */
				224	msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
				225	http_msg_rsp_reason:
				226	/* FIXME: should we support HTTP responses without any reason phrase ? */
				227	msg->sl.st.r = ptr - msg_start;
				228	msg->sl.st.r_l = 0;
				229	goto http_msg_rpline_eol;
				230
				231	case HTTP_MSG_RPCODE_SP:
				232	http_msg_rpcode_sp:
				233	if (likely(!HTTP_IS_LWS(*ptr))) {
				234	msg->sl.st.r = ptr - msg_start;
				235	goto http_msg_rpreason;
				236	}
				237	if (likely(HTTP_IS_SPHT(*ptr)))
				238	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				239	/* so it's a CR/LF, so there is no reason phrase */
				240	goto http_msg_rsp_reason;
				241
				242	case HTTP_MSG_RPREASON:
				243	http_msg_rpreason:
				244	if (likely(!HTTP_IS_CRLF(*ptr)))
				245	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
				246	msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
				247	http_msg_rpline_eol:
				248	/* We have seen the end of line. Note that we do not
				249	* necessarily have the \n yet, but at least we know that we
				250	* have EITHER \r OR \n, otherwise the response would not be
				251	* complete. We can then record the response length and return
				252	* to the caller which will be able to register it.
				253	*/
				254	msg->sl.st.l = ptr - msg_start - msg->sol;
				255	return ptr;
				256
				257	default:
				258	#ifdef DEBUG_FULL
				259	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				260	exit(1);
				261	#endif
				262	;
				263	}
				264
				265	http_msg_ood:
				266	/* out of valid data */
				267	if (ret_state)
				268	*ret_state = state;
				269	if (ret_ptr)
				270	*ret_ptr = ptr - msg_start;
				271	return NULL;
				272	}
				273
				274	/*
				275	* This function parses a request line between <ptr> and <end>, starting with
				276	* parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
				277	* HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
				278	* will give undefined results.
				279	* Note that it is upon the caller's responsibility to ensure that ptr < end,
				280	* and that msg->sol points to the beginning of the request.
				281	* If a complete line is found (which implies that at least one CR or LF is
				282	* found before <end>, the updated <ptr> is returned, otherwise NULL is
				283	* returned indicating an incomplete line (which does not mean that parts have
				284	* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
				285	* non-NULL, they are fed with the new <ptr> and <state> values to be passed
				286	* upon next call.
				287	*
				288	* This function was intentionally designed to be called from
				289	* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
				290	* within its state machine and use the same macros, hence the need for same
				291	* labels and variable names. Note that msg->sol is left unchanged.
				292	*/
				293	const char http_parse_reqline(struct http_msg msg,
				294	enum h1_state state, const char ptr, const char end,
				295	unsigned int ret_ptr, enum h1_state ret_state)
				296	{
				297	const char *msg_start = msg->chn->buf->p;
				298
				299	switch (state) {
				300	case HTTP_MSG_RQMETH:
				301	http_msg_rqmeth:
				302	if (likely(HTTP_IS_TOKEN(*ptr)))
				303	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
				304
				305	if (likely(HTTP_IS_SPHT(*ptr))) {
				306	msg->sl.rq.m_l = ptr - msg_start;
				307	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
				308	}
				309
				310	if (likely(HTTP_IS_CRLF(*ptr))) {
				311	/* HTTP 0.9 request */
				312	msg->sl.rq.m_l = ptr - msg_start;
				313	http_msg_req09_uri:
				314	msg->sl.rq.u = ptr - msg_start;
				315	http_msg_req09_uri_e:
				316	msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
				317	http_msg_req09_ver:
				318	msg->sl.rq.v = ptr - msg_start;
				319	msg->sl.rq.v_l = 0;
				320	goto http_msg_rqline_eol;
				321	}
				322	msg->err_state = HTTP_MSG_RQMETH;
				323	state = HTTP_MSG_ERROR;
				324	break;
				325
				326	case HTTP_MSG_RQMETH_SP:
				327	http_msg_rqmeth_sp:
				328	if (likely(!HTTP_IS_LWS(*ptr))) {
				329	msg->sl.rq.u = ptr - msg_start;
				330	goto http_msg_rquri;
				331	}
				332	if (likely(HTTP_IS_SPHT(*ptr)))
				333	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
				334	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				335	goto http_msg_req09_uri;
				336
				337	case HTTP_MSG_RQURI:
				338	http_msg_rquri:
				339	#if defined(__x86_64__) \|\| \
				340	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				341	defined(__ARM_ARCH_7A__)
				342	/* speedup: skip bytes not between 0x21 and 0x7e inclusive */
				343	while (ptr <= end - sizeof(int)) {
				344	int x = (int )ptr - 0x21212121;
				345	if (x & 0x80808080)
				346	break;
				347
				348	x -= 0x5e5e5e5e;
				349	if (!(x & 0x80808080))
				350	break;
				351
				352	ptr += sizeof(int);
				353	}
				354	#endif
				355	if (ptr >= end) {
				356	state = HTTP_MSG_RQURI;
				357	goto http_msg_ood;
				358	}
				359	http_msg_rquri2:
				360	if (likely((unsigned char)(ptr - 33) <= 93)) / 33 to 126 included */
				361	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
				362
				363	if (likely(HTTP_IS_SPHT(*ptr))) {
				364	msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
				365	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
				366	}
				367
				368	if (likely((unsigned char)*ptr >= 128)) {
				369	/* non-ASCII chars are forbidden unless option
				370	* accept-invalid-http-request is enabled in the frontend.
				371	* In any case, we capture the faulty char.
				372	*/
				373	if (msg->err_pos < -1)
				374	goto invalid_char;
				375	if (msg->err_pos == -1)
				376	msg->err_pos = ptr - msg_start;
				377	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
				378	}
				379
				380	if (likely(HTTP_IS_CRLF(*ptr))) {
				381	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				382	goto http_msg_req09_uri_e;
				383	}
				384
				385	/* OK forbidden chars, 0..31 or 127 */
				386	invalid_char:
				387	msg->err_pos = ptr - msg_start;
				388	msg->err_state = HTTP_MSG_RQURI;
				389	state = HTTP_MSG_ERROR;
				390	break;
				391
				392	case HTTP_MSG_RQURI_SP:
				393	http_msg_rquri_sp:
				394	if (likely(!HTTP_IS_LWS(*ptr))) {
				395	msg->sl.rq.v = ptr - msg_start;
				396	goto http_msg_rqver;
				397	}
				398	if (likely(HTTP_IS_SPHT(*ptr)))
				399	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
				400	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				401	goto http_msg_req09_ver;
				402
				403	case HTTP_MSG_RQVER:
				404	http_msg_rqver:
				405	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				406	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
				407
				408	if (likely(HTTP_IS_CRLF(*ptr))) {
				409	msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
				410	http_msg_rqline_eol:
				411	/* We have seen the end of line. Note that we do not
				412	* necessarily have the \n yet, but at least we know that we
				413	* have EITHER \r OR \n, otherwise the request would not be
				414	* complete. We can then record the request length and return
				415	* to the caller which will be able to register it.
				416	*/
				417	msg->sl.rq.l = ptr - msg_start - msg->sol;
				418	return ptr;
				419	}
				420
				421	/* neither an HTTP_VER token nor a CRLF */
				422	msg->err_state = HTTP_MSG_RQVER;
				423	state = HTTP_MSG_ERROR;
				424	break;
				425
				426	default:
				427	#ifdef DEBUG_FULL
				428	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				429	exit(1);
				430	#endif
				431	;
				432	}
				433
				434	http_msg_ood:
				435	/* out of valid data */
				436	if (ret_state)
				437	*ret_state = state;
				438	if (ret_ptr)
				439	*ret_ptr = ptr - msg_start;
				440	return NULL;
				441	}
				442
				443	/*
				444	* This function parses an HTTP message, either a request or a response,
				445	* depending on the initial msg->msg_state. The caller is responsible for
				446	* ensuring that the message does not wrap. The function can be preempted
				447	* everywhere when data are missing and recalled at the exact same location
				448	* with no information loss. The message may even be realigned between two
				449	* calls. The header index is re-initialized when switching from
				450	* MSG_R[PQ]BEFORE to MSG_RPVER\|MSG_RQMETH. It modifies msg->sol among other
				451	* fields. Note that msg->sol will be initialized after completing the first
				452	* state, so that none of the msg pointers has to be initialized prior to the
				453	* first call.
				454	*/
				455	void http_msg_analyzer(struct http_msg msg, struct hdr_idx idx)
				456	{
				457	enum h1_state state; /* updated only when leaving the FSM */
				458	register char ptr, end; /* request pointers, to avoid dereferences */
				459	struct buffer *buf;
				460
				461	state = msg->msg_state;
				462	buf = msg->chn->buf;
				463	ptr = buf->p + msg->next;
				464	end = buf->p + buf->i;
				465
				466	if (unlikely(ptr >= end))
				467	goto http_msg_ood;
				468
				469	switch (state) {
				470	/*
				471	* First, states that are specific to the response only.
				472	* We check them first so that request and headers are
				473	* closer to each other (accessed more often).
				474	*/
				475	case HTTP_MSG_RPBEFORE:
				476	http_msg_rpbefore:
				477	if (likely(HTTP_IS_TOKEN(*ptr))) {
				478	/* we have a start of message, but we have to check
				479	* first if we need to remove some CRLF. We can only
				480	* do this when o=0.
				481	*/
				482	if (unlikely(ptr != buf->p)) {
				483	if (buf->o)
				484	goto http_msg_ood;
				485	/* Remove empty leading lines, as recommended by RFC2616. */
				486	bi_fast_delete(buf, ptr - buf->p);
				487	}
				488	msg->sol = 0;
				489	msg->sl.st.l = 0; /* used in debug mode */
				490	hdr_idx_init(idx);
				491	state = HTTP_MSG_RPVER;
				492	goto http_msg_rpver;
				493	}
				494
				495	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				496	state = HTTP_MSG_RPBEFORE;
				497	goto http_msg_invalid;
				498	}
				499
				500	if (unlikely(*ptr == '\n'))
				501	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				502	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
				503	/* stop here */
				504
				505	case HTTP_MSG_RPBEFORE_CR:
				506	http_msg_rpbefore_cr:
				507	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
				508	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				509	/* stop here */
				510
				511	case HTTP_MSG_RPVER:
				512	http_msg_rpver:
				513	case HTTP_MSG_RPVER_SP:
				514	case HTTP_MSG_RPCODE:
				515	case HTTP_MSG_RPCODE_SP:
				516	case HTTP_MSG_RPREASON:
				517	ptr = (char *)http_parse_stsline(msg,
				518	state, ptr, end,
				519	&msg->next, &msg->msg_state);
				520	if (unlikely(!ptr))
				521	return;
				522
				523	/* we have a full response and we know that we have either a CR
				524	* or an LF at <ptr>.
				525	*/
				526	hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
				527
				528	msg->sol = ptr - buf->p;
				529	if (likely(*ptr == '\r'))
				530	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
				531	goto http_msg_rpline_end;
				532
				533	case HTTP_MSG_RPLINE_END:
				534	http_msg_rpline_end:
				535	/* msg->sol must point to the first of CR or LF. */
				536	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
				537	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				538	/* stop here */
				539
				540	/*
				541	* Second, states that are specific to the request only
				542	*/
				543	case HTTP_MSG_RQBEFORE:
				544	http_msg_rqbefore:
				545	if (likely(HTTP_IS_TOKEN(*ptr))) {
				546	/* we have a start of message, but we have to check
				547	* first if we need to remove some CRLF. We can only
				548	* do this when o=0.
				549	*/
				550	if (likely(ptr != buf->p)) {
				551	if (buf->o)
				552	goto http_msg_ood;
				553	/* Remove empty leading lines, as recommended by RFC2616. */
				554	bi_fast_delete(buf, ptr - buf->p);
				555	}
				556	msg->sol = 0;
				557	msg->sl.rq.l = 0; /* used in debug mode */
				558	state = HTTP_MSG_RQMETH;
				559	goto http_msg_rqmeth;
				560	}
				561
				562	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				563	state = HTTP_MSG_RQBEFORE;
				564	goto http_msg_invalid;
				565	}
				566
				567	if (unlikely(*ptr == '\n'))
				568	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
				569	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
				570	/* stop here */
				571
				572	case HTTP_MSG_RQBEFORE_CR:
				573	http_msg_rqbefore_cr:
				574	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
				575	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
				576	/* stop here */
				577
				578	case HTTP_MSG_RQMETH:
				579	http_msg_rqmeth:
				580	case HTTP_MSG_RQMETH_SP:
				581	case HTTP_MSG_RQURI:
				582	case HTTP_MSG_RQURI_SP:
				583	case HTTP_MSG_RQVER:
				584	ptr = (char *)http_parse_reqline(msg,
				585	state, ptr, end,
				586	&msg->next, &msg->msg_state);
				587	if (unlikely(!ptr))
				588	return;
				589
				590	/* we have a full request and we know that we have either a CR
				591	* or an LF at <ptr>.
				592	*/
				593	hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
				594
				595	msg->sol = ptr - buf->p;
				596	if (likely(*ptr == '\r'))
				597	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
				598	goto http_msg_rqline_end;
				599
				600	case HTTP_MSG_RQLINE_END:
				601	http_msg_rqline_end:
				602	/* check for HTTP/0.9 request : no version information available.
				603	* msg->sol must point to the first of CR or LF.
				604	*/
				605	if (unlikely(msg->sl.rq.v_l == 0))
				606	goto http_msg_last_lf;
				607
				608	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
				609	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				610	/* stop here */
				611
				612	/*
				613	* Common states below
				614	*/
				615	case HTTP_MSG_HDR_FIRST:
				616	http_msg_hdr_first:
				617	msg->sol = ptr - buf->p;
				618	if (likely(!HTTP_IS_CRLF(*ptr))) {
				619	goto http_msg_hdr_name;
				620	}
				621
				622	if (likely(*ptr == '\r'))
				623	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				624	goto http_msg_last_lf;
				625
				626	case HTTP_MSG_HDR_NAME:
				627	http_msg_hdr_name:
				628	/* assumes msg->sol points to the first char */
				629	if (likely(HTTP_IS_TOKEN(*ptr)))
				630	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				631
				632	if (likely(*ptr == ':'))
				633	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				634
				635	if (likely(msg->err_pos < -1) \|\| *ptr == '\n') {
				636	state = HTTP_MSG_HDR_NAME;
				637	goto http_msg_invalid;
				638	}
				639
				640	if (msg->err_pos == -1) /* capture error pointer */
				641	msg->err_pos = ptr - buf->p; /* >= 0 now */
				642
				643	/* and we still accept this non-token character */
				644	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				645
				646	case HTTP_MSG_HDR_L1_SP:
				647	http_msg_hdr_l1_sp:
				648	/* assumes msg->sol points to the first char */
				649	if (likely(HTTP_IS_SPHT(*ptr)))
				650	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				651
				652	/* header value can be basically anything except CR/LF */
				653	msg->sov = ptr - buf->p;
				654
				655	if (likely(!HTTP_IS_CRLF(*ptr))) {
				656	goto http_msg_hdr_val;
				657	}
				658
				659	if (likely(*ptr == '\r'))
				660	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
				661	goto http_msg_hdr_l1_lf;
				662
				663	case HTTP_MSG_HDR_L1_LF:
				664	http_msg_hdr_l1_lf:
				665	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
				666	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
				667
				668	case HTTP_MSG_HDR_L1_LWS:
				669	http_msg_hdr_l1_lws:
				670	if (likely(HTTP_IS_SPHT(*ptr))) {
				671	/* replace HT,CR,LF with spaces */
				672	for (; buf->p + msg->sov < ptr; msg->sov++)
				673	buf->p[msg->sov] = ' ';
				674	goto http_msg_hdr_l1_sp;
				675	}
				676	/* we had a header consisting only in spaces ! */
				677	msg->eol = msg->sov;
				678	goto http_msg_complete_header;
				679
				680	case HTTP_MSG_HDR_VAL:
				681	http_msg_hdr_val:
				682	/* assumes msg->sol points to the first char, and msg->sov
				683	* points to the first character of the value.
				684	*/
				685
				686	/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
				687	* and lower. In fact since most of the time is spent in the loop, we
				688	* also remove the sign bit test so that bytes 0x8e..0x0d break the
				689	* loop, but we don't care since they're very rare in header values.
				690	*/
				691	#if defined(__x86_64__)
				692	while (ptr <= end - sizeof(long)) {
				693	if (((long )ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
				694	goto http_msg_hdr_val2;
				695	ptr += sizeof(long);
				696	}
				697	#endif
				698	#if defined(__x86_64__) \|\| \
				699	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				700	defined(__ARM_ARCH_7A__)
				701	while (ptr <= end - sizeof(int)) {
				702	if (((int)ptr - 0x0e0e0e0e) & 0x80808080)
				703	goto http_msg_hdr_val2;
				704	ptr += sizeof(int);
				705	}
				706	#endif
				707	if (ptr >= end) {
				708	state = HTTP_MSG_HDR_VAL;
				709	goto http_msg_ood;
				710	}
				711	http_msg_hdr_val2:
				712	if (likely(!HTTP_IS_CRLF(*ptr)))
				713	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
				714
				715	msg->eol = ptr - buf->p;
				716	/* Note: we could also copy eol into ->eoh so that we have the
				717	* real header end in case it ends with lots of LWS, but is this
				718	* really needed ?
				719	*/
				720	if (likely(*ptr == '\r'))
				721	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
				722	goto http_msg_hdr_l2_lf;
				723
				724	case HTTP_MSG_HDR_L2_LF:
				725	http_msg_hdr_l2_lf:
				726	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
				727	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
				728
				729	case HTTP_MSG_HDR_L2_LWS:
				730	http_msg_hdr_l2_lws:
				731	if (unlikely(HTTP_IS_SPHT(*ptr))) {
				732	/* LWS: replace HT,CR,LF with spaces */
				733	for (; buf->p + msg->eol < ptr; msg->eol++)
				734	buf->p[msg->eol] = ' ';
				735	goto http_msg_hdr_val;
				736	}
				737	http_msg_complete_header:
				738	/*
				739	* It was a new header, so the last one is finished.
				740	* Assumes msg->sol points to the first char, msg->sov points
				741	* to the first character of the value and msg->eol to the
				742	* first CR or LF so we know how the line ends. We insert last
				743	* header into the index.
				744	*/
				745	if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
				746	idx, idx->tail) < 0)) {
				747	state = HTTP_MSG_HDR_L2_LWS;
				748	goto http_msg_invalid;
				749	}
				750
				751	msg->sol = ptr - buf->p;
				752	if (likely(!HTTP_IS_CRLF(*ptr))) {
				753	goto http_msg_hdr_name;
				754	}
				755
				756	if (likely(*ptr == '\r'))
				757	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				758	goto http_msg_last_lf;
				759
				760	case HTTP_MSG_LAST_LF:
				761	http_msg_last_lf:
				762	/* Assumes msg->sol points to the first of either CR or LF.
				763	* Sets ->sov and ->next to the total header length, ->eoh to
				764	* the last CRLF, and ->eol to the last CRLF length (1 or 2).
				765	*/
				766	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
				767	ptr++;
				768	msg->sov = msg->next = ptr - buf->p;
				769	msg->eoh = msg->sol;
				770	msg->sol = 0;
				771	msg->eol = msg->sov - msg->eoh;
				772	msg->msg_state = HTTP_MSG_BODY;
				773	return;
				774
				775	case HTTP_MSG_ERROR:
				776	/* this may only happen if we call http_msg_analyser() twice with an error */
				777	break;
				778
				779	default:
				780	#ifdef DEBUG_FULL
				781	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				782	exit(1);
				783	#endif
				784	;
				785	}
				786	http_msg_ood:
				787	/* out of data */
				788	msg->msg_state = state;
				789	msg->next = ptr - buf->p;
				790	return;
				791
				792	http_msg_invalid:
				793	/* invalid message */
				794	msg->err_state = state;
				795	msg->msg_state = HTTP_MSG_ERROR;
				796	msg->next = ptr - buf->p;
				797	return;
				798	}
				799
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	800	/* This function parses a contiguous HTTP/1 headers block starting at <start>
				801	* and ending before <stop>, at once, and converts it to a list of (name,value)
				802	* pairs representing header fields into the array <hdr> of size <hdr_num>,
				803	* whose last entry will have an empty name and an empty value. If <hdr_num> is
				804	* too small to represent the whole message, an error is returned. If <h1m> is
				805	* not NULL, some protocol elements such as content-length and transfer-encoding
				806	* will be parsed and stored there as well.
				807	*
				808	* For now it's limited to the response. If the header block is incomplete,
				809	* 0 is returned, waiting to be called again with more data to try it again.
				810	*
				811	* The code derived from the main HTTP/1 parser above but was simplified and
				812	* optimized to process responses produced or forwarded by haproxy. The caller
				813	* is responsible for ensuring that the message doesn't wrap, and should ensure
				814	* it is complete to avoid having to retry the operation after a failed
				815	* attempt. The message is not supposed to be invalid, which is why a few
				816	* properties such as the character set used in the header field names are not
				817	* checked. In case of an unparsable response message, a negative value will be
				818	* returned with h1m->err_pos and h1m->err_state matching the location and
				819	* state where the error was met. Leading blank lines are tolerated but not
				820	* recommended.
				821	*
				822	* This function returns :
				823	* -1 in case of error. In this case, h1m->err_state is filled (if h1m is
				824	* set) with the state the error occurred in and h2-m>err_pos with the
				825	* the position relative to <start>
				826	* -2 if the output is full (hdr_num reached). err_state and err_pos also
				827	* indicate where it failed.
				828	* 0 in case of missing data.
				829	* > 0 on success, it then corresponds to the number of bytes read since
				830	* <start> so that the caller can go on with the payload.
				831	*/
				832	int h1_headers_to_hdr_list(char start, const char stop,
				833	struct http_hdr *hdr, unsigned int hdr_num,
				834	struct h1m *h1m)
				835	{
				836	enum h1_state state = HTTP_MSG_RPBEFORE;
				837	register char *ptr = start;
				838	register const char *end = stop;
				839	unsigned int hdr_count = 0;
				840	unsigned int code = 0; /* status code, ASCII form */
				841	unsigned int st_c; /* beginning of status code, relative to msg_start */
				842	unsigned int st_c_l; /* length of status code */
				843	unsigned int sol = 0; /* start of line */
				844	unsigned int col = 0; /* position of the colon */
				845	unsigned int eol = 0; /* end of line */
				846	unsigned int sov = 0; /* start of value */
				847	unsigned int skip = 0; /* number of bytes skipped at the beginning */
				848	struct ist n, v; /* header name and value during parsing */
				849
				850	if (unlikely(ptr >= end))
				851	goto http_msg_ood;
				852
				853	switch (state) {
				854	case HTTP_MSG_RPBEFORE:
				855	http_msg_rpbefore:
				856	if (likely(HTTP_IS_TOKEN(*ptr))) {
				857	/* we have a start of message, we may have skipped some
				858	* heading CRLF. Skip them now.
				859	*/
				860	skip += ptr - start;
				861	start = ptr;
				862
				863	sol = 0;
				864	hdr_count = 0;
				865	state = HTTP_MSG_RPVER;
				866	goto http_msg_rpver;
				867	}
				868
				869	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				870	state = HTTP_MSG_RPBEFORE;
				871	goto http_msg_invalid;
				872	}
				873
				874	if (unlikely(*ptr == '\n'))
				875	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				876	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
				877	/* stop here */
				878
				879	case HTTP_MSG_RPBEFORE_CR:
				880	http_msg_rpbefore_cr:
				881	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
				882	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				883	/* stop here */
				884
				885	case HTTP_MSG_RPVER:
				886	http_msg_rpver:
				887	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				888	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
				889
				890	if (likely(HTTP_IS_SPHT(*ptr))) {
				891	/* version length = ptr - start */
				892	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				893	}
				894	state = HTTP_MSG_RPVER;
				895	goto http_msg_invalid;
				896
				897	case HTTP_MSG_RPVER_SP:
				898	http_msg_rpver_sp:
				899	if (likely(!HTTP_IS_LWS(*ptr))) {
				900	code = 0;
				901	st_c = ptr - start;
				902	goto http_msg_rpcode;
				903	}
				904	if (likely(HTTP_IS_SPHT(*ptr)))
				905	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				906	/* so it's a CR/LF, this is invalid */
				907	state = HTTP_MSG_RPVER_SP;
				908	goto http_msg_invalid;
				909
				910	case HTTP_MSG_RPCODE:
				911	http_msg_rpcode:
				912	if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau	d22e83a	2017-10-31 08:02:24 +0100	[diff] [blame]	913	code = code * 10 + *ptr - '0';
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	914	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
				915	}
				916
				917	if (likely(HTTP_IS_SPHT(*ptr))) {
				918	st_c_l = ptr - start - st_c;
				919	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				920	}
				921
				922	/* so it's a CR/LF, so there is no reason phrase */
				923	st_c_l = ptr - start - st_c;
				924
				925	http_msg_rsp_reason:
				926	/* reason = ptr - start; */
				927	/* reason length = 0 */
				928	goto http_msg_rpline_eol;
				929
				930	case HTTP_MSG_RPCODE_SP:
				931	http_msg_rpcode_sp:
				932	if (likely(!HTTP_IS_LWS(*ptr))) {
				933	/* reason = ptr - start */
				934	goto http_msg_rpreason;
				935	}
				936	if (likely(HTTP_IS_SPHT(*ptr)))
				937	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				938	/* so it's a CR/LF, so there is no reason phrase */
				939	goto http_msg_rsp_reason;
				940
				941	case HTTP_MSG_RPREASON:
				942	http_msg_rpreason:
				943	if (likely(!HTTP_IS_CRLF(*ptr)))
				944	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
				945	/* reason length = ptr - start - reason */
				946	http_msg_rpline_eol:
				947	/* We have seen the end of line. Note that we do not
				948	* necessarily have the \n yet, but at least we know that we
				949	* have EITHER \r OR \n, otherwise the response would not be
				950	* complete. We can then record the response length and return
				951	* to the caller which will be able to register it.
				952	*/
				953
				954	if (unlikely(hdr_count >= hdr_num)) {
				955	state = HTTP_MSG_RPREASON;
				956	goto http_output_full;
				957	}
				958	http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + st_c, st_c_l));
Willy Tarreau	d22e83a	2017-10-31 08:02:24 +0100	[diff] [blame]	959	if (h1m)
				960	h1m->status = code;
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	961
				962	sol = ptr - start;
				963	if (likely(*ptr == '\r'))
				964	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
				965	goto http_msg_rpline_end;
				966
				967	case HTTP_MSG_RPLINE_END:
				968	http_msg_rpline_end:
				969	/* sol must point to the first of CR or LF. */
				970	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
				971	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				972	/* stop here */
				973
				974	case HTTP_MSG_HDR_FIRST:
				975	http_msg_hdr_first:
				976	sol = ptr - start;
				977	if (likely(!HTTP_IS_CRLF(*ptr))) {
				978	goto http_msg_hdr_name;
				979	}
				980
				981	if (likely(*ptr == '\r'))
				982	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				983	goto http_msg_last_lf;
				984
				985	case HTTP_MSG_HDR_NAME:
				986	http_msg_hdr_name:
				987	/* assumes sol points to the first char */
				988	if (likely(HTTP_IS_TOKEN(*ptr))) {
				989	/* turn it to lower case if needed */
				990	if (isupper((unsigned char)*ptr))
				991	ptr = tolower(ptr);
				992	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				993	}
				994
				995	if (likely(*ptr == ':')) {
				996	col = ptr - start;
				997	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				998	}
				999
				1000	if (HTTP_IS_LWS(*ptr)) {
				1001	state = HTTP_MSG_HDR_NAME;
				1002	goto http_msg_invalid;
				1003	}
				1004
				1005	/* now we have a non-token character in the header field name,
				1006	* it's up to the H1 layer to have decided whether or not it
				1007	* was acceptable. If we find it here, it was considered
				1008	* acceptable due to configuration rules so we obey.
				1009	*/
				1010	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				1011
				1012	case HTTP_MSG_HDR_L1_SP:
				1013	http_msg_hdr_l1_sp:
				1014	/* assumes sol points to the first char */
				1015	if (likely(HTTP_IS_SPHT(*ptr)))
				1016	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				1017
				1018	/* header value can be basically anything except CR/LF */
				1019	sov = ptr - start;
				1020
				1021	if (likely(!HTTP_IS_CRLF(*ptr))) {
				1022	goto http_msg_hdr_val;
				1023	}
				1024
				1025	if (likely(*ptr == '\r'))
				1026	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
				1027	goto http_msg_hdr_l1_lf;
				1028
				1029	case HTTP_MSG_HDR_L1_LF:
				1030	http_msg_hdr_l1_lf:
				1031	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
				1032	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
				1033
				1034	case HTTP_MSG_HDR_L1_LWS:
				1035	http_msg_hdr_l1_lws:
				1036	if (likely(HTTP_IS_SPHT(*ptr))) {
				1037	/* replace HT,CR,LF with spaces */
				1038	for (; start + sov < ptr; sov++)
				1039	start[sov] = ' ';
				1040	goto http_msg_hdr_l1_sp;
				1041	}
				1042	/* we had a header consisting only in spaces ! */
				1043	eol = sov;
				1044	goto http_msg_complete_header;
				1045
				1046	case HTTP_MSG_HDR_VAL:
				1047	http_msg_hdr_val:
				1048	/* assumes sol points to the first char, and sov
				1049	* points to the first character of the value.
				1050	*/
				1051
				1052	/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
				1053	* and lower. In fact since most of the time is spent in the loop, we
				1054	* also remove the sign bit test so that bytes 0x8e..0x0d break the
				1055	* loop, but we don't care since they're very rare in header values.
				1056	*/
				1057	#if defined(__x86_64__)
				1058	while (ptr <= end - sizeof(long)) {
				1059	if (((long )ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
				1060	goto http_msg_hdr_val2;
				1061	ptr += sizeof(long);
				1062	}
				1063	#endif
				1064	#if defined(__x86_64__) \|\| \
				1065	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				1066	defined(__ARM_ARCH_7A__)
				1067	while (ptr <= end - sizeof(int)) {
				1068	if (((int)ptr - 0x0e0e0e0e) & 0x80808080)
				1069	goto http_msg_hdr_val2;
				1070	ptr += sizeof(int);
				1071	}
				1072	#endif
				1073	if (ptr >= end) {
				1074	state = HTTP_MSG_HDR_VAL;
				1075	goto http_msg_ood;
				1076	}
				1077	http_msg_hdr_val2:
				1078	if (likely(!HTTP_IS_CRLF(*ptr)))
				1079	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
				1080
				1081	eol = ptr - start;
				1082	/* Note: we could also copy eol into ->eoh so that we have the
				1083	* real header end in case it ends with lots of LWS, but is this
				1084	* really needed ?
				1085	*/
				1086	if (likely(*ptr == '\r'))
				1087	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
				1088	goto http_msg_hdr_l2_lf;
				1089
				1090	case HTTP_MSG_HDR_L2_LF:
				1091	http_msg_hdr_l2_lf:
				1092	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
				1093	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
				1094
				1095	case HTTP_MSG_HDR_L2_LWS:
				1096	http_msg_hdr_l2_lws:
				1097	if (unlikely(HTTP_IS_SPHT(*ptr))) {
				1098	/* LWS: replace HT,CR,LF with spaces */
				1099	for (; start + eol < ptr; eol++)
				1100	start[eol] = ' ';
				1101	goto http_msg_hdr_val;
				1102	}
				1103	http_msg_complete_header:
				1104	/*
				1105	* It was a new header, so the last one is finished. Assumes
				1106	* <sol> points to the first char of the name, <col> to the
				1107	* colon, <sov> points to the first character of the value and
				1108	* <eol> to the first CR or LF so we know how the line ends. We
				1109	* will trim spaces around the value. It's possible to do it by
				1110	* adjusting <eol> and <sov> which are no more used after this.
				1111	* We can add the header field to the list.
				1112	*/
				1113	while (sov < eol && HTTP_IS_LWS(start[sov]))
				1114	sov++;
				1115
				1116	while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
				1117	eol--;
				1118
				1119
				1120	n = ist2(start + sol, col - sol);
				1121	v = ist2(start + sov, eol - sov);
				1122
				1123	if (unlikely(hdr_count >= hdr_num)) {
				1124	state = HTTP_MSG_HDR_L2_LWS;
				1125	goto http_output_full;
				1126	}
				1127	http_set_hdr(&hdr[hdr_count++], n, v);
				1128
				1129	if (h1m) {
				1130	long long cl;
				1131
Willy Tarreau	d22e83a	2017-10-31 08:02:24 +0100	[diff] [blame]	1132	if (h1m->status >= 100 && h1m->status < 200)
				1133	h1m->curr_len = h1m->body_len = 0;
				1134	else if (h1m->status == 304 \|\| h1m->status == 204) {
Willy Tarreau	8ea0f38	2017-10-30 19:31:59 +0100	[diff] [blame]	1135	/* no contents, claim c-len is present and set to zero */
				1136	h1m->flags \|= H1_MF_CLEN;
				1137	h1m->curr_len = h1m->body_len = 0;
				1138	}
				1139	else if (isteq(n, ist("transfer-encoding"))) {
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	1140	h1m->flags &= ~H1_MF_CLEN;
				1141	h1m->flags \|= H1_MF_CHNK;
				1142	}
				1143	else if (isteq(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
				1144	h1m->flags \|= H1_MF_CLEN;
				1145	strl2llrc(v.ptr, v.len, &cl);
				1146	h1m->curr_len = h1m->body_len = cl;
				1147	}
				1148	}
				1149
				1150	sol = ptr - start;
				1151	if (likely(!HTTP_IS_CRLF(*ptr)))
				1152	goto http_msg_hdr_name;
				1153
				1154	if (likely(*ptr == '\r'))
				1155	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				1156	goto http_msg_last_lf;
				1157
				1158	case HTTP_MSG_LAST_LF:
				1159	http_msg_last_lf:
				1160	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
				1161	ptr++;
				1162	/* <ptr> now points to the first byte of payload. If needed sol
				1163	* still points to the first of either CR or LF of the empty
				1164	* line ending the headers block.
				1165	*/
				1166	if (unlikely(hdr_count >= hdr_num)) {
				1167	state = HTTP_MSG_LAST_LF;
				1168	goto http_output_full;
				1169	}
				1170	http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
				1171	state = HTTP_MSG_BODY;
				1172	break;
				1173
				1174	default:
				1175	/* impossible states */
				1176	goto http_msg_invalid;
				1177	}
				1178
				1179	/* reaching here, we've parsed the whole message and the state is
				1180	* HTTP_MSG_BODY.
				1181	*/
				1182	return ptr - start + skip;
				1183
				1184	http_msg_ood:
				1185	/* out of data at <ptr> during state <state> */
				1186	return 0;
				1187
				1188	http_msg_invalid:
				1189	/* invalid message, error at <ptr> */
				1190	if (h1m) {
				1191	h1m->err_state = state;
				1192	h1m->err_pos = ptr - start + skip;
				1193	}
				1194	return -1;
				1195
				1196	http_output_full:
				1197	/* no more room to store the current header, error at <ptr> */
				1198	if (h1m) {
				1199	h1m->err_state = state;
				1200	h1m->err_pos = ptr - start + skip;
				1201	}
				1202	return -2;
				1203	}
				1204
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1205	/* This function skips trailers in the buffer associated with HTTP message
				1206	* <msg>. The first visited position is msg->next. If the end of the trailers is
				1207	* found, the function returns >0. So, the caller can automatically schedul it
				1208	* to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
				1209	* data are available, the function does not change anything except maybe
				1210	* msg->sol if it could parse some lines, and returns zero. If a parse error
				1211	* is encountered, the function returns < 0 and does not change anything except
				1212	* maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
				1213	* state before calling this function, which implies that all non-trailers data
				1214	* have already been scheduled for forwarding, and that msg->next exactly
				1215	* matches the length of trailers already parsed and not forwarded. It is also
				1216	* important to note that this function is designed to be able to parse wrapped
				1217	* headers at end of buffer.
				1218	*/
				1219	int http_forward_trailers(struct http_msg *msg)
				1220	{
				1221	const struct buffer *buf = msg->chn->buf;
				1222
				1223	/* we have msg->next which points to next line. Look for CRLF. But
				1224	* first, we reset msg->sol */
				1225	msg->sol = 0;
				1226	while (1) {
				1227	const char p1 = NULL, p2 = NULL;
				1228	const char *start = b_ptr(buf, msg->next + msg->sol);
				1229	const char *stop = bi_end(buf);
				1230	const char *ptr = start;
				1231	int bytes = 0;
				1232
				1233	/* scan current line and stop at LF or CRLF */
				1234	while (1) {
				1235	if (ptr == stop)
				1236	return 0;
				1237
				1238	if (*ptr == '\n') {
				1239	if (!p1)
				1240	p1 = ptr;
				1241	p2 = ptr;
				1242	break;
				1243	}
				1244
				1245	if (*ptr == '\r') {
				1246	if (p1) {
				1247	msg->err_pos = buffer_count(buf, buf->p, ptr);
				1248	return -1;
				1249	}
				1250	p1 = ptr;
				1251	}
				1252
				1253	ptr++;
				1254	if (ptr >= buf->data + buf->size)
				1255	ptr = buf->data;
				1256	}
				1257
				1258	/* after LF; point to beginning of next line */
				1259	p2++;
				1260	if (p2 >= buf->data + buf->size)
				1261	p2 = buf->data;
				1262
				1263	bytes = p2 - start;
				1264	if (bytes < 0)
				1265	bytes += buf->size;
				1266	msg->sol += bytes;
				1267
				1268	/* LF/CRLF at beginning of line => end of trailers at p2.
				1269	* Everything was scheduled for forwarding, there's nothing left
				1270	* from this message. */
				1271	if (p1 == start)
				1272	return 1;
				1273
				1274	/* OK, next line then */
				1275	}
				1276	}