Blame - src/h1.c - haproxy

blob: 63ff99399be5bb07a76118448f7988543120e3ee [file] [log] [blame]

Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	1	/*
				2	* HTTP/1 protocol analyzer
				3	*
				4	* Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	*/
				12
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	13	#include <ctype.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	14	#include <common/config.h>
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	15	#include <common/http-hdr.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	16
Willy Tarreau	188e230	2018-06-15 11:11:53 +0200	[diff] [blame]	17	#include <proto/channel.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	18	#include <proto/h1.h>
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	19	#include <proto/hdr_idx.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	20
				21	/* It is about twice as fast on recent architectures to lookup a byte in a
				22	* table than to perform a boolean AND or OR between two tests. Refer to
				23	* RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
				24	* neither a separator nor a CTL char. An http ver_token is any ASCII which can
				25	* be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
				26	* digit. Note: please do not overwrite values in assignment since gcc-2.95
				27	* will not handle them correctly. It's worth noting that chars 128..255 are
				28	* nothing, not even control chars.
				29	*/
				30	const unsigned char h1_char_classes[256] = {
				31	[ 0] = H1_FLG_CTL,
				32	[ 1] = H1_FLG_CTL,
				33	[ 2] = H1_FLG_CTL,
				34	[ 3] = H1_FLG_CTL,
				35	[ 4] = H1_FLG_CTL,
				36	[ 5] = H1_FLG_CTL,
				37	[ 6] = H1_FLG_CTL,
				38	[ 7] = H1_FLG_CTL,
				39	[ 8] = H1_FLG_CTL,
				40	[ 9] = H1_FLG_SPHT \| H1_FLG_LWS \| H1_FLG_SEP \| H1_FLG_CTL,
				41	[ 10] = H1_FLG_CRLF \| H1_FLG_LWS \| H1_FLG_CTL,
				42	[ 11] = H1_FLG_CTL,
				43	[ 12] = H1_FLG_CTL,
				44	[ 13] = H1_FLG_CRLF \| H1_FLG_LWS \| H1_FLG_CTL,
				45	[ 14] = H1_FLG_CTL,
				46	[ 15] = H1_FLG_CTL,
				47	[ 16] = H1_FLG_CTL,
				48	[ 17] = H1_FLG_CTL,
				49	[ 18] = H1_FLG_CTL,
				50	[ 19] = H1_FLG_CTL,
				51	[ 20] = H1_FLG_CTL,
				52	[ 21] = H1_FLG_CTL,
				53	[ 22] = H1_FLG_CTL,
				54	[ 23] = H1_FLG_CTL,
				55	[ 24] = H1_FLG_CTL,
				56	[ 25] = H1_FLG_CTL,
				57	[ 26] = H1_FLG_CTL,
				58	[ 27] = H1_FLG_CTL,
				59	[ 28] = H1_FLG_CTL,
				60	[ 29] = H1_FLG_CTL,
				61	[ 30] = H1_FLG_CTL,
				62	[ 31] = H1_FLG_CTL,
				63	[' '] = H1_FLG_SPHT \| H1_FLG_LWS \| H1_FLG_SEP,
				64	['!'] = H1_FLG_TOK,
				65	['"'] = H1_FLG_SEP,
				66	['#'] = H1_FLG_TOK,
				67	['$'] = H1_FLG_TOK,
				68	['%'] = H1_FLG_TOK,
				69	['&'] = H1_FLG_TOK,
				70	[ 39] = H1_FLG_TOK,
				71	['('] = H1_FLG_SEP,
				72	[')'] = H1_FLG_SEP,
				73	['*'] = H1_FLG_TOK,
				74	['+'] = H1_FLG_TOK,
				75	[','] = H1_FLG_SEP,
				76	['-'] = H1_FLG_TOK,
				77	['.'] = H1_FLG_TOK \| H1_FLG_VER,
				78	['/'] = H1_FLG_SEP \| H1_FLG_VER,
Willy Tarreau	1b4cf9b	2017-11-09 11:15:45 +0100	[diff] [blame]	79	['0'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
				80	['1'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
				81	['2'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
				82	['3'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
				83	['4'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
				84	['5'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
				85	['6'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
				86	['7'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
				87	['8'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
				88	['9'] = H1_FLG_TOK \| H1_FLG_VER \| H1_FLG_DIG,
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	89	[':'] = H1_FLG_SEP,
				90	[';'] = H1_FLG_SEP,
				91	['<'] = H1_FLG_SEP,
				92	['='] = H1_FLG_SEP,
				93	['>'] = H1_FLG_SEP,
				94	['?'] = H1_FLG_SEP,
				95	['@'] = H1_FLG_SEP,
				96	['A'] = H1_FLG_TOK,
				97	['B'] = H1_FLG_TOK,
				98	['C'] = H1_FLG_TOK,
				99	['D'] = H1_FLG_TOK,
				100	['E'] = H1_FLG_TOK,
				101	['F'] = H1_FLG_TOK,
				102	['G'] = H1_FLG_TOK,
				103	['H'] = H1_FLG_TOK \| H1_FLG_VER,
				104	['I'] = H1_FLG_TOK,
				105	['J'] = H1_FLG_TOK,
				106	['K'] = H1_FLG_TOK,
				107	['L'] = H1_FLG_TOK,
				108	['M'] = H1_FLG_TOK,
				109	['N'] = H1_FLG_TOK,
				110	['O'] = H1_FLG_TOK,
				111	['P'] = H1_FLG_TOK \| H1_FLG_VER,
				112	['Q'] = H1_FLG_TOK,
				113	['R'] = H1_FLG_TOK \| H1_FLG_VER,
				114	['S'] = H1_FLG_TOK \| H1_FLG_VER,
				115	['T'] = H1_FLG_TOK \| H1_FLG_VER,
				116	['U'] = H1_FLG_TOK,
				117	['V'] = H1_FLG_TOK,
				118	['W'] = H1_FLG_TOK,
				119	['X'] = H1_FLG_TOK,
				120	['Y'] = H1_FLG_TOK,
				121	['Z'] = H1_FLG_TOK,
				122	['['] = H1_FLG_SEP,
				123	[ 92] = H1_FLG_SEP,
				124	[']'] = H1_FLG_SEP,
				125	['^'] = H1_FLG_TOK,
				126	['_'] = H1_FLG_TOK,
				127	['`'] = H1_FLG_TOK,
				128	['a'] = H1_FLG_TOK,
				129	['b'] = H1_FLG_TOK,
				130	['c'] = H1_FLG_TOK,
				131	['d'] = H1_FLG_TOK,
				132	['e'] = H1_FLG_TOK,
				133	['f'] = H1_FLG_TOK,
				134	['g'] = H1_FLG_TOK,
				135	['h'] = H1_FLG_TOK,
				136	['i'] = H1_FLG_TOK,
				137	['j'] = H1_FLG_TOK,
				138	['k'] = H1_FLG_TOK,
				139	['l'] = H1_FLG_TOK,
				140	['m'] = H1_FLG_TOK,
				141	['n'] = H1_FLG_TOK,
				142	['o'] = H1_FLG_TOK,
				143	['p'] = H1_FLG_TOK,
				144	['q'] = H1_FLG_TOK,
				145	['r'] = H1_FLG_TOK,
				146	['s'] = H1_FLG_TOK,
				147	['t'] = H1_FLG_TOK,
				148	['u'] = H1_FLG_TOK,
				149	['v'] = H1_FLG_TOK,
				150	['w'] = H1_FLG_TOK,
				151	['x'] = H1_FLG_TOK,
				152	['y'] = H1_FLG_TOK,
				153	['z'] = H1_FLG_TOK,
				154	['{'] = H1_FLG_SEP,
				155	['\|'] = H1_FLG_TOK,
				156	['}'] = H1_FLG_SEP,
				157	['~'] = H1_FLG_TOK,
				158	[127] = H1_FLG_CTL,
				159	};
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	160
				161
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	162	/*
				163	* This function parses a status line between <ptr> and <end>, starting with
				164	* parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
				165	* HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
				166	* will give undefined results.
				167	* Note that it is upon the caller's responsibility to ensure that ptr < end,
				168	* and that msg->sol points to the beginning of the response.
				169	* If a complete line is found (which implies that at least one CR or LF is
				170	* found before <end>, the updated <ptr> is returned, otherwise NULL is
				171	* returned indicating an incomplete line (which does not mean that parts have
				172	* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
				173	* non-NULL, they are fed with the new <ptr> and <state> values to be passed
				174	* upon next call.
				175	*
				176	* This function was intentionally designed to be called from
				177	* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
				178	* within its state machine and use the same macros, hence the need for same
				179	* labels and variable names. Note that msg->sol is left unchanged.
				180	*/
				181	const char http_parse_stsline(struct http_msg msg,
				182	enum h1_state state, const char ptr, const char end,
				183	unsigned int ret_ptr, enum h1_state ret_state)
				184	{
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	185	const char *msg_start = ci_head(msg->chn);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	186
				187	switch (state) {
				188	case HTTP_MSG_RPVER:
				189	http_msg_rpver:
				190	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				191	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
				192
				193	if (likely(HTTP_IS_SPHT(*ptr))) {
				194	msg->sl.st.v_l = ptr - msg_start;
				195	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				196	}
				197	msg->err_state = HTTP_MSG_RPVER;
				198	state = HTTP_MSG_ERROR;
				199	break;
				200
				201	case HTTP_MSG_RPVER_SP:
				202	http_msg_rpver_sp:
				203	if (likely(!HTTP_IS_LWS(*ptr))) {
				204	msg->sl.st.c = ptr - msg_start;
				205	goto http_msg_rpcode;
				206	}
				207	if (likely(HTTP_IS_SPHT(*ptr)))
				208	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				209	/* so it's a CR/LF, this is invalid */
				210	msg->err_state = HTTP_MSG_RPVER_SP;
				211	state = HTTP_MSG_ERROR;
				212	break;
				213
				214	case HTTP_MSG_RPCODE:
				215	http_msg_rpcode:
				216	if (likely(!HTTP_IS_LWS(*ptr)))
				217	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
				218
				219	if (likely(HTTP_IS_SPHT(*ptr))) {
				220	msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
				221	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				222	}
				223
				224	/* so it's a CR/LF, so there is no reason phrase */
				225	msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
				226	http_msg_rsp_reason:
				227	/* FIXME: should we support HTTP responses without any reason phrase ? */
				228	msg->sl.st.r = ptr - msg_start;
				229	msg->sl.st.r_l = 0;
				230	goto http_msg_rpline_eol;
				231
				232	case HTTP_MSG_RPCODE_SP:
				233	http_msg_rpcode_sp:
				234	if (likely(!HTTP_IS_LWS(*ptr))) {
				235	msg->sl.st.r = ptr - msg_start;
				236	goto http_msg_rpreason;
				237	}
				238	if (likely(HTTP_IS_SPHT(*ptr)))
				239	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				240	/* so it's a CR/LF, so there is no reason phrase */
				241	goto http_msg_rsp_reason;
				242
				243	case HTTP_MSG_RPREASON:
				244	http_msg_rpreason:
				245	if (likely(!HTTP_IS_CRLF(*ptr)))
				246	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
				247	msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
				248	http_msg_rpline_eol:
				249	/* We have seen the end of line. Note that we do not
				250	* necessarily have the \n yet, but at least we know that we
				251	* have EITHER \r OR \n, otherwise the response would not be
				252	* complete. We can then record the response length and return
				253	* to the caller which will be able to register it.
				254	*/
				255	msg->sl.st.l = ptr - msg_start - msg->sol;
				256	return ptr;
				257
				258	default:
				259	#ifdef DEBUG_FULL
				260	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				261	exit(1);
				262	#endif
				263	;
				264	}
				265
				266	http_msg_ood:
				267	/* out of valid data */
				268	if (ret_state)
				269	*ret_state = state;
				270	if (ret_ptr)
				271	*ret_ptr = ptr - msg_start;
				272	return NULL;
				273	}
				274
				275	/*
				276	* This function parses a request line between <ptr> and <end>, starting with
				277	* parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
				278	* HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
				279	* will give undefined results.
				280	* Note that it is upon the caller's responsibility to ensure that ptr < end,
				281	* and that msg->sol points to the beginning of the request.
				282	* If a complete line is found (which implies that at least one CR or LF is
				283	* found before <end>, the updated <ptr> is returned, otherwise NULL is
				284	* returned indicating an incomplete line (which does not mean that parts have
				285	* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
				286	* non-NULL, they are fed with the new <ptr> and <state> values to be passed
				287	* upon next call.
				288	*
				289	* This function was intentionally designed to be called from
				290	* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
				291	* within its state machine and use the same macros, hence the need for same
				292	* labels and variable names. Note that msg->sol is left unchanged.
				293	*/
				294	const char http_parse_reqline(struct http_msg msg,
				295	enum h1_state state, const char ptr, const char end,
				296	unsigned int ret_ptr, enum h1_state ret_state)
				297	{
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	298	const char *msg_start = ci_head(msg->chn);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	299
				300	switch (state) {
				301	case HTTP_MSG_RQMETH:
				302	http_msg_rqmeth:
				303	if (likely(HTTP_IS_TOKEN(*ptr)))
				304	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
				305
				306	if (likely(HTTP_IS_SPHT(*ptr))) {
				307	msg->sl.rq.m_l = ptr - msg_start;
				308	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
				309	}
				310
				311	if (likely(HTTP_IS_CRLF(*ptr))) {
				312	/* HTTP 0.9 request */
				313	msg->sl.rq.m_l = ptr - msg_start;
				314	http_msg_req09_uri:
				315	msg->sl.rq.u = ptr - msg_start;
				316	http_msg_req09_uri_e:
				317	msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
				318	http_msg_req09_ver:
				319	msg->sl.rq.v = ptr - msg_start;
				320	msg->sl.rq.v_l = 0;
				321	goto http_msg_rqline_eol;
				322	}
				323	msg->err_state = HTTP_MSG_RQMETH;
				324	state = HTTP_MSG_ERROR;
				325	break;
				326
				327	case HTTP_MSG_RQMETH_SP:
				328	http_msg_rqmeth_sp:
				329	if (likely(!HTTP_IS_LWS(*ptr))) {
				330	msg->sl.rq.u = ptr - msg_start;
				331	goto http_msg_rquri;
				332	}
				333	if (likely(HTTP_IS_SPHT(*ptr)))
				334	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
				335	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				336	goto http_msg_req09_uri;
				337
				338	case HTTP_MSG_RQURI:
				339	http_msg_rquri:
				340	#if defined(__x86_64__) \|\| \
				341	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				342	defined(__ARM_ARCH_7A__)
				343	/* speedup: skip bytes not between 0x21 and 0x7e inclusive */
				344	while (ptr <= end - sizeof(int)) {
				345	int x = (int )ptr - 0x21212121;
				346	if (x & 0x80808080)
				347	break;
				348
				349	x -= 0x5e5e5e5e;
				350	if (!(x & 0x80808080))
				351	break;
				352
				353	ptr += sizeof(int);
				354	}
				355	#endif
				356	if (ptr >= end) {
				357	state = HTTP_MSG_RQURI;
				358	goto http_msg_ood;
				359	}
				360	http_msg_rquri2:
				361	if (likely((unsigned char)(ptr - 33) <= 93)) / 33 to 126 included */
				362	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
				363
				364	if (likely(HTTP_IS_SPHT(*ptr))) {
				365	msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
				366	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
				367	}
				368
				369	if (likely((unsigned char)*ptr >= 128)) {
				370	/* non-ASCII chars are forbidden unless option
				371	* accept-invalid-http-request is enabled in the frontend.
				372	* In any case, we capture the faulty char.
				373	*/
				374	if (msg->err_pos < -1)
				375	goto invalid_char;
				376	if (msg->err_pos == -1)
				377	msg->err_pos = ptr - msg_start;
				378	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
				379	}
				380
				381	if (likely(HTTP_IS_CRLF(*ptr))) {
				382	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				383	goto http_msg_req09_uri_e;
				384	}
				385
				386	/* OK forbidden chars, 0..31 or 127 */
				387	invalid_char:
				388	msg->err_pos = ptr - msg_start;
				389	msg->err_state = HTTP_MSG_RQURI;
				390	state = HTTP_MSG_ERROR;
				391	break;
				392
				393	case HTTP_MSG_RQURI_SP:
				394	http_msg_rquri_sp:
				395	if (likely(!HTTP_IS_LWS(*ptr))) {
				396	msg->sl.rq.v = ptr - msg_start;
				397	goto http_msg_rqver;
				398	}
				399	if (likely(HTTP_IS_SPHT(*ptr)))
				400	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
				401	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				402	goto http_msg_req09_ver;
				403
				404	case HTTP_MSG_RQVER:
				405	http_msg_rqver:
				406	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				407	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
				408
				409	if (likely(HTTP_IS_CRLF(*ptr))) {
				410	msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
				411	http_msg_rqline_eol:
				412	/* We have seen the end of line. Note that we do not
				413	* necessarily have the \n yet, but at least we know that we
				414	* have EITHER \r OR \n, otherwise the request would not be
				415	* complete. We can then record the request length and return
				416	* to the caller which will be able to register it.
				417	*/
				418	msg->sl.rq.l = ptr - msg_start - msg->sol;
				419	return ptr;
				420	}
				421
				422	/* neither an HTTP_VER token nor a CRLF */
				423	msg->err_state = HTTP_MSG_RQVER;
				424	state = HTTP_MSG_ERROR;
				425	break;
				426
				427	default:
				428	#ifdef DEBUG_FULL
				429	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				430	exit(1);
				431	#endif
				432	;
				433	}
				434
				435	http_msg_ood:
				436	/* out of valid data */
				437	if (ret_state)
				438	*ret_state = state;
				439	if (ret_ptr)
				440	*ret_ptr = ptr - msg_start;
				441	return NULL;
				442	}
				443
				444	/*
				445	* This function parses an HTTP message, either a request or a response,
				446	* depending on the initial msg->msg_state. The caller is responsible for
				447	* ensuring that the message does not wrap. The function can be preempted
				448	* everywhere when data are missing and recalled at the exact same location
				449	* with no information loss. The message may even be realigned between two
				450	* calls. The header index is re-initialized when switching from
				451	* MSG_R[PQ]BEFORE to MSG_RPVER\|MSG_RQMETH. It modifies msg->sol among other
				452	* fields. Note that msg->sol will be initialized after completing the first
				453	* state, so that none of the msg pointers has to be initialized prior to the
				454	* first call.
				455	*/
				456	void http_msg_analyzer(struct http_msg msg, struct hdr_idx idx)
				457	{
				458	enum h1_state state; /* updated only when leaving the FSM */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	459	register const char ptr, end; /* request pointers, to avoid dereferences */
Willy Tarreau	950a8a6	2018-09-06 10:48:15 +0200	[diff] [blame]	460	struct buffer *buf = &msg->chn->buf;
				461	char *input = b_head(buf);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	462
				463	state = msg->msg_state;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	464	ptr = input + msg->next;
				465	end = b_stop(buf);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	466
				467	if (unlikely(ptr >= end))
				468	goto http_msg_ood;
				469
				470	switch (state) {
				471	/*
				472	* First, states that are specific to the response only.
				473	* We check them first so that request and headers are
				474	* closer to each other (accessed more often).
				475	*/
				476	case HTTP_MSG_RPBEFORE:
				477	http_msg_rpbefore:
				478	if (likely(HTTP_IS_TOKEN(*ptr))) {
				479	/* we have a start of message, but we have to check
				480	* first if we need to remove some CRLF. We can only
				481	* do this when o=0.
				482	*/
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	483	if (unlikely(ptr != input)) {
				484	if (co_data(msg->chn))
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	485	goto http_msg_ood;
				486	/* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau	72a100b	2018-07-10 09:59:31 +0200	[diff] [blame]	487	b_del(buf, ptr - input);
Willy Tarreau	950a8a6	2018-09-06 10:48:15 +0200	[diff] [blame]	488	input = b_head(buf);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	489	}
				490	msg->sol = 0;
				491	msg->sl.st.l = 0; /* used in debug mode */
				492	hdr_idx_init(idx);
				493	state = HTTP_MSG_RPVER;
				494	goto http_msg_rpver;
				495	}
				496
				497	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				498	state = HTTP_MSG_RPBEFORE;
				499	goto http_msg_invalid;
				500	}
				501
				502	if (unlikely(*ptr == '\n'))
				503	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				504	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
				505	/* stop here */
				506
				507	case HTTP_MSG_RPBEFORE_CR:
				508	http_msg_rpbefore_cr:
				509	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
				510	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				511	/* stop here */
				512
				513	case HTTP_MSG_RPVER:
				514	http_msg_rpver:
				515	case HTTP_MSG_RPVER_SP:
				516	case HTTP_MSG_RPCODE:
				517	case HTTP_MSG_RPCODE_SP:
				518	case HTTP_MSG_RPREASON:
				519	ptr = (char *)http_parse_stsline(msg,
				520	state, ptr, end,
				521	&msg->next, &msg->msg_state);
				522	if (unlikely(!ptr))
				523	return;
				524
				525	/* we have a full response and we know that we have either a CR
				526	* or an LF at <ptr>.
				527	*/
				528	hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
				529
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	530	msg->sol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	531	if (likely(*ptr == '\r'))
				532	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
				533	goto http_msg_rpline_end;
				534
				535	case HTTP_MSG_RPLINE_END:
				536	http_msg_rpline_end:
				537	/* msg->sol must point to the first of CR or LF. */
				538	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
				539	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				540	/* stop here */
				541
				542	/*
				543	* Second, states that are specific to the request only
				544	*/
				545	case HTTP_MSG_RQBEFORE:
				546	http_msg_rqbefore:
				547	if (likely(HTTP_IS_TOKEN(*ptr))) {
				548	/* we have a start of message, but we have to check
				549	* first if we need to remove some CRLF. We can only
				550	* do this when o=0.
				551	*/
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	552	if (likely(ptr != input)) {
				553	if (co_data(msg->chn))
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	554	goto http_msg_ood;
				555	/* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau	72a100b	2018-07-10 09:59:31 +0200	[diff] [blame]	556	b_del(buf, ptr - input);
Willy Tarreau	950a8a6	2018-09-06 10:48:15 +0200	[diff] [blame]	557	input = b_head(buf);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	558	}
				559	msg->sol = 0;
				560	msg->sl.rq.l = 0; /* used in debug mode */
				561	state = HTTP_MSG_RQMETH;
				562	goto http_msg_rqmeth;
				563	}
				564
				565	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				566	state = HTTP_MSG_RQBEFORE;
				567	goto http_msg_invalid;
				568	}
				569
				570	if (unlikely(*ptr == '\n'))
				571	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
				572	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
				573	/* stop here */
				574
				575	case HTTP_MSG_RQBEFORE_CR:
				576	http_msg_rqbefore_cr:
				577	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
				578	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
				579	/* stop here */
				580
				581	case HTTP_MSG_RQMETH:
				582	http_msg_rqmeth:
				583	case HTTP_MSG_RQMETH_SP:
				584	case HTTP_MSG_RQURI:
				585	case HTTP_MSG_RQURI_SP:
				586	case HTTP_MSG_RQVER:
				587	ptr = (char *)http_parse_reqline(msg,
				588	state, ptr, end,
				589	&msg->next, &msg->msg_state);
				590	if (unlikely(!ptr))
				591	return;
				592
				593	/* we have a full request and we know that we have either a CR
				594	* or an LF at <ptr>.
				595	*/
				596	hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
				597
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	598	msg->sol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	599	if (likely(*ptr == '\r'))
				600	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
				601	goto http_msg_rqline_end;
				602
				603	case HTTP_MSG_RQLINE_END:
				604	http_msg_rqline_end:
				605	/* check for HTTP/0.9 request : no version information available.
				606	* msg->sol must point to the first of CR or LF.
				607	*/
				608	if (unlikely(msg->sl.rq.v_l == 0))
				609	goto http_msg_last_lf;
				610
				611	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
				612	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				613	/* stop here */
				614
				615	/*
				616	* Common states below
				617	*/
				618	case HTTP_MSG_HDR_FIRST:
				619	http_msg_hdr_first:
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	620	msg->sol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	621	if (likely(!HTTP_IS_CRLF(*ptr))) {
				622	goto http_msg_hdr_name;
				623	}
				624
				625	if (likely(*ptr == '\r'))
				626	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				627	goto http_msg_last_lf;
				628
				629	case HTTP_MSG_HDR_NAME:
				630	http_msg_hdr_name:
				631	/* assumes msg->sol points to the first char */
				632	if (likely(HTTP_IS_TOKEN(*ptr)))
				633	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				634
				635	if (likely(*ptr == ':'))
				636	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				637
				638	if (likely(msg->err_pos < -1) \|\| *ptr == '\n') {
				639	state = HTTP_MSG_HDR_NAME;
				640	goto http_msg_invalid;
				641	}
				642
				643	if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	644	msg->err_pos = ptr - input; /* >= 0 now */
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	645
				646	/* and we still accept this non-token character */
				647	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				648
				649	case HTTP_MSG_HDR_L1_SP:
				650	http_msg_hdr_l1_sp:
				651	/* assumes msg->sol points to the first char */
				652	if (likely(HTTP_IS_SPHT(*ptr)))
				653	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				654
				655	/* header value can be basically anything except CR/LF */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	656	msg->sov = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	657
				658	if (likely(!HTTP_IS_CRLF(*ptr))) {
				659	goto http_msg_hdr_val;
				660	}
				661
				662	if (likely(*ptr == '\r'))
				663	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
				664	goto http_msg_hdr_l1_lf;
				665
				666	case HTTP_MSG_HDR_L1_LF:
				667	http_msg_hdr_l1_lf:
				668	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
				669	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
				670
				671	case HTTP_MSG_HDR_L1_LWS:
				672	http_msg_hdr_l1_lws:
				673	if (likely(HTTP_IS_SPHT(*ptr))) {
				674	/* replace HT,CR,LF with spaces */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	675	for (; input + msg->sov < ptr; msg->sov++)
				676	input[msg->sov] = ' ';
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	677	goto http_msg_hdr_l1_sp;
				678	}
				679	/* we had a header consisting only in spaces ! */
				680	msg->eol = msg->sov;
				681	goto http_msg_complete_header;
				682
				683	case HTTP_MSG_HDR_VAL:
				684	http_msg_hdr_val:
				685	/* assumes msg->sol points to the first char, and msg->sov
				686	* points to the first character of the value.
				687	*/
				688
				689	/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
				690	* and lower. In fact since most of the time is spent in the loop, we
				691	* also remove the sign bit test so that bytes 0x8e..0x0d break the
				692	* loop, but we don't care since they're very rare in header values.
				693	*/
				694	#if defined(__x86_64__)
				695	while (ptr <= end - sizeof(long)) {
				696	if (((long )ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
				697	goto http_msg_hdr_val2;
				698	ptr += sizeof(long);
				699	}
				700	#endif
				701	#if defined(__x86_64__) \|\| \
				702	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				703	defined(__ARM_ARCH_7A__)
				704	while (ptr <= end - sizeof(int)) {
				705	if (((int)ptr - 0x0e0e0e0e) & 0x80808080)
				706	goto http_msg_hdr_val2;
				707	ptr += sizeof(int);
				708	}
				709	#endif
				710	if (ptr >= end) {
				711	state = HTTP_MSG_HDR_VAL;
				712	goto http_msg_ood;
				713	}
				714	http_msg_hdr_val2:
				715	if (likely(!HTTP_IS_CRLF(*ptr)))
				716	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
				717
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	718	msg->eol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	719	/* Note: we could also copy eol into ->eoh so that we have the
				720	* real header end in case it ends with lots of LWS, but is this
				721	* really needed ?
				722	*/
				723	if (likely(*ptr == '\r'))
				724	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
				725	goto http_msg_hdr_l2_lf;
				726
				727	case HTTP_MSG_HDR_L2_LF:
				728	http_msg_hdr_l2_lf:
				729	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
				730	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
				731
				732	case HTTP_MSG_HDR_L2_LWS:
				733	http_msg_hdr_l2_lws:
				734	if (unlikely(HTTP_IS_SPHT(*ptr))) {
				735	/* LWS: replace HT,CR,LF with spaces */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	736	for (; input + msg->eol < ptr; msg->eol++)
				737	input[msg->eol] = ' ';
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	738	goto http_msg_hdr_val;
				739	}
				740	http_msg_complete_header:
				741	/*
				742	* It was a new header, so the last one is finished.
				743	* Assumes msg->sol points to the first char, msg->sov points
				744	* to the first character of the value and msg->eol to the
				745	* first CR or LF so we know how the line ends. We insert last
				746	* header into the index.
				747	*/
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	748	if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r',
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	749	idx, idx->tail) < 0)) {
				750	state = HTTP_MSG_HDR_L2_LWS;
				751	goto http_msg_invalid;
				752	}
				753
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	754	msg->sol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	755	if (likely(!HTTP_IS_CRLF(*ptr))) {
				756	goto http_msg_hdr_name;
				757	}
				758
				759	if (likely(*ptr == '\r'))
				760	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				761	goto http_msg_last_lf;
				762
				763	case HTTP_MSG_LAST_LF:
				764	http_msg_last_lf:
				765	/* Assumes msg->sol points to the first of either CR or LF.
				766	* Sets ->sov and ->next to the total header length, ->eoh to
				767	* the last CRLF, and ->eol to the last CRLF length (1 or 2).
				768	*/
				769	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
				770	ptr++;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	771	msg->sov = msg->next = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	772	msg->eoh = msg->sol;
				773	msg->sol = 0;
				774	msg->eol = msg->sov - msg->eoh;
				775	msg->msg_state = HTTP_MSG_BODY;
				776	return;
				777
				778	case HTTP_MSG_ERROR:
				779	/* this may only happen if we call http_msg_analyser() twice with an error */
				780	break;
				781
				782	default:
				783	#ifdef DEBUG_FULL
				784	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				785	exit(1);
				786	#endif
				787	;
				788	}
				789	http_msg_ood:
				790	/* out of data */
				791	msg->msg_state = state;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	792	msg->next = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	793	return;
				794
				795	http_msg_invalid:
				796	/* invalid message */
				797	msg->err_state = state;
				798	msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	799	msg->next = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	800	return;
				801	}
				802
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	803	/* This function parses a contiguous HTTP/1 headers block starting at <start>
				804	* and ending before <stop>, at once, and converts it a list of (name,value)
				805	* pairs representing header fields into the array <hdr> of size <hdr_num>,
				806	* whose last entry will have an empty name and an empty value. If <hdr_num> is
				807	* too small to represent the whole message, an error is returned. If <h1m> is
				808	* not NULL, some protocol elements such as content-length and transfer-encoding
				809	* will be parsed and stored there as well.
				810	*
				811	* For now it's limited to the response. If the header block is incomplete,
				812	* 0 is returned, waiting to be called again with more data to try it again.
				813	*
				814	* The code derived from the main HTTP/1 parser above but was simplified and
				815	* optimized to process responses produced or forwarded by haproxy. The caller
				816	* is responsible for ensuring that the message doesn't wrap, and should ensure
				817	* it is complete to avoid having to retry the operation after a failed
				818	* attempt. The message is not supposed to be invalid, which is why a few
				819	* properties such as the character set used in the header field names are not
				820	* checked. In case of an unparsable response message, a negative value will be
				821	* returned with h1m->err_pos and h1m->err_state matching the location and
				822	* state where the error was met. Leading blank likes are tolerated but not
				823	* recommended.
				824	*
				825	* This function returns :
				826	* -1 in case of error. In this case, h1m->err_state is filled (if h1m is
				827	* set) with the state the error occurred in and h2-m>err_pos with the
				828	* the position relative to <start>
				829	* -2 if the output is full (hdr_num reached). err_state and err_pos also
				830	* indicate where it failed.
				831	* 0 in case of missing data.
				832	* > 0 on success, it then corresponds to the number of bytes read since
				833	* <start> so that the caller can go on with the payload.
				834	*/
				835	int h1_headers_to_hdr_list(char start, const char stop,
				836	struct http_hdr *hdr, unsigned int hdr_num,
				837	struct h1m *h1m)
				838	{
				839	enum h1_state state = HTTP_MSG_RPBEFORE;
				840	register char *ptr = start;
				841	register const char *end = stop;
				842	unsigned int hdr_count = 0;
				843	unsigned int code = 0; /* status code, ASCII form */
				844	unsigned int st_c; /* beginning of status code, relative to msg_start */
				845	unsigned int st_c_l; /* length of status code */
				846	unsigned int sol = 0; /* start of line */
				847	unsigned int col = 0; /* position of the colon */
				848	unsigned int eol = 0; /* end of line */
				849	unsigned int sov = 0; /* start of value */
				850	unsigned int skip = 0; /* number of bytes skipped at the beginning */
				851	struct ist n, v; /* header name and value during parsing */
				852
				853	if (unlikely(ptr >= end))
				854	goto http_msg_ood;
				855
				856	switch (state) {
				857	case HTTP_MSG_RPBEFORE:
				858	http_msg_rpbefore:
				859	if (likely(HTTP_IS_TOKEN(*ptr))) {
				860	/* we have a start of message, we may have skipped some
				861	* heading CRLF. Skip them now.
				862	*/
				863	skip += ptr - start;
				864	start = ptr;
				865
				866	sol = 0;
				867	hdr_count = 0;
				868	state = HTTP_MSG_RPVER;
				869	goto http_msg_rpver;
				870	}
				871
				872	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				873	state = HTTP_MSG_RPBEFORE;
				874	goto http_msg_invalid;
				875	}
				876
				877	if (unlikely(*ptr == '\n'))
				878	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				879	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
				880	/* stop here */
				881
				882	case HTTP_MSG_RPBEFORE_CR:
				883	http_msg_rpbefore_cr:
				884	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
				885	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				886	/* stop here */
				887
				888	case HTTP_MSG_RPVER:
				889	http_msg_rpver:
				890	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				891	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
				892
				893	if (likely(HTTP_IS_SPHT(*ptr))) {
				894	/* version length = ptr - start */
				895	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				896	}
				897	state = HTTP_MSG_RPVER;
				898	goto http_msg_invalid;
				899
				900	case HTTP_MSG_RPVER_SP:
				901	http_msg_rpver_sp:
				902	if (likely(!HTTP_IS_LWS(*ptr))) {
				903	code = 0;
				904	st_c = ptr - start;
				905	goto http_msg_rpcode;
				906	}
				907	if (likely(HTTP_IS_SPHT(*ptr)))
				908	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				909	/* so it's a CR/LF, this is invalid */
				910	state = HTTP_MSG_RPVER_SP;
				911	goto http_msg_invalid;
				912
				913	case HTTP_MSG_RPCODE:
				914	http_msg_rpcode:
Willy Tarreau	1b4cf9b	2017-11-09 11:15:45 +0100	[diff] [blame]	915	if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreau	d22e83a	2017-10-31 08:02:24 +0100	[diff] [blame]	916	code = code * 10 + *ptr - '0';
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	917	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
				918	}
				919
Willy Tarreau	1b4cf9b	2017-11-09 11:15:45 +0100	[diff] [blame]	920	if (unlikely(!HTTP_IS_LWS(*ptr))) {
				921	state = HTTP_MSG_RPCODE;
				922	goto http_msg_invalid;
				923	}
				924
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	925	if (likely(HTTP_IS_SPHT(*ptr))) {
				926	st_c_l = ptr - start - st_c;
				927	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				928	}
				929
				930	/* so it's a CR/LF, so there is no reason phrase */
				931	st_c_l = ptr - start - st_c;
				932
				933	http_msg_rsp_reason:
				934	/* reason = ptr - start; */
				935	/* reason length = 0 */
				936	goto http_msg_rpline_eol;
				937
				938	case HTTP_MSG_RPCODE_SP:
				939	http_msg_rpcode_sp:
				940	if (likely(!HTTP_IS_LWS(*ptr))) {
				941	/* reason = ptr - start */
				942	goto http_msg_rpreason;
				943	}
				944	if (likely(HTTP_IS_SPHT(*ptr)))
				945	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				946	/* so it's a CR/LF, so there is no reason phrase */
				947	goto http_msg_rsp_reason;
				948
				949	case HTTP_MSG_RPREASON:
				950	http_msg_rpreason:
				951	if (likely(!HTTP_IS_CRLF(*ptr)))
				952	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
				953	/* reason length = ptr - start - reason */
				954	http_msg_rpline_eol:
				955	/* We have seen the end of line. Note that we do not
				956	* necessarily have the \n yet, but at least we know that we
				957	* have EITHER \r OR \n, otherwise the response would not be
				958	* complete. We can then record the response length and return
				959	* to the caller which will be able to register it.
				960	*/
				961
				962	if (unlikely(hdr_count >= hdr_num)) {
				963	state = HTTP_MSG_RPREASON;
				964	goto http_output_full;
				965	}
				966	http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + st_c, st_c_l));
Willy Tarreau	d22e83a	2017-10-31 08:02:24 +0100	[diff] [blame]	967	if (h1m)
				968	h1m->status = code;
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	969
				970	sol = ptr - start;
				971	if (likely(*ptr == '\r'))
				972	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
				973	goto http_msg_rpline_end;
				974
				975	case HTTP_MSG_RPLINE_END:
				976	http_msg_rpline_end:
				977	/* sol must point to the first of CR or LF. */
				978	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
				979	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				980	/* stop here */
				981
				982	case HTTP_MSG_HDR_FIRST:
				983	http_msg_hdr_first:
				984	sol = ptr - start;
				985	if (likely(!HTTP_IS_CRLF(*ptr))) {
				986	goto http_msg_hdr_name;
				987	}
				988
				989	if (likely(*ptr == '\r'))
				990	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				991	goto http_msg_last_lf;
				992
				993	case HTTP_MSG_HDR_NAME:
				994	http_msg_hdr_name:
				995	/* assumes sol points to the first char */
				996	if (likely(HTTP_IS_TOKEN(*ptr))) {
				997	/* turn it to lower case if needed */
				998	if (isupper((unsigned char)*ptr))
				999	ptr = tolower(ptr);
				1000	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				1001	}
				1002
				1003	if (likely(*ptr == ':')) {
				1004	col = ptr - start;
				1005	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				1006	}
				1007
				1008	if (HTTP_IS_LWS(*ptr)) {
				1009	state = HTTP_MSG_HDR_NAME;
				1010	goto http_msg_invalid;
				1011	}
				1012
				1013	/* now we have a non-token character in the header field name,
				1014	* it's up to the H1 layer to have decided whether or not it
				1015	* was acceptable. If we find it here, it was considered
				1016	* acceptable due to configuration rules so we obey.
				1017	*/
				1018	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				1019
				1020	case HTTP_MSG_HDR_L1_SP:
				1021	http_msg_hdr_l1_sp:
				1022	/* assumes sol points to the first char */
				1023	if (likely(HTTP_IS_SPHT(*ptr)))
				1024	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				1025
				1026	/* header value can be basically anything except CR/LF */
				1027	sov = ptr - start;
				1028
				1029	if (likely(!HTTP_IS_CRLF(*ptr))) {
				1030	goto http_msg_hdr_val;
				1031	}
				1032
				1033	if (likely(*ptr == '\r'))
				1034	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
				1035	goto http_msg_hdr_l1_lf;
				1036
				1037	case HTTP_MSG_HDR_L1_LF:
				1038	http_msg_hdr_l1_lf:
				1039	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
				1040	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
				1041
				1042	case HTTP_MSG_HDR_L1_LWS:
				1043	http_msg_hdr_l1_lws:
				1044	if (likely(HTTP_IS_SPHT(*ptr))) {
				1045	/* replace HT,CR,LF with spaces */
				1046	for (; start + sov < ptr; sov++)
				1047	start[sov] = ' ';
				1048	goto http_msg_hdr_l1_sp;
				1049	}
				1050	/* we had a header consisting only in spaces ! */
				1051	eol = sov;
				1052	goto http_msg_complete_header;
				1053
				1054	case HTTP_MSG_HDR_VAL:
				1055	http_msg_hdr_val:
				1056	/* assumes sol points to the first char, and sov
				1057	* points to the first character of the value.
				1058	*/
				1059
				1060	/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
				1061	* and lower. In fact since most of the time is spent in the loop, we
				1062	* also remove the sign bit test so that bytes 0x8e..0x0d break the
				1063	* loop, but we don't care since they're very rare in header values.
				1064	*/
				1065	#if defined(__x86_64__)
				1066	while (ptr <= end - sizeof(long)) {
				1067	if (((long )ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
				1068	goto http_msg_hdr_val2;
				1069	ptr += sizeof(long);
				1070	}
				1071	#endif
				1072	#if defined(__x86_64__) \|\| \
				1073	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				1074	defined(__ARM_ARCH_7A__)
				1075	while (ptr <= end - sizeof(int)) {
				1076	if (((int)ptr - 0x0e0e0e0e) & 0x80808080)
				1077	goto http_msg_hdr_val2;
				1078	ptr += sizeof(int);
				1079	}
				1080	#endif
				1081	if (ptr >= end) {
				1082	state = HTTP_MSG_HDR_VAL;
				1083	goto http_msg_ood;
				1084	}
				1085	http_msg_hdr_val2:
				1086	if (likely(!HTTP_IS_CRLF(*ptr)))
				1087	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
				1088
				1089	eol = ptr - start;
				1090	/* Note: we could also copy eol into ->eoh so that we have the
				1091	* real header end in case it ends with lots of LWS, but is this
				1092	* really needed ?
				1093	*/
				1094	if (likely(*ptr == '\r'))
				1095	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
				1096	goto http_msg_hdr_l2_lf;
				1097
				1098	case HTTP_MSG_HDR_L2_LF:
				1099	http_msg_hdr_l2_lf:
				1100	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
				1101	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
				1102
				1103	case HTTP_MSG_HDR_L2_LWS:
				1104	http_msg_hdr_l2_lws:
				1105	if (unlikely(HTTP_IS_SPHT(*ptr))) {
				1106	/* LWS: replace HT,CR,LF with spaces */
				1107	for (; start + eol < ptr; eol++)
				1108	start[eol] = ' ';
				1109	goto http_msg_hdr_val;
				1110	}
				1111	http_msg_complete_header:
				1112	/*
				1113	* It was a new header, so the last one is finished. Assumes
				1114	* <sol> points to the first char of the name, <col> to the
				1115	* colon, <sov> points to the first character of the value and
				1116	* <eol> to the first CR or LF so we know how the line ends. We
				1117	* will trim spaces around the value. It's possible to do it by
				1118	* adjusting <eol> and <sov> which are no more used after this.
				1119	* We can add the header field to the list.
				1120	*/
				1121	while (sov < eol && HTTP_IS_LWS(start[sov]))
				1122	sov++;
				1123
				1124	while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
				1125	eol--;
				1126
				1127
				1128	n = ist2(start + sol, col - sol);
				1129	v = ist2(start + sov, eol - sov);
				1130
				1131	if (unlikely(hdr_count >= hdr_num)) {
				1132	state = HTTP_MSG_HDR_L2_LWS;
				1133	goto http_output_full;
				1134	}
				1135	http_set_hdr(&hdr[hdr_count++], n, v);
				1136
				1137	if (h1m) {
				1138	long long cl;
				1139
Willy Tarreau	d22e83a	2017-10-31 08:02:24 +0100	[diff] [blame]	1140	if (h1m->status >= 100 && h1m->status < 200)
				1141	h1m->curr_len = h1m->body_len = 0;
				1142	else if (h1m->status == 304 \|\| h1m->status == 204) {
Willy Tarreau	8ea0f38	2017-10-30 19:31:59 +0100	[diff] [blame]	1143	/* no contents, claim c-len is present and set to zero */
				1144	h1m->flags \|= H1_MF_CLEN;
				1145	h1m->curr_len = h1m->body_len = 0;
				1146	}
				1147	else if (isteq(n, ist("transfer-encoding"))) {
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	1148	h1m->flags &= ~H1_MF_CLEN;
				1149	h1m->flags \|= H1_MF_CHNK;
				1150	}
				1151	else if (isteq(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
				1152	h1m->flags \|= H1_MF_CLEN;
				1153	strl2llrc(v.ptr, v.len, &cl);
				1154	h1m->curr_len = h1m->body_len = cl;
				1155	}
				1156	}
				1157
				1158	sol = ptr - start;
				1159	if (likely(!HTTP_IS_CRLF(*ptr)))
				1160	goto http_msg_hdr_name;
				1161
				1162	if (likely(*ptr == '\r'))
				1163	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				1164	goto http_msg_last_lf;
				1165
				1166	case HTTP_MSG_LAST_LF:
				1167	http_msg_last_lf:
				1168	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
				1169	ptr++;
				1170	/* <ptr> now points to the first byte of payload. If needed sol
				1171	* still points to the first of either CR or LF of the empty
				1172	* line ending the headers block.
				1173	*/
				1174	if (unlikely(hdr_count >= hdr_num)) {
				1175	state = HTTP_MSG_LAST_LF;
				1176	goto http_output_full;
				1177	}
				1178	http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
				1179	state = HTTP_MSG_BODY;
				1180	break;
				1181
				1182	default:
				1183	/* impossible states */
				1184	goto http_msg_invalid;
				1185	}
				1186
				1187	/* reaching here, we've parsed the whole message and the state is
				1188	* HTTP_MSG_BODY.
				1189	*/
				1190	return ptr - start + skip;
				1191
				1192	http_msg_ood:
				1193	/* out of data at <ptr> during state <state> */
				1194	return 0;
				1195
				1196	http_msg_invalid:
				1197	/* invalid message, error at <ptr> */
				1198	if (h1m) {
				1199	h1m->err_state = state;
				1200	h1m->err_pos = ptr - start + skip;
				1201	}
				1202	return -1;
				1203
				1204	http_output_full:
				1205	/* no more room to store the current header, error at <ptr> */
				1206	if (h1m) {
				1207	h1m->err_state = state;
				1208	h1m->err_pos = ptr - start + skip;
				1209	}
				1210	return -2;
				1211	}
				1212
/* This function performs a very minimal parsing of the trailers block present
 * at offset <ofs> in <buf> for up to <max> bytes, and returns the number of
 * bytes to delete to skip the trailers. It may return 0 if it's missing some
 * input data, or < 0 in case of parse error (in which case the caller may have
 * to decide how to proceed, possibly eating everything).
 *
 * Lines are scanned one at a time until an empty line (a lone LF or CRLF) is
 * met, which marks the end of the trailers. The only error detected is a
 * second CR on the same line before its LF. Positions are walked with
 * b_peek()/b_next()/b_dist() so that the scanned area is always addressed
 * through <buf>.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *stop = b_peek(buf, ofs + max);
	int count = ofs;

	while (1) {
		/* <p1> is the first CR or LF met on the line, <p2> its LF */
		const char *p1 = NULL, *p2 = NULL;
		const char *start = b_peek(buf, count);
		const char *ptr = start;

		/* scan current line and stop at LF or CRLF */
		while (1) {
			if (ptr == stop)
				return 0;

			if (*ptr == '\n') {
				if (!p1)
					p1 = ptr;
				p2 = ptr;
				break;
			}

			if (*ptr == '\r') {
				if (p1)
					return -1;
				p1 = ptr;
			}

			ptr = b_next(buf, ptr);
		}

		/* after LF; point to beginning of next line */
		p2 = b_next(buf, p2);
		count += b_dist(buf, start, p2);

		/* LF/CRLF at beginning of line => end of trailers at p2.
		 * Everything was scheduled for forwarding, there's nothing left
		 * from this message. */
		if (p1 == start)
			break;
		/* OK, next line then */
	}

	/* <count> started at <ofs>, only report what was consumed past it */
	return count - ofs;
}
				1263
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1264	/* This function skips trailers in the buffer associated with HTTP message
				1265	* <msg>. The first visited position is msg->next. If the end of the trailers is
				1266	* found, the function returns >0. So, the caller can automatically schedul it
				1267	* to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
				1268	* data are available, the function does not change anything except maybe
				1269	* msg->sol if it could parse some lines, and returns zero. If a parse error
				1270	* is encountered, the function returns < 0 and does not change anything except
				1271	* maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
				1272	* state before calling this function, which implies that all non-trailers data
				1273	* have already been scheduled for forwarding, and that msg->next exactly
				1274	* matches the length of trailers already parsed and not forwarded. It is also
				1275	* important to note that this function is designed to be able to parse wrapped
				1276	* headers at end of buffer.
				1277	*/
				1278	int http_forward_trailers(struct http_msg *msg)
				1279	{
Willy Tarreau	c9fa048	2018-07-10 17:43:27 +0200	[diff] [blame]	1280	const struct buffer *buf = &msg->chn->buf;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	1281	const char *parse = ci_head(msg->chn);
				1282	const char *stop = b_tail(buf);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1283
				1284	/* we have msg->next which points to next line. Look for CRLF. But
				1285	* first, we reset msg->sol */
				1286	msg->sol = 0;
				1287	while (1) {
				1288	const char p1 = NULL, p2 = NULL;
Willy Tarreau	188e230	2018-06-15 11:11:53 +0200	[diff] [blame]	1289	const char *start = c_ptr(msg->chn, msg->next + msg->sol);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1290	const char *ptr = start;
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1291
				1292	/* scan current line and stop at LF or CRLF */
				1293	while (1) {
				1294	if (ptr == stop)
				1295	return 0;
				1296
				1297	if (*ptr == '\n') {
				1298	if (!p1)
				1299	p1 = ptr;
				1300	p2 = ptr;
				1301	break;
				1302	}
				1303
				1304	if (*ptr == '\r') {
				1305	if (p1) {
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	1306	msg->err_pos = b_dist(buf, parse, ptr);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1307	return -1;
				1308	}
				1309	p1 = ptr;
				1310	}
				1311
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	1312	ptr = b_next(buf, ptr);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1313	}
				1314
				1315	/* after LF; point to beginning of next line */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	1316	p2 = b_next(buf, p2);
				1317	msg->sol += b_dist(buf, start, p2);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1318
				1319	/* LF/CRLF at beginning of line => end of trailers at p2.
				1320	* Everything was scheduled for forwarding, there's nothing left
				1321	* from this message. */
				1322	if (p1 == start)
				1323	return 1;
				1324
				1325	/* OK, next line then */
				1326	}
				1327	}