Blame - src/h1.c - haproxy

blob: 325f5b6c4358cdd164cf88c51b2a16c5549513f7 [file] [log] [blame]

Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	1	/*
				2	* HTTP/1 protocol analyzer
				3	*
				4	* Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	*/
				12
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	13	#include <ctype.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	14	#include <common/config.h>
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	15	#include <common/http-hdr.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	16
Willy Tarreau	188e230	2018-06-15 11:11:53 +0200	[diff] [blame]	17	#include <proto/channel.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	18	#include <proto/h1.h>
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	19	#include <proto/hdr_idx.h>
Willy Tarreau	0da5b3b	2017-09-21 09:30:46 +0200	[diff] [blame]	20
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	21	/*
				22	* This function parses a status line between <ptr> and <end>, starting with
				23	* parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
				24	* HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
				25	* will give undefined results.
				26	* Note that it is upon the caller's responsibility to ensure that ptr < end,
				27	* and that msg->sol points to the beginning of the response.
				28	* If a complete line is found (which implies that at least one CR or LF is
				29	* found before <end>, the updated <ptr> is returned, otherwise NULL is
				30	* returned indicating an incomplete line (which does not mean that parts have
				31	* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
				32	* non-NULL, they are fed with the new <ptr> and <state> values to be passed
				33	* upon next call.
				34	*
				35	* This function was intentionally designed to be called from
				36	* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
				37	* within its state machine and use the same macros, hence the need for same
				38	* labels and variable names. Note that msg->sol is left unchanged.
				39	*/
				40	const char http_parse_stsline(struct http_msg msg,
				41	enum h1_state state, const char ptr, const char end,
				42	unsigned int ret_ptr, enum h1_state ret_state)
				43	{
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	44	const char *msg_start = ci_head(msg->chn);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	45
				46	switch (state) {
				47	case HTTP_MSG_RPVER:
				48	http_msg_rpver:
				49	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				50	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
				51
				52	if (likely(HTTP_IS_SPHT(*ptr))) {
				53	msg->sl.st.v_l = ptr - msg_start;
				54	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				55	}
				56	msg->err_state = HTTP_MSG_RPVER;
				57	state = HTTP_MSG_ERROR;
				58	break;
				59
				60	case HTTP_MSG_RPVER_SP:
				61	http_msg_rpver_sp:
				62	if (likely(!HTTP_IS_LWS(*ptr))) {
				63	msg->sl.st.c = ptr - msg_start;
				64	goto http_msg_rpcode;
				65	}
				66	if (likely(HTTP_IS_SPHT(*ptr)))
				67	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				68	/* so it's a CR/LF, this is invalid */
				69	msg->err_state = HTTP_MSG_RPVER_SP;
				70	state = HTTP_MSG_ERROR;
				71	break;
				72
				73	case HTTP_MSG_RPCODE:
				74	http_msg_rpcode:
				75	if (likely(!HTTP_IS_LWS(*ptr)))
				76	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
				77
				78	if (likely(HTTP_IS_SPHT(*ptr))) {
				79	msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
				80	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				81	}
				82
				83	/* so it's a CR/LF, so there is no reason phrase */
				84	msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
				85	http_msg_rsp_reason:
				86	/* FIXME: should we support HTTP responses without any reason phrase ? */
				87	msg->sl.st.r = ptr - msg_start;
				88	msg->sl.st.r_l = 0;
				89	goto http_msg_rpline_eol;
				90
				91	case HTTP_MSG_RPCODE_SP:
				92	http_msg_rpcode_sp:
				93	if (likely(!HTTP_IS_LWS(*ptr))) {
				94	msg->sl.st.r = ptr - msg_start;
				95	goto http_msg_rpreason;
				96	}
				97	if (likely(HTTP_IS_SPHT(*ptr)))
				98	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				99	/* so it's a CR/LF, so there is no reason phrase */
				100	goto http_msg_rsp_reason;
				101
				102	case HTTP_MSG_RPREASON:
				103	http_msg_rpreason:
				104	if (likely(!HTTP_IS_CRLF(*ptr)))
				105	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
				106	msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
				107	http_msg_rpline_eol:
				108	/* We have seen the end of line. Note that we do not
				109	* necessarily have the \n yet, but at least we know that we
				110	* have EITHER \r OR \n, otherwise the response would not be
				111	* complete. We can then record the response length and return
				112	* to the caller which will be able to register it.
				113	*/
				114	msg->sl.st.l = ptr - msg_start - msg->sol;
				115	return ptr;
				116
				117	default:
				118	#ifdef DEBUG_FULL
				119	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				120	exit(1);
				121	#endif
				122	;
				123	}
				124
				125	http_msg_ood:
				126	/* out of valid data */
				127	if (ret_state)
				128	*ret_state = state;
				129	if (ret_ptr)
				130	*ret_ptr = ptr - msg_start;
				131	return NULL;
				132	}
				133
				134	/*
				135	* This function parses a request line between <ptr> and <end>, starting with
				136	* parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
				137	* HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
				138	* will give undefined results.
				139	* Note that it is upon the caller's responsibility to ensure that ptr < end,
				140	* and that msg->sol points to the beginning of the request.
				141	* If a complete line is found (which implies that at least one CR or LF is
				142	* found before <end>, the updated <ptr> is returned, otherwise NULL is
				143	* returned indicating an incomplete line (which does not mean that parts have
				144	* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
				145	* non-NULL, they are fed with the new <ptr> and <state> values to be passed
				146	* upon next call.
				147	*
				148	* This function was intentionally designed to be called from
				149	* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
				150	* within its state machine and use the same macros, hence the need for same
				151	* labels and variable names. Note that msg->sol is left unchanged.
				152	*/
				153	const char http_parse_reqline(struct http_msg msg,
				154	enum h1_state state, const char ptr, const char end,
				155	unsigned int ret_ptr, enum h1_state ret_state)
				156	{
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	157	const char *msg_start = ci_head(msg->chn);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	158
				159	switch (state) {
				160	case HTTP_MSG_RQMETH:
				161	http_msg_rqmeth:
				162	if (likely(HTTP_IS_TOKEN(*ptr)))
				163	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
				164
				165	if (likely(HTTP_IS_SPHT(*ptr))) {
				166	msg->sl.rq.m_l = ptr - msg_start;
				167	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
				168	}
				169
				170	if (likely(HTTP_IS_CRLF(*ptr))) {
				171	/* HTTP 0.9 request */
				172	msg->sl.rq.m_l = ptr - msg_start;
				173	http_msg_req09_uri:
				174	msg->sl.rq.u = ptr - msg_start;
				175	http_msg_req09_uri_e:
				176	msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
				177	http_msg_req09_ver:
				178	msg->sl.rq.v = ptr - msg_start;
				179	msg->sl.rq.v_l = 0;
				180	goto http_msg_rqline_eol;
				181	}
				182	msg->err_state = HTTP_MSG_RQMETH;
				183	state = HTTP_MSG_ERROR;
				184	break;
				185
				186	case HTTP_MSG_RQMETH_SP:
				187	http_msg_rqmeth_sp:
				188	if (likely(!HTTP_IS_LWS(*ptr))) {
				189	msg->sl.rq.u = ptr - msg_start;
				190	goto http_msg_rquri;
				191	}
				192	if (likely(HTTP_IS_SPHT(*ptr)))
				193	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
				194	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				195	goto http_msg_req09_uri;
				196
				197	case HTTP_MSG_RQURI:
				198	http_msg_rquri:
				199	#if defined(__x86_64__) \|\| \
				200	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				201	defined(__ARM_ARCH_7A__)
				202	/* speedup: skip bytes not between 0x21 and 0x7e inclusive */
				203	while (ptr <= end - sizeof(int)) {
				204	int x = (int )ptr - 0x21212121;
				205	if (x & 0x80808080)
				206	break;
				207
				208	x -= 0x5e5e5e5e;
				209	if (!(x & 0x80808080))
				210	break;
				211
				212	ptr += sizeof(int);
				213	}
				214	#endif
				215	if (ptr >= end) {
				216	state = HTTP_MSG_RQURI;
				217	goto http_msg_ood;
				218	}
				219	http_msg_rquri2:
				220	if (likely((unsigned char)(ptr - 33) <= 93)) / 33 to 126 included */
				221	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
				222
				223	if (likely(HTTP_IS_SPHT(*ptr))) {
				224	msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
				225	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
				226	}
				227
				228	if (likely((unsigned char)*ptr >= 128)) {
				229	/* non-ASCII chars are forbidden unless option
				230	* accept-invalid-http-request is enabled in the frontend.
				231	* In any case, we capture the faulty char.
				232	*/
				233	if (msg->err_pos < -1)
				234	goto invalid_char;
				235	if (msg->err_pos == -1)
				236	msg->err_pos = ptr - msg_start;
				237	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
				238	}
				239
				240	if (likely(HTTP_IS_CRLF(*ptr))) {
				241	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				242	goto http_msg_req09_uri_e;
				243	}
				244
				245	/* OK forbidden chars, 0..31 or 127 */
				246	invalid_char:
				247	msg->err_pos = ptr - msg_start;
				248	msg->err_state = HTTP_MSG_RQURI;
				249	state = HTTP_MSG_ERROR;
				250	break;
				251
				252	case HTTP_MSG_RQURI_SP:
				253	http_msg_rquri_sp:
				254	if (likely(!HTTP_IS_LWS(*ptr))) {
				255	msg->sl.rq.v = ptr - msg_start;
				256	goto http_msg_rqver;
				257	}
				258	if (likely(HTTP_IS_SPHT(*ptr)))
				259	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
				260	/* so it's a CR/LF, meaning an HTTP 0.9 request */
				261	goto http_msg_req09_ver;
				262
				263	case HTTP_MSG_RQVER:
				264	http_msg_rqver:
				265	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				266	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
				267
				268	if (likely(HTTP_IS_CRLF(*ptr))) {
				269	msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
				270	http_msg_rqline_eol:
				271	/* We have seen the end of line. Note that we do not
				272	* necessarily have the \n yet, but at least we know that we
				273	* have EITHER \r OR \n, otherwise the request would not be
				274	* complete. We can then record the request length and return
				275	* to the caller which will be able to register it.
				276	*/
				277	msg->sl.rq.l = ptr - msg_start - msg->sol;
				278	return ptr;
				279	}
				280
				281	/* neither an HTTP_VER token nor a CRLF */
				282	msg->err_state = HTTP_MSG_RQVER;
				283	state = HTTP_MSG_ERROR;
				284	break;
				285
				286	default:
				287	#ifdef DEBUG_FULL
				288	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				289	exit(1);
				290	#endif
				291	;
				292	}
				293
				294	http_msg_ood:
				295	/* out of valid data */
				296	if (ret_state)
				297	*ret_state = state;
				298	if (ret_ptr)
				299	*ret_ptr = ptr - msg_start;
				300	return NULL;
				301	}
				302
				303	/*
				304	* This function parses an HTTP message, either a request or a response,
				305	* depending on the initial msg->msg_state. The caller is responsible for
				306	* ensuring that the message does not wrap. The function can be preempted
				307	* everywhere when data are missing and recalled at the exact same location
				308	* with no information loss. The message may even be realigned between two
				309	* calls. The header index is re-initialized when switching from
				310	* MSG_R[PQ]BEFORE to MSG_RPVER\|MSG_RQMETH. It modifies msg->sol among other
				311	* fields. Note that msg->sol will be initialized after completing the first
				312	* state, so that none of the msg pointers has to be initialized prior to the
				313	* first call.
				314	*/
				315	void http_msg_analyzer(struct http_msg msg, struct hdr_idx idx)
				316	{
				317	enum h1_state state; /* updated only when leaving the FSM */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	318	register const char ptr, end; /* request pointers, to avoid dereferences */
Willy Tarreau	950a8a6	2018-09-06 10:48:15 +0200	[diff] [blame]	319	struct buffer *buf = &msg->chn->buf;
				320	char *input = b_head(buf);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	321
				322	state = msg->msg_state;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	323	ptr = input + msg->next;
				324	end = b_stop(buf);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	325
				326	if (unlikely(ptr >= end))
				327	goto http_msg_ood;
				328
				329	switch (state) {
				330	/*
				331	* First, states that are specific to the response only.
				332	* We check them first so that request and headers are
				333	* closer to each other (accessed more often).
				334	*/
				335	case HTTP_MSG_RPBEFORE:
				336	http_msg_rpbefore:
				337	if (likely(HTTP_IS_TOKEN(*ptr))) {
				338	/* we have a start of message, but we have to check
				339	* first if we need to remove some CRLF. We can only
				340	* do this when o=0.
				341	*/
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	342	if (unlikely(ptr != input)) {
				343	if (co_data(msg->chn))
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	344	goto http_msg_ood;
				345	/* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau	72a100b	2018-07-10 09:59:31 +0200	[diff] [blame]	346	b_del(buf, ptr - input);
Willy Tarreau	950a8a6	2018-09-06 10:48:15 +0200	[diff] [blame]	347	input = b_head(buf);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	348	}
				349	msg->sol = 0;
				350	msg->sl.st.l = 0; /* used in debug mode */
				351	hdr_idx_init(idx);
				352	state = HTTP_MSG_RPVER;
				353	goto http_msg_rpver;
				354	}
				355
				356	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				357	state = HTTP_MSG_RPBEFORE;
				358	goto http_msg_invalid;
				359	}
				360
				361	if (unlikely(*ptr == '\n'))
				362	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				363	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
				364	/* stop here */
				365
				366	case HTTP_MSG_RPBEFORE_CR:
				367	http_msg_rpbefore_cr:
				368	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
				369	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				370	/* stop here */
				371
				372	case HTTP_MSG_RPVER:
				373	http_msg_rpver:
				374	case HTTP_MSG_RPVER_SP:
				375	case HTTP_MSG_RPCODE:
				376	case HTTP_MSG_RPCODE_SP:
				377	case HTTP_MSG_RPREASON:
				378	ptr = (char *)http_parse_stsline(msg,
				379	state, ptr, end,
				380	&msg->next, &msg->msg_state);
				381	if (unlikely(!ptr))
				382	return;
				383
				384	/* we have a full response and we know that we have either a CR
				385	* or an LF at <ptr>.
				386	*/
				387	hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
				388
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	389	msg->sol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	390	if (likely(*ptr == '\r'))
				391	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
				392	goto http_msg_rpline_end;
				393
				394	case HTTP_MSG_RPLINE_END:
				395	http_msg_rpline_end:
				396	/* msg->sol must point to the first of CR or LF. */
				397	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
				398	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				399	/* stop here */
				400
				401	/*
				402	* Second, states that are specific to the request only
				403	*/
				404	case HTTP_MSG_RQBEFORE:
				405	http_msg_rqbefore:
				406	if (likely(HTTP_IS_TOKEN(*ptr))) {
				407	/* we have a start of message, but we have to check
				408	* first if we need to remove some CRLF. We can only
				409	* do this when o=0.
				410	*/
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	411	if (likely(ptr != input)) {
				412	if (co_data(msg->chn))
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	413	goto http_msg_ood;
				414	/* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau	72a100b	2018-07-10 09:59:31 +0200	[diff] [blame]	415	b_del(buf, ptr - input);
Willy Tarreau	950a8a6	2018-09-06 10:48:15 +0200	[diff] [blame]	416	input = b_head(buf);
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	417	}
				418	msg->sol = 0;
				419	msg->sl.rq.l = 0; /* used in debug mode */
				420	state = HTTP_MSG_RQMETH;
				421	goto http_msg_rqmeth;
				422	}
				423
				424	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				425	state = HTTP_MSG_RQBEFORE;
				426	goto http_msg_invalid;
				427	}
				428
				429	if (unlikely(*ptr == '\n'))
				430	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
				431	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
				432	/* stop here */
				433
				434	case HTTP_MSG_RQBEFORE_CR:
				435	http_msg_rqbefore_cr:
				436	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
				437	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
				438	/* stop here */
				439
				440	case HTTP_MSG_RQMETH:
				441	http_msg_rqmeth:
				442	case HTTP_MSG_RQMETH_SP:
				443	case HTTP_MSG_RQURI:
				444	case HTTP_MSG_RQURI_SP:
				445	case HTTP_MSG_RQVER:
				446	ptr = (char *)http_parse_reqline(msg,
				447	state, ptr, end,
				448	&msg->next, &msg->msg_state);
				449	if (unlikely(!ptr))
				450	return;
				451
				452	/* we have a full request and we know that we have either a CR
				453	* or an LF at <ptr>.
				454	*/
				455	hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
				456
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	457	msg->sol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	458	if (likely(*ptr == '\r'))
				459	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
				460	goto http_msg_rqline_end;
				461
				462	case HTTP_MSG_RQLINE_END:
				463	http_msg_rqline_end:
				464	/* check for HTTP/0.9 request : no version information available.
				465	* msg->sol must point to the first of CR or LF.
				466	*/
				467	if (unlikely(msg->sl.rq.v_l == 0))
				468	goto http_msg_last_lf;
				469
				470	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
				471	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				472	/* stop here */
				473
				474	/*
				475	* Common states below
				476	*/
				477	case HTTP_MSG_HDR_FIRST:
				478	http_msg_hdr_first:
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	479	msg->sol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	480	if (likely(!HTTP_IS_CRLF(*ptr))) {
				481	goto http_msg_hdr_name;
				482	}
				483
				484	if (likely(*ptr == '\r'))
				485	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				486	goto http_msg_last_lf;
				487
				488	case HTTP_MSG_HDR_NAME:
				489	http_msg_hdr_name:
				490	/* assumes msg->sol points to the first char */
				491	if (likely(HTTP_IS_TOKEN(*ptr)))
				492	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				493
				494	if (likely(*ptr == ':'))
				495	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				496
				497	if (likely(msg->err_pos < -1) \|\| *ptr == '\n') {
				498	state = HTTP_MSG_HDR_NAME;
				499	goto http_msg_invalid;
				500	}
				501
				502	if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	503	msg->err_pos = ptr - input; /* >= 0 now */
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	504
				505	/* and we still accept this non-token character */
				506	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				507
				508	case HTTP_MSG_HDR_L1_SP:
				509	http_msg_hdr_l1_sp:
				510	/* assumes msg->sol points to the first char */
				511	if (likely(HTTP_IS_SPHT(*ptr)))
				512	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				513
				514	/* header value can be basically anything except CR/LF */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	515	msg->sov = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	516
				517	if (likely(!HTTP_IS_CRLF(*ptr))) {
				518	goto http_msg_hdr_val;
				519	}
				520
				521	if (likely(*ptr == '\r'))
				522	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
				523	goto http_msg_hdr_l1_lf;
				524
				525	case HTTP_MSG_HDR_L1_LF:
				526	http_msg_hdr_l1_lf:
				527	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
				528	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
				529
				530	case HTTP_MSG_HDR_L1_LWS:
				531	http_msg_hdr_l1_lws:
				532	if (likely(HTTP_IS_SPHT(*ptr))) {
				533	/* replace HT,CR,LF with spaces */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	534	for (; input + msg->sov < ptr; msg->sov++)
				535	input[msg->sov] = ' ';
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	536	goto http_msg_hdr_l1_sp;
				537	}
				538	/* we had a header consisting only in spaces ! */
				539	msg->eol = msg->sov;
				540	goto http_msg_complete_header;
				541
				542	case HTTP_MSG_HDR_VAL:
				543	http_msg_hdr_val:
				544	/* assumes msg->sol points to the first char, and msg->sov
				545	* points to the first character of the value.
				546	*/
				547
				548	/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
				549	* and lower. In fact since most of the time is spent in the loop, we
				550	* also remove the sign bit test so that bytes 0x8e..0x0d break the
				551	* loop, but we don't care since they're very rare in header values.
				552	*/
				553	#if defined(__x86_64__)
				554	while (ptr <= end - sizeof(long)) {
				555	if (((long )ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
				556	goto http_msg_hdr_val2;
				557	ptr += sizeof(long);
				558	}
				559	#endif
				560	#if defined(__x86_64__) \|\| \
				561	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				562	defined(__ARM_ARCH_7A__)
				563	while (ptr <= end - sizeof(int)) {
				564	if (((int)ptr - 0x0e0e0e0e) & 0x80808080)
				565	goto http_msg_hdr_val2;
				566	ptr += sizeof(int);
				567	}
				568	#endif
				569	if (ptr >= end) {
				570	state = HTTP_MSG_HDR_VAL;
				571	goto http_msg_ood;
				572	}
				573	http_msg_hdr_val2:
				574	if (likely(!HTTP_IS_CRLF(*ptr)))
				575	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
				576
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	577	msg->eol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	578	/* Note: we could also copy eol into ->eoh so that we have the
				579	* real header end in case it ends with lots of LWS, but is this
				580	* really needed ?
				581	*/
				582	if (likely(*ptr == '\r'))
				583	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
				584	goto http_msg_hdr_l2_lf;
				585
				586	case HTTP_MSG_HDR_L2_LF:
				587	http_msg_hdr_l2_lf:
				588	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
				589	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
				590
				591	case HTTP_MSG_HDR_L2_LWS:
				592	http_msg_hdr_l2_lws:
				593	if (unlikely(HTTP_IS_SPHT(*ptr))) {
				594	/* LWS: replace HT,CR,LF with spaces */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	595	for (; input + msg->eol < ptr; msg->eol++)
				596	input[msg->eol] = ' ';
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	597	goto http_msg_hdr_val;
				598	}
				599	http_msg_complete_header:
				600	/*
				601	* It was a new header, so the last one is finished.
				602	* Assumes msg->sol points to the first char, msg->sov points
				603	* to the first character of the value and msg->eol to the
				604	* first CR or LF so we know how the line ends. We insert last
				605	* header into the index.
				606	*/
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	607	if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r',
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	608	idx, idx->tail) < 0)) {
				609	state = HTTP_MSG_HDR_L2_LWS;
				610	goto http_msg_invalid;
				611	}
				612
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	613	msg->sol = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	614	if (likely(!HTTP_IS_CRLF(*ptr))) {
				615	goto http_msg_hdr_name;
				616	}
				617
				618	if (likely(*ptr == '\r'))
				619	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				620	goto http_msg_last_lf;
				621
				622	case HTTP_MSG_LAST_LF:
				623	http_msg_last_lf:
				624	/* Assumes msg->sol points to the first of either CR or LF.
				625	* Sets ->sov and ->next to the total header length, ->eoh to
				626	* the last CRLF, and ->eol to the last CRLF length (1 or 2).
				627	*/
				628	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
				629	ptr++;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	630	msg->sov = msg->next = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	631	msg->eoh = msg->sol;
				632	msg->sol = 0;
				633	msg->eol = msg->sov - msg->eoh;
				634	msg->msg_state = HTTP_MSG_BODY;
				635	return;
				636
				637	case HTTP_MSG_ERROR:
				638	/* this may only happen if we call http_msg_analyser() twice with an error */
				639	break;
				640
				641	default:
				642	#ifdef DEBUG_FULL
				643	fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
				644	exit(1);
				645	#endif
				646	;
				647	}
				648	http_msg_ood:
				649	/* out of data */
				650	msg->msg_state = state;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	651	msg->next = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	652	return;
				653
				654	http_msg_invalid:
				655	/* invalid message */
				656	msg->err_state = state;
				657	msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	658	msg->next = ptr - input;
Willy Tarreau	8740c8b	2017-09-21 10:22:25 +0200	[diff] [blame]	659	return;
				660	}
				661
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	662	/* This function parses a contiguous HTTP/1 headers block starting at <start>
				663	* and ending before <stop>, at once, and converts it to a list of (name,value)
				664	* pairs representing header fields into the array <hdr> of size <hdr_num>,
				665	* whose last entry will have an empty name and an empty value. If <hdr_num> is
				666	* too small to represent the whole message, an error is returned. If <h1m> is
				667	* not NULL, some protocol elements such as content-length and transfer-encoding
				668	* will be parsed and stored there as well.
				669	*
				670	* For now it's limited to the response. If the header block is incomplete,
				671	* 0 is returned, waiting to be called again with more data to try it again.
				672	*
				673	* The code derived from the main HTTP/1 parser above but was simplified and
				674	* optimized to process responses produced or forwarded by haproxy. The caller
				675	* is responsible for ensuring that the message doesn't wrap, and should ensure
				676	* it is complete to avoid having to retry the operation after a failed
				677	* attempt. The message is not supposed to be invalid, which is why a few
				678	* properties such as the character set used in the header field names are not
				679	* checked. In case of an unparsable response message, a negative value will be
				680	* returned with h1m->err_pos and h1m->err_state matching the location and
				681	* state where the error was met. Leading blank lines are tolerated but not
				682	* recommended.
				683	*
				684	* This function returns :
				685	* -1 in case of error. In this case, h1m->err_state is filled (if h1m is
				686	* set) with the state the error occurred in and h1m->err_pos with
				687	* the position relative to <start>
				688	* -2 if the output is full (hdr_num reached). err_state and err_pos also
				689	* indicate where it failed.
				690	* 0 in case of missing data.
				691	* > 0 on success, it then corresponds to the number of bytes read since
				692	* <start> so that the caller can go on with the payload.
				693	*/
				694	int h1_headers_to_hdr_list(char start, const char stop,
				695	struct http_hdr *hdr, unsigned int hdr_num,
				696	struct h1m *h1m)
				697	{
				698	enum h1_state state = HTTP_MSG_RPBEFORE;
				699	register char *ptr = start;
				700	register const char *end = stop;
				701	unsigned int hdr_count = 0;
				702	unsigned int code = 0; /* status code, ASCII form */
				703	unsigned int st_c; /* beginning of status code, relative to msg_start */
				704	unsigned int st_c_l; /* length of status code */
				705	unsigned int sol = 0; /* start of line */
				706	unsigned int col = 0; /* position of the colon */
				707	unsigned int eol = 0; /* end of line */
				708	unsigned int sov = 0; /* start of value */
				709	unsigned int skip = 0; /* number of bytes skipped at the beginning */
				710	struct ist n, v; /* header name and value during parsing */
				711
				712	if (unlikely(ptr >= end))
				713	goto http_msg_ood;
				714
				715	switch (state) {
				716	case HTTP_MSG_RPBEFORE:
				717	http_msg_rpbefore:
				718	if (likely(HTTP_IS_TOKEN(*ptr))) {
				719	/* we have a start of message, we may have skipped some
				720	* heading CRLF. Skip them now.
				721	*/
				722	skip += ptr - start;
				723	start = ptr;
				724
				725	sol = 0;
				726	hdr_count = 0;
				727	state = HTTP_MSG_RPVER;
				728	goto http_msg_rpver;
				729	}
				730
				731	if (unlikely(!HTTP_IS_CRLF(*ptr))) {
				732	state = HTTP_MSG_RPBEFORE;
				733	goto http_msg_invalid;
				734	}
				735
				736	if (unlikely(*ptr == '\n'))
				737	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				738	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
				739	/* stop here */
				740
				741	case HTTP_MSG_RPBEFORE_CR:
				742	http_msg_rpbefore_cr:
				743	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
				744	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
				745	/* stop here */
				746
				747	case HTTP_MSG_RPVER:
				748	http_msg_rpver:
				749	if (likely(HTTP_IS_VER_TOKEN(*ptr)))
				750	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
				751
				752	if (likely(HTTP_IS_SPHT(*ptr))) {
				753	/* version length = ptr - start */
				754	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				755	}
				756	state = HTTP_MSG_RPVER;
				757	goto http_msg_invalid;
				758
				759	case HTTP_MSG_RPVER_SP:
				760	http_msg_rpver_sp:
				761	if (likely(!HTTP_IS_LWS(*ptr))) {
				762	code = 0;
				763	st_c = ptr - start;
				764	goto http_msg_rpcode;
				765	}
				766	if (likely(HTTP_IS_SPHT(*ptr)))
				767	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
				768	/* so it's a CR/LF, this is invalid */
				769	state = HTTP_MSG_RPVER_SP;
				770	goto http_msg_invalid;
				771
				772	case HTTP_MSG_RPCODE:
				773	http_msg_rpcode:
Willy Tarreau	1b4cf9b	2017-11-09 11:15:45 +0100	[diff] [blame]	774	if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreau	d22e83a	2017-10-31 08:02:24 +0100	[diff] [blame]	775	code = code * 10 + *ptr - '0';
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	776	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
				777	}
				778
Willy Tarreau	1b4cf9b	2017-11-09 11:15:45 +0100	[diff] [blame]	779	if (unlikely(!HTTP_IS_LWS(*ptr))) {
				780	state = HTTP_MSG_RPCODE;
				781	goto http_msg_invalid;
				782	}
				783
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	784	if (likely(HTTP_IS_SPHT(*ptr))) {
				785	st_c_l = ptr - start - st_c;
				786	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				787	}
				788
				789	/* so it's a CR/LF, so there is no reason phrase */
				790	st_c_l = ptr - start - st_c;
				791
				792	http_msg_rsp_reason:
				793	/* reason = ptr - start; */
				794	/* reason length = 0 */
				795	goto http_msg_rpline_eol;
				796
				797	case HTTP_MSG_RPCODE_SP:
				798	http_msg_rpcode_sp:
				799	if (likely(!HTTP_IS_LWS(*ptr))) {
				800	/* reason = ptr - start */
				801	goto http_msg_rpreason;
				802	}
				803	if (likely(HTTP_IS_SPHT(*ptr)))
				804	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
				805	/* so it's a CR/LF, so there is no reason phrase */
				806	goto http_msg_rsp_reason;
				807
				808	case HTTP_MSG_RPREASON:
				809	http_msg_rpreason:
				810	if (likely(!HTTP_IS_CRLF(*ptr)))
				811	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
				812	/* reason length = ptr - start - reason */
				813	http_msg_rpline_eol:
				814	/* We have seen the end of line. Note that we do not
				815	* necessarily have the \n yet, but at least we know that we
				816	* have EITHER \r OR \n, otherwise the response would not be
				817	* complete. We can then record the response length and return
				818	* to the caller which will be able to register it.
				819	*/
				820
				821	if (unlikely(hdr_count >= hdr_num)) {
				822	state = HTTP_MSG_RPREASON;
				823	goto http_output_full;
				824	}
				825	http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + st_c, st_c_l));
Willy Tarreau	d22e83a	2017-10-31 08:02:24 +0100	[diff] [blame]	826	if (h1m)
				827	h1m->status = code;
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	828
				829	sol = ptr - start;
				830	if (likely(*ptr == '\r'))
				831	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
				832	goto http_msg_rpline_end;
				833
				834	case HTTP_MSG_RPLINE_END:
				835	http_msg_rpline_end:
				836	/* sol must point to the first of CR or LF. */
				837	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
				838	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
				839	/* stop here */
				840
				841	case HTTP_MSG_HDR_FIRST:
				842	http_msg_hdr_first:
				843	sol = ptr - start;
				844	if (likely(!HTTP_IS_CRLF(*ptr))) {
				845	goto http_msg_hdr_name;
				846	}
				847
				848	if (likely(*ptr == '\r'))
				849	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				850	goto http_msg_last_lf;
				851
				852	case HTTP_MSG_HDR_NAME:
				853	http_msg_hdr_name:
				854	/* assumes sol points to the first char */
				855	if (likely(HTTP_IS_TOKEN(*ptr))) {
				856	/* turn it to lower case if needed */
				857	if (isupper((unsigned char)*ptr))
				858	ptr = tolower(ptr);
				859	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				860	}
				861
				862	if (likely(*ptr == ':')) {
				863	col = ptr - start;
				864	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				865	}
				866
				867	if (HTTP_IS_LWS(*ptr)) {
				868	state = HTTP_MSG_HDR_NAME;
				869	goto http_msg_invalid;
				870	}
				871
				872	/* now we have a non-token character in the header field name,
				873	* it's up to the H1 layer to have decided whether or not it
				874	* was acceptable. If we find it here, it was considered
				875	* acceptable due to configuration rules so we obey.
				876	*/
				877	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
				878
				879	case HTTP_MSG_HDR_L1_SP:
				880	http_msg_hdr_l1_sp:
				881	/* assumes sol points to the first char */
				882	if (likely(HTTP_IS_SPHT(*ptr)))
				883	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
				884
				885	/* header value can be basically anything except CR/LF */
				886	sov = ptr - start;
				887
				888	if (likely(!HTTP_IS_CRLF(*ptr))) {
				889	goto http_msg_hdr_val;
				890	}
				891
				892	if (likely(*ptr == '\r'))
				893	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
				894	goto http_msg_hdr_l1_lf;
				895
				896	case HTTP_MSG_HDR_L1_LF:
				897	http_msg_hdr_l1_lf:
				898	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
				899	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
				900
				901	case HTTP_MSG_HDR_L1_LWS:
				902	http_msg_hdr_l1_lws:
				903	if (likely(HTTP_IS_SPHT(*ptr))) {
				904	/* replace HT,CR,LF with spaces */
				905	for (; start + sov < ptr; sov++)
				906	start[sov] = ' ';
				907	goto http_msg_hdr_l1_sp;
				908	}
				909	/* we had a header consisting only in spaces ! */
				910	eol = sov;
				911	goto http_msg_complete_header;
				912
				913	case HTTP_MSG_HDR_VAL:
				914	http_msg_hdr_val:
				915	/* assumes sol points to the first char, and sov
				916	* points to the first character of the value.
				917	*/
				918
				919	/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
				920	* and lower. In fact since most of the time is spent in the loop, we
				921	* also remove the sign bit test so that bytes 0x8e..0x0d break the
				922	* loop, but we don't care since they're very rare in header values.
				923	*/
				924	#if defined(__x86_64__)
				925	while (ptr <= end - sizeof(long)) {
				926	if (((long )ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
				927	goto http_msg_hdr_val2;
				928	ptr += sizeof(long);
				929	}
				930	#endif
				931	#if defined(__x86_64__) \|\| \
				932	defined(__i386__) \|\| defined(__i486__) \|\| defined(__i586__) \|\| defined(__i686__) \|\| \
				933	defined(__ARM_ARCH_7A__)
				934	while (ptr <= end - sizeof(int)) {
				935	if (((int)ptr - 0x0e0e0e0e) & 0x80808080)
				936	goto http_msg_hdr_val2;
				937	ptr += sizeof(int);
				938	}
				939	#endif
				940	if (ptr >= end) {
				941	state = HTTP_MSG_HDR_VAL;
				942	goto http_msg_ood;
				943	}
				944	http_msg_hdr_val2:
				945	if (likely(!HTTP_IS_CRLF(*ptr)))
				946	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
				947
				948	eol = ptr - start;
				949	/* Note: we could also copy eol into ->eoh so that we have the
				950	* real header end in case it ends with lots of LWS, but is this
				951	* really needed ?
				952	*/
				953	if (likely(*ptr == '\r'))
				954	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
				955	goto http_msg_hdr_l2_lf;
				956
				957	case HTTP_MSG_HDR_L2_LF:
				958	http_msg_hdr_l2_lf:
				959	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
				960	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
				961
				962	case HTTP_MSG_HDR_L2_LWS:
				963	http_msg_hdr_l2_lws:
				964	if (unlikely(HTTP_IS_SPHT(*ptr))) {
				965	/* LWS: replace HT,CR,LF with spaces */
				966	for (; start + eol < ptr; eol++)
				967	start[eol] = ' ';
				968	goto http_msg_hdr_val;
				969	}
				970	http_msg_complete_header:
				971	/*
				972	* It was a new header, so the last one is finished. Assumes
				973	* <sol> points to the first char of the name, <col> to the
				974	* colon, <sov> points to the first character of the value and
				975	* <eol> to the first CR or LF so we know how the line ends. We
				976	* will trim spaces around the value. It's possible to do it by
				977	* adjusting <eol> and <sov> which are no more used after this.
				978	* We can add the header field to the list.
				979	*/
				980	while (sov < eol && HTTP_IS_LWS(start[sov]))
				981	sov++;
				982
				983	while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
				984	eol--;
				985
				986
				987	n = ist2(start + sol, col - sol);
				988	v = ist2(start + sov, eol - sov);
				989
				990	if (unlikely(hdr_count >= hdr_num)) {
				991	state = HTTP_MSG_HDR_L2_LWS;
				992	goto http_output_full;
				993	}
				994	http_set_hdr(&hdr[hdr_count++], n, v);
				995
				996	if (h1m) {
				997	long long cl;
				998
Willy Tarreau	d22e83a	2017-10-31 08:02:24 +0100	[diff] [blame]	999	if (h1m->status >= 100 && h1m->status < 200)
				1000	h1m->curr_len = h1m->body_len = 0;
				1001	else if (h1m->status == 304 \|\| h1m->status == 204) {
Willy Tarreau	8ea0f38	2017-10-30 19:31:59 +0100	[diff] [blame]	1002	/* no contents, claim c-len is present and set to zero */
				1003	h1m->flags \|= H1_MF_CLEN;
				1004	h1m->curr_len = h1m->body_len = 0;
				1005	}
				1006	else if (isteq(n, ist("transfer-encoding"))) {
Willy Tarreau	794f9af	2017-07-26 09:07:47 +0200	[diff] [blame]	1007	h1m->flags &= ~H1_MF_CLEN;
				1008	h1m->flags \|= H1_MF_CHNK;
				1009	}
				1010	else if (isteq(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
				1011	h1m->flags \|= H1_MF_CLEN;
				1012	strl2llrc(v.ptr, v.len, &cl);
				1013	h1m->curr_len = h1m->body_len = cl;
				1014	}
				1015	}
				1016
				1017	sol = ptr - start;
				1018	if (likely(!HTTP_IS_CRLF(*ptr)))
				1019	goto http_msg_hdr_name;
				1020
				1021	if (likely(*ptr == '\r'))
				1022	EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
				1023	goto http_msg_last_lf;
				1024
				1025	case HTTP_MSG_LAST_LF:
				1026	http_msg_last_lf:
				1027	EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
				1028	ptr++;
				1029	/* <ptr> now points to the first byte of payload. If needed sol
				1030	* still points to the first of either CR or LF of the empty
				1031	* line ending the headers block.
				1032	*/
				1033	if (unlikely(hdr_count >= hdr_num)) {
				1034	state = HTTP_MSG_LAST_LF;
				1035	goto http_output_full;
				1036	}
				1037	http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
				1038	state = HTTP_MSG_BODY;
				1039	break;
				1040
				1041	default:
				1042	/* impossible states */
				1043	goto http_msg_invalid;
				1044	}
				1045
				1046	/* reaching here, we've parsed the whole message and the state is
				1047	* HTTP_MSG_BODY.
				1048	*/
				1049	return ptr - start + skip;
				1050
				1051	http_msg_ood:
				1052	/* out of data at <ptr> during state <state> */
				1053	return 0;
				1054
				1055	http_msg_invalid:
				1056	/* invalid message, error at <ptr> */
				1057	if (h1m) {
				1058	h1m->err_state = state;
				1059	h1m->err_pos = ptr - start + skip;
				1060	}
				1061	return -1;
				1062
				1063	http_output_full:
				1064	/* no more room to store the current header, error at <ptr> */
				1065	if (h1m) {
				1066	h1m->err_state = state;
				1067	h1m->err_pos = ptr - start + skip;
				1068	}
				1069	return -2;
				1070	}
				1071
/* This function performs a very minimal parsing of the trailers block present
 * at offset <ofs> in <buf> for up to <max> bytes, and returns the number of
 * bytes to delete to skip the trailers, i.e. the distance between <ofs> and
 * the byte following the empty line which ends the block. Lines may end with
 * either LF or CRLF. It returns 0 if the end of the inspected area is reached
 * before the end of the trailers (missing input data), or -1 if a second CR
 * is met on a line before its LF (in which case the caller may have to decide
 * how to proceed, possibly eating everything). Positions are advanced using
 * b_next() and measured using b_dist() on <buf>.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *stop = b_peek(buf, ofs + max);
	int count = ofs;

	while (1) {
		/* p1: first CR or LF of the current line, p2: its LF */
		const char *p1 = NULL, *p2 = NULL;
		const char *start = b_peek(buf, count);
		const char *ptr = start;

		/* scan current line and stop at LF or CRLF */
		while (1) {
			if (ptr == stop)
				return 0;

			if (*ptr == '\n') {
				if (!p1)
					p1 = ptr;
				p2 = ptr;
				break;
			}

			if (*ptr == '\r') {
				/* only one CR is accepted per line */
				if (p1)
					return -1;
				p1 = ptr;
			}

			ptr = b_next(buf, ptr);
		}

		/* after LF; point to beginning of next line */
		p2 = b_next(buf, p2);
		count += b_dist(buf, start, p2);

		/* LF/CRLF at beginning of line => end of trailers at p2.
		 * Everything was scheduled for forwarding, there's nothing left
		 * from this message. */
		if (p1 == start)
			break;
		/* OK, next line then */
	}
	return count - ofs;
}
				1122
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1123	/* This function skips trailers in the buffer associated with HTTP message
				1124	* <msg>. The first visited position is msg->next. If the end of the trailers is
				1125	* found, the function returns >0. So, the caller can automatically schedul it
				1126	* to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
				1127	* data are available, the function does not change anything except maybe
				1128	* msg->sol if it could parse some lines, and returns zero. If a parse error
				1129	* is encountered, the function returns < 0 and does not change anything except
				1130	* maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
				1131	* state before calling this function, which implies that all non-trailers data
				1132	* have already been scheduled for forwarding, and that msg->next exactly
				1133	* matches the length of trailers already parsed and not forwarded. It is also
				1134	* important to note that this function is designed to be able to parse wrapped
				1135	* headers at end of buffer.
				1136	*/
				1137	int http_forward_trailers(struct http_msg *msg)
				1138	{
Willy Tarreau	c9fa048	2018-07-10 17:43:27 +0200	[diff] [blame]	1139	const struct buffer *buf = &msg->chn->buf;
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	1140	const char *parse = ci_head(msg->chn);
				1141	const char *stop = b_tail(buf);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1142
				1143	/* we have msg->next which points to next line. Look for CRLF. But
				1144	* first, we reset msg->sol */
				1145	msg->sol = 0;
				1146	while (1) {
				1147	const char p1 = NULL, p2 = NULL;
Willy Tarreau	188e230	2018-06-15 11:11:53 +0200	[diff] [blame]	1148	const char *start = c_ptr(msg->chn, msg->next + msg->sol);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1149	const char *ptr = start;
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1150
				1151	/* scan current line and stop at LF or CRLF */
				1152	while (1) {
				1153	if (ptr == stop)
				1154	return 0;
				1155
				1156	if (*ptr == '\n') {
				1157	if (!p1)
				1158	p1 = ptr;
				1159	p2 = ptr;
				1160	break;
				1161	}
				1162
				1163	if (*ptr == '\r') {
				1164	if (p1) {
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	1165	msg->err_pos = b_dist(buf, parse, ptr);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1166	return -1;
				1167	}
				1168	p1 = ptr;
				1169	}
				1170
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	1171	ptr = b_next(buf, ptr);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1172	}
				1173
				1174	/* after LF; point to beginning of next line */
Willy Tarreau	5e74b0b	2018-06-19 08:03:19 +0200	[diff] [blame]	1175	p2 = b_next(buf, p2);
				1176	msg->sol += b_dist(buf, start, p2);
Willy Tarreau	db4893d	2017-09-21 08:40:02 +0200	[diff] [blame]	1177
				1178	/* LF/CRLF at beginning of line => end of trailers at p2.
				1179	* Everything was scheduled for forwarding, there's nothing left
				1180	* from this message. */
				1181	if (p1 == start)
				1182	return 1;
				1183
				1184	/* OK, next line then */
				1185	}
				1186	}