Blame - include/haproxy/h1.h - haproxy

blob: 858db7d02f1f1e0e3110beeb72b42abbbce2201f [file] [log] [blame]

Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	1	/*
Willy Tarreau	5413a87	2020-06-02 19:33:08 +0200	[diff] [blame]	2	* include/haproxy/h1.h
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	3	* This file contains HTTP/1 protocol definitions.
				4	*
Willy Tarreau	5413a87	2020-06-02 19:33:08 +0200	[diff] [blame]	5	* Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	6	*
				7	* This library is free software; you can redistribute it and/or
				8	* modify it under the terms of the GNU Lesser General Public
				9	* License as published by the Free Software Foundation, version 2.1
				10	* exclusively.
				11	*
				12	* This library is distributed in the hope that it will be useful,
				13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				15	* Lesser General Public License for more details.
				16	*
				17	* You should have received a copy of the GNU Lesser General Public
				18	* License along with this library; if not, write to the Free Software
				19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
				20	*/
				21
Willy Tarreau	5413a87	2020-06-02 19:33:08 +0200	[diff] [blame]	22	#ifndef _HAPROXY_H1_H
				23	#define _HAPROXY_H1_H
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	24
Willy Tarreau	5413a87	2020-06-02 19:33:08 +0200	[diff] [blame]	25	#include <import/ist.h>
Willy Tarreau	4c7e4b7	2020-05-27 12:58:42 +0200	[diff] [blame]	26	#include <haproxy/api.h>
Willy Tarreau	5413a87	2020-06-02 19:33:08 +0200	[diff] [blame]	27	#include <haproxy/buf.h>
Willy Tarreau	cd72d8c	2020-06-02 19:11:26 +0200	[diff] [blame]	28	#include <haproxy/http.h>
Willy Tarreau	0017be0	2020-06-02 19:25:28 +0200	[diff] [blame]	29	#include <haproxy/http-hdr-t.h>
Willy Tarreau	5413a87	2020-06-02 19:33:08 +0200	[diff] [blame]	30	#include <haproxy/intops.h>
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	31
				32
				33	/* Possible states while parsing HTTP/1 messages (request\|response) */
				34	enum h1m_state {
				35	H1_MSG_RQBEFORE = 0, // request: leading LF, before start line
				36	H1_MSG_RQBEFORE_CR = 1, // request: leading CRLF, before start line
				37	/* these ones define a request start line */
				38	H1_MSG_RQMETH = 2, // parsing the Method
				39	H1_MSG_RQMETH_SP = 3, // space(s) after the Method
				40	H1_MSG_RQURI = 4, // parsing the Request URI
				41	H1_MSG_RQURI_SP = 5, // space(s) after the Request URI
				42	H1_MSG_RQVER = 6, // parsing the Request Version
				43	H1_MSG_RQLINE_END = 7, // end of request line (CR or LF)
				44
				45	H1_MSG_RPBEFORE = 8, // response: leading LF, before start line
				46	H1_MSG_RPBEFORE_CR = 9, // response: leading CRLF, before start line
				47
				48	/* these ones define a response start line */
				49	H1_MSG_RPVER = 10, // parsing the Response Version
				50	H1_MSG_RPVER_SP = 11, // space(s) after the Response Version
				51	H1_MSG_RPCODE = 12, // response code
				52	H1_MSG_RPCODE_SP = 13, // space(s) after the response code
				53	H1_MSG_RPREASON = 14, // response reason
				54	H1_MSG_RPLINE_END = 15, // end of response line (CR or LF)
				55
				56	/* common header processing */
				57	H1_MSG_HDR_FIRST = 16, // waiting for first header or last CRLF (no LWS possible)
				58	H1_MSG_HDR_NAME = 17, // parsing header name
				59	H1_MSG_HDR_COL = 18, // parsing header colon
				60	H1_MSG_HDR_L1_SP = 19, // parsing header LWS (SP\|HT) before value
				61	H1_MSG_HDR_L1_LF = 20, // parsing header LWS (LF) before value
				62	H1_MSG_HDR_L1_LWS = 21, // checking whether it's a new header or an LWS
				63	H1_MSG_HDR_VAL = 22, // parsing header value
				64	H1_MSG_HDR_L2_LF = 23, // parsing header LWS (LF) inside/after value
				65	H1_MSG_HDR_L2_LWS = 24, // checking whether it's a new header or an LWS
				66
				67	H1_MSG_LAST_LF = 25, // parsing last LF, last state for headers
				68
				69	/* Body processing. */
				70
				71	H1_MSG_CHUNK_SIZE = 26, // parsing the chunk size (RFC7230 #4.1)
				72	H1_MSG_DATA = 27, // skipping data chunk / content-length data
				73	H1_MSG_CHUNK_CRLF = 28, // skipping CRLF after data chunk
				74	H1_MSG_TRAILERS = 29, // trailers (post-data entity headers)
				75	/* we enter this state when we've received the end of the current message */
				76	H1_MSG_DONE = 30, // message end received, waiting for resync or close
				77	H1_MSG_TUNNEL = 31, // tunneled data after DONE
				78	} __attribute__((packed));
				79
				80
				81	/* HTTP/1 message flags (32 bit), for use in h1m->flags only */
				82	#define H1_MF_NONE 0x00000000
				83	#define H1_MF_CLEN 0x00000001 // content-length present
				84	#define H1_MF_CHNK 0x00000002 // chunk present, exclusive with c-l
				85	#define H1_MF_RESP 0x00000004 // this message is the response message
				86	#define H1_MF_TOLOWER 0x00000008 // turn the header names to lower case
				87	#define H1_MF_VER_11 0x00000010 // message indicates version 1.1 or above
				88	#define H1_MF_CONN_CLO 0x00000020 // message contains "connection: close"
				89	#define H1_MF_CONN_KAL 0x00000040 // message contains "connection: keep-alive"
				90	#define H1_MF_CONN_UPG 0x00000080 // message contains "connection: upgrade"
				91	#define H1_MF_XFER_LEN 0x00000100 // message xfer size can be determined
				92	#define H1_MF_XFER_ENC 0x00000200 // transfer-encoding is present
				93	#define H1_MF_NO_PHDR 0x00000400 // don't add pseudo-headers in the header list
Willy Tarreau	0f8fb6b	2019-01-04 10:48:03 +0100	[diff] [blame]	94	#define H1_MF_HDRS_ONLY 0x00000800 // parse headers only
Christopher Faulet	a51ebb7	2019-03-29 15:03:13 +0100	[diff] [blame]	95	#define H1_MF_CLEAN_CONN_HDR 0x00001000 // skip close/keep-alive values of connection headers during parsing
Christopher Faulet	4f0f88a	2019-08-10 11:17:44 +0200	[diff] [blame]	96	#define H1_MF_METH_CONNECT 0x00002000 // Set for a response to a CONNECT request
				97	#define H1_MF_METH_HEAD 0x00004000 // Set for a response to a HEAD request
Amaury Denoyelle	18ee5c3	2020-12-11 17:53:02 +0100	[diff] [blame]	98	#define H1_MF_UPG_WEBSOCKET 0x00008000 // Set for a Websocket upgrade handshake
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	99
				100	/* Note: for a connection to be persistent, we need this for the request :
				101	* - one of CLEN or CHNK
				102	* - version 1.0 and KAL and not CLO
				103	* - or version 1.1 and not CLO
				104	* For the response it's the same except that UPG must not appear either.
				105	* So in short, for a request it's (CLEN\|CHNK) > 0 && !CLO && (VER_11 \|\| KAL)
				106	* and for a response it's (CLEN\|CHNK) > 0 && !(CLO\|UPG) && (VER_11 \|\| KAL)
				107	*/
				108
				109
				110	/* basic HTTP/1 message state for use in parsers. The err_pos field is special,
				111	* it is pre-set to a negative value (-1 or -2), and once non-negative it contains
				112	* the relative position in the message of the first parse error. -2 is used to tell
				113	* the parser that we want to block the invalid message. -1 is used to only perform
				114	* a silent capture.
				115	*/
				116	struct h1m {
				117	enum h1m_state state; // H1 message state (H1_MSG_*)
				118	/* 24 bits available here */
				119	uint32_t flags; // H1 message flags (H1_MF_*)
				120	uint64_t curr_len; // content-length or last chunk length
				121	uint64_t body_len; // total known size of the body length
				122	uint32_t next; // next byte to parse, relative to buffer's head
				123	int err_pos; // position in the byte stream of the first error (H1 or H2)
				124	int err_state; // state where the first error was met (H1 or H2)
				125	};
				126
				127	/* basic H1 start line, describes either the request and the response */
				128	union h1_sl { /* useful start line pointers, relative to ->sol */
				129	struct {
				130	struct ist m; /* METHOD */
				131	struct ist u; /* URI */
				132	struct ist v; /* VERSION */
				133	enum http_meth_t meth; /* method */
				134	} rq; /* request line : field, length */
				135	struct {
				136	struct ist v; /* VERSION */
				137	struct ist c; /* CODE */
				138	struct ist r; /* REASON */
				139	uint16_t status; /* status code */
				140	} st; /* status line : field, length */
				141	};
				142
				143	int h1_headers_to_hdr_list(char start, const char stop,
				144	struct http_hdr *hdr, unsigned int hdr_num,
				145	struct h1m h1m, union h1_sl slp);
				146	int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max);
				147
				148	int h1_parse_cont_len_header(struct h1m h1m, struct ist value);
				149	void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value);
Christopher Faulet	a51ebb7	2019-03-29 15:03:13 +0100	[diff] [blame]	150	void h1_parse_connection_header(struct h1m h1m, struct ist value);
Amaury Denoyelle	18ee5c3	2020-12-11 17:53:02 +0100	[diff] [blame]	151	void h1_parse_upgrade_header(struct h1m *h1m, struct ist value);
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	152
Amaury Denoyelle	aad333a	2020-12-11 17:53:07 +0100	[diff] [blame]	153	void h1_generate_random_ws_input_key(char key_out[25]);
Amaury Denoyelle	c193823	2020-12-11 17:53:03 +0100	[diff] [blame]	154	void h1_calculate_ws_output_key(const char key, char result);
				155
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	156	/* for debugging, reports the HTTP/1 message state name */
				157	static inline const char *h1m_state_str(enum h1m_state msg_state)
				158	{
				159	switch (msg_state) {
				160	case H1_MSG_RQBEFORE: return "MSG_RQBEFORE";
				161	case H1_MSG_RQBEFORE_CR: return "MSG_RQBEFORE_CR";
				162	case H1_MSG_RQMETH: return "MSG_RQMETH";
				163	case H1_MSG_RQMETH_SP: return "MSG_RQMETH_SP";
				164	case H1_MSG_RQURI: return "MSG_RQURI";
				165	case H1_MSG_RQURI_SP: return "MSG_RQURI_SP";
				166	case H1_MSG_RQVER: return "MSG_RQVER";
				167	case H1_MSG_RQLINE_END: return "MSG_RQLINE_END";
				168	case H1_MSG_RPBEFORE: return "MSG_RPBEFORE";
				169	case H1_MSG_RPBEFORE_CR: return "MSG_RPBEFORE_CR";
				170	case H1_MSG_RPVER: return "MSG_RPVER";
				171	case H1_MSG_RPVER_SP: return "MSG_RPVER_SP";
				172	case H1_MSG_RPCODE: return "MSG_RPCODE";
				173	case H1_MSG_RPCODE_SP: return "MSG_RPCODE_SP";
				174	case H1_MSG_RPREASON: return "MSG_RPREASON";
				175	case H1_MSG_RPLINE_END: return "MSG_RPLINE_END";
				176	case H1_MSG_HDR_FIRST: return "MSG_HDR_FIRST";
				177	case H1_MSG_HDR_NAME: return "MSG_HDR_NAME";
				178	case H1_MSG_HDR_COL: return "MSG_HDR_COL";
				179	case H1_MSG_HDR_L1_SP: return "MSG_HDR_L1_SP";
				180	case H1_MSG_HDR_L1_LF: return "MSG_HDR_L1_LF";
				181	case H1_MSG_HDR_L1_LWS: return "MSG_HDR_L1_LWS";
				182	case H1_MSG_HDR_VAL: return "MSG_HDR_VAL";
				183	case H1_MSG_HDR_L2_LF: return "MSG_HDR_L2_LF";
				184	case H1_MSG_HDR_L2_LWS: return "MSG_HDR_L2_LWS";
				185	case H1_MSG_LAST_LF: return "MSG_LAST_LF";
				186	case H1_MSG_CHUNK_SIZE: return "MSG_CHUNK_SIZE";
				187	case H1_MSG_DATA: return "MSG_DATA";
				188	case H1_MSG_CHUNK_CRLF: return "MSG_CHUNK_CRLF";
				189	case H1_MSG_TRAILERS: return "MSG_TRAILERS";
				190	case H1_MSG_DONE: return "MSG_DONE";
				191	case H1_MSG_TUNNEL: return "MSG_TUNNEL";
				192	default: return "MSG_??????";
				193	}
				194	}
				195
				196	/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
				197	* a possible LF alone at the end of a chunk. The caller should adjust msg->next
				198	* in order to include this part into the next forwarding phase. Note that the
				199	* caller must ensure that head+start points to the first byte to parse. It
				200	* returns the number of bytes parsed on success, so the caller can set msg_state
				201	* to HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not
				202	* change anything and returns zero. Otherwise it returns a negative value
Ilya Shipitsin	77e3b4a	2020-03-10 12:06:11 +0500	[diff] [blame]	203	* indicating the error position relative to <stop>. Note: this function is
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	204	* designed to parse wrapped CRLF at the end of the buffer.
				205	*/
				206	static inline int h1_skip_chunk_crlf(const struct buffer *buf, int start, int stop)
				207	{
				208	const char *ptr = b_peek(buf, start);
				209	int bytes = 1;
				210
Christopher Faulet	22c57be	2019-04-19 14:12:27 +0200	[diff] [blame]	211	if (stop <= start)
				212	return 0;
				213
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	214	/* NB: we'll check data availability at the end. It's not a
				215	* problem because whatever we match first will be checked
				216	* against the correct length.
				217	*/
				218	if (*ptr == '\r') {
				219	bytes++;
				220	ptr++;
				221	if (ptr >= b_wrap(buf))
				222	ptr = b_orig(buf);
				223	}
				224
				225	if (bytes > stop - start)
				226	return 0;
				227
				228	if (*ptr != '\n') // negative position to stop
				229	return ptr - __b_peek(buf, stop);
				230
				231	return bytes;
				232	}
				233
				234	/* Parse the chunk size start at buf + start and stops before buf + stop. The
				235	* positions are relative to the buffer's head.
				236	* It returns the chunk size in <res> and the amount of bytes read this way :
				237	* < 0 : error at this position relative to <stop>
				238	* = 0 : not enough bytes to read a complete chunk size
				239	* > 0 : number of bytes successfully read that the caller can skip
				240	* On success, the caller should adjust its msg->next to point to the first
				241	* byte of data after the chunk size, so that we know we can forward exactly
				242	* msg->next bytes, and msg->sol to contain the exact number of bytes forming
				243	* the chunk size. That way it is always possible to differentiate between the
				244	* start of the body and the start of the data. Note: this function is designed
				245	* to parse wrapped CRLF at the end of the buffer.
				246	*/
Christopher Faulet	405f054	2021-01-27 15:17:13 +0100	[diff] [blame]	247	static inline int h1_parse_chunk_size(const struct buffer buf, int start, int stop, uint64_t res)
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	248	{
				249	const char *ptr = b_peek(buf, start);
				250	const char *ptr_old = ptr;
				251	const char *end = b_wrap(buf);
Christopher Faulet	405f054	2021-01-27 15:17:13 +0100	[diff] [blame]	252	uint64_t chunk = 0;
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	253
				254	stop -= start; // bytes left
				255	start = stop; // bytes to transfer
				256
				257	/* The chunk size is in the following form, though we are only
				258	* interested in the size and CRLF :
				259	* 1HEXDIGIT WSP *[ ';' extensions ] CRLF
				260	*/
				261	while (1) {
				262	int c;
				263	if (!stop)
				264	return 0;
				265	c = hex2i(*ptr);
				266	if (c < 0) /* not a hex digit anymore */
				267	break;
				268	if (unlikely(++ptr >= end))
				269	ptr = b_orig(buf);
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	270	chunk = (chunk << 4) + c;
Christopher Faulet	405f054	2021-01-27 15:17:13 +0100	[diff] [blame]	271	if (unlikely(chunk & 0xF0000000000000)) {
				272	/* Don't get more than 13 hexa-digit (2^52 - 1) to never fed possibly
				273	* bogus values from languages that use floats for their integers
				274	*/
				275	goto error;
				276	}
Willy Tarreau	afba57a	2018-12-11 13:44:24 +0100	[diff] [blame]	277	stop--;
				278	}
				279
				280	/* empty size not allowed */
				281	if (unlikely(ptr == ptr_old))
				282	goto error;
				283
				284	while (HTTP_IS_SPHT(*ptr)) {
				285	if (++ptr >= end)
				286	ptr = b_orig(buf);
				287	if (--stop == 0)
				288	return 0;
				289	}
				290
				291	/* Up to there, we know that at least one byte is present at *ptr. Check
				292	* for the end of chunk size.
				293	*/
				294	while (1) {
				295	if (likely(HTTP_IS_CRLF(*ptr))) {
				296	/* we now have a CR or an LF at ptr */
				297	if (likely(*ptr == '\r')) {
				298	if (++ptr >= end)
				299	ptr = b_orig(buf);
				300	if (--stop == 0)
				301	return 0;
				302	}
				303
				304	if (*ptr != '\n')
				305	goto error;
				306	if (++ptr >= end)
				307	ptr = b_orig(buf);
				308	--stop;
				309	/* done */
				310	break;
				311	}
				312	else if (likely(*ptr == ';')) {
				313	/* chunk extension, ends at next CRLF */
				314	if (++ptr >= end)
				315	ptr = b_orig(buf);
				316	if (--stop == 0)
				317	return 0;
				318
				319	while (!HTTP_IS_CRLF(*ptr)) {
				320	if (++ptr >= end)
				321	ptr = b_orig(buf);
				322	if (--stop == 0)
				323	return 0;
				324	}
				325	/* we have a CRLF now, loop above */
				326	continue;
				327	}
				328	else
				329	goto error;
				330	}
				331
				332	/* OK we found our CRLF and now <ptr> points to the next byte, which may
				333	* or may not be present. Let's return the number of bytes parsed.
				334	*/
				335	*res = chunk;
				336	return start - stop;
				337	error:
				338	*res = 0; // just to stop gcc's -Wuninitialized warning :-(
				339	return -stop;
				340	}
				341
				342	/* initializes an H1 message for a request */
				343	static inline struct h1m h1m_init_req(struct h1m h1m)
				344	{
				345	h1m->state = H1_MSG_RQBEFORE;
				346	h1m->next = 0;
				347	h1m->flags = H1_MF_NONE;
				348	h1m->curr_len = 0;
				349	h1m->body_len = 0;
				350	h1m->err_pos = -2;
				351	h1m->err_state = 0;
				352	return h1m;
				353	}
				354
				355	/* initializes an H1 message for a response */
				356	static inline struct h1m h1m_init_res(struct h1m h1m)
				357	{
				358	h1m->state = H1_MSG_RPBEFORE;
				359	h1m->next = 0;
				360	h1m->flags = H1_MF_RESP;
				361	h1m->curr_len = 0;
				362	h1m->body_len = 0;
				363	h1m->err_pos = -2;
				364	h1m->err_state = 0;
				365	return h1m;
				366	}
				367
Willy Tarreau	5413a87	2020-06-02 19:33:08 +0200	[diff] [blame]	368	#endif /* _HAPROXY_H1_H */