blob: 858db7d02f1f1e0e3110beeb72b42abbbce2201f [file] [log] [blame]
Willy Tarreauafba57a2018-12-11 13:44:24 +01001/*
Willy Tarreau5413a872020-06-02 19:33:08 +02002 * include/haproxy/h1.h
Willy Tarreauafba57a2018-12-11 13:44:24 +01003 * This file contains HTTP/1 protocol definitions.
4 *
Willy Tarreau5413a872020-06-02 19:33:08 +02005 * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
Willy Tarreauafba57a2018-12-11 13:44:24 +01006 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation, version 2.1
10 * exclusively.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
Willy Tarreau5413a872020-06-02 19:33:08 +020022#ifndef _HAPROXY_H1_H
23#define _HAPROXY_H1_H
Willy Tarreauafba57a2018-12-11 13:44:24 +010024
Willy Tarreau5413a872020-06-02 19:33:08 +020025#include <import/ist.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020026#include <haproxy/api.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020027#include <haproxy/buf.h>
Willy Tarreaucd72d8c2020-06-02 19:11:26 +020028#include <haproxy/http.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020029#include <haproxy/http-hdr-t.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020030#include <haproxy/intops.h>
Willy Tarreauafba57a2018-12-11 13:44:24 +010031
32
/* Possible states while parsing HTTP/1 messages (request|response). The values
 * are numbered explicitly and are grouped by parsing phase: request start line
 * (0..7), response start line (8..15), header block (16..25), then body and
 * end of message (26..31). h1m_state_str() turns any of them into a printable
 * name for debugging.
 */
enum h1m_state {
	H1_MSG_RQBEFORE     =  0, // request: leading LF, before start line
	H1_MSG_RQBEFORE_CR  =  1, // request: leading CRLF, before start line
	/* these ones define a request start line */
	H1_MSG_RQMETH       =  2, // parsing the Method
	H1_MSG_RQMETH_SP    =  3, // space(s) after the Method
	H1_MSG_RQURI        =  4, // parsing the Request URI
	H1_MSG_RQURI_SP     =  5, // space(s) after the Request URI
	H1_MSG_RQVER        =  6, // parsing the Request Version
	H1_MSG_RQLINE_END   =  7, // end of request line (CR or LF)

	H1_MSG_RPBEFORE     =  8, // response: leading LF, before start line
	H1_MSG_RPBEFORE_CR  =  9, // response: leading CRLF, before start line

	/* these ones define a response start line */
	H1_MSG_RPVER        = 10, // parsing the Response Version
	H1_MSG_RPVER_SP     = 11, // space(s) after the Response Version
	H1_MSG_RPCODE       = 12, // response code
	H1_MSG_RPCODE_SP    = 13, // space(s) after the response code
	H1_MSG_RPREASON     = 14, // response reason
	H1_MSG_RPLINE_END   = 15, // end of response line (CR or LF)

	/* common header processing */
	H1_MSG_HDR_FIRST    = 16, // waiting for first header or last CRLF (no LWS possible)
	H1_MSG_HDR_NAME     = 17, // parsing header name
	H1_MSG_HDR_COL      = 18, // parsing header colon
	H1_MSG_HDR_L1_SP    = 19, // parsing header LWS (SP|HT) before value
	H1_MSG_HDR_L1_LF    = 20, // parsing header LWS (LF) before value
	H1_MSG_HDR_L1_LWS   = 21, // checking whether it's a new header or an LWS
	H1_MSG_HDR_VAL      = 22, // parsing header value
	H1_MSG_HDR_L2_LF    = 23, // parsing header LWS (LF) inside/after value
	H1_MSG_HDR_L2_LWS   = 24, // checking whether it's a new header or an LWS

	H1_MSG_LAST_LF      = 25, // parsing last LF, last state for headers

	/* Body processing. */

	H1_MSG_CHUNK_SIZE   = 26, // parsing the chunk size (RFC7230 #4.1)
	H1_MSG_DATA         = 27, // skipping data chunk / content-length data
	H1_MSG_CHUNK_CRLF   = 28, // skipping CRLF after data chunk
	H1_MSG_TRAILERS     = 29, // trailers (post-data entity headers)
	/* we enter this state when we've received the end of the current message */
	H1_MSG_DONE         = 30, // message end received, waiting for resync or close
	H1_MSG_TUNNEL       = 31, // tunneled data after DONE
} __attribute__((packed)); /* packed: ask for the smallest storage able to hold the values above */
79
80
/* HTTP/1 message flags (32 bit), for use in h1m->flags only. Each flag uses a
 * distinct bit so that several of them may be combined with a bitwise OR. The
 * bits from 0x00010000 upward are not assigned in this list.
 */
#define H1_MF_NONE            0x00000000
#define H1_MF_CLEN            0x00000001 // content-length present
#define H1_MF_CHNK            0x00000002 // chunk present, exclusive with c-l
#define H1_MF_RESP            0x00000004 // this message is the response message
#define H1_MF_TOLOWER         0x00000008 // turn the header names to lower case
#define H1_MF_VER_11          0x00000010 // message indicates version 1.1 or above
#define H1_MF_CONN_CLO        0x00000020 // message contains "connection: close"
#define H1_MF_CONN_KAL        0x00000040 // message contains "connection: keep-alive"
#define H1_MF_CONN_UPG        0x00000080 // message contains "connection: upgrade"
#define H1_MF_XFER_LEN        0x00000100 // message xfer size can be determined
#define H1_MF_XFER_ENC        0x00000200 // transfer-encoding is present
#define H1_MF_NO_PHDR         0x00000400 // don't add pseudo-headers in the header list
#define H1_MF_HDRS_ONLY       0x00000800 // parse headers only
#define H1_MF_CLEAN_CONN_HDR  0x00001000 // skip close/keep-alive values of connection headers during parsing
#define H1_MF_METH_CONNECT    0x00002000 // Set for a response to a CONNECT request
#define H1_MF_METH_HEAD       0x00004000 // Set for a response to a HEAD request
#define H1_MF_UPG_WEBSOCKET   0x00008000 // Set for a Websocket upgrade handshake
Willy Tarreauafba57a2018-12-11 13:44:24 +010099
100/* Note: for a connection to be persistent, we need this for the request :
101 * - one of CLEN or CHNK
102 * - version 1.0 and KAL and not CLO
103 * - or version 1.1 and not CLO
104 * For the response it's the same except that UPG must not appear either.
105 * So in short, for a request it's (CLEN|CHNK) > 0 && !CLO && (VER_11 || KAL)
106 * and for a response it's (CLEN|CHNK) > 0 && !(CLO|UPG) && (VER_11 || KAL)
107 */
108
109
/* basic HTTP/1 message state for use in parsers. The err_pos field is special:
 * it is pre-set to a negative value (-1 or -2), and once non-negative it
 * contains the relative position in the message of the first parse error. -2
 * is used to tell the parser that we want to block the invalid message, while
 * -1 is used to only perform a silent capture. h1m_init_req() and
 * h1m_init_res() both preset it to -2.
 */
struct h1m {
	enum h1m_state state;       // H1 message state (H1_MSG_*)
	/* 24 bits available here */
	uint32_t flags;             // H1 message flags (H1_MF_*)
	uint64_t curr_len;          // content-length or last chunk length
	uint64_t body_len;          // total known size of the body length
	uint32_t next;              // next byte to parse, relative to buffer's head
	int err_pos;                // position in the byte stream of the first error (H1 or H2)
	int err_state;              // state where the first error was met (H1 or H2)
};
126
/* basic H1 start line, describes either the request line or the response
 * (status) line. Only one of the two members is meaningful for a given
 * message since both share the same storage.
 */
union h1_sl {                          /* useful start line pointers, relative to ->sol */
	struct {
		struct ist m;          /* METHOD */
		struct ist u;          /* URI */
		struct ist v;          /* VERSION */
		enum http_meth_t meth; /* method */
	} rq;                          /* request line : field, length */
	struct {
		struct ist v;          /* VERSION */
		struct ist c;          /* CODE */
		struct ist r;          /* REASON */
		uint16_t status;       /* status code */
	} st;                          /* status line : field, length */
};
142
/* The functions below are only declared in this header; their implementations
 * are not visible from here, so the notes are limited to what the prototypes
 * themselves show.
 */

/* NOTE(review): presumably parses the header block found between <start> and
 * <stop> into at most <hdr_num> entries of <hdr>, updating <h1m> and filling
 * <slp> with the start line - confirm against the implementation.
 */
int h1_headers_to_hdr_list(char *start, const char *stop,
                           struct http_hdr *hdr, unsigned int hdr_num,
                           struct h1m *h1m, union h1_sl *slp);
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max);

/* Per-header helpers. Note that the content-length and connection variants
 * receive <value> by pointer while the two others receive it by value.
 */
int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value);
void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value);
void h1_parse_connection_header(struct h1m *h1m, struct ist *value);
void h1_parse_upgrade_header(struct h1m *h1m, struct ist value);

/* WebSocket key helpers. The "[25]" below is only informative: the parameter
 * is a plain char pointer and the compiler does not enforce the size, so the
 * caller is responsible for providing enough room (presumably 25 bytes -
 * confirm against the implementation).
 */
void h1_generate_random_ws_input_key(char key_out[25]);
void h1_calculate_ws_output_key(const char *key, char *result);
155
Willy Tarreauafba57a2018-12-11 13:44:24 +0100156/* for debugging, reports the HTTP/1 message state name */
157static inline const char *h1m_state_str(enum h1m_state msg_state)
158{
159 switch (msg_state) {
160 case H1_MSG_RQBEFORE: return "MSG_RQBEFORE";
161 case H1_MSG_RQBEFORE_CR: return "MSG_RQBEFORE_CR";
162 case H1_MSG_RQMETH: return "MSG_RQMETH";
163 case H1_MSG_RQMETH_SP: return "MSG_RQMETH_SP";
164 case H1_MSG_RQURI: return "MSG_RQURI";
165 case H1_MSG_RQURI_SP: return "MSG_RQURI_SP";
166 case H1_MSG_RQVER: return "MSG_RQVER";
167 case H1_MSG_RQLINE_END: return "MSG_RQLINE_END";
168 case H1_MSG_RPBEFORE: return "MSG_RPBEFORE";
169 case H1_MSG_RPBEFORE_CR: return "MSG_RPBEFORE_CR";
170 case H1_MSG_RPVER: return "MSG_RPVER";
171 case H1_MSG_RPVER_SP: return "MSG_RPVER_SP";
172 case H1_MSG_RPCODE: return "MSG_RPCODE";
173 case H1_MSG_RPCODE_SP: return "MSG_RPCODE_SP";
174 case H1_MSG_RPREASON: return "MSG_RPREASON";
175 case H1_MSG_RPLINE_END: return "MSG_RPLINE_END";
176 case H1_MSG_HDR_FIRST: return "MSG_HDR_FIRST";
177 case H1_MSG_HDR_NAME: return "MSG_HDR_NAME";
178 case H1_MSG_HDR_COL: return "MSG_HDR_COL";
179 case H1_MSG_HDR_L1_SP: return "MSG_HDR_L1_SP";
180 case H1_MSG_HDR_L1_LF: return "MSG_HDR_L1_LF";
181 case H1_MSG_HDR_L1_LWS: return "MSG_HDR_L1_LWS";
182 case H1_MSG_HDR_VAL: return "MSG_HDR_VAL";
183 case H1_MSG_HDR_L2_LF: return "MSG_HDR_L2_LF";
184 case H1_MSG_HDR_L2_LWS: return "MSG_HDR_L2_LWS";
185 case H1_MSG_LAST_LF: return "MSG_LAST_LF";
186 case H1_MSG_CHUNK_SIZE: return "MSG_CHUNK_SIZE";
187 case H1_MSG_DATA: return "MSG_DATA";
188 case H1_MSG_CHUNK_CRLF: return "MSG_CHUNK_CRLF";
189 case H1_MSG_TRAILERS: return "MSG_TRAILERS";
190 case H1_MSG_DONE: return "MSG_DONE";
191 case H1_MSG_TUNNEL: return "MSG_TUNNEL";
192 default: return "MSG_??????";
193 }
194}
195
/* This function may be called only in H1_MSG_CHUNK_CRLF. It reads the CRLF or
 * a possible LF alone at the end of a chunk. <start> and <stop> are offsets
 * relative to the buffer's head: <start> designates the first byte to parse
 * and <stop> the first byte that must not be read. The caller should adjust
 * its parsing offset (e.g. h1m->next) in order to include this part into the
 * next forwarding phase. It returns the number of bytes parsed on success (1
 * for a lone LF, 2 for CRLF), so the caller can switch the message state to
 * H1_MSG_CHUNK_SIZE. If not enough data are available, the function does not
 * change anything and returns zero. Otherwise it returns a negative value
 * indicating the error position relative to <stop>. Note: this function is
 * designed to parse wrapped CRLF at the end of the buffer.
 */
static inline int h1_skip_chunk_crlf(const struct buffer *buf, int start, int stop)
{
	const char *ptr = b_peek(buf, start);
	int bytes = 1;

	if (stop <= start)
		return 0;

	/* NB: we'll check data availability at the end. It's not a
	 * problem because whatever we match first will be checked
	 * against the correct length.
	 */
	if (*ptr == '\r') {
		bytes++;
		ptr++;
		if (ptr >= b_wrap(buf))
			ptr = b_orig(buf);
	}

	if (bytes > stop - start)
		return 0;

	if (*ptr != '\n') {
		/* The faulty byte sits at offset <start + bytes - 1> from the
		 * head. Its distance to <stop> is computed from the offsets
		 * rather than by subtracting pointers, because the two
		 * pointers may lie on different sides of the wrapping point,
		 * in which case their difference could be zero or positive
		 * and be mistaken for "missing data" or for a success. Since
		 * bytes <= stop - start here, the result is always negative.
		 */
		return start + bytes - 1 - stop;
	}

	return bytes;
}
233
/* Parses the chunk size starting at buf + start and stopping before buf + stop.
 * The positions are relative to the buffer's head.
 * It returns the chunk size in <res> and the amount of bytes read this way :
 *   < 0 : error at this position relative to <stop> (<res> is set to zero)
 *   = 0 : not enough bytes to read a complete chunk size (<res> is untouched)
 *   > 0 : number of bytes successfully read that the caller can skip
 * On success, the caller should advance its parsing offset (e.g. h1m->next) by
 * the returned value so that it points to the first byte of data after the
 * chunk size line. The returned value is the exact number of bytes forming the
 * chunk size line, which makes it possible to differentiate between the start
 * of the body and the start of the data. The size is made of hex digits and is
 * rejected once it no longer fits in 52 bits; it may be followed by spaces or
 * tabs and by chunk extensions, which are skipped. The line ends with CRLF or
 * with a lone LF. Note: this function is designed to parse wrapped CRLF at the
 * end of the buffer.
 */
static inline int h1_parse_chunk_size(const struct buffer *buf, int start, int stop, uint64_t *res)
{
	const char *ptr = b_peek(buf, start);
	const char *ptr_old = ptr;
	const char *end = b_wrap(buf);
	uint64_t chunk = 0;

	/* From now on <stop> counts the bytes still available and <start>
	 * remembers how many were available on entry, so that <start - stop>
	 * is the number of bytes consumed.
	 */
	stop -= start;       // bytes left
	start = stop;        // bytes to transfer

	/* The chunk size is in the following form, though we are only
	 * interested in the size and CRLF :
	 *    1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
	 */
	while (1) {
		int c;
		if (!stop)
			return 0;
		c = hex2i(*ptr);
		if (c < 0) /* not a hex digit anymore */
			break;
		if (unlikely(++ptr >= end))
			ptr = b_orig(buf);
		chunk = (chunk << 4) + c;
		if (unlikely(chunk & 0xF0000000000000)) {
			/* Don't accept more than 13 hex digits (2^52 - 1) so as
			 * to never feed possibly bogus values to languages that
			 * use floats for their integers.
			 */
			goto error;
		}
		stop--;
	}

	/* empty size not allowed */
	if (unlikely(ptr == ptr_old))
		goto error;

	/* skip the optional spaces/tabs following the size */
	while (HTTP_IS_SPHT(*ptr)) {
		if (++ptr >= end)
			ptr = b_orig(buf);
		if (--stop == 0)
			return 0;
	}

	/* Up to there, we know that at least one byte is present at *ptr. Check
	 * for the end of chunk size.
	 */
	while (1) {
		if (likely(HTTP_IS_CRLF(*ptr))) {
			/* we now have a CR or an LF at ptr */
			if (likely(*ptr == '\r')) {
				if (++ptr >= end)
					ptr = b_orig(buf);
				if (--stop == 0)
					return 0;
			}

			/* a CR must be followed by an LF; a lone LF is accepted */
			if (*ptr != '\n')
				goto error;
			if (++ptr >= end)
				ptr = b_orig(buf);
			--stop;
			/* done */
			break;
		}
		else if (likely(*ptr == ';')) {
			/* chunk extension, ends at next CRLF */
			if (++ptr >= end)
				ptr = b_orig(buf);
			if (--stop == 0)
				return 0;

			while (!HTTP_IS_CRLF(*ptr)) {
				if (++ptr >= end)
					ptr = b_orig(buf);
				if (--stop == 0)
					return 0;
			}
			/* we have a CRLF now, loop above */
			continue;
		}
		else
			goto error;
	}

	/* OK we found our CRLF and now <ptr> points to the next byte, which may
	 * or may not be present. Let's return the number of bytes parsed.
	 */
	*res = chunk;
	return start - stop;
 error:
	*res = 0; // just to stop gcc's -Wuninitialized warning :-(
	/* <stop> bytes were left when the error was met, hence a negative
	 * position relative to the end of the parsed area.
	 */
	return -stop;
}
341
342/* initializes an H1 message for a request */
343static inline struct h1m *h1m_init_req(struct h1m *h1m)
344{
345 h1m->state = H1_MSG_RQBEFORE;
346 h1m->next = 0;
347 h1m->flags = H1_MF_NONE;
348 h1m->curr_len = 0;
349 h1m->body_len = 0;
350 h1m->err_pos = -2;
351 h1m->err_state = 0;
352 return h1m;
353}
354
355/* initializes an H1 message for a response */
356static inline struct h1m *h1m_init_res(struct h1m *h1m)
357{
358 h1m->state = H1_MSG_RPBEFORE;
359 h1m->next = 0;
360 h1m->flags = H1_MF_RESP;
361 h1m->curr_len = 0;
362 h1m->body_len = 0;
363 h1m->err_pos = -2;
364 h1m->err_state = 0;
365 return h1m;
366}
367
Willy Tarreau5413a872020-06-02 19:33:08 +0200368#endif /* _HAPROXY_H1_H */