blob: ca39d97a4a8a825cb7f74e4ac71e6b13ab5253df [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020014#include <haproxy/api.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020015#include <haproxy/h1.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020016#include <haproxy/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020017
Willy Tarreau73373ab2018-09-14 17:11:33 +020018/* Parse the Content-Length header field of an HTTP/1 request. The function
19 * checks all possible occurrences of a comma-delimited value, and verifies
20 * if any of them doesn't match a previous value. It returns <0 if a value
21 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
22 * if the value can be indexed (first one). In the last case, the value might
23 * be adjusted and the caller must only add the updated value.
24 */
25int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
26{
27 char *e, *n;
28 long long cl;
29 int not_first = !!(h1m->flags & H1_MF_CLEN);
30 struct ist word;
31
32 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
33 e = value->ptr + value->len;
34
35 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +050036 /* skip leading delimiter and blanks */
Willy Tarreau73373ab2018-09-14 17:11:33 +020037 if (unlikely(HTTP_IS_LWS(*word.ptr)))
38 continue;
39
40 /* digits only now */
41 for (cl = 0, n = word.ptr; n < e; n++) {
42 unsigned int c = *n - '0';
43 if (unlikely(c > 9)) {
44 /* non-digit */
45 if (unlikely(n == word.ptr)) // spaces only
46 goto fail;
47 break;
48 }
49 if (unlikely(cl > ULLONG_MAX / 10ULL))
50 goto fail; /* multiply overflow */
51 cl = cl * 10ULL;
52 if (unlikely(cl + c < cl))
53 goto fail; /* addition overflow */
54 cl = cl + c;
55 }
56
57 /* keep a copy of the exact cleaned value */
58 word.len = n - word.ptr;
59
60 /* skip trailing LWS till next comma or EOL */
61 for (; n < e; n++) {
62 if (!HTTP_IS_LWS(*n)) {
63 if (unlikely(*n != ','))
64 goto fail;
65 break;
66 }
67 }
68
69 /* if duplicate, must be equal */
70 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
71 goto fail;
72
73 /* OK, store this result as the one to be indexed */
74 h1m->flags |= H1_MF_CLEN;
75 h1m->curr_len = h1m->body_len = cl;
76 *value = word;
77 word.ptr = n;
78 }
79 /* here we've reached the end with a single value or a series of
80 * identical values, all matching previous series if any. The last
81 * parsed value was sent back into <value>. We just have to decide
82 * if this occurrence has to be indexed (it's the first one) or
83 * silently skipped (it's not the first one)
84 */
85 return !not_first;
86 fail:
87 return -1;
88}
89
Willy Tarreau2557f6a2018-09-14 16:34:47 +020090/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
91 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
92 * this case. Any other token found or any empty header field found will reset
93 * this flag, so that it accurately represents the token's presence at the last
94 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
95 * are case-insensitive (cf RFC7230#4).
96 */
97void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
98{
99 char *e, *n;
100 struct ist word;
101
102 h1m->flags |= H1_MF_XFER_ENC;
103 h1m->flags &= ~H1_MF_CHNK;
104
105 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
106 e = value.ptr + value.len;
107
108 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500109 /* skip leading delimiter and blanks */
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200110 if (HTTP_IS_LWS(*word.ptr))
111 continue;
112
113 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
114 word.len = n - word.ptr;
115
116 /* trim trailing blanks */
117 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
118 word.len--;
119
120 h1m->flags &= ~H1_MF_CHNK;
121 if (isteqi(word, ist("chunked")))
122 h1m->flags |= H1_MF_CHNK;
123
124 word.ptr = n;
125 }
126}
127
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200128/* Parse the Connection: header of an HTTP/1 request, looking for "close",
129 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
130 * what was found there. Note that flags are only added, not removed, so the
131 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100132 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
133 * up from "keep-alive" and "close" values. To do so, the header value is
134 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200135 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100136void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200137{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100138 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200139 struct ist word;
140
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100141 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
142 p = value->ptr;
143 e = value->ptr + value->len;
144 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
145 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200146
147 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500148 /* skip leading delimiter and blanks */
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200149 if (HTTP_IS_LWS(*word.ptr))
150 continue;
151
152 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
153 word.len = n - word.ptr;
154
155 /* trim trailing blanks */
156 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
157 word.len--;
158
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100159 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200160 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100161 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
162 goto skip_val;
163 }
164 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200165 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100166 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
167 goto skip_val;
168 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200169 else if (isteqi(word, ist("upgrade")))
170 h1m->flags |= H1_MF_CONN_UPG;
171
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100172 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
173 if (value->ptr + value->len == p) {
174 /* no rewrite done till now */
175 value->len = n - value->ptr;
176 }
177 else {
178 if (value->len)
179 value->ptr[value->len++] = ',';
180 istcat(value, word, e - value->ptr);
181 }
182 }
183
184 skip_val:
185 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200186 }
187}
188
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100189/* Parse the Upgrade: header of an HTTP/1 request.
190 * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
191 */
192void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
193{
194 char *e, *n;
195 struct ist word;
196
197 h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
198
199 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
200 e = value.ptr + value.len;
201
202 while (++word.ptr < e) {
203 /* skip leading delimiter and blanks */
204 if (HTTP_IS_LWS(*word.ptr))
205 continue;
206
207 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
208 word.len = n - word.ptr;
209
210 /* trim trailing blanks */
211 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
212 word.len--;
213
214 if (isteqi(word, ist("websocket")))
215 h1m->flags |= H1_MF_UPG_WEBSOCKET;
216
217 word.ptr = n;
218 }
219}
220
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
 */
224
/* Checks that the byte at <ptr> is an LF. When it is, execution simply
 * continues after the macro. Otherwise <state> is set to <where> and the
 * control jumps to label <bad>.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)                          \
	do {                                                            \
		if (likely(*(ptr) == '\n'))                             \
			break;                                          \
		state = (where);                                        \
		goto bad;                                               \
	} while (0)
235
/* Advances pointer <ptr> by one byte. If it reached or passed pointer <end>,
 * the end of buffer was met: <state> is set to <where> and the control jumps
 * to label <stop>. Otherwise the control jumps to label <more>. In both cases
 * the statements following the macro are not executed.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)      \
	do {                                                            \
		if (unlikely(++(ptr) >= (end))) {                       \
			state = (where);                                \
			goto stop;                                      \
		}                                                       \
		goto more;                                              \
	} while (0)
249
Willy Tarreau794f9af2017-07-26 09:07:47 +0200250/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it into a list of (name,value)
252 * pairs representing header fields into the array <hdr> of size <hdr_num>,
253 * whose last entry will have an empty name and an empty value. If <hdr_num> is
Willy Tarreau4433c082018-09-11 15:33:32 +0200254 * too small to represent the whole message, an error is returned. Some
255 * protocol elements such as content-length and transfer-encoding will be
Willy Tarreau5384aac2018-09-11 16:04:48 +0200256 * parsed and stored into h1m as well. <hdr> may be null, in which case only
257 * the parsing state will be updated. This may be used to restart the parsing
258 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200259 *
260 * For now it's limited to the response. If the header block is incomplete,
261 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200262 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200263 * and h1m->next to zero on the first call, the parser will do the rest. If
264 * an incomplete message is seen, the caller only needs to present h1m->state
265 * and h1m->next again, with an empty header list so that the parser can start
266 * again. In this case, it will detect that it interrupted a previous session
267 * and will first look for the end of the message before reparsing it again and
268 * indexing it at the same time. This ensures that incomplete messages fed 1
269 * character at a time are never processed entirely more than exactly twice,
270 * and that there is no need to store all the internal state and pre-parsed
271 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200272 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200273 * A pointer to a start line descriptor may be passed in <slp>, in which case
274 * the parser will fill it with whatever it found.
275 *
Willy Tarreau794f9af2017-07-26 09:07:47 +0200276 * The code derived from the main HTTP/1 parser above but was simplified and
277 * optimized to process responses produced or forwarded by haproxy. The caller
278 * is responsible for ensuring that the message doesn't wrap, and should ensure
279 * it is complete to avoid having to retry the operation after a failed
280 * attempt. The message is not supposed to be invalid, which is why a few
281 * properties such as the character set used in the header field names are not
282 * checked. In case of an unparsable response message, a negative value will be
283 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100285 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
286 * parsed and the start line is skipped. It is not required to set h1m->state
287 * nor h1m->next in this case.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200288 *
289 * This function returns :
290 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
Willy Tarreau801250e2018-09-11 11:45:04 +0200291 * set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
293 * -2 if the output is full (hdr_num reached). err_state and err_pos also
294 * indicate where it failed.
295 * 0 in case of missing data.
296 * > 0 on success, it then corresponds to the number of bytes read since
297 * <start> so that the caller can go on with the payload.
298 */
299int h1_headers_to_hdr_list(char *start, const char *stop,
300 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200301 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200302{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200303 enum h1m_state state;
304 register char *ptr;
305 register const char *end;
306 unsigned int hdr_count;
307 unsigned int skip; /* number of bytes skipped at the beginning */
308 unsigned int sol; /* start of line */
309 unsigned int col; /* position of the colon */
310 unsigned int eol; /* end of line */
311 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200312 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200313 int skip_update;
314 int restarting;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200315 int host_idx;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200316 struct ist n, v; /* header name and value during parsing */
317
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200318 skip = 0; // do it only once to keep track of the leading CRLF.
319
320 try_again:
321 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200322 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200323 skip_update = restarting = 0;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200324 host_idx = -1;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200325
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100326 if (h1m->flags & H1_MF_HDRS_ONLY) {
327 state = H1_MSG_HDR_FIRST;
328 h1m->next = 0;
329 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100330 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100331 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100332 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
333 restarting = 1;
334 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100335
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200336 ptr = start + h1m->next;
337 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200338
Willy Tarreau794f9af2017-07-26 09:07:47 +0200339 if (unlikely(ptr >= end))
340 goto http_msg_ood;
341
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200342 /* don't update output if hdr is NULL or if we're restarting */
343 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200344 skip_update = 1;
345
Willy Tarreau794f9af2017-07-26 09:07:47 +0200346 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200347 case H1_MSG_RQBEFORE:
348 http_msg_rqbefore:
349 if (likely(HTTP_IS_TOKEN(*ptr))) {
350 /* we have a start of message, we may have skipped some
351 * heading CRLF. Skip them now.
352 */
353 skip += ptr - start;
354 start = ptr;
355
356 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200357 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200358 hdr_count = 0;
359 state = H1_MSG_RQMETH;
360 goto http_msg_rqmeth;
361 }
362
363 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
364 state = H1_MSG_RQBEFORE;
365 goto http_msg_invalid;
366 }
367
368 if (unlikely(*ptr == '\n'))
369 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
370 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
371 /* stop here */
372
373 case H1_MSG_RQBEFORE_CR:
374 http_msg_rqbefore_cr:
375 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
376 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
377 /* stop here */
378
379 case H1_MSG_RQMETH:
380 http_msg_rqmeth:
381 if (likely(HTTP_IS_TOKEN(*ptr)))
382 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
383
384 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200385 sl.rq.m.len = ptr - sl.rq.m.ptr;
386 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200387 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
388 }
389
390 if (likely(HTTP_IS_CRLF(*ptr))) {
391 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200392 sl.rq.m.len = ptr - sl.rq.m.ptr;
393 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200394 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200395 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200396 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200397 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200398 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200399 sl.rq.v.ptr = ptr;
400 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200401 goto http_msg_rqline_eol;
402 }
403 state = H1_MSG_RQMETH;
404 goto http_msg_invalid;
405
406 case H1_MSG_RQMETH_SP:
407 http_msg_rqmeth_sp:
408 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200409 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200410 goto http_msg_rquri;
411 }
412 if (likely(HTTP_IS_SPHT(*ptr)))
413 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
414 /* so it's a CR/LF, meaning an HTTP 0.9 request */
415 goto http_msg_req09_uri;
416
417 case H1_MSG_RQURI:
418 http_msg_rquri:
Willy Tarreau02ac9502020-02-21 16:31:22 +0100419#ifdef HA_UNALIGNED_LE
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200420 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
421 while (ptr <= end - sizeof(int)) {
422 int x = *(int *)ptr - 0x21212121;
423 if (x & 0x80808080)
424 break;
425
426 x -= 0x5e5e5e5e;
427 if (!(x & 0x80808080))
428 break;
429
430 ptr += sizeof(int);
431 }
432#endif
433 if (ptr >= end) {
434 state = H1_MSG_RQURI;
435 goto http_msg_ood;
436 }
437 http_msg_rquri2:
438 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
439 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
440
441 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200442 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200443 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
444 }
445 if (likely((unsigned char)*ptr >= 128)) {
446 /* non-ASCII chars are forbidden unless option
447 * accept-invalid-http-request is enabled in the frontend.
448 * In any case, we capture the faulty char.
449 */
450 if (h1m->err_pos < -1)
451 goto invalid_char;
452 if (h1m->err_pos == -1)
453 h1m->err_pos = ptr - start + skip;
454 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
455 }
456
457 if (likely(HTTP_IS_CRLF(*ptr))) {
458 /* so it's a CR/LF, meaning an HTTP 0.9 request */
459 goto http_msg_req09_uri_e;
460 }
461
462 /* OK forbidden chars, 0..31 or 127 */
463 invalid_char:
464 state = H1_MSG_RQURI;
465 goto http_msg_invalid;
466
467 case H1_MSG_RQURI_SP:
468 http_msg_rquri_sp:
469 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200470 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200471 goto http_msg_rqver;
472 }
473 if (likely(HTTP_IS_SPHT(*ptr)))
474 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
475 /* so it's a CR/LF, meaning an HTTP 0.9 request */
476 goto http_msg_req09_ver;
477
478
479 case H1_MSG_RQVER:
480 http_msg_rqver:
481 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
482 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
483
484 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200485 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200486 http_msg_rqline_eol:
487 /* We have seen the end of line. Note that we do not
488 * necessarily have the \n yet, but at least we know that we
489 * have EITHER \r OR \n, otherwise the request would not be
490 * complete. We can then record the request length and return
491 * to the caller which will be able to register it.
492 */
493
494 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200495 if ((sl.rq.v.len == 8) &&
496 (*(sl.rq.v.ptr + 5) > '1' ||
497 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200498 h1m->flags |= H1_MF_VER_11;
499
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200500 if (unlikely(hdr_count >= hdr_num)) {
501 state = H1_MSG_RQVER;
502 goto http_output_full;
503 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200504 if (!(h1m->flags & H1_MF_NO_PHDR))
505 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200506
507 if (unlikely(hdr_count >= hdr_num)) {
508 state = H1_MSG_RQVER;
509 goto http_output_full;
510 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200511 if (!(h1m->flags & H1_MF_NO_PHDR))
512 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200513 }
514
515 sol = ptr - start;
516 if (likely(*ptr == '\r'))
517 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
518 goto http_msg_rqline_end;
519 }
520
521 /* neither an HTTP_VER token nor a CRLF */
522 state = H1_MSG_RQVER;
523 goto http_msg_invalid;
524
525 case H1_MSG_RQLINE_END:
526 http_msg_rqline_end:
527 /* check for HTTP/0.9 request : no version information
528 * available. sol must point to the first of CR or LF. However
529 * since we don't save these elements between calls, if we come
530 * here from a restart, we don't necessarily know. Thus in this
531 * case we simply start over.
532 */
533 if (restarting)
534 goto restart;
535
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200536 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200537 goto http_msg_last_lf;
538
539 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
540 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
541 /* stop here */
542
543 /*
544 * Common states below
545 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200546 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200547 http_msg_rpbefore:
548 if (likely(HTTP_IS_TOKEN(*ptr))) {
549 /* we have a start of message, we may have skipped some
550 * heading CRLF. Skip them now.
551 */
552 skip += ptr - start;
553 start = ptr;
554
555 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200556 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200557 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200558 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200559 goto http_msg_rpver;
560 }
561
562 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200563 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200564 goto http_msg_invalid;
565 }
566
567 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200568 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
569 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200570 /* stop here */
571
Willy Tarreau801250e2018-09-11 11:45:04 +0200572 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200573 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200574 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
575 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200576 /* stop here */
577
Willy Tarreau801250e2018-09-11 11:45:04 +0200578 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200579 http_msg_rpver:
580 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200581 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200582
583 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200584 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200585
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200586 if ((sl.st.v.len == 8) &&
587 (*(sl.st.v.ptr + 5) > '1' ||
588 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200589 h1m->flags |= H1_MF_VER_11;
590
Willy Tarreau801250e2018-09-11 11:45:04 +0200591 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200592 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200593 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200594 goto http_msg_invalid;
595
Willy Tarreau801250e2018-09-11 11:45:04 +0200596 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200597 http_msg_rpver_sp:
598 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200599 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200600 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200601 goto http_msg_rpcode;
602 }
603 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200604 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200605 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200606 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200607 goto http_msg_invalid;
608
Willy Tarreau801250e2018-09-11 11:45:04 +0200609 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200610 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100611 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200612 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200613 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200614 }
615
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100616 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200617 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100618 goto http_msg_invalid;
619 }
620
Willy Tarreau794f9af2017-07-26 09:07:47 +0200621 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200622 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200623 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200624 }
625
626 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200627 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200628
629 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200630 sl.st.r.ptr = ptr;
631 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200632 goto http_msg_rpline_eol;
633
Willy Tarreau801250e2018-09-11 11:45:04 +0200634 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200635 http_msg_rpcode_sp:
636 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200637 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200638 goto http_msg_rpreason;
639 }
640 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200641 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200642 /* so it's a CR/LF, so there is no reason phrase */
643 goto http_msg_rsp_reason;
644
Willy Tarreau801250e2018-09-11 11:45:04 +0200645 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200646 http_msg_rpreason:
647 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200648 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200649 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200650 http_msg_rpline_eol:
651 /* We have seen the end of line. Note that we do not
652 * necessarily have the \n yet, but at least we know that we
653 * have EITHER \r OR \n, otherwise the response would not be
654 * complete. We can then record the response length and return
655 * to the caller which will be able to register it.
656 */
657
Willy Tarreau5384aac2018-09-11 16:04:48 +0200658 if (likely(!skip_update)) {
659 if (unlikely(hdr_count >= hdr_num)) {
660 state = H1_MSG_RPREASON;
661 goto http_output_full;
662 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200663 if (!(h1m->flags & H1_MF_NO_PHDR))
664 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200665 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200666
667 sol = ptr - start;
668 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200669 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200670 goto http_msg_rpline_end;
671
Willy Tarreau801250e2018-09-11 11:45:04 +0200672 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200673 http_msg_rpline_end:
674 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200675 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
676 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200677 /* stop here */
678
Willy Tarreau801250e2018-09-11 11:45:04 +0200679 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200680 http_msg_hdr_first:
681 sol = ptr - start;
682 if (likely(!HTTP_IS_CRLF(*ptr))) {
683 goto http_msg_hdr_name;
684 }
685
686 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200687 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200688 goto http_msg_last_lf;
689
Willy Tarreau801250e2018-09-11 11:45:04 +0200690 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200691 http_msg_hdr_name:
692 /* assumes sol points to the first char */
693 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200694 if (!skip_update) {
695 /* turn it to lower case if needed */
696 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
Willy Tarreauf278eec2020-07-05 21:46:32 +0200697 *ptr = tolower((unsigned char)*ptr);
Christopher Faulet2912f872018-09-19 14:01:04 +0200698 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200699 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200700 }
701
702 if (likely(*ptr == ':')) {
703 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200704 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200705 }
706
Willy Tarreau9aec3052018-09-12 09:20:40 +0200707 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200708 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200709 goto http_msg_invalid;
710 }
711
Willy Tarreau9aec3052018-09-12 09:20:40 +0200712 if (h1m->err_pos == -1) /* capture the error pointer */
713 h1m->err_pos = ptr - start + skip; /* >= 0 now */
714
715 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200716 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200717
Willy Tarreau801250e2018-09-11 11:45:04 +0200718 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200719 http_msg_hdr_l1_sp:
720 /* assumes sol points to the first char */
721 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200722 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200723
724 /* header value can be basically anything except CR/LF */
725 sov = ptr - start;
726
727 if (likely(!HTTP_IS_CRLF(*ptr))) {
728 goto http_msg_hdr_val;
729 }
730
731 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200732 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200733 goto http_msg_hdr_l1_lf;
734
Willy Tarreau801250e2018-09-11 11:45:04 +0200735 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200736 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200737 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
738 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200739
Willy Tarreau801250e2018-09-11 11:45:04 +0200740 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200741 http_msg_hdr_l1_lws:
742 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200743 if (!skip_update) {
744 /* replace HT,CR,LF with spaces */
745 for (; start + sov < ptr; sov++)
746 start[sov] = ' ';
747 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200748 goto http_msg_hdr_l1_sp;
749 }
750 /* we had a header consisting only in spaces ! */
751 eol = sov;
752 goto http_msg_complete_header;
753
Willy Tarreau801250e2018-09-11 11:45:04 +0200754 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200755 http_msg_hdr_val:
756 /* assumes sol points to the first char, and sov
757 * points to the first character of the value.
758 */
759
760 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
761 * and lower. In fact since most of the time is spent in the loop, we
762 * also remove the sign bit test so that bytes 0x8e..0x0d break the
763 * loop, but we don't care since they're very rare in header values.
764 */
Willy Tarreau02ac9502020-02-21 16:31:22 +0100765#ifdef HA_UNALIGNED_LE64
Willy Tarreau794f9af2017-07-26 09:07:47 +0200766 while (ptr <= end - sizeof(long)) {
767 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
768 goto http_msg_hdr_val2;
769 ptr += sizeof(long);
770 }
771#endif
Willy Tarreau02ac9502020-02-21 16:31:22 +0100772#ifdef HA_UNALIGNED_LE
Willy Tarreau794f9af2017-07-26 09:07:47 +0200773 while (ptr <= end - sizeof(int)) {
774 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
775 goto http_msg_hdr_val2;
776 ptr += sizeof(int);
777 }
778#endif
779 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200780 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200781 goto http_msg_ood;
782 }
783 http_msg_hdr_val2:
784 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200785 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200786
787 eol = ptr - start;
788 /* Note: we could also copy eol into ->eoh so that we have the
789 * real header end in case it ends with lots of LWS, but is this
790 * really needed ?
791 */
792 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200793 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200794 goto http_msg_hdr_l2_lf;
795
Willy Tarreau801250e2018-09-11 11:45:04 +0200796 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200797 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200798 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
799 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200800
Willy Tarreau801250e2018-09-11 11:45:04 +0200801 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200802 http_msg_hdr_l2_lws:
803 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200804 if (!skip_update) {
805 /* LWS: replace HT,CR,LF with spaces */
806 for (; start + eol < ptr; eol++)
807 start[eol] = ' ';
808 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200809 goto http_msg_hdr_val;
810 }
811 http_msg_complete_header:
812 /*
813 * It was a new header, so the last one is finished. Assumes
814 * <sol> points to the first char of the name, <col> to the
815 * colon, <sov> points to the first character of the value and
816 * <eol> to the first CR or LF so we know how the line ends. We
817 * will trim spaces around the value. It's possible to do it by
818 * adjusting <eol> and <sov> which are no more used after this.
819 * We can add the header field to the list.
820 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200821 if (likely(!skip_update)) {
822 while (sov < eol && HTTP_IS_LWS(start[sov]))
823 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200824
Christopher Faulet2912f872018-09-19 14:01:04 +0200825 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
826 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200827
828
Christopher Faulet2912f872018-09-19 14:01:04 +0200829 n = ist2(start + sol, col - sol);
830 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200831
Christopher Faulet2912f872018-09-19 14:01:04 +0200832 do {
833 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200834
Christopher Faulet2912f872018-09-19 14:01:04 +0200835 if (unlikely(hdr_count >= hdr_num)) {
836 state = H1_MSG_HDR_L2_LWS;
837 goto http_output_full;
838 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200839
Christopher Faulet2912f872018-09-19 14:01:04 +0200840 if (isteqi(n, ist("transfer-encoding"))) {
841 h1_parse_xfer_enc_header(h1m, v);
842 }
843 else if (isteqi(n, ist("content-length"))) {
844 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200845
Christopher Faulet2912f872018-09-19 14:01:04 +0200846 if (ret < 0) {
847 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100848 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet2912f872018-09-19 14:01:04 +0200849 goto http_msg_invalid;
850 }
851 else if (ret == 0) {
852 /* skip it */
853 break;
854 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200855 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200856 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100857 h1_parse_connection_header(h1m, &v);
858 if (!v.len) {
859 /* skip it */
860 break;
861 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200862 }
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100863 else if (isteqi(n, ist("upgrade"))) {
864 h1_parse_upgrade_header(h1m, v);
865 }
Christopher Faulet7032a3f2020-05-04 09:01:45 +0200866 else if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP)) && isteqi(n, ist("host"))) {
Christopher Faulet531b83e2019-10-11 13:34:22 +0200867 if (host_idx == -1) {
868 struct ist authority;
869
870 authority = http_get_authority(sl.rq.u, 1);
871 if (authority.len && !isteqi(v, authority)) {
872 if (h1m->err_pos < -1) {
873 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100874 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet531b83e2019-10-11 13:34:22 +0200875 goto http_msg_invalid;
876 }
877 if (h1m->err_pos == -1) /* capture the error pointer */
Christopher Faulet17034782020-01-06 13:41:01 +0100878 h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
Christopher Faulet531b83e2019-10-11 13:34:22 +0200879 }
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200880 host_idx = hdr_count;
Christopher Faulet531b83e2019-10-11 13:34:22 +0200881 }
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200882 else {
883 if (!isteqi(v, hdr[host_idx].v)) {
884 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100885 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200886 goto http_msg_invalid;
887 }
888 /* if the same host, skip it */
889 break;
890 }
891 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200892
Christopher Faulet2912f872018-09-19 14:01:04 +0200893 http_set_hdr(&hdr[hdr_count++], n, v);
894 } while (0);
895 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200896
897 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200898
Willy Tarreau794f9af2017-07-26 09:07:47 +0200899 if (likely(!HTTP_IS_CRLF(*ptr)))
900 goto http_msg_hdr_name;
901
902 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200903 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200904 goto http_msg_last_lf;
905
Willy Tarreau801250e2018-09-11 11:45:04 +0200906 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200907 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200908 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200909 ptr++;
910 /* <ptr> now points to the first byte of payload. If needed sol
911 * still points to the first of either CR or LF of the empty
912 * line ending the headers block.
913 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200914 if (likely(!skip_update)) {
915 if (unlikely(hdr_count >= hdr_num)) {
916 state = H1_MSG_LAST_LF;
917 goto http_output_full;
918 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200919 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200920 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200921
922 /* reaching here we've parsed the whole message. We may detect
923 * that we were already continuing an interrupted parsing pass
924 * so we were silently looking for the end of message not
925 * updating anything before deciding to parse it fully at once.
926 * It's guaranteed that we won't match this test twice in a row
927 * since restarting will turn zero.
928 */
929 if (restarting)
930 goto restart;
931
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200932 state = H1_MSG_DATA;
933 if (h1m->flags & H1_MF_XFER_ENC) {
934 if (h1m->flags & H1_MF_CLEN) {
935 h1m->flags &= ~H1_MF_CLEN;
936 hdr_count = http_del_hdr(hdr, ist("content-length"));
937 }
938
939 if (h1m->flags & H1_MF_CHNK)
940 state = H1_MSG_CHUNK_SIZE;
941 else if (!(h1m->flags & H1_MF_RESP)) {
942 /* cf RFC7230#3.3.3 : transfer-encoding in
943 * request without chunked encoding is invalid.
944 */
945 goto http_msg_invalid;
946 }
947 }
948
Willy Tarreau794f9af2017-07-26 09:07:47 +0200949 break;
950
951 default:
952 /* impossible states */
953 goto http_msg_invalid;
954 }
955
Willy Tarreau001823c2018-09-12 17:25:32 +0200956 /* Now we've left the headers state and are either in H1_MSG_DATA or
957 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200958 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200959
Willy Tarreau5384aac2018-09-11 16:04:48 +0200960 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200961 *slp = sl;
962
Willy Tarreau4433c082018-09-11 15:33:32 +0200963 h1m->state = state;
964 h1m->next = ptr - start + skip;
965 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200966
967 http_msg_ood:
968 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200969 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200970 *slp = sl;
971
Willy Tarreau4433c082018-09-11 15:33:32 +0200972 h1m->state = state;
973 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200974 return 0;
975
976 http_msg_invalid:
977 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200978 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200979 *slp = sl;
980
Willy Tarreau4433c082018-09-11 15:33:32 +0200981 h1m->err_state = h1m->state = state;
982 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200983 return -1;
984
985 http_output_full:
986 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200987 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200988 *slp = sl;
989
Willy Tarreau4433c082018-09-11 15:33:32 +0200990 h1m->err_state = h1m->state = state;
991 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200992 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200993
994 restart:
Christopher Faulet84f06532019-09-03 16:05:31 +0200995 h1m->flags &= ~(H1_MF_VER_11|H1_MF_CLEN|H1_MF_XFER_ENC|H1_MF_CHNK|H1_MF_CONN_KAL|H1_MF_CONN_CLO|H1_MF_CONN_UPG);
996 h1m->curr_len = h1m->body_len = h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200997 if (h1m->flags & H1_MF_RESP)
998 h1m->state = H1_MSG_RPBEFORE;
999 else
1000 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001001 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001002}
1003
/* This function performs a very minimal parsing of the trailers block present
 * at offset <ofs> in <buf> for up to <max> bytes, and returns the number of
 * bytes to delete to skip the trailers, that is, the distance between <ofs>
 * and the first byte following the empty line which terminates the block.
 *
 * Return values :
 *   > 0 : number of bytes from <ofs> up to and including the terminating
 *         empty line ;
 *     0 : the position corresponding to <ofs>+<max> was reached before the
 *         terminating empty line was found (missing input data) ;
 *   < 0 : parse error (in which case the caller may have to decide how to
 *         proceed, possibly eating everything).
 *
 * Lines may be terminated either by a lone LF or by CRLF. A CR which is not
 * immediately followed by an LF is reported as a parse error, exactly like the
 * header parser does when it expects an LF after a CR. Without this, a line
 * starting with a bare CR followed by other bytes would be mistaken for the
 * empty line ending the trailers.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	/* scan limit ; NOTE(review): presumably b_peek() wraps inside the
	 * buffer's storage area, to be confirmed against its definition.
	 */
	const char *stop = b_peek(buf, ofs + max);
	int count = ofs;

	while (1) {
		const char *p1 = NULL; /* first CR or LF of the current line */
		const char *p2 = NULL; /* LF of the current line */
		const char *start = b_peek(buf, count);
		const char *ptr   = start;

		/* scan current line and stop at LF or CRLF */
		while (1) {
			if (ptr == stop)
				return 0;

			if (*ptr == '\n') {
				if (!p1)
					p1 = ptr;
				p2 = ptr;
				break;
			}

			/* Not an LF: if a CR was already met on this line, it
			 * was not immediately followed by an LF, which is not
			 * a valid line ending. This also covers the CR CR case.
			 */
			if (p1)
				return -1;

			if (*ptr == '\r')
				p1 = ptr;

			ptr = b_next(buf, ptr);
		}

		/* after LF; point to beginning of next line */
		p2 = b_next(buf, p2);
		count += b_dist(buf, start, p2);

		/* LF/CRLF at beginning of line => end of trailers at p2.
		 * Everything was scheduled for forwarding, there's nothing left
		 * from this message.
		 */
		if (p1 == start)
			break;
		/* OK, next line then */
	}
	return count - ofs;
}