blob: 957abd5b813b3c307e47c6804478f5cf4d28907f [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020014#include <common/config.h>
Willy Tarreauafba57a2018-12-11 13:44:24 +010015#include <common/h1.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020016#include <common/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020017
Willy Tarreau188e2302018-06-15 11:11:53 +020018#include <proto/channel.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020019
Willy Tarreau73373ab2018-09-14 17:11:33 +020020/* Parse the Content-Length header field of an HTTP/1 request. The function
21 * checks all possible occurrences of a comma-delimited value, and verifies
22 * if any of them doesn't match a previous value. It returns <0 if a value
23 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
24 * if the value can be indexed (first one). In the last case, the value might
25 * be adjusted and the caller must only add the updated value.
26 */
27int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
28{
29 char *e, *n;
30 long long cl;
31 int not_first = !!(h1m->flags & H1_MF_CLEN);
32 struct ist word;
33
34 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
35 e = value->ptr + value->len;
36
37 while (++word.ptr < e) {
38 /* skip leading delimitor and blanks */
39 if (unlikely(HTTP_IS_LWS(*word.ptr)))
40 continue;
41
42 /* digits only now */
43 for (cl = 0, n = word.ptr; n < e; n++) {
44 unsigned int c = *n - '0';
45 if (unlikely(c > 9)) {
46 /* non-digit */
47 if (unlikely(n == word.ptr)) // spaces only
48 goto fail;
49 break;
50 }
51 if (unlikely(cl > ULLONG_MAX / 10ULL))
52 goto fail; /* multiply overflow */
53 cl = cl * 10ULL;
54 if (unlikely(cl + c < cl))
55 goto fail; /* addition overflow */
56 cl = cl + c;
57 }
58
59 /* keep a copy of the exact cleaned value */
60 word.len = n - word.ptr;
61
62 /* skip trailing LWS till next comma or EOL */
63 for (; n < e; n++) {
64 if (!HTTP_IS_LWS(*n)) {
65 if (unlikely(*n != ','))
66 goto fail;
67 break;
68 }
69 }
70
71 /* if duplicate, must be equal */
72 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
73 goto fail;
74
75 /* OK, store this result as the one to be indexed */
76 h1m->flags |= H1_MF_CLEN;
77 h1m->curr_len = h1m->body_len = cl;
78 *value = word;
79 word.ptr = n;
80 }
81 /* here we've reached the end with a single value or a series of
82 * identical values, all matching previous series if any. The last
83 * parsed value was sent back into <value>. We just have to decide
84 * if this occurrence has to be indexed (it's the first one) or
85 * silently skipped (it's not the first one)
86 */
87 return !not_first;
88 fail:
89 return -1;
90}
91
Willy Tarreau2557f6a2018-09-14 16:34:47 +020092/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
93 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
94 * this case. Any other token found or any empty header field found will reset
95 * this flag, so that it accurately represents the token's presence at the last
96 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
97 * are case-insensitive (cf RFC7230#4).
98 */
99void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
100{
101 char *e, *n;
102 struct ist word;
103
104 h1m->flags |= H1_MF_XFER_ENC;
105 h1m->flags &= ~H1_MF_CHNK;
106
107 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
108 e = value.ptr + value.len;
109
110 while (++word.ptr < e) {
111 /* skip leading delimitor and blanks */
112 if (HTTP_IS_LWS(*word.ptr))
113 continue;
114
115 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
116 word.len = n - word.ptr;
117
118 /* trim trailing blanks */
119 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
120 word.len--;
121
122 h1m->flags &= ~H1_MF_CHNK;
123 if (isteqi(word, ist("chunked")))
124 h1m->flags |= H1_MF_CHNK;
125
126 word.ptr = n;
127 }
128}
129
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200130/* Parse the Connection: header of an HTTP/1 request, looking for "close",
131 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
132 * what was found there. Note that flags are only added, not removed, so the
133 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100134 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
135 * up from "keep-alive" and "close" values. To do so, the header value is
136 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200137 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100138void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200139{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100140 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200141 struct ist word;
142
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100143 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
144 p = value->ptr;
145 e = value->ptr + value->len;
146 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
147 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200148
149 while (++word.ptr < e) {
150 /* skip leading delimitor and blanks */
151 if (HTTP_IS_LWS(*word.ptr))
152 continue;
153
154 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
155 word.len = n - word.ptr;
156
157 /* trim trailing blanks */
158 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
159 word.len--;
160
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100161 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200162 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100163 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
164 goto skip_val;
165 }
166 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200167 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100168 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
169 goto skip_val;
170 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200171 else if (isteqi(word, ist("upgrade")))
172 h1m->flags |= H1_MF_CONN_UPG;
173
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100174 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
175 if (value->ptr + value->len == p) {
176 /* no rewrite done till now */
177 value->len = n - value->ptr;
178 }
179 else {
180 if (value->len)
181 value->ptr[value->len++] = ',';
182 istcat(value, word, e - value->ptr);
183 }
184 }
185
186 skip_val:
187 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200188 }
189}
190
/* Helper macros for the HTTP/1 parser below. They either verify that an
 * expected byte (e.g. LF) is present, or advance to the next byte and leave
 * the parsing loop when no more data is available.
 */

/* Checks that the byte at <ptr> is an LF. When it is not, <state> is set to
 * <where> and the execution jumps to label <bad>. Nothing happens otherwise.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)				\
	do {								\
		if (likely(*(ptr) == '\n'))				\
			break;						\
		state = (where);					\
		goto bad;						\
	} while (0)

/* Advances <ptr> by one byte. If it reached <end>, <state> is set to <where>
 * and the execution jumps to label <stop>; otherwise it jumps to label
 * <more>. In both cases the control never falls through this macro.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)	\
	do {								\
		if (unlikely(++(ptr) >= (end))) {			\
			state = (where);				\
			goto stop;					\
		}							\
		goto more;						\
	} while (0)
219
Willy Tarreau794f9af2017-07-26 09:07:47 +0200220/* This function parses a contiguous HTTP/1 headers block starting at <start>
221 * and ending before <stop>, at once, and converts it to a list of (name,value)
222 * pairs representing header fields into the array <hdr> of size <hdr_num>,
223 * whose last entry will have an empty name and an empty value. If <hdr_num> is
Willy Tarreau4433c082018-09-11 15:33:32 +0200224 * too small to represent the whole message, an error is returned. Some
225 * protocol elements such as content-length and transfer-encoding will be
Willy Tarreau5384aac2018-09-11 16:04:48 +0200226 * parsed and stored into h1m as well. <hdr> may be null, in which case only
227 * the parsing state will be updated. This may be used to restart the parsing
228 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200229 *
230 * For now it's limited to the response. If the header block is incomplete,
231 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200232 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200233 * and h1m->next to zero on the first call, the parser will do the rest. If
234 * an incomplete message is seen, the caller only needs to present h1m->state
235 * and h1m->next again, with an empty header list so that the parser can start
236 * again. In this case, it will detect that it interrupted a previous session
237 * and will first look for the end of the message before reparsing it again and
238 * indexing it at the same time. This ensures that incomplete messages fed 1
239 * character at a time are never processed entirely more than exactly twice,
240 * and that there is no need to store all the internal state and pre-parsed
241 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200242 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200243 * A pointer to a start line descriptor may be passed in <slp>, in which case
244 * the parser will fill it with whatever it found.
245 *
Willy Tarreau794f9af2017-07-26 09:07:47 +0200246 * The code derived from the main HTTP/1 parser above but was simplified and
247 * optimized to process responses produced or forwarded by haproxy. The caller
248 * is responsible for ensuring that the message doesn't wrap, and should ensure
249 * it is complete to avoid having to retry the operation after a failed
250 * attempt. The message is not supposed to be invalid, which is why a few
251 * properties such as the character set used in the header field names are not
252 * checked. In case of an unparsable response message, a negative value will be
253 * returned with h1m->err_pos and h1m->err_state matching the location and
254 * state where the error was met. Leading blank lines are tolerated but not
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100255 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
256 * parsed and the start line is skipped. It is not required to set h1m->state
257 * nor h1m->next in this case.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200258 *
259 * This function returns :
260 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
Willy Tarreau801250e2018-09-11 11:45:04 +0200261 * set) with the state the error occurred in and h1m->err_pos with the
Willy Tarreau794f9af2017-07-26 09:07:47 +0200262 * position relative to <start>
263 * -2 if the output is full (hdr_num reached). err_state and err_pos also
264 * indicate where it failed.
265 * 0 in case of missing data.
266 * > 0 on success, it then corresponds to the number of bytes read since
267 * <start> so that the caller can go on with the payload.
268 */
269int h1_headers_to_hdr_list(char *start, const char *stop,
270 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200271 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200272{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200273 enum h1m_state state;
274 register char *ptr;
275 register const char *end;
276 unsigned int hdr_count;
277 unsigned int skip; /* number of bytes skipped at the beginning */
278 unsigned int sol; /* start of line */
279 unsigned int col; /* position of the colon */
280 unsigned int eol; /* end of line */
281 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200282 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200283 int skip_update;
284 int restarting;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200285 struct ist n, v; /* header name and value during parsing */
286
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200287 skip = 0; // do it only once to keep track of the leading CRLF.
288
289 try_again:
290 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200291 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200292 skip_update = restarting = 0;
293
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100294 if (h1m->flags & H1_MF_HDRS_ONLY) {
295 state = H1_MSG_HDR_FIRST;
296 h1m->next = 0;
297 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100298 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100299 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100300 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
301 restarting = 1;
302 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100303
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200304 ptr = start + h1m->next;
305 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200306
Willy Tarreau794f9af2017-07-26 09:07:47 +0200307 if (unlikely(ptr >= end))
308 goto http_msg_ood;
309
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200310 /* don't update output if hdr is NULL or if we're restarting */
311 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200312 skip_update = 1;
313
Willy Tarreau794f9af2017-07-26 09:07:47 +0200314 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200315 case H1_MSG_RQBEFORE:
316 http_msg_rqbefore:
317 if (likely(HTTP_IS_TOKEN(*ptr))) {
318 /* we have a start of message, we may have skipped some
319 * heading CRLF. Skip them now.
320 */
321 skip += ptr - start;
322 start = ptr;
323
324 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200325 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200326 hdr_count = 0;
327 state = H1_MSG_RQMETH;
328 goto http_msg_rqmeth;
329 }
330
331 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
332 state = H1_MSG_RQBEFORE;
333 goto http_msg_invalid;
334 }
335
336 if (unlikely(*ptr == '\n'))
337 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
338 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
339 /* stop here */
340
341 case H1_MSG_RQBEFORE_CR:
342 http_msg_rqbefore_cr:
343 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
344 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
345 /* stop here */
346
347 case H1_MSG_RQMETH:
348 http_msg_rqmeth:
349 if (likely(HTTP_IS_TOKEN(*ptr)))
350 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
351
352 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200353 sl.rq.m.len = ptr - sl.rq.m.ptr;
354 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200355 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
356 }
357
358 if (likely(HTTP_IS_CRLF(*ptr))) {
359 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200360 sl.rq.m.len = ptr - sl.rq.m.ptr;
361 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200362 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200363 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200364 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200365 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200366 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200367 sl.rq.v.ptr = ptr;
368 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200369 goto http_msg_rqline_eol;
370 }
371 state = H1_MSG_RQMETH;
372 goto http_msg_invalid;
373
374 case H1_MSG_RQMETH_SP:
375 http_msg_rqmeth_sp:
376 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200377 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200378 goto http_msg_rquri;
379 }
380 if (likely(HTTP_IS_SPHT(*ptr)))
381 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
382 /* so it's a CR/LF, meaning an HTTP 0.9 request */
383 goto http_msg_req09_uri;
384
385 case H1_MSG_RQURI:
386 http_msg_rquri:
387#if defined(__x86_64__) || \
388 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
389 defined(__ARM_ARCH_7A__)
390 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
391 while (ptr <= end - sizeof(int)) {
392 int x = *(int *)ptr - 0x21212121;
393 if (x & 0x80808080)
394 break;
395
396 x -= 0x5e5e5e5e;
397 if (!(x & 0x80808080))
398 break;
399
400 ptr += sizeof(int);
401 }
402#endif
403 if (ptr >= end) {
404 state = H1_MSG_RQURI;
405 goto http_msg_ood;
406 }
407 http_msg_rquri2:
408 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
409 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
410
411 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200412 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200413 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
414 }
415 if (likely((unsigned char)*ptr >= 128)) {
416 /* non-ASCII chars are forbidden unless option
417 * accept-invalid-http-request is enabled in the frontend.
418 * In any case, we capture the faulty char.
419 */
420 if (h1m->err_pos < -1)
421 goto invalid_char;
422 if (h1m->err_pos == -1)
423 h1m->err_pos = ptr - start + skip;
424 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
425 }
426
427 if (likely(HTTP_IS_CRLF(*ptr))) {
428 /* so it's a CR/LF, meaning an HTTP 0.9 request */
429 goto http_msg_req09_uri_e;
430 }
431
432 /* OK forbidden chars, 0..31 or 127 */
433 invalid_char:
434 state = H1_MSG_RQURI;
435 goto http_msg_invalid;
436
437 case H1_MSG_RQURI_SP:
438 http_msg_rquri_sp:
439 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200440 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200441 goto http_msg_rqver;
442 }
443 if (likely(HTTP_IS_SPHT(*ptr)))
444 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
445 /* so it's a CR/LF, meaning an HTTP 0.9 request */
446 goto http_msg_req09_ver;
447
448
449 case H1_MSG_RQVER:
450 http_msg_rqver:
451 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
452 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
453
454 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200455 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200456 http_msg_rqline_eol:
457 /* We have seen the end of line. Note that we do not
458 * necessarily have the \n yet, but at least we know that we
459 * have EITHER \r OR \n, otherwise the request would not be
460 * complete. We can then record the request length and return
461 * to the caller which will be able to register it.
462 */
463
464 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200465 if ((sl.rq.v.len == 8) &&
466 (*(sl.rq.v.ptr + 5) > '1' ||
467 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200468 h1m->flags |= H1_MF_VER_11;
469
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200470 if (unlikely(hdr_count >= hdr_num)) {
471 state = H1_MSG_RQVER;
472 goto http_output_full;
473 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200474 if (!(h1m->flags & H1_MF_NO_PHDR))
475 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200476
477 if (unlikely(hdr_count >= hdr_num)) {
478 state = H1_MSG_RQVER;
479 goto http_output_full;
480 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200481 if (!(h1m->flags & H1_MF_NO_PHDR))
482 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200483 }
484
485 sol = ptr - start;
486 if (likely(*ptr == '\r'))
487 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
488 goto http_msg_rqline_end;
489 }
490
491 /* neither an HTTP_VER token nor a CRLF */
492 state = H1_MSG_RQVER;
493 goto http_msg_invalid;
494
495 case H1_MSG_RQLINE_END:
496 http_msg_rqline_end:
497 /* check for HTTP/0.9 request : no version information
498 * available. sol must point to the first of CR or LF. However
499 * since we don't save these elements between calls, if we come
500 * here from a restart, we don't necessarily know. Thus in this
501 * case we simply start over.
502 */
503 if (restarting)
504 goto restart;
505
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200506 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200507 goto http_msg_last_lf;
508
509 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
510 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
511 /* stop here */
512
513 /*
514 * Common states below
515 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200516 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200517 http_msg_rpbefore:
518 if (likely(HTTP_IS_TOKEN(*ptr))) {
519 /* we have a start of message, we may have skipped some
520 * heading CRLF. Skip them now.
521 */
522 skip += ptr - start;
523 start = ptr;
524
525 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200526 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200527 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200528 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200529 goto http_msg_rpver;
530 }
531
532 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200533 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200534 goto http_msg_invalid;
535 }
536
537 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200538 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
539 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200540 /* stop here */
541
Willy Tarreau801250e2018-09-11 11:45:04 +0200542 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200543 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200544 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
545 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200546 /* stop here */
547
Willy Tarreau801250e2018-09-11 11:45:04 +0200548 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200549 http_msg_rpver:
550 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200551 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200552
553 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200554 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200555
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200556 if ((sl.st.v.len == 8) &&
557 (*(sl.st.v.ptr + 5) > '1' ||
558 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200559 h1m->flags |= H1_MF_VER_11;
560
Willy Tarreau801250e2018-09-11 11:45:04 +0200561 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200562 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200563 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200564 goto http_msg_invalid;
565
Willy Tarreau801250e2018-09-11 11:45:04 +0200566 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200567 http_msg_rpver_sp:
568 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200569 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200570 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200571 goto http_msg_rpcode;
572 }
573 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200574 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200575 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200576 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200577 goto http_msg_invalid;
578
Willy Tarreau801250e2018-09-11 11:45:04 +0200579 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200580 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100581 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200582 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200583 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200584 }
585
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100586 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200587 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100588 goto http_msg_invalid;
589 }
590
Willy Tarreau794f9af2017-07-26 09:07:47 +0200591 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200592 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200593 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200594 }
595
596 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200597 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200598
599 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200600 sl.st.r.ptr = ptr;
601 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200602 goto http_msg_rpline_eol;
603
Willy Tarreau801250e2018-09-11 11:45:04 +0200604 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200605 http_msg_rpcode_sp:
606 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200607 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200608 goto http_msg_rpreason;
609 }
610 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200611 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200612 /* so it's a CR/LF, so there is no reason phrase */
613 goto http_msg_rsp_reason;
614
Willy Tarreau801250e2018-09-11 11:45:04 +0200615 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200616 http_msg_rpreason:
617 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200618 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200619 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200620 http_msg_rpline_eol:
621 /* We have seen the end of line. Note that we do not
622 * necessarily have the \n yet, but at least we know that we
623 * have EITHER \r OR \n, otherwise the response would not be
624 * complete. We can then record the response length and return
625 * to the caller which will be able to register it.
626 */
627
Willy Tarreau5384aac2018-09-11 16:04:48 +0200628 if (likely(!skip_update)) {
629 if (unlikely(hdr_count >= hdr_num)) {
630 state = H1_MSG_RPREASON;
631 goto http_output_full;
632 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200633 if (!(h1m->flags & H1_MF_NO_PHDR))
634 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200635 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200636
637 sol = ptr - start;
638 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200639 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200640 goto http_msg_rpline_end;
641
Willy Tarreau801250e2018-09-11 11:45:04 +0200642 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200643 http_msg_rpline_end:
644 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200645 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
646 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200647 /* stop here */
648
Willy Tarreau801250e2018-09-11 11:45:04 +0200649 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200650 http_msg_hdr_first:
651 sol = ptr - start;
652 if (likely(!HTTP_IS_CRLF(*ptr))) {
653 goto http_msg_hdr_name;
654 }
655
656 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200657 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200658 goto http_msg_last_lf;
659
Willy Tarreau801250e2018-09-11 11:45:04 +0200660 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200661 http_msg_hdr_name:
662 /* assumes sol points to the first char */
663 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200664 if (!skip_update) {
665 /* turn it to lower case if needed */
666 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
667 *ptr = tolower(*ptr);
668 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200669 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200670 }
671
672 if (likely(*ptr == ':')) {
673 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200674 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200675 }
676
Willy Tarreau9aec3052018-09-12 09:20:40 +0200677 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200678 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200679 goto http_msg_invalid;
680 }
681
Willy Tarreau9aec3052018-09-12 09:20:40 +0200682 if (h1m->err_pos == -1) /* capture the error pointer */
683 h1m->err_pos = ptr - start + skip; /* >= 0 now */
684
685 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200686 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200687
Willy Tarreau801250e2018-09-11 11:45:04 +0200688 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200689 http_msg_hdr_l1_sp:
690 /* assumes sol points to the first char */
691 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200692 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200693
694 /* header value can be basically anything except CR/LF */
695 sov = ptr - start;
696
697 if (likely(!HTTP_IS_CRLF(*ptr))) {
698 goto http_msg_hdr_val;
699 }
700
701 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200702 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200703 goto http_msg_hdr_l1_lf;
704
Willy Tarreau801250e2018-09-11 11:45:04 +0200705 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200706 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200707 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
708 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200709
Willy Tarreau801250e2018-09-11 11:45:04 +0200710 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200711 http_msg_hdr_l1_lws:
712 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200713 if (!skip_update) {
714 /* replace HT,CR,LF with spaces */
715 for (; start + sov < ptr; sov++)
716 start[sov] = ' ';
717 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200718 goto http_msg_hdr_l1_sp;
719 }
720 /* we had a header consisting only in spaces ! */
721 eol = sov;
722 goto http_msg_complete_header;
723
Willy Tarreau801250e2018-09-11 11:45:04 +0200724 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200725 http_msg_hdr_val:
726 /* assumes sol points to the first char, and sov
727 * points to the first character of the value.
728 */
729
730 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
731 * and lower. In fact since most of the time is spent in the loop, we
732 * also remove the sign bit test so that bytes 0x8e..0x0d break the
733 * loop, but we don't care since they're very rare in header values.
734 */
735#if defined(__x86_64__)
736 while (ptr <= end - sizeof(long)) {
737 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
738 goto http_msg_hdr_val2;
739 ptr += sizeof(long);
740 }
741#endif
742#if defined(__x86_64__) || \
743 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
744 defined(__ARM_ARCH_7A__)
745 while (ptr <= end - sizeof(int)) {
746 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
747 goto http_msg_hdr_val2;
748 ptr += sizeof(int);
749 }
750#endif
751 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200752 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200753 goto http_msg_ood;
754 }
755 http_msg_hdr_val2:
756 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200757 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200758
759 eol = ptr - start;
760 /* Note: we could also copy eol into ->eoh so that we have the
761 * real header end in case it ends with lots of LWS, but is this
762 * really needed ?
763 */
764 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200765 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200766 goto http_msg_hdr_l2_lf;
767
Willy Tarreau801250e2018-09-11 11:45:04 +0200768 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200769 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200770 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
771 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200772
Willy Tarreau801250e2018-09-11 11:45:04 +0200773 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200774 http_msg_hdr_l2_lws:
775 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200776 if (!skip_update) {
777 /* LWS: replace HT,CR,LF with spaces */
778 for (; start + eol < ptr; eol++)
779 start[eol] = ' ';
780 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200781 goto http_msg_hdr_val;
782 }
783 http_msg_complete_header:
784 /*
785 * It was a new header, so the last one is finished. Assumes
786 * <sol> points to the first char of the name, <col> to the
787 * colon, <sov> points to the first character of the value and
788 * <eol> to the first CR or LF so we know how the line ends. We
789 * will trim spaces around the value. It's possible to do it by
790 * adjusting <eol> and <sov> which are no more used after this.
791 * We can add the header field to the list.
792 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200793 if (likely(!skip_update)) {
794 while (sov < eol && HTTP_IS_LWS(start[sov]))
795 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200796
Christopher Faulet2912f872018-09-19 14:01:04 +0200797 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
798 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200799
800
Christopher Faulet2912f872018-09-19 14:01:04 +0200801 n = ist2(start + sol, col - sol);
802 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200803
Christopher Faulet2912f872018-09-19 14:01:04 +0200804 do {
805 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200806
Christopher Faulet2912f872018-09-19 14:01:04 +0200807 if (unlikely(hdr_count >= hdr_num)) {
808 state = H1_MSG_HDR_L2_LWS;
809 goto http_output_full;
810 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200811
Christopher Faulet2912f872018-09-19 14:01:04 +0200812 if (isteqi(n, ist("transfer-encoding"))) {
813 h1_parse_xfer_enc_header(h1m, v);
814 }
815 else if (isteqi(n, ist("content-length"))) {
816 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200817
Christopher Faulet2912f872018-09-19 14:01:04 +0200818 if (ret < 0) {
819 state = H1_MSG_HDR_L2_LWS;
820 goto http_msg_invalid;
821 }
822 else if (ret == 0) {
823 /* skip it */
824 break;
825 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200826 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200827 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100828 h1_parse_connection_header(h1m, &v);
829 if (!v.len) {
830 /* skip it */
831 break;
832 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200833 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200834
Christopher Faulet2912f872018-09-19 14:01:04 +0200835 http_set_hdr(&hdr[hdr_count++], n, v);
836 } while (0);
837 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200838
839 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200840
Willy Tarreau794f9af2017-07-26 09:07:47 +0200841 if (likely(!HTTP_IS_CRLF(*ptr)))
842 goto http_msg_hdr_name;
843
844 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200845 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200846 goto http_msg_last_lf;
847
Willy Tarreau801250e2018-09-11 11:45:04 +0200848 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200849 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200850 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200851 ptr++;
852 /* <ptr> now points to the first byte of payload. If needed sol
853 * still points to the first of either CR or LF of the empty
854 * line ending the headers block.
855 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200856 if (likely(!skip_update)) {
857 if (unlikely(hdr_count >= hdr_num)) {
858 state = H1_MSG_LAST_LF;
859 goto http_output_full;
860 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200861 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200862 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200863
864 /* reaching here we've parsed the whole message. We may detect
865 * that we were already continuing an interrupted parsing pass
866 * so we were silently looking for the end of message not
867 * updating anything before deciding to parse it fully at once.
868 * It's guaranteed that we won't match this test twice in a row
869 * since restarting will turn zero.
870 */
871 if (restarting)
872 goto restart;
873
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200874 state = H1_MSG_DATA;
875 if (h1m->flags & H1_MF_XFER_ENC) {
876 if (h1m->flags & H1_MF_CLEN) {
877 h1m->flags &= ~H1_MF_CLEN;
878 hdr_count = http_del_hdr(hdr, ist("content-length"));
879 }
880
881 if (h1m->flags & H1_MF_CHNK)
882 state = H1_MSG_CHUNK_SIZE;
883 else if (!(h1m->flags & H1_MF_RESP)) {
884 /* cf RFC7230#3.3.3 : transfer-encoding in
885 * request without chunked encoding is invalid.
886 */
887 goto http_msg_invalid;
888 }
889 }
890
Willy Tarreau794f9af2017-07-26 09:07:47 +0200891 break;
892
893 default:
894 /* impossible states */
895 goto http_msg_invalid;
896 }
897
Willy Tarreau001823c2018-09-12 17:25:32 +0200898 /* Now we've left the headers state and are either in H1_MSG_DATA or
899 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200900 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200901
Willy Tarreau5384aac2018-09-11 16:04:48 +0200902 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200903 *slp = sl;
904
Willy Tarreau4433c082018-09-11 15:33:32 +0200905 h1m->state = state;
906 h1m->next = ptr - start + skip;
907 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200908
909 http_msg_ood:
910 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200911 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200912 *slp = sl;
913
Willy Tarreau4433c082018-09-11 15:33:32 +0200914 h1m->state = state;
915 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200916 return 0;
917
918 http_msg_invalid:
919 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200920 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200921 *slp = sl;
922
Willy Tarreau4433c082018-09-11 15:33:32 +0200923 h1m->err_state = h1m->state = state;
924 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200925 return -1;
926
927 http_output_full:
928 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200929 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200930 *slp = sl;
931
Willy Tarreau4433c082018-09-11 15:33:32 +0200932 h1m->err_state = h1m->state = state;
933 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200934 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200935
936 restart:
937 h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200938 if (h1m->flags & H1_MF_RESP)
939 h1m->state = H1_MSG_RPBEFORE;
940 else
941 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200942 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200943}
944
/* Minimal scan of the trailers block located <ofs> bytes into <buf>, looking
 * at no more than <max> bytes. Lines are walked one at a time until an empty
 * line (a line starting with LF or CR) is met, which marks the end of the
 * trailers.
 *
 * Returns:
 *   > 0 : number of bytes, counted from <ofs>, that cover the whole trailers
 *         block including its terminating empty line;
 *     0 : the scan limit was reached before the end of the trailers was seen
 *         (more input data is needed);
 *   < 0 : parse error (a second CR was met on the same line); the caller has
 *         to decide how to proceed, possibly eating everything.
 *
 * Note that only the first CR of a line is remembered: it is not verified
 * that this CR is immediately followed by the LF.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;
	int done;

	do {
		const char *line = b_peek(buf, pos);
		const char *eol = NULL; /* first CR of the line, else its LF */
		const char *cur;

		/* walk the current line up to its LF */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;       /* ran out of data */

			if (*cur == '\n')
				break;

			if (*cur != '\r')
				continue;

			if (eol)
				return -1;      /* two CRs on the same line */
			eol = cur;
		}

		/* no CR seen: the line ends at the LF itself */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* a line beginning with its own terminator is the empty line
		 * closing the trailers; nothing is left from this message.
		 */
		done = (eol == line);
	} while (!done);

	return pos - ofs;
}