blob: d89bcb5f7121d66fda56dbab4f63871825ec1f8b [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020014#include <common/config.h>
Willy Tarreauafba57a2018-12-11 13:44:24 +010015#include <common/h1.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020016#include <common/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020017
Willy Tarreau188e2302018-06-15 11:11:53 +020018#include <proto/channel.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020019#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020020
Willy Tarreau73373ab2018-09-14 17:11:33 +020021/* Parse the Content-Length header field of an HTTP/1 request. The function
22 * checks all possible occurrences of a comma-delimited value, and verifies
23 * if any of them doesn't match a previous value. It returns <0 if a value
24 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
25 * if the value can be indexed (first one). In the last case, the value might
26 * be adjusted and the caller must only add the updated value.
27 */
28int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
29{
30 char *e, *n;
31 long long cl;
32 int not_first = !!(h1m->flags & H1_MF_CLEN);
33 struct ist word;
34
35 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
36 e = value->ptr + value->len;
37
38 while (++word.ptr < e) {
39 /* skip leading delimitor and blanks */
40 if (unlikely(HTTP_IS_LWS(*word.ptr)))
41 continue;
42
43 /* digits only now */
44 for (cl = 0, n = word.ptr; n < e; n++) {
45 unsigned int c = *n - '0';
46 if (unlikely(c > 9)) {
47 /* non-digit */
48 if (unlikely(n == word.ptr)) // spaces only
49 goto fail;
50 break;
51 }
52 if (unlikely(cl > ULLONG_MAX / 10ULL))
53 goto fail; /* multiply overflow */
54 cl = cl * 10ULL;
55 if (unlikely(cl + c < cl))
56 goto fail; /* addition overflow */
57 cl = cl + c;
58 }
59
60 /* keep a copy of the exact cleaned value */
61 word.len = n - word.ptr;
62
63 /* skip trailing LWS till next comma or EOL */
64 for (; n < e; n++) {
65 if (!HTTP_IS_LWS(*n)) {
66 if (unlikely(*n != ','))
67 goto fail;
68 break;
69 }
70 }
71
72 /* if duplicate, must be equal */
73 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
74 goto fail;
75
76 /* OK, store this result as the one to be indexed */
77 h1m->flags |= H1_MF_CLEN;
78 h1m->curr_len = h1m->body_len = cl;
79 *value = word;
80 word.ptr = n;
81 }
82 /* here we've reached the end with a single value or a series of
83 * identical values, all matching previous series if any. The last
84 * parsed value was sent back into <value>. We just have to decide
85 * if this occurrence has to be indexed (it's the first one) or
86 * silently skipped (it's not the first one)
87 */
88 return !not_first;
89 fail:
90 return -1;
91}
92
Willy Tarreau2557f6a2018-09-14 16:34:47 +020093/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
94 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
95 * this case. Any other token found or any empty header field found will reset
96 * this flag, so that it accurately represents the token's presence at the last
97 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
98 * are case-insensitive (cf RFC7230#4).
99 */
100void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
101{
102 char *e, *n;
103 struct ist word;
104
105 h1m->flags |= H1_MF_XFER_ENC;
106 h1m->flags &= ~H1_MF_CHNK;
107
108 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
109 e = value.ptr + value.len;
110
111 while (++word.ptr < e) {
112 /* skip leading delimitor and blanks */
113 if (HTTP_IS_LWS(*word.ptr))
114 continue;
115
116 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
117 word.len = n - word.ptr;
118
119 /* trim trailing blanks */
120 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
121 word.len--;
122
123 h1m->flags &= ~H1_MF_CHNK;
124 if (isteqi(word, ist("chunked")))
125 h1m->flags |= H1_MF_CHNK;
126
127 word.ptr = n;
128 }
129}
130
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200131/* Parse the Connection: header of an HTTP/1 request, looking for "close",
132 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
133 * what was found there. Note that flags are only added, not removed, so the
134 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100135 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
136 * up from "keep-alive" and "close" values. To do so, the header value is
137 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200138 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100139void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200140{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100141 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200142 struct ist word;
143
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100144 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
145 p = value->ptr;
146 e = value->ptr + value->len;
147 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
148 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200149
150 while (++word.ptr < e) {
151 /* skip leading delimitor and blanks */
152 if (HTTP_IS_LWS(*word.ptr))
153 continue;
154
155 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
156 word.len = n - word.ptr;
157
158 /* trim trailing blanks */
159 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
160 word.len--;
161
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100162 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200163 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100164 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
165 goto skip_val;
166 }
167 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200168 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100169 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
170 goto skip_val;
171 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200172 else if (isteqi(word, ist("upgrade")))
173 h1m->flags |= H1_MF_CONN_UPG;
174
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100175 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
176 if (value->ptr + value->len == p) {
177 /* no rewrite done till now */
178 value->len = n - value->ptr;
179 }
180 else {
181 if (value->len)
182 value->ptr[value->len++] = ',';
183 istcat(value, word, e - value->ptr);
184 }
185 }
186
187 skip_val:
188 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200189 }
190}
191
Willy Tarreau538746a2018-12-11 10:59:20 +0100192/* Macros used in the HTTP/1 parser, to check for the expected presence of
193 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
194 */
195
/* Checks that the byte at <ptr> is an LF. When it is, execution simply
 * continues after the macro. Otherwise <state> is assigned <where> and
 * control jumps to label <bad>. <ptr> is not advanced.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)			\
	do {							\
		if (likely(*(ptr) == '\n'))			\
			break;					\
		state = (where);				\
		goto bad;					\
	} while (0)
206
/* Advances pointer <ptr> by one byte. If it reached or passed pointer <end>,
 * <state> is assigned <where> and control jumps to label <stop>; otherwise
 * control jumps to label <more>. Either way control never falls through the
 * macro.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)	\
	do {								\
		if (unlikely(++(ptr) >= (end))) {			\
			state = (where);				\
			goto stop;					\
		}							\
		goto more;						\
	} while (0)
220
Willy Tarreau794f9af2017-07-26 09:07:47 +0200221/* This function parses a contiguous HTTP/1 headers block starting at <start>
222 * and ending before <stop>, at once, and converts it into a list of (name,value)
223 * pairs representing header fields into the array <hdr> of size <hdr_num>,
224 * whose last entry will have an empty name and an empty value. If <hdr_num> is
Willy Tarreau4433c082018-09-11 15:33:32 +0200225 * too small to represent the whole message, an error is returned. Some
226 * protocol elements such as content-length and transfer-encoding will be
Willy Tarreau5384aac2018-09-11 16:04:48 +0200227 * parsed and stored into h1m as well. <hdr> may be null, in which case only
228 * the parsing state will be updated. This may be used to restart the parsing
229 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200230 *
231 * For now it's limited to the response. If the header block is incomplete,
232 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200233 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200234 * and h1m->next to zero on the first call, the parser will do the rest. If
235 * an incomplete message is seen, the caller only needs to present h1m->state
236 * and h1m->next again, with an empty header list so that the parser can start
237 * again. In this case, it will detect that it interrupted a previous session
238 * and will first look for the end of the message before reparsing it again and
239 * indexing it at the same time. This ensures that incomplete messages fed 1
240 * character at a time are never processed entirely more than exactly twice,
241 * and that there is no need to store all the internal state and pre-parsed
242 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200243 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200244 * A pointer to a start line descriptor may be passed in <slp>, in which case
245 * the parser will fill it with whatever it found.
246 *
Willy Tarreau794f9af2017-07-26 09:07:47 +0200247 * The code derived from the main HTTP/1 parser above but was simplified and
248 * optimized to process responses produced or forwarded by haproxy. The caller
249 * is responsible for ensuring that the message doesn't wrap, and should ensure
250 * it is complete to avoid having to retry the operation after a failed
251 * attempt. The message is not supposed to be invalid, which is why a few
252 * properties such as the character set used in the header field names are not
253 * checked. In case of an unparsable response message, a negative value will be
254 * returned with h1m->err_pos and h1m->err_state matching the location and
255 * state where the error was met. Leading blank lines are tolerated but not
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100256 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
257 * parsed and the start line is skipped. It is not required to set h1m->state
258 * nor h1m->next in this case.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200259 *
260 * This function returns :
261 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
Willy Tarreau801250e2018-09-11 11:45:04 +0200262 * set) with the state the error occurred in and h1m->err_pos with the
Willy Tarreau794f9af2017-07-26 09:07:47 +0200263 * position relative to <start>
264 * -2 if the output is full (hdr_num reached). err_state and err_pos also
265 * indicate where it failed.
266 * 0 in case of missing data.
267 * > 0 on success, it then corresponds to the number of bytes read since
268 * <start> so that the caller can go on with the payload.
269 */
270int h1_headers_to_hdr_list(char *start, const char *stop,
271 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200272 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200273{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200274 enum h1m_state state;
275 register char *ptr;
276 register const char *end;
277 unsigned int hdr_count;
278 unsigned int skip; /* number of bytes skipped at the beginning */
279 unsigned int sol; /* start of line */
280 unsigned int col; /* position of the colon */
281 unsigned int eol; /* end of line */
282 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200283 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200284 int skip_update;
285 int restarting;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200286 struct ist n, v; /* header name and value during parsing */
287
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200288 skip = 0; // do it only once to keep track of the leading CRLF.
289
290 try_again:
291 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200292 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200293 skip_update = restarting = 0;
294
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100295 if (h1m->flags & H1_MF_HDRS_ONLY) {
296 state = H1_MSG_HDR_FIRST;
297 h1m->next = 0;
298 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100299 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100300 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100301 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
302 restarting = 1;
303 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100304
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200305 ptr = start + h1m->next;
306 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200307
Willy Tarreau794f9af2017-07-26 09:07:47 +0200308 if (unlikely(ptr >= end))
309 goto http_msg_ood;
310
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200311 /* don't update output if hdr is NULL or if we're restarting */
312 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200313 skip_update = 1;
314
Willy Tarreau794f9af2017-07-26 09:07:47 +0200315 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200316 case H1_MSG_RQBEFORE:
317 http_msg_rqbefore:
318 if (likely(HTTP_IS_TOKEN(*ptr))) {
319 /* we have a start of message, we may have skipped some
320 * heading CRLF. Skip them now.
321 */
322 skip += ptr - start;
323 start = ptr;
324
325 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200326 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200327 hdr_count = 0;
328 state = H1_MSG_RQMETH;
329 goto http_msg_rqmeth;
330 }
331
332 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
333 state = H1_MSG_RQBEFORE;
334 goto http_msg_invalid;
335 }
336
337 if (unlikely(*ptr == '\n'))
338 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
339 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
340 /* stop here */
341
342 case H1_MSG_RQBEFORE_CR:
343 http_msg_rqbefore_cr:
344 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
345 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
346 /* stop here */
347
348 case H1_MSG_RQMETH:
349 http_msg_rqmeth:
350 if (likely(HTTP_IS_TOKEN(*ptr)))
351 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
352
353 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200354 sl.rq.m.len = ptr - sl.rq.m.ptr;
355 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200356 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
357 }
358
359 if (likely(HTTP_IS_CRLF(*ptr))) {
360 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200361 sl.rq.m.len = ptr - sl.rq.m.ptr;
362 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200363 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200364 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200365 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200366 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200367 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200368 sl.rq.v.ptr = ptr;
369 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200370 goto http_msg_rqline_eol;
371 }
372 state = H1_MSG_RQMETH;
373 goto http_msg_invalid;
374
375 case H1_MSG_RQMETH_SP:
376 http_msg_rqmeth_sp:
377 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200378 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200379 goto http_msg_rquri;
380 }
381 if (likely(HTTP_IS_SPHT(*ptr)))
382 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
383 /* so it's a CR/LF, meaning an HTTP 0.9 request */
384 goto http_msg_req09_uri;
385
386 case H1_MSG_RQURI:
387 http_msg_rquri:
388#if defined(__x86_64__) || \
389 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
390 defined(__ARM_ARCH_7A__)
391 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
392 while (ptr <= end - sizeof(int)) {
393 int x = *(int *)ptr - 0x21212121;
394 if (x & 0x80808080)
395 break;
396
397 x -= 0x5e5e5e5e;
398 if (!(x & 0x80808080))
399 break;
400
401 ptr += sizeof(int);
402 }
403#endif
404 if (ptr >= end) {
405 state = H1_MSG_RQURI;
406 goto http_msg_ood;
407 }
408 http_msg_rquri2:
409 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
410 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
411
412 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200413 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200414 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
415 }
416 if (likely((unsigned char)*ptr >= 128)) {
417 /* non-ASCII chars are forbidden unless option
418 * accept-invalid-http-request is enabled in the frontend.
419 * In any case, we capture the faulty char.
420 */
421 if (h1m->err_pos < -1)
422 goto invalid_char;
423 if (h1m->err_pos == -1)
424 h1m->err_pos = ptr - start + skip;
425 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
426 }
427
428 if (likely(HTTP_IS_CRLF(*ptr))) {
429 /* so it's a CR/LF, meaning an HTTP 0.9 request */
430 goto http_msg_req09_uri_e;
431 }
432
433 /* OK forbidden chars, 0..31 or 127 */
434 invalid_char:
435 state = H1_MSG_RQURI;
436 goto http_msg_invalid;
437
438 case H1_MSG_RQURI_SP:
439 http_msg_rquri_sp:
440 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200441 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200442 goto http_msg_rqver;
443 }
444 if (likely(HTTP_IS_SPHT(*ptr)))
445 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
446 /* so it's a CR/LF, meaning an HTTP 0.9 request */
447 goto http_msg_req09_ver;
448
449
450 case H1_MSG_RQVER:
451 http_msg_rqver:
452 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
453 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
454
455 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200456 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200457 http_msg_rqline_eol:
458 /* We have seen the end of line. Note that we do not
459 * necessarily have the \n yet, but at least we know that we
460 * have EITHER \r OR \n, otherwise the request would not be
461 * complete. We can then record the request length and return
462 * to the caller which will be able to register it.
463 */
464
465 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200466 if ((sl.rq.v.len == 8) &&
467 (*(sl.rq.v.ptr + 5) > '1' ||
468 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200469 h1m->flags |= H1_MF_VER_11;
470
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200471 if (unlikely(hdr_count >= hdr_num)) {
472 state = H1_MSG_RQVER;
473 goto http_output_full;
474 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200475 if (!(h1m->flags & H1_MF_NO_PHDR))
476 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200477
478 if (unlikely(hdr_count >= hdr_num)) {
479 state = H1_MSG_RQVER;
480 goto http_output_full;
481 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200482 if (!(h1m->flags & H1_MF_NO_PHDR))
483 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200484 }
485
486 sol = ptr - start;
487 if (likely(*ptr == '\r'))
488 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
489 goto http_msg_rqline_end;
490 }
491
492 /* neither an HTTP_VER token nor a CRLF */
493 state = H1_MSG_RQVER;
494 goto http_msg_invalid;
495
496 case H1_MSG_RQLINE_END:
497 http_msg_rqline_end:
498 /* check for HTTP/0.9 request : no version information
499 * available. sol must point to the first of CR or LF. However
500 * since we don't save these elements between calls, if we come
501 * here from a restart, we don't necessarily know. Thus in this
502 * case we simply start over.
503 */
504 if (restarting)
505 goto restart;
506
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200507 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200508 goto http_msg_last_lf;
509
510 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
511 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
512 /* stop here */
513
514 /*
515 * Common states below
516 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200517 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200518 http_msg_rpbefore:
519 if (likely(HTTP_IS_TOKEN(*ptr))) {
520 /* we have a start of message, we may have skipped some
521 * heading CRLF. Skip them now.
522 */
523 skip += ptr - start;
524 start = ptr;
525
526 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200527 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200528 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200529 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200530 goto http_msg_rpver;
531 }
532
533 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200534 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200535 goto http_msg_invalid;
536 }
537
538 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200539 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
540 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200541 /* stop here */
542
Willy Tarreau801250e2018-09-11 11:45:04 +0200543 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200544 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200545 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
546 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200547 /* stop here */
548
Willy Tarreau801250e2018-09-11 11:45:04 +0200549 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200550 http_msg_rpver:
551 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200552 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200553
554 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200555 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200556
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200557 if ((sl.st.v.len == 8) &&
558 (*(sl.st.v.ptr + 5) > '1' ||
559 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200560 h1m->flags |= H1_MF_VER_11;
561
Willy Tarreau801250e2018-09-11 11:45:04 +0200562 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200563 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200564 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200565 goto http_msg_invalid;
566
Willy Tarreau801250e2018-09-11 11:45:04 +0200567 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200568 http_msg_rpver_sp:
569 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200570 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200571 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200572 goto http_msg_rpcode;
573 }
574 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200575 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200576 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200577 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200578 goto http_msg_invalid;
579
Willy Tarreau801250e2018-09-11 11:45:04 +0200580 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200581 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100582 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200583 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200584 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200585 }
586
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100587 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200588 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100589 goto http_msg_invalid;
590 }
591
Willy Tarreau794f9af2017-07-26 09:07:47 +0200592 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200593 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200594 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200595 }
596
597 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200598 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200599
600 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200601 sl.st.r.ptr = ptr;
602 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200603 goto http_msg_rpline_eol;
604
Willy Tarreau801250e2018-09-11 11:45:04 +0200605 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200606 http_msg_rpcode_sp:
607 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200608 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200609 goto http_msg_rpreason;
610 }
611 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200612 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200613 /* so it's a CR/LF, so there is no reason phrase */
614 goto http_msg_rsp_reason;
615
Willy Tarreau801250e2018-09-11 11:45:04 +0200616 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200617 http_msg_rpreason:
618 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200619 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200620 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200621 http_msg_rpline_eol:
622 /* We have seen the end of line. Note that we do not
623 * necessarily have the \n yet, but at least we know that we
624 * have EITHER \r OR \n, otherwise the response would not be
625 * complete. We can then record the response length and return
626 * to the caller which will be able to register it.
627 */
628
Willy Tarreau5384aac2018-09-11 16:04:48 +0200629 if (likely(!skip_update)) {
630 if (unlikely(hdr_count >= hdr_num)) {
631 state = H1_MSG_RPREASON;
632 goto http_output_full;
633 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200634 if (!(h1m->flags & H1_MF_NO_PHDR))
635 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200636 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200637
638 sol = ptr - start;
639 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200640 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200641 goto http_msg_rpline_end;
642
Willy Tarreau801250e2018-09-11 11:45:04 +0200643 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200644 http_msg_rpline_end:
645 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200646 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
647 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200648 /* stop here */
649
Willy Tarreau801250e2018-09-11 11:45:04 +0200650 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200651 http_msg_hdr_first:
652 sol = ptr - start;
653 if (likely(!HTTP_IS_CRLF(*ptr))) {
654 goto http_msg_hdr_name;
655 }
656
657 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200658 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200659 goto http_msg_last_lf;
660
Willy Tarreau801250e2018-09-11 11:45:04 +0200661 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200662 http_msg_hdr_name:
663 /* assumes sol points to the first char */
664 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200665 if (!skip_update) {
666 /* turn it to lower case if needed */
667 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
668 *ptr = tolower(*ptr);
669 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200670 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200671 }
672
673 if (likely(*ptr == ':')) {
674 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200675 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200676 }
677
Willy Tarreau9aec3052018-09-12 09:20:40 +0200678 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200679 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200680 goto http_msg_invalid;
681 }
682
Willy Tarreau9aec3052018-09-12 09:20:40 +0200683 if (h1m->err_pos == -1) /* capture the error pointer */
684 h1m->err_pos = ptr - start + skip; /* >= 0 now */
685
686 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200687 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200688
Willy Tarreau801250e2018-09-11 11:45:04 +0200689 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200690 http_msg_hdr_l1_sp:
691 /* assumes sol points to the first char */
692 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200693 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200694
695 /* header value can be basically anything except CR/LF */
696 sov = ptr - start;
697
698 if (likely(!HTTP_IS_CRLF(*ptr))) {
699 goto http_msg_hdr_val;
700 }
701
702 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200703 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200704 goto http_msg_hdr_l1_lf;
705
Willy Tarreau801250e2018-09-11 11:45:04 +0200706 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200707 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200708 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
709 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200710
Willy Tarreau801250e2018-09-11 11:45:04 +0200711 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200712 http_msg_hdr_l1_lws:
713 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200714 if (!skip_update) {
715 /* replace HT,CR,LF with spaces */
716 for (; start + sov < ptr; sov++)
717 start[sov] = ' ';
718 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200719 goto http_msg_hdr_l1_sp;
720 }
721 /* we had a header consisting only in spaces ! */
722 eol = sov;
723 goto http_msg_complete_header;
724
Willy Tarreau801250e2018-09-11 11:45:04 +0200725 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200726 http_msg_hdr_val:
727 /* assumes sol points to the first char, and sov
728 * points to the first character of the value.
729 */
730
731 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
732 * and lower. In fact since most of the time is spent in the loop, we
733 * also remove the sign bit test so that bytes 0x8e..0x0d break the
734 * loop, but we don't care since they're very rare in header values.
735 */
736#if defined(__x86_64__)
737 while (ptr <= end - sizeof(long)) {
738 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
739 goto http_msg_hdr_val2;
740 ptr += sizeof(long);
741 }
742#endif
743#if defined(__x86_64__) || \
744 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
745 defined(__ARM_ARCH_7A__)
746 while (ptr <= end - sizeof(int)) {
747 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
748 goto http_msg_hdr_val2;
749 ptr += sizeof(int);
750 }
751#endif
752 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200753 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200754 goto http_msg_ood;
755 }
756 http_msg_hdr_val2:
757 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200758 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200759
760 eol = ptr - start;
761 /* Note: we could also copy eol into ->eoh so that we have the
762 * real header end in case it ends with lots of LWS, but is this
763 * really needed ?
764 */
765 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200766 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200767 goto http_msg_hdr_l2_lf;
768
Willy Tarreau801250e2018-09-11 11:45:04 +0200769 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200770 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200771 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
772 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200773
Willy Tarreau801250e2018-09-11 11:45:04 +0200774 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200775 http_msg_hdr_l2_lws:
776 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200777 if (!skip_update) {
778 /* LWS: replace HT,CR,LF with spaces */
779 for (; start + eol < ptr; eol++)
780 start[eol] = ' ';
781 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200782 goto http_msg_hdr_val;
783 }
784 http_msg_complete_header:
785 /*
786 * It was a new header, so the last one is finished. Assumes
787 * <sol> points to the first char of the name, <col> to the
788 * colon, <sov> points to the first character of the value and
789 * <eol> to the first CR or LF so we know how the line ends. We
790 * will trim spaces around the value. It's possible to do it by
791 * adjusting <eol> and <sov> which are no more used after this.
792 * We can add the header field to the list.
793 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200794 if (likely(!skip_update)) {
795 while (sov < eol && HTTP_IS_LWS(start[sov]))
796 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200797
Christopher Faulet2912f872018-09-19 14:01:04 +0200798 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
799 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200800
801
Christopher Faulet2912f872018-09-19 14:01:04 +0200802 n = ist2(start + sol, col - sol);
803 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200804
Christopher Faulet2912f872018-09-19 14:01:04 +0200805 do {
806 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200807
Christopher Faulet2912f872018-09-19 14:01:04 +0200808 if (unlikely(hdr_count >= hdr_num)) {
809 state = H1_MSG_HDR_L2_LWS;
810 goto http_output_full;
811 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200812
Christopher Faulet2912f872018-09-19 14:01:04 +0200813 if (isteqi(n, ist("transfer-encoding"))) {
814 h1_parse_xfer_enc_header(h1m, v);
815 }
816 else if (isteqi(n, ist("content-length"))) {
817 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200818
Christopher Faulet2912f872018-09-19 14:01:04 +0200819 if (ret < 0) {
820 state = H1_MSG_HDR_L2_LWS;
821 goto http_msg_invalid;
822 }
823 else if (ret == 0) {
824 /* skip it */
825 break;
826 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200827 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200828 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100829 h1_parse_connection_header(h1m, &v);
830 if (!v.len) {
831 /* skip it */
832 break;
833 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200834 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200835
Christopher Faulet2912f872018-09-19 14:01:04 +0200836 http_set_hdr(&hdr[hdr_count++], n, v);
837 } while (0);
838 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200839
840 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200841
Willy Tarreau794f9af2017-07-26 09:07:47 +0200842 if (likely(!HTTP_IS_CRLF(*ptr)))
843 goto http_msg_hdr_name;
844
845 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200846 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200847 goto http_msg_last_lf;
848
Willy Tarreau801250e2018-09-11 11:45:04 +0200849 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200850 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200851 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200852 ptr++;
853 /* <ptr> now points to the first byte of payload. If needed sol
854 * still points to the first of either CR or LF of the empty
855 * line ending the headers block.
856 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200857 if (likely(!skip_update)) {
858 if (unlikely(hdr_count >= hdr_num)) {
859 state = H1_MSG_LAST_LF;
860 goto http_output_full;
861 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200862 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200863 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200864
865 /* reaching here we've parsed the whole message. We may detect
866 * that we were already continuing an interrupted parsing pass
867 * so we were silently looking for the end of message not
868 * updating anything before deciding to parse it fully at once.
869 * It's guaranteed that we won't match this test twice in a row
870 * since restarting will turn zero.
871 */
872 if (restarting)
873 goto restart;
874
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200875 state = H1_MSG_DATA;
876 if (h1m->flags & H1_MF_XFER_ENC) {
877 if (h1m->flags & H1_MF_CLEN) {
878 h1m->flags &= ~H1_MF_CLEN;
879 hdr_count = http_del_hdr(hdr, ist("content-length"));
880 }
881
882 if (h1m->flags & H1_MF_CHNK)
883 state = H1_MSG_CHUNK_SIZE;
884 else if (!(h1m->flags & H1_MF_RESP)) {
885 /* cf RFC7230#3.3.3 : transfer-encoding in
886 * request without chunked encoding is invalid.
887 */
888 goto http_msg_invalid;
889 }
890 }
891
Willy Tarreau794f9af2017-07-26 09:07:47 +0200892 break;
893
894 default:
895 /* impossible states */
896 goto http_msg_invalid;
897 }
898
Willy Tarreau001823c2018-09-12 17:25:32 +0200899 /* Now we've left the headers state and are either in H1_MSG_DATA or
900 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200901 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200902
Willy Tarreau5384aac2018-09-11 16:04:48 +0200903 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200904 *slp = sl;
905
Willy Tarreau4433c082018-09-11 15:33:32 +0200906 h1m->state = state;
907 h1m->next = ptr - start + skip;
908 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200909
910 http_msg_ood:
911 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200912 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200913 *slp = sl;
914
Willy Tarreau4433c082018-09-11 15:33:32 +0200915 h1m->state = state;
916 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200917 return 0;
918
919 http_msg_invalid:
920 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200921 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200922 *slp = sl;
923
Willy Tarreau4433c082018-09-11 15:33:32 +0200924 h1m->err_state = h1m->state = state;
925 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200926 return -1;
927
928 http_output_full:
929 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200930 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200931 *slp = sl;
932
Willy Tarreau4433c082018-09-11 15:33:32 +0200933 h1m->err_state = h1m->state = state;
934 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200935 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200936
937 restart:
938 h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200939 if (h1m->flags & H1_MF_RESP)
940 h1m->state = H1_MSG_RPBEFORE;
941 else
942 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200943 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200944}
945
/* Minimal scanner for the trailers block found at offset <ofs> in <buf>,
 * inspecting at most <max> bytes. The block is walked line by line, each line
 * being terminated by LF or CRLF.
 *
 * Returns :
 *   > 0 : number of bytes the caller has to delete to skip the trailers,
 *         i.e. everything from <ofs> up to and including the empty line
 *         which closes the block ;
 *     0 : the scan reached <ofs>+<max> before the closing line was seen, so
 *         more input data is required ;
 *   < 0 : parse error (a second CR was met on the same line). The caller
 *         decides how to proceed, possibly eating everything.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *stop = b_peek(buf, ofs + max);
	const char *line, *cur, *eol;
	int count = ofs;

	do {
		line = b_peek(buf, count);
		eol  = NULL; /* first CR or LF seen on the current line */

		/* walk the current line until its LF, never reading at <stop> */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == stop)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur == '\r') {
				/* only one CR is tolerated per line */
				if (eol)
					return -1;
				eol = cur;
			}
		}

		/* no CR before the LF: the LF itself is the line terminator */
		if (!eol)
			eol = cur;

		/* account for the whole line, trailing LF included, so that
		 * <count> designates the beginning of the next line.
		 */
		count += b_dist(buf, line, b_next(buf, cur));

		/* a line whose very first byte is its terminator (LF or CR)
		 * marks the end of the trailers; otherwise go on with the
		 * next line.
		 */
	} while (eol != line);

	return count - ofs;
}