blob: 95bcc1f0e4632115cc64bd98f78ede177be0aec6 [file] [log] [blame]
/*
 * HTTP/1 protocol analyzer
 *
 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010014
15#include <import/sha1.h>
16
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020017#include <haproxy/api.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010018#include <haproxy/base64.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020019#include <haproxy/h1.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020020#include <haproxy/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020021
Willy Tarreau73373ab2018-09-14 17:11:33 +020022/* Parse the Content-Length header field of an HTTP/1 request. The function
23 * checks all possible occurrences of a comma-delimited value, and verifies
24 * if any of them doesn't match a previous value. It returns <0 if a value
25 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
26 * if the value can be indexed (first one). In the last case, the value might
27 * be adjusted and the caller must only add the updated value.
28 */
29int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
30{
31 char *e, *n;
32 long long cl;
33 int not_first = !!(h1m->flags & H1_MF_CLEN);
34 struct ist word;
35
36 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
37 e = value->ptr + value->len;
38
39 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +050040 /* skip leading delimiter and blanks */
Willy Tarreau73373ab2018-09-14 17:11:33 +020041 if (unlikely(HTTP_IS_LWS(*word.ptr)))
42 continue;
43
44 /* digits only now */
45 for (cl = 0, n = word.ptr; n < e; n++) {
46 unsigned int c = *n - '0';
47 if (unlikely(c > 9)) {
48 /* non-digit */
49 if (unlikely(n == word.ptr)) // spaces only
50 goto fail;
51 break;
52 }
53 if (unlikely(cl > ULLONG_MAX / 10ULL))
54 goto fail; /* multiply overflow */
55 cl = cl * 10ULL;
56 if (unlikely(cl + c < cl))
57 goto fail; /* addition overflow */
58 cl = cl + c;
59 }
60
61 /* keep a copy of the exact cleaned value */
62 word.len = n - word.ptr;
63
64 /* skip trailing LWS till next comma or EOL */
65 for (; n < e; n++) {
66 if (!HTTP_IS_LWS(*n)) {
67 if (unlikely(*n != ','))
68 goto fail;
69 break;
70 }
71 }
72
73 /* if duplicate, must be equal */
74 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
75 goto fail;
76
77 /* OK, store this result as the one to be indexed */
78 h1m->flags |= H1_MF_CLEN;
79 h1m->curr_len = h1m->body_len = cl;
80 *value = word;
81 word.ptr = n;
82 }
83 /* here we've reached the end with a single value or a series of
84 * identical values, all matching previous series if any. The last
85 * parsed value was sent back into <value>. We just have to decide
86 * if this occurrence has to be indexed (it's the first one) or
87 * silently skipped (it's not the first one)
88 */
89 return !not_first;
90 fail:
91 return -1;
92}
93
Willy Tarreau2557f6a2018-09-14 16:34:47 +020094/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
95 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
96 * this case. Any other token found or any empty header field found will reset
97 * this flag, so that it accurately represents the token's presence at the last
98 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
99 * are case-insensitive (cf RFC7230#4).
100 */
101void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
102{
103 char *e, *n;
104 struct ist word;
105
106 h1m->flags |= H1_MF_XFER_ENC;
107 h1m->flags &= ~H1_MF_CHNK;
108
109 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
110 e = value.ptr + value.len;
111
112 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500113 /* skip leading delimiter and blanks */
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200114 if (HTTP_IS_LWS(*word.ptr))
115 continue;
116
117 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
118 word.len = n - word.ptr;
119
120 /* trim trailing blanks */
121 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
122 word.len--;
123
124 h1m->flags &= ~H1_MF_CHNK;
125 if (isteqi(word, ist("chunked")))
126 h1m->flags |= H1_MF_CHNK;
127
128 word.ptr = n;
129 }
130}
131
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200132/* Parse the Connection: header of an HTTP/1 request, looking for "close",
133 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
134 * what was found there. Note that flags are only added, not removed, so the
135 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100136 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
137 * up from "keep-alive" and "close" values. To do so, the header value is
138 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200139 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100140void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200141{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100142 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200143 struct ist word;
144
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100145 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
146 p = value->ptr;
147 e = value->ptr + value->len;
148 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
149 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200150
151 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500152 /* skip leading delimiter and blanks */
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200153 if (HTTP_IS_LWS(*word.ptr))
154 continue;
155
156 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
157 word.len = n - word.ptr;
158
159 /* trim trailing blanks */
160 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
161 word.len--;
162
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100163 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200164 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100165 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
166 goto skip_val;
167 }
168 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200169 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100170 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
171 goto skip_val;
172 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200173 else if (isteqi(word, ist("upgrade")))
174 h1m->flags |= H1_MF_CONN_UPG;
175
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100176 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
177 if (value->ptr + value->len == p) {
178 /* no rewrite done till now */
179 value->len = n - value->ptr;
180 }
181 else {
182 if (value->len)
183 value->ptr[value->len++] = ',';
184 istcat(value, word, e - value->ptr);
185 }
186 }
187
188 skip_val:
189 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200190 }
191}
192
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100193/* Parse the Upgrade: header of an HTTP/1 request.
194 * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
195 */
196void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
197{
198 char *e, *n;
199 struct ist word;
200
201 h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
202
203 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
204 e = value.ptr + value.len;
205
206 while (++word.ptr < e) {
207 /* skip leading delimiter and blanks */
208 if (HTTP_IS_LWS(*word.ptr))
209 continue;
210
211 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
212 word.len = n - word.ptr;
213
214 /* trim trailing blanks */
215 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
216 word.len--;
217
218 if (isteqi(word, ist("websocket")))
219 h1m->flags |= H1_MF_UPG_WEBSOCKET;
220
221 word.ptr = n;
222 }
223}
224
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
 */
228
/* Checks that the byte at <ptr> is an LF. When it is, execution simply
 * continues after the macro. Otherwise <state> is assigned <where> and the
 * code jumps to label <bad>.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)                  \
	do {                                                    \
		if (likely(*(ptr) == '\n'))                     \
			break;                                  \
		state = (where);                                \
		goto bad;                                       \
	} while (0)
239
/* Advances pointer <ptr> by one byte. If it reached or passed pointer <end>,
 * <state> is assigned <where> and the code jumps to label <stop>; otherwise
 * the code jumps to label <more>. In both cases control leaves the macro
 * through a goto.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)      \
	do {                                                            \
		if (unlikely(++(ptr) >= (end))) {                       \
			state = (where);                                \
			goto stop;                                      \
		}                                                       \
		goto more;                                              \
	} while (0)
253
/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
 * pairs representing header fields into the array <hdr> of size <hdr_num>,
 * whose last entry will have an empty name and an empty value. If <hdr_num> is
 * too small to represent the whole message, an error is returned. Some
 * protocol elements such as content-length and transfer-encoding will be
 * parsed and stored into h1m as well. <hdr> may be null, in which case only
 * the parsing state will be updated. This may be used to restart the parsing
 * where it stopped for example.
 *
 * For now it's limited to the response. If the header block is incomplete,
 * 0 is returned, waiting to be called again with more data to try it again.
 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
 * and h1m->next to zero on the first call, the parser will do the rest. If
 * an incomplete message is seen, the caller only needs to present h1m->state
 * and h1m->next again, with an empty header list so that the parser can start
 * again. In this case, it will detect that it interrupted a previous session
 * and will first look for the end of the message before reparsing it again and
 * indexing it at the same time. This ensures that incomplete messages fed 1
 * character at a time are never processed entirely more than exactly twice,
 * and that there is no need to store all the internal state and pre-parsed
 * headers or start line between calls.
 *
 * A pointer to a start line descriptor may be passed in <slp>, in which case
 * the parser will fill it with whatever it found.
 *
 * The code is derived from the main HTTP/1 parser above but was simplified and
 * optimized to process responses produced or forwarded by haproxy. The caller
 * is responsible for ensuring that the message doesn't wrap, and should ensure
 * it is complete to avoid having to retry the operation after a failed
 * attempt. The message is not supposed to be invalid, which is why a few
 * properties such as the character set used in the header field names are not
 * checked. In case of an unparsable response message, a negative value will be
 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
 * parsed and the start line is skipped. It is not required to set h1m->state
 * nor h1m->next in this case.
 *
 * This function returns :
 *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
 *    -2 if the output is full (hdr_num reached). err_state and err_pos also
 *       indicate where it failed.
 *     0 in case of missing data.
 *   > 0 on success, it then corresponds to the number of bytes read since
 *       <start> so that the caller can go on with the payload.
 */
303int h1_headers_to_hdr_list(char *start, const char *stop,
304 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200305 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200306{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200307 enum h1m_state state;
308 register char *ptr;
309 register const char *end;
310 unsigned int hdr_count;
311 unsigned int skip; /* number of bytes skipped at the beginning */
312 unsigned int sol; /* start of line */
313 unsigned int col; /* position of the colon */
314 unsigned int eol; /* end of line */
315 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200316 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200317 int skip_update;
318 int restarting;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200319 int host_idx;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200320 struct ist n, v; /* header name and value during parsing */
321
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200322 skip = 0; // do it only once to keep track of the leading CRLF.
323
324 try_again:
325 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200326 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200327 skip_update = restarting = 0;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200328 host_idx = -1;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200329
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100330 if (h1m->flags & H1_MF_HDRS_ONLY) {
331 state = H1_MSG_HDR_FIRST;
332 h1m->next = 0;
333 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100334 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100335 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100336 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
337 restarting = 1;
338 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100339
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200340 ptr = start + h1m->next;
341 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200342
Willy Tarreau794f9af2017-07-26 09:07:47 +0200343 if (unlikely(ptr >= end))
344 goto http_msg_ood;
345
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200346 /* don't update output if hdr is NULL or if we're restarting */
347 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200348 skip_update = 1;
349
Willy Tarreau794f9af2017-07-26 09:07:47 +0200350 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200351 case H1_MSG_RQBEFORE:
352 http_msg_rqbefore:
353 if (likely(HTTP_IS_TOKEN(*ptr))) {
354 /* we have a start of message, we may have skipped some
355 * heading CRLF. Skip them now.
356 */
357 skip += ptr - start;
358 start = ptr;
359
360 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200361 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200362 hdr_count = 0;
363 state = H1_MSG_RQMETH;
364 goto http_msg_rqmeth;
365 }
366
367 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
368 state = H1_MSG_RQBEFORE;
369 goto http_msg_invalid;
370 }
371
372 if (unlikely(*ptr == '\n'))
373 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
374 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
375 /* stop here */
376
377 case H1_MSG_RQBEFORE_CR:
378 http_msg_rqbefore_cr:
379 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
380 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
381 /* stop here */
382
383 case H1_MSG_RQMETH:
384 http_msg_rqmeth:
385 if (likely(HTTP_IS_TOKEN(*ptr)))
386 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
387
388 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200389 sl.rq.m.len = ptr - sl.rq.m.ptr;
390 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200391 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
392 }
393
394 if (likely(HTTP_IS_CRLF(*ptr))) {
395 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200396 sl.rq.m.len = ptr - sl.rq.m.ptr;
397 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200398 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200399 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200400 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200401 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200402 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200403 sl.rq.v.ptr = ptr;
404 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200405 goto http_msg_rqline_eol;
406 }
407 state = H1_MSG_RQMETH;
408 goto http_msg_invalid;
409
410 case H1_MSG_RQMETH_SP:
411 http_msg_rqmeth_sp:
412 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200413 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200414 goto http_msg_rquri;
415 }
416 if (likely(HTTP_IS_SPHT(*ptr)))
417 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
418 /* so it's a CR/LF, meaning an HTTP 0.9 request */
419 goto http_msg_req09_uri;
420
421 case H1_MSG_RQURI:
422 http_msg_rquri:
Willy Tarreau02ac9502020-02-21 16:31:22 +0100423#ifdef HA_UNALIGNED_LE
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200424 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
425 while (ptr <= end - sizeof(int)) {
426 int x = *(int *)ptr - 0x21212121;
427 if (x & 0x80808080)
428 break;
429
430 x -= 0x5e5e5e5e;
431 if (!(x & 0x80808080))
432 break;
433
434 ptr += sizeof(int);
435 }
436#endif
437 if (ptr >= end) {
438 state = H1_MSG_RQURI;
439 goto http_msg_ood;
440 }
441 http_msg_rquri2:
442 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
443 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
444
445 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200446 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200447 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
448 }
449 if (likely((unsigned char)*ptr >= 128)) {
450 /* non-ASCII chars are forbidden unless option
451 * accept-invalid-http-request is enabled in the frontend.
452 * In any case, we capture the faulty char.
453 */
454 if (h1m->err_pos < -1)
455 goto invalid_char;
456 if (h1m->err_pos == -1)
457 h1m->err_pos = ptr - start + skip;
458 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
459 }
460
461 if (likely(HTTP_IS_CRLF(*ptr))) {
462 /* so it's a CR/LF, meaning an HTTP 0.9 request */
463 goto http_msg_req09_uri_e;
464 }
465
466 /* OK forbidden chars, 0..31 or 127 */
467 invalid_char:
468 state = H1_MSG_RQURI;
469 goto http_msg_invalid;
470
471 case H1_MSG_RQURI_SP:
472 http_msg_rquri_sp:
473 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200474 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200475 goto http_msg_rqver;
476 }
477 if (likely(HTTP_IS_SPHT(*ptr)))
478 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
479 /* so it's a CR/LF, meaning an HTTP 0.9 request */
480 goto http_msg_req09_ver;
481
482
483 case H1_MSG_RQVER:
484 http_msg_rqver:
485 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
486 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
487
488 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200489 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200490 http_msg_rqline_eol:
491 /* We have seen the end of line. Note that we do not
492 * necessarily have the \n yet, but at least we know that we
493 * have EITHER \r OR \n, otherwise the request would not be
494 * complete. We can then record the request length and return
495 * to the caller which will be able to register it.
496 */
497
498 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200499 if ((sl.rq.v.len == 8) &&
500 (*(sl.rq.v.ptr + 5) > '1' ||
501 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200502 h1m->flags |= H1_MF_VER_11;
503
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200504 if (unlikely(hdr_count >= hdr_num)) {
505 state = H1_MSG_RQVER;
506 goto http_output_full;
507 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200508 if (!(h1m->flags & H1_MF_NO_PHDR))
509 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200510
511 if (unlikely(hdr_count >= hdr_num)) {
512 state = H1_MSG_RQVER;
513 goto http_output_full;
514 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200515 if (!(h1m->flags & H1_MF_NO_PHDR))
516 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200517 }
518
519 sol = ptr - start;
520 if (likely(*ptr == '\r'))
521 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
522 goto http_msg_rqline_end;
523 }
524
525 /* neither an HTTP_VER token nor a CRLF */
526 state = H1_MSG_RQVER;
527 goto http_msg_invalid;
528
529 case H1_MSG_RQLINE_END:
530 http_msg_rqline_end:
531 /* check for HTTP/0.9 request : no version information
532 * available. sol must point to the first of CR or LF. However
533 * since we don't save these elements between calls, if we come
534 * here from a restart, we don't necessarily know. Thus in this
535 * case we simply start over.
536 */
537 if (restarting)
538 goto restart;
539
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200540 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200541 goto http_msg_last_lf;
542
543 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
544 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
545 /* stop here */
546
547 /*
548 * Common states below
549 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200550 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200551 http_msg_rpbefore:
552 if (likely(HTTP_IS_TOKEN(*ptr))) {
553 /* we have a start of message, we may have skipped some
554 * heading CRLF. Skip them now.
555 */
556 skip += ptr - start;
557 start = ptr;
558
559 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200560 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200561 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200562 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200563 goto http_msg_rpver;
564 }
565
566 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200567 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200568 goto http_msg_invalid;
569 }
570
571 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200572 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
573 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200574 /* stop here */
575
Willy Tarreau801250e2018-09-11 11:45:04 +0200576 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200577 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200578 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
579 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200580 /* stop here */
581
Willy Tarreau801250e2018-09-11 11:45:04 +0200582 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200583 http_msg_rpver:
584 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200585 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200586
587 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200588 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200589
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200590 if ((sl.st.v.len == 8) &&
591 (*(sl.st.v.ptr + 5) > '1' ||
592 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200593 h1m->flags |= H1_MF_VER_11;
594
Willy Tarreau801250e2018-09-11 11:45:04 +0200595 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200596 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200597 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200598 goto http_msg_invalid;
599
Willy Tarreau801250e2018-09-11 11:45:04 +0200600 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200601 http_msg_rpver_sp:
602 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200603 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200604 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200605 goto http_msg_rpcode;
606 }
607 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200608 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200609 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200610 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200611 goto http_msg_invalid;
612
Willy Tarreau801250e2018-09-11 11:45:04 +0200613 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200614 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100615 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200616 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200617 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200618 }
619
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100620 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200621 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100622 goto http_msg_invalid;
623 }
624
Willy Tarreau794f9af2017-07-26 09:07:47 +0200625 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200626 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200627 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200628 }
629
630 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200631 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200632
633 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200634 sl.st.r.ptr = ptr;
635 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200636 goto http_msg_rpline_eol;
637
Willy Tarreau801250e2018-09-11 11:45:04 +0200638 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200639 http_msg_rpcode_sp:
640 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200641 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200642 goto http_msg_rpreason;
643 }
644 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200645 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200646 /* so it's a CR/LF, so there is no reason phrase */
647 goto http_msg_rsp_reason;
648
Willy Tarreau801250e2018-09-11 11:45:04 +0200649 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200650 http_msg_rpreason:
651 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200652 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200653 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200654 http_msg_rpline_eol:
655 /* We have seen the end of line. Note that we do not
656 * necessarily have the \n yet, but at least we know that we
657 * have EITHER \r OR \n, otherwise the response would not be
658 * complete. We can then record the response length and return
659 * to the caller which will be able to register it.
660 */
661
Willy Tarreau5384aac2018-09-11 16:04:48 +0200662 if (likely(!skip_update)) {
663 if (unlikely(hdr_count >= hdr_num)) {
664 state = H1_MSG_RPREASON;
665 goto http_output_full;
666 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200667 if (!(h1m->flags & H1_MF_NO_PHDR))
668 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200669 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200670
671 sol = ptr - start;
672 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200673 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200674 goto http_msg_rpline_end;
675
Willy Tarreau801250e2018-09-11 11:45:04 +0200676 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200677 http_msg_rpline_end:
678 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200679 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
680 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200681 /* stop here */
682
Willy Tarreau801250e2018-09-11 11:45:04 +0200683 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200684 http_msg_hdr_first:
685 sol = ptr - start;
686 if (likely(!HTTP_IS_CRLF(*ptr))) {
687 goto http_msg_hdr_name;
688 }
689
690 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200691 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200692 goto http_msg_last_lf;
693
Willy Tarreau801250e2018-09-11 11:45:04 +0200694 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200695 http_msg_hdr_name:
696 /* assumes sol points to the first char */
697 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200698 if (!skip_update) {
699 /* turn it to lower case if needed */
700 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
Willy Tarreauf278eec2020-07-05 21:46:32 +0200701 *ptr = tolower((unsigned char)*ptr);
Christopher Faulet2912f872018-09-19 14:01:04 +0200702 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200703 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200704 }
705
706 if (likely(*ptr == ':')) {
707 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200708 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200709 }
710
Willy Tarreau9aec3052018-09-12 09:20:40 +0200711 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200712 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200713 goto http_msg_invalid;
714 }
715
Willy Tarreau9aec3052018-09-12 09:20:40 +0200716 if (h1m->err_pos == -1) /* capture the error pointer */
717 h1m->err_pos = ptr - start + skip; /* >= 0 now */
718
719 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200720 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200721
Willy Tarreau801250e2018-09-11 11:45:04 +0200722 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200723 http_msg_hdr_l1_sp:
724 /* assumes sol points to the first char */
725 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200726 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200727
728 /* header value can be basically anything except CR/LF */
729 sov = ptr - start;
730
731 if (likely(!HTTP_IS_CRLF(*ptr))) {
732 goto http_msg_hdr_val;
733 }
734
735 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200736 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200737 goto http_msg_hdr_l1_lf;
738
Willy Tarreau801250e2018-09-11 11:45:04 +0200739 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200740 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200741 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
742 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200743
Willy Tarreau801250e2018-09-11 11:45:04 +0200744 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200745 http_msg_hdr_l1_lws:
746 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200747 if (!skip_update) {
748 /* replace HT,CR,LF with spaces */
749 for (; start + sov < ptr; sov++)
750 start[sov] = ' ';
751 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200752 goto http_msg_hdr_l1_sp;
753 }
754 /* we had a header consisting only in spaces ! */
755 eol = sov;
756 goto http_msg_complete_header;
757
Willy Tarreau801250e2018-09-11 11:45:04 +0200758 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200759 http_msg_hdr_val:
760 /* assumes sol points to the first char, and sov
761 * points to the first character of the value.
762 */
763
764 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
765 * and lower. In fact since most of the time is spent in the loop, we
766 * also remove the sign bit test so that bytes 0x8e..0x0d break the
767 * loop, but we don't care since they're very rare in header values.
768 */
Willy Tarreau02ac9502020-02-21 16:31:22 +0100769#ifdef HA_UNALIGNED_LE64
Willy Tarreau794f9af2017-07-26 09:07:47 +0200770 while (ptr <= end - sizeof(long)) {
771 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
772 goto http_msg_hdr_val2;
773 ptr += sizeof(long);
774 }
775#endif
Willy Tarreau02ac9502020-02-21 16:31:22 +0100776#ifdef HA_UNALIGNED_LE
Willy Tarreau794f9af2017-07-26 09:07:47 +0200777 while (ptr <= end - sizeof(int)) {
778 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
779 goto http_msg_hdr_val2;
780 ptr += sizeof(int);
781 }
782#endif
783 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200784 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200785 goto http_msg_ood;
786 }
787 http_msg_hdr_val2:
788 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200789 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200790
791 eol = ptr - start;
792 /* Note: we could also copy eol into ->eoh so that we have the
793 * real header end in case it ends with lots of LWS, but is this
794 * really needed ?
795 */
796 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200797 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200798 goto http_msg_hdr_l2_lf;
799
Willy Tarreau801250e2018-09-11 11:45:04 +0200800 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200801 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200802 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
803 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200804
Willy Tarreau801250e2018-09-11 11:45:04 +0200805 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200806 http_msg_hdr_l2_lws:
807 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200808 if (!skip_update) {
809 /* LWS: replace HT,CR,LF with spaces */
810 for (; start + eol < ptr; eol++)
811 start[eol] = ' ';
812 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200813 goto http_msg_hdr_val;
814 }
815 http_msg_complete_header:
816 /*
817 * It was a new header, so the last one is finished. Assumes
818 * <sol> points to the first char of the name, <col> to the
819 * colon, <sov> points to the first character of the value and
820 * <eol> to the first CR or LF so we know how the line ends. We
821 * will trim spaces around the value. It's possible to do it by
822 * adjusting <eol> and <sov> which are no more used after this.
823 * We can add the header field to the list.
824 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200825 if (likely(!skip_update)) {
826 while (sov < eol && HTTP_IS_LWS(start[sov]))
827 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200828
Christopher Faulet2912f872018-09-19 14:01:04 +0200829 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
830 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200831
832
Christopher Faulet2912f872018-09-19 14:01:04 +0200833 n = ist2(start + sol, col - sol);
834 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200835
Christopher Faulet2912f872018-09-19 14:01:04 +0200836 do {
837 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200838
Christopher Faulet2912f872018-09-19 14:01:04 +0200839 if (unlikely(hdr_count >= hdr_num)) {
840 state = H1_MSG_HDR_L2_LWS;
841 goto http_output_full;
842 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200843
Christopher Faulet2912f872018-09-19 14:01:04 +0200844 if (isteqi(n, ist("transfer-encoding"))) {
845 h1_parse_xfer_enc_header(h1m, v);
846 }
847 else if (isteqi(n, ist("content-length"))) {
848 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200849
Christopher Faulet2912f872018-09-19 14:01:04 +0200850 if (ret < 0) {
851 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100852 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet2912f872018-09-19 14:01:04 +0200853 goto http_msg_invalid;
854 }
855 else if (ret == 0) {
856 /* skip it */
857 break;
858 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200859 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200860 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100861 h1_parse_connection_header(h1m, &v);
862 if (!v.len) {
863 /* skip it */
864 break;
865 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200866 }
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100867 else if (isteqi(n, ist("upgrade"))) {
868 h1_parse_upgrade_header(h1m, v);
869 }
Christopher Faulet7032a3f2020-05-04 09:01:45 +0200870 else if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP)) && isteqi(n, ist("host"))) {
Christopher Faulet531b83e2019-10-11 13:34:22 +0200871 if (host_idx == -1) {
872 struct ist authority;
873
874 authority = http_get_authority(sl.rq.u, 1);
875 if (authority.len && !isteqi(v, authority)) {
876 if (h1m->err_pos < -1) {
877 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100878 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet531b83e2019-10-11 13:34:22 +0200879 goto http_msg_invalid;
880 }
881 if (h1m->err_pos == -1) /* capture the error pointer */
Christopher Faulet17034782020-01-06 13:41:01 +0100882 h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
Christopher Faulet531b83e2019-10-11 13:34:22 +0200883 }
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200884 host_idx = hdr_count;
Christopher Faulet531b83e2019-10-11 13:34:22 +0200885 }
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200886 else {
887 if (!isteqi(v, hdr[host_idx].v)) {
888 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100889 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200890 goto http_msg_invalid;
891 }
892 /* if the same host, skip it */
893 break;
894 }
895 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200896
Christopher Faulet2912f872018-09-19 14:01:04 +0200897 http_set_hdr(&hdr[hdr_count++], n, v);
898 } while (0);
899 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200900
901 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200902
Willy Tarreau794f9af2017-07-26 09:07:47 +0200903 if (likely(!HTTP_IS_CRLF(*ptr)))
904 goto http_msg_hdr_name;
905
906 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200907 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200908 goto http_msg_last_lf;
909
Willy Tarreau801250e2018-09-11 11:45:04 +0200910 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200911 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200912 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200913 ptr++;
914 /* <ptr> now points to the first byte of payload. If needed sol
915 * still points to the first of either CR or LF of the empty
916 * line ending the headers block.
917 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200918 if (likely(!skip_update)) {
919 if (unlikely(hdr_count >= hdr_num)) {
920 state = H1_MSG_LAST_LF;
921 goto http_output_full;
922 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200923 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200924 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200925
926 /* reaching here we've parsed the whole message. We may detect
927 * that we were already continuing an interrupted parsing pass
928 * so we were silently looking for the end of message not
929 * updating anything before deciding to parse it fully at once.
930 * It's guaranteed that we won't match this test twice in a row
931 * since restarting will turn zero.
932 */
933 if (restarting)
934 goto restart;
935
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200936 state = H1_MSG_DATA;
937 if (h1m->flags & H1_MF_XFER_ENC) {
938 if (h1m->flags & H1_MF_CLEN) {
939 h1m->flags &= ~H1_MF_CLEN;
940 hdr_count = http_del_hdr(hdr, ist("content-length"));
941 }
942
943 if (h1m->flags & H1_MF_CHNK)
944 state = H1_MSG_CHUNK_SIZE;
945 else if (!(h1m->flags & H1_MF_RESP)) {
946 /* cf RFC7230#3.3.3 : transfer-encoding in
947 * request without chunked encoding is invalid.
948 */
949 goto http_msg_invalid;
950 }
951 }
952
Willy Tarreau794f9af2017-07-26 09:07:47 +0200953 break;
954
955 default:
956 /* impossible states */
957 goto http_msg_invalid;
958 }
959
Willy Tarreau001823c2018-09-12 17:25:32 +0200960 /* Now we've left the headers state and are either in H1_MSG_DATA or
961 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200962 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200963
Willy Tarreau5384aac2018-09-11 16:04:48 +0200964 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200965 *slp = sl;
966
Willy Tarreau4433c082018-09-11 15:33:32 +0200967 h1m->state = state;
968 h1m->next = ptr - start + skip;
969 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200970
971 http_msg_ood:
972 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200973 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200974 *slp = sl;
975
Willy Tarreau4433c082018-09-11 15:33:32 +0200976 h1m->state = state;
977 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200978 return 0;
979
980 http_msg_invalid:
981 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200982 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200983 *slp = sl;
984
Willy Tarreau4433c082018-09-11 15:33:32 +0200985 h1m->err_state = h1m->state = state;
986 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200987 return -1;
988
989 http_output_full:
990 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200991 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200992 *slp = sl;
993
Willy Tarreau4433c082018-09-11 15:33:32 +0200994 h1m->err_state = h1m->state = state;
995 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200996 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200997
998 restart:
Christopher Faulet84f06532019-09-03 16:05:31 +0200999 h1m->flags &= ~(H1_MF_VER_11|H1_MF_CLEN|H1_MF_XFER_ENC|H1_MF_CHNK|H1_MF_CONN_KAL|H1_MF_CONN_CLO|H1_MF_CONN_UPG);
1000 h1m->curr_len = h1m->body_len = h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001001 if (h1m->flags & H1_MF_RESP)
1002 h1m->state = H1_MSG_RPBEFORE;
1003 else
1004 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001005 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001006}
1007
/* Measures the trailers block found at offset <ofs> in <buf>, looking at no
 * more than <max> bytes past that offset. Lines are walked one at a time, each
 * one ending on an LF, until an empty line (a line starting with CR or LF) is
 * met, which marks the end of the trailers.
 *
 * Return value:
 *   > 0 : number of bytes from <ofs> up to and including the empty line that
 *         terminates the trailers, i.e. what the caller has to skip;
 *     0 : the scan reached <ofs>+<max> before the end of the trailers was
 *         found, more input data is needed;
 *   < 0 : parse error: a second CR was met on a line before its LF. It is up
 *         to the caller to decide how to proceed in this case.
 *
 * Note that one CR per line is accepted wherever it stands; only a second CR
 * before the LF is rejected. All pointer moves go through b_peek()/b_next().
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	const char *line, *cur, *eol, *next;
	int consumed = ofs;

	do {
		line = b_peek(buf, consumed);
		eol  = NULL;

		/* look for the LF ending the current line, remembering where
		 * the first CR was seen on the way.
		 */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;  /* ran out of data */

			if (*cur == '\n')
				break;

			if (*cur != '\r')
				continue;

			if (eol)
				return -1; /* two CRs on the same line */
			eol = cur;
		}

		/* bare LF: the line ends on the LF itself */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		next = b_next(buf, cur);
		consumed += b_dist(buf, line, next);

		/* a line made only of LF or CRLF ends the trailers, otherwise
		 * go on with the next line.
		 */
	} while (eol != line);

	return consumed - ofs;
}
Amaury Denoyellec1938232020-12-11 17:53:03 +01001058
1059#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
1060
1061/*
1062 * Calculate the WebSocket handshake response key from <key_in>. Following the
1063 * rfc6455, <key_in> must be 24 bytes longs. The result is stored in <key_out>
1064 * as a 29 bytes long string.
1065 */
1066void h1_calculate_ws_output_key(const char *key, char *result)
1067{
1068 blk_SHA_CTX sha1_ctx;
1069 char hash_in[60], hash_out[20];
1070
1071 /* concatenate the key with a fixed suffix */
1072 memcpy(hash_in, key, 24);
1073 memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
1074
1075 /* sha1 the result */
1076 blk_SHA1_Init(&sha1_ctx);
1077 blk_SHA1_Update(&sha1_ctx, hash_in, 60);
1078 blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
1079
1080 /* encode in base64 the hash */
1081 a2base64(hash_out, 20, result, 29);
1082}