blob: 99b9c2993b235b484b4546436d079c85adbef5a0 [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010014
15#include <import/sha1.h>
16
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020017#include <haproxy/api.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010018#include <haproxy/base64.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020019#include <haproxy/h1.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020020#include <haproxy/http-hdr.h>
Amaury Denoyelleaad333a2020-12-11 17:53:07 +010021#include <haproxy/tools.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020022
Willy Tarreau73373ab2018-09-14 17:11:33 +020023/* Parse the Content-Length header field of an HTTP/1 request. The function
24 * checks all possible occurrences of a comma-delimited value, and verifies
25 * if any of them doesn't match a previous value. It returns <0 if a value
26 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
27 * if the value can be indexed (first one). In the last case, the value might
28 * be adjusted and the caller must only add the updated value.
29 */
30int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
31{
32 char *e, *n;
33 long long cl;
34 int not_first = !!(h1m->flags & H1_MF_CLEN);
35 struct ist word;
36
37 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
38 e = value->ptr + value->len;
39
40 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +050041 /* skip leading delimiter and blanks */
Willy Tarreau73373ab2018-09-14 17:11:33 +020042 if (unlikely(HTTP_IS_LWS(*word.ptr)))
43 continue;
44
45 /* digits only now */
46 for (cl = 0, n = word.ptr; n < e; n++) {
47 unsigned int c = *n - '0';
48 if (unlikely(c > 9)) {
49 /* non-digit */
50 if (unlikely(n == word.ptr)) // spaces only
51 goto fail;
52 break;
53 }
54 if (unlikely(cl > ULLONG_MAX / 10ULL))
55 goto fail; /* multiply overflow */
56 cl = cl * 10ULL;
57 if (unlikely(cl + c < cl))
58 goto fail; /* addition overflow */
59 cl = cl + c;
60 }
61
62 /* keep a copy of the exact cleaned value */
63 word.len = n - word.ptr;
64
65 /* skip trailing LWS till next comma or EOL */
66 for (; n < e; n++) {
67 if (!HTTP_IS_LWS(*n)) {
68 if (unlikely(*n != ','))
69 goto fail;
70 break;
71 }
72 }
73
74 /* if duplicate, must be equal */
75 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
76 goto fail;
77
78 /* OK, store this result as the one to be indexed */
79 h1m->flags |= H1_MF_CLEN;
80 h1m->curr_len = h1m->body_len = cl;
81 *value = word;
82 word.ptr = n;
83 }
84 /* here we've reached the end with a single value or a series of
85 * identical values, all matching previous series if any. The last
86 * parsed value was sent back into <value>. We just have to decide
87 * if this occurrence has to be indexed (it's the first one) or
88 * silently skipped (it's not the first one)
89 */
90 return !not_first;
91 fail:
92 return -1;
93}
94
Willy Tarreau2557f6a2018-09-14 16:34:47 +020095/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
Christopher Faulet545fbba2021-09-28 09:36:25 +020096 * "chunked" encoding to perform some checks (it must be the last encoding for
97 * the request and must not be performed twice for any message). The
98 * H1_MF_TE_CHUNKED is set if a valid "chunked" encoding is found. The
99 * H1_MF_TE_OTHER flag is set if any other encoding is found. The H1_MF_XFER_ENC
100 * flag is always set. The H1_MF_CHNK is set when "chunked" encoding is the last
101 * one. Note that transfer codings are case-insensitive (cf RFC7230#4). This
102 * function returns <0 if a error is found, 0 if the whole header can be dropped
103 * (not used yet), or >0 if the value can be indexed.
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200104 */
Christopher Faulet545fbba2021-09-28 09:36:25 +0200105int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200106{
107 char *e, *n;
108 struct ist word;
109
110 h1m->flags |= H1_MF_XFER_ENC;
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200111
112 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
Tim Duesterhus4c8f75f2021-11-06 15:14:44 +0100113 e = istend(value);
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200114
115 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500116 /* skip leading delimiter and blanks */
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200117 if (HTTP_IS_LWS(*word.ptr))
118 continue;
119
120 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
121 word.len = n - word.ptr;
122
123 /* trim trailing blanks */
124 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
125 word.len--;
126
127 h1m->flags &= ~H1_MF_CHNK;
Christopher Faulet545fbba2021-09-28 09:36:25 +0200128 if (isteqi(word, ist("chunked"))) {
129 if (h1m->flags & H1_MF_TE_CHUNKED) {
130 /* cf RFC7230#3.3.1 : A sender MUST NOT apply
131 * chunked more than once to a message body
132 * (i.e., chunking an already chunked message is
133 * not allowed)
134 */
135 goto fail;
136 }
137 h1m->flags |= (H1_MF_TE_CHUNKED|H1_MF_CHNK);
138 }
139 else {
140 if ((h1m->flags & (H1_MF_RESP|H1_MF_TE_CHUNKED)) == H1_MF_TE_CHUNKED) {
141 /* cf RFC7230#3.3.1 : If any transfer coding
142 * other than chunked is applied to a request
143 * payload body, the sender MUST apply chunked
144 * as the final transfer coding to ensure that
145 * the message is properly framed.
146 */
147 goto fail;
148 }
149 h1m->flags |= H1_MF_TE_OTHER;
150 }
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200151
152 word.ptr = n;
153 }
Christopher Faulet545fbba2021-09-28 09:36:25 +0200154
155 return 1;
156 fail:
157 return -1;
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200158}
159
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200160/* Parse the Connection: header of an HTTP/1 request, looking for "close",
161 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
162 * what was found there. Note that flags are only added, not removed, so the
163 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100164 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
165 * up from "keep-alive" and "close" values. To do so, the header value is
166 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200167 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100168void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200169{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100170 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200171 struct ist word;
172
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100173 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
174 p = value->ptr;
175 e = value->ptr + value->len;
176 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
177 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200178
179 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500180 /* skip leading delimiter and blanks */
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200181 if (HTTP_IS_LWS(*word.ptr))
182 continue;
183
184 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
185 word.len = n - word.ptr;
186
187 /* trim trailing blanks */
188 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
189 word.len--;
190
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100191 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200192 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100193 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
194 goto skip_val;
195 }
196 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200197 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100198 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
199 goto skip_val;
200 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200201 else if (isteqi(word, ist("upgrade")))
202 h1m->flags |= H1_MF_CONN_UPG;
203
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100204 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
205 if (value->ptr + value->len == p) {
206 /* no rewrite done till now */
207 value->len = n - value->ptr;
208 }
209 else {
210 if (value->len)
211 value->ptr[value->len++] = ',';
212 istcat(value, word, e - value->ptr);
213 }
214 }
215
216 skip_val:
217 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200218 }
219}
220
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100221/* Parse the Upgrade: header of an HTTP/1 request.
222 * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
223 */
224void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
225{
226 char *e, *n;
227 struct ist word;
228
229 h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
230
231 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
Tim Duesterhus4c8f75f2021-11-06 15:14:44 +0100232 e = istend(value);
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100233
234 while (++word.ptr < e) {
235 /* skip leading delimiter and blanks */
236 if (HTTP_IS_LWS(*word.ptr))
237 continue;
238
239 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
240 word.len = n - word.ptr;
241
242 /* trim trailing blanks */
243 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
244 word.len--;
245
246 if (isteqi(word, ist("websocket")))
247 h1m->flags |= H1_MF_UPG_WEBSOCKET;
248
249 word.ptr = n;
250 }
251}
252
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
 */
256
/* Checks that the byte at <ptr> is an LF. When it is, nothing happens and the
 * execution goes on after the macro. Otherwise <where> is assigned to <state>
 * and the execution jumps to label <bad>.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)				\
	do {								\
		if (likely(*(ptr) == '\n'))				\
			break;						\
		state = (where);					\
		goto bad;						\
	} while (0)
267
/* Moves pointer <ptr> to the next byte. If it reached (or went past) pointer
 * <end>, <where> is assigned to <state> and the execution jumps to label
 * <stop>. Otherwise the execution jumps to label <more>. In both cases the
 * statements following the macro are not executed.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)	\
	do {								\
		if (unlikely(++(ptr) >= (end))) {			\
			state = (where);				\
			goto stop;					\
		}							\
		goto more;						\
	} while (0)
281
Willy Tarreau794f9af2017-07-26 09:07:47 +0200282/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
284 * pairs representing header fields into the array <hdr> of size <hdr_num>,
285 * whose last entry will have an empty name and an empty value. If <hdr_num> is
Willy Tarreau4433c082018-09-11 15:33:32 +0200286 * too small to represent the whole message, an error is returned. Some
287 * protocol elements such as content-length and transfer-encoding will be
Willy Tarreau5384aac2018-09-11 16:04:48 +0200288 * parsed and stored into h1m as well. <hdr> may be null, in which case only
289 * the parsing state will be updated. This may be used to restart the parsing
290 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200291 *
292 * For now it's limited to the response. If the header block is incomplete,
293 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200294 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200295 * and h1m->next to zero on the first call, the parser will do the rest. If
296 * an incomplete message is seen, the caller only needs to present h1m->state
297 * and h1m->next again, with an empty header list so that the parser can start
298 * again. In this case, it will detect that it interrupted a previous session
299 * and will first look for the end of the message before reparsing it again and
300 * indexing it at the same time. This ensures that incomplete messages fed 1
301 * character at a time are never processed entirely more than exactly twice,
302 * and that there is no need to store all the internal state and pre-parsed
303 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200304 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200305 * A pointer to a start line descriptor may be passed in <slp>, in which case
306 * the parser will fill it with whatever it found.
307 *
Willy Tarreau794f9af2017-07-26 09:07:47 +0200308 * The code derived from the main HTTP/1 parser above but was simplified and
309 * optimized to process responses produced or forwarded by haproxy. The caller
310 * is responsible for ensuring that the message doesn't wrap, and should ensure
311 * it is complete to avoid having to retry the operation after a failed
312 * attempt. The message is not supposed to be invalid, which is why a few
313 * properties such as the character set used in the header field names are not
314 * checked. In case of an unparsable response message, a negative value will be
315 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100317 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
318 * parsed and the start line is skipped. It is not required to set h1m->state
319 * nor h1m->next in this case.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200320 *
321 * This function returns :
322 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
Willy Tarreau801250e2018-09-11 11:45:04 +0200323 * set) with the state the error occurred in and h1m->err_pos with the
Willy Tarreau794f9af2017-07-26 09:07:47 +0200324 * the position relative to <start>
325 * -2 if the output is full (hdr_num reached). err_state and err_pos also
326 * indicate where it failed.
327 * 0 in case of missing data.
328 * > 0 on success, it then corresponds to the number of bytes read since
329 * <start> so that the caller can go on with the payload.
330 */
331int h1_headers_to_hdr_list(char *start, const char *stop,
332 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200333 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200334{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200335 enum h1m_state state;
336 register char *ptr;
337 register const char *end;
338 unsigned int hdr_count;
339 unsigned int skip; /* number of bytes skipped at the beginning */
340 unsigned int sol; /* start of line */
341 unsigned int col; /* position of the colon */
342 unsigned int eol; /* end of line */
343 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200344 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200345 int skip_update;
346 int restarting;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200347 int host_idx;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200348 struct ist n, v; /* header name and value during parsing */
349
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200350 skip = 0; // do it only once to keep track of the leading CRLF.
351
352 try_again:
353 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200354 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200355 skip_update = restarting = 0;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200356 host_idx = -1;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200357
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100358 if (h1m->flags & H1_MF_HDRS_ONLY) {
359 state = H1_MSG_HDR_FIRST;
360 h1m->next = 0;
361 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100362 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100363 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100364 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
365 restarting = 1;
366 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100367
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200368 ptr = start + h1m->next;
369 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200370
Willy Tarreau794f9af2017-07-26 09:07:47 +0200371 if (unlikely(ptr >= end))
372 goto http_msg_ood;
373
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200374 /* don't update output if hdr is NULL or if we're restarting */
375 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200376 skip_update = 1;
377
Willy Tarreau794f9af2017-07-26 09:07:47 +0200378 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200379 case H1_MSG_RQBEFORE:
380 http_msg_rqbefore:
381 if (likely(HTTP_IS_TOKEN(*ptr))) {
382 /* we have a start of message, we may have skipped some
383 * heading CRLF. Skip them now.
384 */
385 skip += ptr - start;
386 start = ptr;
387
388 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200389 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200390 hdr_count = 0;
391 state = H1_MSG_RQMETH;
392 goto http_msg_rqmeth;
393 }
394
395 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
396 state = H1_MSG_RQBEFORE;
397 goto http_msg_invalid;
398 }
399
400 if (unlikely(*ptr == '\n'))
401 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
402 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
403 /* stop here */
404
405 case H1_MSG_RQBEFORE_CR:
406 http_msg_rqbefore_cr:
407 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
408 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
409 /* stop here */
410
411 case H1_MSG_RQMETH:
412 http_msg_rqmeth:
413 if (likely(HTTP_IS_TOKEN(*ptr)))
414 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
415
416 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200417 sl.rq.m.len = ptr - sl.rq.m.ptr;
418 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200419 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
420 }
421
422 if (likely(HTTP_IS_CRLF(*ptr))) {
423 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200424 sl.rq.m.len = ptr - sl.rq.m.ptr;
425 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200426 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200427 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200428 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200429 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200430 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200431 sl.rq.v.ptr = ptr;
432 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200433 goto http_msg_rqline_eol;
434 }
435 state = H1_MSG_RQMETH;
436 goto http_msg_invalid;
437
438 case H1_MSG_RQMETH_SP:
439 http_msg_rqmeth_sp:
440 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200441 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200442 goto http_msg_rquri;
443 }
444 if (likely(HTTP_IS_SPHT(*ptr)))
445 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
446 /* so it's a CR/LF, meaning an HTTP 0.9 request */
447 goto http_msg_req09_uri;
448
449 case H1_MSG_RQURI:
450 http_msg_rquri:
Willy Tarreau02ac9502020-02-21 16:31:22 +0100451#ifdef HA_UNALIGNED_LE
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200452 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
453 while (ptr <= end - sizeof(int)) {
454 int x = *(int *)ptr - 0x21212121;
455 if (x & 0x80808080)
456 break;
457
458 x -= 0x5e5e5e5e;
459 if (!(x & 0x80808080))
460 break;
461
462 ptr += sizeof(int);
463 }
464#endif
465 if (ptr >= end) {
466 state = H1_MSG_RQURI;
467 goto http_msg_ood;
468 }
469 http_msg_rquri2:
470 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
471 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
472
473 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200474 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200475 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
476 }
477 if (likely((unsigned char)*ptr >= 128)) {
478 /* non-ASCII chars are forbidden unless option
479 * accept-invalid-http-request is enabled in the frontend.
480 * In any case, we capture the faulty char.
481 */
482 if (h1m->err_pos < -1)
483 goto invalid_char;
484 if (h1m->err_pos == -1)
485 h1m->err_pos = ptr - start + skip;
486 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
487 }
488
489 if (likely(HTTP_IS_CRLF(*ptr))) {
490 /* so it's a CR/LF, meaning an HTTP 0.9 request */
491 goto http_msg_req09_uri_e;
492 }
493
494 /* OK forbidden chars, 0..31 or 127 */
495 invalid_char:
496 state = H1_MSG_RQURI;
497 goto http_msg_invalid;
498
499 case H1_MSG_RQURI_SP:
500 http_msg_rquri_sp:
501 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200502 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200503 goto http_msg_rqver;
504 }
505 if (likely(HTTP_IS_SPHT(*ptr)))
506 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
507 /* so it's a CR/LF, meaning an HTTP 0.9 request */
508 goto http_msg_req09_ver;
509
510
511 case H1_MSG_RQVER:
512 http_msg_rqver:
513 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
514 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
515
516 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200517 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200518 http_msg_rqline_eol:
519 /* We have seen the end of line. Note that we do not
520 * necessarily have the \n yet, but at least we know that we
521 * have EITHER \r OR \n, otherwise the request would not be
522 * complete. We can then record the request length and return
523 * to the caller which will be able to register it.
524 */
525
526 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200527 if ((sl.rq.v.len == 8) &&
528 (*(sl.rq.v.ptr + 5) > '1' ||
529 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200530 h1m->flags |= H1_MF_VER_11;
531
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200532 if (unlikely(hdr_count >= hdr_num)) {
533 state = H1_MSG_RQVER;
534 goto http_output_full;
535 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200536 if (!(h1m->flags & H1_MF_NO_PHDR))
537 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200538
539 if (unlikely(hdr_count >= hdr_num)) {
540 state = H1_MSG_RQVER;
541 goto http_output_full;
542 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200543 if (!(h1m->flags & H1_MF_NO_PHDR))
544 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200545 }
546
547 sol = ptr - start;
548 if (likely(*ptr == '\r'))
549 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
550 goto http_msg_rqline_end;
551 }
552
553 /* neither an HTTP_VER token nor a CRLF */
554 state = H1_MSG_RQVER;
555 goto http_msg_invalid;
556
557 case H1_MSG_RQLINE_END:
558 http_msg_rqline_end:
559 /* check for HTTP/0.9 request : no version information
560 * available. sol must point to the first of CR or LF. However
561 * since we don't save these elements between calls, if we come
562 * here from a restart, we don't necessarily know. Thus in this
563 * case we simply start over.
564 */
565 if (restarting)
566 goto restart;
567
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200568 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200569 goto http_msg_last_lf;
570
571 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
572 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
573 /* stop here */
574
575 /*
576 * Common states below
577 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200578 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200579 http_msg_rpbefore:
580 if (likely(HTTP_IS_TOKEN(*ptr))) {
581 /* we have a start of message, we may have skipped some
582 * heading CRLF. Skip them now.
583 */
584 skip += ptr - start;
585 start = ptr;
586
587 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200588 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200589 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200590 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200591 goto http_msg_rpver;
592 }
593
594 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200595 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200596 goto http_msg_invalid;
597 }
598
599 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200600 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
601 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200602 /* stop here */
603
Willy Tarreau801250e2018-09-11 11:45:04 +0200604 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200605 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200606 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
607 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200608 /* stop here */
609
Willy Tarreau801250e2018-09-11 11:45:04 +0200610 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200611 http_msg_rpver:
612 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200613 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200614
615 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200616 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200617
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200618 if ((sl.st.v.len == 8) &&
619 (*(sl.st.v.ptr + 5) > '1' ||
620 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200621 h1m->flags |= H1_MF_VER_11;
622
Willy Tarreau801250e2018-09-11 11:45:04 +0200623 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200624 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200625 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200626 goto http_msg_invalid;
627
Willy Tarreau801250e2018-09-11 11:45:04 +0200628 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200629 http_msg_rpver_sp:
630 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200631 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200632 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200633 goto http_msg_rpcode;
634 }
635 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200636 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200637 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200638 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200639 goto http_msg_invalid;
640
Willy Tarreau801250e2018-09-11 11:45:04 +0200641 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200642 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100643 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200644 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200645 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200646 }
647
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100648 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200649 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100650 goto http_msg_invalid;
651 }
652
Willy Tarreau794f9af2017-07-26 09:07:47 +0200653 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200654 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200655 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200656 }
657
658 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200659 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200660
661 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200662 sl.st.r.ptr = ptr;
663 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200664 goto http_msg_rpline_eol;
665
Willy Tarreau801250e2018-09-11 11:45:04 +0200666 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200667 http_msg_rpcode_sp:
668 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200669 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200670 goto http_msg_rpreason;
671 }
672 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200673 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200674 /* so it's a CR/LF, so there is no reason phrase */
675 goto http_msg_rsp_reason;
676
Willy Tarreau801250e2018-09-11 11:45:04 +0200677 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200678 http_msg_rpreason:
679 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200680 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200681 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200682 http_msg_rpline_eol:
683 /* We have seen the end of line. Note that we do not
684 * necessarily have the \n yet, but at least we know that we
685 * have EITHER \r OR \n, otherwise the response would not be
686 * complete. We can then record the response length and return
687 * to the caller which will be able to register it.
688 */
689
Willy Tarreau5384aac2018-09-11 16:04:48 +0200690 if (likely(!skip_update)) {
691 if (unlikely(hdr_count >= hdr_num)) {
692 state = H1_MSG_RPREASON;
693 goto http_output_full;
694 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200695 if (!(h1m->flags & H1_MF_NO_PHDR))
696 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200697 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200698
699 sol = ptr - start;
700 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200701 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200702 goto http_msg_rpline_end;
703
Willy Tarreau801250e2018-09-11 11:45:04 +0200704 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200705 http_msg_rpline_end:
706 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200707 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
708 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200709 /* stop here */
710
Willy Tarreau801250e2018-09-11 11:45:04 +0200711 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200712 http_msg_hdr_first:
713 sol = ptr - start;
714 if (likely(!HTTP_IS_CRLF(*ptr))) {
715 goto http_msg_hdr_name;
716 }
717
718 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200719 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200720 goto http_msg_last_lf;
721
Willy Tarreau801250e2018-09-11 11:45:04 +0200722 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200723 http_msg_hdr_name:
724 /* assumes sol points to the first char */
725 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200726 if (!skip_update) {
727 /* turn it to lower case if needed */
728 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
Willy Tarreauf278eec2020-07-05 21:46:32 +0200729 *ptr = tolower((unsigned char)*ptr);
Christopher Faulet2912f872018-09-19 14:01:04 +0200730 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200731 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200732 }
733
734 if (likely(*ptr == ':')) {
735 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200736 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200737 }
738
Willy Tarreau9aec3052018-09-12 09:20:40 +0200739 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200740 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200741 goto http_msg_invalid;
742 }
743
Willy Tarreau9aec3052018-09-12 09:20:40 +0200744 if (h1m->err_pos == -1) /* capture the error pointer */
745 h1m->err_pos = ptr - start + skip; /* >= 0 now */
746
747 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200748 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200749
Willy Tarreau801250e2018-09-11 11:45:04 +0200750 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200751 http_msg_hdr_l1_sp:
752 /* assumes sol points to the first char */
753 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200754 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200755
756 /* header value can be basically anything except CR/LF */
757 sov = ptr - start;
758
759 if (likely(!HTTP_IS_CRLF(*ptr))) {
760 goto http_msg_hdr_val;
761 }
762
763 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200764 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200765 goto http_msg_hdr_l1_lf;
766
Willy Tarreau801250e2018-09-11 11:45:04 +0200767 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200768 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200769 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
770 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200771
Willy Tarreau801250e2018-09-11 11:45:04 +0200772 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200773 http_msg_hdr_l1_lws:
774 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200775 if (!skip_update) {
776 /* replace HT,CR,LF with spaces */
777 for (; start + sov < ptr; sov++)
778 start[sov] = ' ';
779 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200780 goto http_msg_hdr_l1_sp;
781 }
782 /* we had a header consisting only in spaces ! */
783 eol = sov;
784 goto http_msg_complete_header;
785
Willy Tarreau801250e2018-09-11 11:45:04 +0200786 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200787 http_msg_hdr_val:
788 /* assumes sol points to the first char, and sov
789 * points to the first character of the value.
790 */
791
792 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
793 * and lower. In fact since most of the time is spent in the loop, we
794 * also remove the sign bit test so that bytes 0x8e..0x0d break the
795 * loop, but we don't care since they're very rare in header values.
796 */
Willy Tarreau02ac9502020-02-21 16:31:22 +0100797#ifdef HA_UNALIGNED_LE64
Willy Tarreau794f9af2017-07-26 09:07:47 +0200798 while (ptr <= end - sizeof(long)) {
799 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
800 goto http_msg_hdr_val2;
801 ptr += sizeof(long);
802 }
803#endif
Willy Tarreau02ac9502020-02-21 16:31:22 +0100804#ifdef HA_UNALIGNED_LE
Willy Tarreau794f9af2017-07-26 09:07:47 +0200805 while (ptr <= end - sizeof(int)) {
806 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
807 goto http_msg_hdr_val2;
808 ptr += sizeof(int);
809 }
810#endif
811 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200812 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200813 goto http_msg_ood;
814 }
815 http_msg_hdr_val2:
816 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200817 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200818
819 eol = ptr - start;
820 /* Note: we could also copy eol into ->eoh so that we have the
821 * real header end in case it ends with lots of LWS, but is this
822 * really needed ?
823 */
824 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200825 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200826 goto http_msg_hdr_l2_lf;
827
Willy Tarreau801250e2018-09-11 11:45:04 +0200828 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200829 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200830 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
831 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200832
Willy Tarreau801250e2018-09-11 11:45:04 +0200833 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200834 http_msg_hdr_l2_lws:
835 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200836 if (!skip_update) {
837 /* LWS: replace HT,CR,LF with spaces */
838 for (; start + eol < ptr; eol++)
839 start[eol] = ' ';
840 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200841 goto http_msg_hdr_val;
842 }
843 http_msg_complete_header:
844 /*
845 * It was a new header, so the last one is finished. Assumes
846 * <sol> points to the first char of the name, <col> to the
847 * colon, <sov> points to the first character of the value and
848 * <eol> to the first CR or LF so we know how the line ends. We
849 * will trim spaces around the value. It's possible to do it by
850 * adjusting <eol> and <sov> which are no more used after this.
851 * We can add the header field to the list.
852 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200853 if (likely(!skip_update)) {
854 while (sov < eol && HTTP_IS_LWS(start[sov]))
855 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200856
Christopher Faulet2912f872018-09-19 14:01:04 +0200857 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
858 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200859
860
Christopher Faulet2912f872018-09-19 14:01:04 +0200861 n = ist2(start + sol, col - sol);
862 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200863
Christopher Faulet2912f872018-09-19 14:01:04 +0200864 do {
865 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200866
Christopher Faulet2912f872018-09-19 14:01:04 +0200867 if (unlikely(hdr_count >= hdr_num)) {
868 state = H1_MSG_HDR_L2_LWS;
869 goto http_output_full;
870 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200871
Christopher Faulet2912f872018-09-19 14:01:04 +0200872 if (isteqi(n, ist("transfer-encoding"))) {
Christopher Faulet545fbba2021-09-28 09:36:25 +0200873 ret = h1_parse_xfer_enc_header(h1m, v);
874 if (ret < 0) {
875 state = H1_MSG_HDR_L2_LWS;
876 ptr = v.ptr; /* Set ptr on the error */
877 goto http_msg_invalid;
878 }
879 else if (ret == 0) {
880 /* skip it */
881 break;
882 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200883 }
884 else if (isteqi(n, ist("content-length"))) {
885 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200886
Christopher Faulet2912f872018-09-19 14:01:04 +0200887 if (ret < 0) {
888 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100889 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet2912f872018-09-19 14:01:04 +0200890 goto http_msg_invalid;
891 }
892 else if (ret == 0) {
893 /* skip it */
894 break;
895 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200896 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200897 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100898 h1_parse_connection_header(h1m, &v);
899 if (!v.len) {
900 /* skip it */
901 break;
902 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200903 }
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100904 else if (isteqi(n, ist("upgrade"))) {
905 h1_parse_upgrade_header(h1m, v);
906 }
Christopher Faulet7032a3f2020-05-04 09:01:45 +0200907 else if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP)) && isteqi(n, ist("host"))) {
Christopher Faulet531b83e2019-10-11 13:34:22 +0200908 if (host_idx == -1) {
909 struct ist authority;
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200910 struct http_uri_parser parser = http_uri_parser_init(sl.rq.u);
Christopher Faulet531b83e2019-10-11 13:34:22 +0200911
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200912 authority = http_parse_authority(&parser, 1);
Christopher Faulet531b83e2019-10-11 13:34:22 +0200913 if (authority.len && !isteqi(v, authority)) {
914 if (h1m->err_pos < -1) {
915 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100916 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet531b83e2019-10-11 13:34:22 +0200917 goto http_msg_invalid;
918 }
919 if (h1m->err_pos == -1) /* capture the error pointer */
Christopher Faulet17034782020-01-06 13:41:01 +0100920 h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
Christopher Faulet531b83e2019-10-11 13:34:22 +0200921 }
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200922 host_idx = hdr_count;
Christopher Faulet531b83e2019-10-11 13:34:22 +0200923 }
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200924 else {
925 if (!isteqi(v, hdr[host_idx].v)) {
926 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100927 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200928 goto http_msg_invalid;
929 }
930 /* if the same host, skip it */
931 break;
932 }
933 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200934
Christopher Faulet2912f872018-09-19 14:01:04 +0200935 http_set_hdr(&hdr[hdr_count++], n, v);
936 } while (0);
937 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200938
939 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200940
Willy Tarreau794f9af2017-07-26 09:07:47 +0200941 if (likely(!HTTP_IS_CRLF(*ptr)))
942 goto http_msg_hdr_name;
943
944 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200945 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200946 goto http_msg_last_lf;
947
Willy Tarreau801250e2018-09-11 11:45:04 +0200948 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200949 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200950 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200951 ptr++;
952 /* <ptr> now points to the first byte of payload. If needed sol
953 * still points to the first of either CR or LF of the empty
954 * line ending the headers block.
955 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200956 if (likely(!skip_update)) {
957 if (unlikely(hdr_count >= hdr_num)) {
958 state = H1_MSG_LAST_LF;
959 goto http_output_full;
960 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200961 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200962 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200963
964 /* reaching here we've parsed the whole message. We may detect
965 * that we were already continuing an interrupted parsing pass
966 * so we were silently looking for the end of message not
967 * updating anything before deciding to parse it fully at once.
968 * It's guaranteed that we won't match this test twice in a row
969 * since restarting will turn zero.
970 */
971 if (restarting)
972 goto restart;
973
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200974 state = H1_MSG_DATA;
975 if (h1m->flags & H1_MF_XFER_ENC) {
976 if (h1m->flags & H1_MF_CLEN) {
Christopher Faulet631c7e82021-09-27 09:47:03 +0200977 /* T-E + C-L: force close and remove C-L */
978 h1m->flags |= H1_MF_CONN_CLO;
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200979 h1m->flags &= ~H1_MF_CLEN;
980 hdr_count = http_del_hdr(hdr, ist("content-length"));
981 }
Christopher Faulet631c7e82021-09-27 09:47:03 +0200982 else if (!(h1m->flags & H1_MF_VER_11)) {
983 /* T-E + HTTP/1.0: force close */
984 h1m->flags |= H1_MF_CONN_CLO;
985 }
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200986
987 if (h1m->flags & H1_MF_CHNK)
988 state = H1_MSG_CHUNK_SIZE;
989 else if (!(h1m->flags & H1_MF_RESP)) {
990 /* cf RFC7230#3.3.3 : transfer-encoding in
991 * request without chunked encoding is invalid.
992 */
993 goto http_msg_invalid;
994 }
995 }
996
Willy Tarreau794f9af2017-07-26 09:07:47 +0200997 break;
998
999 default:
1000 /* impossible states */
1001 goto http_msg_invalid;
1002 }
1003
Willy Tarreau001823c2018-09-12 17:25:32 +02001004 /* Now we've left the headers state and are either in H1_MSG_DATA or
1005 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001006 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001007
Willy Tarreau5384aac2018-09-11 16:04:48 +02001008 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001009 *slp = sl;
1010
Willy Tarreau4433c082018-09-11 15:33:32 +02001011 h1m->state = state;
1012 h1m->next = ptr - start + skip;
1013 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001014
1015 http_msg_ood:
1016 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001017 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001018 *slp = sl;
1019
Willy Tarreau4433c082018-09-11 15:33:32 +02001020 h1m->state = state;
1021 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001022 return 0;
1023
1024 http_msg_invalid:
1025 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001026 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001027 *slp = sl;
1028
Willy Tarreau4433c082018-09-11 15:33:32 +02001029 h1m->err_state = h1m->state = state;
1030 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001031 return -1;
1032
1033 http_output_full:
1034 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001035 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001036 *slp = sl;
1037
Willy Tarreau4433c082018-09-11 15:33:32 +02001038 h1m->err_state = h1m->state = state;
1039 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001040 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001041
1042 restart:
Christopher Faulet84f06532019-09-03 16:05:31 +02001043 h1m->flags &= ~(H1_MF_VER_11|H1_MF_CLEN|H1_MF_XFER_ENC|H1_MF_CHNK|H1_MF_CONN_KAL|H1_MF_CONN_CLO|H1_MF_CONN_UPG);
1044 h1m->curr_len = h1m->body_len = h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001045 if (h1m->flags & H1_MF_RESP)
1046 h1m->state = H1_MSG_RPBEFORE;
1047 else
1048 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001049 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001050}
1051
/* Minimal scan of the trailers block located at offset <ofs> in <buf>, looking
 * at no more than <max> bytes. Lines are walked one at a time until an empty
 * line (a line starting with LF or CRLF) is met.
 *
 * Returns:
 *   > 0 : number of bytes spanned by the trailers, final empty line included,
 *         counted from <ofs>;
 *     0 : the scan limit was reached before the final empty line was seen,
 *         i.e. more input data is needed;
 *   < 0 : parse error, a second CR was found within the same line. The caller
 *         decides how to proceed in that case.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;
	int last_line;

	do {
		const char *line = b_peek(buf, pos);
		const char *eol = NULL; /* first CR or LF met on this line */
		const char *cur;

		/* walk the current line until its LF */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0; /* ran out of data before the LF */

			if (*cur == '\n')
				break;

			if (*cur != '\r')
				continue;

			/* only one CR is tolerated per line */
			if (eol)
				return -1;
			eol = cur;
		}

		/* no CR seen: the line ends at the LF itself */
		if (!eol)
			eol = cur;

		/* account for the whole line, up to the byte following the LF */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* a line starting with its own terminator is the empty line
		 * closing the trailers: nothing is left from this message.
		 */
		last_line = (eol == line);
	} while (!last_line);

	return pos - ofs;
}
Amaury Denoyellec1938232020-12-11 17:53:03 +01001102
/* Produce a random key for a WebSocket handshake as described in RFC 6455.
 * 128 bits are drawn from ha_random64() (two consecutive calls) and encoded
 * in base64 into <key_out>, which must provide room for 25 bytes.
 */
void h1_generate_random_ws_input_key(char key_out[25])
{
	char raw[16];
	uint64_t rnd;

	/* first 64 bits of the key */
	rnd = ha_random64();
	memcpy(raw, &rnd, sizeof(rnd));

	/* last 64 bits of the key */
	rnd = ha_random64();
	memcpy(raw + sizeof(rnd), &rnd, sizeof(rnd));

	a2base64(raw, sizeof(raw), key_out, 25);
}
1117
Amaury Denoyellec1938232020-12-11 17:53:03 +01001118#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
1119
1120/*
1121 * Calculate the WebSocket handshake response key from <key_in>. Following the
1122 * rfc6455, <key_in> must be 24 bytes longs. The result is stored in <key_out>
1123 * as a 29 bytes long string.
1124 */
1125void h1_calculate_ws_output_key(const char *key, char *result)
1126{
1127 blk_SHA_CTX sha1_ctx;
1128 char hash_in[60], hash_out[20];
1129
1130 /* concatenate the key with a fixed suffix */
1131 memcpy(hash_in, key, 24);
1132 memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
1133
1134 /* sha1 the result */
1135 blk_SHA1_Init(&sha1_ctx);
1136 blk_SHA1_Update(&sha1_ctx, hash_in, 60);
1137 blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
1138
1139 /* encode in base64 the hash */
1140 a2base64(hash_out, 20, result, 29);
1141}