blob: bf546b9d7fee6fb412f8c7244b92e6039e2dbf2e [file] [log] [blame]
/*
 * HTTP/1 protocol analyzer
 *
 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010014
15#include <import/sha1.h>
16
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020017#include <haproxy/api.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010018#include <haproxy/base64.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020019#include <haproxy/h1.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020020#include <haproxy/http-hdr.h>
Amaury Denoyelleaad333a2020-12-11 17:53:07 +010021#include <haproxy/tools.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020022
Willy Tarreau73373ab2018-09-14 17:11:33 +020023/* Parse the Content-Length header field of an HTTP/1 request. The function
24 * checks all possible occurrences of a comma-delimited value, and verifies
25 * if any of them doesn't match a previous value. It returns <0 if a value
26 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
27 * if the value can be indexed (first one). In the last case, the value might
28 * be adjusted and the caller must only add the updated value.
29 */
30int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
31{
32 char *e, *n;
33 long long cl;
34 int not_first = !!(h1m->flags & H1_MF_CLEN);
35 struct ist word;
36
37 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
38 e = value->ptr + value->len;
39
40 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +050041 /* skip leading delimiter and blanks */
Willy Tarreau73373ab2018-09-14 17:11:33 +020042 if (unlikely(HTTP_IS_LWS(*word.ptr)))
43 continue;
44
45 /* digits only now */
46 for (cl = 0, n = word.ptr; n < e; n++) {
47 unsigned int c = *n - '0';
48 if (unlikely(c > 9)) {
49 /* non-digit */
50 if (unlikely(n == word.ptr)) // spaces only
51 goto fail;
52 break;
53 }
54 if (unlikely(cl > ULLONG_MAX / 10ULL))
55 goto fail; /* multiply overflow */
56 cl = cl * 10ULL;
57 if (unlikely(cl + c < cl))
58 goto fail; /* addition overflow */
59 cl = cl + c;
60 }
61
62 /* keep a copy of the exact cleaned value */
63 word.len = n - word.ptr;
64
65 /* skip trailing LWS till next comma or EOL */
66 for (; n < e; n++) {
67 if (!HTTP_IS_LWS(*n)) {
68 if (unlikely(*n != ','))
69 goto fail;
70 break;
71 }
72 }
73
74 /* if duplicate, must be equal */
75 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
76 goto fail;
77
78 /* OK, store this result as the one to be indexed */
79 h1m->flags |= H1_MF_CLEN;
80 h1m->curr_len = h1m->body_len = cl;
81 *value = word;
82 word.ptr = n;
83 }
84 /* here we've reached the end with a single value or a series of
85 * identical values, all matching previous series if any. The last
86 * parsed value was sent back into <value>. We just have to decide
87 * if this occurrence has to be indexed (it's the first one) or
88 * silently skipped (it's not the first one)
89 */
90 return !not_first;
91 fail:
92 return -1;
93}
94
Willy Tarreau2557f6a2018-09-14 16:34:47 +020095/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
Christopher Faulet545fbba2021-09-28 09:36:25 +020096 * "chunked" encoding to perform some checks (it must be the last encoding for
97 * the request and must not be performed twice for any message). The
98 * H1_MF_TE_CHUNKED is set if a valid "chunked" encoding is found. The
99 * H1_MF_TE_OTHER flag is set if any other encoding is found. The H1_MF_XFER_ENC
100 * flag is always set. The H1_MF_CHNK is set when "chunked" encoding is the last
101 * one. Note that transfer codings are case-insensitive (cf RFC7230#4). This
102 * function returns <0 if a error is found, 0 if the whole header can be dropped
103 * (not used yet), or >0 if the value can be indexed.
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200104 */
Christopher Faulet545fbba2021-09-28 09:36:25 +0200105int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200106{
107 char *e, *n;
108 struct ist word;
109
110 h1m->flags |= H1_MF_XFER_ENC;
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200111
112 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
Tim Duesterhus4c8f75f2021-11-06 15:14:44 +0100113 e = istend(value);
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200114
115 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500116 /* skip leading delimiter and blanks */
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200117 if (HTTP_IS_LWS(*word.ptr))
118 continue;
119
120 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
121 word.len = n - word.ptr;
122
123 /* trim trailing blanks */
124 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
125 word.len--;
126
127 h1m->flags &= ~H1_MF_CHNK;
Christopher Faulet545fbba2021-09-28 09:36:25 +0200128 if (isteqi(word, ist("chunked"))) {
129 if (h1m->flags & H1_MF_TE_CHUNKED) {
130 /* cf RFC7230#3.3.1 : A sender MUST NOT apply
131 * chunked more than once to a message body
132 * (i.e., chunking an already chunked message is
133 * not allowed)
134 */
135 goto fail;
136 }
137 h1m->flags |= (H1_MF_TE_CHUNKED|H1_MF_CHNK);
138 }
139 else {
140 if ((h1m->flags & (H1_MF_RESP|H1_MF_TE_CHUNKED)) == H1_MF_TE_CHUNKED) {
141 /* cf RFC7230#3.3.1 : If any transfer coding
142 * other than chunked is applied to a request
143 * payload body, the sender MUST apply chunked
144 * as the final transfer coding to ensure that
145 * the message is properly framed.
146 */
147 goto fail;
148 }
149 h1m->flags |= H1_MF_TE_OTHER;
150 }
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200151
152 word.ptr = n;
153 }
Christopher Faulet545fbba2021-09-28 09:36:25 +0200154
155 return 1;
156 fail:
157 return -1;
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200158}
159
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200160/* Parse the Connection: header of an HTTP/1 request, looking for "close",
161 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
162 * what was found there. Note that flags are only added, not removed, so the
163 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100164 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
165 * up from "keep-alive" and "close" values. To do so, the header value is
166 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200167 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100168void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200169{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100170 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200171 struct ist word;
172
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100173 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
174 p = value->ptr;
175 e = value->ptr + value->len;
176 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
177 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200178
179 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500180 /* skip leading delimiter and blanks */
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200181 if (HTTP_IS_LWS(*word.ptr))
182 continue;
183
184 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
185 word.len = n - word.ptr;
186
187 /* trim trailing blanks */
188 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
189 word.len--;
190
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100191 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200192 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100193 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
194 goto skip_val;
195 }
196 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200197 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100198 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
199 goto skip_val;
200 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200201 else if (isteqi(word, ist("upgrade")))
202 h1m->flags |= H1_MF_CONN_UPG;
203
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100204 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
205 if (value->ptr + value->len == p) {
206 /* no rewrite done till now */
207 value->len = n - value->ptr;
208 }
209 else {
210 if (value->len)
211 value->ptr[value->len++] = ',';
212 istcat(value, word, e - value->ptr);
213 }
214 }
215
216 skip_val:
217 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200218 }
219}
220
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100221/* Parse the Upgrade: header of an HTTP/1 request.
222 * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
223 */
224void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
225{
226 char *e, *n;
227 struct ist word;
228
229 h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
230
231 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
Tim Duesterhus4c8f75f2021-11-06 15:14:44 +0100232 e = istend(value);
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100233
234 while (++word.ptr < e) {
235 /* skip leading delimiter and blanks */
236 if (HTTP_IS_LWS(*word.ptr))
237 continue;
238
239 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
240 word.len = n - word.ptr;
241
242 /* trim trailing blanks */
243 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
244 word.len--;
245
246 if (isteqi(word, ist("websocket")))
247 h1m->flags |= H1_MF_UPG_WEBSOCKET;
248
249 word.ptr = n;
250 }
251}
252
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
 */
256
/* Checks that the byte at <ptr> is an LF. When it is, execution simply
 * continues after the macro. Otherwise <state> is set to <where> and control
 * jumps to label <bad>.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)                          \
	do {                                                            \
		if (likely(*(ptr) == '\n'))                             \
			break;                                          \
		state = (where);                                        \
		goto bad;                                               \
	} while (0)
267
/* Advances pointer <ptr> by one byte. If it reached pointer <end>, <state> is
 * set to <where> and control jumps to label <stop>; otherwise control jumps to
 * label <more>. Execution never falls through past this macro.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)      \
	do {                                                            \
		if (unlikely(++(ptr) >= (end))) {                       \
			state = (where);                                \
			goto stop;                                      \
		}                                                       \
		goto more;                                              \
	} while (0)
281
/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
 * pairs representing header fields into the array <hdr> of size <hdr_num>,
 * whose last entry will have an empty name and an empty value. If <hdr_num> is
 * too small to represent the whole message, an error is returned. Some
 * protocol elements such as content-length and transfer-encoding will be
 * parsed and stored into h1m as well. <hdr> may be null, in which case only
 * the parsing state will be updated. This may be used to restart the parsing
 * where it stopped for example.
 *
 * For now it's limited to the response. If the header block is incomplete,
 * 0 is returned, waiting to be called again with more data to try it again.
 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
 * and h1m->next to zero on the first call, the parser will do the rest. If
 * an incomplete message is seen, the caller only needs to present h1m->state
 * and h1m->next again, with an empty header list so that the parser can start
 * again. In this case, it will detect that it interrupted a previous session
 * and will first look for the end of the message before reparsing it again and
 * indexing it at the same time. This ensures that incomplete messages fed 1
 * character at a time are never processed entirely more than exactly twice,
 * and that there is no need to store all the internal state and pre-parsed
 * headers or start line between calls.
 *
 * A pointer to a start line descriptor may be passed in <slp>, in which case
 * the parser will fill it with whatever it found.
 *
 * The code is derived from the main HTTP/1 parser above but was simplified and
 * optimized to process responses produced or forwarded by haproxy. The caller
 * is responsible for ensuring that the message doesn't wrap, and should ensure
 * it is complete to avoid having to retry the operation after a failed
 * attempt. The message is not supposed to be invalid, which is why a few
 * properties such as the character set used in the header field names are not
 * checked. In case of an unparsable response message, a negative value will be
 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
 * parsed and the start line is skipped. It is not required to set h1m->state
 * nor h1m->next in this case.
 *
 * This function returns :
 *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
 *    -2 if the output is full (hdr_num reached). err_state and err_pos also
 *       indicate where it failed.
 *     0 in case of missing data.
 *   > 0 on success, it then corresponds to the number of bytes read since
 *       <start> so that the caller can go on with the payload.
 */
331int h1_headers_to_hdr_list(char *start, const char *stop,
332 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200333 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200334{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200335 enum h1m_state state;
336 register char *ptr;
337 register const char *end;
338 unsigned int hdr_count;
339 unsigned int skip; /* number of bytes skipped at the beginning */
340 unsigned int sol; /* start of line */
341 unsigned int col; /* position of the colon */
342 unsigned int eol; /* end of line */
343 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200344 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200345 int skip_update;
346 int restarting;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200347 int host_idx;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200348 struct ist n, v; /* header name and value during parsing */
349
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200350 skip = 0; // do it only once to keep track of the leading CRLF.
351
352 try_again:
353 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200354 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200355 skip_update = restarting = 0;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200356 host_idx = -1;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200357
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100358 if (h1m->flags & H1_MF_HDRS_ONLY) {
359 state = H1_MSG_HDR_FIRST;
360 h1m->next = 0;
361 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100362 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100363 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100364 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
365 restarting = 1;
366 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100367
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200368 ptr = start + h1m->next;
369 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200370
Willy Tarreau794f9af2017-07-26 09:07:47 +0200371 if (unlikely(ptr >= end))
372 goto http_msg_ood;
373
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200374 /* don't update output if hdr is NULL or if we're restarting */
375 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200376 skip_update = 1;
377
Willy Tarreau794f9af2017-07-26 09:07:47 +0200378 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200379 case H1_MSG_RQBEFORE:
380 http_msg_rqbefore:
381 if (likely(HTTP_IS_TOKEN(*ptr))) {
382 /* we have a start of message, we may have skipped some
383 * heading CRLF. Skip them now.
384 */
385 skip += ptr - start;
386 start = ptr;
387
388 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200389 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200390 hdr_count = 0;
391 state = H1_MSG_RQMETH;
392 goto http_msg_rqmeth;
393 }
394
395 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
396 state = H1_MSG_RQBEFORE;
397 goto http_msg_invalid;
398 }
399
400 if (unlikely(*ptr == '\n'))
401 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
402 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
403 /* stop here */
404
405 case H1_MSG_RQBEFORE_CR:
406 http_msg_rqbefore_cr:
407 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
408 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
409 /* stop here */
410
411 case H1_MSG_RQMETH:
412 http_msg_rqmeth:
413 if (likely(HTTP_IS_TOKEN(*ptr)))
414 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
415
416 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200417 sl.rq.m.len = ptr - sl.rq.m.ptr;
418 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200419 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
420 }
421
422 if (likely(HTTP_IS_CRLF(*ptr))) {
423 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200424 sl.rq.m.len = ptr - sl.rq.m.ptr;
425 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200426 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200427 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200428 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200429 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200430 http_msg_req09_ver:
Tim Duesterhus77508502022-03-15 13:11:06 +0100431 sl.rq.v = ist2(ptr, 0);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200432 goto http_msg_rqline_eol;
433 }
434 state = H1_MSG_RQMETH;
435 goto http_msg_invalid;
436
437 case H1_MSG_RQMETH_SP:
438 http_msg_rqmeth_sp:
439 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200440 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200441 goto http_msg_rquri;
442 }
443 if (likely(HTTP_IS_SPHT(*ptr)))
444 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
445 /* so it's a CR/LF, meaning an HTTP 0.9 request */
446 goto http_msg_req09_uri;
447
448 case H1_MSG_RQURI:
449 http_msg_rquri:
Willy Tarreau02ac9502020-02-21 16:31:22 +0100450#ifdef HA_UNALIGNED_LE
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200451 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
452 while (ptr <= end - sizeof(int)) {
453 int x = *(int *)ptr - 0x21212121;
454 if (x & 0x80808080)
455 break;
456
457 x -= 0x5e5e5e5e;
458 if (!(x & 0x80808080))
459 break;
460
461 ptr += sizeof(int);
462 }
463#endif
464 if (ptr >= end) {
465 state = H1_MSG_RQURI;
466 goto http_msg_ood;
467 }
468 http_msg_rquri2:
469 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
470 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
471
472 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200473 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200474 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
475 }
476 if (likely((unsigned char)*ptr >= 128)) {
477 /* non-ASCII chars are forbidden unless option
478 * accept-invalid-http-request is enabled in the frontend.
479 * In any case, we capture the faulty char.
480 */
481 if (h1m->err_pos < -1)
482 goto invalid_char;
483 if (h1m->err_pos == -1)
484 h1m->err_pos = ptr - start + skip;
485 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
486 }
487
488 if (likely(HTTP_IS_CRLF(*ptr))) {
489 /* so it's a CR/LF, meaning an HTTP 0.9 request */
490 goto http_msg_req09_uri_e;
491 }
492
493 /* OK forbidden chars, 0..31 or 127 */
494 invalid_char:
495 state = H1_MSG_RQURI;
496 goto http_msg_invalid;
497
498 case H1_MSG_RQURI_SP:
499 http_msg_rquri_sp:
500 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200501 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200502 goto http_msg_rqver;
503 }
504 if (likely(HTTP_IS_SPHT(*ptr)))
505 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
506 /* so it's a CR/LF, meaning an HTTP 0.9 request */
507 goto http_msg_req09_ver;
508
509
510 case H1_MSG_RQVER:
511 http_msg_rqver:
512 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
513 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
514
515 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200516 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200517 http_msg_rqline_eol:
518 /* We have seen the end of line. Note that we do not
519 * necessarily have the \n yet, but at least we know that we
520 * have EITHER \r OR \n, otherwise the request would not be
521 * complete. We can then record the request length and return
522 * to the caller which will be able to register it.
523 */
524
525 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200526 if ((sl.rq.v.len == 8) &&
527 (*(sl.rq.v.ptr + 5) > '1' ||
528 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200529 h1m->flags |= H1_MF_VER_11;
530
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200531 if (unlikely(hdr_count >= hdr_num)) {
532 state = H1_MSG_RQVER;
533 goto http_output_full;
534 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200535 if (!(h1m->flags & H1_MF_NO_PHDR))
536 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200537
538 if (unlikely(hdr_count >= hdr_num)) {
539 state = H1_MSG_RQVER;
540 goto http_output_full;
541 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200542 if (!(h1m->flags & H1_MF_NO_PHDR))
543 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200544 }
545
546 sol = ptr - start;
547 if (likely(*ptr == '\r'))
548 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
549 goto http_msg_rqline_end;
550 }
551
552 /* neither an HTTP_VER token nor a CRLF */
553 state = H1_MSG_RQVER;
554 goto http_msg_invalid;
555
556 case H1_MSG_RQLINE_END:
557 http_msg_rqline_end:
558 /* check for HTTP/0.9 request : no version information
559 * available. sol must point to the first of CR or LF. However
560 * since we don't save these elements between calls, if we come
561 * here from a restart, we don't necessarily know. Thus in this
562 * case we simply start over.
563 */
564 if (restarting)
565 goto restart;
566
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200567 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200568 goto http_msg_last_lf;
569
570 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
571 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
572 /* stop here */
573
574 /*
575 * Common states below
576 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200577 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200578 http_msg_rpbefore:
579 if (likely(HTTP_IS_TOKEN(*ptr))) {
580 /* we have a start of message, we may have skipped some
581 * heading CRLF. Skip them now.
582 */
583 skip += ptr - start;
584 start = ptr;
585
586 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200587 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200588 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200589 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200590 goto http_msg_rpver;
591 }
592
593 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200594 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200595 goto http_msg_invalid;
596 }
597
598 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200599 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
600 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200601 /* stop here */
602
Willy Tarreau801250e2018-09-11 11:45:04 +0200603 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200604 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200605 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
606 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200607 /* stop here */
608
Willy Tarreau801250e2018-09-11 11:45:04 +0200609 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200610 http_msg_rpver:
611 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200612 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200613
614 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200615 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200616
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200617 if ((sl.st.v.len == 8) &&
618 (*(sl.st.v.ptr + 5) > '1' ||
619 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200620 h1m->flags |= H1_MF_VER_11;
621
Willy Tarreau801250e2018-09-11 11:45:04 +0200622 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200623 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200624 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200625 goto http_msg_invalid;
626
Willy Tarreau801250e2018-09-11 11:45:04 +0200627 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200628 http_msg_rpver_sp:
629 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200630 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200631 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200632 goto http_msg_rpcode;
633 }
634 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200635 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200636 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200637 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200638 goto http_msg_invalid;
639
Willy Tarreau801250e2018-09-11 11:45:04 +0200640 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200641 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100642 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200643 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200644 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200645 }
646
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100647 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200648 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100649 goto http_msg_invalid;
650 }
651
Willy Tarreau794f9af2017-07-26 09:07:47 +0200652 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200653 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200654 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200655 }
656
657 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200658 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200659
660 http_msg_rsp_reason:
Tim Duesterhus77508502022-03-15 13:11:06 +0100661 sl.st.r = ist2(ptr, 0);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200662 goto http_msg_rpline_eol;
663
Willy Tarreau801250e2018-09-11 11:45:04 +0200664 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200665 http_msg_rpcode_sp:
666 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200667 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200668 goto http_msg_rpreason;
669 }
670 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200671 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200672 /* so it's a CR/LF, so there is no reason phrase */
673 goto http_msg_rsp_reason;
674
Willy Tarreau801250e2018-09-11 11:45:04 +0200675 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200676 http_msg_rpreason:
677 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200678 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200679 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200680 http_msg_rpline_eol:
681 /* We have seen the end of line. Note that we do not
682 * necessarily have the \n yet, but at least we know that we
683 * have EITHER \r OR \n, otherwise the response would not be
684 * complete. We can then record the response length and return
685 * to the caller which will be able to register it.
686 */
687
Willy Tarreau5384aac2018-09-11 16:04:48 +0200688 if (likely(!skip_update)) {
689 if (unlikely(hdr_count >= hdr_num)) {
690 state = H1_MSG_RPREASON;
691 goto http_output_full;
692 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200693 if (!(h1m->flags & H1_MF_NO_PHDR))
694 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200695 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200696
697 sol = ptr - start;
698 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200699 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200700 goto http_msg_rpline_end;
701
Willy Tarreau801250e2018-09-11 11:45:04 +0200702 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200703 http_msg_rpline_end:
704 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200705 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
706 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200707 /* stop here */
708
Willy Tarreau801250e2018-09-11 11:45:04 +0200709 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200710 http_msg_hdr_first:
711 sol = ptr - start;
712 if (likely(!HTTP_IS_CRLF(*ptr))) {
713 goto http_msg_hdr_name;
714 }
715
716 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200717 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200718 goto http_msg_last_lf;
719
Willy Tarreau801250e2018-09-11 11:45:04 +0200720 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200721 http_msg_hdr_name:
722 /* assumes sol points to the first char */
723 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200724 if (!skip_update) {
725 /* turn it to lower case if needed */
726 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
Willy Tarreauf278eec2020-07-05 21:46:32 +0200727 *ptr = tolower((unsigned char)*ptr);
Christopher Faulet2912f872018-09-19 14:01:04 +0200728 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200729 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200730 }
731
732 if (likely(*ptr == ':')) {
733 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200734 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200735 }
736
Willy Tarreau9aec3052018-09-12 09:20:40 +0200737 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200738 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200739 goto http_msg_invalid;
740 }
741
Willy Tarreau9aec3052018-09-12 09:20:40 +0200742 if (h1m->err_pos == -1) /* capture the error pointer */
743 h1m->err_pos = ptr - start + skip; /* >= 0 now */
744
745 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200746 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200747
Willy Tarreau801250e2018-09-11 11:45:04 +0200748 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200749 http_msg_hdr_l1_sp:
750 /* assumes sol points to the first char */
751 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200752 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200753
754 /* header value can be basically anything except CR/LF */
755 sov = ptr - start;
756
757 if (likely(!HTTP_IS_CRLF(*ptr))) {
758 goto http_msg_hdr_val;
759 }
760
761 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200762 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200763 goto http_msg_hdr_l1_lf;
764
Willy Tarreau801250e2018-09-11 11:45:04 +0200765 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200766 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200767 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
768 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200769
Willy Tarreau801250e2018-09-11 11:45:04 +0200770 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200771 http_msg_hdr_l1_lws:
772 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200773 if (!skip_update) {
774 /* replace HT,CR,LF with spaces */
775 for (; start + sov < ptr; sov++)
776 start[sov] = ' ';
777 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200778 goto http_msg_hdr_l1_sp;
779 }
780 /* we had a header consisting only in spaces ! */
781 eol = sov;
782 goto http_msg_complete_header;
783
Willy Tarreau801250e2018-09-11 11:45:04 +0200784 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200785 http_msg_hdr_val:
786 /* assumes sol points to the first char, and sov
787 * points to the first character of the value.
788 */
789
790 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
791 * and lower. In fact since most of the time is spent in the loop, we
792 * also remove the sign bit test so that bytes 0x8e..0x0d break the
793 * loop, but we don't care since they're very rare in header values.
794 */
Willy Tarreau02ac9502020-02-21 16:31:22 +0100795#ifdef HA_UNALIGNED_LE64
Willy Tarreau794f9af2017-07-26 09:07:47 +0200796 while (ptr <= end - sizeof(long)) {
797 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
798 goto http_msg_hdr_val2;
799 ptr += sizeof(long);
800 }
801#endif
Willy Tarreau02ac9502020-02-21 16:31:22 +0100802#ifdef HA_UNALIGNED_LE
Willy Tarreau794f9af2017-07-26 09:07:47 +0200803 while (ptr <= end - sizeof(int)) {
804 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
805 goto http_msg_hdr_val2;
806 ptr += sizeof(int);
807 }
808#endif
809 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200810 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200811 goto http_msg_ood;
812 }
813 http_msg_hdr_val2:
814 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200815 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200816
817 eol = ptr - start;
818 /* Note: we could also copy eol into ->eoh so that we have the
819 * real header end in case it ends with lots of LWS, but is this
820 * really needed ?
821 */
822 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200823 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200824 goto http_msg_hdr_l2_lf;
825
Willy Tarreau801250e2018-09-11 11:45:04 +0200826 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200827 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200828 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
829 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200830
Willy Tarreau801250e2018-09-11 11:45:04 +0200831 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200832 http_msg_hdr_l2_lws:
833 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200834 if (!skip_update) {
835 /* LWS: replace HT,CR,LF with spaces */
836 for (; start + eol < ptr; eol++)
837 start[eol] = ' ';
838 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200839 goto http_msg_hdr_val;
840 }
841 http_msg_complete_header:
842 /*
843 * It was a new header, so the last one is finished. Assumes
844 * <sol> points to the first char of the name, <col> to the
845 * colon, <sov> points to the first character of the value and
846 * <eol> to the first CR or LF so we know how the line ends. We
847 * will trim spaces around the value. It's possible to do it by
848 * adjusting <eol> and <sov> which are no more used after this.
849 * We can add the header field to the list.
850 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200851 if (likely(!skip_update)) {
852 while (sov < eol && HTTP_IS_LWS(start[sov]))
853 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200854
Christopher Faulet2912f872018-09-19 14:01:04 +0200855 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
856 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200857
858
Christopher Faulet2912f872018-09-19 14:01:04 +0200859 n = ist2(start + sol, col - sol);
860 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200861
Christopher Faulet2912f872018-09-19 14:01:04 +0200862 do {
863 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200864
Christopher Faulet2912f872018-09-19 14:01:04 +0200865 if (unlikely(hdr_count >= hdr_num)) {
866 state = H1_MSG_HDR_L2_LWS;
867 goto http_output_full;
868 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200869
Christopher Faulet2912f872018-09-19 14:01:04 +0200870 if (isteqi(n, ist("transfer-encoding"))) {
Christopher Faulet545fbba2021-09-28 09:36:25 +0200871 ret = h1_parse_xfer_enc_header(h1m, v);
872 if (ret < 0) {
873 state = H1_MSG_HDR_L2_LWS;
874 ptr = v.ptr; /* Set ptr on the error */
875 goto http_msg_invalid;
876 }
877 else if (ret == 0) {
878 /* skip it */
879 break;
880 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200881 }
882 else if (isteqi(n, ist("content-length"))) {
883 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200884
Christopher Faulet2912f872018-09-19 14:01:04 +0200885 if (ret < 0) {
886 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100887 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet2912f872018-09-19 14:01:04 +0200888 goto http_msg_invalid;
889 }
890 else if (ret == 0) {
891 /* skip it */
892 break;
893 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200894 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200895 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100896 h1_parse_connection_header(h1m, &v);
897 if (!v.len) {
898 /* skip it */
899 break;
900 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200901 }
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100902 else if (isteqi(n, ist("upgrade"))) {
903 h1_parse_upgrade_header(h1m, v);
904 }
Christopher Faulet7032a3f2020-05-04 09:01:45 +0200905 else if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP)) && isteqi(n, ist("host"))) {
Christopher Faulet531b83e2019-10-11 13:34:22 +0200906 if (host_idx == -1) {
907 struct ist authority;
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200908 struct http_uri_parser parser = http_uri_parser_init(sl.rq.u);
Christopher Faulet531b83e2019-10-11 13:34:22 +0200909
Amaury Denoyelle69294b22021-07-06 11:02:22 +0200910 authority = http_parse_authority(&parser, 1);
Christopher Faulet531b83e2019-10-11 13:34:22 +0200911 if (authority.len && !isteqi(v, authority)) {
912 if (h1m->err_pos < -1) {
913 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100914 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet531b83e2019-10-11 13:34:22 +0200915 goto http_msg_invalid;
916 }
917 if (h1m->err_pos == -1) /* capture the error pointer */
Christopher Faulet17034782020-01-06 13:41:01 +0100918 h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
Christopher Faulet531b83e2019-10-11 13:34:22 +0200919 }
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200920 host_idx = hdr_count;
Christopher Faulet531b83e2019-10-11 13:34:22 +0200921 }
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200922 else {
923 if (!isteqi(v, hdr[host_idx].v)) {
924 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100925 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200926 goto http_msg_invalid;
927 }
928 /* if the same host, skip it */
929 break;
930 }
931 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200932
Christopher Faulet2912f872018-09-19 14:01:04 +0200933 http_set_hdr(&hdr[hdr_count++], n, v);
934 } while (0);
935 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200936
937 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200938
Willy Tarreau794f9af2017-07-26 09:07:47 +0200939 if (likely(!HTTP_IS_CRLF(*ptr)))
940 goto http_msg_hdr_name;
941
942 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200943 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200944 goto http_msg_last_lf;
945
Willy Tarreau801250e2018-09-11 11:45:04 +0200946 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200947 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200948 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200949 ptr++;
950 /* <ptr> now points to the first byte of payload. If needed sol
951 * still points to the first of either CR or LF of the empty
952 * line ending the headers block.
953 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200954 if (likely(!skip_update)) {
955 if (unlikely(hdr_count >= hdr_num)) {
956 state = H1_MSG_LAST_LF;
957 goto http_output_full;
958 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200959 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200960 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200961
962 /* reaching here we've parsed the whole message. We may detect
963 * that we were already continuing an interrupted parsing pass
964 * so we were silently looking for the end of message not
965 * updating anything before deciding to parse it fully at once.
966 * It's guaranteed that we won't match this test twice in a row
967 * since restarting will turn zero.
968 */
969 if (restarting)
970 goto restart;
971
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200972 state = H1_MSG_DATA;
973 if (h1m->flags & H1_MF_XFER_ENC) {
974 if (h1m->flags & H1_MF_CLEN) {
Christopher Faulet631c7e82021-09-27 09:47:03 +0200975 /* T-E + C-L: force close and remove C-L */
976 h1m->flags |= H1_MF_CONN_CLO;
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200977 h1m->flags &= ~H1_MF_CLEN;
978 hdr_count = http_del_hdr(hdr, ist("content-length"));
979 }
Christopher Faulet631c7e82021-09-27 09:47:03 +0200980 else if (!(h1m->flags & H1_MF_VER_11)) {
981 /* T-E + HTTP/1.0: force close */
982 h1m->flags |= H1_MF_CONN_CLO;
983 }
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200984
985 if (h1m->flags & H1_MF_CHNK)
986 state = H1_MSG_CHUNK_SIZE;
987 else if (!(h1m->flags & H1_MF_RESP)) {
988 /* cf RFC7230#3.3.3 : transfer-encoding in
989 * request without chunked encoding is invalid.
990 */
991 goto http_msg_invalid;
992 }
993 }
994
Willy Tarreau794f9af2017-07-26 09:07:47 +0200995 break;
996
997 default:
998 /* impossible states */
999 goto http_msg_invalid;
1000 }
1001
Willy Tarreau001823c2018-09-12 17:25:32 +02001002 /* Now we've left the headers state and are either in H1_MSG_DATA or
1003 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001004 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001005
Willy Tarreau5384aac2018-09-11 16:04:48 +02001006 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001007 *slp = sl;
1008
Willy Tarreau4433c082018-09-11 15:33:32 +02001009 h1m->state = state;
1010 h1m->next = ptr - start + skip;
1011 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001012
1013 http_msg_ood:
1014 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001015 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001016 *slp = sl;
1017
Willy Tarreau4433c082018-09-11 15:33:32 +02001018 h1m->state = state;
1019 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001020 return 0;
1021
1022 http_msg_invalid:
1023 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001024 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001025 *slp = sl;
1026
Willy Tarreau4433c082018-09-11 15:33:32 +02001027 h1m->err_state = h1m->state = state;
1028 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001029 return -1;
1030
1031 http_output_full:
1032 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001033 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001034 *slp = sl;
1035
Willy Tarreau4433c082018-09-11 15:33:32 +02001036 h1m->err_state = h1m->state = state;
1037 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001038 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001039
1040 restart:
Christopher Faulet02c89332021-12-01 18:01:48 +01001041 h1m->flags &= H1_MF_RESTART_MASK;
Christopher Faulet84f06532019-09-03 16:05:31 +02001042 h1m->curr_len = h1m->body_len = h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001043 if (h1m->flags & H1_MF_RESP)
1044 h1m->state = H1_MSG_RPBEFORE;
1045 else
1046 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001047 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001048}
1049
/* Minimal parser for the trailers block located at offset <ofs> in <buf>,
 * looking at no more than <max> bytes. Lines are consumed one at a time until
 * an empty line (a bare LF or a CRLF) is met.
 *
 * Returns:
 *   > 0 : the number of bytes, counted from <ofs>, up to and including the
 *         empty line which terminates the trailers ;
 *     0 : the end of the inspected area was reached before the terminating
 *         empty line, more input is needed ;
 *   < 0 : parse error (a second CR was found on a line before its LF). The
 *         caller decides how to proceed.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	const char *line;     /* first byte of the line being scanned */
	const char *cur;      /* scanning cursor */
	const char *eol_mark; /* first CR of the line, or its LF if it has no CR */
	int pos = ofs;

	do {
		line = b_peek(buf, pos);
		eol_mark = NULL;

		/* walk the line until its LF, remembering where a CR was seen */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur == '\r') {
				/* only one CR is tolerated per line */
				if (eol_mark)
					return -1;
				eol_mark = cur;
			}
		}

		/* no CR on this line: the LF itself marks the end of line */
		if (!eol_mark)
			eol_mark = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* a line whose end-of-line marker sits on its very first byte
		 * is the empty line closing the trailers; anything else means
		 * there is another line to look at.
		 */
	} while (eol_mark != line);

	return pos - ofs;
}
Amaury Denoyellec1938232020-12-11 17:53:03 +01001100
/* Produce a random key suitable for a WebSocket handshake (RFC 6455). Sixteen
 * random bytes (128 bits) are drawn from ha_random64() and handed to
 * a2base64(), which writes the encoded form into <key_out>. <key_out> must be
 * able to hold 25 bytes (presumably 24 base64 characters plus a trailing NUL;
 * confirm against a2base64()).
 */
void h1_generate_random_ws_input_key(char key_out[25])
{
	char raw[2 * sizeof(uint64_t)];
	uint64_t word;
	size_t off;

	/* fill the 16-byte buffer with two consecutive 64-bit random values */
	for (off = 0; off < sizeof(raw); off += sizeof(word)) {
		word = ha_random64();
		memcpy(raw + off, &word, sizeof(word));
	}

	a2base64(raw, (int)sizeof(raw), key_out, 25);
}
1115
Amaury Denoyellec1938232020-12-11 17:53:03 +01001116#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
1117
1118/*
1119 * Calculate the WebSocket handshake response key from <key_in>. Following the
1120 * rfc6455, <key_in> must be 24 bytes longs. The result is stored in <key_out>
1121 * as a 29 bytes long string.
1122 */
1123void h1_calculate_ws_output_key(const char *key, char *result)
1124{
1125 blk_SHA_CTX sha1_ctx;
1126 char hash_in[60], hash_out[20];
1127
1128 /* concatenate the key with a fixed suffix */
1129 memcpy(hash_in, key, 24);
1130 memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
1131
1132 /* sha1 the result */
1133 blk_SHA1_Init(&sha1_ctx);
1134 blk_SHA1_Update(&sha1_ctx, hash_in, 60);
1135 blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
1136
1137 /* encode in base64 the hash */
1138 a2base64(hash_out, 20, result, 29);
1139}