blob: 91d3dc47ad48b63bef99dbdc9d601704093bb3e6 [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010014
15#include <import/sha1.h>
16
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020017#include <haproxy/api.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010018#include <haproxy/base64.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020019#include <haproxy/h1.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020020#include <haproxy/http-hdr.h>
Amaury Denoyelleaad333a2020-12-11 17:53:07 +010021#include <haproxy/tools.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020022
Willy Tarreau73373ab2018-09-14 17:11:33 +020023/* Parse the Content-Length header field of an HTTP/1 request. The function
24 * checks all possible occurrences of a comma-delimited value, and verifies
25 * if any of them doesn't match a previous value. It returns <0 if a value
26 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
27 * if the value can be indexed (first one). In the last case, the value might
28 * be adjusted and the caller must only add the updated value.
29 */
30int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
31{
32 char *e, *n;
33 long long cl;
34 int not_first = !!(h1m->flags & H1_MF_CLEN);
35 struct ist word;
36
Willy Tarreauba9afd22023-08-09 08:32:48 +020037 word.ptr = value->ptr;
Willy Tarreau73373ab2018-09-14 17:11:33 +020038 e = value->ptr + value->len;
39
Willy Tarreauba9afd22023-08-09 08:32:48 +020040 while (1) {
41 if (word.ptr >= e) {
42 /* empty header or empty value */
43 goto fail;
44 }
45
Ilya Shipitsin47d17182020-06-21 21:42:57 +050046 /* skip leading delimiter and blanks */
Willy Tarreauba9afd22023-08-09 08:32:48 +020047 if (unlikely(HTTP_IS_LWS(*word.ptr))) {
48 word.ptr++;
Willy Tarreau73373ab2018-09-14 17:11:33 +020049 continue;
Willy Tarreauba9afd22023-08-09 08:32:48 +020050 }
Willy Tarreau73373ab2018-09-14 17:11:33 +020051
52 /* digits only now */
53 for (cl = 0, n = word.ptr; n < e; n++) {
54 unsigned int c = *n - '0';
55 if (unlikely(c > 9)) {
56 /* non-digit */
57 if (unlikely(n == word.ptr)) // spaces only
58 goto fail;
59 break;
60 }
61 if (unlikely(cl > ULLONG_MAX / 10ULL))
62 goto fail; /* multiply overflow */
63 cl = cl * 10ULL;
64 if (unlikely(cl + c < cl))
65 goto fail; /* addition overflow */
66 cl = cl + c;
67 }
68
69 /* keep a copy of the exact cleaned value */
70 word.len = n - word.ptr;
71
72 /* skip trailing LWS till next comma or EOL */
73 for (; n < e; n++) {
74 if (!HTTP_IS_LWS(*n)) {
75 if (unlikely(*n != ','))
76 goto fail;
77 break;
78 }
79 }
80
81 /* if duplicate, must be equal */
82 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
83 goto fail;
84
85 /* OK, store this result as the one to be indexed */
86 h1m->flags |= H1_MF_CLEN;
87 h1m->curr_len = h1m->body_len = cl;
88 *value = word;
Willy Tarreauba9afd22023-08-09 08:32:48 +020089
90 /* Now either n==e and we're done, or n points to the comma,
91 * and we skip it and continue.
92 */
93 if (n++ == e)
94 break;
95
Willy Tarreau73373ab2018-09-14 17:11:33 +020096 word.ptr = n;
97 }
98 /* here we've reached the end with a single value or a series of
99 * identical values, all matching previous series if any. The last
100 * parsed value was sent back into <value>. We just have to decide
101 * if this occurrence has to be indexed (it's the first one) or
102 * silently skipped (it's not the first one)
103 */
104 return !not_first;
105 fail:
106 return -1;
107}
108
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200109/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
110 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
111 * this case. Any other token found or any empty header field found will reset
112 * this flag, so that it accurately represents the token's presence at the last
113 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
114 * are case-insensitive (cf RFC7230#4).
115 */
116void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
117{
118 char *e, *n;
119 struct ist word;
120
121 h1m->flags |= H1_MF_XFER_ENC;
122 h1m->flags &= ~H1_MF_CHNK;
123
124 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
125 e = value.ptr + value.len;
126
127 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500128 /* skip leading delimiter and blanks */
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200129 if (HTTP_IS_LWS(*word.ptr))
130 continue;
131
132 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
133 word.len = n - word.ptr;
134
135 /* trim trailing blanks */
136 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
137 word.len--;
138
139 h1m->flags &= ~H1_MF_CHNK;
140 if (isteqi(word, ist("chunked")))
141 h1m->flags |= H1_MF_CHNK;
142
143 word.ptr = n;
144 }
145}
146
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200147/* Validate the authority and the host header value for CONNECT method. If there
148 * is hast header, its value is normalized. 0 is returned on success, -1 if the
149 * authority is invalid and -2 if the host is invalid.
150 */
151static int h1_validate_connect_authority(struct ist authority, struct ist *host_hdr)
152{
153 struct ist uri_host, uri_port, host, host_port;
154
155 if (!isttest(authority))
156 goto invalid_authority;
157 uri_host = authority;
158 uri_port = http_get_host_port(authority);
159 if (!isttest(uri_port))
160 goto invalid_authority;
161 uri_host.len -= (istlen(uri_port) + 1);
162
163 if (!host_hdr || !isttest(*host_hdr))
164 goto end;
165
166 /* Get the port of the host header value, if any */
167 host = *host_hdr;
168 host_port = http_get_host_port(*host_hdr);
169 if (isttest(host_port)) {
170 host.len -= (istlen(host_port) + 1);
171 if (!isteqi(host, uri_host) || !isteq(host_port, uri_port))
172 goto invalid_host;
173 if (http_is_default_port(IST_NULL, uri_port))
174 *host_hdr = host; /* normalize */
175 }
176 else {
177 if (!http_is_default_port(IST_NULL, uri_port) || !isteqi(host, uri_host))
178 goto invalid_host;
179 }
180
181 end:
182 return 0;
183
184 invalid_authority:
185 return -1;
186
187 invalid_host:
188 return -2;
189}
190
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200191/* Parse the Connection: header of an HTTP/1 request, looking for "close",
192 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
193 * what was found there. Note that flags are only added, not removed, so the
194 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100195 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
196 * up from "keep-alive" and "close" values. To do so, the header value is
197 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200198 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100199void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200200{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100201 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200202 struct ist word;
203
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100204 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
205 p = value->ptr;
206 e = value->ptr + value->len;
207 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
208 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200209
210 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500211 /* skip leading delimiter and blanks */
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200212 if (HTTP_IS_LWS(*word.ptr))
213 continue;
214
215 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
216 word.len = n - word.ptr;
217
218 /* trim trailing blanks */
219 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
220 word.len--;
221
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100222 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200223 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100224 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
225 goto skip_val;
226 }
227 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200228 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100229 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
230 goto skip_val;
231 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200232 else if (isteqi(word, ist("upgrade")))
233 h1m->flags |= H1_MF_CONN_UPG;
234
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100235 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
236 if (value->ptr + value->len == p) {
237 /* no rewrite done till now */
238 value->len = n - value->ptr;
239 }
240 else {
241 if (value->len)
242 value->ptr[value->len++] = ',';
243 istcat(value, word, e - value->ptr);
244 }
245 }
246
247 skip_val:
248 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200249 }
250}
251
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100252/* Parse the Upgrade: header of an HTTP/1 request.
253 * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
254 */
255void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
256{
257 char *e, *n;
258 struct ist word;
259
260 h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
261
262 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
263 e = value.ptr + value.len;
264
265 while (++word.ptr < e) {
266 /* skip leading delimiter and blanks */
267 if (HTTP_IS_LWS(*word.ptr))
268 continue;
269
270 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
271 word.len = n - word.ptr;
272
273 /* trim trailing blanks */
274 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
275 word.len--;
276
277 if (isteqi(word, ist("websocket")))
278 h1m->flags |= H1_MF_UPG_WEBSOCKET;
279
280 word.ptr = n;
281 }
282}
283
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
 */
287
/* Checks that the byte at <ptr> is an LF. When it is, execution simply
 * continues after the macro. Otherwise <state> is set to <where> and the
 * macro jumps to label <bad>.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)				\
	do {								\
		if (likely(*(ptr) == '\n'))				\
			break; /* expected byte found */		\
		state = (where);					\
		goto bad;						\
	} while (0)
298
/* Advances pointer <ptr> by one byte. If it reached (or passed) pointer
 * <end>, <state> is set to <where> and the macro jumps to label <stop>.
 * Otherwise it jumps to label <more>. In both cases control leaves the
 * macro through a goto.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)	\
	do {								\
		if (unlikely(++(ptr) >= (end))) {			\
			state = (where);				\
			goto stop;					\
		}							\
		goto more;						\
	} while (0)
312
Willy Tarreau794f9af2017-07-26 09:07:47 +0200313/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
315 * pairs representing header fields into the array <hdr> of size <hdr_num>,
316 * whose last entry will have an empty name and an empty value. If <hdr_num> is
Willy Tarreau4433c082018-09-11 15:33:32 +0200317 * too small to represent the whole message, an error is returned. Some
318 * protocol elements such as content-length and transfer-encoding will be
Willy Tarreau5384aac2018-09-11 16:04:48 +0200319 * parsed and stored into h1m as well. <hdr> may be null, in which case only
320 * the parsing state will be updated. This may be used to restart the parsing
321 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200322 *
323 * For now it's limited to the response. If the header block is incomplete,
324 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200325 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200326 * and h1m->next to zero on the first call, the parser will do the rest. If
327 * an incomplete message is seen, the caller only needs to present h1m->state
328 * and h1m->next again, with an empty header list so that the parser can start
329 * again. In this case, it will detect that it interrupted a previous session
330 * and will first look for the end of the message before reparsing it again and
331 * indexing it at the same time. This ensures that incomplete messages fed 1
332 * character at a time are never processed entirely more than exactly twice,
333 * and that there is no need to store all the internal state and pre-parsed
334 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200335 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200336 * A pointer to a start line descriptor may be passed in <slp>, in which case
337 * the parser will fill it with whatever it found.
338 *
Willy Tarreau794f9af2017-07-26 09:07:47 +0200339 * The code derived from the main HTTP/1 parser above but was simplified and
340 * optimized to process responses produced or forwarded by haproxy. The caller
341 * is responsible for ensuring that the message doesn't wrap, and should ensure
342 * it is complete to avoid having to retry the operation after a failed
343 * attempt. The message is not supposed to be invalid, which is why a few
344 * properties such as the character set used in the header field names are not
345 * checked. In case of an unparsable response message, a negative value will be
346 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100348 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
349 * parsed and the start line is skipped. It is not required to set h1m->state
350 * nor h1m->next in this case.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200351 *
352 * This function returns :
353 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *      set) with the state the error occurred in and h1m->err_pos with
 *      the position relative to <start>
356 * -2 if the output is full (hdr_num reached). err_state and err_pos also
357 * indicate where it failed.
358 * 0 in case of missing data.
359 * > 0 on success, it then corresponds to the number of bytes read since
360 * <start> so that the caller can go on with the payload.
361 */
362int h1_headers_to_hdr_list(char *start, const char *stop,
363 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200364 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200365{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200366 enum h1m_state state;
367 register char *ptr;
368 register const char *end;
369 unsigned int hdr_count;
370 unsigned int skip; /* number of bytes skipped at the beginning */
371 unsigned int sol; /* start of line */
372 unsigned int col; /* position of the colon */
373 unsigned int eol; /* end of line */
374 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200375 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200376 int skip_update;
377 int restarting;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200378 int host_idx;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200379 struct ist n, v; /* header name and value during parsing */
380
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200381 skip = 0; // do it only once to keep track of the leading CRLF.
382
383 try_again:
384 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200385 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200386 skip_update = restarting = 0;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200387 host_idx = -1;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200388
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100389 if (h1m->flags & H1_MF_HDRS_ONLY) {
390 state = H1_MSG_HDR_FIRST;
391 h1m->next = 0;
392 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100393 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100394 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100395 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
396 restarting = 1;
397 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100398
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200399 ptr = start + h1m->next;
400 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200401
Willy Tarreau794f9af2017-07-26 09:07:47 +0200402 if (unlikely(ptr >= end))
403 goto http_msg_ood;
404
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200405 /* don't update output if hdr is NULL or if we're restarting */
406 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200407 skip_update = 1;
408
Willy Tarreau794f9af2017-07-26 09:07:47 +0200409 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200410 case H1_MSG_RQBEFORE:
411 http_msg_rqbefore:
412 if (likely(HTTP_IS_TOKEN(*ptr))) {
413 /* we have a start of message, we may have skipped some
414 * heading CRLF. Skip them now.
415 */
416 skip += ptr - start;
417 start = ptr;
418
419 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200420 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200421 hdr_count = 0;
422 state = H1_MSG_RQMETH;
423 goto http_msg_rqmeth;
424 }
425
426 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
427 state = H1_MSG_RQBEFORE;
428 goto http_msg_invalid;
429 }
430
431 if (unlikely(*ptr == '\n'))
432 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
433 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
434 /* stop here */
435
436 case H1_MSG_RQBEFORE_CR:
437 http_msg_rqbefore_cr:
438 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
439 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
440 /* stop here */
441
442 case H1_MSG_RQMETH:
443 http_msg_rqmeth:
444 if (likely(HTTP_IS_TOKEN(*ptr)))
445 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
446
447 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200448 sl.rq.m.len = ptr - sl.rq.m.ptr;
449 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200450 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
451 }
452
453 if (likely(HTTP_IS_CRLF(*ptr))) {
454 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200455 sl.rq.m.len = ptr - sl.rq.m.ptr;
456 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200457 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200458 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200459 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200460 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200461 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200462 sl.rq.v.ptr = ptr;
463 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200464 goto http_msg_rqline_eol;
465 }
466 state = H1_MSG_RQMETH;
467 goto http_msg_invalid;
468
469 case H1_MSG_RQMETH_SP:
470 http_msg_rqmeth_sp:
471 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200472 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200473 goto http_msg_rquri;
474 }
475 if (likely(HTTP_IS_SPHT(*ptr)))
476 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
477 /* so it's a CR/LF, meaning an HTTP 0.9 request */
478 goto http_msg_req09_uri;
479
480 case H1_MSG_RQURI:
481 http_msg_rquri:
Willy Tarreau02ac9502020-02-21 16:31:22 +0100482#ifdef HA_UNALIGNED_LE
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200483 /* speedup: skip bytes not between 0x24 and 0x7e inclusive */
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200484 while (ptr <= end - sizeof(int)) {
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200485 int x = *(int *)ptr - 0x24242424;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200486 if (x & 0x80808080)
487 break;
488
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200489 x -= 0x5b5b5b5b;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200490 if (!(x & 0x80808080))
491 break;
492
493 ptr += sizeof(int);
494 }
495#endif
496 if (ptr >= end) {
497 state = H1_MSG_RQURI;
498 goto http_msg_ood;
499 }
500 http_msg_rquri2:
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200501 if (likely((unsigned char)(*ptr - 33) <= 93)) { /* 33 to 126 included */
502 if (*ptr == '#') {
503 if (h1m->err_pos < -1) /* PR_O2_REQBUG_OK not set */
504 goto invalid_char;
505 if (h1m->err_pos == -1) /* PR_O2_REQBUG_OK set: just log */
506 h1m->err_pos = ptr - start + skip;
507 }
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200508 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200509 }
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200510
511 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200512 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200513 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
514 }
515 if (likely((unsigned char)*ptr >= 128)) {
516 /* non-ASCII chars are forbidden unless option
517 * accept-invalid-http-request is enabled in the frontend.
518 * In any case, we capture the faulty char.
519 */
520 if (h1m->err_pos < -1)
521 goto invalid_char;
522 if (h1m->err_pos == -1)
523 h1m->err_pos = ptr - start + skip;
524 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
525 }
526
527 if (likely(HTTP_IS_CRLF(*ptr))) {
528 /* so it's a CR/LF, meaning an HTTP 0.9 request */
529 goto http_msg_req09_uri_e;
530 }
531
532 /* OK forbidden chars, 0..31 or 127 */
533 invalid_char:
534 state = H1_MSG_RQURI;
535 goto http_msg_invalid;
536
537 case H1_MSG_RQURI_SP:
538 http_msg_rquri_sp:
539 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200540 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200541 goto http_msg_rqver;
542 }
543 if (likely(HTTP_IS_SPHT(*ptr)))
544 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
545 /* so it's a CR/LF, meaning an HTTP 0.9 request */
546 goto http_msg_req09_ver;
547
548
549 case H1_MSG_RQVER:
550 http_msg_rqver:
551 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
552 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
553
554 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200555 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200556 http_msg_rqline_eol:
557 /* We have seen the end of line. Note that we do not
558 * necessarily have the \n yet, but at least we know that we
559 * have EITHER \r OR \n, otherwise the request would not be
560 * complete. We can then record the request length and return
561 * to the caller which will be able to register it.
562 */
563
564 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200565 if ((sl.rq.v.len == 8) &&
566 (*(sl.rq.v.ptr + 5) > '1' ||
567 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200568 h1m->flags |= H1_MF_VER_11;
569
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200570 if (unlikely(hdr_count >= hdr_num)) {
571 state = H1_MSG_RQVER;
572 goto http_output_full;
573 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200574 if (!(h1m->flags & H1_MF_NO_PHDR))
575 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200576
577 if (unlikely(hdr_count >= hdr_num)) {
578 state = H1_MSG_RQVER;
579 goto http_output_full;
580 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200581 if (!(h1m->flags & H1_MF_NO_PHDR))
582 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200583 }
584
585 sol = ptr - start;
586 if (likely(*ptr == '\r'))
587 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
588 goto http_msg_rqline_end;
589 }
590
591 /* neither an HTTP_VER token nor a CRLF */
592 state = H1_MSG_RQVER;
593 goto http_msg_invalid;
594
595 case H1_MSG_RQLINE_END:
596 http_msg_rqline_end:
597 /* check for HTTP/0.9 request : no version information
598 * available. sol must point to the first of CR or LF. However
599 * since we don't save these elements between calls, if we come
600 * here from a restart, we don't necessarily know. Thus in this
601 * case we simply start over.
602 */
603 if (restarting)
604 goto restart;
605
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200606 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200607 goto http_msg_last_lf;
608
609 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
610 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
611 /* stop here */
612
613 /*
614 * Common states below
615 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200616 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200617 http_msg_rpbefore:
618 if (likely(HTTP_IS_TOKEN(*ptr))) {
619 /* we have a start of message, we may have skipped some
620 * heading CRLF. Skip them now.
621 */
622 skip += ptr - start;
623 start = ptr;
624
625 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200626 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200627 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200628 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200629 goto http_msg_rpver;
630 }
631
632 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200633 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200634 goto http_msg_invalid;
635 }
636
637 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200638 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
639 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200640 /* stop here */
641
Willy Tarreau801250e2018-09-11 11:45:04 +0200642 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200643 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200644 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
645 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200646 /* stop here */
647
Willy Tarreau801250e2018-09-11 11:45:04 +0200648 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200649 http_msg_rpver:
650 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200651 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200652
653 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200654 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200655
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200656 if ((sl.st.v.len == 8) &&
657 (*(sl.st.v.ptr + 5) > '1' ||
658 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200659 h1m->flags |= H1_MF_VER_11;
660
Willy Tarreau801250e2018-09-11 11:45:04 +0200661 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200662 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200663 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200664 goto http_msg_invalid;
665
Willy Tarreau801250e2018-09-11 11:45:04 +0200666 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200667 http_msg_rpver_sp:
668 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200669 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200670 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200671 goto http_msg_rpcode;
672 }
673 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200674 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200675 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200676 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200677 goto http_msg_invalid;
678
Willy Tarreau801250e2018-09-11 11:45:04 +0200679 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200680 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100681 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200682 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200683 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200684 }
685
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100686 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200687 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100688 goto http_msg_invalid;
689 }
690
Willy Tarreau794f9af2017-07-26 09:07:47 +0200691 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200692 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200693 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200694 }
695
696 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200697 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200698
699 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200700 sl.st.r.ptr = ptr;
701 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200702 goto http_msg_rpline_eol;
703
Willy Tarreau801250e2018-09-11 11:45:04 +0200704 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200705 http_msg_rpcode_sp:
706 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200707 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200708 goto http_msg_rpreason;
709 }
710 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200711 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200712 /* so it's a CR/LF, so there is no reason phrase */
713 goto http_msg_rsp_reason;
714
Willy Tarreau801250e2018-09-11 11:45:04 +0200715 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200716 http_msg_rpreason:
717 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200718 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200719 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200720 http_msg_rpline_eol:
721 /* We have seen the end of line. Note that we do not
722 * necessarily have the \n yet, but at least we know that we
723 * have EITHER \r OR \n, otherwise the response would not be
724 * complete. We can then record the response length and return
725 * to the caller which will be able to register it.
726 */
727
Willy Tarreau5384aac2018-09-11 16:04:48 +0200728 if (likely(!skip_update)) {
729 if (unlikely(hdr_count >= hdr_num)) {
730 state = H1_MSG_RPREASON;
731 goto http_output_full;
732 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200733 if (!(h1m->flags & H1_MF_NO_PHDR))
734 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200735 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200736
737 sol = ptr - start;
738 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200739 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200740 goto http_msg_rpline_end;
741
Willy Tarreau801250e2018-09-11 11:45:04 +0200742 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200743 http_msg_rpline_end:
744 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200745 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
746 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200747 /* stop here */
748
Willy Tarreau801250e2018-09-11 11:45:04 +0200749 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200750 http_msg_hdr_first:
751 sol = ptr - start;
752 if (likely(!HTTP_IS_CRLF(*ptr))) {
753 goto http_msg_hdr_name;
754 }
755
756 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200757 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200758 goto http_msg_last_lf;
759
Willy Tarreau801250e2018-09-11 11:45:04 +0200760 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200761 http_msg_hdr_name:
762 /* assumes sol points to the first char */
763 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200764 if (!skip_update) {
765 /* turn it to lower case if needed */
766 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
Willy Tarreauf278eec2020-07-05 21:46:32 +0200767 *ptr = tolower((unsigned char)*ptr);
Christopher Faulet2912f872018-09-19 14:01:04 +0200768 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200769 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200770 }
771
772 if (likely(*ptr == ':')) {
773 col = ptr - start;
Willy Tarreau486cd732023-02-09 21:36:54 +0100774 if (col <= sol) {
775 state = H1_MSG_HDR_NAME;
776 goto http_msg_invalid;
777 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200778 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200779 }
780
Willy Tarreau9aec3052018-09-12 09:20:40 +0200781 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200782 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200783 goto http_msg_invalid;
784 }
785
Willy Tarreau9aec3052018-09-12 09:20:40 +0200786 if (h1m->err_pos == -1) /* capture the error pointer */
787 h1m->err_pos = ptr - start + skip; /* >= 0 now */
788
789 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200790 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200791
Willy Tarreau801250e2018-09-11 11:45:04 +0200792 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200793 http_msg_hdr_l1_sp:
794 /* assumes sol points to the first char */
795 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200796 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200797
798 /* header value can be basically anything except CR/LF */
799 sov = ptr - start;
800
801 if (likely(!HTTP_IS_CRLF(*ptr))) {
802 goto http_msg_hdr_val;
803 }
804
805 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200806 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200807 goto http_msg_hdr_l1_lf;
808
Willy Tarreau801250e2018-09-11 11:45:04 +0200809 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200810 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200811 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
812 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200813
Willy Tarreau801250e2018-09-11 11:45:04 +0200814 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200815 http_msg_hdr_l1_lws:
816 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200817 if (!skip_update) {
818 /* replace HT,CR,LF with spaces */
819 for (; start + sov < ptr; sov++)
820 start[sov] = ' ';
821 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200822 goto http_msg_hdr_l1_sp;
823 }
824 /* we had a header consisting only in spaces ! */
825 eol = sov;
826 goto http_msg_complete_header;
827
Willy Tarreau801250e2018-09-11 11:45:04 +0200828 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200829 http_msg_hdr_val:
830 /* assumes sol points to the first char, and sov
831 * points to the first character of the value.
832 */
833
834 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
835 * and lower. In fact since most of the time is spent in the loop, we
836 * also remove the sign bit test so that bytes 0x8e..0x0d break the
837 * loop, but we don't care since they're very rare in header values.
838 */
Willy Tarreau02ac9502020-02-21 16:31:22 +0100839#ifdef HA_UNALIGNED_LE64
Willy Tarreau794f9af2017-07-26 09:07:47 +0200840 while (ptr <= end - sizeof(long)) {
841 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
842 goto http_msg_hdr_val2;
843 ptr += sizeof(long);
844 }
845#endif
Willy Tarreau02ac9502020-02-21 16:31:22 +0100846#ifdef HA_UNALIGNED_LE
Willy Tarreau794f9af2017-07-26 09:07:47 +0200847 while (ptr <= end - sizeof(int)) {
848 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
849 goto http_msg_hdr_val2;
850 ptr += sizeof(int);
851 }
852#endif
853 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200854 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200855 goto http_msg_ood;
856 }
857 http_msg_hdr_val2:
858 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200859 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200860
861 eol = ptr - start;
862 /* Note: we could also copy eol into ->eoh so that we have the
863 * real header end in case it ends with lots of LWS, but is this
864 * really needed ?
865 */
866 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200867 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200868 goto http_msg_hdr_l2_lf;
869
Willy Tarreau801250e2018-09-11 11:45:04 +0200870 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200871 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200872 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
873 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200874
Willy Tarreau801250e2018-09-11 11:45:04 +0200875 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200876 http_msg_hdr_l2_lws:
877 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200878 if (!skip_update) {
879 /* LWS: replace HT,CR,LF with spaces */
880 for (; start + eol < ptr; eol++)
881 start[eol] = ' ';
882 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200883 goto http_msg_hdr_val;
884 }
885 http_msg_complete_header:
886 /*
887 * It was a new header, so the last one is finished. Assumes
888 * <sol> points to the first char of the name, <col> to the
889 * colon, <sov> points to the first character of the value and
890 * <eol> to the first CR or LF so we know how the line ends. We
891 * will trim spaces around the value. It's possible to do it by
892 * adjusting <eol> and <sov> which are no more used after this.
893 * We can add the header field to the list.
894 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200895 if (likely(!skip_update)) {
896 while (sov < eol && HTTP_IS_LWS(start[sov]))
897 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200898
Christopher Faulet2912f872018-09-19 14:01:04 +0200899 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
900 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200901
902
Christopher Faulet2912f872018-09-19 14:01:04 +0200903 n = ist2(start + sol, col - sol);
904 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200905
Christopher Faulet2912f872018-09-19 14:01:04 +0200906 do {
907 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200908
Christopher Faulet2912f872018-09-19 14:01:04 +0200909 if (unlikely(hdr_count >= hdr_num)) {
910 state = H1_MSG_HDR_L2_LWS;
911 goto http_output_full;
912 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200913
Christopher Faulet2912f872018-09-19 14:01:04 +0200914 if (isteqi(n, ist("transfer-encoding"))) {
915 h1_parse_xfer_enc_header(h1m, v);
916 }
917 else if (isteqi(n, ist("content-length"))) {
918 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200919
Christopher Faulet2912f872018-09-19 14:01:04 +0200920 if (ret < 0) {
921 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100922 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet2912f872018-09-19 14:01:04 +0200923 goto http_msg_invalid;
924 }
925 else if (ret == 0) {
926 /* skip it */
927 break;
928 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200929 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200930 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100931 h1_parse_connection_header(h1m, &v);
932 if (!v.len) {
933 /* skip it */
934 break;
935 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200936 }
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100937 else if (isteqi(n, ist("upgrade"))) {
938 h1_parse_upgrade_header(h1m, v);
939 }
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200940 else if (!(h1m->flags & H1_MF_RESP) && isteqi(n, ist("host"))) {
941 if (host_idx == -1)
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200942 host_idx = hdr_count;
943 else {
944 if (!isteqi(v, hdr[host_idx].v)) {
945 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100946 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200947 goto http_msg_invalid;
948 }
949 /* if the same host, skip it */
950 break;
951 }
952 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200953
Christopher Faulet2912f872018-09-19 14:01:04 +0200954 http_set_hdr(&hdr[hdr_count++], n, v);
955 } while (0);
956 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200957
958 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200959
Willy Tarreau794f9af2017-07-26 09:07:47 +0200960 if (likely(!HTTP_IS_CRLF(*ptr)))
961 goto http_msg_hdr_name;
962
963 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200964 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200965 goto http_msg_last_lf;
966
Willy Tarreau801250e2018-09-11 11:45:04 +0200967 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200968 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200969 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200970 ptr++;
971 /* <ptr> now points to the first byte of payload. If needed sol
972 * still points to the first of either CR or LF of the empty
973 * line ending the headers block.
974 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200975 if (likely(!skip_update)) {
976 if (unlikely(hdr_count >= hdr_num)) {
977 state = H1_MSG_LAST_LF;
978 goto http_output_full;
979 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200980 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200981 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200982
983 /* reaching here we've parsed the whole message. We may detect
984 * that we were already continuing an interrupted parsing pass
985 * so we were silently looking for the end of message not
986 * updating anything before deciding to parse it fully at once.
987 * It's guaranteed that we won't match this test twice in a row
988 * since restarting will turn zero.
989 */
990 if (restarting)
991 goto restart;
992
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200993
994 if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) {
995 struct ist authority;
996
997 authority = http_get_authority(sl.rq.u, 1);
998 if (sl.rq.meth == HTTP_METH_CONNECT) {
999 struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL);
1000 int ret;
1001
1002 ret = h1_validate_connect_authority(authority, host);
1003 if (ret < 0) {
1004 if (h1m->err_pos < -1) {
1005 state = H1_MSG_LAST_LF;
Willy Tarreaue21ad302022-10-04 08:02:03 +02001006 /* WT: gcc seems to see a path where sl.rq.u.ptr was used
1007 * uninitialized, but it doesn't know that the function is
1008 * called with initial states making this impossible.
1009 */
1010 ALREADY_CHECKED(sl.rq.u.ptr);
Christopher Faulet63f95ed2022-07-05 14:50:17 +02001011 ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
1012 goto http_msg_invalid;
1013 }
1014 if (h1m->err_pos == -1) /* capture the error pointer */
1015 h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
1016 }
1017 }
1018 else if (host_idx != -1 && istlen(authority)) {
1019 struct ist host = hdr[host_idx].v;
1020
1021 /* For non-CONNECT method, the authority must match the host header value */
1022 if (!isteqi(authority, host)) {
1023 if (h1m->err_pos < -1) {
1024 state = H1_MSG_LAST_LF;
1025 ptr = host.ptr; /* Set ptr on the error */
1026 goto http_msg_invalid;
1027 }
1028 if (h1m->err_pos == -1) /* capture the error pointer */
1029 h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
1030 }
1031
1032 }
1033 }
1034
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001035 state = H1_MSG_DATA;
1036 if (h1m->flags & H1_MF_XFER_ENC) {
1037 if (h1m->flags & H1_MF_CLEN) {
1038 h1m->flags &= ~H1_MF_CLEN;
1039 hdr_count = http_del_hdr(hdr, ist("content-length"));
1040 }
1041
1042 if (h1m->flags & H1_MF_CHNK)
1043 state = H1_MSG_CHUNK_SIZE;
1044 else if (!(h1m->flags & H1_MF_RESP)) {
1045 /* cf RFC7230#3.3.3 : transfer-encoding in
1046 * request without chunked encoding is invalid.
1047 */
1048 goto http_msg_invalid;
1049 }
1050 }
1051
Willy Tarreau794f9af2017-07-26 09:07:47 +02001052 break;
1053
1054 default:
1055 /* impossible states */
1056 goto http_msg_invalid;
1057 }
1058
Willy Tarreau001823c2018-09-12 17:25:32 +02001059 /* Now we've left the headers state and are either in H1_MSG_DATA or
1060 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001061 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001062
Willy Tarreau5384aac2018-09-11 16:04:48 +02001063 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001064 *slp = sl;
1065
Willy Tarreau4433c082018-09-11 15:33:32 +02001066 h1m->state = state;
1067 h1m->next = ptr - start + skip;
1068 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001069
1070 http_msg_ood:
1071 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001072 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001073 *slp = sl;
1074
Willy Tarreau4433c082018-09-11 15:33:32 +02001075 h1m->state = state;
1076 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001077 return 0;
1078
1079 http_msg_invalid:
1080 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001081 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001082 *slp = sl;
1083
Willy Tarreau4433c082018-09-11 15:33:32 +02001084 h1m->err_state = h1m->state = state;
1085 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001086 return -1;
1087
1088 http_output_full:
1089 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001090 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001091 *slp = sl;
1092
Willy Tarreau4433c082018-09-11 15:33:32 +02001093 h1m->err_state = h1m->state = state;
1094 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001095 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001096
1097 restart:
Christopher Faulet84f06532019-09-03 16:05:31 +02001098 h1m->flags &= ~(H1_MF_VER_11|H1_MF_CLEN|H1_MF_XFER_ENC|H1_MF_CHNK|H1_MF_CONN_KAL|H1_MF_CONN_CLO|H1_MF_CONN_UPG);
1099 h1m->curr_len = h1m->body_len = h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001100 if (h1m->flags & H1_MF_RESP)
1101 h1m->state = H1_MSG_RPBEFORE;
1102 else
1103 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001104 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001105}
1106
/* Walks the trailers block located at offset <ofs> in <buf>, looking at no
 * more than <max> bytes, one line at a time until an empty line (a lone LF or
 * a CRLF at the very beginning of a line) is met.
 *
 * Returns:
 *   > 0 : the number of bytes covered by the trailers, including the final
 *         empty line, i.e. what the caller has to skip;
 *     0 : the end of the inspected area was reached before the line being
 *         scanned was terminated by an LF (more input is needed);
 *    -1 : a CR was found on a line where an end-of-line candidate (a
 *         previous CR) had already been recorded.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	const char *line;
	const char *eol;
	int pos = ofs;

	do {
		const char *cur;

		line = b_peek(buf, pos);
		eol  = NULL;

		/* look for the LF ending the current line, remembering the
		 * first CR met on the way.
		 */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur != '\r')
				continue;

			/* only one CR is tolerated per line */
			if (eol)
				return -1;
			eol = cur;
		}

		/* <cur> is on the LF; without any CR, the LF itself is where
		 * the line ends.
		 */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* a line whose end-of-line sits on its first byte is the empty
		 * line closing the trailers; otherwise move on to the next one.
		 */
	} while (eol != line);

	return pos - ofs;
}
Amaury Denoyellec1938232020-12-11 17:53:03 +01001157
/* Builds a random key suitable for a WebSocket handshake (rfc6455). Two
 * 64-bit random values are drawn and laid out back to back to form 16 raw
 * bytes, which are then base64-encoded into <key_out>. The output area is
 * 25 bytes long.
 */
void h1_generate_random_ws_input_key(char key_out[25])
{
	enum {
		WS_RAW_KEY_LEN = 16, /* raw key size, in bytes */
		WS_B64_KEY_LEN = 25, /* size of the output area */
	};
	uint64_t halves[2];
	char raw[WS_RAW_KEY_LEN];

	/* first draw fills bytes 0..7, second draw fills bytes 8..15 */
	halves[0] = ha_random64();
	halves[1] = ha_random64();
	memcpy(raw, halves, WS_RAW_KEY_LEN);

	a2base64(raw, WS_RAW_KEY_LEN, key_out, WS_B64_KEY_LEN);
}
1172
Amaury Denoyellec1938232020-12-11 17:53:03 +01001173#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
1174
1175/*
1176 * Calculate the WebSocket handshake response key from <key_in>. Following the
1177 * rfc6455, <key_in> must be 24 bytes longs. The result is stored in <key_out>
1178 * as a 29 bytes long string.
1179 */
1180void h1_calculate_ws_output_key(const char *key, char *result)
1181{
1182 blk_SHA_CTX sha1_ctx;
1183 char hash_in[60], hash_out[20];
1184
1185 /* concatenate the key with a fixed suffix */
1186 memcpy(hash_in, key, 24);
1187 memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
1188
1189 /* sha1 the result */
1190 blk_SHA1_Init(&sha1_ctx);
1191 blk_SHA1_Update(&sha1_ctx, hash_in, 60);
1192 blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
1193
1194 /* encode in base64 the hash */
1195 a2base64(hash_out, 20, result, 29);
1196}