blob: eeda311b70c1bdc338c54b201d382186eb417a98 [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010014
15#include <import/sha1.h>
16
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020017#include <haproxy/api.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010018#include <haproxy/base64.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020019#include <haproxy/h1.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020020#include <haproxy/http-hdr.h>
Amaury Denoyelleaad333a2020-12-11 17:53:07 +010021#include <haproxy/tools.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020022
Willy Tarreau73373ab2018-09-14 17:11:33 +020023/* Parse the Content-Length header field of an HTTP/1 request. The function
24 * checks all possible occurrences of a comma-delimited value, and verifies
25 * if any of them doesn't match a previous value. It returns <0 if a value
26 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
27 * if the value can be indexed (first one). In the last case, the value might
28 * be adjusted and the caller must only add the updated value.
29 */
30int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
31{
32 char *e, *n;
33 long long cl;
34 int not_first = !!(h1m->flags & H1_MF_CLEN);
35 struct ist word;
36
Willy Tarreauba9afd22023-08-09 08:32:48 +020037 word.ptr = value->ptr;
Willy Tarreau73373ab2018-09-14 17:11:33 +020038 e = value->ptr + value->len;
39
Willy Tarreauba9afd22023-08-09 08:32:48 +020040 while (1) {
41 if (word.ptr >= e) {
42 /* empty header or empty value */
43 goto fail;
44 }
45
Ilya Shipitsin47d17182020-06-21 21:42:57 +050046 /* skip leading delimiter and blanks */
Willy Tarreauba9afd22023-08-09 08:32:48 +020047 if (unlikely(HTTP_IS_LWS(*word.ptr))) {
48 word.ptr++;
Willy Tarreau73373ab2018-09-14 17:11:33 +020049 continue;
Willy Tarreauba9afd22023-08-09 08:32:48 +020050 }
Willy Tarreau73373ab2018-09-14 17:11:33 +020051
52 /* digits only now */
53 for (cl = 0, n = word.ptr; n < e; n++) {
54 unsigned int c = *n - '0';
55 if (unlikely(c > 9)) {
56 /* non-digit */
57 if (unlikely(n == word.ptr)) // spaces only
58 goto fail;
59 break;
60 }
61 if (unlikely(cl > ULLONG_MAX / 10ULL))
62 goto fail; /* multiply overflow */
63 cl = cl * 10ULL;
64 if (unlikely(cl + c < cl))
65 goto fail; /* addition overflow */
66 cl = cl + c;
67 }
68
69 /* keep a copy of the exact cleaned value */
70 word.len = n - word.ptr;
71
72 /* skip trailing LWS till next comma or EOL */
73 for (; n < e; n++) {
74 if (!HTTP_IS_LWS(*n)) {
75 if (unlikely(*n != ','))
76 goto fail;
77 break;
78 }
79 }
80
81 /* if duplicate, must be equal */
82 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
83 goto fail;
84
85 /* OK, store this result as the one to be indexed */
86 h1m->flags |= H1_MF_CLEN;
87 h1m->curr_len = h1m->body_len = cl;
88 *value = word;
Willy Tarreauba9afd22023-08-09 08:32:48 +020089
90 /* Now either n==e and we're done, or n points to the comma,
91 * and we skip it and continue.
92 */
93 if (n++ == e)
94 break;
95
Willy Tarreau73373ab2018-09-14 17:11:33 +020096 word.ptr = n;
97 }
98 /* here we've reached the end with a single value or a series of
99 * identical values, all matching previous series if any. The last
100 * parsed value was sent back into <value>. We just have to decide
101 * if this occurrence has to be indexed (it's the first one) or
102 * silently skipped (it's not the first one)
103 */
104 return !not_first;
105 fail:
106 return -1;
107}
108
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200109/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
110 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
111 * this case. Any other token found or any empty header field found will reset
112 * this flag, so that it accurately represents the token's presence at the last
113 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
114 * are case-insensitive (cf RFC7230#4).
115 */
116void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
117{
118 char *e, *n;
119 struct ist word;
120
121 h1m->flags |= H1_MF_XFER_ENC;
122 h1m->flags &= ~H1_MF_CHNK;
123
124 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
125 e = value.ptr + value.len;
126
127 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500128 /* skip leading delimiter and blanks */
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200129 if (HTTP_IS_LWS(*word.ptr))
130 continue;
131
132 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
133 word.len = n - word.ptr;
134
135 /* trim trailing blanks */
136 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
137 word.len--;
138
139 h1m->flags &= ~H1_MF_CHNK;
140 if (isteqi(word, ist("chunked")))
141 h1m->flags |= H1_MF_CHNK;
142
143 word.ptr = n;
144 }
145}
146
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200147/* Validate the authority and the host header value for CONNECT method. If there
148 * is hast header, its value is normalized. 0 is returned on success, -1 if the
149 * authority is invalid and -2 if the host is invalid.
150 */
151static int h1_validate_connect_authority(struct ist authority, struct ist *host_hdr)
152{
153 struct ist uri_host, uri_port, host, host_port;
154
155 if (!isttest(authority))
156 goto invalid_authority;
157 uri_host = authority;
158 uri_port = http_get_host_port(authority);
159 if (!isttest(uri_port))
160 goto invalid_authority;
161 uri_host.len -= (istlen(uri_port) + 1);
162
163 if (!host_hdr || !isttest(*host_hdr))
164 goto end;
165
166 /* Get the port of the host header value, if any */
167 host = *host_hdr;
168 host_port = http_get_host_port(*host_hdr);
169 if (isttest(host_port)) {
170 host.len -= (istlen(host_port) + 1);
171 if (!isteqi(host, uri_host) || !isteq(host_port, uri_port))
172 goto invalid_host;
173 if (http_is_default_port(IST_NULL, uri_port))
174 *host_hdr = host; /* normalize */
175 }
176 else {
177 if (!http_is_default_port(IST_NULL, uri_port) || !isteqi(host, uri_host))
178 goto invalid_host;
179 }
180
181 end:
182 return 0;
183
184 invalid_authority:
185 return -1;
186
187 invalid_host:
188 return -2;
189}
190
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200191/* Parse the Connection: header of an HTTP/1 request, looking for "close",
192 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
193 * what was found there. Note that flags are only added, not removed, so the
194 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100195 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
196 * up from "keep-alive" and "close" values. To do so, the header value is
197 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200198 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100199void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200200{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100201 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200202 struct ist word;
203
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100204 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
205 p = value->ptr;
206 e = value->ptr + value->len;
207 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
208 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200209
210 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500211 /* skip leading delimiter and blanks */
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200212 if (HTTP_IS_LWS(*word.ptr))
213 continue;
214
215 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
216 word.len = n - word.ptr;
217
218 /* trim trailing blanks */
219 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
220 word.len--;
221
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100222 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200223 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100224 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
225 goto skip_val;
226 }
227 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200228 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100229 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
230 goto skip_val;
231 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200232 else if (isteqi(word, ist("upgrade")))
233 h1m->flags |= H1_MF_CONN_UPG;
234
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100235 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
236 if (value->ptr + value->len == p) {
237 /* no rewrite done till now */
238 value->len = n - value->ptr;
239 }
240 else {
241 if (value->len)
242 value->ptr[value->len++] = ',';
243 istcat(value, word, e - value->ptr);
244 }
245 }
246
247 skip_val:
248 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200249 }
250}
251
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100252/* Parse the Upgrade: header of an HTTP/1 request.
253 * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
254 */
255void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
256{
257 char *e, *n;
258 struct ist word;
259
260 h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
261
262 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
263 e = value.ptr + value.len;
264
265 while (++word.ptr < e) {
266 /* skip leading delimiter and blanks */
267 if (HTTP_IS_LWS(*word.ptr))
268 continue;
269
270 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
271 word.len = n - word.ptr;
272
273 /* trim trailing blanks */
274 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
275 word.len--;
276
277 if (isteqi(word, ist("websocket")))
278 h1m->flags |= H1_MF_UPG_WEBSOCKET;
279
280 word.ptr = n;
281 }
282}
283
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to the next byte and yield in case of
 * failure.
 */
287
/* Checks that the byte at <ptr> is an LF. When it is, execution simply
 * continues after the macro. Otherwise <state> is set to <where> and the
 * macro jumps to label <bad>.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)                          \
	do {                                                            \
		if (likely(*(ptr) == '\n'))                             \
			break;                                          \
		state = (where);                                        \
		goto bad;                                               \
	} while (0)
298
/* Advances pointer <ptr> by one byte. If it reached pointer <end>, <state> is
 * set to <where> and the macro jumps to label <stop>. Otherwise it jumps to
 * label <more>. In both cases, execution never continues past the macro.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)      \
	do {                                                            \
		if (unlikely(++(ptr) >= (end))) {                       \
			state = (where);                                \
			goto stop;                                      \
		}                                                       \
		goto more;                                              \
	} while (0)
312
Willy Tarreau794f9af2017-07-26 09:07:47 +0200313/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
315 * pairs representing header fields into the array <hdr> of size <hdr_num>,
316 * whose last entry will have an empty name and an empty value. If <hdr_num> is
Willy Tarreau4433c082018-09-11 15:33:32 +0200317 * too small to represent the whole message, an error is returned. Some
318 * protocol elements such as content-length and transfer-encoding will be
Willy Tarreau5384aac2018-09-11 16:04:48 +0200319 * parsed and stored into h1m as well. <hdr> may be null, in which case only
320 * the parsing state will be updated. This may be used to restart the parsing
321 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200322 *
323 * For now it's limited to the response. If the header block is incomplete,
324 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200325 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200326 * and h1m->next to zero on the first call, the parser will do the rest. If
327 * an incomplete message is seen, the caller only needs to present h1m->state
328 * and h1m->next again, with an empty header list so that the parser can start
329 * again. In this case, it will detect that it interrupted a previous session
330 * and will first look for the end of the message before reparsing it again and
331 * indexing it at the same time. This ensures that incomplete messages fed 1
332 * character at a time are never processed entirely more than exactly twice,
333 * and that there is no need to store all the internal state and pre-parsed
334 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200335 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200336 * A pointer to a start line descriptor may be passed in <slp>, in which case
337 * the parser will fill it with whatever it found.
338 *
Willy Tarreau794f9af2017-07-26 09:07:47 +0200339 * The code derived from the main HTTP/1 parser above but was simplified and
340 * optimized to process responses produced or forwarded by haproxy. The caller
341 * is responsible for ensuring that the message doesn't wrap, and should ensure
342 * it is complete to avoid having to retry the operation after a failed
343 * attempt. The message is not supposed to be invalid, which is why a few
344 * properties such as the character set used in the header field names are not
345 * checked. In case of an unparsable response message, a negative value will be
346 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100348 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
349 * parsed and the start line is skipped. It is not required to set h1m->state
350 * nor h1m->next in this case.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200351 *
352 * This function returns :
353 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
Willy Tarreau801250e2018-09-11 11:45:04 +0200354 * set) with the state the error occurred in and h1m->err_pos with the
 *      position relative to <start>
356 * -2 if the output is full (hdr_num reached). err_state and err_pos also
357 * indicate where it failed.
358 * 0 in case of missing data.
359 * > 0 on success, it then corresponds to the number of bytes read since
360 * <start> so that the caller can go on with the payload.
361 */
362int h1_headers_to_hdr_list(char *start, const char *stop,
363 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200364 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200365{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200366 enum h1m_state state;
367 register char *ptr;
368 register const char *end;
369 unsigned int hdr_count;
370 unsigned int skip; /* number of bytes skipped at the beginning */
371 unsigned int sol; /* start of line */
372 unsigned int col; /* position of the colon */
373 unsigned int eol; /* end of line */
374 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200375 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200376 int skip_update;
377 int restarting;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200378 int host_idx;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200379 struct ist n, v; /* header name and value during parsing */
380
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200381 skip = 0; // do it only once to keep track of the leading CRLF.
382
383 try_again:
384 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200385 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200386 skip_update = restarting = 0;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200387 host_idx = -1;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200388
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100389 if (h1m->flags & H1_MF_HDRS_ONLY) {
390 state = H1_MSG_HDR_FIRST;
391 h1m->next = 0;
392 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100393 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100394 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100395 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
396 restarting = 1;
397 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100398
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200399 ptr = start + h1m->next;
400 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200401
Willy Tarreau794f9af2017-07-26 09:07:47 +0200402 if (unlikely(ptr >= end))
403 goto http_msg_ood;
404
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200405 /* don't update output if hdr is NULL or if we're restarting */
406 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200407 skip_update = 1;
408
Willy Tarreau794f9af2017-07-26 09:07:47 +0200409 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200410 case H1_MSG_RQBEFORE:
411 http_msg_rqbefore:
412 if (likely(HTTP_IS_TOKEN(*ptr))) {
413 /* we have a start of message, we may have skipped some
414 * heading CRLF. Skip them now.
415 */
416 skip += ptr - start;
417 start = ptr;
418
419 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200420 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200421 hdr_count = 0;
422 state = H1_MSG_RQMETH;
423 goto http_msg_rqmeth;
424 }
425
426 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
427 state = H1_MSG_RQBEFORE;
428 goto http_msg_invalid;
429 }
430
431 if (unlikely(*ptr == '\n'))
432 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
433 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
434 /* stop here */
435
436 case H1_MSG_RQBEFORE_CR:
437 http_msg_rqbefore_cr:
438 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
439 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
440 /* stop here */
441
442 case H1_MSG_RQMETH:
443 http_msg_rqmeth:
444 if (likely(HTTP_IS_TOKEN(*ptr)))
445 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
446
447 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200448 sl.rq.m.len = ptr - sl.rq.m.ptr;
449 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200450 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
451 }
452
453 if (likely(HTTP_IS_CRLF(*ptr))) {
454 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200455 sl.rq.m.len = ptr - sl.rq.m.ptr;
456 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200457 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200458 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200459 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200460 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200461 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200462 sl.rq.v.ptr = ptr;
463 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200464 goto http_msg_rqline_eol;
465 }
466 state = H1_MSG_RQMETH;
467 goto http_msg_invalid;
468
469 case H1_MSG_RQMETH_SP:
470 http_msg_rqmeth_sp:
471 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200472 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200473 goto http_msg_rquri;
474 }
475 if (likely(HTTP_IS_SPHT(*ptr)))
476 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
477 /* so it's a CR/LF, meaning an HTTP 0.9 request */
478 goto http_msg_req09_uri;
479
480 case H1_MSG_RQURI:
481 http_msg_rquri:
Willy Tarreau02ac9502020-02-21 16:31:22 +0100482#ifdef HA_UNALIGNED_LE
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200483 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
484 while (ptr <= end - sizeof(int)) {
485 int x = *(int *)ptr - 0x21212121;
486 if (x & 0x80808080)
487 break;
488
489 x -= 0x5e5e5e5e;
490 if (!(x & 0x80808080))
491 break;
492
493 ptr += sizeof(int);
494 }
495#endif
496 if (ptr >= end) {
497 state = H1_MSG_RQURI;
498 goto http_msg_ood;
499 }
500 http_msg_rquri2:
501 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
502 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
503
504 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200505 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200506 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
507 }
508 if (likely((unsigned char)*ptr >= 128)) {
509 /* non-ASCII chars are forbidden unless option
510 * accept-invalid-http-request is enabled in the frontend.
511 * In any case, we capture the faulty char.
512 */
513 if (h1m->err_pos < -1)
514 goto invalid_char;
515 if (h1m->err_pos == -1)
516 h1m->err_pos = ptr - start + skip;
517 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
518 }
519
520 if (likely(HTTP_IS_CRLF(*ptr))) {
521 /* so it's a CR/LF, meaning an HTTP 0.9 request */
522 goto http_msg_req09_uri_e;
523 }
524
525 /* OK forbidden chars, 0..31 or 127 */
526 invalid_char:
527 state = H1_MSG_RQURI;
528 goto http_msg_invalid;
529
530 case H1_MSG_RQURI_SP:
531 http_msg_rquri_sp:
532 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200533 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200534 goto http_msg_rqver;
535 }
536 if (likely(HTTP_IS_SPHT(*ptr)))
537 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
538 /* so it's a CR/LF, meaning an HTTP 0.9 request */
539 goto http_msg_req09_ver;
540
541
542 case H1_MSG_RQVER:
543 http_msg_rqver:
544 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
545 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
546
547 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200548 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200549 http_msg_rqline_eol:
550 /* We have seen the end of line. Note that we do not
551 * necessarily have the \n yet, but at least we know that we
552 * have EITHER \r OR \n, otherwise the request would not be
553 * complete. We can then record the request length and return
554 * to the caller which will be able to register it.
555 */
556
557 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200558 if ((sl.rq.v.len == 8) &&
559 (*(sl.rq.v.ptr + 5) > '1' ||
560 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200561 h1m->flags |= H1_MF_VER_11;
562
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200563 if (unlikely(hdr_count >= hdr_num)) {
564 state = H1_MSG_RQVER;
565 goto http_output_full;
566 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200567 if (!(h1m->flags & H1_MF_NO_PHDR))
568 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200569
570 if (unlikely(hdr_count >= hdr_num)) {
571 state = H1_MSG_RQVER;
572 goto http_output_full;
573 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200574 if (!(h1m->flags & H1_MF_NO_PHDR))
575 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200576 }
577
578 sol = ptr - start;
579 if (likely(*ptr == '\r'))
580 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
581 goto http_msg_rqline_end;
582 }
583
584 /* neither an HTTP_VER token nor a CRLF */
585 state = H1_MSG_RQVER;
586 goto http_msg_invalid;
587
588 case H1_MSG_RQLINE_END:
589 http_msg_rqline_end:
590 /* check for HTTP/0.9 request : no version information
591 * available. sol must point to the first of CR or LF. However
592 * since we don't save these elements between calls, if we come
593 * here from a restart, we don't necessarily know. Thus in this
594 * case we simply start over.
595 */
596 if (restarting)
597 goto restart;
598
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200599 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200600 goto http_msg_last_lf;
601
602 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
603 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
604 /* stop here */
605
606 /*
607 * Common states below
608 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200609 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200610 http_msg_rpbefore:
611 if (likely(HTTP_IS_TOKEN(*ptr))) {
612 /* we have a start of message, we may have skipped some
613 * heading CRLF. Skip them now.
614 */
615 skip += ptr - start;
616 start = ptr;
617
618 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200619 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200620 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200621 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200622 goto http_msg_rpver;
623 }
624
625 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200626 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200627 goto http_msg_invalid;
628 }
629
630 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200631 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
632 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200633 /* stop here */
634
Willy Tarreau801250e2018-09-11 11:45:04 +0200635 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200636 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200637 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
638 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200639 /* stop here */
640
Willy Tarreau801250e2018-09-11 11:45:04 +0200641 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200642 http_msg_rpver:
643 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200644 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200645
646 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200647 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200648
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200649 if ((sl.st.v.len == 8) &&
650 (*(sl.st.v.ptr + 5) > '1' ||
651 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200652 h1m->flags |= H1_MF_VER_11;
653
Willy Tarreau801250e2018-09-11 11:45:04 +0200654 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200655 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200656 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200657 goto http_msg_invalid;
658
Willy Tarreau801250e2018-09-11 11:45:04 +0200659 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200660 http_msg_rpver_sp:
661 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200662 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200663 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200664 goto http_msg_rpcode;
665 }
666 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200667 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200668 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200669 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200670 goto http_msg_invalid;
671
Willy Tarreau801250e2018-09-11 11:45:04 +0200672 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200673 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100674 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200675 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200676 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200677 }
678
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100679 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200680 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100681 goto http_msg_invalid;
682 }
683
Willy Tarreau794f9af2017-07-26 09:07:47 +0200684 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200685 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200686 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200687 }
688
689 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200690 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200691
692 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200693 sl.st.r.ptr = ptr;
694 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200695 goto http_msg_rpline_eol;
696
Willy Tarreau801250e2018-09-11 11:45:04 +0200697 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200698 http_msg_rpcode_sp:
699 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200700 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200701 goto http_msg_rpreason;
702 }
703 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200704 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200705 /* so it's a CR/LF, so there is no reason phrase */
706 goto http_msg_rsp_reason;
707
Willy Tarreau801250e2018-09-11 11:45:04 +0200708 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200709 http_msg_rpreason:
710 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200711 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200712 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200713 http_msg_rpline_eol:
714 /* We have seen the end of line. Note that we do not
715 * necessarily have the \n yet, but at least we know that we
716 * have EITHER \r OR \n, otherwise the response would not be
717 * complete. We can then record the response length and return
718 * to the caller which will be able to register it.
719 */
720
Willy Tarreau5384aac2018-09-11 16:04:48 +0200721 if (likely(!skip_update)) {
722 if (unlikely(hdr_count >= hdr_num)) {
723 state = H1_MSG_RPREASON;
724 goto http_output_full;
725 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200726 if (!(h1m->flags & H1_MF_NO_PHDR))
727 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200728 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200729
730 sol = ptr - start;
731 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200732 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200733 goto http_msg_rpline_end;
734
Willy Tarreau801250e2018-09-11 11:45:04 +0200735 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200736 http_msg_rpline_end:
737 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200738 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
739 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200740 /* stop here */
741
Willy Tarreau801250e2018-09-11 11:45:04 +0200742 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200743 http_msg_hdr_first:
744 sol = ptr - start;
745 if (likely(!HTTP_IS_CRLF(*ptr))) {
746 goto http_msg_hdr_name;
747 }
748
749 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200750 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200751 goto http_msg_last_lf;
752
Willy Tarreau801250e2018-09-11 11:45:04 +0200753 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200754 http_msg_hdr_name:
755 /* assumes sol points to the first char */
756 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200757 if (!skip_update) {
758 /* turn it to lower case if needed */
759 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
Willy Tarreauf278eec2020-07-05 21:46:32 +0200760 *ptr = tolower((unsigned char)*ptr);
Christopher Faulet2912f872018-09-19 14:01:04 +0200761 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200762 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200763 }
764
765 if (likely(*ptr == ':')) {
766 col = ptr - start;
Willy Tarreau486cd732023-02-09 21:36:54 +0100767 if (col <= sol) {
768 state = H1_MSG_HDR_NAME;
769 goto http_msg_invalid;
770 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200771 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200772 }
773
Willy Tarreau9aec3052018-09-12 09:20:40 +0200774 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200775 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200776 goto http_msg_invalid;
777 }
778
Willy Tarreau9aec3052018-09-12 09:20:40 +0200779 if (h1m->err_pos == -1) /* capture the error pointer */
780 h1m->err_pos = ptr - start + skip; /* >= 0 now */
781
782 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200783 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200784
Willy Tarreau801250e2018-09-11 11:45:04 +0200785 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200786 http_msg_hdr_l1_sp:
787 /* assumes sol points to the first char */
788 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200789 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200790
791 /* header value can be basically anything except CR/LF */
792 sov = ptr - start;
793
794 if (likely(!HTTP_IS_CRLF(*ptr))) {
795 goto http_msg_hdr_val;
796 }
797
798 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200799 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200800 goto http_msg_hdr_l1_lf;
801
Willy Tarreau801250e2018-09-11 11:45:04 +0200802 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200803 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200804 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
805 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200806
Willy Tarreau801250e2018-09-11 11:45:04 +0200807 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200808 http_msg_hdr_l1_lws:
809 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200810 if (!skip_update) {
811 /* replace HT,CR,LF with spaces */
812 for (; start + sov < ptr; sov++)
813 start[sov] = ' ';
814 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200815 goto http_msg_hdr_l1_sp;
816 }
817 /* we had a header consisting only in spaces ! */
818 eol = sov;
819 goto http_msg_complete_header;
820
Willy Tarreau801250e2018-09-11 11:45:04 +0200821 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200822 http_msg_hdr_val:
823 /* assumes sol points to the first char, and sov
824 * points to the first character of the value.
825 */
826
827 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
828 * and lower. In fact since most of the time is spent in the loop, we
829 * also remove the sign bit test so that bytes 0x8e..0x0d break the
830 * loop, but we don't care since they're very rare in header values.
831 */
Willy Tarreau02ac9502020-02-21 16:31:22 +0100832#ifdef HA_UNALIGNED_LE64
Willy Tarreau794f9af2017-07-26 09:07:47 +0200833 while (ptr <= end - sizeof(long)) {
834 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
835 goto http_msg_hdr_val2;
836 ptr += sizeof(long);
837 }
838#endif
Willy Tarreau02ac9502020-02-21 16:31:22 +0100839#ifdef HA_UNALIGNED_LE
Willy Tarreau794f9af2017-07-26 09:07:47 +0200840 while (ptr <= end - sizeof(int)) {
841 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
842 goto http_msg_hdr_val2;
843 ptr += sizeof(int);
844 }
845#endif
846 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200847 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200848 goto http_msg_ood;
849 }
850 http_msg_hdr_val2:
851 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200852 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200853
854 eol = ptr - start;
855 /* Note: we could also copy eol into ->eoh so that we have the
856 * real header end in case it ends with lots of LWS, but is this
857 * really needed ?
858 */
859 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200860 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200861 goto http_msg_hdr_l2_lf;
862
Willy Tarreau801250e2018-09-11 11:45:04 +0200863 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200864 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200865 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
866 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200867
Willy Tarreau801250e2018-09-11 11:45:04 +0200868 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200869 http_msg_hdr_l2_lws:
870 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200871 if (!skip_update) {
872 /* LWS: replace HT,CR,LF with spaces */
873 for (; start + eol < ptr; eol++)
874 start[eol] = ' ';
875 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200876 goto http_msg_hdr_val;
877 }
878 http_msg_complete_header:
879 /*
880 * It was a new header, so the last one is finished. Assumes
881 * <sol> points to the first char of the name, <col> to the
882 * colon, <sov> points to the first character of the value and
883 * <eol> to the first CR or LF so we know how the line ends. We
884 * will trim spaces around the value. It's possible to do it by
885 * adjusting <eol> and <sov> which are no more used after this.
886 * We can add the header field to the list.
887 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200888 if (likely(!skip_update)) {
889 while (sov < eol && HTTP_IS_LWS(start[sov]))
890 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200891
Christopher Faulet2912f872018-09-19 14:01:04 +0200892 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
893 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200894
895
Christopher Faulet2912f872018-09-19 14:01:04 +0200896 n = ist2(start + sol, col - sol);
897 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200898
Christopher Faulet2912f872018-09-19 14:01:04 +0200899 do {
900 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200901
Christopher Faulet2912f872018-09-19 14:01:04 +0200902 if (unlikely(hdr_count >= hdr_num)) {
903 state = H1_MSG_HDR_L2_LWS;
904 goto http_output_full;
905 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200906
Christopher Faulet2912f872018-09-19 14:01:04 +0200907 if (isteqi(n, ist("transfer-encoding"))) {
908 h1_parse_xfer_enc_header(h1m, v);
909 }
910 else if (isteqi(n, ist("content-length"))) {
911 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200912
Christopher Faulet2912f872018-09-19 14:01:04 +0200913 if (ret < 0) {
914 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100915 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet2912f872018-09-19 14:01:04 +0200916 goto http_msg_invalid;
917 }
918 else if (ret == 0) {
919 /* skip it */
920 break;
921 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200922 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200923 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100924 h1_parse_connection_header(h1m, &v);
925 if (!v.len) {
926 /* skip it */
927 break;
928 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200929 }
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100930 else if (isteqi(n, ist("upgrade"))) {
931 h1_parse_upgrade_header(h1m, v);
932 }
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200933 else if (!(h1m->flags & H1_MF_RESP) && isteqi(n, ist("host"))) {
934 if (host_idx == -1)
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200935 host_idx = hdr_count;
936 else {
937 if (!isteqi(v, hdr[host_idx].v)) {
938 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100939 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200940 goto http_msg_invalid;
941 }
942 /* if the same host, skip it */
943 break;
944 }
945 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200946
Christopher Faulet2912f872018-09-19 14:01:04 +0200947 http_set_hdr(&hdr[hdr_count++], n, v);
948 } while (0);
949 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200950
951 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200952
Willy Tarreau794f9af2017-07-26 09:07:47 +0200953 if (likely(!HTTP_IS_CRLF(*ptr)))
954 goto http_msg_hdr_name;
955
956 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200957 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200958 goto http_msg_last_lf;
959
Willy Tarreau801250e2018-09-11 11:45:04 +0200960 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200961 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200962 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200963 ptr++;
964 /* <ptr> now points to the first byte of payload. If needed sol
965 * still points to the first of either CR or LF of the empty
966 * line ending the headers block.
967 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200968 if (likely(!skip_update)) {
969 if (unlikely(hdr_count >= hdr_num)) {
970 state = H1_MSG_LAST_LF;
971 goto http_output_full;
972 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200973 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200974 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200975
976 /* reaching here we've parsed the whole message. We may detect
977 * that we were already continuing an interrupted parsing pass
978 * so we were silently looking for the end of message not
979 * updating anything before deciding to parse it fully at once.
980 * It's guaranteed that we won't match this test twice in a row
981 * since restarting will turn zero.
982 */
983 if (restarting)
984 goto restart;
985
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200986
987 if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) {
988 struct ist authority;
989
990 authority = http_get_authority(sl.rq.u, 1);
991 if (sl.rq.meth == HTTP_METH_CONNECT) {
992 struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL);
993 int ret;
994
995 ret = h1_validate_connect_authority(authority, host);
996 if (ret < 0) {
997 if (h1m->err_pos < -1) {
998 state = H1_MSG_LAST_LF;
Willy Tarreaue21ad302022-10-04 08:02:03 +0200999 /* WT: gcc seems to see a path where sl.rq.u.ptr was used
1000 * uninitialized, but it doesn't know that the function is
1001 * called with initial states making this impossible.
1002 */
1003 ALREADY_CHECKED(sl.rq.u.ptr);
Christopher Faulet63f95ed2022-07-05 14:50:17 +02001004 ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
1005 goto http_msg_invalid;
1006 }
1007 if (h1m->err_pos == -1) /* capture the error pointer */
1008 h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
1009 }
1010 }
1011 else if (host_idx != -1 && istlen(authority)) {
1012 struct ist host = hdr[host_idx].v;
1013
1014 /* For non-CONNECT method, the authority must match the host header value */
1015 if (!isteqi(authority, host)) {
1016 if (h1m->err_pos < -1) {
1017 state = H1_MSG_LAST_LF;
1018 ptr = host.ptr; /* Set ptr on the error */
1019 goto http_msg_invalid;
1020 }
1021 if (h1m->err_pos == -1) /* capture the error pointer */
1022 h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
1023 }
1024
1025 }
1026 }
1027
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001028 state = H1_MSG_DATA;
1029 if (h1m->flags & H1_MF_XFER_ENC) {
1030 if (h1m->flags & H1_MF_CLEN) {
1031 h1m->flags &= ~H1_MF_CLEN;
1032 hdr_count = http_del_hdr(hdr, ist("content-length"));
1033 }
1034
1035 if (h1m->flags & H1_MF_CHNK)
1036 state = H1_MSG_CHUNK_SIZE;
1037 else if (!(h1m->flags & H1_MF_RESP)) {
1038 /* cf RFC7230#3.3.3 : transfer-encoding in
1039 * request without chunked encoding is invalid.
1040 */
1041 goto http_msg_invalid;
1042 }
1043 }
1044
Willy Tarreau794f9af2017-07-26 09:07:47 +02001045 break;
1046
1047 default:
1048 /* impossible states */
1049 goto http_msg_invalid;
1050 }
1051
Willy Tarreau001823c2018-09-12 17:25:32 +02001052 /* Now we've left the headers state and are either in H1_MSG_DATA or
1053 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001054 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001055
Willy Tarreau5384aac2018-09-11 16:04:48 +02001056 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001057 *slp = sl;
1058
Willy Tarreau4433c082018-09-11 15:33:32 +02001059 h1m->state = state;
1060 h1m->next = ptr - start + skip;
1061 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001062
1063 http_msg_ood:
1064 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001065 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001066 *slp = sl;
1067
Willy Tarreau4433c082018-09-11 15:33:32 +02001068 h1m->state = state;
1069 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001070 return 0;
1071
1072 http_msg_invalid:
1073 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001074 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001075 *slp = sl;
1076
Willy Tarreau4433c082018-09-11 15:33:32 +02001077 h1m->err_state = h1m->state = state;
1078 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001079 return -1;
1080
1081 http_output_full:
1082 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001083 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001084 *slp = sl;
1085
Willy Tarreau4433c082018-09-11 15:33:32 +02001086 h1m->err_state = h1m->state = state;
1087 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001088 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001089
1090 restart:
Christopher Faulet84f06532019-09-03 16:05:31 +02001091 h1m->flags &= ~(H1_MF_VER_11|H1_MF_CLEN|H1_MF_XFER_ENC|H1_MF_CHNK|H1_MF_CONN_KAL|H1_MF_CONN_CLO|H1_MF_CONN_UPG);
1092 h1m->curr_len = h1m->body_len = h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001093 if (h1m->flags & H1_MF_RESP)
1094 h1m->state = H1_MSG_RPBEFORE;
1095 else
1096 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001097 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001098}
1099
/* Minimal scan of a trailers block located at offset <ofs> in <buf>, looking
 * at no more than <max> bytes. Lines are walked one at a time until an empty
 * line (a lone LF or a CRLF) is met, which terminates the trailers.
 *
 * Returns:
 *   > 0 : number of bytes covered by the trailers, final empty line included,
 *         i.e. what the caller has to skip;
 *     0 : the end of the inspected area was reached before the terminating
 *         empty line was seen (more input is needed);
 *   < 0 : parse error (a second CR was found on a line before its LF); the
 *         caller decides how to proceed.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *stop = b_peek(buf, ofs + max);
	int count = ofs;

	for (;;) {
		const char *line = b_peek(buf, count);
		const char *eol = NULL; /* first CR or LF met on this line */
		const char *cur;

		/* walk the current line up to its LF */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == stop)
				return 0; /* ran out of data before the LF */

			if (*cur == '\n')
				break;

			if (*cur == '\r') {
				/* only one CR is tolerated before the LF */
				if (eol)
					return -1;
				eol = cur;
			}
		}

		/* no CR seen: the LF itself is where the line ends */
		if (!eol)
			eol = cur;

		/* account for the whole line, its LF included */
		count += b_dist(buf, line, b_next(buf, cur));

		/* A line starting with LF or CRLF is the empty line closing
		 * the trailers: nothing is left from this message after it.
		 */
		if (eol == line)
			return count - ofs;
	}
}
Amaury Denoyellec1938232020-12-11 17:53:03 +01001150
/* Produce a random key for a WebSocket handshake as described in RFC 6455.
 * Two 64-bit random values form the 128-bit raw key, which is then passed to
 * a2base64() to be encoded into <key_out>, a 25-byte output area.
 */
void h1_generate_random_ws_input_key(char key_out[25])
{
	uint64_t rnd[2];
	char raw[sizeof(rnd)];

	/* draw the 128 random bits, 64 at a time */
	rnd[0] = ha_random64();
	rnd[1] = ha_random64();

	memcpy(raw, rnd, sizeof(raw));
	a2base64(raw, sizeof(raw), key_out, 25);
}
1165
Amaury Denoyellec1938232020-12-11 17:53:03 +01001166#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
1167
1168/*
1169 * Calculate the WebSocket handshake response key from <key_in>. Following the
1170 * rfc6455, <key_in> must be 24 bytes longs. The result is stored in <key_out>
1171 * as a 29 bytes long string.
1172 */
1173void h1_calculate_ws_output_key(const char *key, char *result)
1174{
1175 blk_SHA_CTX sha1_ctx;
1176 char hash_in[60], hash_out[20];
1177
1178 /* concatenate the key with a fixed suffix */
1179 memcpy(hash_in, key, 24);
1180 memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
1181
1182 /* sha1 the result */
1183 blk_SHA1_Init(&sha1_ctx);
1184 blk_SHA1_Update(&sha1_ctx, hash_in, 60);
1185 blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
1186
1187 /* encode in base64 the hash */
1188 a2base64(hash_out, 20, result, 29);
1189}