blob: 42fe6705b7b55560cfb78cc7463a43f713623a62 [file] [log] [blame]
/*
 * HTTP/1 protocol analyzer
 *
 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010014
15#include <import/sha1.h>
16
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020017#include <haproxy/api.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010018#include <haproxy/base64.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020019#include <haproxy/h1.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020020#include <haproxy/http-hdr.h>
Amaury Denoyelleaad333a2020-12-11 17:53:07 +010021#include <haproxy/tools.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020022
Willy Tarreau73373ab2018-09-14 17:11:33 +020023/* Parse the Content-Length header field of an HTTP/1 request. The function
24 * checks all possible occurrences of a comma-delimited value, and verifies
25 * if any of them doesn't match a previous value. It returns <0 if a value
26 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
27 * if the value can be indexed (first one). In the last case, the value might
28 * be adjusted and the caller must only add the updated value.
29 */
30int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
31{
32 char *e, *n;
33 long long cl;
34 int not_first = !!(h1m->flags & H1_MF_CLEN);
35 struct ist word;
36
Willy Tarreauba9afd22023-08-09 08:32:48 +020037 word.ptr = value->ptr;
Willy Tarreau73373ab2018-09-14 17:11:33 +020038 e = value->ptr + value->len;
39
Willy Tarreauba9afd22023-08-09 08:32:48 +020040 while (1) {
41 if (word.ptr >= e) {
42 /* empty header or empty value */
43 goto fail;
44 }
45
Ilya Shipitsin47d17182020-06-21 21:42:57 +050046 /* skip leading delimiter and blanks */
Willy Tarreauba9afd22023-08-09 08:32:48 +020047 if (unlikely(HTTP_IS_LWS(*word.ptr))) {
48 word.ptr++;
Willy Tarreau73373ab2018-09-14 17:11:33 +020049 continue;
Willy Tarreauba9afd22023-08-09 08:32:48 +020050 }
Willy Tarreau73373ab2018-09-14 17:11:33 +020051
52 /* digits only now */
53 for (cl = 0, n = word.ptr; n < e; n++) {
54 unsigned int c = *n - '0';
55 if (unlikely(c > 9)) {
56 /* non-digit */
57 if (unlikely(n == word.ptr)) // spaces only
58 goto fail;
59 break;
60 }
Willy Tarreauc48acd12023-08-09 11:02:34 +020061
62 if (unlikely(!cl && n > word.ptr)) {
63 /* There was a leading zero before this digit,
64 * let's trim it.
65 */
66 word.ptr = n;
67 }
68
Willy Tarreau73373ab2018-09-14 17:11:33 +020069 if (unlikely(cl > ULLONG_MAX / 10ULL))
70 goto fail; /* multiply overflow */
71 cl = cl * 10ULL;
72 if (unlikely(cl + c < cl))
73 goto fail; /* addition overflow */
74 cl = cl + c;
75 }
76
77 /* keep a copy of the exact cleaned value */
78 word.len = n - word.ptr;
79
80 /* skip trailing LWS till next comma or EOL */
81 for (; n < e; n++) {
82 if (!HTTP_IS_LWS(*n)) {
83 if (unlikely(*n != ','))
84 goto fail;
85 break;
86 }
87 }
88
89 /* if duplicate, must be equal */
90 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
91 goto fail;
92
93 /* OK, store this result as the one to be indexed */
94 h1m->flags |= H1_MF_CLEN;
95 h1m->curr_len = h1m->body_len = cl;
96 *value = word;
Willy Tarreauba9afd22023-08-09 08:32:48 +020097
98 /* Now either n==e and we're done, or n points to the comma,
99 * and we skip it and continue.
100 */
101 if (n++ == e)
102 break;
103
Willy Tarreau73373ab2018-09-14 17:11:33 +0200104 word.ptr = n;
105 }
106 /* here we've reached the end with a single value or a series of
107 * identical values, all matching previous series if any. The last
108 * parsed value was sent back into <value>. We just have to decide
109 * if this occurrence has to be indexed (it's the first one) or
110 * silently skipped (it's not the first one)
111 */
112 return !not_first;
113 fail:
114 return -1;
115}
116
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200117/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
118 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
119 * this case. Any other token found or any empty header field found will reset
120 * this flag, so that it accurately represents the token's presence at the last
121 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
122 * are case-insensitive (cf RFC7230#4).
123 */
124void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
125{
126 char *e, *n;
127 struct ist word;
128
129 h1m->flags |= H1_MF_XFER_ENC;
130 h1m->flags &= ~H1_MF_CHNK;
131
132 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
133 e = value.ptr + value.len;
134
135 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500136 /* skip leading delimiter and blanks */
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200137 if (HTTP_IS_LWS(*word.ptr))
138 continue;
139
140 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
141 word.len = n - word.ptr;
142
143 /* trim trailing blanks */
144 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
145 word.len--;
146
147 h1m->flags &= ~H1_MF_CHNK;
148 if (isteqi(word, ist("chunked")))
149 h1m->flags |= H1_MF_CHNK;
150
151 word.ptr = n;
152 }
153}
154
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200155/* Validate the authority and the host header value for CONNECT method. If there
156 * is hast header, its value is normalized. 0 is returned on success, -1 if the
157 * authority is invalid and -2 if the host is invalid.
158 */
159static int h1_validate_connect_authority(struct ist authority, struct ist *host_hdr)
160{
161 struct ist uri_host, uri_port, host, host_port;
162
163 if (!isttest(authority))
164 goto invalid_authority;
165 uri_host = authority;
166 uri_port = http_get_host_port(authority);
167 if (!isttest(uri_port))
168 goto invalid_authority;
169 uri_host.len -= (istlen(uri_port) + 1);
170
171 if (!host_hdr || !isttest(*host_hdr))
172 goto end;
173
174 /* Get the port of the host header value, if any */
175 host = *host_hdr;
176 host_port = http_get_host_port(*host_hdr);
177 if (isttest(host_port)) {
178 host.len -= (istlen(host_port) + 1);
179 if (!isteqi(host, uri_host) || !isteq(host_port, uri_port))
180 goto invalid_host;
181 if (http_is_default_port(IST_NULL, uri_port))
182 *host_hdr = host; /* normalize */
183 }
184 else {
185 if (!http_is_default_port(IST_NULL, uri_port) || !isteqi(host, uri_host))
186 goto invalid_host;
187 }
188
189 end:
190 return 0;
191
192 invalid_authority:
193 return -1;
194
195 invalid_host:
196 return -2;
197}
198
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200199/* Parse the Connection: header of an HTTP/1 request, looking for "close",
200 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
201 * what was found there. Note that flags are only added, not removed, so the
202 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100203 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
204 * up from "keep-alive" and "close" values. To do so, the header value is
205 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200206 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100207void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200208{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100209 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200210 struct ist word;
211
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100212 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
213 p = value->ptr;
214 e = value->ptr + value->len;
215 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
216 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200217
218 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500219 /* skip leading delimiter and blanks */
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200220 if (HTTP_IS_LWS(*word.ptr))
221 continue;
222
223 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
224 word.len = n - word.ptr;
225
226 /* trim trailing blanks */
227 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
228 word.len--;
229
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100230 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200231 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100232 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
233 goto skip_val;
234 }
235 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200236 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100237 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
238 goto skip_val;
239 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200240 else if (isteqi(word, ist("upgrade")))
241 h1m->flags |= H1_MF_CONN_UPG;
242
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100243 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
244 if (value->ptr + value->len == p) {
245 /* no rewrite done till now */
246 value->len = n - value->ptr;
247 }
248 else {
249 if (value->len)
250 value->ptr[value->len++] = ',';
251 istcat(value, word, e - value->ptr);
252 }
253 }
254
255 skip_val:
256 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200257 }
258}
259
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100260/* Parse the Upgrade: header of an HTTP/1 request.
261 * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
262 */
263void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
264{
265 char *e, *n;
266 struct ist word;
267
268 h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
269
270 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
271 e = value.ptr + value.len;
272
273 while (++word.ptr < e) {
274 /* skip leading delimiter and blanks */
275 if (HTTP_IS_LWS(*word.ptr))
276 continue;
277
278 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
279 word.len = n - word.ptr;
280
281 /* trim trailing blanks */
282 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
283 word.len--;
284
285 if (isteqi(word, ist("websocket")))
286 h1m->flags |= H1_MF_UPG_WEBSOCKET;
287
288 word.ptr = n;
289 }
290}
291
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
 */
295
/* Checks that the byte at <ptr> is an LF. When it is not, <state> is set to
 * <where> and control jumps to label <bad>; otherwise execution simply
 * continues after the macro.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)				\
	do {								\
		if (likely(*(ptr) == '\n'))				\
			break;						\
		state = (where);					\
		goto bad;						\
	} while (0)

/* Advances pointer <ptr> by one byte. If it reached pointer <end>, <state> is
 * set to <where> and control jumps to label <stop>; otherwise control jumps
 * to label <more>. Execution never continues past this macro.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)	\
	do {								\
		if (unlikely(++(ptr) >= (end))) {			\
			state = (where);				\
			goto stop;					\
		}							\
		goto more;						\
	} while (0)
320
/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
 * pairs representing header fields, stored into the array <hdr> of size
 * <hdr_num>, whose last entry will have an empty name and an empty value. If
 * <hdr_num> is too small to represent the whole message, an error is returned.
 * Some protocol elements such as content-length and transfer-encoding will be
 * parsed and stored into h1m as well. <hdr> may be null, in which case only
 * the parsing state will be updated. This may be used to restart the parsing
 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200330 *
331 * For now it's limited to the response. If the header block is incomplete,
332 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200333 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200334 * and h1m->next to zero on the first call, the parser will do the rest. If
335 * an incomplete message is seen, the caller only needs to present h1m->state
336 * and h1m->next again, with an empty header list so that the parser can start
337 * again. In this case, it will detect that it interrupted a previous session
338 * and will first look for the end of the message before reparsing it again and
339 * indexing it at the same time. This ensures that incomplete messages fed 1
340 * character at a time are never processed entirely more than exactly twice,
341 * and that there is no need to store all the internal state and pre-parsed
342 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200343 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200344 * A pointer to a start line descriptor may be passed in <slp>, in which case
345 * the parser will fill it with whatever it found.
346 *
 * The code is derived from the main HTTP/1 parser above but was simplified and
 * optimized to process responses produced or forwarded by haproxy. The caller
 * is responsible for ensuring that the message doesn't wrap, and should ensure
 * it is complete to avoid having to retry the operation after a failed
 * attempt. The message is not supposed to be invalid, which is why a few
 * properties such as the character set used in the header field names are not
 * checked. In case of an unparsable response message, a negative value will be
 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
 * parsed and the start line is skipped. It is not required to set h1m->state
 * nor h1m->next in this case.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200359 *
 * This function returns :
 *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
 *    -2 if the output is full (hdr_num reached). err_state and err_pos also
 *       indicate where it failed.
 *     0 in case of missing data.
 *   > 0 on success, it then corresponds to the number of bytes read since
 *       <start> so that the caller can go on with the payload.
 */
370int h1_headers_to_hdr_list(char *start, const char *stop,
371 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200372 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200373{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200374 enum h1m_state state;
375 register char *ptr;
376 register const char *end;
377 unsigned int hdr_count;
378 unsigned int skip; /* number of bytes skipped at the beginning */
379 unsigned int sol; /* start of line */
380 unsigned int col; /* position of the colon */
381 unsigned int eol; /* end of line */
382 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200383 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200384 int skip_update;
385 int restarting;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200386 int host_idx;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200387 struct ist n, v; /* header name and value during parsing */
388
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200389 skip = 0; // do it only once to keep track of the leading CRLF.
390
391 try_again:
392 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200393 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200394 skip_update = restarting = 0;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200395 host_idx = -1;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200396
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100397 if (h1m->flags & H1_MF_HDRS_ONLY) {
398 state = H1_MSG_HDR_FIRST;
399 h1m->next = 0;
400 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100401 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100402 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100403 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
404 restarting = 1;
405 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100406
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200407 ptr = start + h1m->next;
408 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200409
Willy Tarreau794f9af2017-07-26 09:07:47 +0200410 if (unlikely(ptr >= end))
411 goto http_msg_ood;
412
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200413 /* don't update output if hdr is NULL or if we're restarting */
414 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200415 skip_update = 1;
416
Willy Tarreau794f9af2017-07-26 09:07:47 +0200417 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200418 case H1_MSG_RQBEFORE:
419 http_msg_rqbefore:
420 if (likely(HTTP_IS_TOKEN(*ptr))) {
421 /* we have a start of message, we may have skipped some
422 * heading CRLF. Skip them now.
423 */
424 skip += ptr - start;
425 start = ptr;
426
427 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200428 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200429 hdr_count = 0;
430 state = H1_MSG_RQMETH;
431 goto http_msg_rqmeth;
432 }
433
434 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
435 state = H1_MSG_RQBEFORE;
436 goto http_msg_invalid;
437 }
438
439 if (unlikely(*ptr == '\n'))
440 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
441 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
442 /* stop here */
443
444 case H1_MSG_RQBEFORE_CR:
445 http_msg_rqbefore_cr:
446 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
447 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
448 /* stop here */
449
450 case H1_MSG_RQMETH:
451 http_msg_rqmeth:
452 if (likely(HTTP_IS_TOKEN(*ptr)))
453 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
454
455 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200456 sl.rq.m.len = ptr - sl.rq.m.ptr;
457 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200458 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
459 }
460
461 if (likely(HTTP_IS_CRLF(*ptr))) {
462 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200463 sl.rq.m.len = ptr - sl.rq.m.ptr;
464 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200465 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200466 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200467 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200468 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200469 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200470 sl.rq.v.ptr = ptr;
471 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200472 goto http_msg_rqline_eol;
473 }
474 state = H1_MSG_RQMETH;
475 goto http_msg_invalid;
476
477 case H1_MSG_RQMETH_SP:
478 http_msg_rqmeth_sp:
479 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200480 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200481 goto http_msg_rquri;
482 }
483 if (likely(HTTP_IS_SPHT(*ptr)))
484 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
485 /* so it's a CR/LF, meaning an HTTP 0.9 request */
486 goto http_msg_req09_uri;
487
488 case H1_MSG_RQURI:
489 http_msg_rquri:
Willy Tarreau02ac9502020-02-21 16:31:22 +0100490#ifdef HA_UNALIGNED_LE
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200491 /* speedup: skip bytes not between 0x24 and 0x7e inclusive */
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200492 while (ptr <= end - sizeof(int)) {
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200493 int x = *(int *)ptr - 0x24242424;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200494 if (x & 0x80808080)
495 break;
496
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200497 x -= 0x5b5b5b5b;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200498 if (!(x & 0x80808080))
499 break;
500
501 ptr += sizeof(int);
502 }
503#endif
504 if (ptr >= end) {
505 state = H1_MSG_RQURI;
506 goto http_msg_ood;
507 }
508 http_msg_rquri2:
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200509 if (likely((unsigned char)(*ptr - 33) <= 93)) { /* 33 to 126 included */
510 if (*ptr == '#') {
511 if (h1m->err_pos < -1) /* PR_O2_REQBUG_OK not set */
512 goto invalid_char;
513 if (h1m->err_pos == -1) /* PR_O2_REQBUG_OK set: just log */
514 h1m->err_pos = ptr - start + skip;
515 }
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200516 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
Willy Tarreaue5a741f2023-08-08 16:17:22 +0200517 }
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200518
519 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200520 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200521 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
522 }
523 if (likely((unsigned char)*ptr >= 128)) {
524 /* non-ASCII chars are forbidden unless option
525 * accept-invalid-http-request is enabled in the frontend.
526 * In any case, we capture the faulty char.
527 */
528 if (h1m->err_pos < -1)
529 goto invalid_char;
530 if (h1m->err_pos == -1)
531 h1m->err_pos = ptr - start + skip;
532 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
533 }
534
535 if (likely(HTTP_IS_CRLF(*ptr))) {
536 /* so it's a CR/LF, meaning an HTTP 0.9 request */
537 goto http_msg_req09_uri_e;
538 }
539
540 /* OK forbidden chars, 0..31 or 127 */
541 invalid_char:
542 state = H1_MSG_RQURI;
543 goto http_msg_invalid;
544
545 case H1_MSG_RQURI_SP:
546 http_msg_rquri_sp:
547 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200548 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200549 goto http_msg_rqver;
550 }
551 if (likely(HTTP_IS_SPHT(*ptr)))
552 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
553 /* so it's a CR/LF, meaning an HTTP 0.9 request */
554 goto http_msg_req09_ver;
555
556
557 case H1_MSG_RQVER:
558 http_msg_rqver:
559 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
560 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
561
562 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200563 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200564 http_msg_rqline_eol:
565 /* We have seen the end of line. Note that we do not
566 * necessarily have the \n yet, but at least we know that we
567 * have EITHER \r OR \n, otherwise the request would not be
568 * complete. We can then record the request length and return
569 * to the caller which will be able to register it.
570 */
571
572 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200573 if ((sl.rq.v.len == 8) &&
574 (*(sl.rq.v.ptr + 5) > '1' ||
575 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200576 h1m->flags |= H1_MF_VER_11;
577
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200578 if (unlikely(hdr_count >= hdr_num)) {
579 state = H1_MSG_RQVER;
580 goto http_output_full;
581 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200582 if (!(h1m->flags & H1_MF_NO_PHDR))
583 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200584
585 if (unlikely(hdr_count >= hdr_num)) {
586 state = H1_MSG_RQVER;
587 goto http_output_full;
588 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200589 if (!(h1m->flags & H1_MF_NO_PHDR))
590 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200591 }
592
593 sol = ptr - start;
594 if (likely(*ptr == '\r'))
595 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
596 goto http_msg_rqline_end;
597 }
598
599 /* neither an HTTP_VER token nor a CRLF */
600 state = H1_MSG_RQVER;
601 goto http_msg_invalid;
602
603 case H1_MSG_RQLINE_END:
604 http_msg_rqline_end:
605 /* check for HTTP/0.9 request : no version information
606 * available. sol must point to the first of CR or LF. However
607 * since we don't save these elements between calls, if we come
608 * here from a restart, we don't necessarily know. Thus in this
609 * case we simply start over.
610 */
611 if (restarting)
612 goto restart;
613
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200614 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200615 goto http_msg_last_lf;
616
617 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
618 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
619 /* stop here */
620
621 /*
622 * Common states below
623 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200624 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200625 http_msg_rpbefore:
626 if (likely(HTTP_IS_TOKEN(*ptr))) {
627 /* we have a start of message, we may have skipped some
628 * heading CRLF. Skip them now.
629 */
630 skip += ptr - start;
631 start = ptr;
632
633 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200634 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200635 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200636 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200637 goto http_msg_rpver;
638 }
639
640 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200641 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200642 goto http_msg_invalid;
643 }
644
645 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200646 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
647 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200648 /* stop here */
649
Willy Tarreau801250e2018-09-11 11:45:04 +0200650 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200651 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200652 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
653 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200654 /* stop here */
655
Willy Tarreau801250e2018-09-11 11:45:04 +0200656 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200657 http_msg_rpver:
658 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200659 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200660
661 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200662 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200663
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200664 if ((sl.st.v.len == 8) &&
665 (*(sl.st.v.ptr + 5) > '1' ||
666 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200667 h1m->flags |= H1_MF_VER_11;
668
Willy Tarreau801250e2018-09-11 11:45:04 +0200669 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200670 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200671 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200672 goto http_msg_invalid;
673
Willy Tarreau801250e2018-09-11 11:45:04 +0200674 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200675 http_msg_rpver_sp:
676 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200677 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200678 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200679 goto http_msg_rpcode;
680 }
681 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200682 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200683 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200684 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200685 goto http_msg_invalid;
686
Willy Tarreau801250e2018-09-11 11:45:04 +0200687 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200688 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100689 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200690 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200691 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200692 }
693
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100694 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200695 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100696 goto http_msg_invalid;
697 }
698
Willy Tarreau794f9af2017-07-26 09:07:47 +0200699 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200700 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200701 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200702 }
703
704 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200705 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200706
707 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200708 sl.st.r.ptr = ptr;
709 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200710 goto http_msg_rpline_eol;
711
Willy Tarreau801250e2018-09-11 11:45:04 +0200712 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200713 http_msg_rpcode_sp:
714 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200715 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200716 goto http_msg_rpreason;
717 }
718 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200719 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200720 /* so it's a CR/LF, so there is no reason phrase */
721 goto http_msg_rsp_reason;
722
Willy Tarreau801250e2018-09-11 11:45:04 +0200723 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200724 http_msg_rpreason:
725 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200726 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200727 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200728 http_msg_rpline_eol:
729 /* We have seen the end of line. Note that we do not
730 * necessarily have the \n yet, but at least we know that we
731 * have EITHER \r OR \n, otherwise the response would not be
732 * complete. We can then record the response length and return
733 * to the caller which will be able to register it.
734 */
735
Willy Tarreau5384aac2018-09-11 16:04:48 +0200736 if (likely(!skip_update)) {
737 if (unlikely(hdr_count >= hdr_num)) {
738 state = H1_MSG_RPREASON;
739 goto http_output_full;
740 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200741 if (!(h1m->flags & H1_MF_NO_PHDR))
742 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200743 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200744
745 sol = ptr - start;
746 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200747 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200748 goto http_msg_rpline_end;
749
Willy Tarreau801250e2018-09-11 11:45:04 +0200750 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200751 http_msg_rpline_end:
752 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200753 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
754 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200755 /* stop here */
756
Willy Tarreau801250e2018-09-11 11:45:04 +0200757 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200758 http_msg_hdr_first:
759 sol = ptr - start;
760 if (likely(!HTTP_IS_CRLF(*ptr))) {
761 goto http_msg_hdr_name;
762 }
763
764 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200765 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200766 goto http_msg_last_lf;
767
Willy Tarreau801250e2018-09-11 11:45:04 +0200768 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200769 http_msg_hdr_name:
770 /* assumes sol points to the first char */
771 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200772 if (!skip_update) {
773 /* turn it to lower case if needed */
774 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
Willy Tarreauf278eec2020-07-05 21:46:32 +0200775 *ptr = tolower((unsigned char)*ptr);
Christopher Faulet2912f872018-09-19 14:01:04 +0200776 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200777 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200778 }
779
780 if (likely(*ptr == ':')) {
781 col = ptr - start;
Willy Tarreau486cd732023-02-09 21:36:54 +0100782 if (col <= sol) {
783 state = H1_MSG_HDR_NAME;
784 goto http_msg_invalid;
785 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200786 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200787 }
788
Willy Tarreau9aec3052018-09-12 09:20:40 +0200789 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200790 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200791 goto http_msg_invalid;
792 }
793
Willy Tarreau9aec3052018-09-12 09:20:40 +0200794 if (h1m->err_pos == -1) /* capture the error pointer */
795 h1m->err_pos = ptr - start + skip; /* >= 0 now */
796
797 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200798 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200799
Willy Tarreau801250e2018-09-11 11:45:04 +0200800 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200801 http_msg_hdr_l1_sp:
802 /* assumes sol points to the first char */
803 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200804 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200805
806 /* header value can be basically anything except CR/LF */
807 sov = ptr - start;
808
809 if (likely(!HTTP_IS_CRLF(*ptr))) {
810 goto http_msg_hdr_val;
811 }
812
813 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200814 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200815 goto http_msg_hdr_l1_lf;
816
Willy Tarreau801250e2018-09-11 11:45:04 +0200817 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200818 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200819 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
820 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200821
Willy Tarreau801250e2018-09-11 11:45:04 +0200822 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200823 http_msg_hdr_l1_lws:
824 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200825 if (!skip_update) {
826 /* replace HT,CR,LF with spaces */
827 for (; start + sov < ptr; sov++)
828 start[sov] = ' ';
829 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200830 goto http_msg_hdr_l1_sp;
831 }
832 /* we had a header consisting only in spaces ! */
833 eol = sov;
834 goto http_msg_complete_header;
835
Willy Tarreau801250e2018-09-11 11:45:04 +0200836 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200837 http_msg_hdr_val:
838 /* assumes sol points to the first char, and sov
839 * points to the first character of the value.
840 */
841
842 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
843 * and lower. In fact since most of the time is spent in the loop, we
844 * also remove the sign bit test so that bytes 0x8e..0x0d break the
845 * loop, but we don't care since they're very rare in header values.
846 */
Willy Tarreau02ac9502020-02-21 16:31:22 +0100847#ifdef HA_UNALIGNED_LE64
Willy Tarreau794f9af2017-07-26 09:07:47 +0200848 while (ptr <= end - sizeof(long)) {
849 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
850 goto http_msg_hdr_val2;
851 ptr += sizeof(long);
852 }
853#endif
Willy Tarreau02ac9502020-02-21 16:31:22 +0100854#ifdef HA_UNALIGNED_LE
Willy Tarreau794f9af2017-07-26 09:07:47 +0200855 while (ptr <= end - sizeof(int)) {
856 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
857 goto http_msg_hdr_val2;
858 ptr += sizeof(int);
859 }
860#endif
861 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200862 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200863 goto http_msg_ood;
864 }
865 http_msg_hdr_val2:
866 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200867 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200868
869 eol = ptr - start;
870 /* Note: we could also copy eol into ->eoh so that we have the
871 * real header end in case it ends with lots of LWS, but is this
872 * really needed ?
873 */
874 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200875 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200876 goto http_msg_hdr_l2_lf;
877
Willy Tarreau801250e2018-09-11 11:45:04 +0200878 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200879 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200880 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
881 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200882
Willy Tarreau801250e2018-09-11 11:45:04 +0200883 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200884 http_msg_hdr_l2_lws:
885 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200886 if (!skip_update) {
887 /* LWS: replace HT,CR,LF with spaces */
888 for (; start + eol < ptr; eol++)
889 start[eol] = ' ';
890 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200891 goto http_msg_hdr_val;
892 }
893 http_msg_complete_header:
894 /*
895 * It was a new header, so the last one is finished. Assumes
896 * <sol> points to the first char of the name, <col> to the
897 * colon, <sov> points to the first character of the value and
898 * <eol> to the first CR or LF so we know how the line ends. We
899 * will trim spaces around the value. It's possible to do it by
900 * adjusting <eol> and <sov> which are no more used after this.
901 * We can add the header field to the list.
902 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200903 if (likely(!skip_update)) {
904 while (sov < eol && HTTP_IS_LWS(start[sov]))
905 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200906
Christopher Faulet2912f872018-09-19 14:01:04 +0200907 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
908 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200909
910
Christopher Faulet2912f872018-09-19 14:01:04 +0200911 n = ist2(start + sol, col - sol);
912 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200913
Christopher Faulet2912f872018-09-19 14:01:04 +0200914 do {
915 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200916
Christopher Faulet2912f872018-09-19 14:01:04 +0200917 if (unlikely(hdr_count >= hdr_num)) {
918 state = H1_MSG_HDR_L2_LWS;
919 goto http_output_full;
920 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200921
Christopher Faulet2912f872018-09-19 14:01:04 +0200922 if (isteqi(n, ist("transfer-encoding"))) {
923 h1_parse_xfer_enc_header(h1m, v);
924 }
925 else if (isteqi(n, ist("content-length"))) {
926 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200927
Christopher Faulet2912f872018-09-19 14:01:04 +0200928 if (ret < 0) {
929 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100930 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet2912f872018-09-19 14:01:04 +0200931 goto http_msg_invalid;
932 }
933 else if (ret == 0) {
934 /* skip it */
935 break;
936 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200937 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200938 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100939 h1_parse_connection_header(h1m, &v);
940 if (!v.len) {
941 /* skip it */
942 break;
943 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200944 }
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100945 else if (isteqi(n, ist("upgrade"))) {
946 h1_parse_upgrade_header(h1m, v);
947 }
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200948 else if (!(h1m->flags & H1_MF_RESP) && isteqi(n, ist("host"))) {
949 if (host_idx == -1)
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200950 host_idx = hdr_count;
951 else {
952 if (!isteqi(v, hdr[host_idx].v)) {
953 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100954 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200955 goto http_msg_invalid;
956 }
957 /* if the same host, skip it */
958 break;
959 }
960 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200961
Christopher Faulet2912f872018-09-19 14:01:04 +0200962 http_set_hdr(&hdr[hdr_count++], n, v);
963 } while (0);
964 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200965
966 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200967
Willy Tarreau794f9af2017-07-26 09:07:47 +0200968 if (likely(!HTTP_IS_CRLF(*ptr)))
969 goto http_msg_hdr_name;
970
971 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200972 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200973 goto http_msg_last_lf;
974
Willy Tarreau801250e2018-09-11 11:45:04 +0200975 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200976 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200977 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200978 ptr++;
979 /* <ptr> now points to the first byte of payload. If needed sol
980 * still points to the first of either CR or LF of the empty
981 * line ending the headers block.
982 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200983 if (likely(!skip_update)) {
984 if (unlikely(hdr_count >= hdr_num)) {
985 state = H1_MSG_LAST_LF;
986 goto http_output_full;
987 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200988 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200989 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200990
991 /* reaching here we've parsed the whole message. We may detect
992 * that we were already continuing an interrupted parsing pass
993 * so we were silently looking for the end of message not
994 * updating anything before deciding to parse it fully at once.
995 * It's guaranteed that we won't match this test twice in a row
996 * since restarting will turn zero.
997 */
998 if (restarting)
999 goto restart;
1000
Christopher Faulet63f95ed2022-07-05 14:50:17 +02001001
1002 if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) {
1003 struct ist authority;
1004
1005 authority = http_get_authority(sl.rq.u, 1);
1006 if (sl.rq.meth == HTTP_METH_CONNECT) {
1007 struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL);
1008 int ret;
1009
1010 ret = h1_validate_connect_authority(authority, host);
1011 if (ret < 0) {
1012 if (h1m->err_pos < -1) {
1013 state = H1_MSG_LAST_LF;
Willy Tarreaue21ad302022-10-04 08:02:03 +02001014 /* WT: gcc seems to see a path where sl.rq.u.ptr was used
1015 * uninitialized, but it doesn't know that the function is
1016 * called with initial states making this impossible.
1017 */
1018 ALREADY_CHECKED(sl.rq.u.ptr);
Christopher Faulet63f95ed2022-07-05 14:50:17 +02001019 ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
1020 goto http_msg_invalid;
1021 }
1022 if (h1m->err_pos == -1) /* capture the error pointer */
1023 h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
1024 }
1025 }
1026 else if (host_idx != -1 && istlen(authority)) {
1027 struct ist host = hdr[host_idx].v;
1028
1029 /* For non-CONNECT method, the authority must match the host header value */
1030 if (!isteqi(authority, host)) {
1031 if (h1m->err_pos < -1) {
1032 state = H1_MSG_LAST_LF;
1033 ptr = host.ptr; /* Set ptr on the error */
1034 goto http_msg_invalid;
1035 }
1036 if (h1m->err_pos == -1) /* capture the error pointer */
1037 h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
1038 }
1039
1040 }
1041 }
1042
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001043 state = H1_MSG_DATA;
1044 if (h1m->flags & H1_MF_XFER_ENC) {
1045 if (h1m->flags & H1_MF_CLEN) {
1046 h1m->flags &= ~H1_MF_CLEN;
1047 hdr_count = http_del_hdr(hdr, ist("content-length"));
1048 }
1049
1050 if (h1m->flags & H1_MF_CHNK)
1051 state = H1_MSG_CHUNK_SIZE;
1052 else if (!(h1m->flags & H1_MF_RESP)) {
1053 /* cf RFC7230#3.3.3 : transfer-encoding in
1054 * request without chunked encoding is invalid.
1055 */
1056 goto http_msg_invalid;
1057 }
1058 }
1059
Willy Tarreau794f9af2017-07-26 09:07:47 +02001060 break;
1061
1062 default:
1063 /* impossible states */
1064 goto http_msg_invalid;
1065 }
1066
Willy Tarreau001823c2018-09-12 17:25:32 +02001067 /* Now we've left the headers state and are either in H1_MSG_DATA or
1068 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001069 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001070
Willy Tarreau5384aac2018-09-11 16:04:48 +02001071 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001072 *slp = sl;
1073
Willy Tarreau4433c082018-09-11 15:33:32 +02001074 h1m->state = state;
1075 h1m->next = ptr - start + skip;
1076 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001077
1078 http_msg_ood:
1079 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001080 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001081 *slp = sl;
1082
Willy Tarreau4433c082018-09-11 15:33:32 +02001083 h1m->state = state;
1084 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001085 return 0;
1086
1087 http_msg_invalid:
1088 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001089 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001090 *slp = sl;
1091
Willy Tarreau4433c082018-09-11 15:33:32 +02001092 h1m->err_state = h1m->state = state;
1093 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001094 return -1;
1095
1096 http_output_full:
1097 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001098 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001099 *slp = sl;
1100
Willy Tarreau4433c082018-09-11 15:33:32 +02001101 h1m->err_state = h1m->state = state;
1102 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001103 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001104
1105 restart:
Christopher Faulet84f06532019-09-03 16:05:31 +02001106 h1m->flags &= ~(H1_MF_VER_11|H1_MF_CLEN|H1_MF_XFER_ENC|H1_MF_CHNK|H1_MF_CONN_KAL|H1_MF_CONN_CLO|H1_MF_CONN_UPG);
1107 h1m->curr_len = h1m->body_len = h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001108 if (h1m->flags & H1_MF_RESP)
1109 h1m->state = H1_MSG_RPBEFORE;
1110 else
1111 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001112 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001113}
1114
/* Measures the trailers block found at offset <ofs> in <buf>, looking at no
 * more than <max> bytes past that offset. Lines are scanned one at a time
 * until an empty one (a line made only of LF or CR LF) is met, which marks
 * the end of the trailers.
 *
 * Returns:
 *   > 0 : number of bytes between <ofs> and the byte following the empty
 *         line, i.e. what the caller has to skip to get past the trailers;
 *     0 : the scan limit was reached before the end of the trailers was
 *         seen, more input data is needed;
 *   < 0 : parse error (a second CR was met on a line before its LF).
 *
 * Note that a CR is only rejected when another CR shows up before the LF of
 * the same line; nothing here checks that the CR is immediately followed by
 * the LF.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;

	for (;;) {
		const char *line = b_peek(buf, pos);
		const char *cur;
		const char *eol = NULL; /* first CR or LF met on this line */

		/* walk the current line until its LF */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur == '\r') {
				/* only one CR is tolerated per line */
				if (eol)
					return -1;
				eol = cur;
			}
		}

		/* no CR was seen, so the line ends on the LF itself */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* a line starting with its own terminator is the empty line
		 * closing the trailers: we're done.
		 */
		if (eol == line)
			break;
	}

	return pos - ofs;
}
Amaury Denoyellec1938232020-12-11 17:53:03 +01001165
/* Produces a random key suitable for a WebSocket handshake (rfc6455). Two
 * 64-bit random values are concatenated into a 16-byte (128-bit) binary key
 * which is then base64-encoded into <key_out>. <key_out> must provide room
 * for 25 bytes.
 */
void h1_generate_random_ws_input_key(char key_out[25])
{
	uint64_t rnd[2];
	char raw_key[16];

	/* draw the 128 random bits, 64 at a time */
	rnd[0] = ha_random64();
	rnd[1] = ha_random64();

	memcpy(raw_key, rnd, sizeof(raw_key));
	a2base64(raw_key, 16, key_out, 25);
}
1180
Amaury Denoyellec1938232020-12-11 17:53:03 +01001181#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
1182
1183/*
1184 * Calculate the WebSocket handshake response key from <key_in>. Following the
1185 * rfc6455, <key_in> must be 24 bytes longs. The result is stored in <key_out>
1186 * as a 29 bytes long string.
1187 */
1188void h1_calculate_ws_output_key(const char *key, char *result)
1189{
1190 blk_SHA_CTX sha1_ctx;
1191 char hash_in[60], hash_out[20];
1192
1193 /* concatenate the key with a fixed suffix */
1194 memcpy(hash_in, key, 24);
1195 memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
1196
1197 /* sha1 the result */
1198 blk_SHA1_Init(&sha1_ctx);
1199 blk_SHA1_Update(&sha1_ctx, hash_in, 60);
1200 blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
1201
1202 /* encode in base64 the hash */
1203 a2base64(hash_out, 20, result, 29);
1204}