blob: 5abb65627c0e24de9895eac1fee6941f53d251c7 [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010014
15#include <import/sha1.h>
16
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020017#include <haproxy/api.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010018#include <haproxy/base64.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020019#include <haproxy/h1.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020020#include <haproxy/http-hdr.h>
Amaury Denoyelleaad333a2020-12-11 17:53:07 +010021#include <haproxy/tools.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020022
Willy Tarreau73373ab2018-09-14 17:11:33 +020023/* Parse the Content-Length header field of an HTTP/1 request. The function
24 * checks all possible occurrences of a comma-delimited value, and verifies
25 * if any of them doesn't match a previous value. It returns <0 if a value
26 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
27 * if the value can be indexed (first one). In the last case, the value might
28 * be adjusted and the caller must only add the updated value.
29 */
30int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
31{
32 char *e, *n;
33 long long cl;
34 int not_first = !!(h1m->flags & H1_MF_CLEN);
35 struct ist word;
36
Willy Tarreaua32f99f2023-08-09 08:32:48 +020037 word.ptr = value->ptr;
Willy Tarreau73373ab2018-09-14 17:11:33 +020038 e = value->ptr + value->len;
39
Willy Tarreaua32f99f2023-08-09 08:32:48 +020040 while (1) {
41 if (word.ptr >= e) {
42 /* empty header or empty value */
43 goto fail;
44 }
45
Ilya Shipitsin47d17182020-06-21 21:42:57 +050046 /* skip leading delimiter and blanks */
Willy Tarreaua32f99f2023-08-09 08:32:48 +020047 if (unlikely(HTTP_IS_LWS(*word.ptr))) {
48 word.ptr++;
Willy Tarreau73373ab2018-09-14 17:11:33 +020049 continue;
Willy Tarreaua32f99f2023-08-09 08:32:48 +020050 }
Willy Tarreau73373ab2018-09-14 17:11:33 +020051
52 /* digits only now */
53 for (cl = 0, n = word.ptr; n < e; n++) {
54 unsigned int c = *n - '0';
55 if (unlikely(c > 9)) {
56 /* non-digit */
57 if (unlikely(n == word.ptr)) // spaces only
58 goto fail;
59 break;
60 }
Willy Tarreauc33738c2023-08-09 11:02:34 +020061
62 if (unlikely(!cl && n > word.ptr)) {
63 /* There was a leading zero before this digit,
64 * let's trim it.
65 */
66 word.ptr = n;
67 }
68
Willy Tarreau73373ab2018-09-14 17:11:33 +020069 if (unlikely(cl > ULLONG_MAX / 10ULL))
70 goto fail; /* multiply overflow */
71 cl = cl * 10ULL;
72 if (unlikely(cl + c < cl))
73 goto fail; /* addition overflow */
74 cl = cl + c;
75 }
76
77 /* keep a copy of the exact cleaned value */
78 word.len = n - word.ptr;
79
80 /* skip trailing LWS till next comma or EOL */
81 for (; n < e; n++) {
82 if (!HTTP_IS_LWS(*n)) {
83 if (unlikely(*n != ','))
84 goto fail;
85 break;
86 }
87 }
88
89 /* if duplicate, must be equal */
90 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
91 goto fail;
92
93 /* OK, store this result as the one to be indexed */
94 h1m->flags |= H1_MF_CLEN;
95 h1m->curr_len = h1m->body_len = cl;
96 *value = word;
Willy Tarreaua32f99f2023-08-09 08:32:48 +020097
98 /* Now either n==e and we're done, or n points to the comma,
99 * and we skip it and continue.
100 */
101 if (n++ == e)
102 break;
103
Willy Tarreau73373ab2018-09-14 17:11:33 +0200104 word.ptr = n;
105 }
106 /* here we've reached the end with a single value or a series of
107 * identical values, all matching previous series if any. The last
108 * parsed value was sent back into <value>. We just have to decide
109 * if this occurrence has to be indexed (it's the first one) or
110 * silently skipped (it's not the first one)
111 */
112 return !not_first;
113 fail:
114 return -1;
115}
116
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200117/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
Christopher Faulet545fbba2021-09-28 09:36:25 +0200118 * "chunked" encoding to perform some checks (it must be the last encoding for
119 * the request and must not be performed twice for any message). The
120 * H1_MF_TE_CHUNKED is set if a valid "chunked" encoding is found. The
121 * H1_MF_TE_OTHER flag is set if any other encoding is found. The H1_MF_XFER_ENC
122 * flag is always set. The H1_MF_CHNK is set when "chunked" encoding is the last
123 * one. Note that transfer codings are case-insensitive (cf RFC7230#4). This
124 * function returns <0 if a error is found, 0 if the whole header can be dropped
125 * (not used yet), or >0 if the value can be indexed.
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200126 */
Christopher Faulet545fbba2021-09-28 09:36:25 +0200127int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200128{
129 char *e, *n;
130 struct ist word;
131
132 h1m->flags |= H1_MF_XFER_ENC;
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200133
134 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
Tim Duesterhus4c8f75f2021-11-06 15:14:44 +0100135 e = istend(value);
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200136
137 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500138 /* skip leading delimiter and blanks */
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200139 if (HTTP_IS_LWS(*word.ptr))
140 continue;
141
142 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
143 word.len = n - word.ptr;
144
145 /* trim trailing blanks */
146 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
147 word.len--;
148
149 h1m->flags &= ~H1_MF_CHNK;
Christopher Faulet545fbba2021-09-28 09:36:25 +0200150 if (isteqi(word, ist("chunked"))) {
151 if (h1m->flags & H1_MF_TE_CHUNKED) {
152 /* cf RFC7230#3.3.1 : A sender MUST NOT apply
153 * chunked more than once to a message body
154 * (i.e., chunking an already chunked message is
155 * not allowed)
156 */
157 goto fail;
158 }
159 h1m->flags |= (H1_MF_TE_CHUNKED|H1_MF_CHNK);
160 }
161 else {
162 if ((h1m->flags & (H1_MF_RESP|H1_MF_TE_CHUNKED)) == H1_MF_TE_CHUNKED) {
163 /* cf RFC7230#3.3.1 : If any transfer coding
164 * other than chunked is applied to a request
165 * payload body, the sender MUST apply chunked
166 * as the final transfer coding to ensure that
167 * the message is properly framed.
168 */
169 goto fail;
170 }
171 h1m->flags |= H1_MF_TE_OTHER;
172 }
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200173
174 word.ptr = n;
175 }
Christopher Faulet545fbba2021-09-28 09:36:25 +0200176
177 return 1;
178 fail:
179 return -1;
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200180}
181
Christopher Faulet3f5fbe92022-07-05 14:50:17 +0200182/* Validate the authority and the host header value for CONNECT method. If there
183 * is hast header, its value is normalized. 0 is returned on success, -1 if the
184 * authority is invalid and -2 if the host is invalid.
185 */
Christopher Faulet342db912024-05-14 15:06:48 +0200186static int h1_validate_connect_authority(struct ist scheme, struct ist authority, struct ist *host_hdr)
Christopher Faulet3f5fbe92022-07-05 14:50:17 +0200187{
188 struct ist uri_host, uri_port, host, host_port;
189
Christopher Faulet342db912024-05-14 15:06:48 +0200190 if (isttest(scheme) || !isttest(authority))
Christopher Faulet3f5fbe92022-07-05 14:50:17 +0200191 goto invalid_authority;
192 uri_host = authority;
193 uri_port = http_get_host_port(authority);
Christopher Faulet75348c22022-11-22 10:27:54 +0100194 if (!istlen(uri_port))
Christopher Faulet3f5fbe92022-07-05 14:50:17 +0200195 goto invalid_authority;
196 uri_host.len -= (istlen(uri_port) + 1);
197
198 if (!host_hdr || !isttest(*host_hdr))
199 goto end;
200
201 /* Get the port of the host header value, if any */
202 host = *host_hdr;
203 host_port = http_get_host_port(*host_hdr);
Christopher Faulet75348c22022-11-22 10:27:54 +0100204 if (isttest(host_port))
Christopher Faulet3f5fbe92022-07-05 14:50:17 +0200205 host.len -= (istlen(host_port) + 1);
Christopher Faulet75348c22022-11-22 10:27:54 +0100206
207 if (istlen(host_port)) {
Christopher Faulet3f5fbe92022-07-05 14:50:17 +0200208 if (!isteqi(host, uri_host) || !isteq(host_port, uri_port))
209 goto invalid_host;
210 if (http_is_default_port(IST_NULL, uri_port))
211 *host_hdr = host; /* normalize */
212 }
213 else {
214 if (!http_is_default_port(IST_NULL, uri_port) || !isteqi(host, uri_host))
215 goto invalid_host;
216 }
217
218 end:
219 return 0;
220
221 invalid_authority:
222 return -1;
223
224 invalid_host:
225 return -2;
226}
227
Christopher Faulete16ffb02022-11-22 10:04:16 +0100228
229/* Validate the authority and the host header value for non-CONNECT method, when
230 * an absolute-URI is detected but when it does not exactly match the host
231 * value. The idea is to detect default port (http or https). authority and host
232 * are defined here. 0 is returned on success, -1 if the host is does not match
233 * the authority.
234 */
235static int h1_validate_mismatch_authority(struct ist scheme, struct ist authority, struct ist host_hdr)
236{
237 struct ist uri_host, uri_port, host, host_port;
238
239 if (!isttest(scheme))
240 goto mismatch;
241
242 uri_host = authority;
243 uri_port = http_get_host_port(authority);
244 if (isttest(uri_port))
245 uri_host.len -= (istlen(uri_port) + 1);
246
247 host = host_hdr;
248 host_port = http_get_host_port(host_hdr);
249 if (isttest(host_port))
250 host.len -= (istlen(host_port) + 1);
251
252 if (!isttest(uri_port) && !isttest(host_port)) {
253 /* No port on both: we already know the authority does not match
254 * the host value
255 */
256 goto mismatch;
257 }
258 else if (isttest(uri_port) && !http_is_default_port(scheme, uri_port)) {
259 /* here there is no port for the host value and the port for the
260 * authority is not the default one
261 */
262 goto mismatch;
263 }
264 else if (isttest(host_port) && !http_is_default_port(scheme, host_port)) {
265 /* here there is no port for the authority and the port for the
266 * host value is not the default one
267 */
268 goto mismatch;
269 }
270 else {
271 /* the authority or the host value contain a default port and
272 * there is no port on the other value
273 */
274 if (!isteqi(uri_host, host))
275 goto mismatch;
276 }
277
278 return 0;
279
280 mismatch:
281 return -1;
282}
283
284
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200285/* Parse the Connection: header of an HTTP/1 request, looking for "close",
286 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
287 * what was found there. Note that flags are only added, not removed, so the
288 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100289 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
290 * up from "keep-alive" and "close" values. To do so, the header value is
291 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200292 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100293void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200294{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100295 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200296 struct ist word;
297
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100298 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
299 p = value->ptr;
300 e = value->ptr + value->len;
301 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
302 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200303
304 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500305 /* skip leading delimiter and blanks */
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200306 if (HTTP_IS_LWS(*word.ptr))
307 continue;
308
309 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
310 word.len = n - word.ptr;
311
312 /* trim trailing blanks */
313 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
314 word.len--;
315
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100316 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200317 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100318 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
319 goto skip_val;
320 }
321 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200322 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100323 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
324 goto skip_val;
325 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200326 else if (isteqi(word, ist("upgrade")))
327 h1m->flags |= H1_MF_CONN_UPG;
328
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100329 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
330 if (value->ptr + value->len == p) {
331 /* no rewrite done till now */
332 value->len = n - value->ptr;
333 }
334 else {
335 if (value->len)
336 value->ptr[value->len++] = ',';
337 istcat(value, word, e - value->ptr);
338 }
339 }
340
341 skip_val:
342 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200343 }
344}
345
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100346/* Parse the Upgrade: header of an HTTP/1 request.
347 * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
348 */
349void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
350{
351 char *e, *n;
352 struct ist word;
353
354 h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
355
356 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
Tim Duesterhus4c8f75f2021-11-06 15:14:44 +0100357 e = istend(value);
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100358
359 while (++word.ptr < e) {
360 /* skip leading delimiter and blanks */
361 if (HTTP_IS_LWS(*word.ptr))
362 continue;
363
364 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
365 word.len = n - word.ptr;
366
367 /* trim trailing blanks */
368 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
369 word.len--;
370
371 if (isteqi(word, ist("websocket")))
372 h1m->flags |= H1_MF_UPG_WEBSOCKET;
373
374 word.ptr = n;
375 }
376}
377
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
 */

/* Checks that the byte at <ptr> is an LF. When it is not, <state> is set to
 * <where> and the execution jumps to label <bad>. Otherwise nothing happens.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)                          \
	do {                                                            \
		if (likely(*(ptr) == '\n'))                             \
			break;                                          \
		state = (where);                                        \
		goto bad;                                               \
	} while (0)

/* Advances pointer <ptr> by one byte. If it reached or passed pointer <end>,
 * <state> is set to <where> and the execution jumps to label <stop>. Otherwise
 * the execution jumps to label <more>. In both cases, the control never flows
 * past this macro.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)      \
	do {                                                            \
		if (unlikely(++(ptr) >= (end))) {                       \
			state = (where);                                \
			goto stop;                                      \
		}                                                       \
		goto more;                                              \
	} while (0)

406
Willy Tarreau794f9af2017-07-26 09:07:47 +0200407/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
409 * pairs representing header fields into the array <hdr> of size <hdr_num>,
410 * whose last entry will have an empty name and an empty value. If <hdr_num> is
Willy Tarreau4433c082018-09-11 15:33:32 +0200411 * too small to represent the whole message, an error is returned. Some
412 * protocol elements such as content-length and transfer-encoding will be
Willy Tarreau5384aac2018-09-11 16:04:48 +0200413 * parsed and stored into h1m as well. <hdr> may be null, in which case only
414 * the parsing state will be updated. This may be used to restart the parsing
415 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200416 *
417 * For now it's limited to the response. If the header block is incomplete,
418 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200419 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200420 * and h1m->next to zero on the first call, the parser will do the rest. If
421 * an incomplete message is seen, the caller only needs to present h1m->state
422 * and h1m->next again, with an empty header list so that the parser can start
423 * again. In this case, it will detect that it interrupted a previous session
424 * and will first look for the end of the message before reparsing it again and
425 * indexing it at the same time. This ensures that incomplete messages fed 1
426 * character at a time are never processed entirely more than exactly twice,
427 * and that there is no need to store all the internal state and pre-parsed
428 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200429 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200430 * A pointer to a start line descriptor may be passed in <slp>, in which case
431 * the parser will fill it with whatever it found.
432 *
Willy Tarreau794f9af2017-07-26 09:07:47 +0200433 * The code derived from the main HTTP/1 parser above but was simplified and
434 * optimized to process responses produced or forwarded by haproxy. The caller
435 * is responsible for ensuring that the message doesn't wrap, and should ensure
436 * it is complete to avoid having to retry the operation after a failed
437 * attempt. The message is not supposed to be invalid, which is why a few
438 * properties such as the character set used in the header field names are not
439 * checked. In case of an unparsable response message, a negative value will be
440 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100442 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
443 * parsed and the start line is skipped. It is not required to set h1m->state
444 * nor h1m->next in this case.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200445 *
446 * This function returns :
447 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
450 * -2 if the output is full (hdr_num reached). err_state and err_pos also
451 * indicate where it failed.
452 * 0 in case of missing data.
453 * > 0 on success, it then corresponds to the number of bytes read since
454 * <start> so that the caller can go on with the payload.
455 */
456int h1_headers_to_hdr_list(char *start, const char *stop,
457 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200458 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200459{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200460 enum h1m_state state;
461 register char *ptr;
462 register const char *end;
463 unsigned int hdr_count;
464 unsigned int skip; /* number of bytes skipped at the beginning */
465 unsigned int sol; /* start of line */
466 unsigned int col; /* position of the colon */
467 unsigned int eol; /* end of line */
468 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200469 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200470 int skip_update;
471 int restarting;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200472 int host_idx;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200473 struct ist n, v; /* header name and value during parsing */
474
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200475 skip = 0; // do it only once to keep track of the leading CRLF.
476
477 try_again:
478 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200479 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200480 skip_update = restarting = 0;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200481 host_idx = -1;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200482
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100483 if (h1m->flags & H1_MF_HDRS_ONLY) {
484 state = H1_MSG_HDR_FIRST;
485 h1m->next = 0;
486 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100487 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100488 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100489 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
490 restarting = 1;
491 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100492
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200493 ptr = start + h1m->next;
494 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200495
Willy Tarreau794f9af2017-07-26 09:07:47 +0200496 if (unlikely(ptr >= end))
497 goto http_msg_ood;
498
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200499 /* don't update output if hdr is NULL or if we're restarting */
500 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200501 skip_update = 1;
502
Willy Tarreau794f9af2017-07-26 09:07:47 +0200503 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200504 case H1_MSG_RQBEFORE:
505 http_msg_rqbefore:
506 if (likely(HTTP_IS_TOKEN(*ptr))) {
507 /* we have a start of message, we may have skipped some
508 * heading CRLF. Skip them now.
509 */
510 skip += ptr - start;
511 start = ptr;
512
513 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200514 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200515 hdr_count = 0;
516 state = H1_MSG_RQMETH;
517 goto http_msg_rqmeth;
518 }
519
520 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
521 state = H1_MSG_RQBEFORE;
522 goto http_msg_invalid;
523 }
524
525 if (unlikely(*ptr == '\n'))
526 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
527 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
528 /* stop here */
529
530 case H1_MSG_RQBEFORE_CR:
531 http_msg_rqbefore_cr:
532 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
533 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
534 /* stop here */
535
536 case H1_MSG_RQMETH:
537 http_msg_rqmeth:
538 if (likely(HTTP_IS_TOKEN(*ptr)))
539 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
540
541 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200542 sl.rq.m.len = ptr - sl.rq.m.ptr;
543 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200544 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
545 }
546
547 if (likely(HTTP_IS_CRLF(*ptr))) {
548 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200549 sl.rq.m.len = ptr - sl.rq.m.ptr;
550 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200551 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200552 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200553 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200554 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200555 http_msg_req09_ver:
Tim Duesterhus77508502022-03-15 13:11:06 +0100556 sl.rq.v = ist2(ptr, 0);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200557 goto http_msg_rqline_eol;
558 }
559 state = H1_MSG_RQMETH;
560 goto http_msg_invalid;
561
562 case H1_MSG_RQMETH_SP:
563 http_msg_rqmeth_sp:
564 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200565 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200566 goto http_msg_rquri;
567 }
568 if (likely(HTTP_IS_SPHT(*ptr)))
569 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
570 /* so it's a CR/LF, meaning an HTTP 0.9 request */
571 goto http_msg_req09_uri;
572
573 case H1_MSG_RQURI:
574 http_msg_rquri:
Willy Tarreau02ac9502020-02-21 16:31:22 +0100575#ifdef HA_UNALIGNED_LE
Willy Tarreau9bf75c82023-08-08 16:17:22 +0200576 /* speedup: skip bytes not between 0x24 and 0x7e inclusive */
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200577 while (ptr <= end - sizeof(int)) {
Willy Tarreau30a58952024-04-24 11:37:06 +0200578 uint x = *(uint *)ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200579
Willy Tarreau30a58952024-04-24 11:37:06 +0200580 if (((x - 0x24242424) | (0x7e7e7e7e - x)) & 0x80808080U)
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200581 break;
582
583 ptr += sizeof(int);
584 }
585#endif
586 if (ptr >= end) {
587 state = H1_MSG_RQURI;
588 goto http_msg_ood;
589 }
590 http_msg_rquri2:
Willy Tarreau9bf75c82023-08-08 16:17:22 +0200591 if (likely((unsigned char)(*ptr - 33) <= 93)) { /* 33 to 126 included */
592 if (*ptr == '#') {
593 if (h1m->err_pos < -1) /* PR_O2_REQBUG_OK not set */
594 goto invalid_char;
595 if (h1m->err_pos == -1) /* PR_O2_REQBUG_OK set: just log */
596 h1m->err_pos = ptr - start + skip;
597 }
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200598 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
Willy Tarreau9bf75c82023-08-08 16:17:22 +0200599 }
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200600
601 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200602 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200603 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
604 }
605 if (likely((unsigned char)*ptr >= 128)) {
606 /* non-ASCII chars are forbidden unless option
607 * accept-invalid-http-request is enabled in the frontend.
608 * In any case, we capture the faulty char.
609 */
610 if (h1m->err_pos < -1)
611 goto invalid_char;
612 if (h1m->err_pos == -1)
613 h1m->err_pos = ptr - start + skip;
614 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
615 }
616
617 if (likely(HTTP_IS_CRLF(*ptr))) {
618 /* so it's a CR/LF, meaning an HTTP 0.9 request */
619 goto http_msg_req09_uri_e;
620 }
621
622 /* OK forbidden chars, 0..31 or 127 */
623 invalid_char:
624 state = H1_MSG_RQURI;
625 goto http_msg_invalid;
626
627 case H1_MSG_RQURI_SP:
628 http_msg_rquri_sp:
629 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200630 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200631 goto http_msg_rqver;
632 }
633 if (likely(HTTP_IS_SPHT(*ptr)))
634 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
635 /* so it's a CR/LF, meaning an HTTP 0.9 request */
636 goto http_msg_req09_ver;
637
638
639 case H1_MSG_RQVER:
640 http_msg_rqver:
641 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
642 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
643
644 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200645 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200646 http_msg_rqline_eol:
647 /* We have seen the end of line. Note that we do not
648 * necessarily have the \n yet, but at least we know that we
649 * have EITHER \r OR \n, otherwise the request would not be
650 * complete. We can then record the request length and return
651 * to the caller which will be able to register it.
652 */
653
654 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200655 if ((sl.rq.v.len == 8) &&
656 (*(sl.rq.v.ptr + 5) > '1' ||
657 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200658 h1m->flags |= H1_MF_VER_11;
659
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200660 if (unlikely(hdr_count >= hdr_num)) {
661 state = H1_MSG_RQVER;
662 goto http_output_full;
663 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200664 if (!(h1m->flags & H1_MF_NO_PHDR))
665 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200666
667 if (unlikely(hdr_count >= hdr_num)) {
668 state = H1_MSG_RQVER;
669 goto http_output_full;
670 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200671 if (!(h1m->flags & H1_MF_NO_PHDR))
672 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200673 }
674
675 sol = ptr - start;
676 if (likely(*ptr == '\r'))
677 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
678 goto http_msg_rqline_end;
679 }
680
681 /* neither an HTTP_VER token nor a CRLF */
682 state = H1_MSG_RQVER;
683 goto http_msg_invalid;
684
685 case H1_MSG_RQLINE_END:
686 http_msg_rqline_end:
687 /* check for HTTP/0.9 request : no version information
688 * available. sol must point to the first of CR or LF. However
689 * since we don't save these elements between calls, if we come
690 * here from a restart, we don't necessarily know. Thus in this
691 * case we simply start over.
692 */
693 if (restarting)
694 goto restart;
695
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200696 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200697 goto http_msg_last_lf;
698
699 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
700 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
701 /* stop here */
702
703 /*
704 * Common states below
705 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200706 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200707 http_msg_rpbefore:
708 if (likely(HTTP_IS_TOKEN(*ptr))) {
709 /* we have a start of message, we may have skipped some
710 * heading CRLF. Skip them now.
711 */
712 skip += ptr - start;
713 start = ptr;
714
715 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200716 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200717 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200718 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200719 goto http_msg_rpver;
720 }
721
722 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200723 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200724 goto http_msg_invalid;
725 }
726
727 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200728 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
729 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200730 /* stop here */
731
Willy Tarreau801250e2018-09-11 11:45:04 +0200732 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200733 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200734 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
735 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200736 /* stop here */
737
Willy Tarreau801250e2018-09-11 11:45:04 +0200738 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200739 http_msg_rpver:
740 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200741 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200742
743 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200744 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200745
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200746 if ((sl.st.v.len == 8) &&
747 (*(sl.st.v.ptr + 5) > '1' ||
748 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200749 h1m->flags |= H1_MF_VER_11;
750
Willy Tarreau801250e2018-09-11 11:45:04 +0200751 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200752 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200753 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200754 goto http_msg_invalid;
755
Willy Tarreau801250e2018-09-11 11:45:04 +0200756 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200757 http_msg_rpver_sp:
758 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200759 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200760 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200761 goto http_msg_rpcode;
762 }
763 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200764 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200765 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200766 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200767 goto http_msg_invalid;
768
Willy Tarreau801250e2018-09-11 11:45:04 +0200769 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200770 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100771 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200772 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200773 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200774 }
775
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100776 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200777 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100778 goto http_msg_invalid;
779 }
780
Willy Tarreau794f9af2017-07-26 09:07:47 +0200781 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200782 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200783 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200784 }
785
786 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200787 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200788
789 http_msg_rsp_reason:
Tim Duesterhus77508502022-03-15 13:11:06 +0100790 sl.st.r = ist2(ptr, 0);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200791 goto http_msg_rpline_eol;
792
Willy Tarreau801250e2018-09-11 11:45:04 +0200793 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200794 http_msg_rpcode_sp:
795 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200796 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200797 goto http_msg_rpreason;
798 }
799 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200800 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200801 /* so it's a CR/LF, so there is no reason phrase */
802 goto http_msg_rsp_reason;
803
Willy Tarreau801250e2018-09-11 11:45:04 +0200804 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200805 http_msg_rpreason:
806 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200807 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200808 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200809 http_msg_rpline_eol:
810 /* We have seen the end of line. Note that we do not
811 * necessarily have the \n yet, but at least we know that we
812 * have EITHER \r OR \n, otherwise the response would not be
813 * complete. We can then record the response length and return
814 * to the caller which will be able to register it.
815 */
816
Willy Tarreau5384aac2018-09-11 16:04:48 +0200817 if (likely(!skip_update)) {
818 if (unlikely(hdr_count >= hdr_num)) {
819 state = H1_MSG_RPREASON;
820 goto http_output_full;
821 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200822 if (!(h1m->flags & H1_MF_NO_PHDR))
823 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200824 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200825
826 sol = ptr - start;
827 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200828 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200829 goto http_msg_rpline_end;
830
Willy Tarreau801250e2018-09-11 11:45:04 +0200831 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200832 http_msg_rpline_end:
833 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200834 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
835 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200836 /* stop here */
837
Willy Tarreau801250e2018-09-11 11:45:04 +0200838 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200839 http_msg_hdr_first:
840 sol = ptr - start;
841 if (likely(!HTTP_IS_CRLF(*ptr))) {
842 goto http_msg_hdr_name;
843 }
844
845 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200846 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200847 goto http_msg_last_lf;
848
Willy Tarreau801250e2018-09-11 11:45:04 +0200849 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200850 http_msg_hdr_name:
851 /* assumes sol points to the first char */
852 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200853 if (!skip_update) {
854 /* turn it to lower case if needed */
855 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
Willy Tarreauf278eec2020-07-05 21:46:32 +0200856 *ptr = tolower((unsigned char)*ptr);
Christopher Faulet2912f872018-09-19 14:01:04 +0200857 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200858 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200859 }
860
861 if (likely(*ptr == ':')) {
862 col = ptr - start;
Willy Tarreaua8598a22023-02-09 21:36:54 +0100863 if (col <= sol) {
864 state = H1_MSG_HDR_NAME;
865 goto http_msg_invalid;
866 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200867 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200868 }
869
Willy Tarreau9aec3052018-09-12 09:20:40 +0200870 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200871 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200872 goto http_msg_invalid;
873 }
874
Willy Tarreau9aec3052018-09-12 09:20:40 +0200875 if (h1m->err_pos == -1) /* capture the error pointer */
876 h1m->err_pos = ptr - start + skip; /* >= 0 now */
877
878 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200879 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200880
Willy Tarreau801250e2018-09-11 11:45:04 +0200881 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200882 http_msg_hdr_l1_sp:
883 /* assumes sol points to the first char */
884 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200885 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200886
887 /* header value can be basically anything except CR/LF */
888 sov = ptr - start;
889
890 if (likely(!HTTP_IS_CRLF(*ptr))) {
891 goto http_msg_hdr_val;
892 }
893
894 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200895 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200896 goto http_msg_hdr_l1_lf;
897
Willy Tarreau801250e2018-09-11 11:45:04 +0200898 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200899 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200900 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
901 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200902
Willy Tarreau801250e2018-09-11 11:45:04 +0200903 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200904 http_msg_hdr_l1_lws:
905 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200906 if (!skip_update) {
907 /* replace HT,CR,LF with spaces */
908 for (; start + sov < ptr; sov++)
909 start[sov] = ' ';
910 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200911 goto http_msg_hdr_l1_sp;
912 }
913 /* we had a header consisting only in spaces ! */
914 eol = sov;
915 goto http_msg_complete_header;
916
Willy Tarreau801250e2018-09-11 11:45:04 +0200917 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200918 http_msg_hdr_val:
919 /* assumes sol points to the first char, and sov
920 * points to the first character of the value.
921 */
922
923 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
924 * and lower. In fact since most of the time is spent in the loop, we
925 * also remove the sign bit test so that bytes 0x8e..0x0d break the
926 * loop, but we don't care since they're very rare in header values.
927 */
Willy Tarreau02ac9502020-02-21 16:31:22 +0100928#ifdef HA_UNALIGNED_LE64
Willy Tarreau794f9af2017-07-26 09:07:47 +0200929 while (ptr <= end - sizeof(long)) {
930 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
931 goto http_msg_hdr_val2;
932 ptr += sizeof(long);
933 }
934#endif
Willy Tarreau02ac9502020-02-21 16:31:22 +0100935#ifdef HA_UNALIGNED_LE
Willy Tarreau794f9af2017-07-26 09:07:47 +0200936 while (ptr <= end - sizeof(int)) {
937 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
938 goto http_msg_hdr_val2;
939 ptr += sizeof(int);
940 }
941#endif
942 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200943 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200944 goto http_msg_ood;
945 }
946 http_msg_hdr_val2:
Willy Tarreau08ebf192024-01-31 15:10:39 +0100947 if (likely(!*ptr)) {
948 /* RFC9110 clarified that NUL is explicitly forbidden in header values
949 * (like CR and LF).
950 */
951 if (h1m->err_pos < -1) { /* PR_O2_REQBUG_OK not set */
952 state = H1_MSG_HDR_VAL;
953 goto http_msg_invalid;
954 }
955 if (h1m->err_pos == -1) /* PR_O2_REQBUG_OK set: just log */
956 h1m->err_pos = ptr - start + skip;
957 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200958 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200959 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200960
961 eol = ptr - start;
962 /* Note: we could also copy eol into ->eoh so that we have the
963 * real header end in case it ends with lots of LWS, but is this
964 * really needed ?
965 */
966 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200967 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200968 goto http_msg_hdr_l2_lf;
969
Willy Tarreau801250e2018-09-11 11:45:04 +0200970 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200971 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200972 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
973 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200974
Willy Tarreau801250e2018-09-11 11:45:04 +0200975 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200976 http_msg_hdr_l2_lws:
977 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200978 if (!skip_update) {
979 /* LWS: replace HT,CR,LF with spaces */
980 for (; start + eol < ptr; eol++)
981 start[eol] = ' ';
982 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200983 goto http_msg_hdr_val;
984 }
985 http_msg_complete_header:
986 /*
987 * It was a new header, so the last one is finished. Assumes
988 * <sol> points to the first char of the name, <col> to the
989 * colon, <sov> points to the first character of the value and
990 * <eol> to the first CR or LF so we know how the line ends. We
991 * will trim spaces around the value. It's possible to do it by
992 * adjusting <eol> and <sov> which are no more used after this.
993 * We can add the header field to the list.
994 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200995 if (likely(!skip_update)) {
996 while (sov < eol && HTTP_IS_LWS(start[sov]))
997 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200998
Christopher Faulet2912f872018-09-19 14:01:04 +0200999 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
1000 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001001
1002
Christopher Faulet2912f872018-09-19 14:01:04 +02001003 n = ist2(start + sol, col - sol);
1004 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001005
Christopher Faulet2912f872018-09-19 14:01:04 +02001006 do {
1007 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001008
Christopher Faulet2912f872018-09-19 14:01:04 +02001009 if (unlikely(hdr_count >= hdr_num)) {
1010 state = H1_MSG_HDR_L2_LWS;
1011 goto http_output_full;
1012 }
Willy Tarreau5384aac2018-09-11 16:04:48 +02001013
Christopher Faulet2912f872018-09-19 14:01:04 +02001014 if (isteqi(n, ist("transfer-encoding"))) {
Christopher Faulet545fbba2021-09-28 09:36:25 +02001015 ret = h1_parse_xfer_enc_header(h1m, v);
1016 if (ret < 0) {
1017 state = H1_MSG_HDR_L2_LWS;
1018 ptr = v.ptr; /* Set ptr on the error */
1019 goto http_msg_invalid;
1020 }
1021 else if (ret == 0) {
1022 /* skip it */
1023 break;
1024 }
Christopher Faulet2912f872018-09-19 14:01:04 +02001025 }
1026 else if (isteqi(n, ist("content-length"))) {
1027 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +02001028
Christopher Faulet2912f872018-09-19 14:01:04 +02001029 if (ret < 0) {
1030 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +01001031 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet2912f872018-09-19 14:01:04 +02001032 goto http_msg_invalid;
1033 }
1034 else if (ret == 0) {
1035 /* skip it */
1036 break;
1037 }
Willy Tarreau73373ab2018-09-14 17:11:33 +02001038 }
Christopher Faulet2912f872018-09-19 14:01:04 +02001039 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +01001040 h1_parse_connection_header(h1m, &v);
1041 if (!v.len) {
1042 /* skip it */
1043 break;
1044 }
Willy Tarreau73373ab2018-09-14 17:11:33 +02001045 }
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +01001046 else if (isteqi(n, ist("upgrade"))) {
1047 h1_parse_upgrade_header(h1m, v);
1048 }
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001049 else if (!(h1m->flags & H1_MF_RESP) && isteqi(n, ist("host"))) {
1050 if (host_idx == -1)
Christopher Faulet497ab4f2019-10-11 09:01:44 +02001051 host_idx = hdr_count;
1052 else {
1053 if (!isteqi(v, hdr[host_idx].v)) {
1054 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +01001055 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet497ab4f2019-10-11 09:01:44 +02001056 goto http_msg_invalid;
1057 }
1058 /* if the same host, skip it */
1059 break;
1060 }
1061 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +02001062
Christopher Faulet2912f872018-09-19 14:01:04 +02001063 http_set_hdr(&hdr[hdr_count++], n, v);
1064 } while (0);
1065 }
Willy Tarreau794f9af2017-07-26 09:07:47 +02001066
1067 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +02001068
Willy Tarreau794f9af2017-07-26 09:07:47 +02001069 if (likely(!HTTP_IS_CRLF(*ptr)))
1070 goto http_msg_hdr_name;
1071
1072 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001073 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001074 goto http_msg_last_lf;
1075
Willy Tarreau801250e2018-09-11 11:45:04 +02001076 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001077 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +02001078 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001079 ptr++;
1080 /* <ptr> now points to the first byte of payload. If needed sol
1081 * still points to the first of either CR or LF of the empty
1082 * line ending the headers block.
1083 */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001084 if (likely(!skip_update)) {
1085 if (unlikely(hdr_count >= hdr_num)) {
1086 state = H1_MSG_LAST_LF;
1087 goto http_output_full;
1088 }
Christopher Fauletff08a922018-09-25 13:59:46 +02001089 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +02001090 }
Willy Tarreau001823c2018-09-12 17:25:32 +02001091
1092 /* reaching here we've parsed the whole message. We may detect
1093 * that we were already continuing an interrupted parsing pass
1094 * so we were silently looking for the end of message not
1095 * updating anything before deciding to parse it fully at once.
1096 * It's guaranteed that we won't match this test twice in a row
1097 * since restarting will turn zero.
1098 */
1099 if (restarting)
1100 goto restart;
1101
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001102
1103 if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) {
1104 struct http_uri_parser parser = http_uri_parser_init(sl.rq.u);
Christopher Faulet3fff9b42024-05-14 11:42:21 +02001105 struct ist scheme, authority = IST_NULL;
Christopher Faulete16ffb02022-11-22 10:04:16 +01001106 int ret;
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001107
Christopher Faulete16ffb02022-11-22 10:04:16 +01001108 scheme = http_parse_scheme(&parser);
Christopher Faulet3fff9b42024-05-14 11:42:21 +02001109 if (istlen(scheme) || sl.rq.meth == HTTP_METH_CONNECT) {
1110 /* Expect an authority if for CONNECT method or if there is a scheme */
1111 authority = http_parse_authority(&parser, 1);
1112 }
1113
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001114 if (sl.rq.meth == HTTP_METH_CONNECT) {
1115 struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL);
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001116
Christopher Faulet342db912024-05-14 15:06:48 +02001117 ret = h1_validate_connect_authority(scheme, authority, host);
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001118 if (ret < 0) {
1119 if (h1m->err_pos < -1) {
1120 state = H1_MSG_LAST_LF;
Willy Tarreau55d2e852022-10-04 08:02:03 +02001121 /* WT: gcc seems to see a path where sl.rq.u.ptr was used
1122 * uninitialized, but it doesn't know that the function is
1123 * called with initial states making this impossible.
1124 */
1125 ALREADY_CHECKED(sl.rq.u.ptr);
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001126 ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
1127 goto http_msg_invalid;
1128 }
1129 if (h1m->err_pos == -1) /* capture the error pointer */
1130 h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
1131 }
1132 }
1133 else if (host_idx != -1 && istlen(authority)) {
1134 struct ist host = hdr[host_idx].v;
1135
1136 /* For non-CONNECT method, the authority must match the host header value */
1137 if (!isteqi(authority, host)) {
Christopher Faulete16ffb02022-11-22 10:04:16 +01001138 ret = h1_validate_mismatch_authority(scheme, authority, host);
1139 if (ret < 0) {
1140 if (h1m->err_pos < -1) {
1141 state = H1_MSG_LAST_LF;
1142 ptr = host.ptr; /* Set ptr on the error */
1143 goto http_msg_invalid;
1144 }
1145 if (h1m->err_pos == -1) /* capture the error pointer */
1146 h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001147 }
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001148 }
Christopher Faulet3f5fbe92022-07-05 14:50:17 +02001149 }
1150 }
1151
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001152 state = H1_MSG_DATA;
1153 if (h1m->flags & H1_MF_XFER_ENC) {
1154 if (h1m->flags & H1_MF_CLEN) {
Christopher Faulet631c7e82021-09-27 09:47:03 +02001155 /* T-E + C-L: force close and remove C-L */
1156 h1m->flags |= H1_MF_CONN_CLO;
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001157 h1m->flags &= ~H1_MF_CLEN;
Christopher Faulet18e41322023-09-27 15:21:28 +02001158 h1m->curr_len = h1m->body_len = 0;
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001159 hdr_count = http_del_hdr(hdr, ist("content-length"));
1160 }
Christopher Faulet631c7e82021-09-27 09:47:03 +02001161 else if (!(h1m->flags & H1_MF_VER_11)) {
1162 /* T-E + HTTP/1.0: force close */
1163 h1m->flags |= H1_MF_CONN_CLO;
1164 }
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001165
1166 if (h1m->flags & H1_MF_CHNK)
1167 state = H1_MSG_CHUNK_SIZE;
1168 else if (!(h1m->flags & H1_MF_RESP)) {
1169 /* cf RFC7230#3.3.3 : transfer-encoding in
1170 * request without chunked encoding is invalid.
1171 */
1172 goto http_msg_invalid;
1173 }
1174 }
1175
Willy Tarreau794f9af2017-07-26 09:07:47 +02001176 break;
1177
1178 default:
1179 /* impossible states */
1180 goto http_msg_invalid;
1181 }
1182
Willy Tarreau001823c2018-09-12 17:25:32 +02001183 /* Now we've left the headers state and are either in H1_MSG_DATA or
1184 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001185 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001186
Willy Tarreau5384aac2018-09-11 16:04:48 +02001187 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001188 *slp = sl;
1189
Willy Tarreau4433c082018-09-11 15:33:32 +02001190 h1m->state = state;
1191 h1m->next = ptr - start + skip;
1192 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001193
1194 http_msg_ood:
1195 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001196 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001197 *slp = sl;
1198
Willy Tarreau4433c082018-09-11 15:33:32 +02001199 h1m->state = state;
1200 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001201 return 0;
1202
1203 http_msg_invalid:
1204 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001205 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001206 *slp = sl;
1207
Willy Tarreau4433c082018-09-11 15:33:32 +02001208 h1m->err_state = h1m->state = state;
1209 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001210 return -1;
1211
1212 http_output_full:
1213 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001214 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001215 *slp = sl;
1216
Willy Tarreau4433c082018-09-11 15:33:32 +02001217 h1m->err_state = h1m->state = state;
1218 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001219 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001220
1221 restart:
Christopher Faulet02c89332021-12-01 18:01:48 +01001222 h1m->flags &= H1_MF_RESTART_MASK;
Christopher Faulet84f06532019-09-03 16:05:31 +02001223 h1m->curr_len = h1m->body_len = h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001224 if (h1m->flags & H1_MF_RESP)
1225 h1m->state = H1_MSG_RPBEFORE;
1226 else
1227 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001228 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001229}
1230
/* Measure the trailers block found in <buf>, starting at offset <ofs> and
 * looking at no more than <max> bytes. Lines are consumed one after the other
 * until an empty line (a lone LF, or CR immediately followed by LF) is met.
 *
 * Return values:
 *   > 0 : number of bytes between <ofs> and the end of the empty line which
 *         terminates the trailers, i.e. what the caller has to skip ;
 *     0 : the limit <ofs>+<max> was reached before the terminating empty line
 *         was seen, more input data is needed ;
 *   < 0 : parse error (a second CR was met on a line before its LF). The
 *         caller decides how to proceed, possibly eating everything.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;

	for (;;) {
		const char *line = b_peek(buf, pos);
		const char *eol = NULL; /* first CR or LF met on this line */
		const char *cur;

		/* walk the current line up to its LF */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur == '\r') {
				/* at most one CR is tolerated per line */
				if (eol)
					return -1;
				eol = cur;
			}
		}

		/* <cur> is on the LF. Without any CR before it, the LF itself
		 * is the first end-of-line character.
		 */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* an end-of-line character in first position means the line
		 * was empty: this is where the trailers end.
		 */
		if (eol == line)
			return pos - ofs;
	}
}
Amaury Denoyellec1938232020-12-11 17:53:03 +01001281
/* Build a random key suitable for a WebSocket handshake (rfc6455). Two 64-bit
 * random words form a 128-bit value, which is then base64-encoded into
 * <key_out>. The caller must provide a 25-byte area for <key_out>, as this is
 * the output size handed over to a2base64().
 */
void h1_generate_random_ws_input_key(char key_out[25])
{
	enum {
		WS_KEY_HALF_LEN = 8,                   /* one 64-bit random word */
		WS_KEY_RAW_LEN  = 2 * WS_KEY_HALF_LEN, /* 128 bits before encoding */
		WS_KEY_B64_LEN  = 25,                  /* size of <key_out> */
	};
	char raw[WS_KEY_RAW_LEN];
	uint64_t word;

	/* first half of the key */
	word = ha_random64();
	memcpy(raw, &word, WS_KEY_HALF_LEN);

	/* second half of the key */
	word = ha_random64();
	memcpy(raw + WS_KEY_HALF_LEN, &word, WS_KEY_HALF_LEN);

	a2base64(raw, WS_KEY_RAW_LEN, key_out, WS_KEY_B64_LEN);
}
1296
Amaury Denoyellec1938232020-12-11 17:53:03 +01001297#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
1298
1299/*
1300 * Calculate the WebSocket handshake response key from <key_in>. Following the
1301 * rfc6455, <key_in> must be 24 bytes longs. The result is stored in <key_out>
1302 * as a 29 bytes long string.
1303 */
1304void h1_calculate_ws_output_key(const char *key, char *result)
1305{
1306 blk_SHA_CTX sha1_ctx;
1307 char hash_in[60], hash_out[20];
1308
1309 /* concatenate the key with a fixed suffix */
1310 memcpy(hash_in, key, 24);
1311 memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
1312
1313 /* sha1 the result */
1314 blk_SHA1_Init(&sha1_ctx);
1315 blk_SHA1_Update(&sha1_ctx, hash_in, 60);
1316 blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
1317
1318 /* encode in base64 the hash */
1319 a2base64(hash_out, 20, result, 29);
1320}