blob: 73de48be0105a6b1ef641d8ef913d2d472daf9cb [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010014
15#include <import/sha1.h>
16
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020017#include <haproxy/api.h>
Amaury Denoyellec1938232020-12-11 17:53:03 +010018#include <haproxy/base64.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020019#include <haproxy/h1.h>
Willy Tarreau0017be02020-06-02 19:25:28 +020020#include <haproxy/http-hdr.h>
Amaury Denoyelleaad333a2020-12-11 17:53:07 +010021#include <haproxy/tools.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020022
Willy Tarreau73373ab2018-09-14 17:11:33 +020023/* Parse the Content-Length header field of an HTTP/1 request. The function
24 * checks all possible occurrences of a comma-delimited value, and verifies
25 * if any of them doesn't match a previous value. It returns <0 if a value
26 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
27 * if the value can be indexed (first one). In the last case, the value might
28 * be adjusted and the caller must only add the updated value.
29 */
30int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
31{
32 char *e, *n;
33 long long cl;
34 int not_first = !!(h1m->flags & H1_MF_CLEN);
35 struct ist word;
36
37 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
38 e = value->ptr + value->len;
39
40 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +050041 /* skip leading delimiter and blanks */
Willy Tarreau73373ab2018-09-14 17:11:33 +020042 if (unlikely(HTTP_IS_LWS(*word.ptr)))
43 continue;
44
45 /* digits only now */
46 for (cl = 0, n = word.ptr; n < e; n++) {
47 unsigned int c = *n - '0';
48 if (unlikely(c > 9)) {
49 /* non-digit */
50 if (unlikely(n == word.ptr)) // spaces only
51 goto fail;
52 break;
53 }
54 if (unlikely(cl > ULLONG_MAX / 10ULL))
55 goto fail; /* multiply overflow */
56 cl = cl * 10ULL;
57 if (unlikely(cl + c < cl))
58 goto fail; /* addition overflow */
59 cl = cl + c;
60 }
61
62 /* keep a copy of the exact cleaned value */
63 word.len = n - word.ptr;
64
65 /* skip trailing LWS till next comma or EOL */
66 for (; n < e; n++) {
67 if (!HTTP_IS_LWS(*n)) {
68 if (unlikely(*n != ','))
69 goto fail;
70 break;
71 }
72 }
73
74 /* if duplicate, must be equal */
75 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
76 goto fail;
77
78 /* OK, store this result as the one to be indexed */
79 h1m->flags |= H1_MF_CLEN;
80 h1m->curr_len = h1m->body_len = cl;
81 *value = word;
82 word.ptr = n;
83 }
84 /* here we've reached the end with a single value or a series of
85 * identical values, all matching previous series if any. The last
86 * parsed value was sent back into <value>. We just have to decide
87 * if this occurrence has to be indexed (it's the first one) or
88 * silently skipped (it's not the first one)
89 */
90 return !not_first;
91 fail:
92 return -1;
93}
94
Willy Tarreau2557f6a2018-09-14 16:34:47 +020095/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
96 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
97 * this case. Any other token found or any empty header field found will reset
98 * this flag, so that it accurately represents the token's presence at the last
99 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
100 * are case-insensitive (cf RFC7230#4).
101 */
102void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
103{
104 char *e, *n;
105 struct ist word;
106
107 h1m->flags |= H1_MF_XFER_ENC;
108 h1m->flags &= ~H1_MF_CHNK;
109
110 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
111 e = value.ptr + value.len;
112
113 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500114 /* skip leading delimiter and blanks */
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200115 if (HTTP_IS_LWS(*word.ptr))
116 continue;
117
118 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
119 word.len = n - word.ptr;
120
121 /* trim trailing blanks */
122 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
123 word.len--;
124
125 h1m->flags &= ~H1_MF_CHNK;
126 if (isteqi(word, ist("chunked")))
127 h1m->flags |= H1_MF_CHNK;
128
129 word.ptr = n;
130 }
131}
132
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200133/* Validate the authority and the host header value for CONNECT method. If there
134 * is hast header, its value is normalized. 0 is returned on success, -1 if the
135 * authority is invalid and -2 if the host is invalid.
136 */
137static int h1_validate_connect_authority(struct ist authority, struct ist *host_hdr)
138{
139 struct ist uri_host, uri_port, host, host_port;
140
141 if (!isttest(authority))
142 goto invalid_authority;
143 uri_host = authority;
144 uri_port = http_get_host_port(authority);
145 if (!isttest(uri_port))
146 goto invalid_authority;
147 uri_host.len -= (istlen(uri_port) + 1);
148
149 if (!host_hdr || !isttest(*host_hdr))
150 goto end;
151
152 /* Get the port of the host header value, if any */
153 host = *host_hdr;
154 host_port = http_get_host_port(*host_hdr);
155 if (isttest(host_port)) {
156 host.len -= (istlen(host_port) + 1);
157 if (!isteqi(host, uri_host) || !isteq(host_port, uri_port))
158 goto invalid_host;
159 if (http_is_default_port(IST_NULL, uri_port))
160 *host_hdr = host; /* normalize */
161 }
162 else {
163 if (!http_is_default_port(IST_NULL, uri_port) || !isteqi(host, uri_host))
164 goto invalid_host;
165 }
166
167 end:
168 return 0;
169
170 invalid_authority:
171 return -1;
172
173 invalid_host:
174 return -2;
175}
176
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200177/* Parse the Connection: header of an HTTP/1 request, looking for "close",
178 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
179 * what was found there. Note that flags are only added, not removed, so the
180 * function is safe for being called multiple times if multiple occurrences
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100181 * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
182 * up from "keep-alive" and "close" values. To do so, the header value is
183 * rewritten in place and its length is updated.
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200184 */
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100185void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200186{
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100187 char *e, *n, *p;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200188 struct ist word;
189
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100190 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
191 p = value->ptr;
192 e = value->ptr + value->len;
193 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
194 value->len = 0;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200195
196 while (++word.ptr < e) {
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500197 /* skip leading delimiter and blanks */
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200198 if (HTTP_IS_LWS(*word.ptr))
199 continue;
200
201 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
202 word.len = n - word.ptr;
203
204 /* trim trailing blanks */
205 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
206 word.len--;
207
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100208 if (isteqi(word, ist("keep-alive"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200209 h1m->flags |= H1_MF_CONN_KAL;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100210 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
211 goto skip_val;
212 }
213 else if (isteqi(word, ist("close"))) {
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200214 h1m->flags |= H1_MF_CONN_CLO;
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100215 if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
216 goto skip_val;
217 }
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200218 else if (isteqi(word, ist("upgrade")))
219 h1m->flags |= H1_MF_CONN_UPG;
220
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100221 if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
222 if (value->ptr + value->len == p) {
223 /* no rewrite done till now */
224 value->len = n - value->ptr;
225 }
226 else {
227 if (value->len)
228 value->ptr[value->len++] = ',';
229 istcat(value, word, e - value->ptr);
230 }
231 }
232
233 skip_val:
234 word.ptr = p = n;
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200235 }
236}
237
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100238/* Parse the Upgrade: header of an HTTP/1 request.
239 * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
240 */
241void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
242{
243 char *e, *n;
244 struct ist word;
245
246 h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
247
248 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
249 e = value.ptr + value.len;
250
251 while (++word.ptr < e) {
252 /* skip leading delimiter and blanks */
253 if (HTTP_IS_LWS(*word.ptr))
254 continue;
255
256 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
257 word.len = n - word.ptr;
258
259 /* trim trailing blanks */
260 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
261 word.len--;
262
263 if (isteqi(word, ist("websocket")))
264 h1m->flags |= H1_MF_UPG_WEBSOCKET;
265
266 word.ptr = n;
267 }
268}
269
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of
 * failure.
 */

/* Checks that the byte at <ptr> is an LF. If it is, execution simply goes on
 * after the macro. Otherwise <state> is set to <where> and the code jumps to
 * label <bad>.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)			\
	do {							\
		if (likely(*(ptr) == '\n'))			\
			break;					\
		state = (where);				\
		goto bad;					\
	} while (0)
284
/* Advances pointer <ptr> by one byte. If it reached or passed pointer <end>,
 * <state> is set to <where> and the code jumps to label <stop>; otherwise it
 * jumps to label <more>. Either way control never falls through the macro.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)	\
	do {								\
		if (unlikely(++(ptr) >= (end))) {			\
			state = (where);				\
			goto stop;					\
		}							\
		goto more;						\
	} while (0)
298
Willy Tarreau794f9af2017-07-26 09:07:47 +0200299/* This function parses a contiguous HTTP/1 headers block starting at <start>
300 * and ending before <stop>, at once, and converts it a list of (name,value)
301 * pairs representing header fields into the array <hdr> of size <hdr_num>,
302 * whose last entry will have an empty name and an empty value. If <hdr_num> is
Willy Tarreau4433c082018-09-11 15:33:32 +0200303 * too small to represent the whole message, an error is returned. Some
304 * protocol elements such as content-length and transfer-encoding will be
Willy Tarreau5384aac2018-09-11 16:04:48 +0200305 * parsed and stored into h1m as well. <hdr> may be null, in which case only
306 * the parsing state will be updated. This may be used to restart the parsing
307 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200308 *
309 * For now it's limited to the response. If the header block is incomplete,
310 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200311 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200312 * and h1m->next to zero on the first call, the parser will do the rest. If
313 * an incomplete message is seen, the caller only needs to present h1m->state
314 * and h1m->next again, with an empty header list so that the parser can start
315 * again. In this case, it will detect that it interrupted a previous session
316 * and will first look for the end of the message before reparsing it again and
317 * indexing it at the same time. This ensures that incomplete messages fed 1
318 * character at a time are never processed entirely more than exactly twice,
319 * and that there is no need to store all the internal state and pre-parsed
320 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200321 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200322 * A pointer to a start line descriptor may be passed in <slp>, in which case
323 * the parser will fill it with whatever it found.
324 *
Willy Tarreau794f9af2017-07-26 09:07:47 +0200325 * The code derived from the main HTTP/1 parser above but was simplified and
326 * optimized to process responses produced or forwarded by haproxy. The caller
327 * is responsible for ensuring that the message doesn't wrap, and should ensure
328 * it is complete to avoid having to retry the operation after a failed
329 * attempt. The message is not supposed to be invalid, which is why a few
330 * properties such as the character set used in the header field names are not
331 * checked. In case of an unparsable response message, a negative value will be
332 * returned with h1m->err_pos and h1m->err_state matching the location and
333 * state where the error was met. Leading blank likes are tolerated but not
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100334 * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
335 * parsed and the start line is skipped. It is not required to set h1m->state
336 * nor h1m->next in this case.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200337 *
338 * This function returns :
339 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
Willy Tarreau801250e2018-09-11 11:45:04 +0200340 * set) with the state the error occurred in and h1m->err_pos with the
Willy Tarreau794f9af2017-07-26 09:07:47 +0200341 * the position relative to <start>
342 * -2 if the output is full (hdr_num reached). err_state and err_pos also
343 * indicate where it failed.
344 * 0 in case of missing data.
345 * > 0 on success, it then corresponds to the number of bytes read since
346 * <start> so that the caller can go on with the payload.
347 */
348int h1_headers_to_hdr_list(char *start, const char *stop,
349 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200350 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200351{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200352 enum h1m_state state;
353 register char *ptr;
354 register const char *end;
355 unsigned int hdr_count;
356 unsigned int skip; /* number of bytes skipped at the beginning */
357 unsigned int sol; /* start of line */
358 unsigned int col; /* position of the colon */
359 unsigned int eol; /* end of line */
360 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200361 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200362 int skip_update;
363 int restarting;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200364 int host_idx;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200365 struct ist n, v; /* header name and value during parsing */
366
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200367 skip = 0; // do it only once to keep track of the leading CRLF.
368
369 try_again:
370 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200371 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200372 skip_update = restarting = 0;
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200373 host_idx = -1;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200374
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100375 if (h1m->flags & H1_MF_HDRS_ONLY) {
376 state = H1_MSG_HDR_FIRST;
377 h1m->next = 0;
378 }
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100379 else {
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100380 state = h1m->state;
Christopher Faulet68b1bbd2019-01-04 16:06:48 +0100381 if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
382 restarting = 1;
383 }
Willy Tarreau0f8fb6b2019-01-04 10:48:03 +0100384
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200385 ptr = start + h1m->next;
386 end = stop;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200387
Willy Tarreau794f9af2017-07-26 09:07:47 +0200388 if (unlikely(ptr >= end))
389 goto http_msg_ood;
390
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200391 /* don't update output if hdr is NULL or if we're restarting */
392 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200393 skip_update = 1;
394
Willy Tarreau794f9af2017-07-26 09:07:47 +0200395 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200396 case H1_MSG_RQBEFORE:
397 http_msg_rqbefore:
398 if (likely(HTTP_IS_TOKEN(*ptr))) {
399 /* we have a start of message, we may have skipped some
400 * heading CRLF. Skip them now.
401 */
402 skip += ptr - start;
403 start = ptr;
404
405 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200406 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200407 hdr_count = 0;
408 state = H1_MSG_RQMETH;
409 goto http_msg_rqmeth;
410 }
411
412 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
413 state = H1_MSG_RQBEFORE;
414 goto http_msg_invalid;
415 }
416
417 if (unlikely(*ptr == '\n'))
418 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
419 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
420 /* stop here */
421
422 case H1_MSG_RQBEFORE_CR:
423 http_msg_rqbefore_cr:
424 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
425 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
426 /* stop here */
427
428 case H1_MSG_RQMETH:
429 http_msg_rqmeth:
430 if (likely(HTTP_IS_TOKEN(*ptr)))
431 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
432
433 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200434 sl.rq.m.len = ptr - sl.rq.m.ptr;
435 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200436 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
437 }
438
439 if (likely(HTTP_IS_CRLF(*ptr))) {
440 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200441 sl.rq.m.len = ptr - sl.rq.m.ptr;
442 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200443 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200444 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200445 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200446 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200447 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200448 sl.rq.v.ptr = ptr;
449 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200450 goto http_msg_rqline_eol;
451 }
452 state = H1_MSG_RQMETH;
453 goto http_msg_invalid;
454
455 case H1_MSG_RQMETH_SP:
456 http_msg_rqmeth_sp:
457 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200458 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200459 goto http_msg_rquri;
460 }
461 if (likely(HTTP_IS_SPHT(*ptr)))
462 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
463 /* so it's a CR/LF, meaning an HTTP 0.9 request */
464 goto http_msg_req09_uri;
465
466 case H1_MSG_RQURI:
467 http_msg_rquri:
Willy Tarreau02ac9502020-02-21 16:31:22 +0100468#ifdef HA_UNALIGNED_LE
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200469 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
470 while (ptr <= end - sizeof(int)) {
471 int x = *(int *)ptr - 0x21212121;
472 if (x & 0x80808080)
473 break;
474
475 x -= 0x5e5e5e5e;
476 if (!(x & 0x80808080))
477 break;
478
479 ptr += sizeof(int);
480 }
481#endif
482 if (ptr >= end) {
483 state = H1_MSG_RQURI;
484 goto http_msg_ood;
485 }
486 http_msg_rquri2:
487 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
488 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
489
490 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200491 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200492 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
493 }
494 if (likely((unsigned char)*ptr >= 128)) {
495 /* non-ASCII chars are forbidden unless option
496 * accept-invalid-http-request is enabled in the frontend.
497 * In any case, we capture the faulty char.
498 */
499 if (h1m->err_pos < -1)
500 goto invalid_char;
501 if (h1m->err_pos == -1)
502 h1m->err_pos = ptr - start + skip;
503 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
504 }
505
506 if (likely(HTTP_IS_CRLF(*ptr))) {
507 /* so it's a CR/LF, meaning an HTTP 0.9 request */
508 goto http_msg_req09_uri_e;
509 }
510
511 /* OK forbidden chars, 0..31 or 127 */
512 invalid_char:
513 state = H1_MSG_RQURI;
514 goto http_msg_invalid;
515
516 case H1_MSG_RQURI_SP:
517 http_msg_rquri_sp:
518 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200519 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200520 goto http_msg_rqver;
521 }
522 if (likely(HTTP_IS_SPHT(*ptr)))
523 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
524 /* so it's a CR/LF, meaning an HTTP 0.9 request */
525 goto http_msg_req09_ver;
526
527
528 case H1_MSG_RQVER:
529 http_msg_rqver:
530 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
531 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
532
533 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200534 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200535 http_msg_rqline_eol:
536 /* We have seen the end of line. Note that we do not
537 * necessarily have the \n yet, but at least we know that we
538 * have EITHER \r OR \n, otherwise the request would not be
539 * complete. We can then record the request length and return
540 * to the caller which will be able to register it.
541 */
542
543 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200544 if ((sl.rq.v.len == 8) &&
545 (*(sl.rq.v.ptr + 5) > '1' ||
546 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200547 h1m->flags |= H1_MF_VER_11;
548
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200549 if (unlikely(hdr_count >= hdr_num)) {
550 state = H1_MSG_RQVER;
551 goto http_output_full;
552 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200553 if (!(h1m->flags & H1_MF_NO_PHDR))
554 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200555
556 if (unlikely(hdr_count >= hdr_num)) {
557 state = H1_MSG_RQVER;
558 goto http_output_full;
559 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200560 if (!(h1m->flags & H1_MF_NO_PHDR))
561 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200562 }
563
564 sol = ptr - start;
565 if (likely(*ptr == '\r'))
566 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
567 goto http_msg_rqline_end;
568 }
569
570 /* neither an HTTP_VER token nor a CRLF */
571 state = H1_MSG_RQVER;
572 goto http_msg_invalid;
573
574 case H1_MSG_RQLINE_END:
575 http_msg_rqline_end:
576 /* check for HTTP/0.9 request : no version information
577 * available. sol must point to the first of CR or LF. However
578 * since we don't save these elements between calls, if we come
579 * here from a restart, we don't necessarily know. Thus in this
580 * case we simply start over.
581 */
582 if (restarting)
583 goto restart;
584
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200585 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200586 goto http_msg_last_lf;
587
588 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
589 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
590 /* stop here */
591
592 /*
593 * Common states below
594 */
Willy Tarreau801250e2018-09-11 11:45:04 +0200595 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200596 http_msg_rpbefore:
597 if (likely(HTTP_IS_TOKEN(*ptr))) {
598 /* we have a start of message, we may have skipped some
599 * heading CRLF. Skip them now.
600 */
601 skip += ptr - start;
602 start = ptr;
603
604 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200605 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200606 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200607 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200608 goto http_msg_rpver;
609 }
610
611 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200612 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200613 goto http_msg_invalid;
614 }
615
616 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200617 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
618 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200619 /* stop here */
620
Willy Tarreau801250e2018-09-11 11:45:04 +0200621 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200622 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200623 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
624 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200625 /* stop here */
626
Willy Tarreau801250e2018-09-11 11:45:04 +0200627 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200628 http_msg_rpver:
629 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200630 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200631
632 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200633 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200634
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200635 if ((sl.st.v.len == 8) &&
636 (*(sl.st.v.ptr + 5) > '1' ||
637 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +0200638 h1m->flags |= H1_MF_VER_11;
639
Willy Tarreau801250e2018-09-11 11:45:04 +0200640 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200641 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200642 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200643 goto http_msg_invalid;
644
Willy Tarreau801250e2018-09-11 11:45:04 +0200645 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200646 http_msg_rpver_sp:
647 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200648 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200649 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200650 goto http_msg_rpcode;
651 }
652 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200653 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200654 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200655 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200656 goto http_msg_invalid;
657
Willy Tarreau801250e2018-09-11 11:45:04 +0200658 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200659 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100660 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200661 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200662 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200663 }
664
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100665 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200666 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100667 goto http_msg_invalid;
668 }
669
Willy Tarreau794f9af2017-07-26 09:07:47 +0200670 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200671 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +0200672 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200673 }
674
675 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200676 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200677
678 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200679 sl.st.r.ptr = ptr;
680 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200681 goto http_msg_rpline_eol;
682
Willy Tarreau801250e2018-09-11 11:45:04 +0200683 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200684 http_msg_rpcode_sp:
685 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200686 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200687 goto http_msg_rpreason;
688 }
689 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200690 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200691 /* so it's a CR/LF, so there is no reason phrase */
692 goto http_msg_rsp_reason;
693
Willy Tarreau801250e2018-09-11 11:45:04 +0200694 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200695 http_msg_rpreason:
696 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200697 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200698 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200699 http_msg_rpline_eol:
700 /* We have seen the end of line. Note that we do not
701 * necessarily have the \n yet, but at least we know that we
702 * have EITHER \r OR \n, otherwise the response would not be
703 * complete. We can then record the response length and return
704 * to the caller which will be able to register it.
705 */
706
Willy Tarreau5384aac2018-09-11 16:04:48 +0200707 if (likely(!skip_update)) {
708 if (unlikely(hdr_count >= hdr_num)) {
709 state = H1_MSG_RPREASON;
710 goto http_output_full;
711 }
Christopher Faulet25da9e32018-10-08 15:50:15 +0200712 if (!(h1m->flags & H1_MF_NO_PHDR))
713 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200714 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200715
716 sol = ptr - start;
717 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200718 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200719 goto http_msg_rpline_end;
720
Willy Tarreau801250e2018-09-11 11:45:04 +0200721 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200722 http_msg_rpline_end:
723 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200724 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
725 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200726 /* stop here */
727
Willy Tarreau801250e2018-09-11 11:45:04 +0200728 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200729 http_msg_hdr_first:
730 sol = ptr - start;
731 if (likely(!HTTP_IS_CRLF(*ptr))) {
732 goto http_msg_hdr_name;
733 }
734
735 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200736 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200737 goto http_msg_last_lf;
738
Willy Tarreau801250e2018-09-11 11:45:04 +0200739 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200740 http_msg_hdr_name:
741 /* assumes sol points to the first char */
742 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200743 if (!skip_update) {
744 /* turn it to lower case if needed */
745 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
Willy Tarreauf278eec2020-07-05 21:46:32 +0200746 *ptr = tolower((unsigned char)*ptr);
Christopher Faulet2912f872018-09-19 14:01:04 +0200747 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200748 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200749 }
750
751 if (likely(*ptr == ':')) {
752 col = ptr - start;
Willy Tarreau486cd732023-02-09 21:36:54 +0100753 if (col <= sol) {
754 state = H1_MSG_HDR_NAME;
755 goto http_msg_invalid;
756 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200757 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200758 }
759
Willy Tarreau9aec3052018-09-12 09:20:40 +0200760 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200761 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200762 goto http_msg_invalid;
763 }
764
Willy Tarreau9aec3052018-09-12 09:20:40 +0200765 if (h1m->err_pos == -1) /* capture the error pointer */
766 h1m->err_pos = ptr - start + skip; /* >= 0 now */
767
768 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200769 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200770
Willy Tarreau801250e2018-09-11 11:45:04 +0200771 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200772 http_msg_hdr_l1_sp:
773 /* assumes sol points to the first char */
774 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200775 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200776
777 /* header value can be basically anything except CR/LF */
778 sov = ptr - start;
779
780 if (likely(!HTTP_IS_CRLF(*ptr))) {
781 goto http_msg_hdr_val;
782 }
783
784 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200785 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200786 goto http_msg_hdr_l1_lf;
787
Willy Tarreau801250e2018-09-11 11:45:04 +0200788 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200789 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200790 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
791 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200792
Willy Tarreau801250e2018-09-11 11:45:04 +0200793 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200794 http_msg_hdr_l1_lws:
795 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200796 if (!skip_update) {
797 /* replace HT,CR,LF with spaces */
798 for (; start + sov < ptr; sov++)
799 start[sov] = ' ';
800 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200801 goto http_msg_hdr_l1_sp;
802 }
803 /* we had a header consisting only in spaces ! */
804 eol = sov;
805 goto http_msg_complete_header;
806
Willy Tarreau801250e2018-09-11 11:45:04 +0200807 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200808 http_msg_hdr_val:
809 /* assumes sol points to the first char, and sov
810 * points to the first character of the value.
811 */
812
813 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
814 * and lower. In fact since most of the time is spent in the loop, we
815 * also remove the sign bit test so that bytes 0x8e..0x0d break the
816 * loop, but we don't care since they're very rare in header values.
817 */
Willy Tarreau02ac9502020-02-21 16:31:22 +0100818#ifdef HA_UNALIGNED_LE64
Willy Tarreau794f9af2017-07-26 09:07:47 +0200819 while (ptr <= end - sizeof(long)) {
820 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
821 goto http_msg_hdr_val2;
822 ptr += sizeof(long);
823 }
824#endif
Willy Tarreau02ac9502020-02-21 16:31:22 +0100825#ifdef HA_UNALIGNED_LE
Willy Tarreau794f9af2017-07-26 09:07:47 +0200826 while (ptr <= end - sizeof(int)) {
827 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
828 goto http_msg_hdr_val2;
829 ptr += sizeof(int);
830 }
831#endif
832 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200833 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200834 goto http_msg_ood;
835 }
836 http_msg_hdr_val2:
837 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200838 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200839
840 eol = ptr - start;
841 /* Note: we could also copy eol into ->eoh so that we have the
842 * real header end in case it ends with lots of LWS, but is this
843 * really needed ?
844 */
845 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200846 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200847 goto http_msg_hdr_l2_lf;
848
Willy Tarreau801250e2018-09-11 11:45:04 +0200849 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200850 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200851 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
852 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200853
Willy Tarreau801250e2018-09-11 11:45:04 +0200854 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200855 http_msg_hdr_l2_lws:
856 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +0200857 if (!skip_update) {
858 /* LWS: replace HT,CR,LF with spaces */
859 for (; start + eol < ptr; eol++)
860 start[eol] = ' ';
861 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200862 goto http_msg_hdr_val;
863 }
864 http_msg_complete_header:
865 /*
866 * It was a new header, so the last one is finished. Assumes
867 * <sol> points to the first char of the name, <col> to the
868 * colon, <sov> points to the first character of the value and
869 * <eol> to the first CR or LF so we know how the line ends. We
870 * will trim spaces around the value. It's possible to do it by
871 * adjusting <eol> and <sov> which are no more used after this.
872 * We can add the header field to the list.
873 */
Christopher Faulet2912f872018-09-19 14:01:04 +0200874 if (likely(!skip_update)) {
875 while (sov < eol && HTTP_IS_LWS(start[sov]))
876 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200877
Christopher Faulet2912f872018-09-19 14:01:04 +0200878 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
879 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200880
881
Christopher Faulet2912f872018-09-19 14:01:04 +0200882 n = ist2(start + sol, col - sol);
883 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200884
Christopher Faulet2912f872018-09-19 14:01:04 +0200885 do {
886 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200887
Christopher Faulet2912f872018-09-19 14:01:04 +0200888 if (unlikely(hdr_count >= hdr_num)) {
889 state = H1_MSG_HDR_L2_LWS;
890 goto http_output_full;
891 }
Willy Tarreau5384aac2018-09-11 16:04:48 +0200892
Christopher Faulet2912f872018-09-19 14:01:04 +0200893 if (isteqi(n, ist("transfer-encoding"))) {
894 h1_parse_xfer_enc_header(h1m, v);
895 }
896 else if (isteqi(n, ist("content-length"))) {
897 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +0200898
Christopher Faulet2912f872018-09-19 14:01:04 +0200899 if (ret < 0) {
900 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100901 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet2912f872018-09-19 14:01:04 +0200902 goto http_msg_invalid;
903 }
904 else if (ret == 0) {
905 /* skip it */
906 break;
907 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200908 }
Christopher Faulet2912f872018-09-19 14:01:04 +0200909 else if (isteqi(n, ist("connection"))) {
Christopher Fauleta51ebb72019-03-29 15:03:13 +0100910 h1_parse_connection_header(h1m, &v);
911 if (!v.len) {
912 /* skip it */
913 break;
914 }
Willy Tarreau73373ab2018-09-14 17:11:33 +0200915 }
Amaury Denoyelle18ee5c32020-12-11 17:53:02 +0100916 else if (isteqi(n, ist("upgrade"))) {
917 h1_parse_upgrade_header(h1m, v);
918 }
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200919 else if (!(h1m->flags & H1_MF_RESP) && isteqi(n, ist("host"))) {
920 if (host_idx == -1)
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200921 host_idx = hdr_count;
922 else {
923 if (!isteqi(v, hdr[host_idx].v)) {
924 state = H1_MSG_HDR_L2_LWS;
Christopher Faulet17034782020-01-06 13:41:01 +0100925 ptr = v.ptr; /* Set ptr on the error */
Christopher Faulet497ab4f2019-10-11 09:01:44 +0200926 goto http_msg_invalid;
927 }
928 /* if the same host, skip it */
929 break;
930 }
931 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +0200932
Christopher Faulet2912f872018-09-19 14:01:04 +0200933 http_set_hdr(&hdr[hdr_count++], n, v);
934 } while (0);
935 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200936
937 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +0200938
Willy Tarreau794f9af2017-07-26 09:07:47 +0200939 if (likely(!HTTP_IS_CRLF(*ptr)))
940 goto http_msg_hdr_name;
941
942 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200943 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200944 goto http_msg_last_lf;
945
Willy Tarreau801250e2018-09-11 11:45:04 +0200946 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200947 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200948 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200949 ptr++;
950 /* <ptr> now points to the first byte of payload. If needed sol
951 * still points to the first of either CR or LF of the empty
952 * line ending the headers block.
953 */
Willy Tarreau5384aac2018-09-11 16:04:48 +0200954 if (likely(!skip_update)) {
955 if (unlikely(hdr_count >= hdr_num)) {
956 state = H1_MSG_LAST_LF;
957 goto http_output_full;
958 }
Christopher Fauletff08a922018-09-25 13:59:46 +0200959 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200960 }
Willy Tarreau001823c2018-09-12 17:25:32 +0200961
962 /* reaching here we've parsed the whole message. We may detect
963 * that we were already continuing an interrupted parsing pass
964 * so we were silently looking for the end of message not
965 * updating anything before deciding to parse it fully at once.
966 * It's guaranteed that we won't match this test twice in a row
967 * since restarting will turn zero.
968 */
969 if (restarting)
970 goto restart;
971
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200972
973 if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) {
974 struct ist authority;
975
976 authority = http_get_authority(sl.rq.u, 1);
977 if (sl.rq.meth == HTTP_METH_CONNECT) {
978 struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL);
979 int ret;
980
981 ret = h1_validate_connect_authority(authority, host);
982 if (ret < 0) {
983 if (h1m->err_pos < -1) {
984 state = H1_MSG_LAST_LF;
Willy Tarreaue21ad302022-10-04 08:02:03 +0200985 /* WT: gcc seems to see a path where sl.rq.u.ptr was used
986 * uninitialized, but it doesn't know that the function is
987 * called with initial states making this impossible.
988 */
989 ALREADY_CHECKED(sl.rq.u.ptr);
Christopher Faulet63f95ed2022-07-05 14:50:17 +0200990 ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
991 goto http_msg_invalid;
992 }
993 if (h1m->err_pos == -1) /* capture the error pointer */
994 h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
995 }
996 }
997 else if (host_idx != -1 && istlen(authority)) {
998 struct ist host = hdr[host_idx].v;
999
1000 /* For non-CONNECT method, the authority must match the host header value */
1001 if (!isteqi(authority, host)) {
1002 if (h1m->err_pos < -1) {
1003 state = H1_MSG_LAST_LF;
1004 ptr = host.ptr; /* Set ptr on the error */
1005 goto http_msg_invalid;
1006 }
1007 if (h1m->err_pos == -1) /* capture the error pointer */
1008 h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
1009 }
1010
1011 }
1012 }
1013
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001014 state = H1_MSG_DATA;
1015 if (h1m->flags & H1_MF_XFER_ENC) {
1016 if (h1m->flags & H1_MF_CLEN) {
1017 h1m->flags &= ~H1_MF_CLEN;
1018 hdr_count = http_del_hdr(hdr, ist("content-length"));
1019 }
1020
1021 if (h1m->flags & H1_MF_CHNK)
1022 state = H1_MSG_CHUNK_SIZE;
1023 else if (!(h1m->flags & H1_MF_RESP)) {
1024 /* cf RFC7230#3.3.3 : transfer-encoding in
1025 * request without chunked encoding is invalid.
1026 */
1027 goto http_msg_invalid;
1028 }
1029 }
1030
Willy Tarreau794f9af2017-07-26 09:07:47 +02001031 break;
1032
1033 default:
1034 /* impossible states */
1035 goto http_msg_invalid;
1036 }
1037
Willy Tarreau001823c2018-09-12 17:25:32 +02001038 /* Now we've left the headers state and are either in H1_MSG_DATA or
1039 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001040 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001041
Willy Tarreau5384aac2018-09-11 16:04:48 +02001042 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001043 *slp = sl;
1044
Willy Tarreau4433c082018-09-11 15:33:32 +02001045 h1m->state = state;
1046 h1m->next = ptr - start + skip;
1047 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001048
1049 http_msg_ood:
1050 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001051 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001052 *slp = sl;
1053
Willy Tarreau4433c082018-09-11 15:33:32 +02001054 h1m->state = state;
1055 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001056 return 0;
1057
1058 http_msg_invalid:
1059 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001060 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001061 *slp = sl;
1062
Willy Tarreau4433c082018-09-11 15:33:32 +02001063 h1m->err_state = h1m->state = state;
1064 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001065 return -1;
1066
1067 http_output_full:
1068 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001069 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001070 *slp = sl;
1071
Willy Tarreau4433c082018-09-11 15:33:32 +02001072 h1m->err_state = h1m->state = state;
1073 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001074 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001075
1076 restart:
Christopher Faulet84f06532019-09-03 16:05:31 +02001077 h1m->flags &= ~(H1_MF_VER_11|H1_MF_CLEN|H1_MF_XFER_ENC|H1_MF_CHNK|H1_MF_CONN_KAL|H1_MF_CONN_CLO|H1_MF_CONN_UPG);
1078 h1m->curr_len = h1m->body_len = h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001079 if (h1m->flags & H1_MF_RESP)
1080 h1m->state = H1_MSG_RPBEFORE;
1081 else
1082 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001083 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001084}
1085
/* Minimal scan of the trailers block located at offset <ofs> in <buf>, looking
 * at no more than <max> bytes. Lines are walked one at a time until an empty
 * one (a lone LF or a CRLF) is found, which marks the end of the trailers.
 *
 * Returns the number of bytes to delete in order to skip the trailers,
 * including the terminating empty line. Returns 0 when the end of the scanned
 * area is reached before the empty line was seen (more data are needed), and
 * -1 when a second CR is met on the same line (parse error; the caller decides
 * how to proceed).
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;

	for (;;) {
		const char *line = b_peek(buf, pos);
		const char *eol = NULL; /* first CR of the line, or its LF if none */
		const char *cur;

		/* walk the current line up to its LF */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur != '\r')
				continue;

			/* only one CR is tolerated per line */
			if (eol)
				return -1;
			eol = cur;
		}

		/* no CR seen: the line ends at the LF itself */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* LF/CRLF found right at the beginning of the line: this is
		 * the end of the trailers, nothing is left from this message.
		 */
		if (eol == line)
			return pos - ofs;
	}
}
Amaury Denoyellec1938232020-12-11 17:53:03 +01001136
/* Produce a random key suitable for a WebSocket handshake (RFC 6455). 128 bits
 * are drawn from ha_random64() in two steps and handed to a2base64(), which
 * writes the encoded form into <key_out>. The caller must provide 25 bytes of
 * room there.
 */
void h1_generate_random_ws_input_key(char key_out[25])
{
	char raw[16];
	uint64_t word;

	/* first half of the raw key */
	word = ha_random64();
	memcpy(raw, &word, sizeof(word));

	/* second half of the raw key */
	word = ha_random64();
	memcpy(raw + sizeof(word), &word, sizeof(word));

	a2base64(raw, sizeof(raw), key_out, 25);
}
1151
Amaury Denoyellec1938232020-12-11 17:53:03 +01001152#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
1153
1154/*
1155 * Calculate the WebSocket handshake response key from <key_in>. Following the
1156 * rfc6455, <key_in> must be 24 bytes longs. The result is stored in <key_out>
1157 * as a 29 bytes long string.
1158 */
1159void h1_calculate_ws_output_key(const char *key, char *result)
1160{
1161 blk_SHA_CTX sha1_ctx;
1162 char hash_in[60], hash_out[20];
1163
1164 /* concatenate the key with a fixed suffix */
1165 memcpy(hash_in, key, 24);
1166 memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
1167
1168 /* sha1 the result */
1169 blk_SHA1_Init(&sha1_ctx);
1170 blk_SHA1_Update(&sha1_ctx, hash_in, 60);
1171 blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
1172
1173 /* encode in base64 the hash */
1174 a2base64(hash_out, 20, result, 29);
1175}