blob: c617372210925c9789b02be174646ec0211384a3 [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020014#include <common/config.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020015#include <common/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020016
Willy Tarreau188e2302018-06-15 11:11:53 +020017#include <proto/channel.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020018#include <proto/h1.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020019#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020020
Willy Tarreau8740c8b2017-09-21 10:22:25 +020021/*
22 * This function parses a status line between <ptr> and <end>, starting with
23 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
24 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
25 * will give undefined results.
26 * Note that it is the caller's responsibility to ensure that ptr < end,
27 * and that msg->sol points to the beginning of the response.
28 * If a complete line is found (which implies that at least one CR or LF is
29 * found before <end>), the updated <ptr> is returned, otherwise NULL is
30 * returned indicating an incomplete line (which does not mean that parts have
31 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
32 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
33 * upon next call.
 *
 * NULL is also returned when an unexpected character is met. In that case the
 * state reported through <ret_state> is HTTP_MSG_ERROR and msg->err_state
 * holds the state in which the error was detected.
 *
 * The value stored into <ret_ptr> is not a pointer but the offset of <ptr>
 * from ci_head(msg->chn). The same base is used for every position written
 * into msg->sl.st (c, r) while v_l, c_l, r_l and l are lengths.
34 *
35 * This function was intentionally designed to be called from
36 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
37 * within its state machine and use the same macros, hence the need for same
38 * labels and variable names. Note that msg->sol is left unchanged.
 *
 * NOTE(review): EAT_AND_JUMP_OR_RETURN() is defined outside of this file
 * section; from its use here it presumably advances <ptr>, then either jumps
 * to the first label or records the given state and jumps to the second label
 * when <end> is reached -- confirm against its definition.
39 */
40const char *http_parse_stsline(struct http_msg *msg,
41 enum h1_state state, const char *ptr, const char *end,
42 unsigned int *ret_ptr, enum h1_state *ret_state)
43{
 /* base address from which all recorded offsets are computed */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +020044 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +020045
46 switch (state) {
47 case HTTP_MSG_RPVER:
48 http_msg_rpver:
 /* still inside the version token */
49 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
50 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
51
52 if (likely(HTTP_IS_SPHT(*ptr))) {
 /* first blank after the version: its offset is the version length */
53 msg->sl.st.v_l = ptr - msg_start;
54 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
55 }
 /* neither a version character nor a blank: report an error */
56 msg->err_state = HTTP_MSG_RPVER;
57 state = HTTP_MSG_ERROR;
58 break;
59
60 case HTTP_MSG_RPVER_SP:
61 http_msg_rpver_sp:
62 if (likely(!HTTP_IS_LWS(*ptr))) {
 /* first non-blank character: the status code starts here */
63 msg->sl.st.c = ptr - msg_start;
64 goto http_msg_rpcode;
65 }
66 if (likely(HTTP_IS_SPHT(*ptr)))
67 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
68 /* so it's a CR/LF, this is invalid */
69 msg->err_state = HTTP_MSG_RPVER_SP;
70 state = HTTP_MSG_ERROR;
71 break;
72
73 case HTTP_MSG_RPCODE:
74 http_msg_rpcode:
 /* the code extends up to the next blank, CR or LF; its characters
  * are not checked for being digits here.
  */
75 if (likely(!HTTP_IS_LWS(*ptr)))
76 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
77
78 if (likely(HTTP_IS_SPHT(*ptr))) {
79 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
80 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
81 }
82
83 /* so it's a CR/LF, so there is no reason phrase */
84 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
85 http_msg_rsp_reason:
86 /* FIXME: should we support HTTP responses without any reason phrase ? */
 /* an empty reason is recorded at the current position */
87 msg->sl.st.r = ptr - msg_start;
88 msg->sl.st.r_l = 0;
89 goto http_msg_rpline_eol;
90
91 case HTTP_MSG_RPCODE_SP:
92 http_msg_rpcode_sp:
93 if (likely(!HTTP_IS_LWS(*ptr))) {
 /* first non-blank character: the reason phrase starts here */
94 msg->sl.st.r = ptr - msg_start;
95 goto http_msg_rpreason;
96 }
97 if (likely(HTTP_IS_SPHT(*ptr)))
98 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
99 /* so it's a CR/LF, so there is no reason phrase */
100 goto http_msg_rsp_reason;
101
102 case HTTP_MSG_RPREASON:
103 http_msg_rpreason:
 /* the reason phrase runs up to the first CR or LF */
104 if (likely(!HTTP_IS_CRLF(*ptr)))
105 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
106 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
107 http_msg_rpline_eol:
108 /* We have seen the end of line. Note that we do not
109 * necessarily have the \n yet, but at least we know that we
110 * have EITHER \r OR \n, otherwise the response would not be
111 * complete. We can then record the response length and return
112 * to the caller which will be able to register it.
113 */
114 msg->sl.st.l = ptr - msg_start - msg->sol;
115 return ptr;
116
117 default:
118#ifdef DEBUG_FULL
119 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
120 exit(1);
121#endif
122 ;
123 }
124
 /* reached on missing data, on error (via "break") and for unhandled states */
125 http_msg_ood:
126 /* out of valid data */
127 if (ret_state)
128 *ret_state = state;
129 if (ret_ptr)
130 *ret_ptr = ptr - msg_start;
131 return NULL;
132}
133
134/*
135 * This function parses a request line between <ptr> and <end>, starting with
136 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
137 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
138 * will give undefined results.
139 * Note that it is the caller's responsibility to ensure that ptr < end,
140 * and that msg->sol points to the beginning of the request.
141 * If a complete line is found (which implies that at least one CR or LF is
142 * found before <end>), the updated <ptr> is returned, otherwise NULL is
143 * returned indicating an incomplete line (which does not mean that parts have
144 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
145 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
146 * upon next call.
 *
 * NULL is also returned when a forbidden character is met. In that case the
 * state reported through <ret_state> is HTTP_MSG_ERROR and msg->err_state
 * holds the state in which the error was detected.
 *
 * The value stored into <ret_ptr> is not a pointer but the offset of <ptr>
 * from ci_head(msg->chn). The same base is used for the positions written
 * into msg->sl.rq (u, v) while m_l, u_l, v_l and l are lengths.
 *
 * A request line ending before a version was seen is reported as complete
 * with msg->sl.rq.v_l set to zero (HTTP/0.9 form).
147 *
148 * This function was intentionally designed to be called from
149 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
150 * within its state machine and use the same macros, hence the need for same
151 * labels and variable names. Note that msg->sol is left unchanged.
 *
 * NOTE(review): EAT_AND_JUMP_OR_RETURN() is defined outside of this file
 * section; from its use here it presumably advances <ptr>, then either jumps
 * to the first label or records the given state and jumps to the second label
 * when <end> is reached -- confirm against its definition.
152 */
153const char *http_parse_reqline(struct http_msg *msg,
154 enum h1_state state, const char *ptr, const char *end,
155 unsigned int *ret_ptr, enum h1_state *ret_state)
156{
 /* base address from which all recorded offsets are computed */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200157 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200158
159 switch (state) {
160 case HTTP_MSG_RQMETH:
161 http_msg_rqmeth:
 /* still inside the method token */
162 if (likely(HTTP_IS_TOKEN(*ptr)))
163 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
164
165 if (likely(HTTP_IS_SPHT(*ptr))) {
 /* first blank after the method: its offset is the method length */
166 msg->sl.rq.m_l = ptr - msg_start;
167 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
168 }
169
170 if (likely(HTTP_IS_CRLF(*ptr))) {
171 /* HTTP 0.9 request */
172 msg->sl.rq.m_l = ptr - msg_start;
 /* The three labels below form a cascade: entering at any of
  * them fills all the remaining start line fields with empty
  * values located at the current position.
  */
173 http_msg_req09_uri:
174 msg->sl.rq.u = ptr - msg_start;
175 http_msg_req09_uri_e:
176 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
177 http_msg_req09_ver:
178 msg->sl.rq.v = ptr - msg_start;
179 msg->sl.rq.v_l = 0;
180 goto http_msg_rqline_eol;
181 }
 /* neither a token character, a blank nor an end of line */
182 msg->err_state = HTTP_MSG_RQMETH;
183 state = HTTP_MSG_ERROR;
184 break;
185
186 case HTTP_MSG_RQMETH_SP:
187 http_msg_rqmeth_sp:
188 if (likely(!HTTP_IS_LWS(*ptr))) {
 /* first non-blank character: the URI starts here */
189 msg->sl.rq.u = ptr - msg_start;
190 goto http_msg_rquri;
191 }
192 if (likely(HTTP_IS_SPHT(*ptr)))
193 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
194 /* so it's a CR/LF, meaning an HTTP 0.9 request */
195 goto http_msg_req09_uri;
196
197 case HTTP_MSG_RQURI:
198 http_msg_rquri:
199#if defined(__x86_64__) || \
200 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
201 defined(__ARM_ARCH_7A__)
202 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
 /* NOTE(review): the wording above reads inverted. The loop examines
  * sizeof(int) bytes at once and only ever advances <ptr>; it seems
  * to step over words made of bytes within 0x21..0x7e -- confirm.
  * Whatever it skips, the per-character tests at http_msg_rquri2
  * still run on the byte where it stops.
  */
203 while (ptr <= end - sizeof(int)) {
204 int x = *(int *)ptr - 0x21212121;
205 if (x & 0x80808080)
206 break;
207
208 x -= 0x5e5e5e5e;
209 if (!(x & 0x80808080))
210 break;
211
212 ptr += sizeof(int);
213 }
214#endif
 /* the loop above may have consumed everything that was available */
215 if (ptr >= end) {
216 state = HTTP_MSG_RQURI;
217 goto http_msg_ood;
218 }
219 http_msg_rquri2:
220 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
221 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
222
223 if (likely(HTTP_IS_SPHT(*ptr))) {
 /* a blank terminates the URI */
224 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
225 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
226 }
227
228 if (likely((unsigned char)*ptr >= 128)) {
229 /* non-ASCII chars are forbidden unless option
230 * accept-invalid-http-request is enabled in the frontend.
231 * In any case, we capture the faulty char.
232 */
 /* As used here: err_pos below -1 makes the character fatal,
  * -1 means nothing was captured yet, and any other value is
  * kept so that only the first faulty position is recorded.
  */
233 if (msg->err_pos < -1)
234 goto invalid_char;
235 if (msg->err_pos == -1)
236 msg->err_pos = ptr - msg_start;
237 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
238 }
239
240 if (likely(HTTP_IS_CRLF(*ptr))) {
241 /* so it's a CR/LF, meaning an HTTP 0.9 request */
242 goto http_msg_req09_uri_e;
243 }
244
245 /* OK forbidden chars, 0..31 or 127 */
246 invalid_char:
247 msg->err_pos = ptr - msg_start;
248 msg->err_state = HTTP_MSG_RQURI;
249 state = HTTP_MSG_ERROR;
250 break;
251
252 case HTTP_MSG_RQURI_SP:
253 http_msg_rquri_sp:
254 if (likely(!HTTP_IS_LWS(*ptr))) {
 /* first non-blank character: the version starts here */
255 msg->sl.rq.v = ptr - msg_start;
256 goto http_msg_rqver;
257 }
258 if (likely(HTTP_IS_SPHT(*ptr)))
259 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
260 /* so it's a CR/LF, meaning an HTTP 0.9 request */
261 goto http_msg_req09_ver;
262
263 case HTTP_MSG_RQVER:
264 http_msg_rqver:
 /* still inside the version token */
265 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
266 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
267
268 if (likely(HTTP_IS_CRLF(*ptr))) {
269 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
270 http_msg_rqline_eol:
271 /* We have seen the end of line. Note that we do not
272 * necessarily have the \n yet, but at least we know that we
273 * have EITHER \r OR \n, otherwise the request would not be
274 * complete. We can then record the request length and return
275 * to the caller which will be able to register it.
276 */
277 msg->sl.rq.l = ptr - msg_start - msg->sol;
278 return ptr;
279 }
280
281 /* neither an HTTP_VER token nor a CRLF */
282 msg->err_state = HTTP_MSG_RQVER;
283 state = HTTP_MSG_ERROR;
284 break;
285
286 default:
287#ifdef DEBUG_FULL
288 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
289 exit(1);
290#endif
291 ;
292 }
293
 /* reached on missing data, on error (via "break") and for unhandled states */
294 http_msg_ood:
295 /* out of valid data */
296 if (ret_state)
297 *ret_state = state;
298 if (ret_ptr)
299 *ret_ptr = ptr - msg_start;
300 return NULL;
301}
302
303/*
304 * This function parses an HTTP message, either a request or a response,
305 * depending on the initial msg->msg_state. The caller is responsible for
306 * ensuring that the message does not wrap. The function can be preempted
307 * everywhere when data are missing and recalled at the exact same location
308 * with no information loss. The message may even be realigned between two
309 * calls. The header index is re-initialized when switching from
310 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
311 * fields. Note that msg->sol will be initialized after completing the first
312 * state, so that none of the msg pointers has to be initialized prior to the
313 * first call.
 *
 * Parsing resumes at offset msg->next from the head of the channel's buffer
 * and in state msg->msg_state. On return, one of the following holds :
 *   - msg->msg_state == HTTP_MSG_BODY : the last LF of the headers block was
 *     consumed; msg->next and msg->sov hold the total headers length,
 *     msg->eoh the offset of the final CRLF and msg->eol its length.
 *   - msg->msg_state == HTTP_MSG_ERROR : msg->err_state holds the state in
 *     which the problem was detected and msg->next the faulty offset.
 *   - any other state : more data are needed; msg->msg_state and msg->next
 *     tell where to resume.
 *
 * NOTE(review): hdr_idx_init() is only visibly called on the response path
 * below (HTTP_MSG_RPBEFORE), not on the request path -- confirm where the
 * request side initializes <idx>.
 *
 * NOTE(review): EAT_AND_JUMP_OR_RETURN() and EXPECT_LF_HERE() are defined
 * outside of this file section. From their use here, the former presumably
 * advances <ptr> and jumps to its first label, or records the given state and
 * jumps to its second label once <end> is reached; the latter presumably
 * records the given state and jumps to its label when *ptr is not an LF.
 * Since the code following each EAT_AND_JUMP_OR_RETURN() is marked
 * "stop here", the macro is expected never to fall through.
314 */
315void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
316{
317 enum h1_state state; /* updated only when leaving the FSM */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200318 register const char *ptr, *end; /* request pointers, to avoid dereferences */
Willy Tarreau950a8a62018-09-06 10:48:15 +0200319 struct buffer *buf = &msg->chn->buf;
 /* every offset stored in <msg> by this function is relative to <input> */
320 char *input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200321
322 state = msg->msg_state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200323 ptr = input + msg->next;
324 end = b_stop(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200325
 /* nothing new to parse: leave with the state unchanged */
326 if (unlikely(ptr >= end))
327 goto http_msg_ood;
328
329 switch (state) {
330 /*
331 * First, states that are specific to the response only.
332 * We check them first so that request and headers are
333 * closer to each other (accessed more often).
334 */
335 case HTTP_MSG_RPBEFORE:
336 http_msg_rpbefore:
337 if (likely(HTTP_IS_TOKEN(*ptr))) {
338 /* we have a start of message, but we have to check
339 * first if we need to remove some CRLF. We can only
340 * do this when o=0.
341 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200342 if (unlikely(ptr != input)) {
 /* pending output data: wait before touching the buffer */
343 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200344 goto http_msg_ood;
345 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200346 b_del(buf, ptr - input);
 /* the head moved, refresh the base used for offsets */
Willy Tarreau950a8a62018-09-06 10:48:15 +0200347 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200348 }
349 msg->sol = 0;
350 msg->sl.st.l = 0; /* used in debug mode */
351 hdr_idx_init(idx);
352 state = HTTP_MSG_RPVER;
353 goto http_msg_rpver;
354 }
355
 /* only empty lines are accepted before the start of a message */
356 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
357 state = HTTP_MSG_RPBEFORE;
358 goto http_msg_invalid;
359 }
360
 /* a lone LF loops on the same state, a CR must be followed by an LF */
361 if (unlikely(*ptr == '\n'))
362 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
363 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
364 /* stop here */
365
366 case HTTP_MSG_RPBEFORE_CR:
367 http_msg_rpbefore_cr:
368 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
369 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
370 /* stop here */
371
372 case HTTP_MSG_RPVER:
373 http_msg_rpver:
374 case HTTP_MSG_RPVER_SP:
375 case HTTP_MSG_RPCODE:
376 case HTTP_MSG_RPCODE_SP:
377 case HTTP_MSG_RPREASON:
 /* The whole status line is delegated. On NULL (missing data or
  * error) the helper has already written the resume offset and
  * state into msg->next and msg->msg_state through the pointers
  * passed below, so there is nothing left to save here.
  */
378 ptr = (char *)http_parse_stsline(msg,
379 state, ptr, end,
380 &msg->next, &msg->msg_state);
381 if (unlikely(!ptr))
382 return;
383
384 /* we have a full response and we know that we have either a CR
385 * or an LF at <ptr>.
386 */
387 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
388
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200389 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200390 if (likely(*ptr == '\r'))
391 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
392 goto http_msg_rpline_end;
393
394 case HTTP_MSG_RPLINE_END:
395 http_msg_rpline_end:
396 /* msg->sol must point to the first of CR or LF. */
397 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
398 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
399 /* stop here */
400
401 /*
402 * Second, states that are specific to the request only
403 */
404 case HTTP_MSG_RQBEFORE:
405 http_msg_rqbefore:
406 if (likely(HTTP_IS_TOKEN(*ptr))) {
407 /* we have a start of message, but we have to check
408 * first if we need to remove some CRLF. We can only
409 * do this when o=0.
410 */
 /* NOTE(review): the response counterpart of this test uses
  * unlikely(); only the branch hint differs, not the logic.
  */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200411 if (likely(ptr != input)) {
 /* pending output data: wait before touching the buffer */
412 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200413 goto http_msg_ood;
414 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200415 b_del(buf, ptr - input);
 /* the head moved, refresh the base used for offsets */
Willy Tarreau950a8a62018-09-06 10:48:15 +0200416 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200417 }
418 msg->sol = 0;
419 msg->sl.rq.l = 0; /* used in debug mode */
420 state = HTTP_MSG_RQMETH;
421 goto http_msg_rqmeth;
422 }
423
 /* only empty lines are accepted before the start of a message */
424 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
425 state = HTTP_MSG_RQBEFORE;
426 goto http_msg_invalid;
427 }
428
 /* a lone LF loops on the same state, a CR must be followed by an LF */
429 if (unlikely(*ptr == '\n'))
430 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
431 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
432 /* stop here */
433
434 case HTTP_MSG_RQBEFORE_CR:
435 http_msg_rqbefore_cr:
436 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
437 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
438 /* stop here */
439
440 case HTTP_MSG_RQMETH:
441 http_msg_rqmeth:
442 case HTTP_MSG_RQMETH_SP:
443 case HTTP_MSG_RQURI:
444 case HTTP_MSG_RQURI_SP:
445 case HTTP_MSG_RQVER:
 /* The whole request line is delegated. On NULL (missing data or
  * error) the helper has already written the resume offset and
  * state into msg->next and msg->msg_state through the pointers
  * passed below, so there is nothing left to save here.
  */
446 ptr = (char *)http_parse_reqline(msg,
447 state, ptr, end,
448 &msg->next, &msg->msg_state);
449 if (unlikely(!ptr))
450 return;
451
452 /* we have a full request and we know that we have either a CR
453 * or an LF at <ptr>.
454 */
455 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
456
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200457 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200458 if (likely(*ptr == '\r'))
459 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
460 goto http_msg_rqline_end;
461
462 case HTTP_MSG_RQLINE_END:
463 http_msg_rqline_end:
464 /* check for HTTP/0.9 request : no version information available.
465 * msg->sol must point to the first of CR or LF.
466 */
 /* such a request carries no headers: go straight to the final LF */
467 if (unlikely(msg->sl.rq.v_l == 0))
468 goto http_msg_last_lf;
469
470 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
471 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
472 /* stop here */
473
474 /*
475 * Common states below
476 */
477 case HTTP_MSG_HDR_FIRST:
478 http_msg_hdr_first:
 /* start of the first header line, or of the empty line ending the headers */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200479 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200480 if (likely(!HTTP_IS_CRLF(*ptr))) {
481 goto http_msg_hdr_name;
482 }
483
484 if (likely(*ptr == '\r'))
485 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
486 goto http_msg_last_lf;
487
488 case HTTP_MSG_HDR_NAME:
489 http_msg_hdr_name:
490 /* assumes msg->sol points to the first char */
491 if (likely(HTTP_IS_TOKEN(*ptr)))
492 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
493
494 if (likely(*ptr == ':'))
495 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
496
 /* A non-token character in a name is fatal when err_pos is below
  * -1, and an LF is always fatal. Otherwise the character is
  * tolerated and the position of the first one is captured.
  */
497 if (likely(msg->err_pos < -1) || *ptr == '\n') {
498 state = HTTP_MSG_HDR_NAME;
499 goto http_msg_invalid;
500 }
501
502 if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200503 msg->err_pos = ptr - input; /* >= 0 now */
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200504
505 /* and we still accept this non-token character */
506 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
507
508 case HTTP_MSG_HDR_L1_SP:
509 http_msg_hdr_l1_sp:
510 /* assumes msg->sol points to the first char */
511 if (likely(HTTP_IS_SPHT(*ptr)))
512 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
513
514 /* header value can be basically anything except CR/LF */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200515 msg->sov = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200516
517 if (likely(!HTTP_IS_CRLF(*ptr))) {
518 goto http_msg_hdr_val;
519 }
520
521 if (likely(*ptr == '\r'))
522 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
523 goto http_msg_hdr_l1_lf;
524
525 case HTTP_MSG_HDR_L1_LF:
526 http_msg_hdr_l1_lf:
527 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
528 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
529
530 case HTTP_MSG_HDR_L1_LWS:
531 http_msg_hdr_l1_lws:
 /* a line starting with a blank continues the (still empty) value */
532 if (likely(HTTP_IS_SPHT(*ptr))) {
533 /* replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200534 for (; input + msg->sov < ptr; msg->sov++)
535 input[msg->sov] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200536 goto http_msg_hdr_l1_sp;
537 }
538 /* we had a header consisting only in spaces ! */
539 msg->eol = msg->sov;
540 goto http_msg_complete_header;
541
542 case HTTP_MSG_HDR_VAL:
543 http_msg_hdr_val:
544 /* assumes msg->sol points to the first char, and msg->sov
545 * points to the first character of the value.
546 */
547
548 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
549 * and lower. In fact since most of the time is spent in the loop, we
550 * also remove the sign bit test so that bytes 0x8e..0x0d break the
551 * loop, but we don't care since they're very rare in header values.
552 */
 /* leaving either loop early is harmless: http_msg_hdr_val2 rechecks
  * the current byte with the exact per-character test.
  */
553#if defined(__x86_64__)
554 while (ptr <= end - sizeof(long)) {
555 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
556 goto http_msg_hdr_val2;
557 ptr += sizeof(long);
558 }
559#endif
560#if defined(__x86_64__) || \
561 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
562 defined(__ARM_ARCH_7A__)
563 while (ptr <= end - sizeof(int)) {
564 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
565 goto http_msg_hdr_val2;
566 ptr += sizeof(int);
567 }
568#endif
 /* the loops above may have consumed everything that was available */
569 if (ptr >= end) {
570 state = HTTP_MSG_HDR_VAL;
571 goto http_msg_ood;
572 }
573 http_msg_hdr_val2:
574 if (likely(!HTTP_IS_CRLF(*ptr)))
575 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
576
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200577 msg->eol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200578 /* Note: we could also copy eol into ->eoh so that we have the
579 * real header end in case it ends with lots of LWS, but is this
580 * really needed ?
581 */
582 if (likely(*ptr == '\r'))
583 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
584 goto http_msg_hdr_l2_lf;
585
586 case HTTP_MSG_HDR_L2_LF:
587 http_msg_hdr_l2_lf:
588 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
589 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
590
591 case HTTP_MSG_HDR_L2_LWS:
592 http_msg_hdr_l2_lws:
 /* a line starting with a blank continues the previous value */
593 if (unlikely(HTTP_IS_SPHT(*ptr))) {
594 /* LWS: replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200595 for (; input + msg->eol < ptr; msg->eol++)
596 input[msg->eol] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200597 goto http_msg_hdr_val;
598 }
599 http_msg_complete_header:
600 /*
601 * It was a new header, so the last one is finished.
602 * Assumes msg->sol points to the first char, msg->sov points
603 * to the first character of the value and msg->eol to the
604 * first CR or LF so we know how the line ends. We insert last
605 * header into the index.
606 */
 /* a negative return from hdr_idx_add() is reported as an invalid message */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200607 if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r',
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200608 idx, idx->tail) < 0)) {
609 state = HTTP_MSG_HDR_L2_LWS;
610 goto http_msg_invalid;
611 }
612
 /* next line: either another header or the empty line ending the block */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200613 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200614 if (likely(!HTTP_IS_CRLF(*ptr))) {
615 goto http_msg_hdr_name;
616 }
617
618 if (likely(*ptr == '\r'))
619 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
620 goto http_msg_last_lf;
621
622 case HTTP_MSG_LAST_LF:
623 http_msg_last_lf:
624 /* Assumes msg->sol points to the first of either CR or LF.
625 * Sets ->sov and ->next to the total header length, ->eoh to
626 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
627 */
628 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
629 ptr++;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200630 msg->sov = msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200631 msg->eoh = msg->sol;
632 msg->sol = 0;
633 msg->eol = msg->sov - msg->eoh;
634 msg->msg_state = HTTP_MSG_BODY;
635 return;
636
637 case HTTP_MSG_ERROR:
638 /* this may only happen if we call http_msg_analyzer() twice with an error */
639 break;
640
641 default:
642#ifdef DEBUG_FULL
643 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
644 exit(1);
645#endif
646 ;
647 }
648 http_msg_ood:
649 /* out of data */
 /* save where and in which state to resume on the next call */
650 msg->msg_state = state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200651 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200652 return;
653
654 http_msg_invalid:
655 /* invalid message */
 /* remember the state which detected the problem and where it happened */
656 msg->err_state = state;
657 msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200658 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200659 return;
660}
661
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200662
Willy Tarreau73373ab2018-09-14 17:11:33 +0200663/* Parse the Content-Length header field of an HTTP/1 request. The function
664 * checks all possible occurrences of a comma-delimited value, and verifies
665 * if any of them doesn't match a previous value. It returns <0 if a value
666 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
667 * if the value can be indexed (first one). In the last case, the value might
668 * be adjusted and the caller must only add the updated value.
669 */
670int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
671{
672 char *e, *n;
673 long long cl;
674 int not_first = !!(h1m->flags & H1_MF_CLEN);
675 struct ist word;
676
677 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
678 e = value->ptr + value->len;
679
680 while (++word.ptr < e) {
681 /* skip leading delimitor and blanks */
682 if (unlikely(HTTP_IS_LWS(*word.ptr)))
683 continue;
684
685 /* digits only now */
686 for (cl = 0, n = word.ptr; n < e; n++) {
687 unsigned int c = *n - '0';
688 if (unlikely(c > 9)) {
689 /* non-digit */
690 if (unlikely(n == word.ptr)) // spaces only
691 goto fail;
692 break;
693 }
694 if (unlikely(cl > ULLONG_MAX / 10ULL))
695 goto fail; /* multiply overflow */
696 cl = cl * 10ULL;
697 if (unlikely(cl + c < cl))
698 goto fail; /* addition overflow */
699 cl = cl + c;
700 }
701
702 /* keep a copy of the exact cleaned value */
703 word.len = n - word.ptr;
704
705 /* skip trailing LWS till next comma or EOL */
706 for (; n < e; n++) {
707 if (!HTTP_IS_LWS(*n)) {
708 if (unlikely(*n != ','))
709 goto fail;
710 break;
711 }
712 }
713
714 /* if duplicate, must be equal */
715 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
716 goto fail;
717
718 /* OK, store this result as the one to be indexed */
719 h1m->flags |= H1_MF_CLEN;
720 h1m->curr_len = h1m->body_len = cl;
721 *value = word;
722 word.ptr = n;
723 }
724 /* here we've reached the end with a single value or a series of
725 * identical values, all matching previous series if any. The last
726 * parsed value was sent back into <value>. We just have to decide
727 * if this occurrence has to be indexed (it's the first one) or
728 * silently skipped (it's not the first one)
729 */
730 return !not_first;
731 fail:
732 return -1;
733}
734
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200735/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
736 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
737 * this case. Any other token found or any empty header field found will reset
738 * this flag, so that it accurately represents the token's presence at the last
739 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
740 * are case-insensitive (cf RFC7230#4).
741 */
742void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
743{
744 char *e, *n;
745 struct ist word;
746
747 h1m->flags |= H1_MF_XFER_ENC;
748 h1m->flags &= ~H1_MF_CHNK;
749
750 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
751 e = value.ptr + value.len;
752
753 while (++word.ptr < e) {
754 /* skip leading delimitor and blanks */
755 if (HTTP_IS_LWS(*word.ptr))
756 continue;
757
758 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
759 word.len = n - word.ptr;
760
761 /* trim trailing blanks */
762 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
763 word.len--;
764
765 h1m->flags &= ~H1_MF_CHNK;
766 if (isteqi(word, ist("chunked")))
767 h1m->flags |= H1_MF_CHNK;
768
769 word.ptr = n;
770 }
771}
772
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200773/* Parse the Connection: header of an HTTP/1 request, looking for "close",
774 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
775 * what was found there. Note that flags are only added, not removed, so the
776 * function is safe for being called multiple times if multiple occurrences
777 * are found.
778 */
779void h1_parse_connection_header(struct h1m *h1m, struct ist value)
780{
781 char *e, *n;
782 struct ist word;
783
784 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
785 e = value.ptr + value.len;
786
787 while (++word.ptr < e) {
788 /* skip leading delimitor and blanks */
789 if (HTTP_IS_LWS(*word.ptr))
790 continue;
791
792 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
793 word.len = n - word.ptr;
794
795 /* trim trailing blanks */
796 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
797 word.len--;
798
799 if (isteqi(word, ist("keep-alive")))
800 h1m->flags |= H1_MF_CONN_KAL;
801 else if (isteqi(word, ist("close")))
802 h1m->flags |= H1_MF_CONN_CLO;
803 else if (isteqi(word, ist("upgrade")))
804 h1m->flags |= H1_MF_CONN_UPG;
805
806 word.ptr = n;
807 }
808}
809
Willy Tarreau794f9af2017-07-26 09:07:47 +0200810/* This function parses a contiguous HTTP/1 headers block starting at <start>
811 * and ending before <stop>, at once, and converts it to a list of (name,value)
812 * pairs representing header fields into the array <hdr> of size <hdr_num>,
813 * whose last entry will have an empty name and an empty value. If <hdr_num> is
Willy Tarreau4433c082018-09-11 15:33:32 +0200814 * too small to represent the whole message, an error is returned. Some
815 * protocol elements such as content-length and transfer-encoding will be
Willy Tarreau5384aac2018-09-11 16:04:48 +0200816 * parsed and stored into h1m as well. <hdr> may be null, in which case only
817 * the parsing state will be updated. This may be used to restart the parsing
818 * where it stopped for example.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200819 *
820 * For now it's limited to the response. If the header block is incomplete,
821 * 0 is returned, waiting to be called again with more data to try it again.
Willy Tarreau4433c082018-09-11 15:33:32 +0200822 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200823 * and h1m->next to zero on the first call, the parser will do the rest. If
824 * an incomplete message is seen, the caller only needs to present h1m->state
825 * and h1m->next again, with an empty header list so that the parser can start
826 * again. In this case, it will detect that it interrupted a previous session
827 * and will first look for the end of the message before reparsing it again and
828 * indexing it at the same time. This ensures that incomplete messages fed 1
829 * character at a time are never processed entirely more than exactly twice,
830 * and that there is no need to store all the internal state and pre-parsed
831 * headers or start line between calls.
Willy Tarreau794f9af2017-07-26 09:07:47 +0200832 *
Willy Tarreaua41393f2018-09-11 15:34:50 +0200833 * A pointer to a start line descriptor may be passed in <slp>, in which case
834 * the parser will fill it with whatever it found.
835 *
Willy Tarreau794f9af2017-07-26 09:07:47 +0200836 * The code derived from the main HTTP/1 parser above but was simplified and
837 * optimized to process responses produced or forwarded by haproxy. The caller
838 * is responsible for ensuring that the message doesn't wrap, and should ensure
839 * it is complete to avoid having to retry the operation after a failed
840 * attempt. The message is not supposed to be invalid, which is why a few
841 * properties such as the character set used in the header field names are not
842 * checked. In case of an unparsable response message, a negative value will be
843 * returned with h1m->err_pos and h1m->err_state matching the location and
844 * state where the error was met. Leading blank likes are tolerated but not
845 * recommended.
846 *
847 * This function returns :
848 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
Willy Tarreau801250e2018-09-11 11:45:04 +0200849 * set) with the state the error occurred in and h1m->err_pos with the
Willy Tarreau794f9af2017-07-26 09:07:47 +0200850 * the position relative to <start>
851 * -2 if the output is full (hdr_num reached). err_state and err_pos also
852 * indicate where it failed.
853 * 0 in case of missing data.
854 * > 0 on success, it then corresponds to the number of bytes read since
855 * <start> so that the caller can go on with the payload.
856 */
857int h1_headers_to_hdr_list(char *start, const char *stop,
858 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200859 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200860{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200861 enum h1m_state state;
862 register char *ptr;
863 register const char *end;
864 unsigned int hdr_count;
865 unsigned int skip; /* number of bytes skipped at the beginning */
866 unsigned int sol; /* start of line */
867 unsigned int col; /* position of the colon */
868 unsigned int eol; /* end of line */
869 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200870 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200871 int skip_update;
872 int restarting;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200873 struct ist n, v; /* header name and value during parsing */
874
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200875 skip = 0; // do it only once to keep track of the leading CRLF.
876
877 try_again:
878 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200879 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200880 skip_update = restarting = 0;
881
882 ptr = start + h1m->next;
883 end = stop;
884 state = h1m->state;
885
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200886 if (state != H1_MSG_RQBEFORE && state != H1_MSG_RPBEFORE)
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200887 restarting = 1;
888
Willy Tarreau794f9af2017-07-26 09:07:47 +0200889 if (unlikely(ptr >= end))
890 goto http_msg_ood;
891
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200892 /* don't update output if hdr is NULL or if we're restarting */
893 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200894 skip_update = 1;
895
Willy Tarreau794f9af2017-07-26 09:07:47 +0200896 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200897 case H1_MSG_RQBEFORE:
898 http_msg_rqbefore:
899 if (likely(HTTP_IS_TOKEN(*ptr))) {
900 /* we have a start of message, we may have skipped some
901 * heading CRLF. Skip them now.
902 */
903 skip += ptr - start;
904 start = ptr;
905
906 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200907 sl.rq.m.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200908 hdr_count = 0;
909 state = H1_MSG_RQMETH;
910 goto http_msg_rqmeth;
911 }
912
913 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
914 state = H1_MSG_RQBEFORE;
915 goto http_msg_invalid;
916 }
917
918 if (unlikely(*ptr == '\n'))
919 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
920 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
921 /* stop here */
922
923 case H1_MSG_RQBEFORE_CR:
924 http_msg_rqbefore_cr:
925 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
926 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
927 /* stop here */
928
929 case H1_MSG_RQMETH:
930 http_msg_rqmeth:
931 if (likely(HTTP_IS_TOKEN(*ptr)))
932 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
933
934 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200935 sl.rq.m.len = ptr - sl.rq.m.ptr;
936 sl.rq.meth = find_http_meth(start, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200937 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
938 }
939
940 if (likely(HTTP_IS_CRLF(*ptr))) {
941 /* HTTP 0.9 request */
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200942 sl.rq.m.len = ptr - sl.rq.m.ptr;
943 sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200944 http_msg_req09_uri:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200945 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200946 http_msg_req09_uri_e:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200947 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200948 http_msg_req09_ver:
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200949 sl.rq.v.ptr = ptr;
950 sl.rq.v.len = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200951 goto http_msg_rqline_eol;
952 }
953 state = H1_MSG_RQMETH;
954 goto http_msg_invalid;
955
956 case H1_MSG_RQMETH_SP:
957 http_msg_rqmeth_sp:
958 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200959 sl.rq.u.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200960 goto http_msg_rquri;
961 }
962 if (likely(HTTP_IS_SPHT(*ptr)))
963 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
964 /* so it's a CR/LF, meaning an HTTP 0.9 request */
965 goto http_msg_req09_uri;
966
967 case H1_MSG_RQURI:
968 http_msg_rquri:
969#if defined(__x86_64__) || \
970 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
971 defined(__ARM_ARCH_7A__)
972 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
973 while (ptr <= end - sizeof(int)) {
974 int x = *(int *)ptr - 0x21212121;
975 if (x & 0x80808080)
976 break;
977
978 x -= 0x5e5e5e5e;
979 if (!(x & 0x80808080))
980 break;
981
982 ptr += sizeof(int);
983 }
984#endif
985 if (ptr >= end) {
986 state = H1_MSG_RQURI;
987 goto http_msg_ood;
988 }
989 http_msg_rquri2:
990 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
991 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
992
993 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +0200994 sl.rq.u.len = ptr - sl.rq.u.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200995 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
996 }
997 if (likely((unsigned char)*ptr >= 128)) {
998 /* non-ASCII chars are forbidden unless option
999 * accept-invalid-http-request is enabled in the frontend.
1000 * In any case, we capture the faulty char.
1001 */
1002 if (h1m->err_pos < -1)
1003 goto invalid_char;
1004 if (h1m->err_pos == -1)
1005 h1m->err_pos = ptr - start + skip;
1006 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
1007 }
1008
1009 if (likely(HTTP_IS_CRLF(*ptr))) {
1010 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1011 goto http_msg_req09_uri_e;
1012 }
1013
1014 /* OK forbidden chars, 0..31 or 127 */
1015 invalid_char:
1016 state = H1_MSG_RQURI;
1017 goto http_msg_invalid;
1018
1019 case H1_MSG_RQURI_SP:
1020 http_msg_rquri_sp:
1021 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001022 sl.rq.v.ptr = ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001023 goto http_msg_rqver;
1024 }
1025 if (likely(HTTP_IS_SPHT(*ptr)))
1026 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
1027 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1028 goto http_msg_req09_ver;
1029
1030
1031 case H1_MSG_RQVER:
1032 http_msg_rqver:
1033 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
1034 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
1035
1036 if (likely(HTTP_IS_CRLF(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001037 sl.rq.v.len = ptr - sl.rq.v.ptr;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001038 http_msg_rqline_eol:
1039 /* We have seen the end of line. Note that we do not
1040 * necessarily have the \n yet, but at least we know that we
1041 * have EITHER \r OR \n, otherwise the request would not be
1042 * complete. We can then record the request length and return
1043 * to the caller which will be able to register it.
1044 */
1045
1046 if (likely(!skip_update)) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001047 if ((sl.rq.v.len == 8) &&
1048 (*(sl.rq.v.ptr + 5) > '1' ||
1049 (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +02001050 h1m->flags |= H1_MF_VER_11;
1051
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001052 if (unlikely(hdr_count >= hdr_num)) {
1053 state = H1_MSG_RQVER;
1054 goto http_output_full;
1055 }
Christopher Faulet25da9e32018-10-08 15:50:15 +02001056 if (!(h1m->flags & H1_MF_NO_PHDR))
1057 http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001058
1059 if (unlikely(hdr_count >= hdr_num)) {
1060 state = H1_MSG_RQVER;
1061 goto http_output_full;
1062 }
Christopher Faulet25da9e32018-10-08 15:50:15 +02001063 if (!(h1m->flags & H1_MF_NO_PHDR))
1064 http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001065 }
1066
1067 sol = ptr - start;
1068 if (likely(*ptr == '\r'))
1069 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
1070 goto http_msg_rqline_end;
1071 }
1072
1073 /* neither an HTTP_VER token nor a CRLF */
1074 state = H1_MSG_RQVER;
1075 goto http_msg_invalid;
1076
1077 case H1_MSG_RQLINE_END:
1078 http_msg_rqline_end:
1079 /* check for HTTP/0.9 request : no version information
1080 * available. sol must point to the first of CR or LF. However
1081 * since we don't save these elements between calls, if we come
1082 * here from a restart, we don't necessarily know. Thus in this
1083 * case we simply start over.
1084 */
1085 if (restarting)
1086 goto restart;
1087
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001088 if (unlikely(sl.rq.v.len == 0))
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001089 goto http_msg_last_lf;
1090
1091 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
1092 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
1093 /* stop here */
1094
1095 /*
1096 * Common states below
1097 */
Willy Tarreau801250e2018-09-11 11:45:04 +02001098 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001099 http_msg_rpbefore:
1100 if (likely(HTTP_IS_TOKEN(*ptr))) {
1101 /* we have a start of message, we may have skipped some
1102 * heading CRLF. Skip them now.
1103 */
1104 skip += ptr - start;
1105 start = ptr;
1106
1107 sol = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001108 sl.st.v.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001109 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +02001110 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001111 goto http_msg_rpver;
1112 }
1113
1114 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +02001115 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001116 goto http_msg_invalid;
1117 }
1118
1119 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001120 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
1121 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001122 /* stop here */
1123
Willy Tarreau801250e2018-09-11 11:45:04 +02001124 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001125 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +02001126 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
1127 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001128 /* stop here */
1129
Willy Tarreau801250e2018-09-11 11:45:04 +02001130 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001131 http_msg_rpver:
1132 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001133 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001134
1135 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001136 sl.st.v.len = ptr - sl.st.v.ptr;
Willy Tarreauba5fbca2018-09-13 11:32:51 +02001137
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001138 if ((sl.st.v.len == 8) &&
1139 (*(sl.st.v.ptr + 5) > '1' ||
1140 (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
Willy Tarreauba5fbca2018-09-13 11:32:51 +02001141 h1m->flags |= H1_MF_VER_11;
1142
Willy Tarreau801250e2018-09-11 11:45:04 +02001143 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001144 }
Willy Tarreau801250e2018-09-11 11:45:04 +02001145 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001146 goto http_msg_invalid;
1147
Willy Tarreau801250e2018-09-11 11:45:04 +02001148 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001149 http_msg_rpver_sp:
1150 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +02001151 sl.st.status = 0;
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001152 sl.st.c.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001153 goto http_msg_rpcode;
1154 }
1155 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001156 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001157 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +02001158 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001159 goto http_msg_invalid;
1160
Willy Tarreau801250e2018-09-11 11:45:04 +02001161 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001162 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +01001163 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +02001164 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +02001165 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001166 }
1167
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +01001168 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +02001169 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +01001170 goto http_msg_invalid;
1171 }
1172
Willy Tarreau794f9af2017-07-26 09:07:47 +02001173 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001174 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau801250e2018-09-11 11:45:04 +02001175 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001176 }
1177
1178 /* so it's a CR/LF, so there is no reason phrase */
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001179 sl.st.c.len = ptr - sl.st.c.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001180
1181 http_msg_rsp_reason:
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001182 sl.st.r.ptr = ptr;
1183 sl.st.r.len = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001184 goto http_msg_rpline_eol;
1185
Willy Tarreau801250e2018-09-11 11:45:04 +02001186 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001187 http_msg_rpcode_sp:
1188 if (likely(!HTTP_IS_LWS(*ptr))) {
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001189 sl.st.r.ptr = ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001190 goto http_msg_rpreason;
1191 }
1192 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001193 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001194 /* so it's a CR/LF, so there is no reason phrase */
1195 goto http_msg_rsp_reason;
1196
Willy Tarreau801250e2018-09-11 11:45:04 +02001197 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001198 http_msg_rpreason:
1199 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001200 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Christopher Faulet1dc2b492018-10-08 15:34:02 +02001201 sl.st.r.len = ptr - sl.st.r.ptr;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001202 http_msg_rpline_eol:
1203 /* We have seen the end of line. Note that we do not
1204 * necessarily have the \n yet, but at least we know that we
1205 * have EITHER \r OR \n, otherwise the response would not be
1206 * complete. We can then record the response length and return
1207 * to the caller which will be able to register it.
1208 */
1209
Willy Tarreau5384aac2018-09-11 16:04:48 +02001210 if (likely(!skip_update)) {
1211 if (unlikely(hdr_count >= hdr_num)) {
1212 state = H1_MSG_RPREASON;
1213 goto http_output_full;
1214 }
Christopher Faulet25da9e32018-10-08 15:50:15 +02001215 if (!(h1m->flags & H1_MF_NO_PHDR))
1216 http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001217 }
Willy Tarreau794f9af2017-07-26 09:07:47 +02001218
1219 sol = ptr - start;
1220 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001221 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001222 goto http_msg_rpline_end;
1223
Willy Tarreau801250e2018-09-11 11:45:04 +02001224 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001225 http_msg_rpline_end:
1226 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +02001227 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
1228 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001229 /* stop here */
1230
Willy Tarreau801250e2018-09-11 11:45:04 +02001231 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001232 http_msg_hdr_first:
1233 sol = ptr - start;
1234 if (likely(!HTTP_IS_CRLF(*ptr))) {
1235 goto http_msg_hdr_name;
1236 }
1237
1238 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001239 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001240 goto http_msg_last_lf;
1241
Willy Tarreau801250e2018-09-11 11:45:04 +02001242 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001243 http_msg_hdr_name:
1244 /* assumes sol points to the first char */
1245 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +02001246 if (!skip_update) {
1247 /* turn it to lower case if needed */
1248 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
1249 *ptr = tolower(*ptr);
1250 }
Willy Tarreau801250e2018-09-11 11:45:04 +02001251 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001252 }
1253
1254 if (likely(*ptr == ':')) {
1255 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +02001256 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001257 }
1258
Willy Tarreau9aec3052018-09-12 09:20:40 +02001259 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +02001260 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001261 goto http_msg_invalid;
1262 }
1263
Willy Tarreau9aec3052018-09-12 09:20:40 +02001264 if (h1m->err_pos == -1) /* capture the error pointer */
1265 h1m->err_pos = ptr - start + skip; /* >= 0 now */
1266
1267 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +02001268 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001269
Willy Tarreau801250e2018-09-11 11:45:04 +02001270 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001271 http_msg_hdr_l1_sp:
1272 /* assumes sol points to the first char */
1273 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001274 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001275
1276 /* header value can be basically anything except CR/LF */
1277 sov = ptr - start;
1278
1279 if (likely(!HTTP_IS_CRLF(*ptr))) {
1280 goto http_msg_hdr_val;
1281 }
1282
1283 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001284 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001285 goto http_msg_hdr_l1_lf;
1286
Willy Tarreau801250e2018-09-11 11:45:04 +02001287 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001288 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +02001289 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
1290 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001291
Willy Tarreau801250e2018-09-11 11:45:04 +02001292 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001293 http_msg_hdr_l1_lws:
1294 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +02001295 if (!skip_update) {
1296 /* replace HT,CR,LF with spaces */
1297 for (; start + sov < ptr; sov++)
1298 start[sov] = ' ';
1299 }
Willy Tarreau794f9af2017-07-26 09:07:47 +02001300 goto http_msg_hdr_l1_sp;
1301 }
1302 /* we had a header consisting only in spaces ! */
1303 eol = sov;
1304 goto http_msg_complete_header;
1305
Willy Tarreau801250e2018-09-11 11:45:04 +02001306 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001307 http_msg_hdr_val:
1308 /* assumes sol points to the first char, and sov
1309 * points to the first character of the value.
1310 */
1311
1312 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
1313 * and lower. In fact since most of the time is spent in the loop, we
1314 * also remove the sign bit test so that bytes 0x8e..0x0d break the
1315 * loop, but we don't care since they're very rare in header values.
1316 */
1317#if defined(__x86_64__)
1318 while (ptr <= end - sizeof(long)) {
1319 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
1320 goto http_msg_hdr_val2;
1321 ptr += sizeof(long);
1322 }
1323#endif
1324#if defined(__x86_64__) || \
1325 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
1326 defined(__ARM_ARCH_7A__)
1327 while (ptr <= end - sizeof(int)) {
1328 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
1329 goto http_msg_hdr_val2;
1330 ptr += sizeof(int);
1331 }
1332#endif
1333 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +02001334 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001335 goto http_msg_ood;
1336 }
1337 http_msg_hdr_val2:
1338 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001339 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001340
1341 eol = ptr - start;
1342 /* Note: we could also copy eol into ->eoh so that we have the
1343 * real header end in case it ends with lots of LWS, but is this
1344 * really needed ?
1345 */
1346 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001347 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001348 goto http_msg_hdr_l2_lf;
1349
Willy Tarreau801250e2018-09-11 11:45:04 +02001350 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001351 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +02001352 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
1353 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001354
Willy Tarreau801250e2018-09-11 11:45:04 +02001355 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001356 http_msg_hdr_l2_lws:
1357 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +02001358 if (!skip_update) {
1359 /* LWS: replace HT,CR,LF with spaces */
1360 for (; start + eol < ptr; eol++)
1361 start[eol] = ' ';
1362 }
Willy Tarreau794f9af2017-07-26 09:07:47 +02001363 goto http_msg_hdr_val;
1364 }
1365 http_msg_complete_header:
1366 /*
1367 * It was a new header, so the last one is finished. Assumes
1368 * <sol> points to the first char of the name, <col> to the
1369 * colon, <sov> points to the first character of the value and
1370 * <eol> to the first CR or LF so we know how the line ends. We
1371 * will trim spaces around the value. It's possible to do it by
1372 * adjusting <eol> and <sov> which are no more used after this.
1373 * We can add the header field to the list.
1374 */
Christopher Faulet2912f872018-09-19 14:01:04 +02001375 if (likely(!skip_update)) {
1376 while (sov < eol && HTTP_IS_LWS(start[sov]))
1377 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001378
Christopher Faulet2912f872018-09-19 14:01:04 +02001379 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
1380 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001381
1382
Christopher Faulet2912f872018-09-19 14:01:04 +02001383 n = ist2(start + sol, col - sol);
1384 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001385
Christopher Faulet2912f872018-09-19 14:01:04 +02001386 do {
1387 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001388
Christopher Faulet2912f872018-09-19 14:01:04 +02001389 if (unlikely(hdr_count >= hdr_num)) {
1390 state = H1_MSG_HDR_L2_LWS;
1391 goto http_output_full;
1392 }
Willy Tarreau5384aac2018-09-11 16:04:48 +02001393
Christopher Faulet2912f872018-09-19 14:01:04 +02001394 if (isteqi(n, ist("transfer-encoding"))) {
1395 h1_parse_xfer_enc_header(h1m, v);
1396 }
1397 else if (isteqi(n, ist("content-length"))) {
1398 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +02001399
Christopher Faulet2912f872018-09-19 14:01:04 +02001400 if (ret < 0) {
1401 state = H1_MSG_HDR_L2_LWS;
1402 goto http_msg_invalid;
1403 }
1404 else if (ret == 0) {
1405 /* skip it */
1406 break;
1407 }
Willy Tarreau73373ab2018-09-14 17:11:33 +02001408 }
Christopher Faulet2912f872018-09-19 14:01:04 +02001409 else if (isteqi(n, ist("connection"))) {
1410 h1_parse_connection_header(h1m, v);
Willy Tarreau73373ab2018-09-14 17:11:33 +02001411 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +02001412
Christopher Faulet2912f872018-09-19 14:01:04 +02001413 http_set_hdr(&hdr[hdr_count++], n, v);
1414 } while (0);
1415 }
Willy Tarreau794f9af2017-07-26 09:07:47 +02001416
1417 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +02001418
Willy Tarreau794f9af2017-07-26 09:07:47 +02001419 if (likely(!HTTP_IS_CRLF(*ptr)))
1420 goto http_msg_hdr_name;
1421
1422 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001423 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001424 goto http_msg_last_lf;
1425
Willy Tarreau801250e2018-09-11 11:45:04 +02001426 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001427 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +02001428 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001429 ptr++;
1430 /* <ptr> now points to the first byte of payload. If needed sol
1431 * still points to the first of either CR or LF of the empty
1432 * line ending the headers block.
1433 */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001434 if (likely(!skip_update)) {
1435 if (unlikely(hdr_count >= hdr_num)) {
1436 state = H1_MSG_LAST_LF;
1437 goto http_output_full;
1438 }
Christopher Fauletff08a922018-09-25 13:59:46 +02001439 http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +02001440 }
Willy Tarreau001823c2018-09-12 17:25:32 +02001441
1442 /* reaching here we've parsed the whole message. We may detect
1443 * that we were already continuing an interrupted parsing pass
1444 * so we were silently looking for the end of message not
1445 * updating anything before deciding to parse it fully at once.
1446 * It's guaranteed that we won't match this test twice in a row
1447 * since restarting will turn zero.
1448 */
1449 if (restarting)
1450 goto restart;
1451
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001452 state = H1_MSG_DATA;
1453 if (h1m->flags & H1_MF_XFER_ENC) {
1454 if (h1m->flags & H1_MF_CLEN) {
1455 h1m->flags &= ~H1_MF_CLEN;
1456 hdr_count = http_del_hdr(hdr, ist("content-length"));
1457 }
1458
1459 if (h1m->flags & H1_MF_CHNK)
1460 state = H1_MSG_CHUNK_SIZE;
1461 else if (!(h1m->flags & H1_MF_RESP)) {
1462 /* cf RFC7230#3.3.3 : transfer-encoding in
1463 * request without chunked encoding is invalid.
1464 */
1465 goto http_msg_invalid;
1466 }
1467 }
1468
Willy Tarreau794f9af2017-07-26 09:07:47 +02001469 break;
1470
1471 default:
1472 /* impossible states */
1473 goto http_msg_invalid;
1474 }
1475
Willy Tarreau001823c2018-09-12 17:25:32 +02001476 /* Now we've left the headers state and are either in H1_MSG_DATA or
1477 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001478 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001479
Willy Tarreau5384aac2018-09-11 16:04:48 +02001480 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001481 *slp = sl;
1482
Willy Tarreau4433c082018-09-11 15:33:32 +02001483 h1m->state = state;
1484 h1m->next = ptr - start + skip;
1485 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001486
1487 http_msg_ood:
1488 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001489 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001490 *slp = sl;
1491
Willy Tarreau4433c082018-09-11 15:33:32 +02001492 h1m->state = state;
1493 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001494 return 0;
1495
1496 http_msg_invalid:
1497 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001498 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001499 *slp = sl;
1500
Willy Tarreau4433c082018-09-11 15:33:32 +02001501 h1m->err_state = h1m->state = state;
1502 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001503 return -1;
1504
1505 http_output_full:
1506 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001507 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001508 *slp = sl;
1509
Willy Tarreau4433c082018-09-11 15:33:32 +02001510 h1m->err_state = h1m->state = state;
1511 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001512 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001513
1514 restart:
1515 h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001516 if (h1m->flags & H1_MF_RESP)
1517 h1m->state = H1_MSG_RPBEFORE;
1518 else
1519 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001520 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001521}
1522
/* Performs a very minimal parsing of the trailers block located at offset
 * <ofs> in <buf>, looking at no more than <max> bytes past that offset.
 *
 * Lines are consumed one at a time; a line ends on LF, optionally preceded by
 * a single CR. The scan finishes on the first line whose very first character
 * is that CR or LF (i.e. an empty line).
 *
 * Return value:
 *   > 0 : number of bytes spanned by the trailers, terminating empty line
 *         included, which the caller may delete to skip them;
 *     0 : the scan limit was reached before the end of the trailers, more
 *         input data is needed;
 *   < 0 : parse error (a second CR was met on a line before any LF); the
 *         caller has to decide how to proceed, possibly eating everything.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;
	int done = 0;

	while (!done) {
		const char *line = b_peek(buf, pos);
		const char *eol = NULL; /* first CR seen on this line, else the LF */
		const char *cur;

		/* walk the current line up to its LF */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur == '\r') {
				/* only one CR is tolerated per line */
				if (eol)
					return -1;
				eol = cur;
			}
		}

		/* no CR on this line: the LF itself marks the end of line */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* LF/CRLF right at the beginning of the line => this was the
		 * empty line closing the trailers, nothing is left from this
		 * message.
		 */
		done = (eol == line);
	}
	return pos - ofs;
}
1573
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001574/* This function skips trailers in the buffer associated with HTTP message
1575 * <msg>. The first visited position is msg->next. If the end of the trailers is
1576 * found, the function returns >0. So, the caller can automatically schedul it
1577 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
1578 * data are available, the function does not change anything except maybe
1579 * msg->sol if it could parse some lines, and returns zero. If a parse error
1580 * is encountered, the function returns < 0 and does not change anything except
1581 * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
1582 * state before calling this function, which implies that all non-trailers data
1583 * have already been scheduled for forwarding, and that msg->next exactly
1584 * matches the length of trailers already parsed and not forwarded. It is also
1585 * important to note that this function is designed to be able to parse wrapped
1586 * headers at end of buffer.
1587 */
1588int http_forward_trailers(struct http_msg *msg)
1589{
Willy Tarreauc9fa0482018-07-10 17:43:27 +02001590 const struct buffer *buf = &msg->chn->buf;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001591 const char *parse = ci_head(msg->chn);
1592 const char *stop = b_tail(buf);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001593
1594 /* we have msg->next which points to next line. Look for CRLF. But
1595 * first, we reset msg->sol */
1596 msg->sol = 0;
1597 while (1) {
1598 const char *p1 = NULL, *p2 = NULL;
Willy Tarreau188e2302018-06-15 11:11:53 +02001599 const char *start = c_ptr(msg->chn, msg->next + msg->sol);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001600 const char *ptr = start;
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001601
1602 /* scan current line and stop at LF or CRLF */
1603 while (1) {
1604 if (ptr == stop)
1605 return 0;
1606
1607 if (*ptr == '\n') {
1608 if (!p1)
1609 p1 = ptr;
1610 p2 = ptr;
1611 break;
1612 }
1613
1614 if (*ptr == '\r') {
1615 if (p1) {
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001616 msg->err_pos = b_dist(buf, parse, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001617 return -1;
1618 }
1619 p1 = ptr;
1620 }
1621
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001622 ptr = b_next(buf, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001623 }
1624
1625 /* after LF; point to beginning of next line */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001626 p2 = b_next(buf, p2);
1627 msg->sol += b_dist(buf, start, p2);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001628
1629 /* LF/CRLF at beginning of line => end of trailers at p2.
1630 * Everything was scheduled for forwarding, there's nothing left
1631 * from this message. */
1632 if (p1 == start)
1633 return 1;
1634
1635 /* OK, next line then */
1636 }
1637}