/*
 * HTTP/1 protocol analyzer
 *
 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <ctype.h>
#include <limits.h>
#include <common/config.h>
#include <common/http-hdr.h>

#include <proto/channel.h>
#include <proto/h1.h>
#include <proto/hdr_idx.h>

Willy Tarreau8740c8b2017-09-21 10:22:25 +020021/*
22 * This function parses a status line between <ptr> and <end>, starting with
23 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
24 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
25 * will give undefined results.
26 * Note that it is upon the caller's responsibility to ensure that ptr < end,
27 * and that msg->sol points to the beginning of the response.
28 * If a complete line is found (which implies that at least one CR or LF is
29 * found before <end>, the updated <ptr> is returned, otherwise NULL is
30 * returned indicating an incomplete line (which does not mean that parts have
31 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
32 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
33 * upon next call.
34 *
35 * This function was intentionally designed to be called from
36 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
37 * within its state machine and use the same macros, hence the need for same
38 * labels and variable names. Note that msg->sol is left unchanged.
39 */
40const char *http_parse_stsline(struct http_msg *msg,
41 enum h1_state state, const char *ptr, const char *end,
42 unsigned int *ret_ptr, enum h1_state *ret_state)
43{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +020044 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +020045
46 switch (state) {
47 case HTTP_MSG_RPVER:
48 http_msg_rpver:
49 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
50 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
51
52 if (likely(HTTP_IS_SPHT(*ptr))) {
53 msg->sl.st.v_l = ptr - msg_start;
54 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
55 }
56 msg->err_state = HTTP_MSG_RPVER;
57 state = HTTP_MSG_ERROR;
58 break;
59
60 case HTTP_MSG_RPVER_SP:
61 http_msg_rpver_sp:
62 if (likely(!HTTP_IS_LWS(*ptr))) {
63 msg->sl.st.c = ptr - msg_start;
64 goto http_msg_rpcode;
65 }
66 if (likely(HTTP_IS_SPHT(*ptr)))
67 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
68 /* so it's a CR/LF, this is invalid */
69 msg->err_state = HTTP_MSG_RPVER_SP;
70 state = HTTP_MSG_ERROR;
71 break;
72
73 case HTTP_MSG_RPCODE:
74 http_msg_rpcode:
75 if (likely(!HTTP_IS_LWS(*ptr)))
76 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
77
78 if (likely(HTTP_IS_SPHT(*ptr))) {
79 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
80 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
81 }
82
83 /* so it's a CR/LF, so there is no reason phrase */
84 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
85 http_msg_rsp_reason:
86 /* FIXME: should we support HTTP responses without any reason phrase ? */
87 msg->sl.st.r = ptr - msg_start;
88 msg->sl.st.r_l = 0;
89 goto http_msg_rpline_eol;
90
91 case HTTP_MSG_RPCODE_SP:
92 http_msg_rpcode_sp:
93 if (likely(!HTTP_IS_LWS(*ptr))) {
94 msg->sl.st.r = ptr - msg_start;
95 goto http_msg_rpreason;
96 }
97 if (likely(HTTP_IS_SPHT(*ptr)))
98 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
99 /* so it's a CR/LF, so there is no reason phrase */
100 goto http_msg_rsp_reason;
101
102 case HTTP_MSG_RPREASON:
103 http_msg_rpreason:
104 if (likely(!HTTP_IS_CRLF(*ptr)))
105 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
106 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
107 http_msg_rpline_eol:
108 /* We have seen the end of line. Note that we do not
109 * necessarily have the \n yet, but at least we know that we
110 * have EITHER \r OR \n, otherwise the response would not be
111 * complete. We can then record the response length and return
112 * to the caller which will be able to register it.
113 */
114 msg->sl.st.l = ptr - msg_start - msg->sol;
115 return ptr;
116
117 default:
118#ifdef DEBUG_FULL
119 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
120 exit(1);
121#endif
122 ;
123 }
124
125 http_msg_ood:
126 /* out of valid data */
127 if (ret_state)
128 *ret_state = state;
129 if (ret_ptr)
130 *ret_ptr = ptr - msg_start;
131 return NULL;
132}
133
134/*
135 * This function parses a request line between <ptr> and <end>, starting with
136 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
137 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
138 * will give undefined results.
139 * Note that it is upon the caller's responsibility to ensure that ptr < end,
140 * and that msg->sol points to the beginning of the request.
141 * If a complete line is found (which implies that at least one CR or LF is
142 * found before <end>, the updated <ptr> is returned, otherwise NULL is
143 * returned indicating an incomplete line (which does not mean that parts have
144 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
145 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
146 * upon next call.
147 *
148 * This function was intentionally designed to be called from
149 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
150 * within its state machine and use the same macros, hence the need for same
151 * labels and variable names. Note that msg->sol is left unchanged.
152 */
153const char *http_parse_reqline(struct http_msg *msg,
154 enum h1_state state, const char *ptr, const char *end,
155 unsigned int *ret_ptr, enum h1_state *ret_state)
156{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200157 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200158
159 switch (state) {
160 case HTTP_MSG_RQMETH:
161 http_msg_rqmeth:
162 if (likely(HTTP_IS_TOKEN(*ptr)))
163 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
164
165 if (likely(HTTP_IS_SPHT(*ptr))) {
166 msg->sl.rq.m_l = ptr - msg_start;
167 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
168 }
169
170 if (likely(HTTP_IS_CRLF(*ptr))) {
171 /* HTTP 0.9 request */
172 msg->sl.rq.m_l = ptr - msg_start;
173 http_msg_req09_uri:
174 msg->sl.rq.u = ptr - msg_start;
175 http_msg_req09_uri_e:
176 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
177 http_msg_req09_ver:
178 msg->sl.rq.v = ptr - msg_start;
179 msg->sl.rq.v_l = 0;
180 goto http_msg_rqline_eol;
181 }
182 msg->err_state = HTTP_MSG_RQMETH;
183 state = HTTP_MSG_ERROR;
184 break;
185
186 case HTTP_MSG_RQMETH_SP:
187 http_msg_rqmeth_sp:
188 if (likely(!HTTP_IS_LWS(*ptr))) {
189 msg->sl.rq.u = ptr - msg_start;
190 goto http_msg_rquri;
191 }
192 if (likely(HTTP_IS_SPHT(*ptr)))
193 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
194 /* so it's a CR/LF, meaning an HTTP 0.9 request */
195 goto http_msg_req09_uri;
196
197 case HTTP_MSG_RQURI:
198 http_msg_rquri:
199#if defined(__x86_64__) || \
200 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
201 defined(__ARM_ARCH_7A__)
202 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
203 while (ptr <= end - sizeof(int)) {
204 int x = *(int *)ptr - 0x21212121;
205 if (x & 0x80808080)
206 break;
207
208 x -= 0x5e5e5e5e;
209 if (!(x & 0x80808080))
210 break;
211
212 ptr += sizeof(int);
213 }
214#endif
215 if (ptr >= end) {
216 state = HTTP_MSG_RQURI;
217 goto http_msg_ood;
218 }
219 http_msg_rquri2:
220 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
221 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
222
223 if (likely(HTTP_IS_SPHT(*ptr))) {
224 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
225 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
226 }
227
228 if (likely((unsigned char)*ptr >= 128)) {
229 /* non-ASCII chars are forbidden unless option
230 * accept-invalid-http-request is enabled in the frontend.
231 * In any case, we capture the faulty char.
232 */
233 if (msg->err_pos < -1)
234 goto invalid_char;
235 if (msg->err_pos == -1)
236 msg->err_pos = ptr - msg_start;
237 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
238 }
239
240 if (likely(HTTP_IS_CRLF(*ptr))) {
241 /* so it's a CR/LF, meaning an HTTP 0.9 request */
242 goto http_msg_req09_uri_e;
243 }
244
245 /* OK forbidden chars, 0..31 or 127 */
246 invalid_char:
247 msg->err_pos = ptr - msg_start;
248 msg->err_state = HTTP_MSG_RQURI;
249 state = HTTP_MSG_ERROR;
250 break;
251
252 case HTTP_MSG_RQURI_SP:
253 http_msg_rquri_sp:
254 if (likely(!HTTP_IS_LWS(*ptr))) {
255 msg->sl.rq.v = ptr - msg_start;
256 goto http_msg_rqver;
257 }
258 if (likely(HTTP_IS_SPHT(*ptr)))
259 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
260 /* so it's a CR/LF, meaning an HTTP 0.9 request */
261 goto http_msg_req09_ver;
262
263 case HTTP_MSG_RQVER:
264 http_msg_rqver:
265 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
266 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
267
268 if (likely(HTTP_IS_CRLF(*ptr))) {
269 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
270 http_msg_rqline_eol:
271 /* We have seen the end of line. Note that we do not
272 * necessarily have the \n yet, but at least we know that we
273 * have EITHER \r OR \n, otherwise the request would not be
274 * complete. We can then record the request length and return
275 * to the caller which will be able to register it.
276 */
277 msg->sl.rq.l = ptr - msg_start - msg->sol;
278 return ptr;
279 }
280
281 /* neither an HTTP_VER token nor a CRLF */
282 msg->err_state = HTTP_MSG_RQVER;
283 state = HTTP_MSG_ERROR;
284 break;
285
286 default:
287#ifdef DEBUG_FULL
288 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
289 exit(1);
290#endif
291 ;
292 }
293
294 http_msg_ood:
295 /* out of valid data */
296 if (ret_state)
297 *ret_state = state;
298 if (ret_ptr)
299 *ret_ptr = ptr - msg_start;
300 return NULL;
301}
302
303/*
304 * This function parses an HTTP message, either a request or a response,
305 * depending on the initial msg->msg_state. The caller is responsible for
306 * ensuring that the message does not wrap. The function can be preempted
307 * everywhere when data are missing and recalled at the exact same location
308 * with no information loss. The message may even be realigned between two
309 * calls. The header index is re-initialized when switching from
310 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
311 * fields. Note that msg->sol will be initialized after completing the first
312 * state, so that none of the msg pointers has to be initialized prior to the
313 * first call.
314 */
315void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
316{
317 enum h1_state state; /* updated only when leaving the FSM */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200318 register const char *ptr, *end; /* request pointers, to avoid dereferences */
Willy Tarreau950a8a62018-09-06 10:48:15 +0200319 struct buffer *buf = &msg->chn->buf;
320 char *input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200321
322 state = msg->msg_state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200323 ptr = input + msg->next;
324 end = b_stop(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200325
326 if (unlikely(ptr >= end))
327 goto http_msg_ood;
328
329 switch (state) {
330 /*
331 * First, states that are specific to the response only.
332 * We check them first so that request and headers are
333 * closer to each other (accessed more often).
334 */
335 case HTTP_MSG_RPBEFORE:
336 http_msg_rpbefore:
337 if (likely(HTTP_IS_TOKEN(*ptr))) {
338 /* we have a start of message, but we have to check
339 * first if we need to remove some CRLF. We can only
340 * do this when o=0.
341 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200342 if (unlikely(ptr != input)) {
343 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200344 goto http_msg_ood;
345 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200346 b_del(buf, ptr - input);
Willy Tarreau950a8a62018-09-06 10:48:15 +0200347 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200348 }
349 msg->sol = 0;
350 msg->sl.st.l = 0; /* used in debug mode */
351 hdr_idx_init(idx);
352 state = HTTP_MSG_RPVER;
353 goto http_msg_rpver;
354 }
355
356 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
357 state = HTTP_MSG_RPBEFORE;
358 goto http_msg_invalid;
359 }
360
361 if (unlikely(*ptr == '\n'))
362 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
363 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
364 /* stop here */
365
366 case HTTP_MSG_RPBEFORE_CR:
367 http_msg_rpbefore_cr:
368 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
369 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
370 /* stop here */
371
372 case HTTP_MSG_RPVER:
373 http_msg_rpver:
374 case HTTP_MSG_RPVER_SP:
375 case HTTP_MSG_RPCODE:
376 case HTTP_MSG_RPCODE_SP:
377 case HTTP_MSG_RPREASON:
378 ptr = (char *)http_parse_stsline(msg,
379 state, ptr, end,
380 &msg->next, &msg->msg_state);
381 if (unlikely(!ptr))
382 return;
383
384 /* we have a full response and we know that we have either a CR
385 * or an LF at <ptr>.
386 */
387 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
388
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200389 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200390 if (likely(*ptr == '\r'))
391 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
392 goto http_msg_rpline_end;
393
394 case HTTP_MSG_RPLINE_END:
395 http_msg_rpline_end:
396 /* msg->sol must point to the first of CR or LF. */
397 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
398 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
399 /* stop here */
400
401 /*
402 * Second, states that are specific to the request only
403 */
404 case HTTP_MSG_RQBEFORE:
405 http_msg_rqbefore:
406 if (likely(HTTP_IS_TOKEN(*ptr))) {
407 /* we have a start of message, but we have to check
408 * first if we need to remove some CRLF. We can only
409 * do this when o=0.
410 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200411 if (likely(ptr != input)) {
412 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200413 goto http_msg_ood;
414 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200415 b_del(buf, ptr - input);
Willy Tarreau950a8a62018-09-06 10:48:15 +0200416 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200417 }
418 msg->sol = 0;
419 msg->sl.rq.l = 0; /* used in debug mode */
420 state = HTTP_MSG_RQMETH;
421 goto http_msg_rqmeth;
422 }
423
424 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
425 state = HTTP_MSG_RQBEFORE;
426 goto http_msg_invalid;
427 }
428
429 if (unlikely(*ptr == '\n'))
430 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
431 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
432 /* stop here */
433
434 case HTTP_MSG_RQBEFORE_CR:
435 http_msg_rqbefore_cr:
436 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
437 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
438 /* stop here */
439
440 case HTTP_MSG_RQMETH:
441 http_msg_rqmeth:
442 case HTTP_MSG_RQMETH_SP:
443 case HTTP_MSG_RQURI:
444 case HTTP_MSG_RQURI_SP:
445 case HTTP_MSG_RQVER:
446 ptr = (char *)http_parse_reqline(msg,
447 state, ptr, end,
448 &msg->next, &msg->msg_state);
449 if (unlikely(!ptr))
450 return;
451
452 /* we have a full request and we know that we have either a CR
453 * or an LF at <ptr>.
454 */
455 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
456
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200457 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200458 if (likely(*ptr == '\r'))
459 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
460 goto http_msg_rqline_end;
461
462 case HTTP_MSG_RQLINE_END:
463 http_msg_rqline_end:
464 /* check for HTTP/0.9 request : no version information available.
465 * msg->sol must point to the first of CR or LF.
466 */
467 if (unlikely(msg->sl.rq.v_l == 0))
468 goto http_msg_last_lf;
469
470 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
471 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
472 /* stop here */
473
474 /*
475 * Common states below
476 */
477 case HTTP_MSG_HDR_FIRST:
478 http_msg_hdr_first:
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200479 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200480 if (likely(!HTTP_IS_CRLF(*ptr))) {
481 goto http_msg_hdr_name;
482 }
483
484 if (likely(*ptr == '\r'))
485 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
486 goto http_msg_last_lf;
487
488 case HTTP_MSG_HDR_NAME:
489 http_msg_hdr_name:
490 /* assumes msg->sol points to the first char */
491 if (likely(HTTP_IS_TOKEN(*ptr)))
492 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
493
494 if (likely(*ptr == ':'))
495 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
496
497 if (likely(msg->err_pos < -1) || *ptr == '\n') {
498 state = HTTP_MSG_HDR_NAME;
499 goto http_msg_invalid;
500 }
501
502 if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200503 msg->err_pos = ptr - input; /* >= 0 now */
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200504
505 /* and we still accept this non-token character */
506 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
507
508 case HTTP_MSG_HDR_L1_SP:
509 http_msg_hdr_l1_sp:
510 /* assumes msg->sol points to the first char */
511 if (likely(HTTP_IS_SPHT(*ptr)))
512 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
513
514 /* header value can be basically anything except CR/LF */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200515 msg->sov = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200516
517 if (likely(!HTTP_IS_CRLF(*ptr))) {
518 goto http_msg_hdr_val;
519 }
520
521 if (likely(*ptr == '\r'))
522 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
523 goto http_msg_hdr_l1_lf;
524
525 case HTTP_MSG_HDR_L1_LF:
526 http_msg_hdr_l1_lf:
527 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
528 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
529
530 case HTTP_MSG_HDR_L1_LWS:
531 http_msg_hdr_l1_lws:
532 if (likely(HTTP_IS_SPHT(*ptr))) {
533 /* replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200534 for (; input + msg->sov < ptr; msg->sov++)
535 input[msg->sov] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200536 goto http_msg_hdr_l1_sp;
537 }
538 /* we had a header consisting only in spaces ! */
539 msg->eol = msg->sov;
540 goto http_msg_complete_header;
541
542 case HTTP_MSG_HDR_VAL:
543 http_msg_hdr_val:
544 /* assumes msg->sol points to the first char, and msg->sov
545 * points to the first character of the value.
546 */
547
548 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
549 * and lower. In fact since most of the time is spent in the loop, we
550 * also remove the sign bit test so that bytes 0x8e..0x0d break the
551 * loop, but we don't care since they're very rare in header values.
552 */
553#if defined(__x86_64__)
554 while (ptr <= end - sizeof(long)) {
555 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
556 goto http_msg_hdr_val2;
557 ptr += sizeof(long);
558 }
559#endif
560#if defined(__x86_64__) || \
561 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
562 defined(__ARM_ARCH_7A__)
563 while (ptr <= end - sizeof(int)) {
564 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
565 goto http_msg_hdr_val2;
566 ptr += sizeof(int);
567 }
568#endif
569 if (ptr >= end) {
570 state = HTTP_MSG_HDR_VAL;
571 goto http_msg_ood;
572 }
573 http_msg_hdr_val2:
574 if (likely(!HTTP_IS_CRLF(*ptr)))
575 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
576
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200577 msg->eol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200578 /* Note: we could also copy eol into ->eoh so that we have the
579 * real header end in case it ends with lots of LWS, but is this
580 * really needed ?
581 */
582 if (likely(*ptr == '\r'))
583 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
584 goto http_msg_hdr_l2_lf;
585
586 case HTTP_MSG_HDR_L2_LF:
587 http_msg_hdr_l2_lf:
588 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
589 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
590
591 case HTTP_MSG_HDR_L2_LWS:
592 http_msg_hdr_l2_lws:
593 if (unlikely(HTTP_IS_SPHT(*ptr))) {
594 /* LWS: replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200595 for (; input + msg->eol < ptr; msg->eol++)
596 input[msg->eol] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200597 goto http_msg_hdr_val;
598 }
599 http_msg_complete_header:
600 /*
601 * It was a new header, so the last one is finished.
602 * Assumes msg->sol points to the first char, msg->sov points
603 * to the first character of the value and msg->eol to the
604 * first CR or LF so we know how the line ends. We insert last
605 * header into the index.
606 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200607 if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r',
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200608 idx, idx->tail) < 0)) {
609 state = HTTP_MSG_HDR_L2_LWS;
610 goto http_msg_invalid;
611 }
612
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200613 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200614 if (likely(!HTTP_IS_CRLF(*ptr))) {
615 goto http_msg_hdr_name;
616 }
617
618 if (likely(*ptr == '\r'))
619 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
620 goto http_msg_last_lf;
621
622 case HTTP_MSG_LAST_LF:
623 http_msg_last_lf:
624 /* Assumes msg->sol points to the first of either CR or LF.
625 * Sets ->sov and ->next to the total header length, ->eoh to
626 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
627 */
628 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
629 ptr++;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200630 msg->sov = msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200631 msg->eoh = msg->sol;
632 msg->sol = 0;
633 msg->eol = msg->sov - msg->eoh;
634 msg->msg_state = HTTP_MSG_BODY;
635 return;
636
637 case HTTP_MSG_ERROR:
638 /* this may only happen if we call http_msg_analyser() twice with an error */
639 break;
640
641 default:
642#ifdef DEBUG_FULL
643 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
644 exit(1);
645#endif
646 ;
647 }
648 http_msg_ood:
649 /* out of data */
650 msg->msg_state = state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200651 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200652 return;
653
654 http_msg_invalid:
655 /* invalid message */
656 msg->err_state = state;
657 msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200658 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200659 return;
660}
661
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200662
Willy Tarreau73373ab2018-09-14 17:11:33 +0200663/* Parse the Content-Length header field of an HTTP/1 request. The function
664 * checks all possible occurrences of a comma-delimited value, and verifies
665 * if any of them doesn't match a previous value. It returns <0 if a value
666 * differs, 0 if the whole header can be dropped (i.e. already known), or >0
667 * if the value can be indexed (first one). In the last case, the value might
668 * be adjusted and the caller must only add the updated value.
669 */
670int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
671{
672 char *e, *n;
673 long long cl;
674 int not_first = !!(h1m->flags & H1_MF_CLEN);
675 struct ist word;
676
677 word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
678 e = value->ptr + value->len;
679
680 while (++word.ptr < e) {
681 /* skip leading delimitor and blanks */
682 if (unlikely(HTTP_IS_LWS(*word.ptr)))
683 continue;
684
685 /* digits only now */
686 for (cl = 0, n = word.ptr; n < e; n++) {
687 unsigned int c = *n - '0';
688 if (unlikely(c > 9)) {
689 /* non-digit */
690 if (unlikely(n == word.ptr)) // spaces only
691 goto fail;
692 break;
693 }
694 if (unlikely(cl > ULLONG_MAX / 10ULL))
695 goto fail; /* multiply overflow */
696 cl = cl * 10ULL;
697 if (unlikely(cl + c < cl))
698 goto fail; /* addition overflow */
699 cl = cl + c;
700 }
701
702 /* keep a copy of the exact cleaned value */
703 word.len = n - word.ptr;
704
705 /* skip trailing LWS till next comma or EOL */
706 for (; n < e; n++) {
707 if (!HTTP_IS_LWS(*n)) {
708 if (unlikely(*n != ','))
709 goto fail;
710 break;
711 }
712 }
713
714 /* if duplicate, must be equal */
715 if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
716 goto fail;
717
718 /* OK, store this result as the one to be indexed */
719 h1m->flags |= H1_MF_CLEN;
720 h1m->curr_len = h1m->body_len = cl;
721 *value = word;
722 word.ptr = n;
723 }
724 /* here we've reached the end with a single value or a series of
725 * identical values, all matching previous series if any. The last
726 * parsed value was sent back into <value>. We just have to decide
727 * if this occurrence has to be indexed (it's the first one) or
728 * silently skipped (it's not the first one)
729 */
730 return !not_first;
731 fail:
732 return -1;
733}
734
Willy Tarreau2557f6a2018-09-14 16:34:47 +0200735/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
736 * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
737 * this case. Any other token found or any empty header field found will reset
738 * this flag, so that it accurately represents the token's presence at the last
739 * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
740 * are case-insensitive (cf RFC7230#4).
741 */
742void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
743{
744 char *e, *n;
745 struct ist word;
746
747 h1m->flags |= H1_MF_XFER_ENC;
748 h1m->flags &= ~H1_MF_CHNK;
749
750 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
751 e = value.ptr + value.len;
752
753 while (++word.ptr < e) {
754 /* skip leading delimitor and blanks */
755 if (HTTP_IS_LWS(*word.ptr))
756 continue;
757
758 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
759 word.len = n - word.ptr;
760
761 /* trim trailing blanks */
762 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
763 word.len--;
764
765 h1m->flags &= ~H1_MF_CHNK;
766 if (isteqi(word, ist("chunked")))
767 h1m->flags |= H1_MF_CHNK;
768
769 word.ptr = n;
770 }
771}
772
Willy Tarreau98f5cf72018-09-13 14:15:58 +0200773/* Parse the Connection: header of an HTTP/1 request, looking for "close",
774 * "keep-alive", and "upgrade" values, and updating h1m->flags according to
775 * what was found there. Note that flags are only added, not removed, so the
776 * function is safe for being called multiple times if multiple occurrences
777 * are found.
778 */
779void h1_parse_connection_header(struct h1m *h1m, struct ist value)
780{
781 char *e, *n;
782 struct ist word;
783
784 word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
785 e = value.ptr + value.len;
786
787 while (++word.ptr < e) {
788 /* skip leading delimitor and blanks */
789 if (HTTP_IS_LWS(*word.ptr))
790 continue;
791
792 n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
793 word.len = n - word.ptr;
794
795 /* trim trailing blanks */
796 while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
797 word.len--;
798
799 if (isteqi(word, ist("keep-alive")))
800 h1m->flags |= H1_MF_CONN_KAL;
801 else if (isteqi(word, ist("close")))
802 h1m->flags |= H1_MF_CONN_CLO;
803 else if (isteqi(word, ist("upgrade")))
804 h1m->flags |= H1_MF_CONN_UPG;
805
806 word.ptr = n;
807 }
808}
809
/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
 * pairs representing header fields into the array <hdr> of size <hdr_num>,
 * whose last entry will have an empty name and an empty value. If <hdr_num> is
 * too small to represent the whole message, an error is returned. Some
 * protocol elements such as content-length and transfer-encoding will be
 * parsed and stored into h1m as well. <hdr> may be null, in which case only
 * the parsing state will be updated. This may be used to restart the parsing
 * where it stopped for example.
 *
 * For now it's limited to the response. If the header block is incomplete,
 * 0 is returned, waiting to be called again with more data to try it again.
 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
 * and h1m->next to zero on the first call, the parser will do the rest. If
 * an incomplete message is seen, the caller only needs to present h1m->state
 * and h1m->next again, with an empty header list so that the parser can start
 * again. In this case, it will detect that it interrupted a previous session
 * and will first look for the end of the message before reparsing it again and
 * indexing it at the same time. This ensures that incomplete messages fed 1
 * character at a time are never processed entirely more than exactly twice,
 * and that there is no need to store all the internal state and pre-parsed
 * headers or start line between calls.
 *
 * A pointer to a start line descriptor may be passed in <slp>, in which case
 * the parser will fill it with whatever it found.
 *
 * The code derived from the main HTTP/1 parser above but was simplified and
 * optimized to process responses produced or forwarded by haproxy. The caller
 * is responsible for ensuring that the message doesn't wrap, and should ensure
 * it is complete to avoid having to retry the operation after a failed
 * attempt. The message is not supposed to be invalid, which is why a few
 * properties such as the character set used in the header field names are not
 * checked. In case of an unparsable response message, a negative value will be
 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
 * recommended.
 *
 * This function returns :
 *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
 *    -2 if the output is full (hdr_num reached). err_state and err_pos also
 *       indicate where it failed.
 *     0 in case of missing data.
 *   > 0 on success, it then corresponds to the number of bytes read since
 *       <start> so that the caller can go on with the payload.
 */
857int h1_headers_to_hdr_list(char *start, const char *stop,
858 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200859 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200860{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200861 enum h1m_state state;
862 register char *ptr;
863 register const char *end;
864 unsigned int hdr_count;
865 unsigned int skip; /* number of bytes skipped at the beginning */
866 unsigned int sol; /* start of line */
867 unsigned int col; /* position of the colon */
868 unsigned int eol; /* end of line */
869 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200870 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200871 int skip_update;
872 int restarting;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200873 struct ist n, v; /* header name and value during parsing */
874
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200875 skip = 0; // do it only once to keep track of the leading CRLF.
876
877 try_again:
878 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200879 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200880 skip_update = restarting = 0;
881
882 ptr = start + h1m->next;
883 end = stop;
884 state = h1m->state;
885
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200886 if (state != H1_MSG_RQBEFORE && state != H1_MSG_RPBEFORE)
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200887 restarting = 1;
888
Willy Tarreau794f9af2017-07-26 09:07:47 +0200889 if (unlikely(ptr >= end))
890 goto http_msg_ood;
891
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200892 /* don't update output if hdr is NULL or if we're restarting */
893 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200894 skip_update = 1;
895
Willy Tarreau794f9af2017-07-26 09:07:47 +0200896 switch (state) {
Willy Tarreauc2ab9f52018-09-11 17:57:05 +0200897 case H1_MSG_RQBEFORE:
898 http_msg_rqbefore:
899 if (likely(HTTP_IS_TOKEN(*ptr))) {
900 /* we have a start of message, we may have skipped some
901 * heading CRLF. Skip them now.
902 */
903 skip += ptr - start;
904 start = ptr;
905
906 sol = 0;
907 sl.rq.m = skip;
908 hdr_count = 0;
909 state = H1_MSG_RQMETH;
910 goto http_msg_rqmeth;
911 }
912
913 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
914 state = H1_MSG_RQBEFORE;
915 goto http_msg_invalid;
916 }
917
918 if (unlikely(*ptr == '\n'))
919 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
920 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
921 /* stop here */
922
923 case H1_MSG_RQBEFORE_CR:
924 http_msg_rqbefore_cr:
925 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
926 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
927 /* stop here */
928
929 case H1_MSG_RQMETH:
930 http_msg_rqmeth:
931 if (likely(HTTP_IS_TOKEN(*ptr)))
932 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
933
934 if (likely(HTTP_IS_SPHT(*ptr))) {
935 sl.rq.m_l = ptr - start;
936 sl.rq.meth = find_http_meth(start, sl.rq.m_l);
937 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
938 }
939
940 if (likely(HTTP_IS_CRLF(*ptr))) {
941 /* HTTP 0.9 request */
942 sl.rq.m_l = ptr - start;
943 sl.rq.meth = find_http_meth(start, sl.rq.m_l);
944 http_msg_req09_uri:
945 sl.rq.u = ptr - start + skip;
946 http_msg_req09_uri_e:
947 sl.rq.u_l = ptr - start + skip - sl.rq.u;
948 http_msg_req09_ver:
949 sl.rq.v = ptr - start + skip;
950 sl.rq.v_l = 0;
951 goto http_msg_rqline_eol;
952 }
953 state = H1_MSG_RQMETH;
954 goto http_msg_invalid;
955
956 case H1_MSG_RQMETH_SP:
957 http_msg_rqmeth_sp:
958 if (likely(!HTTP_IS_LWS(*ptr))) {
959 sl.rq.u = ptr - start + skip;
960 goto http_msg_rquri;
961 }
962 if (likely(HTTP_IS_SPHT(*ptr)))
963 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
964 /* so it's a CR/LF, meaning an HTTP 0.9 request */
965 goto http_msg_req09_uri;
966
967 case H1_MSG_RQURI:
968 http_msg_rquri:
969#if defined(__x86_64__) || \
970 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
971 defined(__ARM_ARCH_7A__)
972 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
973 while (ptr <= end - sizeof(int)) {
974 int x = *(int *)ptr - 0x21212121;
975 if (x & 0x80808080)
976 break;
977
978 x -= 0x5e5e5e5e;
979 if (!(x & 0x80808080))
980 break;
981
982 ptr += sizeof(int);
983 }
984#endif
985 if (ptr >= end) {
986 state = H1_MSG_RQURI;
987 goto http_msg_ood;
988 }
989 http_msg_rquri2:
990 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
991 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
992
993 if (likely(HTTP_IS_SPHT(*ptr))) {
994 sl.rq.u_l = ptr - start + skip - sl.rq.u;
995 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
996 }
997 if (likely((unsigned char)*ptr >= 128)) {
998 /* non-ASCII chars are forbidden unless option
999 * accept-invalid-http-request is enabled in the frontend.
1000 * In any case, we capture the faulty char.
1001 */
1002 if (h1m->err_pos < -1)
1003 goto invalid_char;
1004 if (h1m->err_pos == -1)
1005 h1m->err_pos = ptr - start + skip;
1006 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
1007 }
1008
1009 if (likely(HTTP_IS_CRLF(*ptr))) {
1010 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1011 goto http_msg_req09_uri_e;
1012 }
1013
1014 /* OK forbidden chars, 0..31 or 127 */
1015 invalid_char:
1016 state = H1_MSG_RQURI;
1017 goto http_msg_invalid;
1018
1019 case H1_MSG_RQURI_SP:
1020 http_msg_rquri_sp:
1021 if (likely(!HTTP_IS_LWS(*ptr))) {
1022 sl.rq.v = ptr - start + skip;
1023 goto http_msg_rqver;
1024 }
1025 if (likely(HTTP_IS_SPHT(*ptr)))
1026 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
1027 /* so it's a CR/LF, meaning an HTTP 0.9 request */
1028 goto http_msg_req09_ver;
1029
1030
1031 case H1_MSG_RQVER:
1032 http_msg_rqver:
1033 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
1034 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
1035
1036 if (likely(HTTP_IS_CRLF(*ptr))) {
1037 sl.rq.v_l = ptr - start + skip - sl.rq.v;
1038 http_msg_rqline_eol:
1039 /* We have seen the end of line. Note that we do not
1040 * necessarily have the \n yet, but at least we know that we
1041 * have EITHER \r OR \n, otherwise the request would not be
1042 * complete. We can then record the request length and return
1043 * to the caller which will be able to register it.
1044 */
1045
1046 if (likely(!skip_update)) {
Willy Tarreauba5fbca2018-09-13 11:32:51 +02001047 if ((sl.rq.v_l == 8) &&
1048 ((start[sl.rq.v + 5] > '1') ||
1049 ((start[sl.rq.v + 5] == '1') && (start[sl.rq.v + 7] >= '1'))))
1050 h1m->flags |= H1_MF_VER_11;
1051
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001052 if (unlikely(hdr_count >= hdr_num)) {
1053 state = H1_MSG_RQVER;
1054 goto http_output_full;
1055 }
1056 http_set_hdr(&hdr[hdr_count++], ist(":method"), ist2(start + sl.rq.m, sl.rq.m_l));
1057
1058 if (unlikely(hdr_count >= hdr_num)) {
1059 state = H1_MSG_RQVER;
1060 goto http_output_full;
1061 }
1062 http_set_hdr(&hdr[hdr_count++], ist(":path"), ist2(start + sl.rq.u, sl.rq.u_l));
1063 }
1064
1065 sol = ptr - start;
1066 if (likely(*ptr == '\r'))
1067 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
1068 goto http_msg_rqline_end;
1069 }
1070
1071 /* neither an HTTP_VER token nor a CRLF */
1072 state = H1_MSG_RQVER;
1073 goto http_msg_invalid;
1074
1075 case H1_MSG_RQLINE_END:
1076 http_msg_rqline_end:
1077 /* check for HTTP/0.9 request : no version information
1078 * available. sol must point to the first of CR or LF. However
1079 * since we don't save these elements between calls, if we come
1080 * here from a restart, we don't necessarily know. Thus in this
1081 * case we simply start over.
1082 */
1083 if (restarting)
1084 goto restart;
1085
1086 if (unlikely(sl.rq.v_l == 0))
1087 goto http_msg_last_lf;
1088
1089 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
1090 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
1091 /* stop here */
1092
1093 /*
1094 * Common states below
1095 */
Willy Tarreau801250e2018-09-11 11:45:04 +02001096 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001097 http_msg_rpbefore:
1098 if (likely(HTTP_IS_TOKEN(*ptr))) {
1099 /* we have a start of message, we may have skipped some
1100 * heading CRLF. Skip them now.
1101 */
1102 skip += ptr - start;
1103 start = ptr;
1104
1105 sol = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +02001106 sl.st.v = skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001107 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +02001108 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001109 goto http_msg_rpver;
1110 }
1111
1112 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +02001113 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001114 goto http_msg_invalid;
1115 }
1116
1117 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001118 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
1119 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001120 /* stop here */
1121
Willy Tarreau801250e2018-09-11 11:45:04 +02001122 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001123 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +02001124 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
1125 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001126 /* stop here */
1127
Willy Tarreau801250e2018-09-11 11:45:04 +02001128 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001129 http_msg_rpver:
1130 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001131 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001132
1133 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +02001134 sl.st.v_l = ptr - start;
Willy Tarreauba5fbca2018-09-13 11:32:51 +02001135
1136 if ((sl.st.v_l == 8) &&
1137 ((start[sl.st.v + 5] > '1') ||
1138 ((start[sl.st.v + 5] == '1') && (start[sl.st.v + 7] >= '1'))))
1139 h1m->flags |= H1_MF_VER_11;
1140
Willy Tarreau801250e2018-09-11 11:45:04 +02001141 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001142 }
Willy Tarreau801250e2018-09-11 11:45:04 +02001143 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001144 goto http_msg_invalid;
1145
Willy Tarreau801250e2018-09-11 11:45:04 +02001146 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001147 http_msg_rpver_sp:
1148 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +02001149 sl.st.status = 0;
1150 sl.st.c = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001151 goto http_msg_rpcode;
1152 }
1153 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001154 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001155 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +02001156 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001157 goto http_msg_invalid;
1158
Willy Tarreau801250e2018-09-11 11:45:04 +02001159 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001160 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +01001161 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +02001162 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +02001163 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001164 }
1165
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +01001166 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +02001167 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +01001168 goto http_msg_invalid;
1169 }
1170
Willy Tarreau794f9af2017-07-26 09:07:47 +02001171 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +02001172 sl.st.c_l = ptr - start + skip - sl.st.c;
Willy Tarreau801250e2018-09-11 11:45:04 +02001173 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001174 }
1175
1176 /* so it's a CR/LF, so there is no reason phrase */
Willy Tarreaua41393f2018-09-11 15:34:50 +02001177 sl.st.c_l = ptr - start + skip - sl.st.c;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001178
1179 http_msg_rsp_reason:
Willy Tarreaua41393f2018-09-11 15:34:50 +02001180 sl.st.r = ptr - start + skip;
1181 sl.st.r_l = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001182 goto http_msg_rpline_eol;
1183
Willy Tarreau801250e2018-09-11 11:45:04 +02001184 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001185 http_msg_rpcode_sp:
1186 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +02001187 sl.st.r = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001188 goto http_msg_rpreason;
1189 }
1190 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001191 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001192 /* so it's a CR/LF, so there is no reason phrase */
1193 goto http_msg_rsp_reason;
1194
Willy Tarreau801250e2018-09-11 11:45:04 +02001195 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001196 http_msg_rpreason:
1197 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001198 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Willy Tarreaua41393f2018-09-11 15:34:50 +02001199 sl.st.r_l = ptr - start + skip - sl.st.r;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001200 http_msg_rpline_eol:
1201 /* We have seen the end of line. Note that we do not
1202 * necessarily have the \n yet, but at least we know that we
1203 * have EITHER \r OR \n, otherwise the response would not be
1204 * complete. We can then record the response length and return
1205 * to the caller which will be able to register it.
1206 */
1207
Willy Tarreau5384aac2018-09-11 16:04:48 +02001208 if (likely(!skip_update)) {
1209 if (unlikely(hdr_count >= hdr_num)) {
1210 state = H1_MSG_RPREASON;
1211 goto http_output_full;
1212 }
1213 http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + sl.st.c, sl.st.c_l));
Willy Tarreau794f9af2017-07-26 09:07:47 +02001214 }
Willy Tarreau794f9af2017-07-26 09:07:47 +02001215
1216 sol = ptr - start;
1217 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001218 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001219 goto http_msg_rpline_end;
1220
Willy Tarreau801250e2018-09-11 11:45:04 +02001221 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001222 http_msg_rpline_end:
1223 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +02001224 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
1225 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001226 /* stop here */
1227
Willy Tarreau801250e2018-09-11 11:45:04 +02001228 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001229 http_msg_hdr_first:
1230 sol = ptr - start;
1231 if (likely(!HTTP_IS_CRLF(*ptr))) {
1232 goto http_msg_hdr_name;
1233 }
1234
1235 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001236 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001237 goto http_msg_last_lf;
1238
Willy Tarreau801250e2018-09-11 11:45:04 +02001239 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001240 http_msg_hdr_name:
1241 /* assumes sol points to the first char */
1242 if (likely(HTTP_IS_TOKEN(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +02001243 if (!skip_update) {
1244 /* turn it to lower case if needed */
1245 if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
1246 *ptr = tolower(*ptr);
1247 }
Willy Tarreau801250e2018-09-11 11:45:04 +02001248 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001249 }
1250
1251 if (likely(*ptr == ':')) {
1252 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +02001253 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001254 }
1255
Willy Tarreau9aec3052018-09-12 09:20:40 +02001256 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +02001257 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001258 goto http_msg_invalid;
1259 }
1260
Willy Tarreau9aec3052018-09-12 09:20:40 +02001261 if (h1m->err_pos == -1) /* capture the error pointer */
1262 h1m->err_pos = ptr - start + skip; /* >= 0 now */
1263
1264 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +02001265 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001266
Willy Tarreau801250e2018-09-11 11:45:04 +02001267 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001268 http_msg_hdr_l1_sp:
1269 /* assumes sol points to the first char */
1270 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001271 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001272
1273 /* header value can be basically anything except CR/LF */
1274 sov = ptr - start;
1275
1276 if (likely(!HTTP_IS_CRLF(*ptr))) {
1277 goto http_msg_hdr_val;
1278 }
1279
1280 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001281 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001282 goto http_msg_hdr_l1_lf;
1283
Willy Tarreau801250e2018-09-11 11:45:04 +02001284 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001285 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +02001286 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
1287 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001288
Willy Tarreau801250e2018-09-11 11:45:04 +02001289 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001290 http_msg_hdr_l1_lws:
1291 if (likely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +02001292 if (!skip_update) {
1293 /* replace HT,CR,LF with spaces */
1294 for (; start + sov < ptr; sov++)
1295 start[sov] = ' ';
1296 }
Willy Tarreau794f9af2017-07-26 09:07:47 +02001297 goto http_msg_hdr_l1_sp;
1298 }
1299 /* we had a header consisting only in spaces ! */
1300 eol = sov;
1301 goto http_msg_complete_header;
1302
Willy Tarreau801250e2018-09-11 11:45:04 +02001303 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001304 http_msg_hdr_val:
1305 /* assumes sol points to the first char, and sov
1306 * points to the first character of the value.
1307 */
1308
1309 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
1310 * and lower. In fact since most of the time is spent in the loop, we
1311 * also remove the sign bit test so that bytes 0x8e..0x0d break the
1312 * loop, but we don't care since they're very rare in header values.
1313 */
1314#if defined(__x86_64__)
1315 while (ptr <= end - sizeof(long)) {
1316 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
1317 goto http_msg_hdr_val2;
1318 ptr += sizeof(long);
1319 }
1320#endif
1321#if defined(__x86_64__) || \
1322 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
1323 defined(__ARM_ARCH_7A__)
1324 while (ptr <= end - sizeof(int)) {
1325 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
1326 goto http_msg_hdr_val2;
1327 ptr += sizeof(int);
1328 }
1329#endif
1330 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +02001331 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001332 goto http_msg_ood;
1333 }
1334 http_msg_hdr_val2:
1335 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +02001336 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001337
1338 eol = ptr - start;
1339 /* Note: we could also copy eol into ->eoh so that we have the
1340 * real header end in case it ends with lots of LWS, but is this
1341 * really needed ?
1342 */
1343 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001344 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001345 goto http_msg_hdr_l2_lf;
1346
Willy Tarreau801250e2018-09-11 11:45:04 +02001347 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001348 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +02001349 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
1350 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001351
Willy Tarreau801250e2018-09-11 11:45:04 +02001352 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001353 http_msg_hdr_l2_lws:
1354 if (unlikely(HTTP_IS_SPHT(*ptr))) {
Christopher Faulet2912f872018-09-19 14:01:04 +02001355 if (!skip_update) {
1356 /* LWS: replace HT,CR,LF with spaces */
1357 for (; start + eol < ptr; eol++)
1358 start[eol] = ' ';
1359 }
Willy Tarreau794f9af2017-07-26 09:07:47 +02001360 goto http_msg_hdr_val;
1361 }
1362 http_msg_complete_header:
1363 /*
1364 * It was a new header, so the last one is finished. Assumes
1365 * <sol> points to the first char of the name, <col> to the
1366 * colon, <sov> points to the first character of the value and
1367 * <eol> to the first CR or LF so we know how the line ends. We
1368 * will trim spaces around the value. It's possible to do it by
1369 * adjusting <eol> and <sov> which are no more used after this.
1370 * We can add the header field to the list.
1371 */
Christopher Faulet2912f872018-09-19 14:01:04 +02001372 if (likely(!skip_update)) {
1373 while (sov < eol && HTTP_IS_LWS(start[sov]))
1374 sov++;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001375
Christopher Faulet2912f872018-09-19 14:01:04 +02001376 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
1377 eol--;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001378
1379
Christopher Faulet2912f872018-09-19 14:01:04 +02001380 n = ist2(start + sol, col - sol);
1381 v = ist2(start + sov, eol - sov);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001382
Christopher Faulet2912f872018-09-19 14:01:04 +02001383 do {
1384 int ret;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001385
Christopher Faulet2912f872018-09-19 14:01:04 +02001386 if (unlikely(hdr_count >= hdr_num)) {
1387 state = H1_MSG_HDR_L2_LWS;
1388 goto http_output_full;
1389 }
Willy Tarreau5384aac2018-09-11 16:04:48 +02001390
Christopher Faulet2912f872018-09-19 14:01:04 +02001391 if (isteqi(n, ist("transfer-encoding"))) {
1392 h1_parse_xfer_enc_header(h1m, v);
1393 }
1394 else if (isteqi(n, ist("content-length"))) {
1395 ret = h1_parse_cont_len_header(h1m, &v);
Willy Tarreau73373ab2018-09-14 17:11:33 +02001396
Christopher Faulet2912f872018-09-19 14:01:04 +02001397 if (ret < 0) {
1398 state = H1_MSG_HDR_L2_LWS;
1399 goto http_msg_invalid;
1400 }
1401 else if (ret == 0) {
1402 /* skip it */
1403 break;
1404 }
Willy Tarreau73373ab2018-09-14 17:11:33 +02001405 }
Christopher Faulet2912f872018-09-19 14:01:04 +02001406 else if (isteqi(n, ist("connection"))) {
1407 h1_parse_connection_header(h1m, v);
Willy Tarreau73373ab2018-09-14 17:11:33 +02001408 }
Willy Tarreau2ea6bb52018-09-14 16:28:15 +02001409
Christopher Faulet2912f872018-09-19 14:01:04 +02001410 http_set_hdr(&hdr[hdr_count++], n, v);
1411 } while (0);
1412 }
Willy Tarreau794f9af2017-07-26 09:07:47 +02001413
1414 sol = ptr - start;
Christopher Faulet2912f872018-09-19 14:01:04 +02001415
Willy Tarreau794f9af2017-07-26 09:07:47 +02001416 if (likely(!HTTP_IS_CRLF(*ptr)))
1417 goto http_msg_hdr_name;
1418
1419 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001420 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001421 goto http_msg_last_lf;
1422
Willy Tarreau801250e2018-09-11 11:45:04 +02001423 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001424 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +02001425 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001426 ptr++;
1427 /* <ptr> now points to the first byte of payload. If needed sol
1428 * still points to the first of either CR or LF of the empty
1429 * line ending the headers block.
1430 */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001431 if (likely(!skip_update)) {
1432 if (unlikely(hdr_count >= hdr_num)) {
1433 state = H1_MSG_LAST_LF;
1434 goto http_output_full;
1435 }
1436 http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +02001437 }
Willy Tarreau001823c2018-09-12 17:25:32 +02001438
1439 /* reaching here we've parsed the whole message. We may detect
1440 * that we were already continuing an interrupted parsing pass
1441 * so we were silently looking for the end of message not
1442 * updating anything before deciding to parse it fully at once.
1443 * It's guaranteed that we won't match this test twice in a row
1444 * since restarting will turn zero.
1445 */
1446 if (restarting)
1447 goto restart;
1448
Willy Tarreau2557f6a2018-09-14 16:34:47 +02001449 state = H1_MSG_DATA;
1450 if (h1m->flags & H1_MF_XFER_ENC) {
1451 if (h1m->flags & H1_MF_CLEN) {
1452 h1m->flags &= ~H1_MF_CLEN;
1453 hdr_count = http_del_hdr(hdr, ist("content-length"));
1454 }
1455
1456 if (h1m->flags & H1_MF_CHNK)
1457 state = H1_MSG_CHUNK_SIZE;
1458 else if (!(h1m->flags & H1_MF_RESP)) {
1459 /* cf RFC7230#3.3.3 : transfer-encoding in
1460 * request without chunked encoding is invalid.
1461 */
1462 goto http_msg_invalid;
1463 }
1464 }
1465
Willy Tarreau794f9af2017-07-26 09:07:47 +02001466 break;
1467
1468 default:
1469 /* impossible states */
1470 goto http_msg_invalid;
1471 }
1472
Willy Tarreau001823c2018-09-12 17:25:32 +02001473 /* Now we've left the headers state and are either in H1_MSG_DATA or
1474 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001475 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001476
Willy Tarreau5384aac2018-09-11 16:04:48 +02001477 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001478 *slp = sl;
1479
Willy Tarreau4433c082018-09-11 15:33:32 +02001480 h1m->state = state;
1481 h1m->next = ptr - start + skip;
1482 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001483
1484 http_msg_ood:
1485 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001486 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001487 *slp = sl;
1488
Willy Tarreau4433c082018-09-11 15:33:32 +02001489 h1m->state = state;
1490 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001491 return 0;
1492
1493 http_msg_invalid:
1494 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001495 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001496 *slp = sl;
1497
Willy Tarreau4433c082018-09-11 15:33:32 +02001498 h1m->err_state = h1m->state = state;
1499 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001500 return -1;
1501
1502 http_output_full:
1503 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001504 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001505 *slp = sl;
1506
Willy Tarreau4433c082018-09-11 15:33:32 +02001507 h1m->err_state = h1m->state = state;
1508 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001509 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001510
1511 restart:
1512 h1m->next = 0;
Willy Tarreauc2ab9f52018-09-11 17:57:05 +02001513 if (h1m->flags & H1_MF_RESP)
1514 h1m->state = H1_MSG_RPBEFORE;
1515 else
1516 h1m->state = H1_MSG_RQBEFORE;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001517 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001518}
1519
/* This function performs a very minimal parsing of the trailers block present
 * at offset <ofs> in <buf>, looking at no more than <max> bytes. Each line may
 * be terminated by either LF or CRLF, and the block ends with the first empty
 * line. <buf> is only read, never modified, and all positions are advanced
 * using b_next() rather than raw pointer arithmetic.
 *
 * Return value :
 *   > 0 : number of bytes occupied by the trailers, final empty line included,
 *         that is, the number of bytes to delete to skip the trailers.
 *     0 : the end of the inspected area was reached before the end of the
 *         trailers; some input data are missing.
 *   < 0 : parse error, in which case the caller may have to decide how to
 *         proceed, possibly eating everything. A CR which is not immediately
 *         followed by an LF is reported this way : a bare CR is not a valid
 *         line terminator, and accepting one at the beginning of a line would
 *         make a non-empty line look like the end of the trailers.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *stop = b_peek(buf, ofs + max);
	int count = ofs;

	while (1) {
		const char *eol = NULL; /* first byte of the line terminator (CR or LF) */
		const char *start = b_peek(buf, count);
		const char *ptr = start;
		const char *next;

		/* scan current line and stop at LF or CRLF */
		while (1) {
			if (ptr == stop)
				return 0;

			if (*ptr == '\n') {
				if (!eol)
					eol = ptr;
				break;
			}

			/* the previous byte was a CR and this one is not an
			 * LF: bare CR (this also covers two CR in a row).
			 */
			if (eol)
				return -1;

			if (*ptr == '\r')
				eol = ptr;

			ptr = b_next(buf, ptr);
		}

		/* <ptr> is on the LF; point to beginning of next line */
		next = b_next(buf, ptr);
		count += b_dist(buf, start, next);

		/* LF/CRLF at beginning of line => end of trailers at <next>.
		 * Everything was scheduled for forwarding, there's nothing left
		 * from this message.
		 */
		if (eol == start)
			break;

		/* OK, next line then */
	}
	return count - ofs;
}
1570
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001571/* This function skips trailers in the buffer associated with HTTP message
1572 * <msg>. The first visited position is msg->next. If the end of the trailers is
1573 * found, the function returns >0. So, the caller can automatically schedul it
1574 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
1575 * data are available, the function does not change anything except maybe
1576 * msg->sol if it could parse some lines, and returns zero. If a parse error
1577 * is encountered, the function returns < 0 and does not change anything except
1578 * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
1579 * state before calling this function, which implies that all non-trailers data
1580 * have already been scheduled for forwarding, and that msg->next exactly
1581 * matches the length of trailers already parsed and not forwarded. It is also
1582 * important to note that this function is designed to be able to parse wrapped
1583 * headers at end of buffer.
1584 */
1585int http_forward_trailers(struct http_msg *msg)
1586{
Willy Tarreauc9fa0482018-07-10 17:43:27 +02001587 const struct buffer *buf = &msg->chn->buf;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001588 const char *parse = ci_head(msg->chn);
1589 const char *stop = b_tail(buf);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001590
1591 /* we have msg->next which points to next line. Look for CRLF. But
1592 * first, we reset msg->sol */
1593 msg->sol = 0;
1594 while (1) {
1595 const char *p1 = NULL, *p2 = NULL;
Willy Tarreau188e2302018-06-15 11:11:53 +02001596 const char *start = c_ptr(msg->chn, msg->next + msg->sol);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001597 const char *ptr = start;
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001598
1599 /* scan current line and stop at LF or CRLF */
1600 while (1) {
1601 if (ptr == stop)
1602 return 0;
1603
1604 if (*ptr == '\n') {
1605 if (!p1)
1606 p1 = ptr;
1607 p2 = ptr;
1608 break;
1609 }
1610
1611 if (*ptr == '\r') {
1612 if (p1) {
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001613 msg->err_pos = b_dist(buf, parse, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001614 return -1;
1615 }
1616 p1 = ptr;
1617 }
1618
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001619 ptr = b_next(buf, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001620 }
1621
1622 /* after LF; point to beginning of next line */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001623 p2 = b_next(buf, p2);
1624 msg->sol += b_dist(buf, start, p2);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001625
1626 /* LF/CRLF at beginning of line => end of trailers at p2.
1627 * Everything was scheduled for forwarding, there's nothing left
1628 * from this message. */
1629 if (p1 == start)
1630 return 1;
1631
1632 /* OK, next line then */
1633 }
1634}