blob: bca820c20e42eb3af2e99db74e37f40d40ee31c7 [file] [log] [blame]
/*
 * HTTP/1 protocol analyzer
 *
 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <common/config.h>
14
15#include <proto/h1.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020016#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020017
18/* It is about twice as fast on recent architectures to lookup a byte in a
19 * table than to perform a boolean AND or OR between two tests. Refer to
20 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
21 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
22 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
23 * digit. Note: please do not overwrite values in assignment since gcc-2.95
24 * will not handle them correctly. It's worth noting that chars 128..255 are
25 * nothing, not even control chars.
26 */
27const unsigned char h1_char_classes[256] = {
28 [ 0] = H1_FLG_CTL,
29 [ 1] = H1_FLG_CTL,
30 [ 2] = H1_FLG_CTL,
31 [ 3] = H1_FLG_CTL,
32 [ 4] = H1_FLG_CTL,
33 [ 5] = H1_FLG_CTL,
34 [ 6] = H1_FLG_CTL,
35 [ 7] = H1_FLG_CTL,
36 [ 8] = H1_FLG_CTL,
37 [ 9] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP | H1_FLG_CTL,
38 [ 10] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
39 [ 11] = H1_FLG_CTL,
40 [ 12] = H1_FLG_CTL,
41 [ 13] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
42 [ 14] = H1_FLG_CTL,
43 [ 15] = H1_FLG_CTL,
44 [ 16] = H1_FLG_CTL,
45 [ 17] = H1_FLG_CTL,
46 [ 18] = H1_FLG_CTL,
47 [ 19] = H1_FLG_CTL,
48 [ 20] = H1_FLG_CTL,
49 [ 21] = H1_FLG_CTL,
50 [ 22] = H1_FLG_CTL,
51 [ 23] = H1_FLG_CTL,
52 [ 24] = H1_FLG_CTL,
53 [ 25] = H1_FLG_CTL,
54 [ 26] = H1_FLG_CTL,
55 [ 27] = H1_FLG_CTL,
56 [ 28] = H1_FLG_CTL,
57 [ 29] = H1_FLG_CTL,
58 [ 30] = H1_FLG_CTL,
59 [ 31] = H1_FLG_CTL,
60 [' '] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP,
61 ['!'] = H1_FLG_TOK,
62 ['"'] = H1_FLG_SEP,
63 ['#'] = H1_FLG_TOK,
64 ['$'] = H1_FLG_TOK,
65 ['%'] = H1_FLG_TOK,
66 ['&'] = H1_FLG_TOK,
67 [ 39] = H1_FLG_TOK,
68 ['('] = H1_FLG_SEP,
69 [')'] = H1_FLG_SEP,
70 ['*'] = H1_FLG_TOK,
71 ['+'] = H1_FLG_TOK,
72 [','] = H1_FLG_SEP,
73 ['-'] = H1_FLG_TOK,
74 ['.'] = H1_FLG_TOK | H1_FLG_VER,
75 ['/'] = H1_FLG_SEP | H1_FLG_VER,
76 ['0'] = H1_FLG_TOK | H1_FLG_VER,
77 ['1'] = H1_FLG_TOK | H1_FLG_VER,
78 ['2'] = H1_FLG_TOK | H1_FLG_VER,
79 ['3'] = H1_FLG_TOK | H1_FLG_VER,
80 ['4'] = H1_FLG_TOK | H1_FLG_VER,
81 ['5'] = H1_FLG_TOK | H1_FLG_VER,
82 ['6'] = H1_FLG_TOK | H1_FLG_VER,
83 ['7'] = H1_FLG_TOK | H1_FLG_VER,
84 ['8'] = H1_FLG_TOK | H1_FLG_VER,
85 ['9'] = H1_FLG_TOK | H1_FLG_VER,
86 [':'] = H1_FLG_SEP,
87 [';'] = H1_FLG_SEP,
88 ['<'] = H1_FLG_SEP,
89 ['='] = H1_FLG_SEP,
90 ['>'] = H1_FLG_SEP,
91 ['?'] = H1_FLG_SEP,
92 ['@'] = H1_FLG_SEP,
93 ['A'] = H1_FLG_TOK,
94 ['B'] = H1_FLG_TOK,
95 ['C'] = H1_FLG_TOK,
96 ['D'] = H1_FLG_TOK,
97 ['E'] = H1_FLG_TOK,
98 ['F'] = H1_FLG_TOK,
99 ['G'] = H1_FLG_TOK,
100 ['H'] = H1_FLG_TOK | H1_FLG_VER,
101 ['I'] = H1_FLG_TOK,
102 ['J'] = H1_FLG_TOK,
103 ['K'] = H1_FLG_TOK,
104 ['L'] = H1_FLG_TOK,
105 ['M'] = H1_FLG_TOK,
106 ['N'] = H1_FLG_TOK,
107 ['O'] = H1_FLG_TOK,
108 ['P'] = H1_FLG_TOK | H1_FLG_VER,
109 ['Q'] = H1_FLG_TOK,
110 ['R'] = H1_FLG_TOK | H1_FLG_VER,
111 ['S'] = H1_FLG_TOK | H1_FLG_VER,
112 ['T'] = H1_FLG_TOK | H1_FLG_VER,
113 ['U'] = H1_FLG_TOK,
114 ['V'] = H1_FLG_TOK,
115 ['W'] = H1_FLG_TOK,
116 ['X'] = H1_FLG_TOK,
117 ['Y'] = H1_FLG_TOK,
118 ['Z'] = H1_FLG_TOK,
119 ['['] = H1_FLG_SEP,
120 [ 92] = H1_FLG_SEP,
121 [']'] = H1_FLG_SEP,
122 ['^'] = H1_FLG_TOK,
123 ['_'] = H1_FLG_TOK,
124 ['`'] = H1_FLG_TOK,
125 ['a'] = H1_FLG_TOK,
126 ['b'] = H1_FLG_TOK,
127 ['c'] = H1_FLG_TOK,
128 ['d'] = H1_FLG_TOK,
129 ['e'] = H1_FLG_TOK,
130 ['f'] = H1_FLG_TOK,
131 ['g'] = H1_FLG_TOK,
132 ['h'] = H1_FLG_TOK,
133 ['i'] = H1_FLG_TOK,
134 ['j'] = H1_FLG_TOK,
135 ['k'] = H1_FLG_TOK,
136 ['l'] = H1_FLG_TOK,
137 ['m'] = H1_FLG_TOK,
138 ['n'] = H1_FLG_TOK,
139 ['o'] = H1_FLG_TOK,
140 ['p'] = H1_FLG_TOK,
141 ['q'] = H1_FLG_TOK,
142 ['r'] = H1_FLG_TOK,
143 ['s'] = H1_FLG_TOK,
144 ['t'] = H1_FLG_TOK,
145 ['u'] = H1_FLG_TOK,
146 ['v'] = H1_FLG_TOK,
147 ['w'] = H1_FLG_TOK,
148 ['x'] = H1_FLG_TOK,
149 ['y'] = H1_FLG_TOK,
150 ['z'] = H1_FLG_TOK,
151 ['{'] = H1_FLG_SEP,
152 ['|'] = H1_FLG_TOK,
153 ['}'] = H1_FLG_SEP,
154 ['~'] = H1_FLG_TOK,
155 [127] = H1_FLG_CTL,
156};
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200157
158
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200159/*
160 * This function parses a status line between <ptr> and <end>, starting with
161 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
162 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
163 * will give undefined results.
164 * Note that it is upon the caller's responsibility to ensure that ptr < end,
165 * and that msg->sol points to the beginning of the response.
166 * If a complete line is found (which implies that at least one CR or LF is
167 * found before <end>, the updated <ptr> is returned, otherwise NULL is
168 * returned indicating an incomplete line (which does not mean that parts have
169 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
170 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
171 * upon next call.
172 *
173 * This function was intentionally designed to be called from
174 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
175 * within its state machine and use the same macros, hence the need for same
176 * labels and variable names. Note that msg->sol is left unchanged.
177 */
178const char *http_parse_stsline(struct http_msg *msg,
179 enum h1_state state, const char *ptr, const char *end,
180 unsigned int *ret_ptr, enum h1_state *ret_state)
181{
182 const char *msg_start = msg->chn->buf->p;
183
184 switch (state) {
185 case HTTP_MSG_RPVER:
186 http_msg_rpver:
187 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
188 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
189
190 if (likely(HTTP_IS_SPHT(*ptr))) {
191 msg->sl.st.v_l = ptr - msg_start;
192 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
193 }
194 msg->err_state = HTTP_MSG_RPVER;
195 state = HTTP_MSG_ERROR;
196 break;
197
198 case HTTP_MSG_RPVER_SP:
199 http_msg_rpver_sp:
200 if (likely(!HTTP_IS_LWS(*ptr))) {
201 msg->sl.st.c = ptr - msg_start;
202 goto http_msg_rpcode;
203 }
204 if (likely(HTTP_IS_SPHT(*ptr)))
205 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
206 /* so it's a CR/LF, this is invalid */
207 msg->err_state = HTTP_MSG_RPVER_SP;
208 state = HTTP_MSG_ERROR;
209 break;
210
211 case HTTP_MSG_RPCODE:
212 http_msg_rpcode:
213 if (likely(!HTTP_IS_LWS(*ptr)))
214 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
215
216 if (likely(HTTP_IS_SPHT(*ptr))) {
217 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
218 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
219 }
220
221 /* so it's a CR/LF, so there is no reason phrase */
222 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
223 http_msg_rsp_reason:
224 /* FIXME: should we support HTTP responses without any reason phrase ? */
225 msg->sl.st.r = ptr - msg_start;
226 msg->sl.st.r_l = 0;
227 goto http_msg_rpline_eol;
228
229 case HTTP_MSG_RPCODE_SP:
230 http_msg_rpcode_sp:
231 if (likely(!HTTP_IS_LWS(*ptr))) {
232 msg->sl.st.r = ptr - msg_start;
233 goto http_msg_rpreason;
234 }
235 if (likely(HTTP_IS_SPHT(*ptr)))
236 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
237 /* so it's a CR/LF, so there is no reason phrase */
238 goto http_msg_rsp_reason;
239
240 case HTTP_MSG_RPREASON:
241 http_msg_rpreason:
242 if (likely(!HTTP_IS_CRLF(*ptr)))
243 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
244 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
245 http_msg_rpline_eol:
246 /* We have seen the end of line. Note that we do not
247 * necessarily have the \n yet, but at least we know that we
248 * have EITHER \r OR \n, otherwise the response would not be
249 * complete. We can then record the response length and return
250 * to the caller which will be able to register it.
251 */
252 msg->sl.st.l = ptr - msg_start - msg->sol;
253 return ptr;
254
255 default:
256#ifdef DEBUG_FULL
257 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
258 exit(1);
259#endif
260 ;
261 }
262
263 http_msg_ood:
264 /* out of valid data */
265 if (ret_state)
266 *ret_state = state;
267 if (ret_ptr)
268 *ret_ptr = ptr - msg_start;
269 return NULL;
270}
271
272/*
273 * This function parses a request line between <ptr> and <end>, starting with
274 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
275 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
276 * will give undefined results.
277 * Note that it is upon the caller's responsibility to ensure that ptr < end,
278 * and that msg->sol points to the beginning of the request.
279 * If a complete line is found (which implies that at least one CR or LF is
280 * found before <end>, the updated <ptr> is returned, otherwise NULL is
281 * returned indicating an incomplete line (which does not mean that parts have
282 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
283 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
284 * upon next call.
285 *
286 * This function was intentionally designed to be called from
287 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
288 * within its state machine and use the same macros, hence the need for same
289 * labels and variable names. Note that msg->sol is left unchanged.
290 */
291const char *http_parse_reqline(struct http_msg *msg,
292 enum h1_state state, const char *ptr, const char *end,
293 unsigned int *ret_ptr, enum h1_state *ret_state)
294{
295 const char *msg_start = msg->chn->buf->p;
296
297 switch (state) {
298 case HTTP_MSG_RQMETH:
299 http_msg_rqmeth:
300 if (likely(HTTP_IS_TOKEN(*ptr)))
301 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
302
303 if (likely(HTTP_IS_SPHT(*ptr))) {
304 msg->sl.rq.m_l = ptr - msg_start;
305 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
306 }
307
308 if (likely(HTTP_IS_CRLF(*ptr))) {
309 /* HTTP 0.9 request */
310 msg->sl.rq.m_l = ptr - msg_start;
311 http_msg_req09_uri:
312 msg->sl.rq.u = ptr - msg_start;
313 http_msg_req09_uri_e:
314 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
315 http_msg_req09_ver:
316 msg->sl.rq.v = ptr - msg_start;
317 msg->sl.rq.v_l = 0;
318 goto http_msg_rqline_eol;
319 }
320 msg->err_state = HTTP_MSG_RQMETH;
321 state = HTTP_MSG_ERROR;
322 break;
323
324 case HTTP_MSG_RQMETH_SP:
325 http_msg_rqmeth_sp:
326 if (likely(!HTTP_IS_LWS(*ptr))) {
327 msg->sl.rq.u = ptr - msg_start;
328 goto http_msg_rquri;
329 }
330 if (likely(HTTP_IS_SPHT(*ptr)))
331 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
332 /* so it's a CR/LF, meaning an HTTP 0.9 request */
333 goto http_msg_req09_uri;
334
335 case HTTP_MSG_RQURI:
336 http_msg_rquri:
337#if defined(__x86_64__) || \
338 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
339 defined(__ARM_ARCH_7A__)
340 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
341 while (ptr <= end - sizeof(int)) {
342 int x = *(int *)ptr - 0x21212121;
343 if (x & 0x80808080)
344 break;
345
346 x -= 0x5e5e5e5e;
347 if (!(x & 0x80808080))
348 break;
349
350 ptr += sizeof(int);
351 }
352#endif
353 if (ptr >= end) {
354 state = HTTP_MSG_RQURI;
355 goto http_msg_ood;
356 }
357 http_msg_rquri2:
358 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
359 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
360
361 if (likely(HTTP_IS_SPHT(*ptr))) {
362 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
363 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
364 }
365
366 if (likely((unsigned char)*ptr >= 128)) {
367 /* non-ASCII chars are forbidden unless option
368 * accept-invalid-http-request is enabled in the frontend.
369 * In any case, we capture the faulty char.
370 */
371 if (msg->err_pos < -1)
372 goto invalid_char;
373 if (msg->err_pos == -1)
374 msg->err_pos = ptr - msg_start;
375 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
376 }
377
378 if (likely(HTTP_IS_CRLF(*ptr))) {
379 /* so it's a CR/LF, meaning an HTTP 0.9 request */
380 goto http_msg_req09_uri_e;
381 }
382
383 /* OK forbidden chars, 0..31 or 127 */
384 invalid_char:
385 msg->err_pos = ptr - msg_start;
386 msg->err_state = HTTP_MSG_RQURI;
387 state = HTTP_MSG_ERROR;
388 break;
389
390 case HTTP_MSG_RQURI_SP:
391 http_msg_rquri_sp:
392 if (likely(!HTTP_IS_LWS(*ptr))) {
393 msg->sl.rq.v = ptr - msg_start;
394 goto http_msg_rqver;
395 }
396 if (likely(HTTP_IS_SPHT(*ptr)))
397 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
398 /* so it's a CR/LF, meaning an HTTP 0.9 request */
399 goto http_msg_req09_ver;
400
401 case HTTP_MSG_RQVER:
402 http_msg_rqver:
403 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
404 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
405
406 if (likely(HTTP_IS_CRLF(*ptr))) {
407 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
408 http_msg_rqline_eol:
409 /* We have seen the end of line. Note that we do not
410 * necessarily have the \n yet, but at least we know that we
411 * have EITHER \r OR \n, otherwise the request would not be
412 * complete. We can then record the request length and return
413 * to the caller which will be able to register it.
414 */
415 msg->sl.rq.l = ptr - msg_start - msg->sol;
416 return ptr;
417 }
418
419 /* neither an HTTP_VER token nor a CRLF */
420 msg->err_state = HTTP_MSG_RQVER;
421 state = HTTP_MSG_ERROR;
422 break;
423
424 default:
425#ifdef DEBUG_FULL
426 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
427 exit(1);
428#endif
429 ;
430 }
431
432 http_msg_ood:
433 /* out of valid data */
434 if (ret_state)
435 *ret_state = state;
436 if (ret_ptr)
437 *ret_ptr = ptr - msg_start;
438 return NULL;
439}
440
441/*
442 * This function parses an HTTP message, either a request or a response,
443 * depending on the initial msg->msg_state. The caller is responsible for
444 * ensuring that the message does not wrap. The function can be preempted
445 * everywhere when data are missing and recalled at the exact same location
446 * with no information loss. The message may even be realigned between two
447 * calls. The header index is re-initialized when switching from
448 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
449 * fields. Note that msg->sol will be initialized after completing the first
450 * state, so that none of the msg pointers has to be initialized prior to the
451 * first call.
452 */
453void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
454{
455 enum h1_state state; /* updated only when leaving the FSM */
456 register char *ptr, *end; /* request pointers, to avoid dereferences */
457 struct buffer *buf;
458
459 state = msg->msg_state;
460 buf = msg->chn->buf;
461 ptr = buf->p + msg->next;
462 end = buf->p + buf->i;
463
464 if (unlikely(ptr >= end))
465 goto http_msg_ood;
466
467 switch (state) {
468 /*
469 * First, states that are specific to the response only.
470 * We check them first so that request and headers are
471 * closer to each other (accessed more often).
472 */
473 case HTTP_MSG_RPBEFORE:
474 http_msg_rpbefore:
475 if (likely(HTTP_IS_TOKEN(*ptr))) {
476 /* we have a start of message, but we have to check
477 * first if we need to remove some CRLF. We can only
478 * do this when o=0.
479 */
480 if (unlikely(ptr != buf->p)) {
481 if (buf->o)
482 goto http_msg_ood;
483 /* Remove empty leading lines, as recommended by RFC2616. */
484 bi_fast_delete(buf, ptr - buf->p);
485 }
486 msg->sol = 0;
487 msg->sl.st.l = 0; /* used in debug mode */
488 hdr_idx_init(idx);
489 state = HTTP_MSG_RPVER;
490 goto http_msg_rpver;
491 }
492
493 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
494 state = HTTP_MSG_RPBEFORE;
495 goto http_msg_invalid;
496 }
497
498 if (unlikely(*ptr == '\n'))
499 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
500 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
501 /* stop here */
502
503 case HTTP_MSG_RPBEFORE_CR:
504 http_msg_rpbefore_cr:
505 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
506 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
507 /* stop here */
508
509 case HTTP_MSG_RPVER:
510 http_msg_rpver:
511 case HTTP_MSG_RPVER_SP:
512 case HTTP_MSG_RPCODE:
513 case HTTP_MSG_RPCODE_SP:
514 case HTTP_MSG_RPREASON:
515 ptr = (char *)http_parse_stsline(msg,
516 state, ptr, end,
517 &msg->next, &msg->msg_state);
518 if (unlikely(!ptr))
519 return;
520
521 /* we have a full response and we know that we have either a CR
522 * or an LF at <ptr>.
523 */
524 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
525
526 msg->sol = ptr - buf->p;
527 if (likely(*ptr == '\r'))
528 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
529 goto http_msg_rpline_end;
530
531 case HTTP_MSG_RPLINE_END:
532 http_msg_rpline_end:
533 /* msg->sol must point to the first of CR or LF. */
534 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
535 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
536 /* stop here */
537
538 /*
539 * Second, states that are specific to the request only
540 */
541 case HTTP_MSG_RQBEFORE:
542 http_msg_rqbefore:
543 if (likely(HTTP_IS_TOKEN(*ptr))) {
544 /* we have a start of message, but we have to check
545 * first if we need to remove some CRLF. We can only
546 * do this when o=0.
547 */
548 if (likely(ptr != buf->p)) {
549 if (buf->o)
550 goto http_msg_ood;
551 /* Remove empty leading lines, as recommended by RFC2616. */
552 bi_fast_delete(buf, ptr - buf->p);
553 }
554 msg->sol = 0;
555 msg->sl.rq.l = 0; /* used in debug mode */
556 state = HTTP_MSG_RQMETH;
557 goto http_msg_rqmeth;
558 }
559
560 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
561 state = HTTP_MSG_RQBEFORE;
562 goto http_msg_invalid;
563 }
564
565 if (unlikely(*ptr == '\n'))
566 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
567 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
568 /* stop here */
569
570 case HTTP_MSG_RQBEFORE_CR:
571 http_msg_rqbefore_cr:
572 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
573 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
574 /* stop here */
575
576 case HTTP_MSG_RQMETH:
577 http_msg_rqmeth:
578 case HTTP_MSG_RQMETH_SP:
579 case HTTP_MSG_RQURI:
580 case HTTP_MSG_RQURI_SP:
581 case HTTP_MSG_RQVER:
582 ptr = (char *)http_parse_reqline(msg,
583 state, ptr, end,
584 &msg->next, &msg->msg_state);
585 if (unlikely(!ptr))
586 return;
587
588 /* we have a full request and we know that we have either a CR
589 * or an LF at <ptr>.
590 */
591 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
592
593 msg->sol = ptr - buf->p;
594 if (likely(*ptr == '\r'))
595 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
596 goto http_msg_rqline_end;
597
598 case HTTP_MSG_RQLINE_END:
599 http_msg_rqline_end:
600 /* check for HTTP/0.9 request : no version information available.
601 * msg->sol must point to the first of CR or LF.
602 */
603 if (unlikely(msg->sl.rq.v_l == 0))
604 goto http_msg_last_lf;
605
606 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
607 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
608 /* stop here */
609
610 /*
611 * Common states below
612 */
613 case HTTP_MSG_HDR_FIRST:
614 http_msg_hdr_first:
615 msg->sol = ptr - buf->p;
616 if (likely(!HTTP_IS_CRLF(*ptr))) {
617 goto http_msg_hdr_name;
618 }
619
620 if (likely(*ptr == '\r'))
621 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
622 goto http_msg_last_lf;
623
624 case HTTP_MSG_HDR_NAME:
625 http_msg_hdr_name:
626 /* assumes msg->sol points to the first char */
627 if (likely(HTTP_IS_TOKEN(*ptr)))
628 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
629
630 if (likely(*ptr == ':'))
631 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
632
633 if (likely(msg->err_pos < -1) || *ptr == '\n') {
634 state = HTTP_MSG_HDR_NAME;
635 goto http_msg_invalid;
636 }
637
638 if (msg->err_pos == -1) /* capture error pointer */
639 msg->err_pos = ptr - buf->p; /* >= 0 now */
640
641 /* and we still accept this non-token character */
642 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
643
644 case HTTP_MSG_HDR_L1_SP:
645 http_msg_hdr_l1_sp:
646 /* assumes msg->sol points to the first char */
647 if (likely(HTTP_IS_SPHT(*ptr)))
648 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
649
650 /* header value can be basically anything except CR/LF */
651 msg->sov = ptr - buf->p;
652
653 if (likely(!HTTP_IS_CRLF(*ptr))) {
654 goto http_msg_hdr_val;
655 }
656
657 if (likely(*ptr == '\r'))
658 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
659 goto http_msg_hdr_l1_lf;
660
661 case HTTP_MSG_HDR_L1_LF:
662 http_msg_hdr_l1_lf:
663 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
664 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
665
666 case HTTP_MSG_HDR_L1_LWS:
667 http_msg_hdr_l1_lws:
668 if (likely(HTTP_IS_SPHT(*ptr))) {
669 /* replace HT,CR,LF with spaces */
670 for (; buf->p + msg->sov < ptr; msg->sov++)
671 buf->p[msg->sov] = ' ';
672 goto http_msg_hdr_l1_sp;
673 }
674 /* we had a header consisting only in spaces ! */
675 msg->eol = msg->sov;
676 goto http_msg_complete_header;
677
678 case HTTP_MSG_HDR_VAL:
679 http_msg_hdr_val:
680 /* assumes msg->sol points to the first char, and msg->sov
681 * points to the first character of the value.
682 */
683
684 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
685 * and lower. In fact since most of the time is spent in the loop, we
686 * also remove the sign bit test so that bytes 0x8e..0x0d break the
687 * loop, but we don't care since they're very rare in header values.
688 */
689#if defined(__x86_64__)
690 while (ptr <= end - sizeof(long)) {
691 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
692 goto http_msg_hdr_val2;
693 ptr += sizeof(long);
694 }
695#endif
696#if defined(__x86_64__) || \
697 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
698 defined(__ARM_ARCH_7A__)
699 while (ptr <= end - sizeof(int)) {
700 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
701 goto http_msg_hdr_val2;
702 ptr += sizeof(int);
703 }
704#endif
705 if (ptr >= end) {
706 state = HTTP_MSG_HDR_VAL;
707 goto http_msg_ood;
708 }
709 http_msg_hdr_val2:
710 if (likely(!HTTP_IS_CRLF(*ptr)))
711 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
712
713 msg->eol = ptr - buf->p;
714 /* Note: we could also copy eol into ->eoh so that we have the
715 * real header end in case it ends with lots of LWS, but is this
716 * really needed ?
717 */
718 if (likely(*ptr == '\r'))
719 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
720 goto http_msg_hdr_l2_lf;
721
722 case HTTP_MSG_HDR_L2_LF:
723 http_msg_hdr_l2_lf:
724 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
725 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
726
727 case HTTP_MSG_HDR_L2_LWS:
728 http_msg_hdr_l2_lws:
729 if (unlikely(HTTP_IS_SPHT(*ptr))) {
730 /* LWS: replace HT,CR,LF with spaces */
731 for (; buf->p + msg->eol < ptr; msg->eol++)
732 buf->p[msg->eol] = ' ';
733 goto http_msg_hdr_val;
734 }
735 http_msg_complete_header:
736 /*
737 * It was a new header, so the last one is finished.
738 * Assumes msg->sol points to the first char, msg->sov points
739 * to the first character of the value and msg->eol to the
740 * first CR or LF so we know how the line ends. We insert last
741 * header into the index.
742 */
743 if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
744 idx, idx->tail) < 0)) {
745 state = HTTP_MSG_HDR_L2_LWS;
746 goto http_msg_invalid;
747 }
748
749 msg->sol = ptr - buf->p;
750 if (likely(!HTTP_IS_CRLF(*ptr))) {
751 goto http_msg_hdr_name;
752 }
753
754 if (likely(*ptr == '\r'))
755 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
756 goto http_msg_last_lf;
757
758 case HTTP_MSG_LAST_LF:
759 http_msg_last_lf:
760 /* Assumes msg->sol points to the first of either CR or LF.
761 * Sets ->sov and ->next to the total header length, ->eoh to
762 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
763 */
764 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
765 ptr++;
766 msg->sov = msg->next = ptr - buf->p;
767 msg->eoh = msg->sol;
768 msg->sol = 0;
769 msg->eol = msg->sov - msg->eoh;
770 msg->msg_state = HTTP_MSG_BODY;
771 return;
772
773 case HTTP_MSG_ERROR:
774 /* this may only happen if we call http_msg_analyser() twice with an error */
775 break;
776
777 default:
778#ifdef DEBUG_FULL
779 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
780 exit(1);
781#endif
782 ;
783 }
784 http_msg_ood:
785 /* out of data */
786 msg->msg_state = state;
787 msg->next = ptr - buf->p;
788 return;
789
790 http_msg_invalid:
791 /* invalid message */
792 msg->err_state = state;
793 msg->msg_state = HTTP_MSG_ERROR;
794 msg->next = ptr - buf->p;
795 return;
796}
797
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200798/* This function skips trailers in the buffer associated with HTTP message
799 * <msg>. The first visited position is msg->next. If the end of the trailers is
800 * found, the function returns >0. So, the caller can automatically schedul it
801 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
802 * data are available, the function does not change anything except maybe
803 * msg->sol if it could parse some lines, and returns zero. If a parse error
804 * is encountered, the function returns < 0 and does not change anything except
805 * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
806 * state before calling this function, which implies that all non-trailers data
807 * have already been scheduled for forwarding, and that msg->next exactly
808 * matches the length of trailers already parsed and not forwarded. It is also
809 * important to note that this function is designed to be able to parse wrapped
810 * headers at end of buffer.
811 */
812int http_forward_trailers(struct http_msg *msg)
813{
814 const struct buffer *buf = msg->chn->buf;
815
816 /* we have msg->next which points to next line. Look for CRLF. But
817 * first, we reset msg->sol */
818 msg->sol = 0;
819 while (1) {
820 const char *p1 = NULL, *p2 = NULL;
821 const char *start = b_ptr(buf, msg->next + msg->sol);
822 const char *stop = bi_end(buf);
823 const char *ptr = start;
824 int bytes = 0;
825
826 /* scan current line and stop at LF or CRLF */
827 while (1) {
828 if (ptr == stop)
829 return 0;
830
831 if (*ptr == '\n') {
832 if (!p1)
833 p1 = ptr;
834 p2 = ptr;
835 break;
836 }
837
838 if (*ptr == '\r') {
839 if (p1) {
840 msg->err_pos = buffer_count(buf, buf->p, ptr);
841 return -1;
842 }
843 p1 = ptr;
844 }
845
846 ptr++;
847 if (ptr >= buf->data + buf->size)
848 ptr = buf->data;
849 }
850
851 /* after LF; point to beginning of next line */
852 p2++;
853 if (p2 >= buf->data + buf->size)
854 p2 = buf->data;
855
856 bytes = p2 - start;
857 if (bytes < 0)
858 bytes += buf->size;
859 msg->sol += bytes;
860
861 /* LF/CRLF at beginning of line => end of trailers at p2.
862 * Everything was scheduled for forwarding, there's nothing left
863 * from this message. */
864 if (p1 == start)
865 return 1;
866
867 /* OK, next line then */
868 }
869}