blob: bbfaabcf76000b7979d08044cbef38423b22a9ca [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020014#include <common/config.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020015#include <common/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020016
17#include <proto/h1.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020018#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020019
20/* It is about twice as fast on recent architectures to lookup a byte in a
21 * table than to perform a boolean AND or OR between two tests. Refer to
22 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
23 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
24 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
25 * digit. Note: please do not overwrite values in assignment since gcc-2.95
26 * will not handle them correctly. It's worth noting that chars 128..255 are
27 * nothing, not even control chars.
28 */
29const unsigned char h1_char_classes[256] = {
30 [ 0] = H1_FLG_CTL,
31 [ 1] = H1_FLG_CTL,
32 [ 2] = H1_FLG_CTL,
33 [ 3] = H1_FLG_CTL,
34 [ 4] = H1_FLG_CTL,
35 [ 5] = H1_FLG_CTL,
36 [ 6] = H1_FLG_CTL,
37 [ 7] = H1_FLG_CTL,
38 [ 8] = H1_FLG_CTL,
39 [ 9] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP | H1_FLG_CTL,
40 [ 10] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
41 [ 11] = H1_FLG_CTL,
42 [ 12] = H1_FLG_CTL,
43 [ 13] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
44 [ 14] = H1_FLG_CTL,
45 [ 15] = H1_FLG_CTL,
46 [ 16] = H1_FLG_CTL,
47 [ 17] = H1_FLG_CTL,
48 [ 18] = H1_FLG_CTL,
49 [ 19] = H1_FLG_CTL,
50 [ 20] = H1_FLG_CTL,
51 [ 21] = H1_FLG_CTL,
52 [ 22] = H1_FLG_CTL,
53 [ 23] = H1_FLG_CTL,
54 [ 24] = H1_FLG_CTL,
55 [ 25] = H1_FLG_CTL,
56 [ 26] = H1_FLG_CTL,
57 [ 27] = H1_FLG_CTL,
58 [ 28] = H1_FLG_CTL,
59 [ 29] = H1_FLG_CTL,
60 [ 30] = H1_FLG_CTL,
61 [ 31] = H1_FLG_CTL,
62 [' '] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP,
63 ['!'] = H1_FLG_TOK,
64 ['"'] = H1_FLG_SEP,
65 ['#'] = H1_FLG_TOK,
66 ['$'] = H1_FLG_TOK,
67 ['%'] = H1_FLG_TOK,
68 ['&'] = H1_FLG_TOK,
69 [ 39] = H1_FLG_TOK,
70 ['('] = H1_FLG_SEP,
71 [')'] = H1_FLG_SEP,
72 ['*'] = H1_FLG_TOK,
73 ['+'] = H1_FLG_TOK,
74 [','] = H1_FLG_SEP,
75 ['-'] = H1_FLG_TOK,
76 ['.'] = H1_FLG_TOK | H1_FLG_VER,
77 ['/'] = H1_FLG_SEP | H1_FLG_VER,
78 ['0'] = H1_FLG_TOK | H1_FLG_VER,
79 ['1'] = H1_FLG_TOK | H1_FLG_VER,
80 ['2'] = H1_FLG_TOK | H1_FLG_VER,
81 ['3'] = H1_FLG_TOK | H1_FLG_VER,
82 ['4'] = H1_FLG_TOK | H1_FLG_VER,
83 ['5'] = H1_FLG_TOK | H1_FLG_VER,
84 ['6'] = H1_FLG_TOK | H1_FLG_VER,
85 ['7'] = H1_FLG_TOK | H1_FLG_VER,
86 ['8'] = H1_FLG_TOK | H1_FLG_VER,
87 ['9'] = H1_FLG_TOK | H1_FLG_VER,
88 [':'] = H1_FLG_SEP,
89 [';'] = H1_FLG_SEP,
90 ['<'] = H1_FLG_SEP,
91 ['='] = H1_FLG_SEP,
92 ['>'] = H1_FLG_SEP,
93 ['?'] = H1_FLG_SEP,
94 ['@'] = H1_FLG_SEP,
95 ['A'] = H1_FLG_TOK,
96 ['B'] = H1_FLG_TOK,
97 ['C'] = H1_FLG_TOK,
98 ['D'] = H1_FLG_TOK,
99 ['E'] = H1_FLG_TOK,
100 ['F'] = H1_FLG_TOK,
101 ['G'] = H1_FLG_TOK,
102 ['H'] = H1_FLG_TOK | H1_FLG_VER,
103 ['I'] = H1_FLG_TOK,
104 ['J'] = H1_FLG_TOK,
105 ['K'] = H1_FLG_TOK,
106 ['L'] = H1_FLG_TOK,
107 ['M'] = H1_FLG_TOK,
108 ['N'] = H1_FLG_TOK,
109 ['O'] = H1_FLG_TOK,
110 ['P'] = H1_FLG_TOK | H1_FLG_VER,
111 ['Q'] = H1_FLG_TOK,
112 ['R'] = H1_FLG_TOK | H1_FLG_VER,
113 ['S'] = H1_FLG_TOK | H1_FLG_VER,
114 ['T'] = H1_FLG_TOK | H1_FLG_VER,
115 ['U'] = H1_FLG_TOK,
116 ['V'] = H1_FLG_TOK,
117 ['W'] = H1_FLG_TOK,
118 ['X'] = H1_FLG_TOK,
119 ['Y'] = H1_FLG_TOK,
120 ['Z'] = H1_FLG_TOK,
121 ['['] = H1_FLG_SEP,
122 [ 92] = H1_FLG_SEP,
123 [']'] = H1_FLG_SEP,
124 ['^'] = H1_FLG_TOK,
125 ['_'] = H1_FLG_TOK,
126 ['`'] = H1_FLG_TOK,
127 ['a'] = H1_FLG_TOK,
128 ['b'] = H1_FLG_TOK,
129 ['c'] = H1_FLG_TOK,
130 ['d'] = H1_FLG_TOK,
131 ['e'] = H1_FLG_TOK,
132 ['f'] = H1_FLG_TOK,
133 ['g'] = H1_FLG_TOK,
134 ['h'] = H1_FLG_TOK,
135 ['i'] = H1_FLG_TOK,
136 ['j'] = H1_FLG_TOK,
137 ['k'] = H1_FLG_TOK,
138 ['l'] = H1_FLG_TOK,
139 ['m'] = H1_FLG_TOK,
140 ['n'] = H1_FLG_TOK,
141 ['o'] = H1_FLG_TOK,
142 ['p'] = H1_FLG_TOK,
143 ['q'] = H1_FLG_TOK,
144 ['r'] = H1_FLG_TOK,
145 ['s'] = H1_FLG_TOK,
146 ['t'] = H1_FLG_TOK,
147 ['u'] = H1_FLG_TOK,
148 ['v'] = H1_FLG_TOK,
149 ['w'] = H1_FLG_TOK,
150 ['x'] = H1_FLG_TOK,
151 ['y'] = H1_FLG_TOK,
152 ['z'] = H1_FLG_TOK,
153 ['{'] = H1_FLG_SEP,
154 ['|'] = H1_FLG_TOK,
155 ['}'] = H1_FLG_SEP,
156 ['~'] = H1_FLG_TOK,
157 [127] = H1_FLG_CTL,
158};
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200159
160
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200161/*
162 * This function parses a status line between <ptr> and <end>, starting with
163 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
164 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
165 * will give undefined results.
166 * Note that it is upon the caller's responsibility to ensure that ptr < end,
167 * and that msg->sol points to the beginning of the response.
168 * If a complete line is found (which implies that at least one CR or LF is
169 * found before <end>, the updated <ptr> is returned, otherwise NULL is
170 * returned indicating an incomplete line (which does not mean that parts have
171 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
172 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
173 * upon next call.
174 *
175 * This function was intentionally designed to be called from
176 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
177 * within its state machine and use the same macros, hence the need for same
178 * labels and variable names. Note that msg->sol is left unchanged.
179 */
180const char *http_parse_stsline(struct http_msg *msg,
181 enum h1_state state, const char *ptr, const char *end,
182 unsigned int *ret_ptr, enum h1_state *ret_state)
183{
184 const char *msg_start = msg->chn->buf->p;
185
186 switch (state) {
187 case HTTP_MSG_RPVER:
188 http_msg_rpver:
189 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
190 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
191
192 if (likely(HTTP_IS_SPHT(*ptr))) {
193 msg->sl.st.v_l = ptr - msg_start;
194 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
195 }
196 msg->err_state = HTTP_MSG_RPVER;
197 state = HTTP_MSG_ERROR;
198 break;
199
200 case HTTP_MSG_RPVER_SP:
201 http_msg_rpver_sp:
202 if (likely(!HTTP_IS_LWS(*ptr))) {
203 msg->sl.st.c = ptr - msg_start;
204 goto http_msg_rpcode;
205 }
206 if (likely(HTTP_IS_SPHT(*ptr)))
207 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
208 /* so it's a CR/LF, this is invalid */
209 msg->err_state = HTTP_MSG_RPVER_SP;
210 state = HTTP_MSG_ERROR;
211 break;
212
213 case HTTP_MSG_RPCODE:
214 http_msg_rpcode:
215 if (likely(!HTTP_IS_LWS(*ptr)))
216 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
217
218 if (likely(HTTP_IS_SPHT(*ptr))) {
219 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
220 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
221 }
222
223 /* so it's a CR/LF, so there is no reason phrase */
224 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
225 http_msg_rsp_reason:
226 /* FIXME: should we support HTTP responses without any reason phrase ? */
227 msg->sl.st.r = ptr - msg_start;
228 msg->sl.st.r_l = 0;
229 goto http_msg_rpline_eol;
230
231 case HTTP_MSG_RPCODE_SP:
232 http_msg_rpcode_sp:
233 if (likely(!HTTP_IS_LWS(*ptr))) {
234 msg->sl.st.r = ptr - msg_start;
235 goto http_msg_rpreason;
236 }
237 if (likely(HTTP_IS_SPHT(*ptr)))
238 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
239 /* so it's a CR/LF, so there is no reason phrase */
240 goto http_msg_rsp_reason;
241
242 case HTTP_MSG_RPREASON:
243 http_msg_rpreason:
244 if (likely(!HTTP_IS_CRLF(*ptr)))
245 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
246 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
247 http_msg_rpline_eol:
248 /* We have seen the end of line. Note that we do not
249 * necessarily have the \n yet, but at least we know that we
250 * have EITHER \r OR \n, otherwise the response would not be
251 * complete. We can then record the response length and return
252 * to the caller which will be able to register it.
253 */
254 msg->sl.st.l = ptr - msg_start - msg->sol;
255 return ptr;
256
257 default:
258#ifdef DEBUG_FULL
259 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
260 exit(1);
261#endif
262 ;
263 }
264
265 http_msg_ood:
266 /* out of valid data */
267 if (ret_state)
268 *ret_state = state;
269 if (ret_ptr)
270 *ret_ptr = ptr - msg_start;
271 return NULL;
272}
273
274/*
275 * This function parses a request line between <ptr> and <end>, starting with
276 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
277 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
278 * will give undefined results.
279 * Note that it is upon the caller's responsibility to ensure that ptr < end,
280 * and that msg->sol points to the beginning of the request.
281 * If a complete line is found (which implies that at least one CR or LF is
282 * found before <end>, the updated <ptr> is returned, otherwise NULL is
283 * returned indicating an incomplete line (which does not mean that parts have
284 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
285 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
286 * upon next call.
287 *
288 * This function was intentionally designed to be called from
289 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
290 * within its state machine and use the same macros, hence the need for same
291 * labels and variable names. Note that msg->sol is left unchanged.
292 */
293const char *http_parse_reqline(struct http_msg *msg,
294 enum h1_state state, const char *ptr, const char *end,
295 unsigned int *ret_ptr, enum h1_state *ret_state)
296{
297 const char *msg_start = msg->chn->buf->p;
298
299 switch (state) {
300 case HTTP_MSG_RQMETH:
301 http_msg_rqmeth:
302 if (likely(HTTP_IS_TOKEN(*ptr)))
303 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
304
305 if (likely(HTTP_IS_SPHT(*ptr))) {
306 msg->sl.rq.m_l = ptr - msg_start;
307 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
308 }
309
310 if (likely(HTTP_IS_CRLF(*ptr))) {
311 /* HTTP 0.9 request */
312 msg->sl.rq.m_l = ptr - msg_start;
313 http_msg_req09_uri:
314 msg->sl.rq.u = ptr - msg_start;
315 http_msg_req09_uri_e:
316 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
317 http_msg_req09_ver:
318 msg->sl.rq.v = ptr - msg_start;
319 msg->sl.rq.v_l = 0;
320 goto http_msg_rqline_eol;
321 }
322 msg->err_state = HTTP_MSG_RQMETH;
323 state = HTTP_MSG_ERROR;
324 break;
325
326 case HTTP_MSG_RQMETH_SP:
327 http_msg_rqmeth_sp:
328 if (likely(!HTTP_IS_LWS(*ptr))) {
329 msg->sl.rq.u = ptr - msg_start;
330 goto http_msg_rquri;
331 }
332 if (likely(HTTP_IS_SPHT(*ptr)))
333 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
334 /* so it's a CR/LF, meaning an HTTP 0.9 request */
335 goto http_msg_req09_uri;
336
337 case HTTP_MSG_RQURI:
338 http_msg_rquri:
339#if defined(__x86_64__) || \
340 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
341 defined(__ARM_ARCH_7A__)
342 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
343 while (ptr <= end - sizeof(int)) {
344 int x = *(int *)ptr - 0x21212121;
345 if (x & 0x80808080)
346 break;
347
348 x -= 0x5e5e5e5e;
349 if (!(x & 0x80808080))
350 break;
351
352 ptr += sizeof(int);
353 }
354#endif
355 if (ptr >= end) {
356 state = HTTP_MSG_RQURI;
357 goto http_msg_ood;
358 }
359 http_msg_rquri2:
360 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
361 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
362
363 if (likely(HTTP_IS_SPHT(*ptr))) {
364 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
365 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
366 }
367
368 if (likely((unsigned char)*ptr >= 128)) {
369 /* non-ASCII chars are forbidden unless option
370 * accept-invalid-http-request is enabled in the frontend.
371 * In any case, we capture the faulty char.
372 */
373 if (msg->err_pos < -1)
374 goto invalid_char;
375 if (msg->err_pos == -1)
376 msg->err_pos = ptr - msg_start;
377 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
378 }
379
380 if (likely(HTTP_IS_CRLF(*ptr))) {
381 /* so it's a CR/LF, meaning an HTTP 0.9 request */
382 goto http_msg_req09_uri_e;
383 }
384
385 /* OK forbidden chars, 0..31 or 127 */
386 invalid_char:
387 msg->err_pos = ptr - msg_start;
388 msg->err_state = HTTP_MSG_RQURI;
389 state = HTTP_MSG_ERROR;
390 break;
391
392 case HTTP_MSG_RQURI_SP:
393 http_msg_rquri_sp:
394 if (likely(!HTTP_IS_LWS(*ptr))) {
395 msg->sl.rq.v = ptr - msg_start;
396 goto http_msg_rqver;
397 }
398 if (likely(HTTP_IS_SPHT(*ptr)))
399 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
400 /* so it's a CR/LF, meaning an HTTP 0.9 request */
401 goto http_msg_req09_ver;
402
403 case HTTP_MSG_RQVER:
404 http_msg_rqver:
405 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
406 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
407
408 if (likely(HTTP_IS_CRLF(*ptr))) {
409 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
410 http_msg_rqline_eol:
411 /* We have seen the end of line. Note that we do not
412 * necessarily have the \n yet, but at least we know that we
413 * have EITHER \r OR \n, otherwise the request would not be
414 * complete. We can then record the request length and return
415 * to the caller which will be able to register it.
416 */
417 msg->sl.rq.l = ptr - msg_start - msg->sol;
418 return ptr;
419 }
420
421 /* neither an HTTP_VER token nor a CRLF */
422 msg->err_state = HTTP_MSG_RQVER;
423 state = HTTP_MSG_ERROR;
424 break;
425
426 default:
427#ifdef DEBUG_FULL
428 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
429 exit(1);
430#endif
431 ;
432 }
433
434 http_msg_ood:
435 /* out of valid data */
436 if (ret_state)
437 *ret_state = state;
438 if (ret_ptr)
439 *ret_ptr = ptr - msg_start;
440 return NULL;
441}
442
443/*
444 * This function parses an HTTP message, either a request or a response,
445 * depending on the initial msg->msg_state. The caller is responsible for
446 * ensuring that the message does not wrap. The function can be preempted
447 * everywhere when data are missing and recalled at the exact same location
448 * with no information loss. The message may even be realigned between two
449 * calls. The header index is re-initialized when switching from
450 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
451 * fields. Note that msg->sol will be initialized after completing the first
452 * state, so that none of the msg pointers has to be initialized prior to the
453 * first call.
454 */
455void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
456{
457 enum h1_state state; /* updated only when leaving the FSM */
458 register char *ptr, *end; /* request pointers, to avoid dereferences */
459 struct buffer *buf;
460
461 state = msg->msg_state;
462 buf = msg->chn->buf;
463 ptr = buf->p + msg->next;
464 end = buf->p + buf->i;
465
466 if (unlikely(ptr >= end))
467 goto http_msg_ood;
468
469 switch (state) {
470 /*
471 * First, states that are specific to the response only.
472 * We check them first so that request and headers are
473 * closer to each other (accessed more often).
474 */
475 case HTTP_MSG_RPBEFORE:
476 http_msg_rpbefore:
477 if (likely(HTTP_IS_TOKEN(*ptr))) {
478 /* we have a start of message, but we have to check
479 * first if we need to remove some CRLF. We can only
480 * do this when o=0.
481 */
482 if (unlikely(ptr != buf->p)) {
483 if (buf->o)
484 goto http_msg_ood;
485 /* Remove empty leading lines, as recommended by RFC2616. */
486 bi_fast_delete(buf, ptr - buf->p);
487 }
488 msg->sol = 0;
489 msg->sl.st.l = 0; /* used in debug mode */
490 hdr_idx_init(idx);
491 state = HTTP_MSG_RPVER;
492 goto http_msg_rpver;
493 }
494
495 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
496 state = HTTP_MSG_RPBEFORE;
497 goto http_msg_invalid;
498 }
499
500 if (unlikely(*ptr == '\n'))
501 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
502 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
503 /* stop here */
504
505 case HTTP_MSG_RPBEFORE_CR:
506 http_msg_rpbefore_cr:
507 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
508 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
509 /* stop here */
510
511 case HTTP_MSG_RPVER:
512 http_msg_rpver:
513 case HTTP_MSG_RPVER_SP:
514 case HTTP_MSG_RPCODE:
515 case HTTP_MSG_RPCODE_SP:
516 case HTTP_MSG_RPREASON:
517 ptr = (char *)http_parse_stsline(msg,
518 state, ptr, end,
519 &msg->next, &msg->msg_state);
520 if (unlikely(!ptr))
521 return;
522
523 /* we have a full response and we know that we have either a CR
524 * or an LF at <ptr>.
525 */
526 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
527
528 msg->sol = ptr - buf->p;
529 if (likely(*ptr == '\r'))
530 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
531 goto http_msg_rpline_end;
532
533 case HTTP_MSG_RPLINE_END:
534 http_msg_rpline_end:
535 /* msg->sol must point to the first of CR or LF. */
536 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
537 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
538 /* stop here */
539
540 /*
541 * Second, states that are specific to the request only
542 */
543 case HTTP_MSG_RQBEFORE:
544 http_msg_rqbefore:
545 if (likely(HTTP_IS_TOKEN(*ptr))) {
546 /* we have a start of message, but we have to check
547 * first if we need to remove some CRLF. We can only
548 * do this when o=0.
549 */
550 if (likely(ptr != buf->p)) {
551 if (buf->o)
552 goto http_msg_ood;
553 /* Remove empty leading lines, as recommended by RFC2616. */
554 bi_fast_delete(buf, ptr - buf->p);
555 }
556 msg->sol = 0;
557 msg->sl.rq.l = 0; /* used in debug mode */
558 state = HTTP_MSG_RQMETH;
559 goto http_msg_rqmeth;
560 }
561
562 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
563 state = HTTP_MSG_RQBEFORE;
564 goto http_msg_invalid;
565 }
566
567 if (unlikely(*ptr == '\n'))
568 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
569 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
570 /* stop here */
571
572 case HTTP_MSG_RQBEFORE_CR:
573 http_msg_rqbefore_cr:
574 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
575 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
576 /* stop here */
577
578 case HTTP_MSG_RQMETH:
579 http_msg_rqmeth:
580 case HTTP_MSG_RQMETH_SP:
581 case HTTP_MSG_RQURI:
582 case HTTP_MSG_RQURI_SP:
583 case HTTP_MSG_RQVER:
584 ptr = (char *)http_parse_reqline(msg,
585 state, ptr, end,
586 &msg->next, &msg->msg_state);
587 if (unlikely(!ptr))
588 return;
589
590 /* we have a full request and we know that we have either a CR
591 * or an LF at <ptr>.
592 */
593 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
594
595 msg->sol = ptr - buf->p;
596 if (likely(*ptr == '\r'))
597 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
598 goto http_msg_rqline_end;
599
600 case HTTP_MSG_RQLINE_END:
601 http_msg_rqline_end:
602 /* check for HTTP/0.9 request : no version information available.
603 * msg->sol must point to the first of CR or LF.
604 */
605 if (unlikely(msg->sl.rq.v_l == 0))
606 goto http_msg_last_lf;
607
608 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
609 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
610 /* stop here */
611
612 /*
613 * Common states below
614 */
615 case HTTP_MSG_HDR_FIRST:
616 http_msg_hdr_first:
617 msg->sol = ptr - buf->p;
618 if (likely(!HTTP_IS_CRLF(*ptr))) {
619 goto http_msg_hdr_name;
620 }
621
622 if (likely(*ptr == '\r'))
623 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
624 goto http_msg_last_lf;
625
626 case HTTP_MSG_HDR_NAME:
627 http_msg_hdr_name:
628 /* assumes msg->sol points to the first char */
629 if (likely(HTTP_IS_TOKEN(*ptr)))
630 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
631
632 if (likely(*ptr == ':'))
633 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
634
635 if (likely(msg->err_pos < -1) || *ptr == '\n') {
636 state = HTTP_MSG_HDR_NAME;
637 goto http_msg_invalid;
638 }
639
640 if (msg->err_pos == -1) /* capture error pointer */
641 msg->err_pos = ptr - buf->p; /* >= 0 now */
642
643 /* and we still accept this non-token character */
644 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
645
646 case HTTP_MSG_HDR_L1_SP:
647 http_msg_hdr_l1_sp:
648 /* assumes msg->sol points to the first char */
649 if (likely(HTTP_IS_SPHT(*ptr)))
650 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
651
652 /* header value can be basically anything except CR/LF */
653 msg->sov = ptr - buf->p;
654
655 if (likely(!HTTP_IS_CRLF(*ptr))) {
656 goto http_msg_hdr_val;
657 }
658
659 if (likely(*ptr == '\r'))
660 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
661 goto http_msg_hdr_l1_lf;
662
663 case HTTP_MSG_HDR_L1_LF:
664 http_msg_hdr_l1_lf:
665 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
666 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
667
668 case HTTP_MSG_HDR_L1_LWS:
669 http_msg_hdr_l1_lws:
670 if (likely(HTTP_IS_SPHT(*ptr))) {
671 /* replace HT,CR,LF with spaces */
672 for (; buf->p + msg->sov < ptr; msg->sov++)
673 buf->p[msg->sov] = ' ';
674 goto http_msg_hdr_l1_sp;
675 }
676 /* we had a header consisting only in spaces ! */
677 msg->eol = msg->sov;
678 goto http_msg_complete_header;
679
680 case HTTP_MSG_HDR_VAL:
681 http_msg_hdr_val:
682 /* assumes msg->sol points to the first char, and msg->sov
683 * points to the first character of the value.
684 */
685
686 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
687 * and lower. In fact since most of the time is spent in the loop, we
688 * also remove the sign bit test so that bytes 0x8e..0x0d break the
689 * loop, but we don't care since they're very rare in header values.
690 */
691#if defined(__x86_64__)
692 while (ptr <= end - sizeof(long)) {
693 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
694 goto http_msg_hdr_val2;
695 ptr += sizeof(long);
696 }
697#endif
698#if defined(__x86_64__) || \
699 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
700 defined(__ARM_ARCH_7A__)
701 while (ptr <= end - sizeof(int)) {
702 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
703 goto http_msg_hdr_val2;
704 ptr += sizeof(int);
705 }
706#endif
707 if (ptr >= end) {
708 state = HTTP_MSG_HDR_VAL;
709 goto http_msg_ood;
710 }
711 http_msg_hdr_val2:
712 if (likely(!HTTP_IS_CRLF(*ptr)))
713 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
714
715 msg->eol = ptr - buf->p;
716 /* Note: we could also copy eol into ->eoh so that we have the
717 * real header end in case it ends with lots of LWS, but is this
718 * really needed ?
719 */
720 if (likely(*ptr == '\r'))
721 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
722 goto http_msg_hdr_l2_lf;
723
724 case HTTP_MSG_HDR_L2_LF:
725 http_msg_hdr_l2_lf:
726 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
727 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
728
729 case HTTP_MSG_HDR_L2_LWS:
730 http_msg_hdr_l2_lws:
731 if (unlikely(HTTP_IS_SPHT(*ptr))) {
732 /* LWS: replace HT,CR,LF with spaces */
733 for (; buf->p + msg->eol < ptr; msg->eol++)
734 buf->p[msg->eol] = ' ';
735 goto http_msg_hdr_val;
736 }
737 http_msg_complete_header:
738 /*
739 * It was a new header, so the last one is finished.
740 * Assumes msg->sol points to the first char, msg->sov points
741 * to the first character of the value and msg->eol to the
742 * first CR or LF so we know how the line ends. We insert last
743 * header into the index.
744 */
745 if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
746 idx, idx->tail) < 0)) {
747 state = HTTP_MSG_HDR_L2_LWS;
748 goto http_msg_invalid;
749 }
750
751 msg->sol = ptr - buf->p;
752 if (likely(!HTTP_IS_CRLF(*ptr))) {
753 goto http_msg_hdr_name;
754 }
755
756 if (likely(*ptr == '\r'))
757 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
758 goto http_msg_last_lf;
759
760 case HTTP_MSG_LAST_LF:
761 http_msg_last_lf:
762 /* Assumes msg->sol points to the first of either CR or LF.
763 * Sets ->sov and ->next to the total header length, ->eoh to
764 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
765 */
766 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
767 ptr++;
768 msg->sov = msg->next = ptr - buf->p;
769 msg->eoh = msg->sol;
770 msg->sol = 0;
771 msg->eol = msg->sov - msg->eoh;
772 msg->msg_state = HTTP_MSG_BODY;
773 return;
774
775 case HTTP_MSG_ERROR:
776 /* this may only happen if we call http_msg_analyser() twice with an error */
777 break;
778
779 default:
780#ifdef DEBUG_FULL
781 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
782 exit(1);
783#endif
784 ;
785 }
786 http_msg_ood:
787 /* out of data */
788 msg->msg_state = state;
789 msg->next = ptr - buf->p;
790 return;
791
792 http_msg_invalid:
793 /* invalid message */
794 msg->err_state = state;
795 msg->msg_state = HTTP_MSG_ERROR;
796 msg->next = ptr - buf->p;
797 return;
798}
799
/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
 * pairs representing header fields into the array <hdr> of size <hdr_num>,
 * whose last entry will have an empty name and an empty value. If <hdr_num> is
 * too small to represent the whole message, an error is returned. If <h1m> is
 * not NULL, some protocol elements such as content-length and transfer-encoding
 * will be parsed and stored there as well.
 *
 * For now it's limited to the response. If the header block is incomplete,
 * 0 is returned, waiting to be called again with more data to try it again.
 *
 * The code derived from the main HTTP/1 parser above but was simplified and
 * optimized to process responses produced or forwarded by haproxy. The caller
 * is responsible for ensuring that the message doesn't wrap, and should ensure
 * it is complete to avoid having to retry the operation after a failed
 * attempt. The message is not supposed to be invalid, which is why a few
 * properties such as the character set used in the header field names are not
 * checked. In case of an unparsable response message, a negative value will be
 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
 * recommended.
 *
 * This function returns :
 *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
 *    -2 if the output is full (hdr_num reached). err_state and err_pos also
 *       indicate where it failed.
 *     0 in case of missing data.
 *   > 0 on success, it then corresponds to the number of bytes read since
 *       <start> so that the caller can go on with the payload.
 */
832int h1_headers_to_hdr_list(char *start, const char *stop,
833 struct http_hdr *hdr, unsigned int hdr_num,
834 struct h1m *h1m)
835{
836 enum h1_state state = HTTP_MSG_RPBEFORE;
837 register char *ptr = start;
838 register const char *end = stop;
839 unsigned int hdr_count = 0;
840 unsigned int code = 0; /* status code, ASCII form */
841 unsigned int st_c; /* beginning of status code, relative to msg_start */
842 unsigned int st_c_l; /* length of status code */
843 unsigned int sol = 0; /* start of line */
844 unsigned int col = 0; /* position of the colon */
845 unsigned int eol = 0; /* end of line */
846 unsigned int sov = 0; /* start of value */
847 unsigned int skip = 0; /* number of bytes skipped at the beginning */
848 struct ist n, v; /* header name and value during parsing */
849
850 if (unlikely(ptr >= end))
851 goto http_msg_ood;
852
853 switch (state) {
854 case HTTP_MSG_RPBEFORE:
855 http_msg_rpbefore:
856 if (likely(HTTP_IS_TOKEN(*ptr))) {
857 /* we have a start of message, we may have skipped some
858 * heading CRLF. Skip them now.
859 */
860 skip += ptr - start;
861 start = ptr;
862
863 sol = 0;
864 hdr_count = 0;
865 state = HTTP_MSG_RPVER;
866 goto http_msg_rpver;
867 }
868
869 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
870 state = HTTP_MSG_RPBEFORE;
871 goto http_msg_invalid;
872 }
873
874 if (unlikely(*ptr == '\n'))
875 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
876 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
877 /* stop here */
878
879 case HTTP_MSG_RPBEFORE_CR:
880 http_msg_rpbefore_cr:
881 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
882 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
883 /* stop here */
884
885 case HTTP_MSG_RPVER:
886 http_msg_rpver:
887 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
888 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
889
890 if (likely(HTTP_IS_SPHT(*ptr))) {
891 /* version length = ptr - start */
892 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
893 }
894 state = HTTP_MSG_RPVER;
895 goto http_msg_invalid;
896
897 case HTTP_MSG_RPVER_SP:
898 http_msg_rpver_sp:
899 if (likely(!HTTP_IS_LWS(*ptr))) {
900 code = 0;
901 st_c = ptr - start;
902 goto http_msg_rpcode;
903 }
904 if (likely(HTTP_IS_SPHT(*ptr)))
905 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
906 /* so it's a CR/LF, this is invalid */
907 state = HTTP_MSG_RPVER_SP;
908 goto http_msg_invalid;
909
910 case HTTP_MSG_RPCODE:
911 http_msg_rpcode:
912 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100913 code = code * 10 + *ptr - '0';
Willy Tarreau794f9af2017-07-26 09:07:47 +0200914 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
915 }
916
917 if (likely(HTTP_IS_SPHT(*ptr))) {
918 st_c_l = ptr - start - st_c;
919 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
920 }
921
922 /* so it's a CR/LF, so there is no reason phrase */
923 st_c_l = ptr - start - st_c;
924
925 http_msg_rsp_reason:
926 /* reason = ptr - start; */
927 /* reason length = 0 */
928 goto http_msg_rpline_eol;
929
930 case HTTP_MSG_RPCODE_SP:
931 http_msg_rpcode_sp:
932 if (likely(!HTTP_IS_LWS(*ptr))) {
933 /* reason = ptr - start */
934 goto http_msg_rpreason;
935 }
936 if (likely(HTTP_IS_SPHT(*ptr)))
937 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
938 /* so it's a CR/LF, so there is no reason phrase */
939 goto http_msg_rsp_reason;
940
941 case HTTP_MSG_RPREASON:
942 http_msg_rpreason:
943 if (likely(!HTTP_IS_CRLF(*ptr)))
944 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
945 /* reason length = ptr - start - reason */
946 http_msg_rpline_eol:
947 /* We have seen the end of line. Note that we do not
948 * necessarily have the \n yet, but at least we know that we
949 * have EITHER \r OR \n, otherwise the response would not be
950 * complete. We can then record the response length and return
951 * to the caller which will be able to register it.
952 */
953
954 if (unlikely(hdr_count >= hdr_num)) {
955 state = HTTP_MSG_RPREASON;
956 goto http_output_full;
957 }
958 http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + st_c, st_c_l));
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100959 if (h1m)
960 h1m->status = code;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200961
962 sol = ptr - start;
963 if (likely(*ptr == '\r'))
964 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
965 goto http_msg_rpline_end;
966
967 case HTTP_MSG_RPLINE_END:
968 http_msg_rpline_end:
969 /* sol must point to the first of CR or LF. */
970 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
971 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
972 /* stop here */
973
974 case HTTP_MSG_HDR_FIRST:
975 http_msg_hdr_first:
976 sol = ptr - start;
977 if (likely(!HTTP_IS_CRLF(*ptr))) {
978 goto http_msg_hdr_name;
979 }
980
981 if (likely(*ptr == '\r'))
982 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
983 goto http_msg_last_lf;
984
985 case HTTP_MSG_HDR_NAME:
986 http_msg_hdr_name:
987 /* assumes sol points to the first char */
988 if (likely(HTTP_IS_TOKEN(*ptr))) {
989 /* turn it to lower case if needed */
990 if (isupper((unsigned char)*ptr))
991 *ptr = tolower(*ptr);
992 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
993 }
994
995 if (likely(*ptr == ':')) {
996 col = ptr - start;
997 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
998 }
999
1000 if (HTTP_IS_LWS(*ptr)) {
1001 state = HTTP_MSG_HDR_NAME;
1002 goto http_msg_invalid;
1003 }
1004
1005 /* now we have a non-token character in the header field name,
1006 * it's up to the H1 layer to have decided whether or not it
1007 * was acceptable. If we find it here, it was considered
1008 * acceptable due to configuration rules so we obey.
1009 */
1010 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
1011
1012 case HTTP_MSG_HDR_L1_SP:
1013 http_msg_hdr_l1_sp:
1014 /* assumes sol points to the first char */
1015 if (likely(HTTP_IS_SPHT(*ptr)))
1016 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
1017
1018 /* header value can be basically anything except CR/LF */
1019 sov = ptr - start;
1020
1021 if (likely(!HTTP_IS_CRLF(*ptr))) {
1022 goto http_msg_hdr_val;
1023 }
1024
1025 if (likely(*ptr == '\r'))
1026 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
1027 goto http_msg_hdr_l1_lf;
1028
1029 case HTTP_MSG_HDR_L1_LF:
1030 http_msg_hdr_l1_lf:
1031 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
1032 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
1033
1034 case HTTP_MSG_HDR_L1_LWS:
1035 http_msg_hdr_l1_lws:
1036 if (likely(HTTP_IS_SPHT(*ptr))) {
1037 /* replace HT,CR,LF with spaces */
1038 for (; start + sov < ptr; sov++)
1039 start[sov] = ' ';
1040 goto http_msg_hdr_l1_sp;
1041 }
1042 /* we had a header consisting only in spaces ! */
1043 eol = sov;
1044 goto http_msg_complete_header;
1045
1046 case HTTP_MSG_HDR_VAL:
1047 http_msg_hdr_val:
1048 /* assumes sol points to the first char, and sov
1049 * points to the first character of the value.
1050 */
1051
1052 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
1053 * and lower. In fact since most of the time is spent in the loop, we
1054 * also remove the sign bit test so that bytes 0x8e..0x0d break the
1055 * loop, but we don't care since they're very rare in header values.
1056 */
1057#if defined(__x86_64__)
1058 while (ptr <= end - sizeof(long)) {
1059 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
1060 goto http_msg_hdr_val2;
1061 ptr += sizeof(long);
1062 }
1063#endif
1064#if defined(__x86_64__) || \
1065 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
1066 defined(__ARM_ARCH_7A__)
1067 while (ptr <= end - sizeof(int)) {
1068 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
1069 goto http_msg_hdr_val2;
1070 ptr += sizeof(int);
1071 }
1072#endif
1073 if (ptr >= end) {
1074 state = HTTP_MSG_HDR_VAL;
1075 goto http_msg_ood;
1076 }
1077 http_msg_hdr_val2:
1078 if (likely(!HTTP_IS_CRLF(*ptr)))
1079 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
1080
1081 eol = ptr - start;
1082 /* Note: we could also copy eol into ->eoh so that we have the
1083 * real header end in case it ends with lots of LWS, but is this
1084 * really needed ?
1085 */
1086 if (likely(*ptr == '\r'))
1087 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
1088 goto http_msg_hdr_l2_lf;
1089
1090 case HTTP_MSG_HDR_L2_LF:
1091 http_msg_hdr_l2_lf:
1092 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
1093 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
1094
1095 case HTTP_MSG_HDR_L2_LWS:
1096 http_msg_hdr_l2_lws:
1097 if (unlikely(HTTP_IS_SPHT(*ptr))) {
1098 /* LWS: replace HT,CR,LF with spaces */
1099 for (; start + eol < ptr; eol++)
1100 start[eol] = ' ';
1101 goto http_msg_hdr_val;
1102 }
1103 http_msg_complete_header:
1104 /*
1105 * It was a new header, so the last one is finished. Assumes
1106 * <sol> points to the first char of the name, <col> to the
1107 * colon, <sov> points to the first character of the value and
1108 * <eol> to the first CR or LF so we know how the line ends. We
1109 * will trim spaces around the value. It's possible to do it by
1110 * adjusting <eol> and <sov> which are no more used after this.
1111 * We can add the header field to the list.
1112 */
1113 while (sov < eol && HTTP_IS_LWS(start[sov]))
1114 sov++;
1115
1116 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
1117 eol--;
1118
1119
1120 n = ist2(start + sol, col - sol);
1121 v = ist2(start + sov, eol - sov);
1122
1123 if (unlikely(hdr_count >= hdr_num)) {
1124 state = HTTP_MSG_HDR_L2_LWS;
1125 goto http_output_full;
1126 }
1127 http_set_hdr(&hdr[hdr_count++], n, v);
1128
1129 if (h1m) {
1130 long long cl;
1131
Willy Tarreaud22e83a2017-10-31 08:02:24 +01001132 if (h1m->status >= 100 && h1m->status < 200)
1133 h1m->curr_len = h1m->body_len = 0;
1134 else if (h1m->status == 304 || h1m->status == 204) {
Willy Tarreau8ea0f382017-10-30 19:31:59 +01001135 /* no contents, claim c-len is present and set to zero */
1136 h1m->flags |= H1_MF_CLEN;
1137 h1m->curr_len = h1m->body_len = 0;
1138 }
1139 else if (isteq(n, ist("transfer-encoding"))) {
Willy Tarreau794f9af2017-07-26 09:07:47 +02001140 h1m->flags &= ~H1_MF_CLEN;
1141 h1m->flags |= H1_MF_CHNK;
1142 }
1143 else if (isteq(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
1144 h1m->flags |= H1_MF_CLEN;
1145 strl2llrc(v.ptr, v.len, &cl);
1146 h1m->curr_len = h1m->body_len = cl;
1147 }
1148 }
1149
1150 sol = ptr - start;
1151 if (likely(!HTTP_IS_CRLF(*ptr)))
1152 goto http_msg_hdr_name;
1153
1154 if (likely(*ptr == '\r'))
1155 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
1156 goto http_msg_last_lf;
1157
1158 case HTTP_MSG_LAST_LF:
1159 http_msg_last_lf:
1160 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
1161 ptr++;
1162 /* <ptr> now points to the first byte of payload. If needed sol
1163 * still points to the first of either CR or LF of the empty
1164 * line ending the headers block.
1165 */
1166 if (unlikely(hdr_count >= hdr_num)) {
1167 state = HTTP_MSG_LAST_LF;
1168 goto http_output_full;
1169 }
1170 http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
1171 state = HTTP_MSG_BODY;
1172 break;
1173
1174 default:
1175 /* impossible states */
1176 goto http_msg_invalid;
1177 }
1178
1179 /* reaching here, we've parsed the whole message and the state is
1180 * HTTP_MSG_BODY.
1181 */
1182 return ptr - start + skip;
1183
1184 http_msg_ood:
1185 /* out of data at <ptr> during state <state> */
1186 return 0;
1187
1188 http_msg_invalid:
1189 /* invalid message, error at <ptr> */
1190 if (h1m) {
1191 h1m->err_state = state;
1192 h1m->err_pos = ptr - start + skip;
1193 }
1194 return -1;
1195
1196 http_output_full:
1197 /* no more room to store the current header, error at <ptr> */
1198 if (h1m) {
1199 h1m->err_state = state;
1200 h1m->err_pos = ptr - start + skip;
1201 }
1202 return -2;
1203}
1204
/* This function skips trailers in the buffer associated with HTTP message
 * <msg>. The first visited position is msg->next. If the end of the trailers is
 * found, the function returns >0. So, the caller can automatically schedule it
 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
 * data are available, the function does not change anything except maybe
 * msg->sol if it could parse some lines, and returns zero. If a parse error
 * is encountered, the function returns < 0 and does not change anything except
 * msg->err_pos and maybe msg->sol. Note that the message must already be in
 * HTTP_MSG_TRAILERS state before calling this function, which implies that all
 * non-trailers data have already been scheduled for forwarding, and that
 * msg->next exactly matches the length of trailers already parsed and not
 * forwarded. It is also important to note that this function is designed to be
 * able to parse wrapped headers at end of buffer.
 */
1219int http_forward_trailers(struct http_msg *msg)
1220{
1221 const struct buffer *buf = msg->chn->buf;
1222
1223 /* we have msg->next which points to next line. Look for CRLF. But
1224 * first, we reset msg->sol */
1225 msg->sol = 0;
1226 while (1) {
1227 const char *p1 = NULL, *p2 = NULL;
1228 const char *start = b_ptr(buf, msg->next + msg->sol);
1229 const char *stop = bi_end(buf);
1230 const char *ptr = start;
1231 int bytes = 0;
1232
1233 /* scan current line and stop at LF or CRLF */
1234 while (1) {
1235 if (ptr == stop)
1236 return 0;
1237
1238 if (*ptr == '\n') {
1239 if (!p1)
1240 p1 = ptr;
1241 p2 = ptr;
1242 break;
1243 }
1244
1245 if (*ptr == '\r') {
1246 if (p1) {
1247 msg->err_pos = buffer_count(buf, buf->p, ptr);
1248 return -1;
1249 }
1250 p1 = ptr;
1251 }
1252
1253 ptr++;
1254 if (ptr >= buf->data + buf->size)
1255 ptr = buf->data;
1256 }
1257
1258 /* after LF; point to beginning of next line */
1259 p2++;
1260 if (p2 >= buf->data + buf->size)
1261 p2 = buf->data;
1262
1263 bytes = p2 - start;
1264 if (bytes < 0)
1265 bytes += buf->size;
1266 msg->sol += bytes;
1267
1268 /* LF/CRLF at beginning of line => end of trailers at p2.
1269 * Everything was scheduled for forwarding, there's nothing left
1270 * from this message. */
1271 if (p1 == start)
1272 return 1;
1273
1274 /* OK, next line then */
1275 }
1276}