blob: 436180952c3cab3afc142bdd6ad1f427b882504c [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020014#include <common/config.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020015#include <common/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020016
Willy Tarreau188e2302018-06-15 11:11:53 +020017#include <proto/channel.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020018#include <proto/h1.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020019#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020020
21/* It is about twice as fast on recent architectures to lookup a byte in a
22 * table than to perform a boolean AND or OR between two tests. Refer to
23 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
24 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
25 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
26 * digit. Note: please do not overwrite values in assignment since gcc-2.95
27 * will not handle them correctly. It's worth noting that chars 128..255 are
28 * nothing, not even control chars.
29 */
30const unsigned char h1_char_classes[256] = {
31 [ 0] = H1_FLG_CTL,
32 [ 1] = H1_FLG_CTL,
33 [ 2] = H1_FLG_CTL,
34 [ 3] = H1_FLG_CTL,
35 [ 4] = H1_FLG_CTL,
36 [ 5] = H1_FLG_CTL,
37 [ 6] = H1_FLG_CTL,
38 [ 7] = H1_FLG_CTL,
39 [ 8] = H1_FLG_CTL,
40 [ 9] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP | H1_FLG_CTL,
41 [ 10] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
42 [ 11] = H1_FLG_CTL,
43 [ 12] = H1_FLG_CTL,
44 [ 13] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
45 [ 14] = H1_FLG_CTL,
46 [ 15] = H1_FLG_CTL,
47 [ 16] = H1_FLG_CTL,
48 [ 17] = H1_FLG_CTL,
49 [ 18] = H1_FLG_CTL,
50 [ 19] = H1_FLG_CTL,
51 [ 20] = H1_FLG_CTL,
52 [ 21] = H1_FLG_CTL,
53 [ 22] = H1_FLG_CTL,
54 [ 23] = H1_FLG_CTL,
55 [ 24] = H1_FLG_CTL,
56 [ 25] = H1_FLG_CTL,
57 [ 26] = H1_FLG_CTL,
58 [ 27] = H1_FLG_CTL,
59 [ 28] = H1_FLG_CTL,
60 [ 29] = H1_FLG_CTL,
61 [ 30] = H1_FLG_CTL,
62 [ 31] = H1_FLG_CTL,
63 [' '] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP,
64 ['!'] = H1_FLG_TOK,
65 ['"'] = H1_FLG_SEP,
66 ['#'] = H1_FLG_TOK,
67 ['$'] = H1_FLG_TOK,
68 ['%'] = H1_FLG_TOK,
69 ['&'] = H1_FLG_TOK,
70 [ 39] = H1_FLG_TOK,
71 ['('] = H1_FLG_SEP,
72 [')'] = H1_FLG_SEP,
73 ['*'] = H1_FLG_TOK,
74 ['+'] = H1_FLG_TOK,
75 [','] = H1_FLG_SEP,
76 ['-'] = H1_FLG_TOK,
77 ['.'] = H1_FLG_TOK | H1_FLG_VER,
78 ['/'] = H1_FLG_SEP | H1_FLG_VER,
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +010079 ['0'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
80 ['1'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
81 ['2'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
82 ['3'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
83 ['4'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
84 ['5'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
85 ['6'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
86 ['7'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
87 ['8'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
88 ['9'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020089 [':'] = H1_FLG_SEP,
90 [';'] = H1_FLG_SEP,
91 ['<'] = H1_FLG_SEP,
92 ['='] = H1_FLG_SEP,
93 ['>'] = H1_FLG_SEP,
94 ['?'] = H1_FLG_SEP,
95 ['@'] = H1_FLG_SEP,
96 ['A'] = H1_FLG_TOK,
97 ['B'] = H1_FLG_TOK,
98 ['C'] = H1_FLG_TOK,
99 ['D'] = H1_FLG_TOK,
100 ['E'] = H1_FLG_TOK,
101 ['F'] = H1_FLG_TOK,
102 ['G'] = H1_FLG_TOK,
103 ['H'] = H1_FLG_TOK | H1_FLG_VER,
104 ['I'] = H1_FLG_TOK,
105 ['J'] = H1_FLG_TOK,
106 ['K'] = H1_FLG_TOK,
107 ['L'] = H1_FLG_TOK,
108 ['M'] = H1_FLG_TOK,
109 ['N'] = H1_FLG_TOK,
110 ['O'] = H1_FLG_TOK,
111 ['P'] = H1_FLG_TOK | H1_FLG_VER,
112 ['Q'] = H1_FLG_TOK,
113 ['R'] = H1_FLG_TOK | H1_FLG_VER,
114 ['S'] = H1_FLG_TOK | H1_FLG_VER,
115 ['T'] = H1_FLG_TOK | H1_FLG_VER,
116 ['U'] = H1_FLG_TOK,
117 ['V'] = H1_FLG_TOK,
118 ['W'] = H1_FLG_TOK,
119 ['X'] = H1_FLG_TOK,
120 ['Y'] = H1_FLG_TOK,
121 ['Z'] = H1_FLG_TOK,
122 ['['] = H1_FLG_SEP,
123 [ 92] = H1_FLG_SEP,
124 [']'] = H1_FLG_SEP,
125 ['^'] = H1_FLG_TOK,
126 ['_'] = H1_FLG_TOK,
127 ['`'] = H1_FLG_TOK,
128 ['a'] = H1_FLG_TOK,
129 ['b'] = H1_FLG_TOK,
130 ['c'] = H1_FLG_TOK,
131 ['d'] = H1_FLG_TOK,
132 ['e'] = H1_FLG_TOK,
133 ['f'] = H1_FLG_TOK,
134 ['g'] = H1_FLG_TOK,
135 ['h'] = H1_FLG_TOK,
136 ['i'] = H1_FLG_TOK,
137 ['j'] = H1_FLG_TOK,
138 ['k'] = H1_FLG_TOK,
139 ['l'] = H1_FLG_TOK,
140 ['m'] = H1_FLG_TOK,
141 ['n'] = H1_FLG_TOK,
142 ['o'] = H1_FLG_TOK,
143 ['p'] = H1_FLG_TOK,
144 ['q'] = H1_FLG_TOK,
145 ['r'] = H1_FLG_TOK,
146 ['s'] = H1_FLG_TOK,
147 ['t'] = H1_FLG_TOK,
148 ['u'] = H1_FLG_TOK,
149 ['v'] = H1_FLG_TOK,
150 ['w'] = H1_FLG_TOK,
151 ['x'] = H1_FLG_TOK,
152 ['y'] = H1_FLG_TOK,
153 ['z'] = H1_FLG_TOK,
154 ['{'] = H1_FLG_SEP,
155 ['|'] = H1_FLG_TOK,
156 ['}'] = H1_FLG_SEP,
157 ['~'] = H1_FLG_TOK,
158 [127] = H1_FLG_CTL,
159};
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200160
161
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200162/*
163 * This function parses a status line between <ptr> and <end>, starting with
164 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
165 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
166 * will give undefined results.
167 * Note that it is upon the caller's responsibility to ensure that ptr < end,
168 * and that msg->sol points to the beginning of the response.
169 * If a complete line is found (which implies that at least one CR or LF is
170 * found before <end>, the updated <ptr> is returned, otherwise NULL is
171 * returned indicating an incomplete line (which does not mean that parts have
172 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
173 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
174 * upon next call.
175 *
176 * This function was intentionally designed to be called from
177 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
178 * within its state machine and use the same macros, hence the need for same
179 * labels and variable names. Note that msg->sol is left unchanged.
180 */
181const char *http_parse_stsline(struct http_msg *msg,
182 enum h1_state state, const char *ptr, const char *end,
183 unsigned int *ret_ptr, enum h1_state *ret_state)
184{
185 const char *msg_start = msg->chn->buf->p;
186
187 switch (state) {
188 case HTTP_MSG_RPVER:
189 http_msg_rpver:
190 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
191 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
192
193 if (likely(HTTP_IS_SPHT(*ptr))) {
194 msg->sl.st.v_l = ptr - msg_start;
195 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
196 }
197 msg->err_state = HTTP_MSG_RPVER;
198 state = HTTP_MSG_ERROR;
199 break;
200
201 case HTTP_MSG_RPVER_SP:
202 http_msg_rpver_sp:
203 if (likely(!HTTP_IS_LWS(*ptr))) {
204 msg->sl.st.c = ptr - msg_start;
205 goto http_msg_rpcode;
206 }
207 if (likely(HTTP_IS_SPHT(*ptr)))
208 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
209 /* so it's a CR/LF, this is invalid */
210 msg->err_state = HTTP_MSG_RPVER_SP;
211 state = HTTP_MSG_ERROR;
212 break;
213
214 case HTTP_MSG_RPCODE:
215 http_msg_rpcode:
216 if (likely(!HTTP_IS_LWS(*ptr)))
217 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
218
219 if (likely(HTTP_IS_SPHT(*ptr))) {
220 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
221 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
222 }
223
224 /* so it's a CR/LF, so there is no reason phrase */
225 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
226 http_msg_rsp_reason:
227 /* FIXME: should we support HTTP responses without any reason phrase ? */
228 msg->sl.st.r = ptr - msg_start;
229 msg->sl.st.r_l = 0;
230 goto http_msg_rpline_eol;
231
232 case HTTP_MSG_RPCODE_SP:
233 http_msg_rpcode_sp:
234 if (likely(!HTTP_IS_LWS(*ptr))) {
235 msg->sl.st.r = ptr - msg_start;
236 goto http_msg_rpreason;
237 }
238 if (likely(HTTP_IS_SPHT(*ptr)))
239 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
240 /* so it's a CR/LF, so there is no reason phrase */
241 goto http_msg_rsp_reason;
242
243 case HTTP_MSG_RPREASON:
244 http_msg_rpreason:
245 if (likely(!HTTP_IS_CRLF(*ptr)))
246 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
247 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
248 http_msg_rpline_eol:
249 /* We have seen the end of line. Note that we do not
250 * necessarily have the \n yet, but at least we know that we
251 * have EITHER \r OR \n, otherwise the response would not be
252 * complete. We can then record the response length and return
253 * to the caller which will be able to register it.
254 */
255 msg->sl.st.l = ptr - msg_start - msg->sol;
256 return ptr;
257
258 default:
259#ifdef DEBUG_FULL
260 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
261 exit(1);
262#endif
263 ;
264 }
265
266 http_msg_ood:
267 /* out of valid data */
268 if (ret_state)
269 *ret_state = state;
270 if (ret_ptr)
271 *ret_ptr = ptr - msg_start;
272 return NULL;
273}
274
275/*
276 * This function parses a request line between <ptr> and <end>, starting with
277 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
278 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
279 * will give undefined results.
280 * Note that it is upon the caller's responsibility to ensure that ptr < end,
281 * and that msg->sol points to the beginning of the request.
282 * If a complete line is found (which implies that at least one CR or LF is
283 * found before <end>, the updated <ptr> is returned, otherwise NULL is
284 * returned indicating an incomplete line (which does not mean that parts have
285 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
286 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
287 * upon next call.
288 *
289 * This function was intentionally designed to be called from
290 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
291 * within its state machine and use the same macros, hence the need for same
292 * labels and variable names. Note that msg->sol is left unchanged.
293 */
294const char *http_parse_reqline(struct http_msg *msg,
295 enum h1_state state, const char *ptr, const char *end,
296 unsigned int *ret_ptr, enum h1_state *ret_state)
297{
298 const char *msg_start = msg->chn->buf->p;
299
300 switch (state) {
301 case HTTP_MSG_RQMETH:
302 http_msg_rqmeth:
303 if (likely(HTTP_IS_TOKEN(*ptr)))
304 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
305
306 if (likely(HTTP_IS_SPHT(*ptr))) {
307 msg->sl.rq.m_l = ptr - msg_start;
308 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
309 }
310
311 if (likely(HTTP_IS_CRLF(*ptr))) {
312 /* HTTP 0.9 request */
313 msg->sl.rq.m_l = ptr - msg_start;
314 http_msg_req09_uri:
315 msg->sl.rq.u = ptr - msg_start;
316 http_msg_req09_uri_e:
317 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
318 http_msg_req09_ver:
319 msg->sl.rq.v = ptr - msg_start;
320 msg->sl.rq.v_l = 0;
321 goto http_msg_rqline_eol;
322 }
323 msg->err_state = HTTP_MSG_RQMETH;
324 state = HTTP_MSG_ERROR;
325 break;
326
327 case HTTP_MSG_RQMETH_SP:
328 http_msg_rqmeth_sp:
329 if (likely(!HTTP_IS_LWS(*ptr))) {
330 msg->sl.rq.u = ptr - msg_start;
331 goto http_msg_rquri;
332 }
333 if (likely(HTTP_IS_SPHT(*ptr)))
334 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
335 /* so it's a CR/LF, meaning an HTTP 0.9 request */
336 goto http_msg_req09_uri;
337
338 case HTTP_MSG_RQURI:
339 http_msg_rquri:
340#if defined(__x86_64__) || \
341 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
342 defined(__ARM_ARCH_7A__)
343 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
344 while (ptr <= end - sizeof(int)) {
345 int x = *(int *)ptr - 0x21212121;
346 if (x & 0x80808080)
347 break;
348
349 x -= 0x5e5e5e5e;
350 if (!(x & 0x80808080))
351 break;
352
353 ptr += sizeof(int);
354 }
355#endif
356 if (ptr >= end) {
357 state = HTTP_MSG_RQURI;
358 goto http_msg_ood;
359 }
360 http_msg_rquri2:
361 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
362 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
363
364 if (likely(HTTP_IS_SPHT(*ptr))) {
365 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
366 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
367 }
368
369 if (likely((unsigned char)*ptr >= 128)) {
370 /* non-ASCII chars are forbidden unless option
371 * accept-invalid-http-request is enabled in the frontend.
372 * In any case, we capture the faulty char.
373 */
374 if (msg->err_pos < -1)
375 goto invalid_char;
376 if (msg->err_pos == -1)
377 msg->err_pos = ptr - msg_start;
378 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
379 }
380
381 if (likely(HTTP_IS_CRLF(*ptr))) {
382 /* so it's a CR/LF, meaning an HTTP 0.9 request */
383 goto http_msg_req09_uri_e;
384 }
385
386 /* OK forbidden chars, 0..31 or 127 */
387 invalid_char:
388 msg->err_pos = ptr - msg_start;
389 msg->err_state = HTTP_MSG_RQURI;
390 state = HTTP_MSG_ERROR;
391 break;
392
393 case HTTP_MSG_RQURI_SP:
394 http_msg_rquri_sp:
395 if (likely(!HTTP_IS_LWS(*ptr))) {
396 msg->sl.rq.v = ptr - msg_start;
397 goto http_msg_rqver;
398 }
399 if (likely(HTTP_IS_SPHT(*ptr)))
400 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
401 /* so it's a CR/LF, meaning an HTTP 0.9 request */
402 goto http_msg_req09_ver;
403
404 case HTTP_MSG_RQVER:
405 http_msg_rqver:
406 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
407 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
408
409 if (likely(HTTP_IS_CRLF(*ptr))) {
410 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
411 http_msg_rqline_eol:
412 /* We have seen the end of line. Note that we do not
413 * necessarily have the \n yet, but at least we know that we
414 * have EITHER \r OR \n, otherwise the request would not be
415 * complete. We can then record the request length and return
416 * to the caller which will be able to register it.
417 */
418 msg->sl.rq.l = ptr - msg_start - msg->sol;
419 return ptr;
420 }
421
422 /* neither an HTTP_VER token nor a CRLF */
423 msg->err_state = HTTP_MSG_RQVER;
424 state = HTTP_MSG_ERROR;
425 break;
426
427 default:
428#ifdef DEBUG_FULL
429 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
430 exit(1);
431#endif
432 ;
433 }
434
435 http_msg_ood:
436 /* out of valid data */
437 if (ret_state)
438 *ret_state = state;
439 if (ret_ptr)
440 *ret_ptr = ptr - msg_start;
441 return NULL;
442}
443
444/*
445 * This function parses an HTTP message, either a request or a response,
446 * depending on the initial msg->msg_state. The caller is responsible for
447 * ensuring that the message does not wrap. The function can be preempted
448 * everywhere when data are missing and recalled at the exact same location
449 * with no information loss. The message may even be realigned between two
450 * calls. The header index is re-initialized when switching from
451 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
452 * fields. Note that msg->sol will be initialized after completing the first
453 * state, so that none of the msg pointers has to be initialized prior to the
454 * first call.
455 */
456void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
457{
458 enum h1_state state; /* updated only when leaving the FSM */
459 register char *ptr, *end; /* request pointers, to avoid dereferences */
460 struct buffer *buf;
461
462 state = msg->msg_state;
463 buf = msg->chn->buf;
464 ptr = buf->p + msg->next;
465 end = buf->p + buf->i;
466
467 if (unlikely(ptr >= end))
468 goto http_msg_ood;
469
470 switch (state) {
471 /*
472 * First, states that are specific to the response only.
473 * We check them first so that request and headers are
474 * closer to each other (accessed more often).
475 */
476 case HTTP_MSG_RPBEFORE:
477 http_msg_rpbefore:
478 if (likely(HTTP_IS_TOKEN(*ptr))) {
479 /* we have a start of message, but we have to check
480 * first if we need to remove some CRLF. We can only
481 * do this when o=0.
482 */
483 if (unlikely(ptr != buf->p)) {
484 if (buf->o)
485 goto http_msg_ood;
486 /* Remove empty leading lines, as recommended by RFC2616. */
487 bi_fast_delete(buf, ptr - buf->p);
488 }
489 msg->sol = 0;
490 msg->sl.st.l = 0; /* used in debug mode */
491 hdr_idx_init(idx);
492 state = HTTP_MSG_RPVER;
493 goto http_msg_rpver;
494 }
495
496 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
497 state = HTTP_MSG_RPBEFORE;
498 goto http_msg_invalid;
499 }
500
501 if (unlikely(*ptr == '\n'))
502 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
503 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
504 /* stop here */
505
506 case HTTP_MSG_RPBEFORE_CR:
507 http_msg_rpbefore_cr:
508 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
509 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
510 /* stop here */
511
512 case HTTP_MSG_RPVER:
513 http_msg_rpver:
514 case HTTP_MSG_RPVER_SP:
515 case HTTP_MSG_RPCODE:
516 case HTTP_MSG_RPCODE_SP:
517 case HTTP_MSG_RPREASON:
518 ptr = (char *)http_parse_stsline(msg,
519 state, ptr, end,
520 &msg->next, &msg->msg_state);
521 if (unlikely(!ptr))
522 return;
523
524 /* we have a full response and we know that we have either a CR
525 * or an LF at <ptr>.
526 */
527 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
528
529 msg->sol = ptr - buf->p;
530 if (likely(*ptr == '\r'))
531 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
532 goto http_msg_rpline_end;
533
534 case HTTP_MSG_RPLINE_END:
535 http_msg_rpline_end:
536 /* msg->sol must point to the first of CR or LF. */
537 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
538 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
539 /* stop here */
540
541 /*
542 * Second, states that are specific to the request only
543 */
544 case HTTP_MSG_RQBEFORE:
545 http_msg_rqbefore:
546 if (likely(HTTP_IS_TOKEN(*ptr))) {
547 /* we have a start of message, but we have to check
548 * first if we need to remove some CRLF. We can only
549 * do this when o=0.
550 */
551 if (likely(ptr != buf->p)) {
552 if (buf->o)
553 goto http_msg_ood;
554 /* Remove empty leading lines, as recommended by RFC2616. */
555 bi_fast_delete(buf, ptr - buf->p);
556 }
557 msg->sol = 0;
558 msg->sl.rq.l = 0; /* used in debug mode */
559 state = HTTP_MSG_RQMETH;
560 goto http_msg_rqmeth;
561 }
562
563 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
564 state = HTTP_MSG_RQBEFORE;
565 goto http_msg_invalid;
566 }
567
568 if (unlikely(*ptr == '\n'))
569 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
570 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
571 /* stop here */
572
573 case HTTP_MSG_RQBEFORE_CR:
574 http_msg_rqbefore_cr:
575 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
576 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
577 /* stop here */
578
579 case HTTP_MSG_RQMETH:
580 http_msg_rqmeth:
581 case HTTP_MSG_RQMETH_SP:
582 case HTTP_MSG_RQURI:
583 case HTTP_MSG_RQURI_SP:
584 case HTTP_MSG_RQVER:
585 ptr = (char *)http_parse_reqline(msg,
586 state, ptr, end,
587 &msg->next, &msg->msg_state);
588 if (unlikely(!ptr))
589 return;
590
591 /* we have a full request and we know that we have either a CR
592 * or an LF at <ptr>.
593 */
594 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
595
596 msg->sol = ptr - buf->p;
597 if (likely(*ptr == '\r'))
598 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
599 goto http_msg_rqline_end;
600
601 case HTTP_MSG_RQLINE_END:
602 http_msg_rqline_end:
603 /* check for HTTP/0.9 request : no version information available.
604 * msg->sol must point to the first of CR or LF.
605 */
606 if (unlikely(msg->sl.rq.v_l == 0))
607 goto http_msg_last_lf;
608
609 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
610 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
611 /* stop here */
612
613 /*
614 * Common states below
615 */
616 case HTTP_MSG_HDR_FIRST:
617 http_msg_hdr_first:
618 msg->sol = ptr - buf->p;
619 if (likely(!HTTP_IS_CRLF(*ptr))) {
620 goto http_msg_hdr_name;
621 }
622
623 if (likely(*ptr == '\r'))
624 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
625 goto http_msg_last_lf;
626
627 case HTTP_MSG_HDR_NAME:
628 http_msg_hdr_name:
629 /* assumes msg->sol points to the first char */
630 if (likely(HTTP_IS_TOKEN(*ptr)))
631 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
632
633 if (likely(*ptr == ':'))
634 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
635
636 if (likely(msg->err_pos < -1) || *ptr == '\n') {
637 state = HTTP_MSG_HDR_NAME;
638 goto http_msg_invalid;
639 }
640
641 if (msg->err_pos == -1) /* capture error pointer */
642 msg->err_pos = ptr - buf->p; /* >= 0 now */
643
644 /* and we still accept this non-token character */
645 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
646
647 case HTTP_MSG_HDR_L1_SP:
648 http_msg_hdr_l1_sp:
649 /* assumes msg->sol points to the first char */
650 if (likely(HTTP_IS_SPHT(*ptr)))
651 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
652
653 /* header value can be basically anything except CR/LF */
654 msg->sov = ptr - buf->p;
655
656 if (likely(!HTTP_IS_CRLF(*ptr))) {
657 goto http_msg_hdr_val;
658 }
659
660 if (likely(*ptr == '\r'))
661 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
662 goto http_msg_hdr_l1_lf;
663
664 case HTTP_MSG_HDR_L1_LF:
665 http_msg_hdr_l1_lf:
666 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
667 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
668
669 case HTTP_MSG_HDR_L1_LWS:
670 http_msg_hdr_l1_lws:
671 if (likely(HTTP_IS_SPHT(*ptr))) {
672 /* replace HT,CR,LF with spaces */
673 for (; buf->p + msg->sov < ptr; msg->sov++)
674 buf->p[msg->sov] = ' ';
675 goto http_msg_hdr_l1_sp;
676 }
677 /* we had a header consisting only in spaces ! */
678 msg->eol = msg->sov;
679 goto http_msg_complete_header;
680
681 case HTTP_MSG_HDR_VAL:
682 http_msg_hdr_val:
683 /* assumes msg->sol points to the first char, and msg->sov
684 * points to the first character of the value.
685 */
686
687 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
688 * and lower. In fact since most of the time is spent in the loop, we
689 * also remove the sign bit test so that bytes 0x8e..0x0d break the
690 * loop, but we don't care since they're very rare in header values.
691 */
692#if defined(__x86_64__)
693 while (ptr <= end - sizeof(long)) {
694 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
695 goto http_msg_hdr_val2;
696 ptr += sizeof(long);
697 }
698#endif
699#if defined(__x86_64__) || \
700 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
701 defined(__ARM_ARCH_7A__)
702 while (ptr <= end - sizeof(int)) {
703 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
704 goto http_msg_hdr_val2;
705 ptr += sizeof(int);
706 }
707#endif
708 if (ptr >= end) {
709 state = HTTP_MSG_HDR_VAL;
710 goto http_msg_ood;
711 }
712 http_msg_hdr_val2:
713 if (likely(!HTTP_IS_CRLF(*ptr)))
714 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
715
716 msg->eol = ptr - buf->p;
717 /* Note: we could also copy eol into ->eoh so that we have the
718 * real header end in case it ends with lots of LWS, but is this
719 * really needed ?
720 */
721 if (likely(*ptr == '\r'))
722 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
723 goto http_msg_hdr_l2_lf;
724
725 case HTTP_MSG_HDR_L2_LF:
726 http_msg_hdr_l2_lf:
727 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
728 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
729
730 case HTTP_MSG_HDR_L2_LWS:
731 http_msg_hdr_l2_lws:
732 if (unlikely(HTTP_IS_SPHT(*ptr))) {
733 /* LWS: replace HT,CR,LF with spaces */
734 for (; buf->p + msg->eol < ptr; msg->eol++)
735 buf->p[msg->eol] = ' ';
736 goto http_msg_hdr_val;
737 }
738 http_msg_complete_header:
739 /*
740 * It was a new header, so the last one is finished.
741 * Assumes msg->sol points to the first char, msg->sov points
742 * to the first character of the value and msg->eol to the
743 * first CR or LF so we know how the line ends. We insert last
744 * header into the index.
745 */
746 if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
747 idx, idx->tail) < 0)) {
748 state = HTTP_MSG_HDR_L2_LWS;
749 goto http_msg_invalid;
750 }
751
752 msg->sol = ptr - buf->p;
753 if (likely(!HTTP_IS_CRLF(*ptr))) {
754 goto http_msg_hdr_name;
755 }
756
757 if (likely(*ptr == '\r'))
758 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
759 goto http_msg_last_lf;
760
761 case HTTP_MSG_LAST_LF:
762 http_msg_last_lf:
763 /* Assumes msg->sol points to the first of either CR or LF.
764 * Sets ->sov and ->next to the total header length, ->eoh to
765 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
766 */
767 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
768 ptr++;
769 msg->sov = msg->next = ptr - buf->p;
770 msg->eoh = msg->sol;
771 msg->sol = 0;
772 msg->eol = msg->sov - msg->eoh;
773 msg->msg_state = HTTP_MSG_BODY;
774 return;
775
776 case HTTP_MSG_ERROR:
777 /* this may only happen if we call http_msg_analyser() twice with an error */
778 break;
779
780 default:
781#ifdef DEBUG_FULL
782 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
783 exit(1);
784#endif
785 ;
786 }
787 http_msg_ood:
788 /* out of data */
789 msg->msg_state = state;
790 msg->next = ptr - buf->p;
791 return;
792
793 http_msg_invalid:
794 /* invalid message */
795 msg->err_state = state;
796 msg->msg_state = HTTP_MSG_ERROR;
797 msg->next = ptr - buf->p;
798 return;
799}
800
/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
 * pairs representing header fields into the array <hdr> of size <hdr_num>,
 * whose last entry will have an empty name and an empty value. If <hdr_num> is
 * too small to represent the whole message, an error is returned. If <h1m> is
 * not NULL, some protocol elements such as content-length and transfer-encoding
 * will be parsed and stored there as well.
 *
 * For now it's limited to the response. If the header block is incomplete,
 * 0 is returned, waiting to be called again with more data to try it again.
 *
 * The code is derived from the main HTTP/1 parser above but was simplified and
 * optimized to process responses produced or forwarded by haproxy. The caller
 * is responsible for ensuring that the message doesn't wrap, and should ensure
 * it is complete to avoid having to retry the operation after a failed
 * attempt. The message is not supposed to be invalid, which is why a few
 * properties such as the character set used in the header field names are not
 * checked. In case of an unparsable response message, a negative value will be
 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
 * recommended.
 *
 * This function returns :
 *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
 *    -2 if the output is full (hdr_num reached). err_state and err_pos also
 *       indicate where it failed.
 *     0 in case of missing data.
 *   > 0 on success, it then corresponds to the number of bytes read since
 *       <start> so that the caller can go on with the payload.
 */
833int h1_headers_to_hdr_list(char *start, const char *stop,
834 struct http_hdr *hdr, unsigned int hdr_num,
835 struct h1m *h1m)
836{
837 enum h1_state state = HTTP_MSG_RPBEFORE;
838 register char *ptr = start;
839 register const char *end = stop;
840 unsigned int hdr_count = 0;
841 unsigned int code = 0; /* status code, ASCII form */
842 unsigned int st_c; /* beginning of status code, relative to msg_start */
843 unsigned int st_c_l; /* length of status code */
844 unsigned int sol = 0; /* start of line */
845 unsigned int col = 0; /* position of the colon */
846 unsigned int eol = 0; /* end of line */
847 unsigned int sov = 0; /* start of value */
848 unsigned int skip = 0; /* number of bytes skipped at the beginning */
849 struct ist n, v; /* header name and value during parsing */
850
851 if (unlikely(ptr >= end))
852 goto http_msg_ood;
853
854 switch (state) {
855 case HTTP_MSG_RPBEFORE:
856 http_msg_rpbefore:
857 if (likely(HTTP_IS_TOKEN(*ptr))) {
858 /* we have a start of message, we may have skipped some
859 * heading CRLF. Skip them now.
860 */
861 skip += ptr - start;
862 start = ptr;
863
864 sol = 0;
865 hdr_count = 0;
866 state = HTTP_MSG_RPVER;
867 goto http_msg_rpver;
868 }
869
870 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
871 state = HTTP_MSG_RPBEFORE;
872 goto http_msg_invalid;
873 }
874
875 if (unlikely(*ptr == '\n'))
876 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
877 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
878 /* stop here */
879
880 case HTTP_MSG_RPBEFORE_CR:
881 http_msg_rpbefore_cr:
882 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
883 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
884 /* stop here */
885
886 case HTTP_MSG_RPVER:
887 http_msg_rpver:
888 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
889 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
890
891 if (likely(HTTP_IS_SPHT(*ptr))) {
892 /* version length = ptr - start */
893 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
894 }
895 state = HTTP_MSG_RPVER;
896 goto http_msg_invalid;
897
898 case HTTP_MSG_RPVER_SP:
899 http_msg_rpver_sp:
900 if (likely(!HTTP_IS_LWS(*ptr))) {
901 code = 0;
902 st_c = ptr - start;
903 goto http_msg_rpcode;
904 }
905 if (likely(HTTP_IS_SPHT(*ptr)))
906 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
907 /* so it's a CR/LF, this is invalid */
908 state = HTTP_MSG_RPVER_SP;
909 goto http_msg_invalid;
910
911 case HTTP_MSG_RPCODE:
912 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100913 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100914 code = code * 10 + *ptr - '0';
Willy Tarreau794f9af2017-07-26 09:07:47 +0200915 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
916 }
917
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100918 if (unlikely(!HTTP_IS_LWS(*ptr))) {
919 state = HTTP_MSG_RPCODE;
920 goto http_msg_invalid;
921 }
922
Willy Tarreau794f9af2017-07-26 09:07:47 +0200923 if (likely(HTTP_IS_SPHT(*ptr))) {
924 st_c_l = ptr - start - st_c;
925 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
926 }
927
928 /* so it's a CR/LF, so there is no reason phrase */
929 st_c_l = ptr - start - st_c;
930
931 http_msg_rsp_reason:
932 /* reason = ptr - start; */
933 /* reason length = 0 */
934 goto http_msg_rpline_eol;
935
936 case HTTP_MSG_RPCODE_SP:
937 http_msg_rpcode_sp:
938 if (likely(!HTTP_IS_LWS(*ptr))) {
939 /* reason = ptr - start */
940 goto http_msg_rpreason;
941 }
942 if (likely(HTTP_IS_SPHT(*ptr)))
943 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
944 /* so it's a CR/LF, so there is no reason phrase */
945 goto http_msg_rsp_reason;
946
947 case HTTP_MSG_RPREASON:
948 http_msg_rpreason:
949 if (likely(!HTTP_IS_CRLF(*ptr)))
950 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
951 /* reason length = ptr - start - reason */
952 http_msg_rpline_eol:
953 /* We have seen the end of line. Note that we do not
954 * necessarily have the \n yet, but at least we know that we
955 * have EITHER \r OR \n, otherwise the response would not be
956 * complete. We can then record the response length and return
957 * to the caller which will be able to register it.
958 */
959
960 if (unlikely(hdr_count >= hdr_num)) {
961 state = HTTP_MSG_RPREASON;
962 goto http_output_full;
963 }
964 http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + st_c, st_c_l));
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100965 if (h1m)
966 h1m->status = code;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200967
968 sol = ptr - start;
969 if (likely(*ptr == '\r'))
970 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
971 goto http_msg_rpline_end;
972
973 case HTTP_MSG_RPLINE_END:
974 http_msg_rpline_end:
975 /* sol must point to the first of CR or LF. */
976 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
977 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
978 /* stop here */
979
980 case HTTP_MSG_HDR_FIRST:
981 http_msg_hdr_first:
982 sol = ptr - start;
983 if (likely(!HTTP_IS_CRLF(*ptr))) {
984 goto http_msg_hdr_name;
985 }
986
987 if (likely(*ptr == '\r'))
988 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
989 goto http_msg_last_lf;
990
991 case HTTP_MSG_HDR_NAME:
992 http_msg_hdr_name:
993 /* assumes sol points to the first char */
994 if (likely(HTTP_IS_TOKEN(*ptr))) {
995 /* turn it to lower case if needed */
996 if (isupper((unsigned char)*ptr))
997 *ptr = tolower(*ptr);
998 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
999 }
1000
1001 if (likely(*ptr == ':')) {
1002 col = ptr - start;
1003 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
1004 }
1005
1006 if (HTTP_IS_LWS(*ptr)) {
1007 state = HTTP_MSG_HDR_NAME;
1008 goto http_msg_invalid;
1009 }
1010
1011 /* now we have a non-token character in the header field name,
1012 * it's up to the H1 layer to have decided whether or not it
1013 * was acceptable. If we find it here, it was considered
1014 * acceptable due to configuration rules so we obey.
1015 */
1016 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
1017
1018 case HTTP_MSG_HDR_L1_SP:
1019 http_msg_hdr_l1_sp:
1020 /* assumes sol points to the first char */
1021 if (likely(HTTP_IS_SPHT(*ptr)))
1022 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
1023
1024 /* header value can be basically anything except CR/LF */
1025 sov = ptr - start;
1026
1027 if (likely(!HTTP_IS_CRLF(*ptr))) {
1028 goto http_msg_hdr_val;
1029 }
1030
1031 if (likely(*ptr == '\r'))
1032 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
1033 goto http_msg_hdr_l1_lf;
1034
1035 case HTTP_MSG_HDR_L1_LF:
1036 http_msg_hdr_l1_lf:
1037 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
1038 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
1039
1040 case HTTP_MSG_HDR_L1_LWS:
1041 http_msg_hdr_l1_lws:
1042 if (likely(HTTP_IS_SPHT(*ptr))) {
1043 /* replace HT,CR,LF with spaces */
1044 for (; start + sov < ptr; sov++)
1045 start[sov] = ' ';
1046 goto http_msg_hdr_l1_sp;
1047 }
1048 /* we had a header consisting only in spaces ! */
1049 eol = sov;
1050 goto http_msg_complete_header;
1051
1052 case HTTP_MSG_HDR_VAL:
1053 http_msg_hdr_val:
1054 /* assumes sol points to the first char, and sov
1055 * points to the first character of the value.
1056 */
1057
1058 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
1059 * and lower. In fact since most of the time is spent in the loop, we
1060 * also remove the sign bit test so that bytes 0x8e..0x0d break the
1061 * loop, but we don't care since they're very rare in header values.
1062 */
1063#if defined(__x86_64__)
1064 while (ptr <= end - sizeof(long)) {
1065 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
1066 goto http_msg_hdr_val2;
1067 ptr += sizeof(long);
1068 }
1069#endif
1070#if defined(__x86_64__) || \
1071 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
1072 defined(__ARM_ARCH_7A__)
1073 while (ptr <= end - sizeof(int)) {
1074 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
1075 goto http_msg_hdr_val2;
1076 ptr += sizeof(int);
1077 }
1078#endif
1079 if (ptr >= end) {
1080 state = HTTP_MSG_HDR_VAL;
1081 goto http_msg_ood;
1082 }
1083 http_msg_hdr_val2:
1084 if (likely(!HTTP_IS_CRLF(*ptr)))
1085 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
1086
1087 eol = ptr - start;
1088 /* Note: we could also copy eol into ->eoh so that we have the
1089 * real header end in case it ends with lots of LWS, but is this
1090 * really needed ?
1091 */
1092 if (likely(*ptr == '\r'))
1093 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
1094 goto http_msg_hdr_l2_lf;
1095
1096 case HTTP_MSG_HDR_L2_LF:
1097 http_msg_hdr_l2_lf:
1098 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
1099 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
1100
1101 case HTTP_MSG_HDR_L2_LWS:
1102 http_msg_hdr_l2_lws:
1103 if (unlikely(HTTP_IS_SPHT(*ptr))) {
1104 /* LWS: replace HT,CR,LF with spaces */
1105 for (; start + eol < ptr; eol++)
1106 start[eol] = ' ';
1107 goto http_msg_hdr_val;
1108 }
1109 http_msg_complete_header:
1110 /*
1111 * It was a new header, so the last one is finished. Assumes
1112 * <sol> points to the first char of the name, <col> to the
1113 * colon, <sov> points to the first character of the value and
1114 * <eol> to the first CR or LF so we know how the line ends. We
1115 * will trim spaces around the value. It's possible to do it by
1116 * adjusting <eol> and <sov> which are no more used after this.
1117 * We can add the header field to the list.
1118 */
1119 while (sov < eol && HTTP_IS_LWS(start[sov]))
1120 sov++;
1121
1122 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
1123 eol--;
1124
1125
1126 n = ist2(start + sol, col - sol);
1127 v = ist2(start + sov, eol - sov);
1128
1129 if (unlikely(hdr_count >= hdr_num)) {
1130 state = HTTP_MSG_HDR_L2_LWS;
1131 goto http_output_full;
1132 }
1133 http_set_hdr(&hdr[hdr_count++], n, v);
1134
1135 if (h1m) {
1136 long long cl;
1137
Willy Tarreaud22e83a2017-10-31 08:02:24 +01001138 if (h1m->status >= 100 && h1m->status < 200)
1139 h1m->curr_len = h1m->body_len = 0;
1140 else if (h1m->status == 304 || h1m->status == 204) {
Willy Tarreau8ea0f382017-10-30 19:31:59 +01001141 /* no contents, claim c-len is present and set to zero */
1142 h1m->flags |= H1_MF_CLEN;
1143 h1m->curr_len = h1m->body_len = 0;
1144 }
1145 else if (isteq(n, ist("transfer-encoding"))) {
Willy Tarreau794f9af2017-07-26 09:07:47 +02001146 h1m->flags &= ~H1_MF_CLEN;
1147 h1m->flags |= H1_MF_CHNK;
1148 }
1149 else if (isteq(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
1150 h1m->flags |= H1_MF_CLEN;
1151 strl2llrc(v.ptr, v.len, &cl);
1152 h1m->curr_len = h1m->body_len = cl;
1153 }
1154 }
1155
1156 sol = ptr - start;
1157 if (likely(!HTTP_IS_CRLF(*ptr)))
1158 goto http_msg_hdr_name;
1159
1160 if (likely(*ptr == '\r'))
1161 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
1162 goto http_msg_last_lf;
1163
1164 case HTTP_MSG_LAST_LF:
1165 http_msg_last_lf:
1166 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
1167 ptr++;
1168 /* <ptr> now points to the first byte of payload. If needed sol
1169 * still points to the first of either CR or LF of the empty
1170 * line ending the headers block.
1171 */
1172 if (unlikely(hdr_count >= hdr_num)) {
1173 state = HTTP_MSG_LAST_LF;
1174 goto http_output_full;
1175 }
1176 http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
1177 state = HTTP_MSG_BODY;
1178 break;
1179
1180 default:
1181 /* impossible states */
1182 goto http_msg_invalid;
1183 }
1184
1185 /* reaching here, we've parsed the whole message and the state is
1186 * HTTP_MSG_BODY.
1187 */
1188 return ptr - start + skip;
1189
1190 http_msg_ood:
1191 /* out of data at <ptr> during state <state> */
1192 return 0;
1193
1194 http_msg_invalid:
1195 /* invalid message, error at <ptr> */
1196 if (h1m) {
1197 h1m->err_state = state;
1198 h1m->err_pos = ptr - start + skip;
1199 }
1200 return -1;
1201
1202 http_output_full:
1203 /* no more room to store the current header, error at <ptr> */
1204 if (h1m) {
1205 h1m->err_state = state;
1206 h1m->err_pos = ptr - start + skip;
1207 }
1208 return -2;
1209}
1210
/* Performs a very minimal scan of the trailers block found in the output part
 * of <buf>, looking at no more than <max> bytes, and reports how many bytes
 * must be deleted to skip these trailers. Lines may end with either LF or
 * CRLF, and the block ends at the first line which starts with its own line
 * terminator (i.e. an empty line).
 *
 * Return values:
 *   > 0 : number of bytes from the first scanned byte up to and including the
 *         LF of the empty line ending the trailers.
 *     0 : the <max> limit was reached before the current line was terminated,
 *         more input data is needed.
 *   < 0 : parse error (a second CR was met on a line before any LF). The
 *         caller has to decide how to proceed, possibly eating everything.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int max)
{
	/* the scan limit depends neither on the line nor on the position */
	const char *limit = b_peek(buf, max);
	int count = 0;

	for (;;) {
		const char *line = b_peek(buf, count);
		const char *eol = NULL; /* first CR or LF met on this line */
		const char *cur;

		/* walk the current line until its LF, remembering where the
		 * end-of-line sequence started.
		 */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur != '\r')
				continue;

			/* only one CR is tolerated per line */
			if (eol)
				return -1;
			eol = cur;
		}

		/* bare LF: the end-of-line sequence starts on the LF itself */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		count += b_dist(buf, line, b_next(buf, cur));

		/* LF/CRLF at the very beginning of the line: this was the
		 * empty line closing the trailers, nothing is left from this
		 * message.
		 */
		if (eol == line)
			return count;

		/* otherwise proceed with the next line */
	}
}
1261
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001262/* This function skips trailers in the buffer associated with HTTP message
1263 * <msg>. The first visited position is msg->next. If the end of the trailers is
1264 * found, the function returns >0. So, the caller can automatically schedul it
1265 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
1266 * data are available, the function does not change anything except maybe
1267 * msg->sol if it could parse some lines, and returns zero. If a parse error
1268 * is encountered, the function returns < 0 and does not change anything except
1269 * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
1270 * state before calling this function, which implies that all non-trailers data
1271 * have already been scheduled for forwarding, and that msg->next exactly
1272 * matches the length of trailers already parsed and not forwarded. It is also
1273 * important to note that this function is designed to be able to parse wrapped
1274 * headers at end of buffer.
1275 */
1276int http_forward_trailers(struct http_msg *msg)
1277{
1278 const struct buffer *buf = msg->chn->buf;
1279
1280 /* we have msg->next which points to next line. Look for CRLF. But
1281 * first, we reset msg->sol */
1282 msg->sol = 0;
1283 while (1) {
1284 const char *p1 = NULL, *p2 = NULL;
Willy Tarreau188e2302018-06-15 11:11:53 +02001285 const char *start = c_ptr(msg->chn, msg->next + msg->sol);
Willy Tarreau8f9c72d2018-06-07 18:46:28 +02001286 const char *stop = b_tail(buf);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001287 const char *ptr = start;
1288 int bytes = 0;
1289
1290 /* scan current line and stop at LF or CRLF */
1291 while (1) {
1292 if (ptr == stop)
1293 return 0;
1294
1295 if (*ptr == '\n') {
1296 if (!p1)
1297 p1 = ptr;
1298 p2 = ptr;
1299 break;
1300 }
1301
1302 if (*ptr == '\r') {
1303 if (p1) {
1304 msg->err_pos = buffer_count(buf, buf->p, ptr);
1305 return -1;
1306 }
1307 p1 = ptr;
1308 }
1309
1310 ptr++;
1311 if (ptr >= buf->data + buf->size)
1312 ptr = buf->data;
1313 }
1314
1315 /* after LF; point to beginning of next line */
1316 p2++;
1317 if (p2 >= buf->data + buf->size)
1318 p2 = buf->data;
1319
1320 bytes = p2 - start;
1321 if (bytes < 0)
1322 bytes += buf->size;
1323 msg->sol += bytes;
1324
1325 /* LF/CRLF at beginning of line => end of trailers at p2.
1326 * Everything was scheduled for forwarding, there's nothing left
1327 * from this message. */
1328 if (p1 == start)
1329 return 1;
1330
1331 /* OK, next line then */
1332 }
1333}