blob: 0d41d0bb1a4b59c10b26c9063db1f30951b8767d [file] [log] [blame]
/*
 * HTTP/1 protocol analyzer
 *
 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020014#include <common/config.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020015#include <common/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020016
Willy Tarreau188e2302018-06-15 11:11:53 +020017#include <proto/channel.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020018#include <proto/h1.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020019#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020020
21/* It is about twice as fast on recent architectures to lookup a byte in a
22 * table than to perform a boolean AND or OR between two tests. Refer to
23 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
24 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
25 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
26 * digit. Note: please do not overwrite values in assignment since gcc-2.95
27 * will not handle them correctly. It's worth noting that chars 128..255 are
28 * nothing, not even control chars.
29 */
30const unsigned char h1_char_classes[256] = {
31 [ 0] = H1_FLG_CTL,
32 [ 1] = H1_FLG_CTL,
33 [ 2] = H1_FLG_CTL,
34 [ 3] = H1_FLG_CTL,
35 [ 4] = H1_FLG_CTL,
36 [ 5] = H1_FLG_CTL,
37 [ 6] = H1_FLG_CTL,
38 [ 7] = H1_FLG_CTL,
39 [ 8] = H1_FLG_CTL,
40 [ 9] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP | H1_FLG_CTL,
41 [ 10] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
42 [ 11] = H1_FLG_CTL,
43 [ 12] = H1_FLG_CTL,
44 [ 13] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
45 [ 14] = H1_FLG_CTL,
46 [ 15] = H1_FLG_CTL,
47 [ 16] = H1_FLG_CTL,
48 [ 17] = H1_FLG_CTL,
49 [ 18] = H1_FLG_CTL,
50 [ 19] = H1_FLG_CTL,
51 [ 20] = H1_FLG_CTL,
52 [ 21] = H1_FLG_CTL,
53 [ 22] = H1_FLG_CTL,
54 [ 23] = H1_FLG_CTL,
55 [ 24] = H1_FLG_CTL,
56 [ 25] = H1_FLG_CTL,
57 [ 26] = H1_FLG_CTL,
58 [ 27] = H1_FLG_CTL,
59 [ 28] = H1_FLG_CTL,
60 [ 29] = H1_FLG_CTL,
61 [ 30] = H1_FLG_CTL,
62 [ 31] = H1_FLG_CTL,
63 [' '] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP,
64 ['!'] = H1_FLG_TOK,
65 ['"'] = H1_FLG_SEP,
66 ['#'] = H1_FLG_TOK,
67 ['$'] = H1_FLG_TOK,
68 ['%'] = H1_FLG_TOK,
69 ['&'] = H1_FLG_TOK,
70 [ 39] = H1_FLG_TOK,
71 ['('] = H1_FLG_SEP,
72 [')'] = H1_FLG_SEP,
73 ['*'] = H1_FLG_TOK,
74 ['+'] = H1_FLG_TOK,
75 [','] = H1_FLG_SEP,
76 ['-'] = H1_FLG_TOK,
77 ['.'] = H1_FLG_TOK | H1_FLG_VER,
78 ['/'] = H1_FLG_SEP | H1_FLG_VER,
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +010079 ['0'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
80 ['1'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
81 ['2'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
82 ['3'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
83 ['4'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
84 ['5'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
85 ['6'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
86 ['7'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
87 ['8'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
88 ['9'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020089 [':'] = H1_FLG_SEP,
90 [';'] = H1_FLG_SEP,
91 ['<'] = H1_FLG_SEP,
92 ['='] = H1_FLG_SEP,
93 ['>'] = H1_FLG_SEP,
94 ['?'] = H1_FLG_SEP,
95 ['@'] = H1_FLG_SEP,
96 ['A'] = H1_FLG_TOK,
97 ['B'] = H1_FLG_TOK,
98 ['C'] = H1_FLG_TOK,
99 ['D'] = H1_FLG_TOK,
100 ['E'] = H1_FLG_TOK,
101 ['F'] = H1_FLG_TOK,
102 ['G'] = H1_FLG_TOK,
103 ['H'] = H1_FLG_TOK | H1_FLG_VER,
104 ['I'] = H1_FLG_TOK,
105 ['J'] = H1_FLG_TOK,
106 ['K'] = H1_FLG_TOK,
107 ['L'] = H1_FLG_TOK,
108 ['M'] = H1_FLG_TOK,
109 ['N'] = H1_FLG_TOK,
110 ['O'] = H1_FLG_TOK,
111 ['P'] = H1_FLG_TOK | H1_FLG_VER,
112 ['Q'] = H1_FLG_TOK,
113 ['R'] = H1_FLG_TOK | H1_FLG_VER,
114 ['S'] = H1_FLG_TOK | H1_FLG_VER,
115 ['T'] = H1_FLG_TOK | H1_FLG_VER,
116 ['U'] = H1_FLG_TOK,
117 ['V'] = H1_FLG_TOK,
118 ['W'] = H1_FLG_TOK,
119 ['X'] = H1_FLG_TOK,
120 ['Y'] = H1_FLG_TOK,
121 ['Z'] = H1_FLG_TOK,
122 ['['] = H1_FLG_SEP,
123 [ 92] = H1_FLG_SEP,
124 [']'] = H1_FLG_SEP,
125 ['^'] = H1_FLG_TOK,
126 ['_'] = H1_FLG_TOK,
127 ['`'] = H1_FLG_TOK,
128 ['a'] = H1_FLG_TOK,
129 ['b'] = H1_FLG_TOK,
130 ['c'] = H1_FLG_TOK,
131 ['d'] = H1_FLG_TOK,
132 ['e'] = H1_FLG_TOK,
133 ['f'] = H1_FLG_TOK,
134 ['g'] = H1_FLG_TOK,
135 ['h'] = H1_FLG_TOK,
136 ['i'] = H1_FLG_TOK,
137 ['j'] = H1_FLG_TOK,
138 ['k'] = H1_FLG_TOK,
139 ['l'] = H1_FLG_TOK,
140 ['m'] = H1_FLG_TOK,
141 ['n'] = H1_FLG_TOK,
142 ['o'] = H1_FLG_TOK,
143 ['p'] = H1_FLG_TOK,
144 ['q'] = H1_FLG_TOK,
145 ['r'] = H1_FLG_TOK,
146 ['s'] = H1_FLG_TOK,
147 ['t'] = H1_FLG_TOK,
148 ['u'] = H1_FLG_TOK,
149 ['v'] = H1_FLG_TOK,
150 ['w'] = H1_FLG_TOK,
151 ['x'] = H1_FLG_TOK,
152 ['y'] = H1_FLG_TOK,
153 ['z'] = H1_FLG_TOK,
154 ['{'] = H1_FLG_SEP,
155 ['|'] = H1_FLG_TOK,
156 ['}'] = H1_FLG_SEP,
157 ['~'] = H1_FLG_TOK,
158 [127] = H1_FLG_CTL,
159};
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200160
161
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200162/*
163 * This function parses a status line between <ptr> and <end>, starting with
164 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
165 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
166 * will give undefined results.
167 * Note that it is upon the caller's responsibility to ensure that ptr < end,
168 * and that msg->sol points to the beginning of the response.
169 * If a complete line is found (which implies that at least one CR or LF is
170 * found before <end>, the updated <ptr> is returned, otherwise NULL is
171 * returned indicating an incomplete line (which does not mean that parts have
172 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
173 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
174 * upon next call.
175 *
176 * This function was intentionally designed to be called from
177 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
178 * within its state machine and use the same macros, hence the need for same
179 * labels and variable names. Note that msg->sol is left unchanged.
180 */
181const char *http_parse_stsline(struct http_msg *msg,
182 enum h1_state state, const char *ptr, const char *end,
183 unsigned int *ret_ptr, enum h1_state *ret_state)
184{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200185 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200186
187 switch (state) {
188 case HTTP_MSG_RPVER:
189 http_msg_rpver:
190 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
191 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
192
193 if (likely(HTTP_IS_SPHT(*ptr))) {
194 msg->sl.st.v_l = ptr - msg_start;
195 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
196 }
197 msg->err_state = HTTP_MSG_RPVER;
198 state = HTTP_MSG_ERROR;
199 break;
200
201 case HTTP_MSG_RPVER_SP:
202 http_msg_rpver_sp:
203 if (likely(!HTTP_IS_LWS(*ptr))) {
204 msg->sl.st.c = ptr - msg_start;
205 goto http_msg_rpcode;
206 }
207 if (likely(HTTP_IS_SPHT(*ptr)))
208 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
209 /* so it's a CR/LF, this is invalid */
210 msg->err_state = HTTP_MSG_RPVER_SP;
211 state = HTTP_MSG_ERROR;
212 break;
213
214 case HTTP_MSG_RPCODE:
215 http_msg_rpcode:
216 if (likely(!HTTP_IS_LWS(*ptr)))
217 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
218
219 if (likely(HTTP_IS_SPHT(*ptr))) {
220 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
221 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
222 }
223
224 /* so it's a CR/LF, so there is no reason phrase */
225 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
226 http_msg_rsp_reason:
227 /* FIXME: should we support HTTP responses without any reason phrase ? */
228 msg->sl.st.r = ptr - msg_start;
229 msg->sl.st.r_l = 0;
230 goto http_msg_rpline_eol;
231
232 case HTTP_MSG_RPCODE_SP:
233 http_msg_rpcode_sp:
234 if (likely(!HTTP_IS_LWS(*ptr))) {
235 msg->sl.st.r = ptr - msg_start;
236 goto http_msg_rpreason;
237 }
238 if (likely(HTTP_IS_SPHT(*ptr)))
239 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
240 /* so it's a CR/LF, so there is no reason phrase */
241 goto http_msg_rsp_reason;
242
243 case HTTP_MSG_RPREASON:
244 http_msg_rpreason:
245 if (likely(!HTTP_IS_CRLF(*ptr)))
246 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
247 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
248 http_msg_rpline_eol:
249 /* We have seen the end of line. Note that we do not
250 * necessarily have the \n yet, but at least we know that we
251 * have EITHER \r OR \n, otherwise the response would not be
252 * complete. We can then record the response length and return
253 * to the caller which will be able to register it.
254 */
255 msg->sl.st.l = ptr - msg_start - msg->sol;
256 return ptr;
257
258 default:
259#ifdef DEBUG_FULL
260 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
261 exit(1);
262#endif
263 ;
264 }
265
266 http_msg_ood:
267 /* out of valid data */
268 if (ret_state)
269 *ret_state = state;
270 if (ret_ptr)
271 *ret_ptr = ptr - msg_start;
272 return NULL;
273}
274
275/*
276 * This function parses a request line between <ptr> and <end>, starting with
277 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
278 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
279 * will give undefined results.
280 * Note that it is upon the caller's responsibility to ensure that ptr < end,
281 * and that msg->sol points to the beginning of the request.
282 * If a complete line is found (which implies that at least one CR or LF is
283 * found before <end>, the updated <ptr> is returned, otherwise NULL is
284 * returned indicating an incomplete line (which does not mean that parts have
285 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
286 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
287 * upon next call.
288 *
289 * This function was intentionally designed to be called from
290 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
291 * within its state machine and use the same macros, hence the need for same
292 * labels and variable names. Note that msg->sol is left unchanged.
293 */
294const char *http_parse_reqline(struct http_msg *msg,
295 enum h1_state state, const char *ptr, const char *end,
296 unsigned int *ret_ptr, enum h1_state *ret_state)
297{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200298 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200299
300 switch (state) {
301 case HTTP_MSG_RQMETH:
302 http_msg_rqmeth:
303 if (likely(HTTP_IS_TOKEN(*ptr)))
304 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
305
306 if (likely(HTTP_IS_SPHT(*ptr))) {
307 msg->sl.rq.m_l = ptr - msg_start;
308 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
309 }
310
311 if (likely(HTTP_IS_CRLF(*ptr))) {
312 /* HTTP 0.9 request */
313 msg->sl.rq.m_l = ptr - msg_start;
314 http_msg_req09_uri:
315 msg->sl.rq.u = ptr - msg_start;
316 http_msg_req09_uri_e:
317 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
318 http_msg_req09_ver:
319 msg->sl.rq.v = ptr - msg_start;
320 msg->sl.rq.v_l = 0;
321 goto http_msg_rqline_eol;
322 }
323 msg->err_state = HTTP_MSG_RQMETH;
324 state = HTTP_MSG_ERROR;
325 break;
326
327 case HTTP_MSG_RQMETH_SP:
328 http_msg_rqmeth_sp:
329 if (likely(!HTTP_IS_LWS(*ptr))) {
330 msg->sl.rq.u = ptr - msg_start;
331 goto http_msg_rquri;
332 }
333 if (likely(HTTP_IS_SPHT(*ptr)))
334 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
335 /* so it's a CR/LF, meaning an HTTP 0.9 request */
336 goto http_msg_req09_uri;
337
338 case HTTP_MSG_RQURI:
339 http_msg_rquri:
340#if defined(__x86_64__) || \
341 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
342 defined(__ARM_ARCH_7A__)
343 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
344 while (ptr <= end - sizeof(int)) {
345 int x = *(int *)ptr - 0x21212121;
346 if (x & 0x80808080)
347 break;
348
349 x -= 0x5e5e5e5e;
350 if (!(x & 0x80808080))
351 break;
352
353 ptr += sizeof(int);
354 }
355#endif
356 if (ptr >= end) {
357 state = HTTP_MSG_RQURI;
358 goto http_msg_ood;
359 }
360 http_msg_rquri2:
361 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
362 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
363
364 if (likely(HTTP_IS_SPHT(*ptr))) {
365 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
366 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
367 }
368
369 if (likely((unsigned char)*ptr >= 128)) {
370 /* non-ASCII chars are forbidden unless option
371 * accept-invalid-http-request is enabled in the frontend.
372 * In any case, we capture the faulty char.
373 */
374 if (msg->err_pos < -1)
375 goto invalid_char;
376 if (msg->err_pos == -1)
377 msg->err_pos = ptr - msg_start;
378 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
379 }
380
381 if (likely(HTTP_IS_CRLF(*ptr))) {
382 /* so it's a CR/LF, meaning an HTTP 0.9 request */
383 goto http_msg_req09_uri_e;
384 }
385
386 /* OK forbidden chars, 0..31 or 127 */
387 invalid_char:
388 msg->err_pos = ptr - msg_start;
389 msg->err_state = HTTP_MSG_RQURI;
390 state = HTTP_MSG_ERROR;
391 break;
392
393 case HTTP_MSG_RQURI_SP:
394 http_msg_rquri_sp:
395 if (likely(!HTTP_IS_LWS(*ptr))) {
396 msg->sl.rq.v = ptr - msg_start;
397 goto http_msg_rqver;
398 }
399 if (likely(HTTP_IS_SPHT(*ptr)))
400 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
401 /* so it's a CR/LF, meaning an HTTP 0.9 request */
402 goto http_msg_req09_ver;
403
404 case HTTP_MSG_RQVER:
405 http_msg_rqver:
406 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
407 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
408
409 if (likely(HTTP_IS_CRLF(*ptr))) {
410 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
411 http_msg_rqline_eol:
412 /* We have seen the end of line. Note that we do not
413 * necessarily have the \n yet, but at least we know that we
414 * have EITHER \r OR \n, otherwise the request would not be
415 * complete. We can then record the request length and return
416 * to the caller which will be able to register it.
417 */
418 msg->sl.rq.l = ptr - msg_start - msg->sol;
419 return ptr;
420 }
421
422 /* neither an HTTP_VER token nor a CRLF */
423 msg->err_state = HTTP_MSG_RQVER;
424 state = HTTP_MSG_ERROR;
425 break;
426
427 default:
428#ifdef DEBUG_FULL
429 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
430 exit(1);
431#endif
432 ;
433 }
434
435 http_msg_ood:
436 /* out of valid data */
437 if (ret_state)
438 *ret_state = state;
439 if (ret_ptr)
440 *ret_ptr = ptr - msg_start;
441 return NULL;
442}
443
444/*
445 * This function parses an HTTP message, either a request or a response,
446 * depending on the initial msg->msg_state. The caller is responsible for
447 * ensuring that the message does not wrap. The function can be preempted
448 * everywhere when data are missing and recalled at the exact same location
449 * with no information loss. The message may even be realigned between two
450 * calls. The header index is re-initialized when switching from
451 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
452 * fields. Note that msg->sol will be initialized after completing the first
453 * state, so that none of the msg pointers has to be initialized prior to the
454 * first call.
455 */
456void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
457{
458 enum h1_state state; /* updated only when leaving the FSM */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200459 register const char *ptr, *end; /* request pointers, to avoid dereferences */
460 char *input = (char *)ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200461 struct buffer *buf;
462
463 state = msg->msg_state;
Willy Tarreauc9fa0482018-07-10 17:43:27 +0200464 buf = &msg->chn->buf;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200465 ptr = input + msg->next;
466 end = b_stop(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200467
468 if (unlikely(ptr >= end))
469 goto http_msg_ood;
470
471 switch (state) {
472 /*
473 * First, states that are specific to the response only.
474 * We check them first so that request and headers are
475 * closer to each other (accessed more often).
476 */
477 case HTTP_MSG_RPBEFORE:
478 http_msg_rpbefore:
479 if (likely(HTTP_IS_TOKEN(*ptr))) {
480 /* we have a start of message, but we have to check
481 * first if we need to remove some CRLF. We can only
482 * do this when o=0.
483 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200484 if (unlikely(ptr != input)) {
485 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200486 goto http_msg_ood;
487 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200488 b_del(buf, ptr - input);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200489 }
490 msg->sol = 0;
491 msg->sl.st.l = 0; /* used in debug mode */
492 hdr_idx_init(idx);
493 state = HTTP_MSG_RPVER;
494 goto http_msg_rpver;
495 }
496
497 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
498 state = HTTP_MSG_RPBEFORE;
499 goto http_msg_invalid;
500 }
501
502 if (unlikely(*ptr == '\n'))
503 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
504 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
505 /* stop here */
506
507 case HTTP_MSG_RPBEFORE_CR:
508 http_msg_rpbefore_cr:
509 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
510 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
511 /* stop here */
512
513 case HTTP_MSG_RPVER:
514 http_msg_rpver:
515 case HTTP_MSG_RPVER_SP:
516 case HTTP_MSG_RPCODE:
517 case HTTP_MSG_RPCODE_SP:
518 case HTTP_MSG_RPREASON:
519 ptr = (char *)http_parse_stsline(msg,
520 state, ptr, end,
521 &msg->next, &msg->msg_state);
522 if (unlikely(!ptr))
523 return;
524
525 /* we have a full response and we know that we have either a CR
526 * or an LF at <ptr>.
527 */
528 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
529
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200530 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200531 if (likely(*ptr == '\r'))
532 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
533 goto http_msg_rpline_end;
534
535 case HTTP_MSG_RPLINE_END:
536 http_msg_rpline_end:
537 /* msg->sol must point to the first of CR or LF. */
538 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
539 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
540 /* stop here */
541
542 /*
543 * Second, states that are specific to the request only
544 */
545 case HTTP_MSG_RQBEFORE:
546 http_msg_rqbefore:
547 if (likely(HTTP_IS_TOKEN(*ptr))) {
548 /* we have a start of message, but we have to check
549 * first if we need to remove some CRLF. We can only
550 * do this when o=0.
551 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200552 if (likely(ptr != input)) {
553 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200554 goto http_msg_ood;
555 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200556 b_del(buf, ptr - input);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200557 }
558 msg->sol = 0;
559 msg->sl.rq.l = 0; /* used in debug mode */
560 state = HTTP_MSG_RQMETH;
561 goto http_msg_rqmeth;
562 }
563
564 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
565 state = HTTP_MSG_RQBEFORE;
566 goto http_msg_invalid;
567 }
568
569 if (unlikely(*ptr == '\n'))
570 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
571 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
572 /* stop here */
573
574 case HTTP_MSG_RQBEFORE_CR:
575 http_msg_rqbefore_cr:
576 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
577 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
578 /* stop here */
579
580 case HTTP_MSG_RQMETH:
581 http_msg_rqmeth:
582 case HTTP_MSG_RQMETH_SP:
583 case HTTP_MSG_RQURI:
584 case HTTP_MSG_RQURI_SP:
585 case HTTP_MSG_RQVER:
586 ptr = (char *)http_parse_reqline(msg,
587 state, ptr, end,
588 &msg->next, &msg->msg_state);
589 if (unlikely(!ptr))
590 return;
591
592 /* we have a full request and we know that we have either a CR
593 * or an LF at <ptr>.
594 */
595 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
596
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200597 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200598 if (likely(*ptr == '\r'))
599 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
600 goto http_msg_rqline_end;
601
602 case HTTP_MSG_RQLINE_END:
603 http_msg_rqline_end:
604 /* check for HTTP/0.9 request : no version information available.
605 * msg->sol must point to the first of CR or LF.
606 */
607 if (unlikely(msg->sl.rq.v_l == 0))
608 goto http_msg_last_lf;
609
610 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
611 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
612 /* stop here */
613
614 /*
615 * Common states below
616 */
617 case HTTP_MSG_HDR_FIRST:
618 http_msg_hdr_first:
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200619 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200620 if (likely(!HTTP_IS_CRLF(*ptr))) {
621 goto http_msg_hdr_name;
622 }
623
624 if (likely(*ptr == '\r'))
625 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
626 goto http_msg_last_lf;
627
628 case HTTP_MSG_HDR_NAME:
629 http_msg_hdr_name:
630 /* assumes msg->sol points to the first char */
631 if (likely(HTTP_IS_TOKEN(*ptr)))
632 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
633
634 if (likely(*ptr == ':'))
635 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
636
637 if (likely(msg->err_pos < -1) || *ptr == '\n') {
638 state = HTTP_MSG_HDR_NAME;
639 goto http_msg_invalid;
640 }
641
642 if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200643 msg->err_pos = ptr - input; /* >= 0 now */
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200644
645 /* and we still accept this non-token character */
646 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
647
648 case HTTP_MSG_HDR_L1_SP:
649 http_msg_hdr_l1_sp:
650 /* assumes msg->sol points to the first char */
651 if (likely(HTTP_IS_SPHT(*ptr)))
652 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
653
654 /* header value can be basically anything except CR/LF */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200655 msg->sov = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200656
657 if (likely(!HTTP_IS_CRLF(*ptr))) {
658 goto http_msg_hdr_val;
659 }
660
661 if (likely(*ptr == '\r'))
662 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
663 goto http_msg_hdr_l1_lf;
664
665 case HTTP_MSG_HDR_L1_LF:
666 http_msg_hdr_l1_lf:
667 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
668 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
669
670 case HTTP_MSG_HDR_L1_LWS:
671 http_msg_hdr_l1_lws:
672 if (likely(HTTP_IS_SPHT(*ptr))) {
673 /* replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200674 for (; input + msg->sov < ptr; msg->sov++)
675 input[msg->sov] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200676 goto http_msg_hdr_l1_sp;
677 }
678 /* we had a header consisting only in spaces ! */
679 msg->eol = msg->sov;
680 goto http_msg_complete_header;
681
682 case HTTP_MSG_HDR_VAL:
683 http_msg_hdr_val:
684 /* assumes msg->sol points to the first char, and msg->sov
685 * points to the first character of the value.
686 */
687
688 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
689 * and lower. In fact since most of the time is spent in the loop, we
690 * also remove the sign bit test so that bytes 0x8e..0x0d break the
691 * loop, but we don't care since they're very rare in header values.
692 */
693#if defined(__x86_64__)
694 while (ptr <= end - sizeof(long)) {
695 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
696 goto http_msg_hdr_val2;
697 ptr += sizeof(long);
698 }
699#endif
700#if defined(__x86_64__) || \
701 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
702 defined(__ARM_ARCH_7A__)
703 while (ptr <= end - sizeof(int)) {
704 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
705 goto http_msg_hdr_val2;
706 ptr += sizeof(int);
707 }
708#endif
709 if (ptr >= end) {
710 state = HTTP_MSG_HDR_VAL;
711 goto http_msg_ood;
712 }
713 http_msg_hdr_val2:
714 if (likely(!HTTP_IS_CRLF(*ptr)))
715 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
716
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200717 msg->eol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200718 /* Note: we could also copy eol into ->eoh so that we have the
719 * real header end in case it ends with lots of LWS, but is this
720 * really needed ?
721 */
722 if (likely(*ptr == '\r'))
723 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
724 goto http_msg_hdr_l2_lf;
725
726 case HTTP_MSG_HDR_L2_LF:
727 http_msg_hdr_l2_lf:
728 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
729 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
730
731 case HTTP_MSG_HDR_L2_LWS:
732 http_msg_hdr_l2_lws:
733 if (unlikely(HTTP_IS_SPHT(*ptr))) {
734 /* LWS: replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200735 for (; input + msg->eol < ptr; msg->eol++)
736 input[msg->eol] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200737 goto http_msg_hdr_val;
738 }
739 http_msg_complete_header:
740 /*
741 * It was a new header, so the last one is finished.
742 * Assumes msg->sol points to the first char, msg->sov points
743 * to the first character of the value and msg->eol to the
744 * first CR or LF so we know how the line ends. We insert last
745 * header into the index.
746 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200747 if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r',
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200748 idx, idx->tail) < 0)) {
749 state = HTTP_MSG_HDR_L2_LWS;
750 goto http_msg_invalid;
751 }
752
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200753 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200754 if (likely(!HTTP_IS_CRLF(*ptr))) {
755 goto http_msg_hdr_name;
756 }
757
758 if (likely(*ptr == '\r'))
759 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
760 goto http_msg_last_lf;
761
762 case HTTP_MSG_LAST_LF:
763 http_msg_last_lf:
764 /* Assumes msg->sol points to the first of either CR or LF.
765 * Sets ->sov and ->next to the total header length, ->eoh to
766 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
767 */
768 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
769 ptr++;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200770 msg->sov = msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200771 msg->eoh = msg->sol;
772 msg->sol = 0;
773 msg->eol = msg->sov - msg->eoh;
774 msg->msg_state = HTTP_MSG_BODY;
775 return;
776
777 case HTTP_MSG_ERROR:
778 /* this may only happen if we call http_msg_analyser() twice with an error */
779 break;
780
781 default:
782#ifdef DEBUG_FULL
783 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
784 exit(1);
785#endif
786 ;
787 }
788 http_msg_ood:
789 /* out of data */
790 msg->msg_state = state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200791 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200792 return;
793
794 http_msg_invalid:
795 /* invalid message */
796 msg->err_state = state;
797 msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200798 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200799 return;
800}
801
/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
 * pairs representing header fields into the array <hdr> of size <hdr_num>,
 * whose last entry will have an empty name and an empty value. If <hdr_num> is
 * too small to represent the whole message, an error is returned. If <h1m> is
 * not NULL, some protocol elements such as content-length and transfer-encoding
 * will be parsed and stored there as well.
 *
 * For now it's limited to the response. If the header block is incomplete,
 * 0 is returned, waiting to be called again with more data to try it again.
 *
 * The code derived from the main HTTP/1 parser above but was simplified and
 * optimized to process responses produced or forwarded by haproxy. The caller
 * is responsible for ensuring that the message doesn't wrap, and should ensure
 * it is complete to avoid having to retry the operation after a failed
 * attempt. The message is not supposed to be invalid, which is why a few
 * properties such as the character set used in the header field names are not
 * checked. In case of an unparsable response message, a negative value will be
 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
 * recommended.
 *
 * This function returns :
 *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
 *    -2 if the output is full (hdr_num reached). err_state and err_pos also
 *       indicate where it failed.
 *     0 in case of missing data.
 *   > 0 on success, it then corresponds to the number of bytes read since
 *       <start> so that the caller can go on with the payload.
 */
834int h1_headers_to_hdr_list(char *start, const char *stop,
835 struct http_hdr *hdr, unsigned int hdr_num,
836 struct h1m *h1m)
837{
838 enum h1_state state = HTTP_MSG_RPBEFORE;
839 register char *ptr = start;
840 register const char *end = stop;
841 unsigned int hdr_count = 0;
842 unsigned int code = 0; /* status code, ASCII form */
843 unsigned int st_c; /* beginning of status code, relative to msg_start */
844 unsigned int st_c_l; /* length of status code */
845 unsigned int sol = 0; /* start of line */
846 unsigned int col = 0; /* position of the colon */
847 unsigned int eol = 0; /* end of line */
848 unsigned int sov = 0; /* start of value */
849 unsigned int skip = 0; /* number of bytes skipped at the beginning */
850 struct ist n, v; /* header name and value during parsing */
851
852 if (unlikely(ptr >= end))
853 goto http_msg_ood;
854
855 switch (state) {
856 case HTTP_MSG_RPBEFORE:
857 http_msg_rpbefore:
858 if (likely(HTTP_IS_TOKEN(*ptr))) {
859 /* we have a start of message, we may have skipped some
860 * heading CRLF. Skip them now.
861 */
862 skip += ptr - start;
863 start = ptr;
864
865 sol = 0;
866 hdr_count = 0;
867 state = HTTP_MSG_RPVER;
868 goto http_msg_rpver;
869 }
870
871 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
872 state = HTTP_MSG_RPBEFORE;
873 goto http_msg_invalid;
874 }
875
876 if (unlikely(*ptr == '\n'))
877 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
878 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
879 /* stop here */
880
881 case HTTP_MSG_RPBEFORE_CR:
882 http_msg_rpbefore_cr:
883 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
884 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
885 /* stop here */
886
887 case HTTP_MSG_RPVER:
888 http_msg_rpver:
889 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
890 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
891
892 if (likely(HTTP_IS_SPHT(*ptr))) {
893 /* version length = ptr - start */
894 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
895 }
896 state = HTTP_MSG_RPVER;
897 goto http_msg_invalid;
898
899 case HTTP_MSG_RPVER_SP:
900 http_msg_rpver_sp:
901 if (likely(!HTTP_IS_LWS(*ptr))) {
902 code = 0;
903 st_c = ptr - start;
904 goto http_msg_rpcode;
905 }
906 if (likely(HTTP_IS_SPHT(*ptr)))
907 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
908 /* so it's a CR/LF, this is invalid */
909 state = HTTP_MSG_RPVER_SP;
910 goto http_msg_invalid;
911
912 case HTTP_MSG_RPCODE:
913 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100914 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100915 code = code * 10 + *ptr - '0';
Willy Tarreau794f9af2017-07-26 09:07:47 +0200916 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
917 }
918
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100919 if (unlikely(!HTTP_IS_LWS(*ptr))) {
920 state = HTTP_MSG_RPCODE;
921 goto http_msg_invalid;
922 }
923
Willy Tarreau794f9af2017-07-26 09:07:47 +0200924 if (likely(HTTP_IS_SPHT(*ptr))) {
925 st_c_l = ptr - start - st_c;
926 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
927 }
928
929 /* so it's a CR/LF, so there is no reason phrase */
930 st_c_l = ptr - start - st_c;
931
932 http_msg_rsp_reason:
933 /* reason = ptr - start; */
934 /* reason length = 0 */
935 goto http_msg_rpline_eol;
936
937 case HTTP_MSG_RPCODE_SP:
938 http_msg_rpcode_sp:
939 if (likely(!HTTP_IS_LWS(*ptr))) {
940 /* reason = ptr - start */
941 goto http_msg_rpreason;
942 }
943 if (likely(HTTP_IS_SPHT(*ptr)))
944 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
945 /* so it's a CR/LF, so there is no reason phrase */
946 goto http_msg_rsp_reason;
947
948 case HTTP_MSG_RPREASON:
949 http_msg_rpreason:
950 if (likely(!HTTP_IS_CRLF(*ptr)))
951 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
952 /* reason length = ptr - start - reason */
953 http_msg_rpline_eol:
954 /* We have seen the end of line. Note that we do not
955 * necessarily have the \n yet, but at least we know that we
956 * have EITHER \r OR \n, otherwise the response would not be
957 * complete. We can then record the response length and return
958 * to the caller which will be able to register it.
959 */
960
961 if (unlikely(hdr_count >= hdr_num)) {
962 state = HTTP_MSG_RPREASON;
963 goto http_output_full;
964 }
965 http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + st_c, st_c_l));
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100966 if (h1m)
967 h1m->status = code;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200968
969 sol = ptr - start;
970 if (likely(*ptr == '\r'))
971 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
972 goto http_msg_rpline_end;
973
974 case HTTP_MSG_RPLINE_END:
975 http_msg_rpline_end:
976 /* sol must point to the first of CR or LF. */
977 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
978 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
979 /* stop here */
980
981 case HTTP_MSG_HDR_FIRST:
982 http_msg_hdr_first:
983 sol = ptr - start;
984 if (likely(!HTTP_IS_CRLF(*ptr))) {
985 goto http_msg_hdr_name;
986 }
987
988 if (likely(*ptr == '\r'))
989 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
990 goto http_msg_last_lf;
991
992 case HTTP_MSG_HDR_NAME:
993 http_msg_hdr_name:
994 /* assumes sol points to the first char */
995 if (likely(HTTP_IS_TOKEN(*ptr))) {
996 /* turn it to lower case if needed */
997 if (isupper((unsigned char)*ptr))
998 *ptr = tolower(*ptr);
999 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
1000 }
1001
1002 if (likely(*ptr == ':')) {
1003 col = ptr - start;
1004 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
1005 }
1006
1007 if (HTTP_IS_LWS(*ptr)) {
1008 state = HTTP_MSG_HDR_NAME;
1009 goto http_msg_invalid;
1010 }
1011
1012 /* now we have a non-token character in the header field name,
1013 * it's up to the H1 layer to have decided whether or not it
1014 * was acceptable. If we find it here, it was considered
1015 * acceptable due to configuration rules so we obey.
1016 */
1017 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
1018
1019 case HTTP_MSG_HDR_L1_SP:
1020 http_msg_hdr_l1_sp:
1021 /* assumes sol points to the first char */
1022 if (likely(HTTP_IS_SPHT(*ptr)))
1023 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
1024
1025 /* header value can be basically anything except CR/LF */
1026 sov = ptr - start;
1027
1028 if (likely(!HTTP_IS_CRLF(*ptr))) {
1029 goto http_msg_hdr_val;
1030 }
1031
1032 if (likely(*ptr == '\r'))
1033 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
1034 goto http_msg_hdr_l1_lf;
1035
1036 case HTTP_MSG_HDR_L1_LF:
1037 http_msg_hdr_l1_lf:
1038 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
1039 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
1040
1041 case HTTP_MSG_HDR_L1_LWS:
1042 http_msg_hdr_l1_lws:
1043 if (likely(HTTP_IS_SPHT(*ptr))) {
1044 /* replace HT,CR,LF with spaces */
1045 for (; start + sov < ptr; sov++)
1046 start[sov] = ' ';
1047 goto http_msg_hdr_l1_sp;
1048 }
1049 /* we had a header consisting only in spaces ! */
1050 eol = sov;
1051 goto http_msg_complete_header;
1052
1053 case HTTP_MSG_HDR_VAL:
1054 http_msg_hdr_val:
1055 /* assumes sol points to the first char, and sov
1056 * points to the first character of the value.
1057 */
1058
1059 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
1060 * and lower. In fact since most of the time is spent in the loop, we
1061 * also remove the sign bit test so that bytes 0x8e..0x0d break the
1062 * loop, but we don't care since they're very rare in header values.
1063 */
1064#if defined(__x86_64__)
1065 while (ptr <= end - sizeof(long)) {
1066 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
1067 goto http_msg_hdr_val2;
1068 ptr += sizeof(long);
1069 }
1070#endif
1071#if defined(__x86_64__) || \
1072 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
1073 defined(__ARM_ARCH_7A__)
1074 while (ptr <= end - sizeof(int)) {
1075 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
1076 goto http_msg_hdr_val2;
1077 ptr += sizeof(int);
1078 }
1079#endif
1080 if (ptr >= end) {
1081 state = HTTP_MSG_HDR_VAL;
1082 goto http_msg_ood;
1083 }
1084 http_msg_hdr_val2:
1085 if (likely(!HTTP_IS_CRLF(*ptr)))
1086 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
1087
1088 eol = ptr - start;
1089 /* Note: we could also copy eol into ->eoh so that we have the
1090 * real header end in case it ends with lots of LWS, but is this
1091 * really needed ?
1092 */
1093 if (likely(*ptr == '\r'))
1094 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
1095 goto http_msg_hdr_l2_lf;
1096
1097 case HTTP_MSG_HDR_L2_LF:
1098 http_msg_hdr_l2_lf:
1099 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
1100 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
1101
1102 case HTTP_MSG_HDR_L2_LWS:
1103 http_msg_hdr_l2_lws:
1104 if (unlikely(HTTP_IS_SPHT(*ptr))) {
1105 /* LWS: replace HT,CR,LF with spaces */
1106 for (; start + eol < ptr; eol++)
1107 start[eol] = ' ';
1108 goto http_msg_hdr_val;
1109 }
1110 http_msg_complete_header:
1111 /*
1112 * It was a new header, so the last one is finished. Assumes
1113 * <sol> points to the first char of the name, <col> to the
1114 * colon, <sov> points to the first character of the value and
1115 * <eol> to the first CR or LF so we know how the line ends. We
1116 * will trim spaces around the value. It's possible to do it by
1117 * adjusting <eol> and <sov> which are no more used after this.
1118 * We can add the header field to the list.
1119 */
1120 while (sov < eol && HTTP_IS_LWS(start[sov]))
1121 sov++;
1122
1123 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
1124 eol--;
1125
1126
1127 n = ist2(start + sol, col - sol);
1128 v = ist2(start + sov, eol - sov);
1129
1130 if (unlikely(hdr_count >= hdr_num)) {
1131 state = HTTP_MSG_HDR_L2_LWS;
1132 goto http_output_full;
1133 }
1134 http_set_hdr(&hdr[hdr_count++], n, v);
1135
1136 if (h1m) {
1137 long long cl;
1138
Willy Tarreaud22e83a2017-10-31 08:02:24 +01001139 if (h1m->status >= 100 && h1m->status < 200)
1140 h1m->curr_len = h1m->body_len = 0;
1141 else if (h1m->status == 304 || h1m->status == 204) {
Willy Tarreau8ea0f382017-10-30 19:31:59 +01001142 /* no contents, claim c-len is present and set to zero */
1143 h1m->flags |= H1_MF_CLEN;
1144 h1m->curr_len = h1m->body_len = 0;
1145 }
1146 else if (isteq(n, ist("transfer-encoding"))) {
Willy Tarreau794f9af2017-07-26 09:07:47 +02001147 h1m->flags &= ~H1_MF_CLEN;
1148 h1m->flags |= H1_MF_CHNK;
1149 }
1150 else if (isteq(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
1151 h1m->flags |= H1_MF_CLEN;
1152 strl2llrc(v.ptr, v.len, &cl);
1153 h1m->curr_len = h1m->body_len = cl;
1154 }
1155 }
1156
1157 sol = ptr - start;
1158 if (likely(!HTTP_IS_CRLF(*ptr)))
1159 goto http_msg_hdr_name;
1160
1161 if (likely(*ptr == '\r'))
1162 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
1163 goto http_msg_last_lf;
1164
1165 case HTTP_MSG_LAST_LF:
1166 http_msg_last_lf:
1167 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
1168 ptr++;
1169 /* <ptr> now points to the first byte of payload. If needed sol
1170 * still points to the first of either CR or LF of the empty
1171 * line ending the headers block.
1172 */
1173 if (unlikely(hdr_count >= hdr_num)) {
1174 state = HTTP_MSG_LAST_LF;
1175 goto http_output_full;
1176 }
1177 http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
1178 state = HTTP_MSG_BODY;
1179 break;
1180
1181 default:
1182 /* impossible states */
1183 goto http_msg_invalid;
1184 }
1185
1186 /* reaching here, we've parsed the whole message and the state is
1187 * HTTP_MSG_BODY.
1188 */
1189 return ptr - start + skip;
1190
1191 http_msg_ood:
1192 /* out of data at <ptr> during state <state> */
1193 return 0;
1194
1195 http_msg_invalid:
1196 /* invalid message, error at <ptr> */
1197 if (h1m) {
1198 h1m->err_state = state;
1199 h1m->err_pos = ptr - start + skip;
1200 }
1201 return -1;
1202
1203 http_output_full:
1204 /* no more room to store the current header, error at <ptr> */
1205 if (h1m) {
1206 h1m->err_state = state;
1207 h1m->err_pos = ptr - start + skip;
1208 }
1209 return -2;
1210}
1211
/* Measures the trailers block located <ofs> bytes into <buf>, looking at no
 * more than <max> bytes, without modifying the buffer. Lines are walked one at
 * a time until an empty one (a lone LF, or a CR at the very start of the line)
 * is met. Return values :
 *   > 0 : number of bytes from <ofs> up to and including the empty line, that
 *         the caller may delete to skip the trailers ;
 *     0 : the scan reached the <ofs>+<max> position before the end of the
 *         trailers, more input data is needed ;
 *   < 0 : parse error (a second CR was found on the same line), the caller
 *         has to decide how to proceed, possibly eating everything.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;

	for (;;) {
		const char *line = b_peek(buf, pos);
		const char *mark = NULL; /* first CR or LF seen on this line */
		const char *next;
		const char *cur;

		/* walk the current line up to its LF */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur != '\r')
				continue;

			/* only one CR is tolerated per line */
			if (mark)
				return -1;
			mark = cur;
		}

		/* no CR before the LF: the LF itself is the line terminator */
		if (!mark)
			mark = cur;

		/* account for the whole line, LF included */
		next = b_next(buf, cur);
		pos += b_dist(buf, line, next);

		/* a terminator sitting at the beginning of the line marks the
		 * end of the trailers, which then stop at <next>.
		 */
		if (mark == line)
			return pos - ofs;
	}
}
1262
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001263/* This function skips trailers in the buffer associated with HTTP message
1264 * <msg>. The first visited position is msg->next. If the end of the trailers is
1265 * found, the function returns >0. So, the caller can automatically schedul it
1266 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
1267 * data are available, the function does not change anything except maybe
1268 * msg->sol if it could parse some lines, and returns zero. If a parse error
1269 * is encountered, the function returns < 0 and does not change anything except
1270 * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
1271 * state before calling this function, which implies that all non-trailers data
1272 * have already been scheduled for forwarding, and that msg->next exactly
1273 * matches the length of trailers already parsed and not forwarded. It is also
1274 * important to note that this function is designed to be able to parse wrapped
1275 * headers at end of buffer.
1276 */
1277int http_forward_trailers(struct http_msg *msg)
1278{
Willy Tarreauc9fa0482018-07-10 17:43:27 +02001279 const struct buffer *buf = &msg->chn->buf;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001280 const char *parse = ci_head(msg->chn);
1281 const char *stop = b_tail(buf);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001282
1283 /* we have msg->next which points to next line. Look for CRLF. But
1284 * first, we reset msg->sol */
1285 msg->sol = 0;
1286 while (1) {
1287 const char *p1 = NULL, *p2 = NULL;
Willy Tarreau188e2302018-06-15 11:11:53 +02001288 const char *start = c_ptr(msg->chn, msg->next + msg->sol);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001289 const char *ptr = start;
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001290
1291 /* scan current line and stop at LF or CRLF */
1292 while (1) {
1293 if (ptr == stop)
1294 return 0;
1295
1296 if (*ptr == '\n') {
1297 if (!p1)
1298 p1 = ptr;
1299 p2 = ptr;
1300 break;
1301 }
1302
1303 if (*ptr == '\r') {
1304 if (p1) {
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001305 msg->err_pos = b_dist(buf, parse, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001306 return -1;
1307 }
1308 p1 = ptr;
1309 }
1310
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001311 ptr = b_next(buf, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001312 }
1313
1314 /* after LF; point to beginning of next line */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001315 p2 = b_next(buf, p2);
1316 msg->sol += b_dist(buf, start, p2);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001317
1318 /* LF/CRLF at beginning of line => end of trailers at p2.
1319 * Everything was scheduled for forwarding, there's nothing left
1320 * from this message. */
1321 if (p1 == start)
1322 return 1;
1323
1324 /* OK, next line then */
1325 }
1326}