blob: 63ff99399be5bb07a76118448f7988543120e3ee [file] [log] [blame]
Willy Tarreau0da5b3b2017-09-21 09:30:46 +02001/*
2 * HTTP/1 protocol analyzer
3 *
4 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020014#include <common/config.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020015#include <common/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020016
Willy Tarreau188e2302018-06-15 11:11:53 +020017#include <proto/channel.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020018#include <proto/h1.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020019#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020020
21/* It is about twice as fast on recent architectures to lookup a byte in a
22 * table than to perform a boolean AND or OR between two tests. Refer to
23 * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
24 * neither a separator nor a CTL char. An http ver_token is any ASCII which can
25 * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
26 * digit. Note: please do not overwrite values in assignment since gcc-2.95
27 * will not handle them correctly. It's worth noting that chars 128..255 are
28 * nothing, not even control chars.
29 */
30const unsigned char h1_char_classes[256] = {
31 [ 0] = H1_FLG_CTL,
32 [ 1] = H1_FLG_CTL,
33 [ 2] = H1_FLG_CTL,
34 [ 3] = H1_FLG_CTL,
35 [ 4] = H1_FLG_CTL,
36 [ 5] = H1_FLG_CTL,
37 [ 6] = H1_FLG_CTL,
38 [ 7] = H1_FLG_CTL,
39 [ 8] = H1_FLG_CTL,
40 [ 9] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP | H1_FLG_CTL,
41 [ 10] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
42 [ 11] = H1_FLG_CTL,
43 [ 12] = H1_FLG_CTL,
44 [ 13] = H1_FLG_CRLF | H1_FLG_LWS | H1_FLG_CTL,
45 [ 14] = H1_FLG_CTL,
46 [ 15] = H1_FLG_CTL,
47 [ 16] = H1_FLG_CTL,
48 [ 17] = H1_FLG_CTL,
49 [ 18] = H1_FLG_CTL,
50 [ 19] = H1_FLG_CTL,
51 [ 20] = H1_FLG_CTL,
52 [ 21] = H1_FLG_CTL,
53 [ 22] = H1_FLG_CTL,
54 [ 23] = H1_FLG_CTL,
55 [ 24] = H1_FLG_CTL,
56 [ 25] = H1_FLG_CTL,
57 [ 26] = H1_FLG_CTL,
58 [ 27] = H1_FLG_CTL,
59 [ 28] = H1_FLG_CTL,
60 [ 29] = H1_FLG_CTL,
61 [ 30] = H1_FLG_CTL,
62 [ 31] = H1_FLG_CTL,
63 [' '] = H1_FLG_SPHT | H1_FLG_LWS | H1_FLG_SEP,
64 ['!'] = H1_FLG_TOK,
65 ['"'] = H1_FLG_SEP,
66 ['#'] = H1_FLG_TOK,
67 ['$'] = H1_FLG_TOK,
68 ['%'] = H1_FLG_TOK,
69 ['&'] = H1_FLG_TOK,
70 [ 39] = H1_FLG_TOK,
71 ['('] = H1_FLG_SEP,
72 [')'] = H1_FLG_SEP,
73 ['*'] = H1_FLG_TOK,
74 ['+'] = H1_FLG_TOK,
75 [','] = H1_FLG_SEP,
76 ['-'] = H1_FLG_TOK,
77 ['.'] = H1_FLG_TOK | H1_FLG_VER,
78 ['/'] = H1_FLG_SEP | H1_FLG_VER,
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +010079 ['0'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
80 ['1'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
81 ['2'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
82 ['3'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
83 ['4'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
84 ['5'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
85 ['6'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
86 ['7'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
87 ['8'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
88 ['9'] = H1_FLG_TOK | H1_FLG_VER | H1_FLG_DIG,
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020089 [':'] = H1_FLG_SEP,
90 [';'] = H1_FLG_SEP,
91 ['<'] = H1_FLG_SEP,
92 ['='] = H1_FLG_SEP,
93 ['>'] = H1_FLG_SEP,
94 ['?'] = H1_FLG_SEP,
95 ['@'] = H1_FLG_SEP,
96 ['A'] = H1_FLG_TOK,
97 ['B'] = H1_FLG_TOK,
98 ['C'] = H1_FLG_TOK,
99 ['D'] = H1_FLG_TOK,
100 ['E'] = H1_FLG_TOK,
101 ['F'] = H1_FLG_TOK,
102 ['G'] = H1_FLG_TOK,
103 ['H'] = H1_FLG_TOK | H1_FLG_VER,
104 ['I'] = H1_FLG_TOK,
105 ['J'] = H1_FLG_TOK,
106 ['K'] = H1_FLG_TOK,
107 ['L'] = H1_FLG_TOK,
108 ['M'] = H1_FLG_TOK,
109 ['N'] = H1_FLG_TOK,
110 ['O'] = H1_FLG_TOK,
111 ['P'] = H1_FLG_TOK | H1_FLG_VER,
112 ['Q'] = H1_FLG_TOK,
113 ['R'] = H1_FLG_TOK | H1_FLG_VER,
114 ['S'] = H1_FLG_TOK | H1_FLG_VER,
115 ['T'] = H1_FLG_TOK | H1_FLG_VER,
116 ['U'] = H1_FLG_TOK,
117 ['V'] = H1_FLG_TOK,
118 ['W'] = H1_FLG_TOK,
119 ['X'] = H1_FLG_TOK,
120 ['Y'] = H1_FLG_TOK,
121 ['Z'] = H1_FLG_TOK,
122 ['['] = H1_FLG_SEP,
123 [ 92] = H1_FLG_SEP,
124 [']'] = H1_FLG_SEP,
125 ['^'] = H1_FLG_TOK,
126 ['_'] = H1_FLG_TOK,
127 ['`'] = H1_FLG_TOK,
128 ['a'] = H1_FLG_TOK,
129 ['b'] = H1_FLG_TOK,
130 ['c'] = H1_FLG_TOK,
131 ['d'] = H1_FLG_TOK,
132 ['e'] = H1_FLG_TOK,
133 ['f'] = H1_FLG_TOK,
134 ['g'] = H1_FLG_TOK,
135 ['h'] = H1_FLG_TOK,
136 ['i'] = H1_FLG_TOK,
137 ['j'] = H1_FLG_TOK,
138 ['k'] = H1_FLG_TOK,
139 ['l'] = H1_FLG_TOK,
140 ['m'] = H1_FLG_TOK,
141 ['n'] = H1_FLG_TOK,
142 ['o'] = H1_FLG_TOK,
143 ['p'] = H1_FLG_TOK,
144 ['q'] = H1_FLG_TOK,
145 ['r'] = H1_FLG_TOK,
146 ['s'] = H1_FLG_TOK,
147 ['t'] = H1_FLG_TOK,
148 ['u'] = H1_FLG_TOK,
149 ['v'] = H1_FLG_TOK,
150 ['w'] = H1_FLG_TOK,
151 ['x'] = H1_FLG_TOK,
152 ['y'] = H1_FLG_TOK,
153 ['z'] = H1_FLG_TOK,
154 ['{'] = H1_FLG_SEP,
155 ['|'] = H1_FLG_TOK,
156 ['}'] = H1_FLG_SEP,
157 ['~'] = H1_FLG_TOK,
158 [127] = H1_FLG_CTL,
159};
Willy Tarreaudb4893d2017-09-21 08:40:02 +0200160
161
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200162/*
163 * This function parses a status line between <ptr> and <end>, starting with
164 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
165 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
166 * will give undefined results.
167 * Note that it is upon the caller's responsibility to ensure that ptr < end,
168 * and that msg->sol points to the beginning of the response.
169 * If a complete line is found (which implies that at least one CR or LF is
170 * found before <end>, the updated <ptr> is returned, otherwise NULL is
171 * returned indicating an incomplete line (which does not mean that parts have
172 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
173 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
174 * upon next call.
175 *
176 * This function was intentionally designed to be called from
177 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
178 * within its state machine and use the same macros, hence the need for same
179 * labels and variable names. Note that msg->sol is left unchanged.
180 */
181const char *http_parse_stsline(struct http_msg *msg,
182 enum h1_state state, const char *ptr, const char *end,
183 unsigned int *ret_ptr, enum h1_state *ret_state)
184{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200185 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200186
187 switch (state) {
188 case HTTP_MSG_RPVER:
189 http_msg_rpver:
190 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
191 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
192
193 if (likely(HTTP_IS_SPHT(*ptr))) {
194 msg->sl.st.v_l = ptr - msg_start;
195 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
196 }
197 msg->err_state = HTTP_MSG_RPVER;
198 state = HTTP_MSG_ERROR;
199 break;
200
201 case HTTP_MSG_RPVER_SP:
202 http_msg_rpver_sp:
203 if (likely(!HTTP_IS_LWS(*ptr))) {
204 msg->sl.st.c = ptr - msg_start;
205 goto http_msg_rpcode;
206 }
207 if (likely(HTTP_IS_SPHT(*ptr)))
208 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
209 /* so it's a CR/LF, this is invalid */
210 msg->err_state = HTTP_MSG_RPVER_SP;
211 state = HTTP_MSG_ERROR;
212 break;
213
214 case HTTP_MSG_RPCODE:
215 http_msg_rpcode:
216 if (likely(!HTTP_IS_LWS(*ptr)))
217 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
218
219 if (likely(HTTP_IS_SPHT(*ptr))) {
220 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
221 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
222 }
223
224 /* so it's a CR/LF, so there is no reason phrase */
225 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
226 http_msg_rsp_reason:
227 /* FIXME: should we support HTTP responses without any reason phrase ? */
228 msg->sl.st.r = ptr - msg_start;
229 msg->sl.st.r_l = 0;
230 goto http_msg_rpline_eol;
231
232 case HTTP_MSG_RPCODE_SP:
233 http_msg_rpcode_sp:
234 if (likely(!HTTP_IS_LWS(*ptr))) {
235 msg->sl.st.r = ptr - msg_start;
236 goto http_msg_rpreason;
237 }
238 if (likely(HTTP_IS_SPHT(*ptr)))
239 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
240 /* so it's a CR/LF, so there is no reason phrase */
241 goto http_msg_rsp_reason;
242
243 case HTTP_MSG_RPREASON:
244 http_msg_rpreason:
245 if (likely(!HTTP_IS_CRLF(*ptr)))
246 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
247 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
248 http_msg_rpline_eol:
249 /* We have seen the end of line. Note that we do not
250 * necessarily have the \n yet, but at least we know that we
251 * have EITHER \r OR \n, otherwise the response would not be
252 * complete. We can then record the response length and return
253 * to the caller which will be able to register it.
254 */
255 msg->sl.st.l = ptr - msg_start - msg->sol;
256 return ptr;
257
258 default:
259#ifdef DEBUG_FULL
260 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
261 exit(1);
262#endif
263 ;
264 }
265
266 http_msg_ood:
267 /* out of valid data */
268 if (ret_state)
269 *ret_state = state;
270 if (ret_ptr)
271 *ret_ptr = ptr - msg_start;
272 return NULL;
273}
274
275/*
276 * This function parses a request line between <ptr> and <end>, starting with
277 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
278 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
279 * will give undefined results.
280 * Note that it is upon the caller's responsibility to ensure that ptr < end,
281 * and that msg->sol points to the beginning of the request.
282 * If a complete line is found (which implies that at least one CR or LF is
283 * found before <end>, the updated <ptr> is returned, otherwise NULL is
284 * returned indicating an incomplete line (which does not mean that parts have
285 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
286 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
287 * upon next call.
288 *
289 * This function was intentionally designed to be called from
290 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
291 * within its state machine and use the same macros, hence the need for same
292 * labels and variable names. Note that msg->sol is left unchanged.
293 */
294const char *http_parse_reqline(struct http_msg *msg,
295 enum h1_state state, const char *ptr, const char *end,
296 unsigned int *ret_ptr, enum h1_state *ret_state)
297{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200298 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200299
300 switch (state) {
301 case HTTP_MSG_RQMETH:
302 http_msg_rqmeth:
303 if (likely(HTTP_IS_TOKEN(*ptr)))
304 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
305
306 if (likely(HTTP_IS_SPHT(*ptr))) {
307 msg->sl.rq.m_l = ptr - msg_start;
308 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
309 }
310
311 if (likely(HTTP_IS_CRLF(*ptr))) {
312 /* HTTP 0.9 request */
313 msg->sl.rq.m_l = ptr - msg_start;
314 http_msg_req09_uri:
315 msg->sl.rq.u = ptr - msg_start;
316 http_msg_req09_uri_e:
317 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
318 http_msg_req09_ver:
319 msg->sl.rq.v = ptr - msg_start;
320 msg->sl.rq.v_l = 0;
321 goto http_msg_rqline_eol;
322 }
323 msg->err_state = HTTP_MSG_RQMETH;
324 state = HTTP_MSG_ERROR;
325 break;
326
327 case HTTP_MSG_RQMETH_SP:
328 http_msg_rqmeth_sp:
329 if (likely(!HTTP_IS_LWS(*ptr))) {
330 msg->sl.rq.u = ptr - msg_start;
331 goto http_msg_rquri;
332 }
333 if (likely(HTTP_IS_SPHT(*ptr)))
334 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
335 /* so it's a CR/LF, meaning an HTTP 0.9 request */
336 goto http_msg_req09_uri;
337
338 case HTTP_MSG_RQURI:
339 http_msg_rquri:
340#if defined(__x86_64__) || \
341 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
342 defined(__ARM_ARCH_7A__)
343 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
344 while (ptr <= end - sizeof(int)) {
345 int x = *(int *)ptr - 0x21212121;
346 if (x & 0x80808080)
347 break;
348
349 x -= 0x5e5e5e5e;
350 if (!(x & 0x80808080))
351 break;
352
353 ptr += sizeof(int);
354 }
355#endif
356 if (ptr >= end) {
357 state = HTTP_MSG_RQURI;
358 goto http_msg_ood;
359 }
360 http_msg_rquri2:
361 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
362 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
363
364 if (likely(HTTP_IS_SPHT(*ptr))) {
365 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
366 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
367 }
368
369 if (likely((unsigned char)*ptr >= 128)) {
370 /* non-ASCII chars are forbidden unless option
371 * accept-invalid-http-request is enabled in the frontend.
372 * In any case, we capture the faulty char.
373 */
374 if (msg->err_pos < -1)
375 goto invalid_char;
376 if (msg->err_pos == -1)
377 msg->err_pos = ptr - msg_start;
378 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
379 }
380
381 if (likely(HTTP_IS_CRLF(*ptr))) {
382 /* so it's a CR/LF, meaning an HTTP 0.9 request */
383 goto http_msg_req09_uri_e;
384 }
385
386 /* OK forbidden chars, 0..31 or 127 */
387 invalid_char:
388 msg->err_pos = ptr - msg_start;
389 msg->err_state = HTTP_MSG_RQURI;
390 state = HTTP_MSG_ERROR;
391 break;
392
393 case HTTP_MSG_RQURI_SP:
394 http_msg_rquri_sp:
395 if (likely(!HTTP_IS_LWS(*ptr))) {
396 msg->sl.rq.v = ptr - msg_start;
397 goto http_msg_rqver;
398 }
399 if (likely(HTTP_IS_SPHT(*ptr)))
400 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
401 /* so it's a CR/LF, meaning an HTTP 0.9 request */
402 goto http_msg_req09_ver;
403
404 case HTTP_MSG_RQVER:
405 http_msg_rqver:
406 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
407 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
408
409 if (likely(HTTP_IS_CRLF(*ptr))) {
410 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
411 http_msg_rqline_eol:
412 /* We have seen the end of line. Note that we do not
413 * necessarily have the \n yet, but at least we know that we
414 * have EITHER \r OR \n, otherwise the request would not be
415 * complete. We can then record the request length and return
416 * to the caller which will be able to register it.
417 */
418 msg->sl.rq.l = ptr - msg_start - msg->sol;
419 return ptr;
420 }
421
422 /* neither an HTTP_VER token nor a CRLF */
423 msg->err_state = HTTP_MSG_RQVER;
424 state = HTTP_MSG_ERROR;
425 break;
426
427 default:
428#ifdef DEBUG_FULL
429 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
430 exit(1);
431#endif
432 ;
433 }
434
435 http_msg_ood:
436 /* out of valid data */
437 if (ret_state)
438 *ret_state = state;
439 if (ret_ptr)
440 *ret_ptr = ptr - msg_start;
441 return NULL;
442}
443
444/*
445 * This function parses an HTTP message, either a request or a response,
446 * depending on the initial msg->msg_state. The caller is responsible for
447 * ensuring that the message does not wrap. The function can be preempted
448 * everywhere when data are missing and recalled at the exact same location
449 * with no information loss. The message may even be realigned between two
450 * calls. The header index is re-initialized when switching from
451 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
452 * fields. Note that msg->sol will be initialized after completing the first
453 * state, so that none of the msg pointers has to be initialized prior to the
454 * first call.
455 */
456void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
457{
458 enum h1_state state; /* updated only when leaving the FSM */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200459 register const char *ptr, *end; /* request pointers, to avoid dereferences */
Willy Tarreau950a8a62018-09-06 10:48:15 +0200460 struct buffer *buf = &msg->chn->buf;
461 char *input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200462
463 state = msg->msg_state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200464 ptr = input + msg->next;
465 end = b_stop(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200466
467 if (unlikely(ptr >= end))
468 goto http_msg_ood;
469
470 switch (state) {
471 /*
472 * First, states that are specific to the response only.
473 * We check them first so that request and headers are
474 * closer to each other (accessed more often).
475 */
476 case HTTP_MSG_RPBEFORE:
477 http_msg_rpbefore:
478 if (likely(HTTP_IS_TOKEN(*ptr))) {
479 /* we have a start of message, but we have to check
480 * first if we need to remove some CRLF. We can only
481 * do this when o=0.
482 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200483 if (unlikely(ptr != input)) {
484 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200485 goto http_msg_ood;
486 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200487 b_del(buf, ptr - input);
Willy Tarreau950a8a62018-09-06 10:48:15 +0200488 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200489 }
490 msg->sol = 0;
491 msg->sl.st.l = 0; /* used in debug mode */
492 hdr_idx_init(idx);
493 state = HTTP_MSG_RPVER;
494 goto http_msg_rpver;
495 }
496
497 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
498 state = HTTP_MSG_RPBEFORE;
499 goto http_msg_invalid;
500 }
501
502 if (unlikely(*ptr == '\n'))
503 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
504 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
505 /* stop here */
506
507 case HTTP_MSG_RPBEFORE_CR:
508 http_msg_rpbefore_cr:
509 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
510 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
511 /* stop here */
512
513 case HTTP_MSG_RPVER:
514 http_msg_rpver:
515 case HTTP_MSG_RPVER_SP:
516 case HTTP_MSG_RPCODE:
517 case HTTP_MSG_RPCODE_SP:
518 case HTTP_MSG_RPREASON:
519 ptr = (char *)http_parse_stsline(msg,
520 state, ptr, end,
521 &msg->next, &msg->msg_state);
522 if (unlikely(!ptr))
523 return;
524
525 /* we have a full response and we know that we have either a CR
526 * or an LF at <ptr>.
527 */
528 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
529
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200530 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200531 if (likely(*ptr == '\r'))
532 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
533 goto http_msg_rpline_end;
534
535 case HTTP_MSG_RPLINE_END:
536 http_msg_rpline_end:
537 /* msg->sol must point to the first of CR or LF. */
538 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
539 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
540 /* stop here */
541
542 /*
543 * Second, states that are specific to the request only
544 */
545 case HTTP_MSG_RQBEFORE:
546 http_msg_rqbefore:
547 if (likely(HTTP_IS_TOKEN(*ptr))) {
548 /* we have a start of message, but we have to check
549 * first if we need to remove some CRLF. We can only
550 * do this when o=0.
551 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200552 if (likely(ptr != input)) {
553 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200554 goto http_msg_ood;
555 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200556 b_del(buf, ptr - input);
Willy Tarreau950a8a62018-09-06 10:48:15 +0200557 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200558 }
559 msg->sol = 0;
560 msg->sl.rq.l = 0; /* used in debug mode */
561 state = HTTP_MSG_RQMETH;
562 goto http_msg_rqmeth;
563 }
564
565 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
566 state = HTTP_MSG_RQBEFORE;
567 goto http_msg_invalid;
568 }
569
570 if (unlikely(*ptr == '\n'))
571 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
572 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
573 /* stop here */
574
575 case HTTP_MSG_RQBEFORE_CR:
576 http_msg_rqbefore_cr:
577 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
578 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
579 /* stop here */
580
581 case HTTP_MSG_RQMETH:
582 http_msg_rqmeth:
583 case HTTP_MSG_RQMETH_SP:
584 case HTTP_MSG_RQURI:
585 case HTTP_MSG_RQURI_SP:
586 case HTTP_MSG_RQVER:
587 ptr = (char *)http_parse_reqline(msg,
588 state, ptr, end,
589 &msg->next, &msg->msg_state);
590 if (unlikely(!ptr))
591 return;
592
593 /* we have a full request and we know that we have either a CR
594 * or an LF at <ptr>.
595 */
596 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
597
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200598 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200599 if (likely(*ptr == '\r'))
600 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
601 goto http_msg_rqline_end;
602
603 case HTTP_MSG_RQLINE_END:
604 http_msg_rqline_end:
605 /* check for HTTP/0.9 request : no version information available.
606 * msg->sol must point to the first of CR or LF.
607 */
608 if (unlikely(msg->sl.rq.v_l == 0))
609 goto http_msg_last_lf;
610
611 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
612 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
613 /* stop here */
614
615 /*
616 * Common states below
617 */
618 case HTTP_MSG_HDR_FIRST:
619 http_msg_hdr_first:
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200620 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200621 if (likely(!HTTP_IS_CRLF(*ptr))) {
622 goto http_msg_hdr_name;
623 }
624
625 if (likely(*ptr == '\r'))
626 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
627 goto http_msg_last_lf;
628
629 case HTTP_MSG_HDR_NAME:
630 http_msg_hdr_name:
631 /* assumes msg->sol points to the first char */
632 if (likely(HTTP_IS_TOKEN(*ptr)))
633 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
634
635 if (likely(*ptr == ':'))
636 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
637
638 if (likely(msg->err_pos < -1) || *ptr == '\n') {
639 state = HTTP_MSG_HDR_NAME;
640 goto http_msg_invalid;
641 }
642
643 if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200644 msg->err_pos = ptr - input; /* >= 0 now */
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200645
646 /* and we still accept this non-token character */
647 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
648
649 case HTTP_MSG_HDR_L1_SP:
650 http_msg_hdr_l1_sp:
651 /* assumes msg->sol points to the first char */
652 if (likely(HTTP_IS_SPHT(*ptr)))
653 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
654
655 /* header value can be basically anything except CR/LF */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200656 msg->sov = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200657
658 if (likely(!HTTP_IS_CRLF(*ptr))) {
659 goto http_msg_hdr_val;
660 }
661
662 if (likely(*ptr == '\r'))
663 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
664 goto http_msg_hdr_l1_lf;
665
666 case HTTP_MSG_HDR_L1_LF:
667 http_msg_hdr_l1_lf:
668 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
669 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
670
671 case HTTP_MSG_HDR_L1_LWS:
672 http_msg_hdr_l1_lws:
673 if (likely(HTTP_IS_SPHT(*ptr))) {
674 /* replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200675 for (; input + msg->sov < ptr; msg->sov++)
676 input[msg->sov] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200677 goto http_msg_hdr_l1_sp;
678 }
679 /* we had a header consisting only in spaces ! */
680 msg->eol = msg->sov;
681 goto http_msg_complete_header;
682
683 case HTTP_MSG_HDR_VAL:
684 http_msg_hdr_val:
685 /* assumes msg->sol points to the first char, and msg->sov
686 * points to the first character of the value.
687 */
688
689 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
690 * and lower. In fact since most of the time is spent in the loop, we
691 * also remove the sign bit test so that bytes 0x8e..0x0d break the
692 * loop, but we don't care since they're very rare in header values.
693 */
694#if defined(__x86_64__)
695 while (ptr <= end - sizeof(long)) {
696 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
697 goto http_msg_hdr_val2;
698 ptr += sizeof(long);
699 }
700#endif
701#if defined(__x86_64__) || \
702 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
703 defined(__ARM_ARCH_7A__)
704 while (ptr <= end - sizeof(int)) {
705 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
706 goto http_msg_hdr_val2;
707 ptr += sizeof(int);
708 }
709#endif
710 if (ptr >= end) {
711 state = HTTP_MSG_HDR_VAL;
712 goto http_msg_ood;
713 }
714 http_msg_hdr_val2:
715 if (likely(!HTTP_IS_CRLF(*ptr)))
716 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
717
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200718 msg->eol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200719 /* Note: we could also copy eol into ->eoh so that we have the
720 * real header end in case it ends with lots of LWS, but is this
721 * really needed ?
722 */
723 if (likely(*ptr == '\r'))
724 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
725 goto http_msg_hdr_l2_lf;
726
727 case HTTP_MSG_HDR_L2_LF:
728 http_msg_hdr_l2_lf:
729 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
730 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
731
732 case HTTP_MSG_HDR_L2_LWS:
733 http_msg_hdr_l2_lws:
734 if (unlikely(HTTP_IS_SPHT(*ptr))) {
735 /* LWS: replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200736 for (; input + msg->eol < ptr; msg->eol++)
737 input[msg->eol] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200738 goto http_msg_hdr_val;
739 }
740 http_msg_complete_header:
741 /*
742 * It was a new header, so the last one is finished.
743 * Assumes msg->sol points to the first char, msg->sov points
744 * to the first character of the value and msg->eol to the
745 * first CR or LF so we know how the line ends. We insert last
746 * header into the index.
747 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200748 if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r',
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200749 idx, idx->tail) < 0)) {
750 state = HTTP_MSG_HDR_L2_LWS;
751 goto http_msg_invalid;
752 }
753
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200754 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200755 if (likely(!HTTP_IS_CRLF(*ptr))) {
756 goto http_msg_hdr_name;
757 }
758
759 if (likely(*ptr == '\r'))
760 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
761 goto http_msg_last_lf;
762
763 case HTTP_MSG_LAST_LF:
764 http_msg_last_lf:
765 /* Assumes msg->sol points to the first of either CR or LF.
766 * Sets ->sov and ->next to the total header length, ->eoh to
767 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
768 */
769 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
770 ptr++;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200771 msg->sov = msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200772 msg->eoh = msg->sol;
773 msg->sol = 0;
774 msg->eol = msg->sov - msg->eoh;
775 msg->msg_state = HTTP_MSG_BODY;
776 return;
777
778 case HTTP_MSG_ERROR:
779 /* this may only happen if we call http_msg_analyser() twice with an error */
780 break;
781
782 default:
783#ifdef DEBUG_FULL
784 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
785 exit(1);
786#endif
787 ;
788 }
789 http_msg_ood:
790 /* out of data */
791 msg->msg_state = state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200792 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200793 return;
794
795 http_msg_invalid:
796 /* invalid message */
797 msg->err_state = state;
798 msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200799 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200800 return;
801}
802
Willy Tarreau794f9af2017-07-26 09:07:47 +0200803/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
805 * pairs representing header fields into the array <hdr> of size <hdr_num>,
806 * whose last entry will have an empty name and an empty value. If <hdr_num> is
807 * too small to represent the whole message, an error is returned. If <h1m> is
808 * not NULL, some protocol elements such as content-length and transfer-encoding
809 * will be parsed and stored there as well.
810 *
811 * For now it's limited to the response. If the header block is incomplete,
812 * 0 is returned, waiting to be called again with more data to try it again.
813 *
814 * The code derived from the main HTTP/1 parser above but was simplified and
815 * optimized to process responses produced or forwarded by haproxy. The caller
816 * is responsible for ensuring that the message doesn't wrap, and should ensure
817 * it is complete to avoid having to retry the operation after a failed
818 * attempt. The message is not supposed to be invalid, which is why a few
819 * properties such as the character set used in the header field names are not
820 * checked. In case of an unparsable response message, a negative value will be
821 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
823 * recommended.
824 *
825 * This function returns :
826 * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
829 * -2 if the output is full (hdr_num reached). err_state and err_pos also
830 * indicate where it failed.
831 * 0 in case of missing data.
832 * > 0 on success, it then corresponds to the number of bytes read since
833 * <start> so that the caller can go on with the payload.
834 */
835int h1_headers_to_hdr_list(char *start, const char *stop,
836 struct http_hdr *hdr, unsigned int hdr_num,
837 struct h1m *h1m)
838{
839 enum h1_state state = HTTP_MSG_RPBEFORE;
840 register char *ptr = start;
841 register const char *end = stop;
842 unsigned int hdr_count = 0;
843 unsigned int code = 0; /* status code, ASCII form */
844 unsigned int st_c; /* beginning of status code, relative to msg_start */
845 unsigned int st_c_l; /* length of status code */
846 unsigned int sol = 0; /* start of line */
847 unsigned int col = 0; /* position of the colon */
848 unsigned int eol = 0; /* end of line */
849 unsigned int sov = 0; /* start of value */
850 unsigned int skip = 0; /* number of bytes skipped at the beginning */
851 struct ist n, v; /* header name and value during parsing */
852
853 if (unlikely(ptr >= end))
854 goto http_msg_ood;
855
856 switch (state) {
857 case HTTP_MSG_RPBEFORE:
858 http_msg_rpbefore:
859 if (likely(HTTP_IS_TOKEN(*ptr))) {
860 /* we have a start of message, we may have skipped some
861 * heading CRLF. Skip them now.
862 */
863 skip += ptr - start;
864 start = ptr;
865
866 sol = 0;
867 hdr_count = 0;
868 state = HTTP_MSG_RPVER;
869 goto http_msg_rpver;
870 }
871
872 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
873 state = HTTP_MSG_RPBEFORE;
874 goto http_msg_invalid;
875 }
876
877 if (unlikely(*ptr == '\n'))
878 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
879 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
880 /* stop here */
881
882 case HTTP_MSG_RPBEFORE_CR:
883 http_msg_rpbefore_cr:
884 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
885 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
886 /* stop here */
887
888 case HTTP_MSG_RPVER:
889 http_msg_rpver:
890 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
891 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
892
893 if (likely(HTTP_IS_SPHT(*ptr))) {
894 /* version length = ptr - start */
895 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
896 }
897 state = HTTP_MSG_RPVER;
898 goto http_msg_invalid;
899
900 case HTTP_MSG_RPVER_SP:
901 http_msg_rpver_sp:
902 if (likely(!HTTP_IS_LWS(*ptr))) {
903 code = 0;
904 st_c = ptr - start;
905 goto http_msg_rpcode;
906 }
907 if (likely(HTTP_IS_SPHT(*ptr)))
908 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
909 /* so it's a CR/LF, this is invalid */
910 state = HTTP_MSG_RPVER_SP;
911 goto http_msg_invalid;
912
913 case HTTP_MSG_RPCODE:
914 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100915 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100916 code = code * 10 + *ptr - '0';
Willy Tarreau794f9af2017-07-26 09:07:47 +0200917 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
918 }
919
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100920 if (unlikely(!HTTP_IS_LWS(*ptr))) {
921 state = HTTP_MSG_RPCODE;
922 goto http_msg_invalid;
923 }
924
Willy Tarreau794f9af2017-07-26 09:07:47 +0200925 if (likely(HTTP_IS_SPHT(*ptr))) {
926 st_c_l = ptr - start - st_c;
927 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
928 }
929
930 /* so it's a CR/LF, so there is no reason phrase */
931 st_c_l = ptr - start - st_c;
932
933 http_msg_rsp_reason:
934 /* reason = ptr - start; */
935 /* reason length = 0 */
936 goto http_msg_rpline_eol;
937
938 case HTTP_MSG_RPCODE_SP:
939 http_msg_rpcode_sp:
940 if (likely(!HTTP_IS_LWS(*ptr))) {
941 /* reason = ptr - start */
942 goto http_msg_rpreason;
943 }
944 if (likely(HTTP_IS_SPHT(*ptr)))
945 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
946 /* so it's a CR/LF, so there is no reason phrase */
947 goto http_msg_rsp_reason;
948
949 case HTTP_MSG_RPREASON:
950 http_msg_rpreason:
951 if (likely(!HTTP_IS_CRLF(*ptr)))
952 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
953 /* reason length = ptr - start - reason */
954 http_msg_rpline_eol:
955 /* We have seen the end of line. Note that we do not
956 * necessarily have the \n yet, but at least we know that we
957 * have EITHER \r OR \n, otherwise the response would not be
958 * complete. We can then record the response length and return
959 * to the caller which will be able to register it.
960 */
961
962 if (unlikely(hdr_count >= hdr_num)) {
963 state = HTTP_MSG_RPREASON;
964 goto http_output_full;
965 }
966 http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + st_c, st_c_l));
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100967 if (h1m)
968 h1m->status = code;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200969
970 sol = ptr - start;
971 if (likely(*ptr == '\r'))
972 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
973 goto http_msg_rpline_end;
974
975 case HTTP_MSG_RPLINE_END:
976 http_msg_rpline_end:
977 /* sol must point to the first of CR or LF. */
978 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
979 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
980 /* stop here */
981
982 case HTTP_MSG_HDR_FIRST:
983 http_msg_hdr_first:
984 sol = ptr - start;
985 if (likely(!HTTP_IS_CRLF(*ptr))) {
986 goto http_msg_hdr_name;
987 }
988
989 if (likely(*ptr == '\r'))
990 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
991 goto http_msg_last_lf;
992
993 case HTTP_MSG_HDR_NAME:
994 http_msg_hdr_name:
995 /* assumes sol points to the first char */
996 if (likely(HTTP_IS_TOKEN(*ptr))) {
997 /* turn it to lower case if needed */
998 if (isupper((unsigned char)*ptr))
999 *ptr = tolower(*ptr);
1000 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
1001 }
1002
1003 if (likely(*ptr == ':')) {
1004 col = ptr - start;
1005 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
1006 }
1007
1008 if (HTTP_IS_LWS(*ptr)) {
1009 state = HTTP_MSG_HDR_NAME;
1010 goto http_msg_invalid;
1011 }
1012
1013 /* now we have a non-token character in the header field name,
1014 * it's up to the H1 layer to have decided whether or not it
1015 * was acceptable. If we find it here, it was considered
1016 * acceptable due to configuration rules so we obey.
1017 */
1018 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
1019
1020 case HTTP_MSG_HDR_L1_SP:
1021 http_msg_hdr_l1_sp:
1022 /* assumes sol points to the first char */
1023 if (likely(HTTP_IS_SPHT(*ptr)))
1024 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
1025
1026 /* header value can be basically anything except CR/LF */
1027 sov = ptr - start;
1028
1029 if (likely(!HTTP_IS_CRLF(*ptr))) {
1030 goto http_msg_hdr_val;
1031 }
1032
1033 if (likely(*ptr == '\r'))
1034 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
1035 goto http_msg_hdr_l1_lf;
1036
1037 case HTTP_MSG_HDR_L1_LF:
1038 http_msg_hdr_l1_lf:
1039 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
1040 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
1041
1042 case HTTP_MSG_HDR_L1_LWS:
1043 http_msg_hdr_l1_lws:
1044 if (likely(HTTP_IS_SPHT(*ptr))) {
1045 /* replace HT,CR,LF with spaces */
1046 for (; start + sov < ptr; sov++)
1047 start[sov] = ' ';
1048 goto http_msg_hdr_l1_sp;
1049 }
1050 /* we had a header consisting only in spaces ! */
1051 eol = sov;
1052 goto http_msg_complete_header;
1053
1054 case HTTP_MSG_HDR_VAL:
1055 http_msg_hdr_val:
1056 /* assumes sol points to the first char, and sov
1057 * points to the first character of the value.
1058 */
1059
1060 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
1061 * and lower. In fact since most of the time is spent in the loop, we
1062 * also remove the sign bit test so that bytes 0x8e..0x0d break the
1063 * loop, but we don't care since they're very rare in header values.
1064 */
1065#if defined(__x86_64__)
1066 while (ptr <= end - sizeof(long)) {
1067 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
1068 goto http_msg_hdr_val2;
1069 ptr += sizeof(long);
1070 }
1071#endif
1072#if defined(__x86_64__) || \
1073 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
1074 defined(__ARM_ARCH_7A__)
1075 while (ptr <= end - sizeof(int)) {
1076 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
1077 goto http_msg_hdr_val2;
1078 ptr += sizeof(int);
1079 }
1080#endif
1081 if (ptr >= end) {
1082 state = HTTP_MSG_HDR_VAL;
1083 goto http_msg_ood;
1084 }
1085 http_msg_hdr_val2:
1086 if (likely(!HTTP_IS_CRLF(*ptr)))
1087 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
1088
1089 eol = ptr - start;
1090 /* Note: we could also copy eol into ->eoh so that we have the
1091 * real header end in case it ends with lots of LWS, but is this
1092 * really needed ?
1093 */
1094 if (likely(*ptr == '\r'))
1095 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
1096 goto http_msg_hdr_l2_lf;
1097
1098 case HTTP_MSG_HDR_L2_LF:
1099 http_msg_hdr_l2_lf:
1100 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
1101 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
1102
1103 case HTTP_MSG_HDR_L2_LWS:
1104 http_msg_hdr_l2_lws:
1105 if (unlikely(HTTP_IS_SPHT(*ptr))) {
1106 /* LWS: replace HT,CR,LF with spaces */
1107 for (; start + eol < ptr; eol++)
1108 start[eol] = ' ';
1109 goto http_msg_hdr_val;
1110 }
1111 http_msg_complete_header:
1112 /*
1113 * It was a new header, so the last one is finished. Assumes
1114 * <sol> points to the first char of the name, <col> to the
1115 * colon, <sov> points to the first character of the value and
1116 * <eol> to the first CR or LF so we know how the line ends. We
1117 * will trim spaces around the value. It's possible to do it by
1118 * adjusting <eol> and <sov> which are no more used after this.
1119 * We can add the header field to the list.
1120 */
1121 while (sov < eol && HTTP_IS_LWS(start[sov]))
1122 sov++;
1123
1124 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
1125 eol--;
1126
1127
1128 n = ist2(start + sol, col - sol);
1129 v = ist2(start + sov, eol - sov);
1130
1131 if (unlikely(hdr_count >= hdr_num)) {
1132 state = HTTP_MSG_HDR_L2_LWS;
1133 goto http_output_full;
1134 }
1135 http_set_hdr(&hdr[hdr_count++], n, v);
1136
1137 if (h1m) {
1138 long long cl;
1139
Willy Tarreaud22e83a2017-10-31 08:02:24 +01001140 if (h1m->status >= 100 && h1m->status < 200)
1141 h1m->curr_len = h1m->body_len = 0;
1142 else if (h1m->status == 304 || h1m->status == 204) {
Willy Tarreau8ea0f382017-10-30 19:31:59 +01001143 /* no contents, claim c-len is present and set to zero */
1144 h1m->flags |= H1_MF_CLEN;
1145 h1m->curr_len = h1m->body_len = 0;
1146 }
1147 else if (isteq(n, ist("transfer-encoding"))) {
Willy Tarreau794f9af2017-07-26 09:07:47 +02001148 h1m->flags &= ~H1_MF_CLEN;
1149 h1m->flags |= H1_MF_CHNK;
1150 }
1151 else if (isteq(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
1152 h1m->flags |= H1_MF_CLEN;
1153 strl2llrc(v.ptr, v.len, &cl);
1154 h1m->curr_len = h1m->body_len = cl;
1155 }
1156 }
1157
1158 sol = ptr - start;
1159 if (likely(!HTTP_IS_CRLF(*ptr)))
1160 goto http_msg_hdr_name;
1161
1162 if (likely(*ptr == '\r'))
1163 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
1164 goto http_msg_last_lf;
1165
1166 case HTTP_MSG_LAST_LF:
1167 http_msg_last_lf:
1168 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
1169 ptr++;
1170 /* <ptr> now points to the first byte of payload. If needed sol
1171 * still points to the first of either CR or LF of the empty
1172 * line ending the headers block.
1173 */
1174 if (unlikely(hdr_count >= hdr_num)) {
1175 state = HTTP_MSG_LAST_LF;
1176 goto http_output_full;
1177 }
1178 http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
1179 state = HTTP_MSG_BODY;
1180 break;
1181
1182 default:
1183 /* impossible states */
1184 goto http_msg_invalid;
1185 }
1186
1187 /* reaching here, we've parsed the whole message and the state is
1188 * HTTP_MSG_BODY.
1189 */
1190 return ptr - start + skip;
1191
1192 http_msg_ood:
1193 /* out of data at <ptr> during state <state> */
1194 return 0;
1195
1196 http_msg_invalid:
1197 /* invalid message, error at <ptr> */
1198 if (h1m) {
1199 h1m->err_state = state;
1200 h1m->err_pos = ptr - start + skip;
1201 }
1202 return -1;
1203
1204 http_output_full:
1205 /* no more room to store the current header, error at <ptr> */
1206 if (h1m) {
1207 h1m->err_state = state;
1208 h1m->err_pos = ptr - start + skip;
1209 }
1210 return -2;
1211}
1212
/* Measures the trailers block found at offset <ofs> in buffer <buf>, looking
 * at no more than <max> bytes past <ofs>. Lines are walked one at a time until
 * an empty line (a lone LF or a CRLF) is met, which marks the end of the
 * trailers. The buffer is only read, never modified.
 *
 * The function returns :
 *    > 0  the number of bytes from <ofs> up to and including the empty line,
 *         that the caller has to skip to get rid of the trailers ;
 *      0  if the scan reached <ofs>+<max> before the end of the trailers,
 *         meaning more data are needed ;
 *    < 0  if a line contains a second CR before its LF (parse error), in which
 *         case the caller decides how to proceed, possibly eating everything.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;

	for (;;) {
		const char *line = b_peek(buf, pos);
		const char *first_eol = NULL; /* first CR of the line, or its LF if none */
		const char *cur;

		/* look for the LF ending the current line */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur == '\r') {
				if (first_eol)
					return -1;
				first_eol = cur;
			}
		}

		/* <cur> is on the LF; without any CR it is also the first EOL char */
		if (!first_eol)
			first_eol = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* a line starting with its own CR/LF is empty: the trailers end
		 * here and everything up to <pos> belongs to them.
		 */
		if (first_eol == line)
			return pos - ofs;

		/* otherwise go on with the next line */
	}
}
1263
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001264/* This function skips trailers in the buffer associated with HTTP message
1265 * <msg>. The first visited position is msg->next. If the end of the trailers is
1266 * found, the function returns >0. So, the caller can automatically schedul it
1267 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
1268 * data are available, the function does not change anything except maybe
1269 * msg->sol if it could parse some lines, and returns zero. If a parse error
1270 * is encountered, the function returns < 0 and does not change anything except
1271 * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
1272 * state before calling this function, which implies that all non-trailers data
1273 * have already been scheduled for forwarding, and that msg->next exactly
1274 * matches the length of trailers already parsed and not forwarded. It is also
1275 * important to note that this function is designed to be able to parse wrapped
1276 * headers at end of buffer.
1277 */
1278int http_forward_trailers(struct http_msg *msg)
1279{
Willy Tarreauc9fa0482018-07-10 17:43:27 +02001280 const struct buffer *buf = &msg->chn->buf;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001281 const char *parse = ci_head(msg->chn);
1282 const char *stop = b_tail(buf);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001283
1284 /* we have msg->next which points to next line. Look for CRLF. But
1285 * first, we reset msg->sol */
1286 msg->sol = 0;
1287 while (1) {
1288 const char *p1 = NULL, *p2 = NULL;
Willy Tarreau188e2302018-06-15 11:11:53 +02001289 const char *start = c_ptr(msg->chn, msg->next + msg->sol);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001290 const char *ptr = start;
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001291
1292 /* scan current line and stop at LF or CRLF */
1293 while (1) {
1294 if (ptr == stop)
1295 return 0;
1296
1297 if (*ptr == '\n') {
1298 if (!p1)
1299 p1 = ptr;
1300 p2 = ptr;
1301 break;
1302 }
1303
1304 if (*ptr == '\r') {
1305 if (p1) {
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001306 msg->err_pos = b_dist(buf, parse, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001307 return -1;
1308 }
1309 p1 = ptr;
1310 }
1311
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001312 ptr = b_next(buf, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001313 }
1314
1315 /* after LF; point to beginning of next line */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001316 p2 = b_next(buf, p2);
1317 msg->sol += b_dist(buf, start, p2);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001318
1319 /* LF/CRLF at beginning of line => end of trailers at p2.
1320 * Everything was scheduled for forwarding, there's nothing left
1321 * from this message. */
1322 if (p1 == start)
1323 return 1;
1324
1325 /* OK, next line then */
1326 }
1327}