/*
 * HTTP/1 protocol analyzer
 *
 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020014#include <common/config.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020015#include <common/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020016
Willy Tarreau188e2302018-06-15 11:11:53 +020017#include <proto/channel.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020018#include <proto/h1.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020019#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020020
Willy Tarreau8740c8b2017-09-21 10:22:25 +020021/*
22 * This function parses a status line between <ptr> and <end>, starting with
23 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
24 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
25 * will give undefined results.
26 * Note that it is upon the caller's responsibility to ensure that ptr < end,
27 * and that msg->sol points to the beginning of the response.
28 * If a complete line is found (which implies that at least one CR or LF is
29 * found before <end>, the updated <ptr> is returned, otherwise NULL is
30 * returned indicating an incomplete line (which does not mean that parts have
31 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
32 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
33 * upon next call.
34 *
35 * This function was intentionally designed to be called from
36 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
37 * within its state machine and use the same macros, hence the need for same
38 * labels and variable names. Note that msg->sol is left unchanged.
39 */
40const char *http_parse_stsline(struct http_msg *msg,
41 enum h1_state state, const char *ptr, const char *end,
42 unsigned int *ret_ptr, enum h1_state *ret_state)
43{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +020044 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +020045
46 switch (state) {
47 case HTTP_MSG_RPVER:
48 http_msg_rpver:
49 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
50 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
51
52 if (likely(HTTP_IS_SPHT(*ptr))) {
53 msg->sl.st.v_l = ptr - msg_start;
54 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
55 }
56 msg->err_state = HTTP_MSG_RPVER;
57 state = HTTP_MSG_ERROR;
58 break;
59
60 case HTTP_MSG_RPVER_SP:
61 http_msg_rpver_sp:
62 if (likely(!HTTP_IS_LWS(*ptr))) {
63 msg->sl.st.c = ptr - msg_start;
64 goto http_msg_rpcode;
65 }
66 if (likely(HTTP_IS_SPHT(*ptr)))
67 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
68 /* so it's a CR/LF, this is invalid */
69 msg->err_state = HTTP_MSG_RPVER_SP;
70 state = HTTP_MSG_ERROR;
71 break;
72
73 case HTTP_MSG_RPCODE:
74 http_msg_rpcode:
75 if (likely(!HTTP_IS_LWS(*ptr)))
76 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
77
78 if (likely(HTTP_IS_SPHT(*ptr))) {
79 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
80 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
81 }
82
83 /* so it's a CR/LF, so there is no reason phrase */
84 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
85 http_msg_rsp_reason:
86 /* FIXME: should we support HTTP responses without any reason phrase ? */
87 msg->sl.st.r = ptr - msg_start;
88 msg->sl.st.r_l = 0;
89 goto http_msg_rpline_eol;
90
91 case HTTP_MSG_RPCODE_SP:
92 http_msg_rpcode_sp:
93 if (likely(!HTTP_IS_LWS(*ptr))) {
94 msg->sl.st.r = ptr - msg_start;
95 goto http_msg_rpreason;
96 }
97 if (likely(HTTP_IS_SPHT(*ptr)))
98 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
99 /* so it's a CR/LF, so there is no reason phrase */
100 goto http_msg_rsp_reason;
101
102 case HTTP_MSG_RPREASON:
103 http_msg_rpreason:
104 if (likely(!HTTP_IS_CRLF(*ptr)))
105 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
106 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
107 http_msg_rpline_eol:
108 /* We have seen the end of line. Note that we do not
109 * necessarily have the \n yet, but at least we know that we
110 * have EITHER \r OR \n, otherwise the response would not be
111 * complete. We can then record the response length and return
112 * to the caller which will be able to register it.
113 */
114 msg->sl.st.l = ptr - msg_start - msg->sol;
115 return ptr;
116
117 default:
118#ifdef DEBUG_FULL
119 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
120 exit(1);
121#endif
122 ;
123 }
124
125 http_msg_ood:
126 /* out of valid data */
127 if (ret_state)
128 *ret_state = state;
129 if (ret_ptr)
130 *ret_ptr = ptr - msg_start;
131 return NULL;
132}
133
134/*
135 * This function parses a request line between <ptr> and <end>, starting with
136 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
137 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
138 * will give undefined results.
139 * Note that it is upon the caller's responsibility to ensure that ptr < end,
140 * and that msg->sol points to the beginning of the request.
141 * If a complete line is found (which implies that at least one CR or LF is
142 * found before <end>, the updated <ptr> is returned, otherwise NULL is
143 * returned indicating an incomplete line (which does not mean that parts have
144 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
145 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
146 * upon next call.
147 *
148 * This function was intentionally designed to be called from
149 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
150 * within its state machine and use the same macros, hence the need for same
151 * labels and variable names. Note that msg->sol is left unchanged.
152 */
153const char *http_parse_reqline(struct http_msg *msg,
154 enum h1_state state, const char *ptr, const char *end,
155 unsigned int *ret_ptr, enum h1_state *ret_state)
156{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200157 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200158
159 switch (state) {
160 case HTTP_MSG_RQMETH:
161 http_msg_rqmeth:
162 if (likely(HTTP_IS_TOKEN(*ptr)))
163 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
164
165 if (likely(HTTP_IS_SPHT(*ptr))) {
166 msg->sl.rq.m_l = ptr - msg_start;
167 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
168 }
169
170 if (likely(HTTP_IS_CRLF(*ptr))) {
171 /* HTTP 0.9 request */
172 msg->sl.rq.m_l = ptr - msg_start;
173 http_msg_req09_uri:
174 msg->sl.rq.u = ptr - msg_start;
175 http_msg_req09_uri_e:
176 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
177 http_msg_req09_ver:
178 msg->sl.rq.v = ptr - msg_start;
179 msg->sl.rq.v_l = 0;
180 goto http_msg_rqline_eol;
181 }
182 msg->err_state = HTTP_MSG_RQMETH;
183 state = HTTP_MSG_ERROR;
184 break;
185
186 case HTTP_MSG_RQMETH_SP:
187 http_msg_rqmeth_sp:
188 if (likely(!HTTP_IS_LWS(*ptr))) {
189 msg->sl.rq.u = ptr - msg_start;
190 goto http_msg_rquri;
191 }
192 if (likely(HTTP_IS_SPHT(*ptr)))
193 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
194 /* so it's a CR/LF, meaning an HTTP 0.9 request */
195 goto http_msg_req09_uri;
196
197 case HTTP_MSG_RQURI:
198 http_msg_rquri:
199#if defined(__x86_64__) || \
200 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
201 defined(__ARM_ARCH_7A__)
202 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
203 while (ptr <= end - sizeof(int)) {
204 int x = *(int *)ptr - 0x21212121;
205 if (x & 0x80808080)
206 break;
207
208 x -= 0x5e5e5e5e;
209 if (!(x & 0x80808080))
210 break;
211
212 ptr += sizeof(int);
213 }
214#endif
215 if (ptr >= end) {
216 state = HTTP_MSG_RQURI;
217 goto http_msg_ood;
218 }
219 http_msg_rquri2:
220 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
221 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
222
223 if (likely(HTTP_IS_SPHT(*ptr))) {
224 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
225 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
226 }
227
228 if (likely((unsigned char)*ptr >= 128)) {
229 /* non-ASCII chars are forbidden unless option
230 * accept-invalid-http-request is enabled in the frontend.
231 * In any case, we capture the faulty char.
232 */
233 if (msg->err_pos < -1)
234 goto invalid_char;
235 if (msg->err_pos == -1)
236 msg->err_pos = ptr - msg_start;
237 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
238 }
239
240 if (likely(HTTP_IS_CRLF(*ptr))) {
241 /* so it's a CR/LF, meaning an HTTP 0.9 request */
242 goto http_msg_req09_uri_e;
243 }
244
245 /* OK forbidden chars, 0..31 or 127 */
246 invalid_char:
247 msg->err_pos = ptr - msg_start;
248 msg->err_state = HTTP_MSG_RQURI;
249 state = HTTP_MSG_ERROR;
250 break;
251
252 case HTTP_MSG_RQURI_SP:
253 http_msg_rquri_sp:
254 if (likely(!HTTP_IS_LWS(*ptr))) {
255 msg->sl.rq.v = ptr - msg_start;
256 goto http_msg_rqver;
257 }
258 if (likely(HTTP_IS_SPHT(*ptr)))
259 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
260 /* so it's a CR/LF, meaning an HTTP 0.9 request */
261 goto http_msg_req09_ver;
262
263 case HTTP_MSG_RQVER:
264 http_msg_rqver:
265 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
266 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
267
268 if (likely(HTTP_IS_CRLF(*ptr))) {
269 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
270 http_msg_rqline_eol:
271 /* We have seen the end of line. Note that we do not
272 * necessarily have the \n yet, but at least we know that we
273 * have EITHER \r OR \n, otherwise the request would not be
274 * complete. We can then record the request length and return
275 * to the caller which will be able to register it.
276 */
277 msg->sl.rq.l = ptr - msg_start - msg->sol;
278 return ptr;
279 }
280
281 /* neither an HTTP_VER token nor a CRLF */
282 msg->err_state = HTTP_MSG_RQVER;
283 state = HTTP_MSG_ERROR;
284 break;
285
286 default:
287#ifdef DEBUG_FULL
288 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
289 exit(1);
290#endif
291 ;
292 }
293
294 http_msg_ood:
295 /* out of valid data */
296 if (ret_state)
297 *ret_state = state;
298 if (ret_ptr)
299 *ret_ptr = ptr - msg_start;
300 return NULL;
301}
302
303/*
304 * This function parses an HTTP message, either a request or a response,
305 * depending on the initial msg->msg_state. The caller is responsible for
306 * ensuring that the message does not wrap. The function can be preempted
307 * everywhere when data are missing and recalled at the exact same location
308 * with no information loss. The message may even be realigned between two
309 * calls. The header index is re-initialized when switching from
310 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
311 * fields. Note that msg->sol will be initialized after completing the first
312 * state, so that none of the msg pointers has to be initialized prior to the
313 * first call.
314 */
315void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
316{
317 enum h1_state state; /* updated only when leaving the FSM */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200318 register const char *ptr, *end; /* request pointers, to avoid dereferences */
Willy Tarreau950a8a62018-09-06 10:48:15 +0200319 struct buffer *buf = &msg->chn->buf;
320 char *input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200321
322 state = msg->msg_state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200323 ptr = input + msg->next;
324 end = b_stop(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200325
326 if (unlikely(ptr >= end))
327 goto http_msg_ood;
328
329 switch (state) {
330 /*
331 * First, states that are specific to the response only.
332 * We check them first so that request and headers are
333 * closer to each other (accessed more often).
334 */
335 case HTTP_MSG_RPBEFORE:
336 http_msg_rpbefore:
337 if (likely(HTTP_IS_TOKEN(*ptr))) {
338 /* we have a start of message, but we have to check
339 * first if we need to remove some CRLF. We can only
340 * do this when o=0.
341 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200342 if (unlikely(ptr != input)) {
343 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200344 goto http_msg_ood;
345 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200346 b_del(buf, ptr - input);
Willy Tarreau950a8a62018-09-06 10:48:15 +0200347 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200348 }
349 msg->sol = 0;
350 msg->sl.st.l = 0; /* used in debug mode */
351 hdr_idx_init(idx);
352 state = HTTP_MSG_RPVER;
353 goto http_msg_rpver;
354 }
355
356 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
357 state = HTTP_MSG_RPBEFORE;
358 goto http_msg_invalid;
359 }
360
361 if (unlikely(*ptr == '\n'))
362 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
363 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
364 /* stop here */
365
366 case HTTP_MSG_RPBEFORE_CR:
367 http_msg_rpbefore_cr:
368 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
369 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
370 /* stop here */
371
372 case HTTP_MSG_RPVER:
373 http_msg_rpver:
374 case HTTP_MSG_RPVER_SP:
375 case HTTP_MSG_RPCODE:
376 case HTTP_MSG_RPCODE_SP:
377 case HTTP_MSG_RPREASON:
378 ptr = (char *)http_parse_stsline(msg,
379 state, ptr, end,
380 &msg->next, &msg->msg_state);
381 if (unlikely(!ptr))
382 return;
383
384 /* we have a full response and we know that we have either a CR
385 * or an LF at <ptr>.
386 */
387 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
388
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200389 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200390 if (likely(*ptr == '\r'))
391 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
392 goto http_msg_rpline_end;
393
394 case HTTP_MSG_RPLINE_END:
395 http_msg_rpline_end:
396 /* msg->sol must point to the first of CR or LF. */
397 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
398 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
399 /* stop here */
400
401 /*
402 * Second, states that are specific to the request only
403 */
404 case HTTP_MSG_RQBEFORE:
405 http_msg_rqbefore:
406 if (likely(HTTP_IS_TOKEN(*ptr))) {
407 /* we have a start of message, but we have to check
408 * first if we need to remove some CRLF. We can only
409 * do this when o=0.
410 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200411 if (likely(ptr != input)) {
412 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200413 goto http_msg_ood;
414 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200415 b_del(buf, ptr - input);
Willy Tarreau950a8a62018-09-06 10:48:15 +0200416 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200417 }
418 msg->sol = 0;
419 msg->sl.rq.l = 0; /* used in debug mode */
420 state = HTTP_MSG_RQMETH;
421 goto http_msg_rqmeth;
422 }
423
424 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
425 state = HTTP_MSG_RQBEFORE;
426 goto http_msg_invalid;
427 }
428
429 if (unlikely(*ptr == '\n'))
430 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
431 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
432 /* stop here */
433
434 case HTTP_MSG_RQBEFORE_CR:
435 http_msg_rqbefore_cr:
436 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
437 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
438 /* stop here */
439
440 case HTTP_MSG_RQMETH:
441 http_msg_rqmeth:
442 case HTTP_MSG_RQMETH_SP:
443 case HTTP_MSG_RQURI:
444 case HTTP_MSG_RQURI_SP:
445 case HTTP_MSG_RQVER:
446 ptr = (char *)http_parse_reqline(msg,
447 state, ptr, end,
448 &msg->next, &msg->msg_state);
449 if (unlikely(!ptr))
450 return;
451
452 /* we have a full request and we know that we have either a CR
453 * or an LF at <ptr>.
454 */
455 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
456
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200457 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200458 if (likely(*ptr == '\r'))
459 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
460 goto http_msg_rqline_end;
461
462 case HTTP_MSG_RQLINE_END:
463 http_msg_rqline_end:
464 /* check for HTTP/0.9 request : no version information available.
465 * msg->sol must point to the first of CR or LF.
466 */
467 if (unlikely(msg->sl.rq.v_l == 0))
468 goto http_msg_last_lf;
469
470 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
471 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
472 /* stop here */
473
474 /*
475 * Common states below
476 */
477 case HTTP_MSG_HDR_FIRST:
478 http_msg_hdr_first:
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200479 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200480 if (likely(!HTTP_IS_CRLF(*ptr))) {
481 goto http_msg_hdr_name;
482 }
483
484 if (likely(*ptr == '\r'))
485 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
486 goto http_msg_last_lf;
487
488 case HTTP_MSG_HDR_NAME:
489 http_msg_hdr_name:
490 /* assumes msg->sol points to the first char */
491 if (likely(HTTP_IS_TOKEN(*ptr)))
492 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
493
494 if (likely(*ptr == ':'))
495 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
496
497 if (likely(msg->err_pos < -1) || *ptr == '\n') {
498 state = HTTP_MSG_HDR_NAME;
499 goto http_msg_invalid;
500 }
501
502 if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200503 msg->err_pos = ptr - input; /* >= 0 now */
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200504
505 /* and we still accept this non-token character */
506 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
507
508 case HTTP_MSG_HDR_L1_SP:
509 http_msg_hdr_l1_sp:
510 /* assumes msg->sol points to the first char */
511 if (likely(HTTP_IS_SPHT(*ptr)))
512 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
513
514 /* header value can be basically anything except CR/LF */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200515 msg->sov = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200516
517 if (likely(!HTTP_IS_CRLF(*ptr))) {
518 goto http_msg_hdr_val;
519 }
520
521 if (likely(*ptr == '\r'))
522 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
523 goto http_msg_hdr_l1_lf;
524
525 case HTTP_MSG_HDR_L1_LF:
526 http_msg_hdr_l1_lf:
527 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
528 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
529
530 case HTTP_MSG_HDR_L1_LWS:
531 http_msg_hdr_l1_lws:
532 if (likely(HTTP_IS_SPHT(*ptr))) {
533 /* replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200534 for (; input + msg->sov < ptr; msg->sov++)
535 input[msg->sov] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200536 goto http_msg_hdr_l1_sp;
537 }
538 /* we had a header consisting only in spaces ! */
539 msg->eol = msg->sov;
540 goto http_msg_complete_header;
541
542 case HTTP_MSG_HDR_VAL:
543 http_msg_hdr_val:
544 /* assumes msg->sol points to the first char, and msg->sov
545 * points to the first character of the value.
546 */
547
548 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
549 * and lower. In fact since most of the time is spent in the loop, we
550 * also remove the sign bit test so that bytes 0x8e..0x0d break the
551 * loop, but we don't care since they're very rare in header values.
552 */
553#if defined(__x86_64__)
554 while (ptr <= end - sizeof(long)) {
555 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
556 goto http_msg_hdr_val2;
557 ptr += sizeof(long);
558 }
559#endif
560#if defined(__x86_64__) || \
561 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
562 defined(__ARM_ARCH_7A__)
563 while (ptr <= end - sizeof(int)) {
564 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
565 goto http_msg_hdr_val2;
566 ptr += sizeof(int);
567 }
568#endif
569 if (ptr >= end) {
570 state = HTTP_MSG_HDR_VAL;
571 goto http_msg_ood;
572 }
573 http_msg_hdr_val2:
574 if (likely(!HTTP_IS_CRLF(*ptr)))
575 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
576
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200577 msg->eol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200578 /* Note: we could also copy eol into ->eoh so that we have the
579 * real header end in case it ends with lots of LWS, but is this
580 * really needed ?
581 */
582 if (likely(*ptr == '\r'))
583 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
584 goto http_msg_hdr_l2_lf;
585
586 case HTTP_MSG_HDR_L2_LF:
587 http_msg_hdr_l2_lf:
588 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
589 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
590
591 case HTTP_MSG_HDR_L2_LWS:
592 http_msg_hdr_l2_lws:
593 if (unlikely(HTTP_IS_SPHT(*ptr))) {
594 /* LWS: replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200595 for (; input + msg->eol < ptr; msg->eol++)
596 input[msg->eol] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200597 goto http_msg_hdr_val;
598 }
599 http_msg_complete_header:
600 /*
601 * It was a new header, so the last one is finished.
602 * Assumes msg->sol points to the first char, msg->sov points
603 * to the first character of the value and msg->eol to the
604 * first CR or LF so we know how the line ends. We insert last
605 * header into the index.
606 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200607 if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r',
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200608 idx, idx->tail) < 0)) {
609 state = HTTP_MSG_HDR_L2_LWS;
610 goto http_msg_invalid;
611 }
612
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200613 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200614 if (likely(!HTTP_IS_CRLF(*ptr))) {
615 goto http_msg_hdr_name;
616 }
617
618 if (likely(*ptr == '\r'))
619 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
620 goto http_msg_last_lf;
621
622 case HTTP_MSG_LAST_LF:
623 http_msg_last_lf:
624 /* Assumes msg->sol points to the first of either CR or LF.
625 * Sets ->sov and ->next to the total header length, ->eoh to
626 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
627 */
628 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
629 ptr++;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200630 msg->sov = msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200631 msg->eoh = msg->sol;
632 msg->sol = 0;
633 msg->eol = msg->sov - msg->eoh;
634 msg->msg_state = HTTP_MSG_BODY;
635 return;
636
637 case HTTP_MSG_ERROR:
638 /* this may only happen if we call http_msg_analyser() twice with an error */
639 break;
640
641 default:
642#ifdef DEBUG_FULL
643 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
644 exit(1);
645#endif
646 ;
647 }
648 http_msg_ood:
649 /* out of data */
650 msg->msg_state = state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200651 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200652 return;
653
654 http_msg_invalid:
655 /* invalid message */
656 msg->err_state = state;
657 msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200658 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200659 return;
660}
661
/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
 * pairs representing header fields into the array <hdr> of size <hdr_num>,
 * whose last entry will have an empty name and an empty value. If <hdr_num> is
 * too small to represent the whole message, an error is returned. Some
 * protocol elements such as content-length and transfer-encoding will be
 * parsed and stored into h1m as well. <hdr> may be null, in which case only
 * the parsing state will be updated. This may be used to restart the parsing
 * where it stopped for example.
 *
 * For now it's limited to the response. If the header block is incomplete,
 * 0 is returned, waiting to be called again with more data to try it again.
 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
 * and h1m->next to zero on the first call, the parser will do the rest. If
 * an incomplete message is seen, the caller only needs to present h1m->state
 * and h1m->next again, with an empty header list so that the parser can start
 * again. In this case, it will detect that it interrupted a previous session
 * and will first look for the end of the message before reparsing it again and
 * indexing it at the same time. This ensures that incomplete messages fed 1
 * character at a time are never processed entirely more than exactly twice,
 * and that there is no need to store all the internal state and pre-parsed
 * headers or start line between calls.
 *
 * A pointer to a start line descriptor may be passed in <slp>, in which case
 * the parser will fill it with whatever it found.
 *
 * The code derived from the main HTTP/1 parser above but was simplified and
 * optimized to process responses produced or forwarded by haproxy. The caller
 * is responsible for ensuring that the message doesn't wrap, and should ensure
 * it is complete to avoid having to retry the operation after a failed
 * attempt. The message is not supposed to be invalid, which is why a few
 * properties such as the character set used in the header field names are not
 * checked. In case of an unparsable response message, a negative value will be
 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
 * recommended.
 *
 * This function returns :
 *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
 *    -2 if the output is full (hdr_num reached). err_state and err_pos also
 *       indicate where it failed.
 *     0 in case of missing data.
 *   > 0 on success, it then corresponds to the number of bytes read since
 *       <start> so that the caller can go on with the payload.
 */
709int h1_headers_to_hdr_list(char *start, const char *stop,
710 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200711 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200712{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200713 enum h1m_state state;
714 register char *ptr;
715 register const char *end;
716 unsigned int hdr_count;
717 unsigned int skip; /* number of bytes skipped at the beginning */
718 unsigned int sol; /* start of line */
719 unsigned int col; /* position of the colon */
720 unsigned int eol; /* end of line */
721 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200722 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200723 int skip_update;
724 int restarting;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200725 struct ist n, v; /* header name and value during parsing */
726
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200727 skip = 0; // do it only once to keep track of the leading CRLF.
728
729 try_again:
730 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200731 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200732 skip_update = restarting = 0;
733
734 ptr = start + h1m->next;
735 end = stop;
736 state = h1m->state;
737
738 if (state != H1_MSG_RPBEFORE)
739 restarting = 1;
740
Willy Tarreau794f9af2017-07-26 09:07:47 +0200741 if (unlikely(ptr >= end))
742 goto http_msg_ood;
743
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200744 /* don't update output if hdr is NULL or if we're restarting */
745 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200746 skip_update = 1;
747
Willy Tarreau794f9af2017-07-26 09:07:47 +0200748 switch (state) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200749 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200750 http_msg_rpbefore:
751 if (likely(HTTP_IS_TOKEN(*ptr))) {
752 /* we have a start of message, we may have skipped some
753 * heading CRLF. Skip them now.
754 */
755 skip += ptr - start;
756 start = ptr;
757
758 sol = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200759 sl.st.v = skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200760 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200761 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200762 goto http_msg_rpver;
763 }
764
765 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200766 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200767 goto http_msg_invalid;
768 }
769
770 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200771 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
772 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200773 /* stop here */
774
Willy Tarreau801250e2018-09-11 11:45:04 +0200775 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200776 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200777 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
778 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200779 /* stop here */
780
Willy Tarreau801250e2018-09-11 11:45:04 +0200781 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200782 http_msg_rpver:
783 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200784 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200785
786 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200787 sl.st.v_l = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200788 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200789 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200790 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200791 goto http_msg_invalid;
792
Willy Tarreau801250e2018-09-11 11:45:04 +0200793 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200794 http_msg_rpver_sp:
795 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200796 sl.st.status = 0;
797 sl.st.c = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200798 goto http_msg_rpcode;
799 }
800 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200801 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200802 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200803 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200804 goto http_msg_invalid;
805
Willy Tarreau801250e2018-09-11 11:45:04 +0200806 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200807 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100808 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200809 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200810 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200811 }
812
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100813 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200814 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100815 goto http_msg_invalid;
816 }
817
Willy Tarreau794f9af2017-07-26 09:07:47 +0200818 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200819 sl.st.c_l = ptr - start + skip - sl.st.c;
Willy Tarreau801250e2018-09-11 11:45:04 +0200820 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200821 }
822
823 /* so it's a CR/LF, so there is no reason phrase */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200824 sl.st.c_l = ptr - start + skip - sl.st.c;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200825
826 http_msg_rsp_reason:
Willy Tarreaua41393f2018-09-11 15:34:50 +0200827 sl.st.r = ptr - start + skip;
828 sl.st.r_l = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200829 goto http_msg_rpline_eol;
830
Willy Tarreau801250e2018-09-11 11:45:04 +0200831 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200832 http_msg_rpcode_sp:
833 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200834 sl.st.r = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200835 goto http_msg_rpreason;
836 }
837 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200838 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200839 /* so it's a CR/LF, so there is no reason phrase */
840 goto http_msg_rsp_reason;
841
Willy Tarreau801250e2018-09-11 11:45:04 +0200842 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200843 http_msg_rpreason:
844 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200845 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Willy Tarreaua41393f2018-09-11 15:34:50 +0200846 sl.st.r_l = ptr - start + skip - sl.st.r;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200847 http_msg_rpline_eol:
848 /* We have seen the end of line. Note that we do not
849 * necessarily have the \n yet, but at least we know that we
850 * have EITHER \r OR \n, otherwise the response would not be
851 * complete. We can then record the response length and return
852 * to the caller which will be able to register it.
853 */
854
Willy Tarreau5384aac2018-09-11 16:04:48 +0200855 if (likely(!skip_update)) {
856 if (unlikely(hdr_count >= hdr_num)) {
857 state = H1_MSG_RPREASON;
858 goto http_output_full;
859 }
860 http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + sl.st.c, sl.st.c_l));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200861 }
Willy Tarreaud22e83a2017-10-31 08:02:24 +0100862 if (h1m)
Willy Tarreaua41393f2018-09-11 15:34:50 +0200863 h1m->status = sl.st.status;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200864
865 sol = ptr - start;
866 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200867 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200868 goto http_msg_rpline_end;
869
Willy Tarreau801250e2018-09-11 11:45:04 +0200870 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200871 http_msg_rpline_end:
872 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200873 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
874 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200875 /* stop here */
876
Willy Tarreau801250e2018-09-11 11:45:04 +0200877 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200878 http_msg_hdr_first:
879 sol = ptr - start;
880 if (likely(!HTTP_IS_CRLF(*ptr))) {
881 goto http_msg_hdr_name;
882 }
883
884 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200885 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200886 goto http_msg_last_lf;
887
Willy Tarreau801250e2018-09-11 11:45:04 +0200888 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200889 http_msg_hdr_name:
890 /* assumes sol points to the first char */
891 if (likely(HTTP_IS_TOKEN(*ptr))) {
892 /* turn it to lower case if needed */
893 if (isupper((unsigned char)*ptr))
894 *ptr = tolower(*ptr);
Willy Tarreau801250e2018-09-11 11:45:04 +0200895 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200896 }
897
898 if (likely(*ptr == ':')) {
899 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200900 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200901 }
902
Willy Tarreau9aec3052018-09-12 09:20:40 +0200903 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200904 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200905 goto http_msg_invalid;
906 }
907
Willy Tarreau9aec3052018-09-12 09:20:40 +0200908 if (h1m->err_pos == -1) /* capture the error pointer */
909 h1m->err_pos = ptr - start + skip; /* >= 0 now */
910
911 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200912 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200913
Willy Tarreau801250e2018-09-11 11:45:04 +0200914 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200915 http_msg_hdr_l1_sp:
916 /* assumes sol points to the first char */
917 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200918 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200919
920 /* header value can be basically anything except CR/LF */
921 sov = ptr - start;
922
923 if (likely(!HTTP_IS_CRLF(*ptr))) {
924 goto http_msg_hdr_val;
925 }
926
927 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200928 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200929 goto http_msg_hdr_l1_lf;
930
Willy Tarreau801250e2018-09-11 11:45:04 +0200931 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200932 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200933 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
934 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200935
Willy Tarreau801250e2018-09-11 11:45:04 +0200936 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200937 http_msg_hdr_l1_lws:
938 if (likely(HTTP_IS_SPHT(*ptr))) {
939 /* replace HT,CR,LF with spaces */
940 for (; start + sov < ptr; sov++)
941 start[sov] = ' ';
942 goto http_msg_hdr_l1_sp;
943 }
944 /* we had a header consisting only in spaces ! */
945 eol = sov;
946 goto http_msg_complete_header;
947
Willy Tarreau801250e2018-09-11 11:45:04 +0200948 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200949 http_msg_hdr_val:
950 /* assumes sol points to the first char, and sov
951 * points to the first character of the value.
952 */
953
954 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
955 * and lower. In fact since most of the time is spent in the loop, we
956 * also remove the sign bit test so that bytes 0x8e..0x0d break the
957 * loop, but we don't care since they're very rare in header values.
958 */
959#if defined(__x86_64__)
960 while (ptr <= end - sizeof(long)) {
961 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
962 goto http_msg_hdr_val2;
963 ptr += sizeof(long);
964 }
965#endif
966#if defined(__x86_64__) || \
967 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
968 defined(__ARM_ARCH_7A__)
969 while (ptr <= end - sizeof(int)) {
970 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
971 goto http_msg_hdr_val2;
972 ptr += sizeof(int);
973 }
974#endif
975 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200976 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200977 goto http_msg_ood;
978 }
979 http_msg_hdr_val2:
980 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200981 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200982
983 eol = ptr - start;
984 /* Note: we could also copy eol into ->eoh so that we have the
985 * real header end in case it ends with lots of LWS, but is this
986 * really needed ?
987 */
988 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200989 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200990 goto http_msg_hdr_l2_lf;
991
Willy Tarreau801250e2018-09-11 11:45:04 +0200992 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200993 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200994 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
995 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200996
Willy Tarreau801250e2018-09-11 11:45:04 +0200997 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200998 http_msg_hdr_l2_lws:
999 if (unlikely(HTTP_IS_SPHT(*ptr))) {
1000 /* LWS: replace HT,CR,LF with spaces */
1001 for (; start + eol < ptr; eol++)
1002 start[eol] = ' ';
1003 goto http_msg_hdr_val;
1004 }
1005 http_msg_complete_header:
1006 /*
1007 * It was a new header, so the last one is finished. Assumes
1008 * <sol> points to the first char of the name, <col> to the
1009 * colon, <sov> points to the first character of the value and
1010 * <eol> to the first CR or LF so we know how the line ends. We
1011 * will trim spaces around the value. It's possible to do it by
1012 * adjusting <eol> and <sov> which are no more used after this.
1013 * We can add the header field to the list.
1014 */
1015 while (sov < eol && HTTP_IS_LWS(start[sov]))
1016 sov++;
1017
1018 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
1019 eol--;
1020
1021
1022 n = ist2(start + sol, col - sol);
1023 v = ist2(start + sov, eol - sov);
1024
Willy Tarreau5384aac2018-09-11 16:04:48 +02001025 if (likely(!skip_update)) {
Willy Tarreau794f9af2017-07-26 09:07:47 +02001026 long long cl;
1027
Willy Tarreau5384aac2018-09-11 16:04:48 +02001028 if (unlikely(hdr_count >= hdr_num)) {
1029 state = H1_MSG_HDR_L2_LWS;
1030 goto http_output_full;
1031 }
1032
1033 http_set_hdr(&hdr[hdr_count++], n, v);
1034
Willy Tarreaud22e83a2017-10-31 08:02:24 +01001035 if (h1m->status >= 100 && h1m->status < 200)
1036 h1m->curr_len = h1m->body_len = 0;
1037 else if (h1m->status == 304 || h1m->status == 204) {
Willy Tarreau8ea0f382017-10-30 19:31:59 +01001038 /* no contents, claim c-len is present and set to zero */
1039 h1m->flags |= H1_MF_CLEN;
1040 h1m->curr_len = h1m->body_len = 0;
1041 }
1042 else if (isteq(n, ist("transfer-encoding"))) {
Willy Tarreau794f9af2017-07-26 09:07:47 +02001043 h1m->flags &= ~H1_MF_CLEN;
1044 h1m->flags |= H1_MF_CHNK;
1045 }
1046 else if (isteq(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
1047 h1m->flags |= H1_MF_CLEN;
1048 strl2llrc(v.ptr, v.len, &cl);
1049 h1m->curr_len = h1m->body_len = cl;
1050 }
1051 }
1052
1053 sol = ptr - start;
1054 if (likely(!HTTP_IS_CRLF(*ptr)))
1055 goto http_msg_hdr_name;
1056
1057 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001058 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001059 goto http_msg_last_lf;
1060
Willy Tarreau801250e2018-09-11 11:45:04 +02001061 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001062 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +02001063 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001064 ptr++;
1065 /* <ptr> now points to the first byte of payload. If needed sol
1066 * still points to the first of either CR or LF of the empty
1067 * line ending the headers block.
1068 */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001069 if (likely(!skip_update)) {
1070 if (unlikely(hdr_count >= hdr_num)) {
1071 state = H1_MSG_LAST_LF;
1072 goto http_output_full;
1073 }
1074 http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +02001075 }
Willy Tarreau001823c2018-09-12 17:25:32 +02001076
1077 /* reaching here we've parsed the whole message. We may detect
1078 * that we were already continuing an interrupted parsing pass
1079 * so we were silently looking for the end of message not
1080 * updating anything before deciding to parse it fully at once.
1081 * It's guaranteed that we won't match this test twice in a row
1082 * since restarting will turn zero.
1083 */
1084 if (restarting)
1085 goto restart;
1086
1087 if (h1m->flags & H1_MF_CHNK)
1088 state = H1_MSG_CHUNK_SIZE;
1089 else
1090 state = H1_MSG_DATA;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001091 break;
1092
1093 default:
1094 /* impossible states */
1095 goto http_msg_invalid;
1096 }
1097
Willy Tarreau001823c2018-09-12 17:25:32 +02001098 /* Now we've left the headers state and are either in H1_MSG_DATA or
1099 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001100 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001101
Willy Tarreau5384aac2018-09-11 16:04:48 +02001102 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001103 *slp = sl;
1104
Willy Tarreau4433c082018-09-11 15:33:32 +02001105 h1m->state = state;
1106 h1m->next = ptr - start + skip;
1107 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001108
1109 http_msg_ood:
1110 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001111 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001112 *slp = sl;
1113
Willy Tarreau4433c082018-09-11 15:33:32 +02001114 h1m->state = state;
1115 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001116 return 0;
1117
1118 http_msg_invalid:
1119 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001120 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001121 *slp = sl;
1122
Willy Tarreau4433c082018-09-11 15:33:32 +02001123 h1m->err_state = h1m->state = state;
1124 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001125 return -1;
1126
1127 http_output_full:
1128 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001129 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001130 *slp = sl;
1131
Willy Tarreau4433c082018-09-11 15:33:32 +02001132 h1m->err_state = h1m->state = state;
1133 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001134 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001135
1136 restart:
1137 h1m->next = 0;
1138 h1m->state = H1_MSG_RPBEFORE;
1139 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001140}
1141
/* Measures the trailers block stored in <buf> from offset <ofs>, without
 * looking past <ofs> + <max>. The contents of the lines are not interpreted,
 * only their boundaries are searched: a line runs up to its first LF, and a
 * single CR is tolerated before that LF. The block is considered finished as
 * soon as a line starts with its CR or LF (i.e. the empty line).
 *
 * Return values:
 *   > 0 : number of bytes between <ofs> and the end of the line which
 *         terminates the block, which is what the caller has to skip to get
 *         rid of the trailers ;
 *     0 : the limit was reached before the end of the block, more input data
 *         are needed ;
 *    -1 : parse error, a second CR was met on a line before its LF. The
 *         caller decides how to proceed, possibly by eating everything.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;

	for (;;) {
		const char *line = b_peek(buf, pos);
		const char *eol = NULL; /* first CR or LF met on the current line */
		const char *cur;

		/* walk the current line one byte at a time until its LF */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur != '\r')
				continue;

			/* this is a CR, and only one is accepted per line */
			if (eol)
				return -1;
			eol = cur;
		}

		/* <cur> is on the LF. If no CR was met before it, the LF is
		 * the end-of-line mark itself.
		 */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* A line starting with its CR/LF marks the end of the trailers.
		 * Otherwise go on with the next line.
		 */
		if (eol == line)
			return pos - ofs;
	}
}
1192
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001193/* This function skips trailers in the buffer associated with HTTP message
1194 * <msg>. The first visited position is msg->next. If the end of the trailers is
1195 * found, the function returns >0. So, the caller can automatically schedul it
1196 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
1197 * data are available, the function does not change anything except maybe
1198 * msg->sol if it could parse some lines, and returns zero. If a parse error
1199 * is encountered, the function returns < 0 and does not change anything except
1200 * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
1201 * state before calling this function, which implies that all non-trailers data
1202 * have already been scheduled for forwarding, and that msg->next exactly
1203 * matches the length of trailers already parsed and not forwarded. It is also
1204 * important to note that this function is designed to be able to parse wrapped
1205 * headers at end of buffer.
1206 */
1207int http_forward_trailers(struct http_msg *msg)
1208{
Willy Tarreauc9fa0482018-07-10 17:43:27 +02001209 const struct buffer *buf = &msg->chn->buf;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001210 const char *parse = ci_head(msg->chn);
1211 const char *stop = b_tail(buf);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001212
1213 /* we have msg->next which points to next line. Look for CRLF. But
1214 * first, we reset msg->sol */
1215 msg->sol = 0;
1216 while (1) {
1217 const char *p1 = NULL, *p2 = NULL;
Willy Tarreau188e2302018-06-15 11:11:53 +02001218 const char *start = c_ptr(msg->chn, msg->next + msg->sol);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001219 const char *ptr = start;
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001220
1221 /* scan current line and stop at LF or CRLF */
1222 while (1) {
1223 if (ptr == stop)
1224 return 0;
1225
1226 if (*ptr == '\n') {
1227 if (!p1)
1228 p1 = ptr;
1229 p2 = ptr;
1230 break;
1231 }
1232
1233 if (*ptr == '\r') {
1234 if (p1) {
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001235 msg->err_pos = b_dist(buf, parse, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001236 return -1;
1237 }
1238 p1 = ptr;
1239 }
1240
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001241 ptr = b_next(buf, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001242 }
1243
1244 /* after LF; point to beginning of next line */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001245 p2 = b_next(buf, p2);
1246 msg->sol += b_dist(buf, start, p2);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001247
1248 /* LF/CRLF at beginning of line => end of trailers at p2.
1249 * Everything was scheduled for forwarding, there's nothing left
1250 * from this message. */
1251 if (p1 == start)
1252 return 1;
1253
1254 /* OK, next line then */
1255 }
1256}