/*
 * HTTP/1 protocol analyzer
 *
 * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreau794f9af2017-07-26 09:07:47 +020013#include <ctype.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020014#include <common/config.h>
Willy Tarreau794f9af2017-07-26 09:07:47 +020015#include <common/http-hdr.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020016
Willy Tarreau188e2302018-06-15 11:11:53 +020017#include <proto/channel.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020018#include <proto/h1.h>
Willy Tarreau8740c8b2017-09-21 10:22:25 +020019#include <proto/hdr_idx.h>
Willy Tarreau0da5b3b2017-09-21 09:30:46 +020020
Willy Tarreau8740c8b2017-09-21 10:22:25 +020021/*
22 * This function parses a status line between <ptr> and <end>, starting with
23 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
24 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
25 * will give undefined results.
26 * Note that it is upon the caller's responsibility to ensure that ptr < end,
27 * and that msg->sol points to the beginning of the response.
28 * If a complete line is found (which implies that at least one CR or LF is
29 * found before <end>, the updated <ptr> is returned, otherwise NULL is
30 * returned indicating an incomplete line (which does not mean that parts have
31 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
32 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
33 * upon next call.
34 *
35 * This function was intentionally designed to be called from
36 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
37 * within its state machine and use the same macros, hence the need for same
38 * labels and variable names. Note that msg->sol is left unchanged.
39 */
40const char *http_parse_stsline(struct http_msg *msg,
41 enum h1_state state, const char *ptr, const char *end,
42 unsigned int *ret_ptr, enum h1_state *ret_state)
43{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +020044 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +020045
46 switch (state) {
47 case HTTP_MSG_RPVER:
48 http_msg_rpver:
49 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
50 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
51
52 if (likely(HTTP_IS_SPHT(*ptr))) {
53 msg->sl.st.v_l = ptr - msg_start;
54 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
55 }
56 msg->err_state = HTTP_MSG_RPVER;
57 state = HTTP_MSG_ERROR;
58 break;
59
60 case HTTP_MSG_RPVER_SP:
61 http_msg_rpver_sp:
62 if (likely(!HTTP_IS_LWS(*ptr))) {
63 msg->sl.st.c = ptr - msg_start;
64 goto http_msg_rpcode;
65 }
66 if (likely(HTTP_IS_SPHT(*ptr)))
67 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
68 /* so it's a CR/LF, this is invalid */
69 msg->err_state = HTTP_MSG_RPVER_SP;
70 state = HTTP_MSG_ERROR;
71 break;
72
73 case HTTP_MSG_RPCODE:
74 http_msg_rpcode:
75 if (likely(!HTTP_IS_LWS(*ptr)))
76 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
77
78 if (likely(HTTP_IS_SPHT(*ptr))) {
79 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
80 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
81 }
82
83 /* so it's a CR/LF, so there is no reason phrase */
84 msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
85 http_msg_rsp_reason:
86 /* FIXME: should we support HTTP responses without any reason phrase ? */
87 msg->sl.st.r = ptr - msg_start;
88 msg->sl.st.r_l = 0;
89 goto http_msg_rpline_eol;
90
91 case HTTP_MSG_RPCODE_SP:
92 http_msg_rpcode_sp:
93 if (likely(!HTTP_IS_LWS(*ptr))) {
94 msg->sl.st.r = ptr - msg_start;
95 goto http_msg_rpreason;
96 }
97 if (likely(HTTP_IS_SPHT(*ptr)))
98 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
99 /* so it's a CR/LF, so there is no reason phrase */
100 goto http_msg_rsp_reason;
101
102 case HTTP_MSG_RPREASON:
103 http_msg_rpreason:
104 if (likely(!HTTP_IS_CRLF(*ptr)))
105 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
106 msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
107 http_msg_rpline_eol:
108 /* We have seen the end of line. Note that we do not
109 * necessarily have the \n yet, but at least we know that we
110 * have EITHER \r OR \n, otherwise the response would not be
111 * complete. We can then record the response length and return
112 * to the caller which will be able to register it.
113 */
114 msg->sl.st.l = ptr - msg_start - msg->sol;
115 return ptr;
116
117 default:
118#ifdef DEBUG_FULL
119 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
120 exit(1);
121#endif
122 ;
123 }
124
125 http_msg_ood:
126 /* out of valid data */
127 if (ret_state)
128 *ret_state = state;
129 if (ret_ptr)
130 *ret_ptr = ptr - msg_start;
131 return NULL;
132}
133
134/*
135 * This function parses a request line between <ptr> and <end>, starting with
136 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
137 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
138 * will give undefined results.
139 * Note that it is upon the caller's responsibility to ensure that ptr < end,
140 * and that msg->sol points to the beginning of the request.
141 * If a complete line is found (which implies that at least one CR or LF is
142 * found before <end>, the updated <ptr> is returned, otherwise NULL is
143 * returned indicating an incomplete line (which does not mean that parts have
144 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
145 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
146 * upon next call.
147 *
148 * This function was intentionally designed to be called from
149 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
150 * within its state machine and use the same macros, hence the need for same
151 * labels and variable names. Note that msg->sol is left unchanged.
152 */
153const char *http_parse_reqline(struct http_msg *msg,
154 enum h1_state state, const char *ptr, const char *end,
155 unsigned int *ret_ptr, enum h1_state *ret_state)
156{
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200157 const char *msg_start = ci_head(msg->chn);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200158
159 switch (state) {
160 case HTTP_MSG_RQMETH:
161 http_msg_rqmeth:
162 if (likely(HTTP_IS_TOKEN(*ptr)))
163 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
164
165 if (likely(HTTP_IS_SPHT(*ptr))) {
166 msg->sl.rq.m_l = ptr - msg_start;
167 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
168 }
169
170 if (likely(HTTP_IS_CRLF(*ptr))) {
171 /* HTTP 0.9 request */
172 msg->sl.rq.m_l = ptr - msg_start;
173 http_msg_req09_uri:
174 msg->sl.rq.u = ptr - msg_start;
175 http_msg_req09_uri_e:
176 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
177 http_msg_req09_ver:
178 msg->sl.rq.v = ptr - msg_start;
179 msg->sl.rq.v_l = 0;
180 goto http_msg_rqline_eol;
181 }
182 msg->err_state = HTTP_MSG_RQMETH;
183 state = HTTP_MSG_ERROR;
184 break;
185
186 case HTTP_MSG_RQMETH_SP:
187 http_msg_rqmeth_sp:
188 if (likely(!HTTP_IS_LWS(*ptr))) {
189 msg->sl.rq.u = ptr - msg_start;
190 goto http_msg_rquri;
191 }
192 if (likely(HTTP_IS_SPHT(*ptr)))
193 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
194 /* so it's a CR/LF, meaning an HTTP 0.9 request */
195 goto http_msg_req09_uri;
196
197 case HTTP_MSG_RQURI:
198 http_msg_rquri:
199#if defined(__x86_64__) || \
200 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
201 defined(__ARM_ARCH_7A__)
202 /* speedup: skip bytes not between 0x21 and 0x7e inclusive */
203 while (ptr <= end - sizeof(int)) {
204 int x = *(int *)ptr - 0x21212121;
205 if (x & 0x80808080)
206 break;
207
208 x -= 0x5e5e5e5e;
209 if (!(x & 0x80808080))
210 break;
211
212 ptr += sizeof(int);
213 }
214#endif
215 if (ptr >= end) {
216 state = HTTP_MSG_RQURI;
217 goto http_msg_ood;
218 }
219 http_msg_rquri2:
220 if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
221 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
222
223 if (likely(HTTP_IS_SPHT(*ptr))) {
224 msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
225 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
226 }
227
228 if (likely((unsigned char)*ptr >= 128)) {
229 /* non-ASCII chars are forbidden unless option
230 * accept-invalid-http-request is enabled in the frontend.
231 * In any case, we capture the faulty char.
232 */
233 if (msg->err_pos < -1)
234 goto invalid_char;
235 if (msg->err_pos == -1)
236 msg->err_pos = ptr - msg_start;
237 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
238 }
239
240 if (likely(HTTP_IS_CRLF(*ptr))) {
241 /* so it's a CR/LF, meaning an HTTP 0.9 request */
242 goto http_msg_req09_uri_e;
243 }
244
245 /* OK forbidden chars, 0..31 or 127 */
246 invalid_char:
247 msg->err_pos = ptr - msg_start;
248 msg->err_state = HTTP_MSG_RQURI;
249 state = HTTP_MSG_ERROR;
250 break;
251
252 case HTTP_MSG_RQURI_SP:
253 http_msg_rquri_sp:
254 if (likely(!HTTP_IS_LWS(*ptr))) {
255 msg->sl.rq.v = ptr - msg_start;
256 goto http_msg_rqver;
257 }
258 if (likely(HTTP_IS_SPHT(*ptr)))
259 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
260 /* so it's a CR/LF, meaning an HTTP 0.9 request */
261 goto http_msg_req09_ver;
262
263 case HTTP_MSG_RQVER:
264 http_msg_rqver:
265 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
266 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
267
268 if (likely(HTTP_IS_CRLF(*ptr))) {
269 msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
270 http_msg_rqline_eol:
271 /* We have seen the end of line. Note that we do not
272 * necessarily have the \n yet, but at least we know that we
273 * have EITHER \r OR \n, otherwise the request would not be
274 * complete. We can then record the request length and return
275 * to the caller which will be able to register it.
276 */
277 msg->sl.rq.l = ptr - msg_start - msg->sol;
278 return ptr;
279 }
280
281 /* neither an HTTP_VER token nor a CRLF */
282 msg->err_state = HTTP_MSG_RQVER;
283 state = HTTP_MSG_ERROR;
284 break;
285
286 default:
287#ifdef DEBUG_FULL
288 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
289 exit(1);
290#endif
291 ;
292 }
293
294 http_msg_ood:
295 /* out of valid data */
296 if (ret_state)
297 *ret_state = state;
298 if (ret_ptr)
299 *ret_ptr = ptr - msg_start;
300 return NULL;
301}
302
303/*
304 * This function parses an HTTP message, either a request or a response,
305 * depending on the initial msg->msg_state. The caller is responsible for
306 * ensuring that the message does not wrap. The function can be preempted
307 * everywhere when data are missing and recalled at the exact same location
308 * with no information loss. The message may even be realigned between two
309 * calls. The header index is re-initialized when switching from
310 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
311 * fields. Note that msg->sol will be initialized after completing the first
312 * state, so that none of the msg pointers has to be initialized prior to the
313 * first call.
314 */
315void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
316{
317 enum h1_state state; /* updated only when leaving the FSM */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200318 register const char *ptr, *end; /* request pointers, to avoid dereferences */
Willy Tarreau950a8a62018-09-06 10:48:15 +0200319 struct buffer *buf = &msg->chn->buf;
320 char *input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200321
322 state = msg->msg_state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200323 ptr = input + msg->next;
324 end = b_stop(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200325
326 if (unlikely(ptr >= end))
327 goto http_msg_ood;
328
329 switch (state) {
330 /*
331 * First, states that are specific to the response only.
332 * We check them first so that request and headers are
333 * closer to each other (accessed more often).
334 */
335 case HTTP_MSG_RPBEFORE:
336 http_msg_rpbefore:
337 if (likely(HTTP_IS_TOKEN(*ptr))) {
338 /* we have a start of message, but we have to check
339 * first if we need to remove some CRLF. We can only
340 * do this when o=0.
341 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200342 if (unlikely(ptr != input)) {
343 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200344 goto http_msg_ood;
345 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200346 b_del(buf, ptr - input);
Willy Tarreau950a8a62018-09-06 10:48:15 +0200347 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200348 }
349 msg->sol = 0;
350 msg->sl.st.l = 0; /* used in debug mode */
351 hdr_idx_init(idx);
352 state = HTTP_MSG_RPVER;
353 goto http_msg_rpver;
354 }
355
356 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
357 state = HTTP_MSG_RPBEFORE;
358 goto http_msg_invalid;
359 }
360
361 if (unlikely(*ptr == '\n'))
362 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
363 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
364 /* stop here */
365
366 case HTTP_MSG_RPBEFORE_CR:
367 http_msg_rpbefore_cr:
368 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
369 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
370 /* stop here */
371
372 case HTTP_MSG_RPVER:
373 http_msg_rpver:
374 case HTTP_MSG_RPVER_SP:
375 case HTTP_MSG_RPCODE:
376 case HTTP_MSG_RPCODE_SP:
377 case HTTP_MSG_RPREASON:
378 ptr = (char *)http_parse_stsline(msg,
379 state, ptr, end,
380 &msg->next, &msg->msg_state);
381 if (unlikely(!ptr))
382 return;
383
384 /* we have a full response and we know that we have either a CR
385 * or an LF at <ptr>.
386 */
387 hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
388
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200389 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200390 if (likely(*ptr == '\r'))
391 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
392 goto http_msg_rpline_end;
393
394 case HTTP_MSG_RPLINE_END:
395 http_msg_rpline_end:
396 /* msg->sol must point to the first of CR or LF. */
397 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
398 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
399 /* stop here */
400
401 /*
402 * Second, states that are specific to the request only
403 */
404 case HTTP_MSG_RQBEFORE:
405 http_msg_rqbefore:
406 if (likely(HTTP_IS_TOKEN(*ptr))) {
407 /* we have a start of message, but we have to check
408 * first if we need to remove some CRLF. We can only
409 * do this when o=0.
410 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200411 if (likely(ptr != input)) {
412 if (co_data(msg->chn))
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200413 goto http_msg_ood;
414 /* Remove empty leading lines, as recommended by RFC2616. */
Willy Tarreau72a100b2018-07-10 09:59:31 +0200415 b_del(buf, ptr - input);
Willy Tarreau950a8a62018-09-06 10:48:15 +0200416 input = b_head(buf);
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200417 }
418 msg->sol = 0;
419 msg->sl.rq.l = 0; /* used in debug mode */
420 state = HTTP_MSG_RQMETH;
421 goto http_msg_rqmeth;
422 }
423
424 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
425 state = HTTP_MSG_RQBEFORE;
426 goto http_msg_invalid;
427 }
428
429 if (unlikely(*ptr == '\n'))
430 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
431 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
432 /* stop here */
433
434 case HTTP_MSG_RQBEFORE_CR:
435 http_msg_rqbefore_cr:
436 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
437 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
438 /* stop here */
439
440 case HTTP_MSG_RQMETH:
441 http_msg_rqmeth:
442 case HTTP_MSG_RQMETH_SP:
443 case HTTP_MSG_RQURI:
444 case HTTP_MSG_RQURI_SP:
445 case HTTP_MSG_RQVER:
446 ptr = (char *)http_parse_reqline(msg,
447 state, ptr, end,
448 &msg->next, &msg->msg_state);
449 if (unlikely(!ptr))
450 return;
451
452 /* we have a full request and we know that we have either a CR
453 * or an LF at <ptr>.
454 */
455 hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
456
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200457 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200458 if (likely(*ptr == '\r'))
459 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
460 goto http_msg_rqline_end;
461
462 case HTTP_MSG_RQLINE_END:
463 http_msg_rqline_end:
464 /* check for HTTP/0.9 request : no version information available.
465 * msg->sol must point to the first of CR or LF.
466 */
467 if (unlikely(msg->sl.rq.v_l == 0))
468 goto http_msg_last_lf;
469
470 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
471 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
472 /* stop here */
473
474 /*
475 * Common states below
476 */
477 case HTTP_MSG_HDR_FIRST:
478 http_msg_hdr_first:
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200479 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200480 if (likely(!HTTP_IS_CRLF(*ptr))) {
481 goto http_msg_hdr_name;
482 }
483
484 if (likely(*ptr == '\r'))
485 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
486 goto http_msg_last_lf;
487
488 case HTTP_MSG_HDR_NAME:
489 http_msg_hdr_name:
490 /* assumes msg->sol points to the first char */
491 if (likely(HTTP_IS_TOKEN(*ptr)))
492 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
493
494 if (likely(*ptr == ':'))
495 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
496
497 if (likely(msg->err_pos < -1) || *ptr == '\n') {
498 state = HTTP_MSG_HDR_NAME;
499 goto http_msg_invalid;
500 }
501
502 if (msg->err_pos == -1) /* capture error pointer */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200503 msg->err_pos = ptr - input; /* >= 0 now */
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200504
505 /* and we still accept this non-token character */
506 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
507
508 case HTTP_MSG_HDR_L1_SP:
509 http_msg_hdr_l1_sp:
510 /* assumes msg->sol points to the first char */
511 if (likely(HTTP_IS_SPHT(*ptr)))
512 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
513
514 /* header value can be basically anything except CR/LF */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200515 msg->sov = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200516
517 if (likely(!HTTP_IS_CRLF(*ptr))) {
518 goto http_msg_hdr_val;
519 }
520
521 if (likely(*ptr == '\r'))
522 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
523 goto http_msg_hdr_l1_lf;
524
525 case HTTP_MSG_HDR_L1_LF:
526 http_msg_hdr_l1_lf:
527 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
528 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
529
530 case HTTP_MSG_HDR_L1_LWS:
531 http_msg_hdr_l1_lws:
532 if (likely(HTTP_IS_SPHT(*ptr))) {
533 /* replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200534 for (; input + msg->sov < ptr; msg->sov++)
535 input[msg->sov] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200536 goto http_msg_hdr_l1_sp;
537 }
538 /* we had a header consisting only in spaces ! */
539 msg->eol = msg->sov;
540 goto http_msg_complete_header;
541
542 case HTTP_MSG_HDR_VAL:
543 http_msg_hdr_val:
544 /* assumes msg->sol points to the first char, and msg->sov
545 * points to the first character of the value.
546 */
547
548 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
549 * and lower. In fact since most of the time is spent in the loop, we
550 * also remove the sign bit test so that bytes 0x8e..0x0d break the
551 * loop, but we don't care since they're very rare in header values.
552 */
553#if defined(__x86_64__)
554 while (ptr <= end - sizeof(long)) {
555 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
556 goto http_msg_hdr_val2;
557 ptr += sizeof(long);
558 }
559#endif
560#if defined(__x86_64__) || \
561 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
562 defined(__ARM_ARCH_7A__)
563 while (ptr <= end - sizeof(int)) {
564 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
565 goto http_msg_hdr_val2;
566 ptr += sizeof(int);
567 }
568#endif
569 if (ptr >= end) {
570 state = HTTP_MSG_HDR_VAL;
571 goto http_msg_ood;
572 }
573 http_msg_hdr_val2:
574 if (likely(!HTTP_IS_CRLF(*ptr)))
575 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
576
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200577 msg->eol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200578 /* Note: we could also copy eol into ->eoh so that we have the
579 * real header end in case it ends with lots of LWS, but is this
580 * really needed ?
581 */
582 if (likely(*ptr == '\r'))
583 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
584 goto http_msg_hdr_l2_lf;
585
586 case HTTP_MSG_HDR_L2_LF:
587 http_msg_hdr_l2_lf:
588 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
589 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
590
591 case HTTP_MSG_HDR_L2_LWS:
592 http_msg_hdr_l2_lws:
593 if (unlikely(HTTP_IS_SPHT(*ptr))) {
594 /* LWS: replace HT,CR,LF with spaces */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200595 for (; input + msg->eol < ptr; msg->eol++)
596 input[msg->eol] = ' ';
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200597 goto http_msg_hdr_val;
598 }
599 http_msg_complete_header:
600 /*
601 * It was a new header, so the last one is finished.
602 * Assumes msg->sol points to the first char, msg->sov points
603 * to the first character of the value and msg->eol to the
604 * first CR or LF so we know how the line ends. We insert last
605 * header into the index.
606 */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200607 if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r',
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200608 idx, idx->tail) < 0)) {
609 state = HTTP_MSG_HDR_L2_LWS;
610 goto http_msg_invalid;
611 }
612
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200613 msg->sol = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200614 if (likely(!HTTP_IS_CRLF(*ptr))) {
615 goto http_msg_hdr_name;
616 }
617
618 if (likely(*ptr == '\r'))
619 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
620 goto http_msg_last_lf;
621
622 case HTTP_MSG_LAST_LF:
623 http_msg_last_lf:
624 /* Assumes msg->sol points to the first of either CR or LF.
625 * Sets ->sov and ->next to the total header length, ->eoh to
626 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
627 */
628 EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
629 ptr++;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200630 msg->sov = msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200631 msg->eoh = msg->sol;
632 msg->sol = 0;
633 msg->eol = msg->sov - msg->eoh;
634 msg->msg_state = HTTP_MSG_BODY;
635 return;
636
637 case HTTP_MSG_ERROR:
638 /* this may only happen if we call http_msg_analyser() twice with an error */
639 break;
640
641 default:
642#ifdef DEBUG_FULL
643 fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
644 exit(1);
645#endif
646 ;
647 }
648 http_msg_ood:
649 /* out of data */
650 msg->msg_state = state;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200651 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200652 return;
653
654 http_msg_invalid:
655 /* invalid message */
656 msg->err_state = state;
657 msg->msg_state = HTTP_MSG_ERROR;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +0200658 msg->next = ptr - input;
Willy Tarreau8740c8b2017-09-21 10:22:25 +0200659 return;
660}
661
/* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
 * pairs representing header fields into the array <hdr> of size <hdr_num>,
 * whose last entry will have an empty name and an empty value. If <hdr_num> is
 * too small to represent the whole message, an error is returned. Some
 * protocol elements such as content-length and transfer-encoding will be
 * parsed and stored into h1m as well. <hdr> may be null, in which case only
 * the parsing state will be updated. This may be used to restart the parsing
 * where it stopped for example.
 *
 * For now it's limited to the response. If the header block is incomplete,
 * 0 is returned, waiting to be called again with more data to try it again.
 * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
 * and h1m->next to zero on the first call, the parser will do the rest. If
 * an incomplete message is seen, the caller only needs to present h1m->state
 * and h1m->next again, with an empty header list so that the parser can start
 * again. In this case, it will detect that it interrupted a previous session
 * and will first look for the end of the message before reparsing it again and
 * indexing it at the same time. This ensures that incomplete messages fed 1
 * character at a time are never processed entirely more than exactly twice,
 * and that there is no need to store all the internal state and pre-parsed
 * headers or start line between calls.
 *
 * A pointer to a start line descriptor may be passed in <slp>, in which case
 * the parser will fill it with whatever it found.
 *
 * The code derived from the main HTTP/1 parser above but was simplified and
 * optimized to process responses produced or forwarded by haproxy. The caller
 * is responsible for ensuring that the message doesn't wrap, and should ensure
 * it is complete to avoid having to retry the operation after a failed
 * attempt. The message is not supposed to be invalid, which is why a few
 * properties such as the character set used in the header field names are not
 * checked. In case of an unparsable response message, a negative value will be
 * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
 * recommended.
 *
 * This function returns :
 *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
 *       set) with the state the error occurred in and h1m->err_pos with the
 *       position relative to <start>
 *    -2 if the output is full (hdr_num reached). err_state and err_pos also
 *       indicate where it failed.
 *     0 in case of missing data.
 *   > 0 on success, it then corresponds to the number of bytes read since
 *       <start> so that the caller can go on with the payload.
 */
709int h1_headers_to_hdr_list(char *start, const char *stop,
710 struct http_hdr *hdr, unsigned int hdr_num,
Willy Tarreaua41393f2018-09-11 15:34:50 +0200711 struct h1m *h1m, union h1_sl *slp)
Willy Tarreau794f9af2017-07-26 09:07:47 +0200712{
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200713 enum h1m_state state;
714 register char *ptr;
715 register const char *end;
716 unsigned int hdr_count;
717 unsigned int skip; /* number of bytes skipped at the beginning */
718 unsigned int sol; /* start of line */
719 unsigned int col; /* position of the colon */
720 unsigned int eol; /* end of line */
721 unsigned int sov; /* start of value */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200722 union h1_sl sl;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200723 int skip_update;
724 int restarting;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200725 struct ist n, v; /* header name and value during parsing */
726
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200727 skip = 0; // do it only once to keep track of the leading CRLF.
728
729 try_again:
730 hdr_count = sol = col = eol = sov = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200731 sl.st.status = 0;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200732 skip_update = restarting = 0;
733
734 ptr = start + h1m->next;
735 end = stop;
736 state = h1m->state;
737
738 if (state != H1_MSG_RPBEFORE)
739 restarting = 1;
740
Willy Tarreau794f9af2017-07-26 09:07:47 +0200741 if (unlikely(ptr >= end))
742 goto http_msg_ood;
743
Willy Tarreau4c34c0e2018-09-11 16:20:30 +0200744 /* don't update output if hdr is NULL or if we're restarting */
745 if (!hdr || restarting)
Willy Tarreau5384aac2018-09-11 16:04:48 +0200746 skip_update = 1;
747
Willy Tarreau794f9af2017-07-26 09:07:47 +0200748 switch (state) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200749 case H1_MSG_RPBEFORE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200750 http_msg_rpbefore:
751 if (likely(HTTP_IS_TOKEN(*ptr))) {
752 /* we have a start of message, we may have skipped some
753 * heading CRLF. Skip them now.
754 */
755 skip += ptr - start;
756 start = ptr;
757
758 sol = 0;
Willy Tarreaua41393f2018-09-11 15:34:50 +0200759 sl.st.v = skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200760 hdr_count = 0;
Willy Tarreau801250e2018-09-11 11:45:04 +0200761 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200762 goto http_msg_rpver;
763 }
764
765 if (unlikely(!HTTP_IS_CRLF(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200766 state = H1_MSG_RPBEFORE;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200767 goto http_msg_invalid;
768 }
769
770 if (unlikely(*ptr == '\n'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200771 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
772 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200773 /* stop here */
774
Willy Tarreau801250e2018-09-11 11:45:04 +0200775 case H1_MSG_RPBEFORE_CR:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200776 http_msg_rpbefore_cr:
Willy Tarreau801250e2018-09-11 11:45:04 +0200777 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
778 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200779 /* stop here */
780
Willy Tarreau801250e2018-09-11 11:45:04 +0200781 case H1_MSG_RPVER:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200782 http_msg_rpver:
783 if (likely(HTTP_IS_VER_TOKEN(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200784 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200785
786 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200787 sl.st.v_l = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200788 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200789 }
Willy Tarreau801250e2018-09-11 11:45:04 +0200790 state = H1_MSG_RPVER;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200791 goto http_msg_invalid;
792
Willy Tarreau801250e2018-09-11 11:45:04 +0200793 case H1_MSG_RPVER_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200794 http_msg_rpver_sp:
795 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200796 sl.st.status = 0;
797 sl.st.c = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200798 goto http_msg_rpcode;
799 }
800 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200801 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200802 /* so it's a CR/LF, this is invalid */
Willy Tarreau801250e2018-09-11 11:45:04 +0200803 state = H1_MSG_RPVER_SP;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200804 goto http_msg_invalid;
805
Willy Tarreau801250e2018-09-11 11:45:04 +0200806 case H1_MSG_RPCODE:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200807 http_msg_rpcode:
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100808 if (likely(HTTP_IS_DIGIT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200809 sl.st.status = sl.st.status * 10 + *ptr - '0';
Willy Tarreau801250e2018-09-11 11:45:04 +0200810 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200811 }
812
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100813 if (unlikely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200814 state = H1_MSG_RPCODE;
Willy Tarreau1b4cf9b2017-11-09 11:15:45 +0100815 goto http_msg_invalid;
816 }
817
Willy Tarreau794f9af2017-07-26 09:07:47 +0200818 if (likely(HTTP_IS_SPHT(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200819 sl.st.c_l = ptr - start + skip - sl.st.c;
Willy Tarreau801250e2018-09-11 11:45:04 +0200820 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200821 }
822
823 /* so it's a CR/LF, so there is no reason phrase */
Willy Tarreaua41393f2018-09-11 15:34:50 +0200824 sl.st.c_l = ptr - start + skip - sl.st.c;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200825
826 http_msg_rsp_reason:
Willy Tarreaua41393f2018-09-11 15:34:50 +0200827 sl.st.r = ptr - start + skip;
828 sl.st.r_l = 0;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200829 goto http_msg_rpline_eol;
830
Willy Tarreau801250e2018-09-11 11:45:04 +0200831 case H1_MSG_RPCODE_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200832 http_msg_rpcode_sp:
833 if (likely(!HTTP_IS_LWS(*ptr))) {
Willy Tarreaua41393f2018-09-11 15:34:50 +0200834 sl.st.r = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200835 goto http_msg_rpreason;
836 }
837 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200838 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200839 /* so it's a CR/LF, so there is no reason phrase */
840 goto http_msg_rsp_reason;
841
Willy Tarreau801250e2018-09-11 11:45:04 +0200842 case H1_MSG_RPREASON:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200843 http_msg_rpreason:
844 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200845 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
Willy Tarreaua41393f2018-09-11 15:34:50 +0200846 sl.st.r_l = ptr - start + skip - sl.st.r;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200847 http_msg_rpline_eol:
848 /* We have seen the end of line. Note that we do not
849 * necessarily have the \n yet, but at least we know that we
850 * have EITHER \r OR \n, otherwise the response would not be
851 * complete. We can then record the response length and return
852 * to the caller which will be able to register it.
853 */
854
Willy Tarreau5384aac2018-09-11 16:04:48 +0200855 if (likely(!skip_update)) {
856 if (unlikely(hdr_count >= hdr_num)) {
857 state = H1_MSG_RPREASON;
858 goto http_output_full;
859 }
860 http_set_hdr(&hdr[hdr_count++], ist(":status"), ist2(start + sl.st.c, sl.st.c_l));
Willy Tarreau794f9af2017-07-26 09:07:47 +0200861 }
Willy Tarreau794f9af2017-07-26 09:07:47 +0200862
863 sol = ptr - start;
864 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200865 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200866 goto http_msg_rpline_end;
867
Willy Tarreau801250e2018-09-11 11:45:04 +0200868 case H1_MSG_RPLINE_END:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200869 http_msg_rpline_end:
870 /* sol must point to the first of CR or LF. */
Willy Tarreau801250e2018-09-11 11:45:04 +0200871 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
872 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200873 /* stop here */
874
Willy Tarreau801250e2018-09-11 11:45:04 +0200875 case H1_MSG_HDR_FIRST:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200876 http_msg_hdr_first:
877 sol = ptr - start;
878 if (likely(!HTTP_IS_CRLF(*ptr))) {
879 goto http_msg_hdr_name;
880 }
881
882 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200883 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200884 goto http_msg_last_lf;
885
Willy Tarreau801250e2018-09-11 11:45:04 +0200886 case H1_MSG_HDR_NAME:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200887 http_msg_hdr_name:
888 /* assumes sol points to the first char */
889 if (likely(HTTP_IS_TOKEN(*ptr))) {
890 /* turn it to lower case if needed */
891 if (isupper((unsigned char)*ptr))
892 *ptr = tolower(*ptr);
Willy Tarreau801250e2018-09-11 11:45:04 +0200893 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200894 }
895
896 if (likely(*ptr == ':')) {
897 col = ptr - start;
Willy Tarreau801250e2018-09-11 11:45:04 +0200898 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200899 }
900
Willy Tarreau9aec3052018-09-12 09:20:40 +0200901 if (likely(h1m->err_pos < -1) || *ptr == '\n') {
Willy Tarreau801250e2018-09-11 11:45:04 +0200902 state = H1_MSG_HDR_NAME;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200903 goto http_msg_invalid;
904 }
905
Willy Tarreau9aec3052018-09-12 09:20:40 +0200906 if (h1m->err_pos == -1) /* capture the error pointer */
907 h1m->err_pos = ptr - start + skip; /* >= 0 now */
908
909 /* and we still accept this non-token character */
Willy Tarreau801250e2018-09-11 11:45:04 +0200910 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200911
Willy Tarreau801250e2018-09-11 11:45:04 +0200912 case H1_MSG_HDR_L1_SP:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200913 http_msg_hdr_l1_sp:
914 /* assumes sol points to the first char */
915 if (likely(HTTP_IS_SPHT(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200916 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200917
918 /* header value can be basically anything except CR/LF */
919 sov = ptr - start;
920
921 if (likely(!HTTP_IS_CRLF(*ptr))) {
922 goto http_msg_hdr_val;
923 }
924
925 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200926 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200927 goto http_msg_hdr_l1_lf;
928
Willy Tarreau801250e2018-09-11 11:45:04 +0200929 case H1_MSG_HDR_L1_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200930 http_msg_hdr_l1_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200931 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
932 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200933
Willy Tarreau801250e2018-09-11 11:45:04 +0200934 case H1_MSG_HDR_L1_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200935 http_msg_hdr_l1_lws:
936 if (likely(HTTP_IS_SPHT(*ptr))) {
937 /* replace HT,CR,LF with spaces */
938 for (; start + sov < ptr; sov++)
939 start[sov] = ' ';
940 goto http_msg_hdr_l1_sp;
941 }
942 /* we had a header consisting only in spaces ! */
943 eol = sov;
944 goto http_msg_complete_header;
945
Willy Tarreau801250e2018-09-11 11:45:04 +0200946 case H1_MSG_HDR_VAL:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200947 http_msg_hdr_val:
948 /* assumes sol points to the first char, and sov
949 * points to the first character of the value.
950 */
951
952 /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
953 * and lower. In fact since most of the time is spent in the loop, we
954 * also remove the sign bit test so that bytes 0x8e..0x0d break the
955 * loop, but we don't care since they're very rare in header values.
956 */
957#if defined(__x86_64__)
958 while (ptr <= end - sizeof(long)) {
959 if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
960 goto http_msg_hdr_val2;
961 ptr += sizeof(long);
962 }
963#endif
964#if defined(__x86_64__) || \
965 defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
966 defined(__ARM_ARCH_7A__)
967 while (ptr <= end - sizeof(int)) {
968 if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
969 goto http_msg_hdr_val2;
970 ptr += sizeof(int);
971 }
972#endif
973 if (ptr >= end) {
Willy Tarreau801250e2018-09-11 11:45:04 +0200974 state = H1_MSG_HDR_VAL;
Willy Tarreau794f9af2017-07-26 09:07:47 +0200975 goto http_msg_ood;
976 }
977 http_msg_hdr_val2:
978 if (likely(!HTTP_IS_CRLF(*ptr)))
Willy Tarreau801250e2018-09-11 11:45:04 +0200979 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200980
981 eol = ptr - start;
982 /* Note: we could also copy eol into ->eoh so that we have the
983 * real header end in case it ends with lots of LWS, but is this
984 * really needed ?
985 */
986 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +0200987 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200988 goto http_msg_hdr_l2_lf;
989
Willy Tarreau801250e2018-09-11 11:45:04 +0200990 case H1_MSG_HDR_L2_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200991 http_msg_hdr_l2_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +0200992 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
993 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
Willy Tarreau794f9af2017-07-26 09:07:47 +0200994
Willy Tarreau801250e2018-09-11 11:45:04 +0200995 case H1_MSG_HDR_L2_LWS:
Willy Tarreau794f9af2017-07-26 09:07:47 +0200996 http_msg_hdr_l2_lws:
997 if (unlikely(HTTP_IS_SPHT(*ptr))) {
998 /* LWS: replace HT,CR,LF with spaces */
999 for (; start + eol < ptr; eol++)
1000 start[eol] = ' ';
1001 goto http_msg_hdr_val;
1002 }
1003 http_msg_complete_header:
1004 /*
1005 * It was a new header, so the last one is finished. Assumes
1006 * <sol> points to the first char of the name, <col> to the
1007 * colon, <sov> points to the first character of the value and
1008 * <eol> to the first CR or LF so we know how the line ends. We
1009 * will trim spaces around the value. It's possible to do it by
1010 * adjusting <eol> and <sov> which are no more used after this.
1011 * We can add the header field to the list.
1012 */
1013 while (sov < eol && HTTP_IS_LWS(start[sov]))
1014 sov++;
1015
1016 while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
1017 eol--;
1018
1019
1020 n = ist2(start + sol, col - sol);
1021 v = ist2(start + sov, eol - sov);
1022
Willy Tarreau5384aac2018-09-11 16:04:48 +02001023 if (likely(!skip_update)) {
Willy Tarreau794f9af2017-07-26 09:07:47 +02001024 long long cl;
1025
Willy Tarreau5384aac2018-09-11 16:04:48 +02001026 if (unlikely(hdr_count >= hdr_num)) {
1027 state = H1_MSG_HDR_L2_LWS;
1028 goto http_output_full;
1029 }
1030
1031 http_set_hdr(&hdr[hdr_count++], n, v);
1032
Willy Tarreau11da5672018-09-11 19:23:04 +02001033 if (sl.st.status >= 100 && sl.st.status < 200)
Willy Tarreaud22e83a2017-10-31 08:02:24 +01001034 h1m->curr_len = h1m->body_len = 0;
Willy Tarreau11da5672018-09-11 19:23:04 +02001035 else if (sl.st.status == 304 || sl.st.status == 204) {
Willy Tarreau8ea0f382017-10-30 19:31:59 +01001036 /* no contents, claim c-len is present and set to zero */
1037 h1m->flags |= H1_MF_CLEN;
1038 h1m->curr_len = h1m->body_len = 0;
1039 }
1040 else if (isteq(n, ist("transfer-encoding"))) {
Willy Tarreau794f9af2017-07-26 09:07:47 +02001041 h1m->flags &= ~H1_MF_CLEN;
1042 h1m->flags |= H1_MF_CHNK;
1043 }
1044 else if (isteq(n, ist("content-length")) && !(h1m->flags & H1_MF_CHNK)) {
1045 h1m->flags |= H1_MF_CLEN;
1046 strl2llrc(v.ptr, v.len, &cl);
1047 h1m->curr_len = h1m->body_len = cl;
1048 }
1049 }
1050
1051 sol = ptr - start;
1052 if (likely(!HTTP_IS_CRLF(*ptr)))
1053 goto http_msg_hdr_name;
1054
1055 if (likely(*ptr == '\r'))
Willy Tarreau801250e2018-09-11 11:45:04 +02001056 EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001057 goto http_msg_last_lf;
1058
Willy Tarreau801250e2018-09-11 11:45:04 +02001059 case H1_MSG_LAST_LF:
Willy Tarreau794f9af2017-07-26 09:07:47 +02001060 http_msg_last_lf:
Willy Tarreau801250e2018-09-11 11:45:04 +02001061 EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
Willy Tarreau794f9af2017-07-26 09:07:47 +02001062 ptr++;
1063 /* <ptr> now points to the first byte of payload. If needed sol
1064 * still points to the first of either CR or LF of the empty
1065 * line ending the headers block.
1066 */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001067 if (likely(!skip_update)) {
1068 if (unlikely(hdr_count >= hdr_num)) {
1069 state = H1_MSG_LAST_LF;
1070 goto http_output_full;
1071 }
1072 http_set_hdr(&hdr[hdr_count++], ist(""), ist(""));
Willy Tarreau794f9af2017-07-26 09:07:47 +02001073 }
Willy Tarreau001823c2018-09-12 17:25:32 +02001074
1075 /* reaching here we've parsed the whole message. We may detect
1076 * that we were already continuing an interrupted parsing pass
1077 * so we were silently looking for the end of message not
1078 * updating anything before deciding to parse it fully at once.
1079 * It's guaranteed that we won't match this test twice in a row
1080 * since restarting will turn zero.
1081 */
1082 if (restarting)
1083 goto restart;
1084
1085 if (h1m->flags & H1_MF_CHNK)
1086 state = H1_MSG_CHUNK_SIZE;
1087 else
1088 state = H1_MSG_DATA;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001089 break;
1090
1091 default:
1092 /* impossible states */
1093 goto http_msg_invalid;
1094 }
1095
Willy Tarreau001823c2018-09-12 17:25:32 +02001096 /* Now we've left the headers state and are either in H1_MSG_DATA or
1097 * H1_MSG_CHUNK_SIZE.
Willy Tarreau794f9af2017-07-26 09:07:47 +02001098 */
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001099
Willy Tarreau5384aac2018-09-11 16:04:48 +02001100 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001101 *slp = sl;
1102
Willy Tarreau4433c082018-09-11 15:33:32 +02001103 h1m->state = state;
1104 h1m->next = ptr - start + skip;
1105 return h1m->next;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001106
1107 http_msg_ood:
1108 /* out of data at <ptr> during state <state> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001109 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001110 *slp = sl;
1111
Willy Tarreau4433c082018-09-11 15:33:32 +02001112 h1m->state = state;
1113 h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001114 return 0;
1115
1116 http_msg_invalid:
1117 /* invalid message, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001118 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001119 *slp = sl;
1120
Willy Tarreau4433c082018-09-11 15:33:32 +02001121 h1m->err_state = h1m->state = state;
1122 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001123 return -1;
1124
1125 http_output_full:
1126 /* no more room to store the current header, error at <ptr> */
Willy Tarreau5384aac2018-09-11 16:04:48 +02001127 if (slp && !skip_update)
Willy Tarreaua41393f2018-09-11 15:34:50 +02001128 *slp = sl;
1129
Willy Tarreau4433c082018-09-11 15:33:32 +02001130 h1m->err_state = h1m->state = state;
1131 h1m->err_pos = h1m->next = ptr - start + skip;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001132 return -2;
Willy Tarreau4c34c0e2018-09-11 16:20:30 +02001133
1134 restart:
1135 h1m->next = 0;
1136 h1m->state = H1_MSG_RPBEFORE;
1137 goto try_again;
Willy Tarreau794f9af2017-07-26 09:07:47 +02001138}
1139
/* Measures the trailers block found in <buf> at offset <ofs>, looking at no
 * more than <max> bytes past that offset. Lines are walked one at a time until
 * an empty line (a line whose very first byte is its CR or LF) is consumed.
 *
 * Return value:
 *   > 0 : number of bytes, counted from <ofs>, spanned by the trailers
 *         including the terminating empty line ;
 *     0 : the scan limit was reached before the current line's LF was seen,
 *         more input data are needed ;
 *    -1 : a second CR was met on a line before its LF (parse error, the caller
 *         decides how to proceed).
 *
 * Only a repeated CR is reported as an error: bytes located between a CR and
 * the following LF are not otherwise inspected. The buffer is only read, never
 * modified.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int pos = ofs;
	int blank_line;

	do {
		const char *line = b_peek(buf, pos);
		const char *eol = NULL; /* first CR or LF met on this line */
		const char *cur;

		/* advance until the LF ending the current line */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0;

			if (*cur == '\n')
				break;

			if (*cur == '\r') {
				/* only one CR is tolerated per line */
				if (eol)
					return -1;
				eol = cur;
			}
		}

		/* bare LF: the line ends on the LF itself */
		if (!eol)
			eol = cur;

		/* account for the whole line, LF included */
		pos += b_dist(buf, line, b_next(buf, cur));

		/* a line starting with its own CR/LF is empty and closes the
		 * trailers block.
		 */
		blank_line = (eol == line);
	} while (!blank_line);

	return pos - ofs;
}
1190
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001191/* This function skips trailers in the buffer associated with HTTP message
1192 * <msg>. The first visited position is msg->next. If the end of the trailers is
1193 * found, the function returns >0. So, the caller can automatically schedul it
1194 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
1195 * data are available, the function does not change anything except maybe
1196 * msg->sol if it could parse some lines, and returns zero. If a parse error
1197 * is encountered, the function returns < 0 and does not change anything except
1198 * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
1199 * state before calling this function, which implies that all non-trailers data
1200 * have already been scheduled for forwarding, and that msg->next exactly
1201 * matches the length of trailers already parsed and not forwarded. It is also
1202 * important to note that this function is designed to be able to parse wrapped
1203 * headers at end of buffer.
1204 */
1205int http_forward_trailers(struct http_msg *msg)
1206{
Willy Tarreauc9fa0482018-07-10 17:43:27 +02001207 const struct buffer *buf = &msg->chn->buf;
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001208 const char *parse = ci_head(msg->chn);
1209 const char *stop = b_tail(buf);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001210
1211 /* we have msg->next which points to next line. Look for CRLF. But
1212 * first, we reset msg->sol */
1213 msg->sol = 0;
1214 while (1) {
1215 const char *p1 = NULL, *p2 = NULL;
Willy Tarreau188e2302018-06-15 11:11:53 +02001216 const char *start = c_ptr(msg->chn, msg->next + msg->sol);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001217 const char *ptr = start;
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001218
1219 /* scan current line and stop at LF or CRLF */
1220 while (1) {
1221 if (ptr == stop)
1222 return 0;
1223
1224 if (*ptr == '\n') {
1225 if (!p1)
1226 p1 = ptr;
1227 p2 = ptr;
1228 break;
1229 }
1230
1231 if (*ptr == '\r') {
1232 if (p1) {
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001233 msg->err_pos = b_dist(buf, parse, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001234 return -1;
1235 }
1236 p1 = ptr;
1237 }
1238
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001239 ptr = b_next(buf, ptr);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001240 }
1241
1242 /* after LF; point to beginning of next line */
Willy Tarreau5e74b0b2018-06-19 08:03:19 +02001243 p2 = b_next(buf, p2);
1244 msg->sol += b_dist(buf, start, p2);
Willy Tarreaudb4893d2017-09-21 08:40:02 +02001245
1246 /* LF/CRLF at beginning of line => end of trailers at p2.
1247 * Everything was scheduled for forwarding, there's nothing left
1248 * from this message. */
1249 if (p1 == start)
1250 return 1;
1251
1252 /* OK, next line then */
1253 }
1254}