blob: b4c49c781febb650baccaf1bf48b6f3bbee3da27 [file] [log] [blame]
Christopher Faulet4f0f88a2019-08-10 11:17:44 +02001/*
2 * Functions to manipulate H1 messages using the internal representation.
3 *
4 * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020013#include <haproxy/api.h>
Willy Tarreau6be78492020-06-05 00:00:29 +020014#include <haproxy/cfgparse.h>
Willy Tarreaudfd3de82020-06-04 23:46:14 +020015#include <haproxy/global.h>
Willy Tarreau5413a872020-06-02 19:33:08 +020016#include <haproxy/h1.h>
Willy Tarreauc6fe8842020-06-04 09:00:02 +020017#include <haproxy/h1_htx.h>
Willy Tarreaucd72d8c2020-06-02 19:11:26 +020018#include <haproxy/http.h>
Willy Tarreau16f958c2020-06-03 08:44:35 +020019#include <haproxy/htx.h>
Willy Tarreau36979d92020-06-05 17:27:29 +020020#include <haproxy/tools.h>
Christopher Faulet4f0f88a2019-08-10 11:17:44 +020021
Christopher Faulet4f0f88a2019-08-10 11:17:44 +020022/* Estimate the size of the HTX headers after the parsing, including the EOH. */
23static size_t h1_eval_htx_hdrs_size(const struct http_hdr *hdrs)
24{
25 size_t sz = 0;
26 int i;
27
28 for (i = 0; hdrs[i].n.len; i++)
29 sz += sizeof(struct htx_blk) + hdrs[i].n.len + hdrs[i].v.len;
30 sz += sizeof(struct htx_blk) + 1;
31 return sz;
32}
33
34/* Estimate the size of the HTX request after the parsing. */
35static size_t h1_eval_htx_size(const struct ist p1, const struct ist p2, const struct ist p3,
36 const struct http_hdr *hdrs)
37{
38 size_t sz;
39
40 /* size of the HTX start-line */
41 sz = sizeof(struct htx_blk) + sizeof(struct htx_sl) + p1.len + p2.len + p3.len;
42 sz += h1_eval_htx_hdrs_size(hdrs);
43 return sz;
44}
45
Christopher Faulet4f0f88a2019-08-10 11:17:44 +020046/* Check the validity of the request version. If the version is valid, it
47 * returns 1. Otherwise, it returns 0.
48 */
49static int h1_process_req_vsn(struct h1m *h1m, union h1_sl *sl)
50{
51 /* RFC7230#2.6 has enforced the format of the HTTP version string to be
52 * exactly one digit "." one digit. This check may be disabled using
53 * option accept-invalid-http-request.
54 */
55 if (h1m->err_pos == -2) { /* PR_O2_REQBUG_OK not set */
56 if (sl->rq.v.len != 8)
57 return 0;
58
59 if (*(sl->rq.v.ptr + 4) != '/' ||
60 !isdigit((unsigned char)*(sl->rq.v.ptr + 5)) ||
61 *(sl->rq.v.ptr + 6) != '.' ||
62 !isdigit((unsigned char)*(sl->rq.v.ptr + 7)))
63 return 0;
64 }
65 else if (!sl->rq.v.len) {
66 /* try to convert HTTP/0.9 requests to HTTP/1.0 */
67
68 /* RFC 1945 allows only GET for HTTP/0.9 requests */
69 if (sl->rq.meth != HTTP_METH_GET)
70 return 0;
71
72 /* HTTP/0.9 requests *must* have a request URI, per RFC 1945 */
73 if (!sl->rq.u.len)
74 return 0;
75
76 /* Add HTTP version */
77 sl->rq.v = ist("HTTP/1.0");
78 return 1;
79 }
80
81 if ((sl->rq.v.len == 8) &&
82 ((*(sl->rq.v.ptr + 5) > '1') ||
83 ((*(sl->rq.v.ptr + 5) == '1') && (*(sl->rq.v.ptr + 7) >= '1'))))
84 h1m->flags |= H1_MF_VER_11;
85 return 1;
86}
87
88/* Check the validity of the response version. If the version is valid, it
89 * returns 1. Otherwise, it returns 0.
90 */
91static int h1_process_res_vsn(struct h1m *h1m, union h1_sl *sl)
92{
93 /* RFC7230#2.6 has enforced the format of the HTTP version string to be
94 * exactly one digit "." one digit. This check may be disabled using
95 * option accept-invalid-http-request.
96 */
97 if (h1m->err_pos == -2) { /* PR_O2_REQBUG_OK not set */
98 if (sl->st.v.len != 8)
99 return 0;
100
101 if (*(sl->st.v.ptr + 4) != '/' ||
102 !isdigit((unsigned char)*(sl->st.v.ptr + 5)) ||
103 *(sl->st.v.ptr + 6) != '.' ||
104 !isdigit((unsigned char)*(sl->st.v.ptr + 7)))
105 return 0;
106 }
107
108 if ((sl->st.v.len == 8) &&
109 ((*(sl->st.v.ptr + 5) > '1') ||
110 ((*(sl->st.v.ptr + 5) == '1') && (*(sl->st.v.ptr + 7) >= '1'))))
111 h1m->flags |= H1_MF_VER_11;
112
113 return 1;
114}
115
116/* Convert H1M flags to HTX start-line flags. */
117static unsigned int h1m_htx_sl_flags(struct h1m *h1m)
118{
119 unsigned int flags = HTX_SL_F_NONE;
120
121 if (h1m->flags & H1_MF_RESP)
122 flags |= HTX_SL_F_IS_RESP;
123 if (h1m->flags & H1_MF_VER_11)
124 flags |= HTX_SL_F_VER_11;
125 if (h1m->flags & H1_MF_XFER_ENC)
126 flags |= HTX_SL_F_XFER_ENC;
127 if (h1m->flags & H1_MF_XFER_LEN) {
128 flags |= HTX_SL_F_XFER_LEN;
129 if (h1m->flags & H1_MF_CHNK)
130 flags |= HTX_SL_F_CHNK;
131 else if (h1m->flags & H1_MF_CLEN) {
132 flags |= HTX_SL_F_CLEN;
133 if (h1m->body_len == 0)
134 flags |= HTX_SL_F_BODYLESS;
135 }
136 else
137 flags |= HTX_SL_F_BODYLESS;
138 }
Christopher Faulet576c3582021-01-08 15:53:01 +0100139 if (h1m->flags & H1_MF_CONN_UPG)
140 flags |= HTX_SL_F_CONN_UPG;
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200141 return flags;
142}
143
144/* Postprocess the parsed headers for a request and convert them into an htx
145 * message. It returns the number of bytes parsed if > 0, or 0 if it couldn't
146 * proceed. Parsing errors are reported by setting the htx flag
147 * HTX_FL_PARSING_ERROR and filling h1m->err_pos and h1m->err_state fields.
148 */
149static int h1_postparse_req_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *htx,
150 struct http_hdr *hdrs, size_t max)
151{
152 struct htx_sl *sl;
153 struct ist meth, uri, vsn;
154 unsigned int flags;
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200155
156 /* <h1sl> is always defined for a request */
157 meth = h1sl->rq.m;
158 uri = h1sl->rq.u;
159 vsn = h1sl->rq.v;
160
161 /* Be sure the message, once converted into HTX, will not exceed the max
162 * size allowed.
163 */
164 if (h1_eval_htx_size(meth, uri, vsn, hdrs) > max) {
165 if (htx_is_empty(htx))
166 goto error;
167 h1m_init_res(h1m);
168 h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
169 return 0;
170 }
171
172 /* By default, request have always a known length */
173 h1m->flags |= H1_MF_XFER_LEN;
174
175 if (h1sl->rq.meth == HTTP_METH_CONNECT) {
Christopher Faulet5be651d2021-01-22 15:28:03 +0100176 h1m->flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
177 h1m->curr_len = h1m->body_len = 0;
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200178 }
179
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200180 flags = h1m_htx_sl_flags(h1m);
181 sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth, uri, vsn);
182 if (!sl || !htx_add_all_headers(htx, hdrs))
183 goto error;
184 sl->info.req.meth = h1sl->rq.meth;
185
Christopher Fauletfe451fb2019-10-08 15:01:34 +0200186 /* Check if the uri contains an authority. Also check if it contains an
187 * explicit scheme and if it is "http" or "https". */
188 if (h1sl->rq.meth == HTTP_METH_CONNECT)
189 sl->flags |= HTX_SL_F_HAS_AUTHORITY;
190 else if (uri.len && uri.ptr[0] != '/' && uri.ptr[0] != '*') {
191 sl->flags |= (HTX_SL_F_HAS_AUTHORITY|HTX_SL_F_HAS_SCHM);
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200192 if (uri.len > 4 && (uri.ptr[0] | 0x20) == 'h')
193 sl->flags |= ((uri.ptr[4] == ':') ? HTX_SL_F_SCHM_HTTP : HTX_SL_F_SCHM_HTTPS);
194 }
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200195
196 /* If body length cannot be determined, set htx->extra to
197 * ULLONG_MAX. This value is impossible in other cases.
198 */
199 htx->extra = ((h1m->flags & H1_MF_XFER_LEN) ? h1m->curr_len : ULLONG_MAX);
200
201 end:
202 return 1;
203 error:
204 h1m->err_pos = h1m->next;
205 h1m->err_state = h1m->state;
206 htx->flags |= HTX_FL_PARSING_ERROR;
207 return 0;
208}
209
210/* Postprocess the parsed headers for a response and convert them into an htx
211 * message. It returns the number of bytes parsed if > 0, or 0 if it couldn't
212 * proceed. Parsing errors are reported by setting the htx flag
213 * HTX_FL_PARSING_ERROR and filling h1m->err_pos and h1m->err_state fields.
214 */
215static int h1_postparse_res_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *htx,
216 struct http_hdr *hdrs, size_t max)
217{
218 struct htx_sl *sl;
219 struct ist vsn, status, reason;
220 unsigned int flags;
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200221 uint16_t code = 0;
222
223 if (h1sl) {
224 /* For HTTP responses, the start-line was parsed */
225 code = h1sl->st.status;
226 vsn = h1sl->st.v;
227 status = h1sl->st.c;
228 reason = h1sl->st.r;
229 }
230 else {
231 /* For FCGI responses, there is no start(-line but the "Status"
232 * header must be parsed, if found.
233 */
234 int hdr;
235
236 vsn = ((h1m->flags & H1_MF_VER_11) ? ist("HTTP/1.1") : ist("HTTP/1.0"));
237 for (hdr = 0; hdrs[hdr].n.len; hdr++) {
238 if (isteqi(hdrs[hdr].n, ist("status"))) {
239 code = http_parse_status_val(hdrs[hdr].v, &status, &reason);
240 }
241 else if (isteqi(hdrs[hdr].n, ist("location"))) {
242 code = 302;
243 status = ist("302");
244 reason = ist("Moved Temporarily");
245 }
246 }
247 if (!code) {
248 code = 200;
249 status = ist("200");
250 reason = ist("OK");
251 }
252 /* FIXME: Check the codes 1xx ? */
253 }
254
255 /* Be sure the message, once converted into HTX, will not exceed the max
256 * size allowed.
257 */
258 if (h1_eval_htx_size(vsn, status, reason, hdrs) > max) {
259 if (htx_is_empty(htx))
260 goto error;
261 h1m_init_res(h1m);
262 h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
263 return 0;
264 }
265
Christopher Fauletc75668e2020-12-07 18:10:32 +0100266 if (((h1m->flags & H1_MF_METH_CONNECT) && code >= 200 && code < 300) || code == 101) {
Christopher Faulet5be651d2021-01-22 15:28:03 +0100267 h1m->flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
268 h1m->flags |= H1_MF_XFER_LEN;
269 h1m->curr_len = h1m->body_len = 0;
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200270 }
271 else if ((h1m->flags & H1_MF_METH_HEAD) || (code >= 100 && code < 200) ||
272 (code == 204) || (code == 304)) {
273 /* Responses known to have no body. */
274 h1m->flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
275 h1m->flags |= H1_MF_XFER_LEN;
276 h1m->curr_len = h1m->body_len = 0;
277 }
278 else if (h1m->flags & (H1_MF_CLEN|H1_MF_CHNK)) {
279 /* Responses with a known body length. */
280 h1m->flags |= H1_MF_XFER_LEN;
281 }
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200282
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200283 flags = h1m_htx_sl_flags(h1m);
284 sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, vsn, status, reason);
285 if (!sl || !htx_add_all_headers(htx, hdrs))
286 goto error;
287 sl->info.res.status = code;
288
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200289 /* If body length cannot be determined, set htx->extra to
290 * ULLONG_MAX. This value is impossible in other cases.
291 */
292 htx->extra = ((h1m->flags & H1_MF_XFER_LEN) ? h1m->curr_len : ULLONG_MAX);
293
294 end:
295 return 1;
296 error:
297 h1m->err_pos = h1m->next;
298 h1m->err_state = h1m->state;
299 htx->flags |= HTX_FL_PARSING_ERROR;
300 return 0;
301}
302
303/* Parse HTTP/1 headers. It returns the number of bytes parsed if > 0, or 0 if
304 * it couldn't proceed. Parsing errors are reported by setting the htx flag
305 * HTX_FL_PARSING_ERROR and filling h1m->err_pos and h1m->err_state fields. This
306 * functions is responsible to update the parser state <h1m> and the start-line
307 * <h1sl> if not NULL.
308 * For the requests, <h1sl> must always be provided. For responses, <h1sl> may
309 * be NULL and <h1m> flags HTTP_METH_CONNECT of HTTP_METH_HEAD may be set.
310 */
Christopher Fauletde471a42021-02-01 16:37:28 +0100311size_t h1_parse_msg_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *dsthtx,
312 struct buffer *srcbuf, size_t ofs, size_t max)
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200313{
314 struct http_hdr hdrs[global.tune.max_http_hdr];
315 int ret = 0;
316
317 if (!max || !b_data(srcbuf))
318 goto end;
319
320 /* Realing input buffer if necessary */
321 if (b_head(srcbuf) + b_data(srcbuf) > b_wrap(srcbuf))
Christopher Faulet00d7cde2021-02-04 11:01:51 +0100322 b_slow_realign_ofs(srcbuf, trash.area, 0);
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200323
324 if (!h1sl) {
325 /* If there no start-line, be sure to only parse the headers */
326 h1m->flags |= H1_MF_HDRS_ONLY;
327 }
328 ret = h1_headers_to_hdr_list(b_peek(srcbuf, ofs), b_tail(srcbuf),
329 hdrs, sizeof(hdrs)/sizeof(hdrs[0]), h1m, h1sl);
330 if (ret <= 0) {
331 /* Incomplete or invalid message. If the input buffer only
332 * contains headers and is full, which is detected by it being
333 * full and the offset to be zero, it's an error because
334 * headers are too large to be handled by the parser. */
335 if (ret < 0 || (!ret && !ofs && !buf_room_for_htx_data(srcbuf)))
336 goto error;
337 goto end;
338 }
339
340 /* messages headers fully parsed, do some checks to prepare the body
341 * parsing.
342 */
343
344 if (!(h1m->flags & H1_MF_RESP)) {
345 if (!h1_process_req_vsn(h1m, h1sl)) {
346 h1m->err_pos = h1sl->rq.v.ptr - b_head(srcbuf);
347 h1m->err_state = h1m->state;
348 goto vsn_error;
349 }
350 if (!h1_postparse_req_hdrs(h1m, h1sl, dsthtx, hdrs, max))
351 ret = 0;
352 }
353 else {
354 if (h1sl && !h1_process_res_vsn(h1m, h1sl)) {
355 h1m->err_pos = h1sl->st.v.ptr - b_head(srcbuf);
356 h1m->err_state = h1m->state;
357 goto vsn_error;
358 }
359 if (!h1_postparse_res_hdrs(h1m, h1sl, dsthtx, hdrs, max))
360 ret = 0;
361 }
362
Christopher Faulet76014fd2019-12-10 11:47:22 +0100363 /* Switch messages without any payload to DONE state */
364 if (((h1m->flags & H1_MF_CLEN) && h1m->body_len == 0) ||
Christopher Fauletd1ac2b92020-12-02 19:12:22 +0100365 ((h1m->flags & (H1_MF_XFER_LEN|H1_MF_CLEN|H1_MF_CHNK)) == H1_MF_XFER_LEN)) {
Christopher Faulet76014fd2019-12-10 11:47:22 +0100366 h1m->state = H1_MSG_DONE;
Christopher Fauletd1ac2b92020-12-02 19:12:22 +0100367 dsthtx->flags |= HTX_FL_EOM;
368 }
Christopher Faulet76014fd2019-12-10 11:47:22 +0100369
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200370 end:
371 return ret;
372 error:
373 h1m->err_pos = h1m->next;
374 h1m->err_state = h1m->state;
375 vsn_error:
376 dsthtx->flags |= HTX_FL_PARSING_ERROR;
377 return 0;
378
379}
380
Christopher Fauletcc3124c2019-08-12 22:42:21 +0200381/* Copy data from <srbuf> into an DATA block in <dsthtx>. If possible, a
382 * zero-copy is performed. It returns the number of bytes copied.
383 */
Christopher Fauletde471a42021-02-01 16:37:28 +0100384static size_t h1_copy_msg_data(struct htx **dsthtx, struct buffer *srcbuf, size_t ofs,
Christopher Fauletf7c20442021-02-02 19:40:07 +0100385 size_t count, size_t max, struct buffer *htxbuf)
Christopher Fauletcc3124c2019-08-12 22:42:21 +0200386{
Christopher Fauletaf542632019-10-01 21:52:49 +0200387 struct htx *tmp_htx = *dsthtx;
Christopher Fauletf7c20442021-02-02 19:40:07 +0100388 size_t block1, block2, ret = 0;
389
390 /* Be prepared to create at least one HTX block by reserving its size
391 * and adjust <count> accordingly.
392 */
393 max -= sizeof(struct htx_blk);
394 if (count > max)
395 count = max;
Christopher Fauletaf542632019-10-01 21:52:49 +0200396
Christopher Fauletcc3124c2019-08-12 22:42:21 +0200397 /* very often with large files we'll face the following
398 * situation :
399 * - htx is empty and points to <htxbuf>
Christopher Fauletf7c20442021-02-02 19:40:07 +0100400 * - count == srcbuf->data
Christopher Fauletcc3124c2019-08-12 22:42:21 +0200401 * - srcbuf->head == sizeof(struct htx)
402 * => we can swap the buffers and place an htx header into
403 * the target buffer instead
404 */
Christopher Fauletaf542632019-10-01 21:52:49 +0200405 if (unlikely(htx_is_empty(tmp_htx) && count == b_data(srcbuf) &&
Christopher Fauletcc3124c2019-08-12 22:42:21 +0200406 !ofs && b_head_ofs(srcbuf) == sizeof(struct htx))) {
407 void *raw_area = srcbuf->area;
408 void *htx_area = htxbuf->area;
409 struct htx_blk *blk;
410
411 srcbuf->area = htx_area;
412 htxbuf->area = raw_area;
Christopher Fauletaf542632019-10-01 21:52:49 +0200413 tmp_htx = (struct htx *)htxbuf->area;
414 tmp_htx->size = htxbuf->size - sizeof(*tmp_htx);
415 htx_reset(tmp_htx);
Christopher Fauletcc3124c2019-08-12 22:42:21 +0200416 b_set_data(htxbuf, b_size(htxbuf));
417
Christopher Fauletaf542632019-10-01 21:52:49 +0200418 blk = htx_add_blk(tmp_htx, HTX_BLK_DATA, count);
Christopher Fauletcc3124c2019-08-12 22:42:21 +0200419 blk->info += count;
Christopher Fauletaf542632019-10-01 21:52:49 +0200420
421 *dsthtx = tmp_htx;
Christopher Fauletcc3124c2019-08-12 22:42:21 +0200422 /* nothing else to do, the old buffer now contains an
423 * empty pre-initialized HTX header
424 */
425 return count;
426 }
427
Christopher Fauletf7c20442021-02-02 19:40:07 +0100428 /* * First block is the copy of contiguous data starting at offset <ofs>
429 * with <count> as max. <max> is updated accordingly
430 *
431 * * Second block is the remaining (count - block1) if <max> is large
432 * enough. Another HTX block is reserved.
433 */
434 block1 = b_contig_data(srcbuf, ofs);
435 block2 = 0;
436 if (block1 > count)
437 block1 = count;
438 max -= block1;
439
440 if (max > sizeof(struct htx_blk)) {
441 block2 = count - block1;
442 max -= sizeof(struct htx_blk);
443 if (block2 > max)
444 block2 = max;
445 }
446
447 ret = htx_add_data(tmp_htx, ist2(b_peek(srcbuf, ofs), block1));
448 if (ret == block1 && block2)
449 ret += htx_add_data(tmp_htx, ist2(b_orig(srcbuf), block2));
450 end:
451 return ret;
Christopher Fauletcc3124c2019-08-12 22:42:21 +0200452}
453
/* Lookup table giving the value of an hexadecimal digit, indexed by the
 * character code (0..255). Entries for characters that are not hexadecimal
 * digits are negative. The element type is explicitly signed because the
 * signedness of plain char is implementation-defined, and users of the table
 * rely on a "< 0" test to detect invalid characters.
 */
static const signed char hextable[] = {
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
	-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};
464
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200465/* Generic function to parse the current HTTP chunk. It may be used to parsed
466 * any kind of chunks, including incomplete HTTP chunks or splitted chunks
467 * because the buffer wraps. This version tries to performed zero-copy on large
468 * chunks if possible.
Christopher Faulet140691b2021-02-03 11:51:24 +0100469 */
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200470static size_t h1_parse_chunk(struct h1m *h1m, struct htx **dsthtx,
471 struct buffer *srcbuf, size_t ofs, size_t *max,
472 struct buffer *htxbuf)
Christopher Faulet140691b2021-02-03 11:51:24 +0100473{
474 uint64_t chksz;
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200475 size_t sz, used, lmax, total = 0;
Christopher Faulet140691b2021-02-03 11:51:24 +0100476 int ret = 0;
477
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200478 lmax = *max;
Christopher Faulet140691b2021-02-03 11:51:24 +0100479 switch (h1m->state) {
480 case H1_MSG_DATA:
481 new_chunk:
482 used = htx_used_space(*dsthtx);
483
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200484 if (b_data(srcbuf) == ofs || !lmax)
Christopher Faulet140691b2021-02-03 11:51:24 +0100485 break;
486
487 sz = b_data(srcbuf) - ofs;
488 if (unlikely(sz > h1m->curr_len))
489 sz = h1m->curr_len;
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200490 sz = h1_copy_msg_data(dsthtx, srcbuf, ofs, sz, lmax, htxbuf);
491 lmax -= htx_used_space(*dsthtx) - used;
Christopher Faulet140691b2021-02-03 11:51:24 +0100492 ofs += sz;
493 total += sz;
494 h1m->curr_len -= sz;
495 if (h1m->curr_len)
496 break;
497
498 h1m->state = H1_MSG_CHUNK_CRLF;
499 /*fall through */
500
501 case H1_MSG_CHUNK_CRLF:
502 ret = h1_skip_chunk_crlf(srcbuf, ofs, b_data(srcbuf));
503 if (ret <= 0)
504 break;
505 ofs += ret;
506 total += ret;
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200507
508 /* Don't parse next chunk to try to handle contiguous chunks if possible */
Christopher Faulet140691b2021-02-03 11:51:24 +0100509 h1m->state = H1_MSG_CHUNK_SIZE;
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200510 break;
Christopher Faulet140691b2021-02-03 11:51:24 +0100511
512 case H1_MSG_CHUNK_SIZE:
513 ret = h1_parse_chunk_size(srcbuf, ofs, b_data(srcbuf), &chksz);
514 if (ret <= 0)
515 break;
516 h1m->state = ((!chksz) ? H1_MSG_TRAILERS : H1_MSG_DATA);
517 h1m->curr_len = chksz;
518 h1m->body_len += chksz;
519 ofs += ret;
520 total += ret;
521
522 if (h1m->curr_len) {
523 h1m->state = H1_MSG_DATA;
524 goto new_chunk;
525 }
526 h1m->state = H1_MSG_TRAILERS;
527 break;
528
529 default:
530 /* unexpected */
531 ret = -1;
532 break;
533 }
534
535 if (ret < 0) {
536 (*dsthtx)->flags |= HTX_FL_PARSING_ERROR;
537 h1m->err_state = h1m->state;
538 h1m->err_pos = ofs;
539 total = 0;
540 }
541
542 /* Don't forget to update htx->extra */
543 (*dsthtx)->extra = h1m->curr_len;
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200544 *max = lmax;
545 return total;
546}
547
Christopher Fauletbdcefe52021-05-21 11:05:12 +0200548/* Parses full contiguous HTTP chunks. This version is optimized for small
549 * chunks and does not performed zero-copy. It must be called in
550 * H1_MSG_CHUNK_SIZE state. Be carefull if you change something in this
551 * function. It is really sensitive, any change may have an impact on
552 * performance.
553 */
554static size_t h1_parse_full_contig_chunks(struct h1m *h1m, struct htx **dsthtx,
555 struct buffer *srcbuf, size_t ofs, size_t *max,
556 struct buffer *htxbuf)
557{
558 char *start, *end, *dptr;
559 ssize_t dpos, ridx, save;
560 size_t lmax, total = 0;
561 uint64_t chksz;
562 struct htx_ret htxret;
563
564 /* source info :
565 * start : pointer at <ofs> position
566 * end : pointer marking the end of data to parse
567 * ridx : the reverse index (negative) marking the parser position (end[ridx])
568 */
569 ridx = -b_contig_data(srcbuf, ofs);
570 if (!ridx)
571 goto out;
572 start = b_peek(srcbuf, ofs);
573 end = start - ridx;
574
575 /* Reserve the maximum possible size for the data */
576 htxret = htx_reserve_max_data(*dsthtx);
577 if (!htxret.blk)
578 goto out;
579
580 /* destination info :
581 * dptr : pointer on the beginning of the data
582 * dpos : current position where to copy data
583 */
584 dptr = htx_get_blk_ptr(*dsthtx, htxret.blk);
585 dpos = htxret.ret;
586
587 /* Empty DATA block is not possible, thus if <dpos> is the beginning of
588 * the block, it means it is a new block. We can remove the block size
589 * from <max>. Then we must adjust it if it exceeds the free size in the
590 * block.
591 */
592 lmax = *max;
593 if (!dpos)
594 lmax -= sizeof(struct htx_blk);
595 if (lmax > htx_get_blksz(htxret.blk) - dpos)
596 lmax = htx_get_blksz(htxret.blk) - dpos;
597
598 while (1) {
599 /* The chunk size is in the following form, though we are only
600 * interested in the size and CRLF :
601 * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
602 */
603 chksz = 0;
604 save = ridx; /* Save the parser position to rewind if necessary */
605 while (1) {
606 int c;
607
608 if (!ridx)
609 goto end_parsing;
610
611 /* Convert current character */
Christopher Faulet7a835f32021-05-21 11:31:35 +0200612 c = hextable[(unsigned char)end[ridx]];
Christopher Fauletbdcefe52021-05-21 11:05:12 +0200613
614 /* not a hex digit anymore */
615 if (c < 0)
616 break;
617
618 /* Update current chunk size */
619 chksz = (chksz << 4) + c;
620
621 if (unlikely(chksz & 0xF0000000000000)) {
622 /* Don't get more than 13 hexa-digit (2^52 - 1)
623 * to never fed possibly bogus values from
624 * languages that use floats for their integers
625 */
626 goto parsing_error;
627 }
628 ++ridx;
629 }
630
631 if (unlikely(chksz > lmax))
632 goto end_parsing;
633
634 if (unlikely(ridx == save)) {
635 /* empty size not allowed */
636 goto parsing_error;
637 }
638
639 /* Skip spaces */
640 while (HTTP_IS_SPHT(end[ridx])) {
641 if (!++ridx)
642 goto end_parsing;
643 }
644
645 /* Up to there, we know that at least one byte is present. Check
646 * for the end of chunk size.
647 */
648 while (1) {
649 if (likely(end[ridx] == '\r')) {
650 /* Parse CRLF */
651 if (!++ridx)
652 goto end_parsing;
653 if (unlikely(end[ridx] != '\n')) {
654 /* CR must be followed by LF */
655 goto parsing_error;
656 }
657
658 /* done */
659 ++ridx;
660 break;
661 }
662 else if (end[ridx] == '\n') {
663 /* Parse LF only, nothing more to do */
664 ++ridx;
665 break;
666 }
667 else if (likely(end[ridx] == ';')) {
668 /* chunk extension, ends at next CRLF */
669 if (!++ridx)
670 goto end_parsing;
671 while (!HTTP_IS_CRLF(end[ridx])) {
672 if (!++ridx)
673 goto end_parsing;
674 }
675 /* we have a CRLF now, loop above */
676 continue;
677 }
678 else {
679 /* all other characters are unexpected */
680 goto parsing_error;
681 }
682 }
683
684 /* Exit if it is the last chunk */
685 if (unlikely(!chksz)) {
686 h1m->state = H1_MSG_TRAILERS;
687 save = ridx;
688 goto end_parsing;
689 }
690
691 /* Now check if the whole chunk is here (including the CRLF at
692 * the end), otherise we switch in H1_MSG_DATA stae.
693 */
694 if (chksz + 2 > -ridx) {
695 h1m->curr_len = chksz;
696 h1m->body_len += chksz;
697 h1m->state = H1_MSG_DATA;
698 (*dsthtx)->extra = h1m->curr_len;
699 save = ridx;
700 goto end_parsing;
701 }
702
703 memcpy(dptr + dpos, end + ridx, chksz);
704 h1m->body_len += chksz;
705 lmax -= chksz;
706 dpos += chksz;
707 ridx += chksz;
708
709 /* Parse CRLF or LF (always present) */
710 if (likely(end[ridx] == '\r'))
711 ++ridx;
712 if (end[ridx] != '\n') {
713 h1m->state = H1_MSG_CHUNK_CRLF;
714 goto parsing_error;
715 }
716 ++ridx;
717 }
718
719 end_parsing:
720 ridx = save;
721
722 /* Adjust the HTX block size or remove the block if nothing was copied
723 * (Empty HTX data block are not supported).
724 */
725 if (!dpos)
726 htx_remove_blk(*dsthtx, htxret.blk);
727 else
728 htx_change_blk_value_len(*dsthtx, htxret.blk, dpos);
729 total = end + ridx - start;
730 *max = lmax;
731
732 out:
733 return total;
734
735 parsing_error:
736 (*dsthtx)->flags |= HTX_FL_PARSING_ERROR;
737 h1m->err_state = h1m->state;
738 h1m->err_pos = ofs + end + ridx - start;
739 return 0;
740}
741
742/* Parse HTTP chunks. This function relies on an optimized function to parse
743 * contiguous chunks if possible. Otherwise, when a chunk is incomplete or when
744 * the underlying buffer is wrapping, a generic function is used.
745 */
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200746static size_t h1_parse_msg_chunks(struct h1m *h1m, struct htx **dsthtx,
747 struct buffer *srcbuf, size_t ofs, size_t max,
748 struct buffer *htxbuf)
749{
750 size_t ret, total = 0;
751
752 while (ofs < b_data(srcbuf)) {
753 ret = 0;
754
Christopher Fauletbdcefe52021-05-21 11:05:12 +0200755 /* First parse full contiguous chunks. It is only possible if we
756 * are waiting for the next chunk size.
757 */
758 if (h1m->state == H1_MSG_CHUNK_SIZE) {
759 ret = h1_parse_full_contig_chunks(h1m, dsthtx, srcbuf, ofs, &max, htxbuf);
760 /* exit on error */
761 if (!ret && (*dsthtx)->flags & HTX_FL_PARSING_ERROR) {
762 total = 0;
763 break;
764 }
765 /* or let a chance to parse remaining data */
766 total += ret;
767 ofs += ret;
768 ret = 0;
769 }
770
771 /* If some data remains, try to parse it using the generic
772 * function handling incomplete chunks and splitted chunks
773 * because of a wrapping buffer.
774 */
775 if (h1m->state < H1_MSG_TRAILERS && ofs < b_data(srcbuf)) {
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200776 ret = h1_parse_chunk(h1m, dsthtx, srcbuf, ofs, &max, htxbuf);
777 total += ret;
778 ofs += ret;
779 }
780
Christopher Fauletbdcefe52021-05-21 11:05:12 +0200781 /* nothing more was parsed or parsing was stopped on incomplete
782 * chunk, we can exit, handling parsing error if necessary.
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200783 */
Christopher Fauletbdcefe52021-05-21 11:05:12 +0200784 if (!ret || h1m->state != H1_MSG_CHUNK_SIZE) {
Christopher Faulet0d4c9242021-05-21 10:56:24 +0200785 if ((*dsthtx)->flags & HTX_FL_PARSING_ERROR)
786 total = 0;
787 break;
788 }
789 }
790
Christopher Faulet140691b2021-02-03 11:51:24 +0100791 return total;
792}
793
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200794/* Parse HTTP/1 body. It returns the number of bytes parsed if > 0, or 0 if it
795 * couldn't proceed. Parsing errors are reported by setting the htx flags
796 * HTX_FL_PARSING_ERROR and filling h1m->err_pos and h1m->err_state fields. This
797 * functions is responsible to update the parser state <h1m>.
798 */
Christopher Fauletde471a42021-02-01 16:37:28 +0100799size_t h1_parse_msg_data(struct h1m *h1m, struct htx **dsthtx,
800 struct buffer *srcbuf, size_t ofs, size_t max,
801 struct buffer *htxbuf)
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200802{
Christopher Fauletde471a42021-02-01 16:37:28 +0100803 size_t sz, total = 0;
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200804
Christopher Fauletf7c20442021-02-02 19:40:07 +0100805 if (b_data(srcbuf) == ofs || !max)
Christopher Faulet140691b2021-02-03 11:51:24 +0100806 return 0;
Christopher Fauletf7c20442021-02-02 19:40:07 +0100807
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200808 if (h1m->flags & H1_MF_CLEN) {
809 /* content-length: read only h2m->body_len */
Christopher Fauletf7c20442021-02-02 19:40:07 +0100810 sz = b_data(srcbuf) - ofs;
811 if (unlikely(sz > h1m->curr_len))
Christopher Fauletde471a42021-02-01 16:37:28 +0100812 sz = h1m->curr_len;
Christopher Fauletf7c20442021-02-02 19:40:07 +0100813 sz = h1_copy_msg_data(dsthtx, srcbuf, ofs, sz, max, htxbuf);
814 h1m->curr_len -= sz;
815 (*dsthtx)->extra = h1m->curr_len;
816 total += sz;
Christopher Fauletd1ac2b92020-12-02 19:12:22 +0100817 if (!h1m->curr_len) {
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200818 h1m->state = H1_MSG_DONE;
Christopher Fauletd1ac2b92020-12-02 19:12:22 +0100819 (*dsthtx)->flags |= HTX_FL_EOM;
820 }
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200821 }
822 else if (h1m->flags & H1_MF_CHNK) {
823 /* te:chunked : parse chunks */
Christopher Faulet140691b2021-02-03 11:51:24 +0100824 total += h1_parse_msg_chunks(h1m, dsthtx, srcbuf, ofs, max, htxbuf);
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200825 }
826 else if (h1m->flags & H1_MF_XFER_LEN) {
827 /* XFER_LEN is set but not CLEN nor CHNK, it means there is no
828 * body. Switch the message in DONE state
829 */
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200830 h1m->state = H1_MSG_DONE;
Christopher Fauletd1ac2b92020-12-02 19:12:22 +0100831 (*dsthtx)->flags |= HTX_FL_EOM;
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200832 }
833 else {
834 /* no content length, read till SHUTW */
Christopher Fauletf7c20442021-02-02 19:40:07 +0100835 sz = b_data(srcbuf) - ofs;
836 sz = h1_copy_msg_data(dsthtx, srcbuf, ofs, sz, max, htxbuf);
837 total += sz;
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200838 }
839
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200840 return total;
841}
842
843/* Parse HTTP/1 trailers. It returns the number of bytes parsed if > 0, or 0 if
844 * it couldn't proceed. Parsing errors are reported by setting the htx flags
845 * HTX_FL_PARSING_ERROR and filling h1m->err_pos and h1m->err_state fields. This
846 * functions is responsible to update the parser state <h1m>.
847 */
Christopher Fauletde471a42021-02-01 16:37:28 +0100848size_t h1_parse_msg_tlrs(struct h1m *h1m, struct htx *dsthtx,
849 struct buffer *srcbuf, size_t ofs, size_t max)
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200850{
851 struct http_hdr hdrs[global.tune.max_http_hdr];
852 struct h1m tlr_h1m;
853 int ret = 0;
854
855 if (!max || !b_data(srcbuf))
856 goto end;
857
858 /* Realing input buffer if necessary */
859 if (b_peek(srcbuf, ofs) > b_tail(srcbuf))
Christopher Faulet00d7cde2021-02-04 11:01:51 +0100860 b_slow_realign_ofs(srcbuf, trash.area, 0);
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200861
862 tlr_h1m.flags = (H1_MF_NO_PHDR|H1_MF_HDRS_ONLY);
863 ret = h1_headers_to_hdr_list(b_peek(srcbuf, ofs), b_tail(srcbuf),
864 hdrs, sizeof(hdrs)/sizeof(hdrs[0]), &tlr_h1m, NULL);
865 if (ret <= 0) {
866 /* Incomplete or invalid trailers. If the input buffer only
867 * contains trailers and is full, which is detected by it being
868 * full and the offset to be zero, it's an error because
869 * trailers are too large to be handled by the parser. */
870 if (ret < 0 || (!ret && !ofs && !buf_room_for_htx_data(srcbuf)))
871 goto error;
872 goto end;
873 }
874
875 /* messages trailers fully parsed. */
876 if (h1_eval_htx_hdrs_size(hdrs) > max) {
877 if (htx_is_empty(dsthtx))
878 goto error;
879 ret = 0;
880 goto end;
881 }
882
883 if (!htx_add_all_trailers(dsthtx, hdrs))
884 goto error;
885
Christopher Faulet76014fd2019-12-10 11:47:22 +0100886 h1m->state = H1_MSG_DONE;
Christopher Fauletd1ac2b92020-12-02 19:12:22 +0100887 dsthtx->flags |= HTX_FL_EOM;
Christopher Faulet76014fd2019-12-10 11:47:22 +0100888
Christopher Faulet4f0f88a2019-08-10 11:17:44 +0200889 end:
890 return ret;
891 error:
892 h1m->err_state = h1m->state;
893 h1m->err_pos = h1m->next;
894 dsthtx->flags |= HTX_FL_PARSING_ERROR;
895 return 0;
896}
897
Christopher Faulet53a899b2019-10-08 16:38:42 +0200898/* Appends the H1 representation of the request line <sl> to the chunk <chk>. It
899 * returns 1 if data are successfully appended, otherwise it returns 0.
900 */
901int h1_format_htx_reqline(const struct htx_sl *sl, struct buffer *chk)
902{
903 struct ist uri;
904 size_t sz = chk->data;
905
Christopher Fauletfb38c912021-04-26 09:38:55 +0200906 uri = h1_get_uri(sl);
Christopher Faulet53a899b2019-10-08 16:38:42 +0200907 if (!chunk_memcat(chk, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)) ||
908 !chunk_memcat(chk, " ", 1) ||
909 !chunk_memcat(chk, uri.ptr, uri.len) ||
910 !chunk_memcat(chk, " ", 1))
911 goto full;
912
913 if (sl->flags & HTX_SL_F_VER_11) {
914 if (!chunk_memcat(chk, "HTTP/1.1", 8))
915 goto full;
916 }
917 else {
918 if (!chunk_memcat(chk, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)))
919 goto full;
920 }
921
922 if (!chunk_memcat(chk, "\r\n", 2))
923 goto full;
924
925 return 1;
926
927 full:
928 chk->data = sz;
929 return 0;
930}
931
932/* Appends the H1 representation of the status line <sl> to the chunk <chk>. It
933 * returns 1 if data are successfully appended, otherwise it returns 0.
934 */
935int h1_format_htx_stline(const struct htx_sl *sl, struct buffer *chk)
936{
937 size_t sz = chk->data;
938
939 if (HTX_SL_LEN(sl) + 4 > b_room(chk))
940 return 0;
941
942 if (sl->flags & HTX_SL_F_VER_11) {
943 if (!chunk_memcat(chk, "HTTP/1.1", 8))
944 goto full;
945 }
946 else {
947 if (!chunk_memcat(chk, HTX_SL_RES_VPTR(sl), HTX_SL_RES_VLEN(sl)))
948 goto full;
949 }
950 if (!chunk_memcat(chk, " ", 1) ||
951 !chunk_memcat(chk, HTX_SL_RES_CPTR(sl), HTX_SL_RES_CLEN(sl)) ||
952 !chunk_memcat(chk, " ", 1) ||
953 !chunk_memcat(chk, HTX_SL_RES_RPTR(sl), HTX_SL_RES_RLEN(sl)) ||
954 !chunk_memcat(chk, "\r\n", 2))
955 goto full;
956
957 return 1;
958
959 full:
960 chk->data = sz;
961 return 0;
962}
963
Ilya Shipitsin47d17182020-06-21 21:42:57 +0500964/* Appends the H1 representation of the header <n> with the value <v> to the
Christopher Faulet53a899b2019-10-08 16:38:42 +0200965 * chunk <chk>. It returns 1 if data are successfully appended, otherwise it
966 * returns 0.
967 */
968int h1_format_htx_hdr(const struct ist n, const struct ist v, struct buffer *chk)
969{
970 size_t sz = chk->data;
971
972 if (n.len + v.len + 4 > b_room(chk))
973 return 0;
974
975 if (!chunk_memcat(chk, n.ptr, n.len) ||
976 !chunk_memcat(chk, ": ", 2) ||
977 !chunk_memcat(chk, v.ptr, v.len) ||
978 !chunk_memcat(chk, "\r\n", 2))
979 goto full;
980
981 return 1;
982
983 full:
984 chk->data = sz;
985 return 0;
986}
987
988/* Appends the H1 representation of the data <data> to the chunk <chk>. If
989 * <chunked> is non-zero, it emits HTTP/1 chunk-encoded data. It returns 1 if
990 * data are successfully appended, otherwise it returns 0.
991 */
992int h1_format_htx_data(const struct ist data, struct buffer *chk, int chunked)
993{
994 size_t sz = chk->data;
995
996 if (chunked) {
997 uint32_t chksz;
998 char tmp[10];
999 char *beg, *end;
1000
1001 chksz = data.len;
1002
1003 beg = end = tmp+10;
1004 *--beg = '\n';
1005 *--beg = '\r';
1006 do {
1007 *--beg = hextab[chksz & 0xF];
1008 } while (chksz >>= 4);
1009
1010 if (!chunk_memcat(chk, beg, end - beg) ||
1011 !chunk_memcat(chk, data.ptr, data.len) ||
1012 !chunk_memcat(chk, "\r\n", 2))
1013 goto full;
1014 }
1015 else {
1016 if (!chunk_memcat(chk, data.ptr, data.len))
1017 return 0;
1018 }
1019
1020 return 1;
1021
1022 full:
1023 chk->data = sz;
1024 return 0;
1025}
1026
Christopher Faulet4f0f88a2019-08-10 11:17:44 +02001027/*
1028 * Local variables:
1029 * c-indent-level: 8
1030 * c-basic-offset: 8
1031 * End:
1032 */