[MEDIUM] http: rework chunk-size parser Chunked encoding can be slightly more complex than what was implemented. Specifically, it supports some optional extensions that were not parsed till now if present, and would have caused an error to be returned. Also, now we enforce check for too large values in chunk sizes in order to ensure we never overflow. Last, we're now able to return a request error if we can't read the chunk size because the buffer is already full.

commit: 115acb975521ae2e548a19840b77ef32ffaab98a [log] [tgz]
author: Willy Tarreau <w@1wt.eu> Sat Dec 26 13:56:06 2009 +0100
committer: Willy Tarreau <w@1wt.eu> Sat Dec 26 13:56:06 2009 +0100
tree: 4f57e96fc575794603403c12b317827491de08f8
parent: 0394594b067f6383d56e444396e98fa40fbbc3a3 [diff]
diff --git a/src/proto_http.c b/src/proto_http.c
index 82bdfe3..b6e6435 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c

@@ -1803,6 +1803,71 @@
 	txn->flags |= TX_CON_HDR_PARS;
 }
 
+/* Parse the chunk size at msg->col.
+ * Return >0 on success, 0 when some data is missing, <0 on error.
+ */
+int http_parse_chunk_size(struct buffer *req, struct http_msg *msg)
+{
+	unsigned long body = msg->col;
+	unsigned int chunk = 0;
+
+	/* The chunk size is in the following form, though we are only
+	 * interested in the size and CRLF :
+	 *    1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
+	 */
+	while (1) {
+		int c;
+		if (body >= req->l)
+			return 0;
+		c = hex2i(msg->sol[body]);
+		if (c < 0) /* not a hex digit anymore */
+			break;
+		body++;
+		if (chunk & 0xF000000) /* overflow will occur */
+			return -1;
+		chunk = (chunk << 4) + c;
+	}
+
+	while (1) {
+		if (body >= req->l)
+			return 0;
+		if (!http_is_spht[(unsigned char)msg->sol[body]])
+			break;
+		body++;
+	}
+
+	/* Up to there, we know that at least one byte is present at [body]. */
+	switch (msg->sol[body]) {
+	case ';': /* chunk extension, ends at next CRLF */
+		if (++body >= req->l)
+			return 0;
+
+		while (msg->sol[body] != '\r') {
+			if (++body >= req->l)
+				return 0;
+		}
+		/* we now have a CR at <body>, fall through */
+	case '\r':
+		if (++body >= req->l)
+			return 0;
+		if (msg->sol[body] != '\n')
+			return -1;
+		body++;
+		break;
+
+	default:
+		return -1;
+	}
+
+	/* OK we found our CRLF and now <body> points to the next byte,
+	 * which may or may not be present.
+	 */
+	msg->sov = body;
+	msg->hdr_content_len = chunk;
+	msg->msg_state = HTTP_MSG_DATA;
+	return 1;
+}
+
 
 /* This stream analyser waits for a complete HTTP request. It returns 1 if the
  * processing can continue on next analysers, or zero if it either needs more
@@ -2816,27 +2881,15 @@
 	}
 
 	if (msg->msg_state == HTTP_MSG_CHUNK_SIZE) {
-		unsigned long body = msg->col;
-		unsigned int chunk = 0;
-
-		while (body < req->l && !HTTP_IS_CRLF(msg->sol[body])) {
-			int c = hex2i(msg->sol[body]);
-			if (c < 0)
-				goto return_bad_req;
-			chunk = (chunk << 4) + c;
-			body++;
-		}
-
-		/* now we want CRLF */
-		if (body + 2 >= req->l) /* we want CRLF too */
-			goto missing_data; /* end of buffer? data missing! */
+		/* read the chunk size and assign it to ->hdr_content_len, then
+		 * set ->sov to point to the body and switch to DATA state.
+		 */
+		int ret = http_parse_chunk_size(req, msg);
 
-		if ((msg->sol[body++] != '\r') || (msg->sol[body++] != '\n'))
+		if (!ret)
+			goto missing_data;
+		else if (ret < 0)
 			goto return_bad_req;
-
-		msg->sov = body;
-		txn->req.hdr_content_len = chunk;
-		msg->msg_state = HTTP_MSG_DATA;
 	}
 
 	/* Now we're in HTTP_MSG_DATA state.
@@ -2854,6 +2907,9 @@
 
  missing_data:
 	/* we get here if we need to wait for more data */
+	if (req->flags & BF_FULL)
+		goto return_bad_req;
+
 	if ((req->flags & BF_READ_TIMEOUT) || tick_is_expired(req->analyse_exp, now_ms)) {
 		txn->status = 408;
 		stream_int_retnclose(req->prod, error_message(s, HTTP_ERR_408));
commit	115acb975521ae2e548a19840b77ef32ffaab98a	[log] [tgz]
author	Willy Tarreau <w@1wt.eu>	Sat Dec 26 13:56:06 2009 +0100
committer	Willy Tarreau <w@1wt.eu>	Sat Dec 26 13:56:06 2009 +0100
tree	4f57e96fc575794603403c12b317827491de08f8
parent	0394594b067f6383d56e444396e98fa40fbbc3a3 [diff]