[MEDIUM] optimized the request parser a bit more. Some while() constructs are not compiled very efficiently by gcc, yet they are used to scan all the text in the start line and the headers. Replacing them with more efficient (but ugly) loops provides a global gain of about 2%, which is not bad at all!

commit: 230fd0bfdfec5df9e76e918a31ba6f0ff61930af [log] [tgz]
author: Willy Tarreau <w@1wt.eu> Sun Dec 17 12:05:00 2006 +0100
committer: Willy Tarreau <w@1wt.eu> Sun Dec 17 12:05:00 2006 +0100
tree: aae60d46d6a00edab8084af997c61cd45d2a3327
parent: 976f1ee561f9cb5167cfc2aeee218a345e730b13 [diff]
diff --git a/src/proto_http.c b/src/proto_http.c
index a6482ec..4dae1a8 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c

@@ -515,22 +515,51 @@
 #endif
 
 			} else if (parse == HTTP_PA_HEADER) {
-			parse_inside_hdr:
+				char *ptr;
 				/* Inside a non-empty header */
 
+			parse_inside_hdr:
 				delete_header = 0;
-				while (req->lr < req->r && !IS_CTL(*req->lr))
-					req->lr++;
-				if (req->lr == req->r)
+
+				ptr = req->lr;
+
+#ifdef GCC_FINALLY_PRODUCES_EFFICIENT_WHILE_LOOPS
+				/* This code is disabled right now because
+				 * even though it seems straightforward, the
+				 * object code produced by GCC is so
+				 * suboptimal that about 10% of the time
+				 * spent parsing headers is there.
+				 */
+				while (ptr < req->r && !IS_CTL(*ptr))
+					ptr++;
+				req->lr = ptr;
+				if (ptr == req->r)
 					break;
+#else
+				/* Just by using this loop instead of the previous one,
+				 * the global performance increases by about 2% ! The
+				 * code is also smaller by about 50 bytes.
+				 */
+				goto reqhdr_loop_chk;
+			reqhdr_loop:
+				ptr++;
+			reqhdr_loop_chk:
+				if (ptr == req->r) {
+					req->lr = ptr;
+					break;
+				}
+				if (*ptr != 0x7F && (unsigned)*ptr >= 0x20)
+					goto reqhdr_loop;
+				req->lr = ptr;
+#endif
 
 				/* we have a CTL char */
-				if (*req->lr == '\r') {
+				if (*ptr == '\r') {
 					t->hreq.hdr_state = HTTP_PA_HDR_LF | HTTP_PA_CR_SKIP | HTTP_PA_LF_EXP;
 					req->lr++;
 					continue;
 				}
-				else if (*req->lr == '\n') {
+				else if (*ptr == '\n') {
 					t->hreq.hdr_state = HTTP_PA_HDR_LF;
 					QUICK_JUMP(parse_hdr_lf, continue);
 				}
@@ -577,19 +606,49 @@
 				QUICK_JUMP(parse_start, continue);
 
 			} else if (parse == HTTP_PA_START) {
+				char *ptr;
+				/* Inside the start line */
+
 			parse_start:
-				/* Start line */
-				while (req->lr < req->r && !IS_CTL(*req->lr))
-					req->lr++;
-				if (req->lr == req->r)
+				ptr = req->lr;
+
+#ifdef GCC_FINALLY_PRODUCES_EFFICIENT_WHILE_LOOPS
+				/* This code is disabled right now because
+				 * even though it seems straightforward, the
+				 * object code produced by GCC is so
+				 * suboptimal that about 10% of the time
+				 * spent parsing headers is there.
+				 */
+				while (ptr < req->r && !IS_CTL(*ptr))
+					ptr++;
+				req->lr = ptr;
+				if (ptr == req->r)
 					break;
+#else
+				/* Just by using this loop instead of the previous one,
+				 * the global performance increases by about 2% ! The
+				 * code is also smaller by about 50 bytes.
+				 */
+				goto reqstrt_loop_chk;
+			reqstrt_loop:
+				ptr++;
+			reqstrt_loop_chk:
+				if (ptr == req->r) {
+					req->lr = ptr;
+					break;
+				}
+				if (*ptr != 0x7F && (unsigned)*ptr >= 0x20)
+					goto reqstrt_loop;
+				req->lr = ptr;
+#endif
+
 				/* we have a CTL char */
-				if (*req->lr == '\r') {
+				if (*ptr == '\r') {
 					req->lr++;
 					t->hreq.hdr_state = HTTP_PA_STRT_LF | HTTP_PA_CR_SKIP | HTTP_PA_LF_EXP;
 					continue;
 				}
-				else if (*req->lr == '\n') {
+				else if (*ptr == '\n') {
 					t->hreq.hdr_state = HTTP_PA_STRT_LF;
 					/* we know that we still have one char available */
 					QUICK_JUMP(parse_strt_lf, continue);
@@ -765,26 +824,23 @@
 		 * filters and various options. In order to support 3-level
 		 * switching, here's how we should proceed :
 		 *
-		 *  a) run fe->filters.
+		 *  a) run be->filters.
 		 *     if (switch) then switch ->fi and ->be to the new backend.
-		 *  b) run fi->filters.
+		 *  b) run be->filters.
 		 *     If there's another switch, then switch ->be to the new be.
 		 *  c) run be->filters
 		 *     There cannot be any switch from there, so ->be cannot be
 		 *     changed anymore.
 		 *
+		 *  => filters always apply to ->be, then ->be may change.
+		 *
 		 *  The response path will be able to apply either ->be, or
 		 *  ->be then ->fi, or ->be then ->fi then ->fe filters in order
 		 * to match the reverse of the forward sequence.
 		 */
 
 		do {
-			if (t->fi == t->fe)
-				rule_set = t->fe;
-			else if (t->be == t->fi)
-				rule_set = t->fi;
-			else
-				rule_set = t->be;
+			rule_set = t->be;
 
 			/* try headers filters */
 			if (rule_set->req_exp != NULL)
@@ -815,9 +871,11 @@
 		} while (rule_set != t->be);  /* we loop only if t->be has changed */
 		
 
-		/* Right now, we know that we have processed the entire headers
+		/*
+		 * Right now, we know that we have processed the entire headers
 		 * and that unwanted requests have been filtered out. We can do
-		 * whatever we want with the remaining request.
+		 * whatever we want with the remaining request. Also, now we
+		 * may have separate values for ->fe, ->fi and ->be.
 		 */
commit	230fd0bfdfec5df9e76e918a31ba6f0ff61930af	[log] [tgz]
author	Willy Tarreau <w@1wt.eu>	Sun Dec 17 12:05:00 2006 +0100
committer	Willy Tarreau <w@1wt.eu>	Sun Dec 17 12:05:00 2006 +0100
tree	aae60d46d6a00edab8084af997c61cd45d2a3327
parent	976f1ee561f9cb5167cfc2aeee218a345e730b13 [diff]