[OPTIM] halog: check once for correct line format and reuse the pointer Almost all filters first check the line format, which takes a lot of code and requires parsing back and forth. By centralizing this test, we can save about 15-20 more percent of performance for all filters. Also, the test was wrong, it was checking that the source IP address was starting with a digit, which is not always true with local IPv6 addresses. Instead, we now check that the next field (accept field) starts with an opening bracket and is followed by a digit between 0 and 3 (day of the month). Doing this has contributed a 2% speedup because all other field calculations were relative to a closer field.

commit: 26deaf51d9784183537f1436dd10b20e8cfdb941 [log] [tgz]
author: Willy Tarreau <w@1wt.eu> Sun Jul 10 19:47:48 2011 +0200
committer: Willy Tarreau <w@1wt.eu> Mon Jul 11 06:48:04 2011 +0200
tree: f76285f77bc0a184698e0ca7bd0715607e5de86a
parent: 758a6ea46cac53d8f4b48e769d5169caaf2afa08 [diff]
diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c
index 67952fb..0fbbbe7 100644
--- a/contrib/halog/halog.c
+++ b/contrib/halog/halog.c

@@ -385,7 +385,7 @@
 
 int main(int argc, char **argv)
 {
-	const char *b, *e, *p, *time_field;
+	const char *b, *e, *p, *time_field, *accept_field;
 	const char *output_file = NULL;
 	int f, tot, last, linenum, err, parse_err;
 	struct timer *t = NULL, *t2;
@@ -494,17 +494,33 @@
 
 	while ((line = fgets2(stdin)) != NULL) {
 		linenum++;
-		time_field = NULL;
+		time_field = NULL; accept_field = NULL;
 
 		test = 1;
+
+		/* for any line we process, we first ensure that there is a field
+		 * looking like the accept date field (beginning with a '[').
+		 */
+		accept_field = field_start(line, ACCEPT_FIELD + skip_fields);
+		if (*accept_field != '[') {
+			parse_err++;
+			continue;
+		}
+
+		/* the day of month field is begin 01 and 31 */
+		if (unlikely(accept_field[1] < '0' || accept_field[1] > '3')) {
+			parse_err++;
+			continue;
+		}
+
 		if (unlikely(filter & FILT_HTTP_ONLY)) {
 			/* only report lines with at least 4 timers */
-
-			if (!time_field)
-				time_field = field_start(line, TIME_FIELD + skip_fields);
-			if (!*time_field) {
-				truncated_line(linenum, line);
-				continue;
+			if (unlikely(!time_field)) {
+				time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
+				if (!*time_field) {
+					truncated_line(linenum, line);
+					continue;
+				}
 			}
 
 			e = field_stop(time_field + 1);
@@ -523,11 +539,12 @@
 			int tps;
 
 			/* only report lines with response times larger than filter_time_resp */
-			if (!time_field)
-				time_field = field_start(line, TIME_FIELD + skip_fields);
-			if (!*time_field) {
-				truncated_line(linenum, line);
-				continue;
+			if (unlikely(!time_field)) {
+				time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
+				if (!*time_field) {
+					truncated_line(linenum, line);
+					continue;
+				}
 			}
 
 			e = field_stop(time_field + 1);
@@ -557,13 +574,11 @@
 
 		if (unlikely(filter & FILT_ERRORS_ONLY)) {
 			/* only report erroneous status codes */
-			if (!time_field)
-				time_field = field_start(line, TIME_FIELD + skip_fields);
-			if (!*time_field) {
-				truncated_line(linenum, line);
-				continue;
-			}
-			b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
+			if (time_field)
+				b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
+			else
+				b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
+
 			if (!*b) {
 				truncated_line(linenum, line);
 				continue;
@@ -581,12 +596,7 @@
 			continue;
 
 		if (unlikely(filter & (FILT_ACC_COUNT|FILT_ACC_DELAY))) {
-			b = field_start(line, ACCEPT_FIELD + skip_fields);
-			if (!*b) {
-				truncated_line(linenum, line);
-				continue;
-			}
-
+			b = accept_field;
 			tot++;
 			val = convert_date(b);
 			//printf("date=%s => %d\n", b, val);
@@ -603,15 +613,12 @@
 		if (unlikely(filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE))) {
 			int f;
 
-			if (!time_field)
-				time_field = field_start(line, TIME_FIELD + skip_fields);
-			if (!*time_field) {
-				truncated_line(linenum, line);
-				continue;
-			}
-			if (!*time_field) {
-				truncated_line(linenum, line);
-				continue;
+			if (unlikely(!time_field)) {
+				time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
+				if (!*time_field) {
+					truncated_line(linenum, line);
+					continue;
+				}
 			}
 
 			e = field_stop(time_field + 1);
@@ -689,19 +696,11 @@
 		}
 
 		if (unlikely(filter & FILT_COUNT_STATUS)) {
-			/* first, let's ensure that the line is a traffic line (beginning
-			 * with an IP address)
-			 */
-			b = field_start(line, SOURCE_FIELD + skip_fields);
-			if (*b < '0' || *b > '9') {
-				parse_err++;
-				continue;
-			}
-
 			if (time_field)
 				b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
 			else
-				b = field_start(b, STATUS_FIELD - SOURCE_FIELD + 1);
+				b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
+
 			if (!*b) {
 				truncated_line(linenum, line);
 				continue;
@@ -714,19 +713,11 @@
 		}
 
 		if (unlikely(filter & FILT_COUNT_TERM_CODES)) {
-			/* first, let's ensure that the line is a traffic line (beginning
-			 * with an IP address)
-			 */
-			b = field_start(line, SOURCE_FIELD + skip_fields);
-			if (*b < '0' || *b > '9') {
-				parse_err++;
-				continue;
-			}
-
 			if (time_field)
 				b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
 			else
-				b = field_start(b, TERM_CODES_FIELD - SOURCE_FIELD + 1);
+				b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
+
 			if (!*b) {
 				truncated_line(linenum, line);
 				continue;
@@ -743,19 +734,10 @@
 			struct ebmb_node *srv_node;
 			struct srv_st *srv;
 
-			/* first, let's ensure that the line is a traffic line (beginning
-			 * with an IP address)
-			 */
-			b = field_start(line, SOURCE_FIELD + skip_fields);
-			if (*b < '0' || *b > '9') {
-				parse_err++;
-				continue;
-			}
-
 			/* the server field is before the status field, so let's
 			 * parse them in the proper order.
 			 */
-			b = field_start(b, SERVER_FIELD - SOURCE_FIELD + 1);
+			b = field_start(accept_field, SERVER_FIELD - ACCEPT_FIELD + 1);
 			if (!*b) {
 				truncated_line(linenum, line);
 				continue;
@@ -840,22 +822,13 @@
 		}
 
 		if (unlikely(filter & FILT_COUNT_URL_ANY)) {
-			/* first, let's ensure that the line is a traffic line (beginning
-			 * with an IP address, or already having a time field)
-			 */
-
 			/* let's collect the response time */
-			if (!time_field) {
-				b = field_start(line, SOURCE_FIELD + skip_fields); // avg 95 ns per line
-				if (*b < '0' || *b > '9') {
-					parse_err++;
+			if (unlikely(!time_field)) {
+				time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);  // avg 115 ns per line
+				if (!*time_field) {
+					truncated_line(linenum, line);
 					continue;
 				}
-				time_field = field_start(field_stop(b + 1), TIME_FIELD - SOURCE_FIELD);  // avg 115 ns per line
-			}
-			if (!*time_field) {
-				truncated_line(linenum, line);
-				continue;
 			}
 
 			/* we have the field TIME_FIELD starting at <time_field>. We'll
commit	26deaf51d9784183537f1436dd10b20e8cfdb941	[log] [tgz]
author	Willy Tarreau <w@1wt.eu>	Sun Jul 10 19:47:48 2011 +0200
committer	Willy Tarreau <w@1wt.eu>	Mon Jul 11 06:48:04 2011 +0200
tree	f76285f77bc0a184698e0ca7bd0715607e5de86a
parent	758a6ea46cac53d8f4b48e769d5169caaf2afa08 [diff]