[CONTRIB] halog: faster fgets() and add support for percentile reporting

A new fgets implementation saves about 25-50% of the time spent parsing
the logs.

Percentile calculation has been added for timers using -pct.
diff --git a/contrib/halog/Makefile b/contrib/halog/Makefile
index 911cae6..2215e03 100644
--- a/contrib/halog/Makefile
+++ b/contrib/halog/Makefile
@@ -1,10 +1,12 @@
 INCLUDE  = -I../../include
 OPTIMIZE = -O3 -mtune=pentium-m
-CC       = gcc
 
-OBJS     = halog
+OBJS     = halog halog64
+
+halog: halog.c fgets2.c
+	$(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
 
-halog: halog.c
+halog64: halog.c fgets2-64.c
 	$(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
 
 clean:
diff --git a/contrib/halog/fgets2-64.c b/contrib/halog/fgets2-64.c
new file mode 100644
index 0000000..409d602
--- /dev/null
+++ b/contrib/halog/fgets2-64.c
@@ -0,0 +1,147 @@
+/*
+ * fast fgets() replacement for log parsing
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This function manages its own buffer and returns a pointer to that buffer
+ * in order to avoid expensive memory copies. It also checks for line breaks
+ * 32 bits at a time. It could be improved a lot using mmap() but we would
+ * not be allowed to replace trailing \n with zeroes and we would be limited
+ * to small log files on 32-bit machines.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+// return 1 if the integer contains at least one zero byte
+static inline unsigned int has_zero(unsigned int x)
+{
+	if (!(x & 0xFF000000U) ||
+	    !(x & 0xFF0000U) ||
+	    !(x & 0xFF00U) ||
+	    !(x & 0xFFU))
+		return 1;
+	return 0;
+}
+
+static inline unsigned int has_zero64(unsigned long long x)
+{
+	unsigned long long x2;
+
+	x2 = x & (x >> 8);
+	/* no need to split it further */
+	if ((x2 & 0x00FF) && (x2 & 0x00FF0000) && (x2 & 0x00FF00000000ULL) && (x2 & 0x00FF000000000000ULL))
+			return 0; // approx 11/12 return here
+
+	if (!(x & 0xff00000000000000ULL) ||
+	    !(x & 0xff000000000000ULL) ||
+	    !(x & 0xff0000000000ULL) ||
+	    !(x & 0xff00000000ULL) ||
+	    !(x & 0xff000000UL) ||
+	    !(x & 0xff0000UL) ||
+	    !(x & 0xff00UL) ||
+	    !(x & 0xffUL))
+		return 1; // approx 1/3 of the remaining return here
+
+	return 0;
+}
+
+#define FGETS2_BUFSIZE		(256*1024)
+const char *fgets2(FILE *stream)
+{
+	static char buffer[FGETS2_BUFSIZE + 5];
+	static char *end = buffer;
+	static char *line = buffer;
+
+	char *next;
+	int ret;
+
+	next = line;
+
+	while (1) {
+		/* this is a speed-up, we read 32 bits at once and check for an
+		 * LF character there. We stop if found then continue one at a
+		 * time.
+		 */
+		while (next < end && (((unsigned long)next) & 7) && *next != '\n')
+			next++;
+
+		/* now next is multiple of 4 or equal to end */
+		while (next <= (end-32)) {
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+		}
+
+		/* we finish if needed. Note that next might be slightly higher
+		 * than end here because we might have gone past it above.
+		 */
+		while (next < end) {
+			if (*next == '\n') {
+				const char *start = line;
+
+				*next = '\0';
+				line = next + 1;
+				return start;
+			}
+			next++;
+		}
+
+		/* we found an incomplete line. First, let's move the
+		 * remaining part of the buffer to the beginning, then
+		 * try to complete the buffer with a new read.
+		 */
+		if (line > buffer) {
+			if (end != line)
+				memmove(buffer, line, end - line);
+			end = buffer + (end - line);
+			next = end;
+			line = buffer;
+		} else {
+			if (end == buffer + FGETS2_BUFSIZE)
+				return NULL;
+		}
+
+		ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);
+
+		if (ret <= 0) {
+			if (end == line)
+				return NULL;
+
+			*end = '\0';
+			return line;
+		}
+
+		end += ret;
+		/* search for '\n' again */
+	}
+}
+
+#ifdef BENCHMARK
+int main() {
+	const char *p;
+	unsigned int lines = 0;
+
+	while ((p=fgets2(stdin)))
+		lines++;
+	printf("lines=%d\n", lines);
+	return 0;
+}
+#endif
diff --git a/contrib/halog/fgets2.c b/contrib/halog/fgets2.c
new file mode 100644
index 0000000..13918a7
--- /dev/null
+++ b/contrib/halog/fgets2.c
@@ -0,0 +1,138 @@
+/*
+ * fast fgets() replacement for log parsing
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This function manages its own buffer and returns a pointer to that buffer
+ * in order to avoid expensive memory copies. It also checks for line breaks
+ * 32 bits at a time. It could be improved a lot using mmap() but we would
+ * not be allowed to replace trailing \n with zeroes and we would be limited
+ * to small log files on 32-bit machines.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+// return 1 if the integer contains at least one zero byte
+static inline unsigned int has_zero(unsigned int x)
+{
+	if (!(x & 0xFF000000U) ||
+	    !(x & 0xFF0000U) ||
+	    !(x & 0xFF00U) ||
+	    !(x & 0xFFU))
+		return 1;
+	return 0;
+}
+
+
+#define FGETS2_BUFSIZE		(256*1024)
+const char *fgets2(FILE *stream)
+{
+	static char buffer[FGETS2_BUFSIZE + 5];
+	static char *end = buffer;
+	static char *line = buffer;
+
+	char *next;
+	int ret;
+
+	next = line;
+
+	while (1) {
+		/* this is a speed-up, we read 32 bits at once and check for an
+		 * LF character there. We stop if found then continue one at a
+		 * time.
+		 */
+		while (next < end && (((unsigned long)next) & 3) && *next != '\n')
+			next++;
+
+		/* now next is multiple of 4 or equal to end */
+		while (next <= (end-32)) {
+			if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+		}
+
+		/* we finish if needed. Note that next might be slightly higher
+		 * than end here because we might have gone past it above.
+		 */
+		while (next < end) {
+			if (*next == '\n') {
+				const char *start = line;
+
+				*next = '\0';
+				line = next + 1;
+				return start;
+			}
+			next++;
+		}
+
+		/* we found an incomplete line. First, let's move the
+		 * remaining part of the buffer to the beginning, then
+		 * try to complete the buffer with a new read.
+		 */
+		if (line > buffer) {
+			if (end != line)
+				memmove(buffer, line, end - line);
+			end = buffer + (end - line);
+			next = end;
+			line = buffer;
+		} else {
+			if (end == buffer + FGETS2_BUFSIZE)
+				return NULL;
+		}
+
+		ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);
+
+		if (ret <= 0) {
+			if (end == line)
+				return NULL;
+
+			*end = '\0';
+			return line;
+		}
+
+		end += ret;
+		/* search for '\n' again */
+	}
+}
+
+#ifdef BENCHMARK
+int main() {
+	const char *p;
+	unsigned int lines = 0;
+
+	while ((p=fgets2(stdin)))
+		lines++;
+	printf("lines=%d\n", lines);
+	return 0;
+}
+#endif
diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c
index 6bfc76a..32b4a7f 100644
--- a/contrib/halog/halog.c
+++ b/contrib/halog/halog.c
@@ -11,7 +11,7 @@
  */
 
 /*
- * gcc -O2 -o halog halog.c -Iinclude src/ebtree.c src/eb32tree.c
+ * gcc -O2 -o halog2 halog2.c -Iinclude src/ebtree.c src/eb32tree.c fgets2.c
  *
  * Usage:
  *    $0 [ min_delay [ min_count [ field_shift ]]] < haproxy.log
@@ -57,17 +57,19 @@
 #define FILT_ACC_DELAY		0x10
 #define FILT_ACC_COUNT		0x20
 #define FILT_GRAPH_TIMERS	0x40
+#define FILT_PERCENTILE		0x80
 
 unsigned int filter = 0;
 unsigned int filter_invert = 0;
-char line[MAXLINE];
+const char *line;
 
+const char *fgets2(FILE *stream);
 
 void die(const char *msg)
 {
 	fprintf(stderr,
 		"%s"
-		"Usage: halog [-c] [-v] [-gt] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
+		"Usage: halog [-c] [-v] [-gt] [-pct] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
 		"\n",
 		msg ? msg : ""
 		);
@@ -385,6 +387,8 @@
 			filter_invert = !filter_invert;
 		else if (strcmp(argv[0], "-gt") == 0)
 			filter |= FILT_GRAPH_TIMERS;
+		else if (strcmp(argv[0], "-pct") == 0)
+			filter |= FILT_PERCENTILE;
 		else if (strcmp(argv[0], "-o") == 0) {
 			if (output_file)
 				die("Fatal: output file name already specified.\n");
@@ -409,7 +413,7 @@
 	tot = 0;
 	parse_err = 0;
 
-	while (fgets(line, MAXLINE, stdin) != NULL) {
+	while ((line = fgets2(stdin)) != NULL) {
 		linenum++;
 
 		if (filter & FILT_ERRORS_ONLY) {
@@ -429,7 +433,7 @@
 			if (test) {
 				tot++;
 				if (!(filter & FILT_COUNT_ONLY))
-					printf("%s", line);
+					puts(line);
 			}
 			continue;
 		}
@@ -441,6 +445,7 @@
 				continue;
 			}
 
+			tot++;
 			val = convert_date(b);
 			//printf("date=%s => %d\n", b, val);
 			if (val < 0) {
@@ -453,7 +458,7 @@
 			continue;
 		}
 
-		if (filter & FILT_GRAPH_TIMERS) {
+		if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE)) {
 			int f;
 
 			b = field_start(line, TIME_FIELD + skip_fields);
@@ -489,23 +494,47 @@
 			 * that the total time is negative, hence the reason to reset
 			 * it.
 			 */
-			if (err) {
-				if (array[4] < 0)
-					array[4] = -1;
-				t2 = insert_timer(&timers[0], &t, array[4]);  // total time
-				t2->count++;
-			} else {
-				int v;
+
+			if (filter & FILT_GRAPH_TIMERS) {
+				if (err) {
+					if (array[4] < 0)
+						array[4] = -1;
+					t2 = insert_timer(&timers[0], &t, array[4]);  // total time
+					t2->count++;
+				} else {
+					int v;
 
-				t2 = insert_timer(&timers[1], &t, array[0]); t2->count++;  // req
-				t2 = insert_timer(&timers[2], &t, array[2]); t2->count++;  // conn
-				t2 = insert_timer(&timers[3], &t, array[3]); t2->count++;  // resp
+					t2 = insert_timer(&timers[1], &t, array[0]); t2->count++;  // req
+					t2 = insert_timer(&timers[2], &t, array[2]); t2->count++;  // conn
+					t2 = insert_timer(&timers[3], &t, array[3]); t2->count++;  // resp
+
+					v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
+					if (v < 0 && !(filter & FILT_QUIET))
+						fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
+							line, array[0], array[1], array[2], array[3], array[4], v);
+					t2 = insert_timer(&timers[4], &t, v); t2->count++;
+					tot++;
+				}
+			} else { /* percentile */
+				if (err) {
+					if (array[4] < 0)
+						array[4] = -1;
+					t2 = insert_value(&timers[0], &t, array[4]);  // total time
+					t2->count++;
+				} else {
+					int v;
 
-				v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
-				if (v < 0 && !(filter & FILT_QUIET))
-					fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
-						line, array[0], array[1], array[2], array[3], array[4], v);
-				t2 = insert_timer(&timers[4], &t, v); t2->count++;
+					t2 = insert_value(&timers[1], &t, array[0]); t2->count++;  // req
+					t2 = insert_value(&timers[2], &t, array[2]); t2->count++;  // conn
+					t2 = insert_value(&timers[3], &t, array[3]); t2->count++;  // resp
+
+					v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
+					if (v < 0 && !(filter & FILT_QUIET))
+						fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
+							line, array[0], array[1], array[2], array[3], array[4], v);
+					t2 = insert_value(&timers[4], &t, v); t2->count++;
+					tot++;
+				}
 			}
 			continue;
 		}
@@ -582,6 +611,47 @@
 
 				n = eb32_next(n);
 			}
+		}
+	}
+	else if (filter & FILT_PERCENTILE) {
+		/* report timers by percentile :
+		 *    <percent> <total> <max_req_time> <max_conn_time> <max_resp_time> <max_data_time>
+		 * We don't count errs.
+		 */
+		struct eb32_node *n[5];
+		unsigned long cum[5];
+		double step;
+
+		for (f = 1; f < 5; f++) {
+			n[f] = eb32_first(&timers[f]);
+			cum[f] = container_of(n[f], struct timer, node)->count;
+		}
+
+		for (step = 1; step <= 1000;) {
+			unsigned int thres = tot * (step / 1000.0);
+
+			printf("%3.1f %d ", step/10.0, thres);
+			for (f = 1; f < 5; f++) {
+				struct eb32_node *next;
+				while (cum[f] < thres) {
+					/* need to find other keys */
+					next = eb32_next(n[f]);
+					if (!next)
+						break;
+					n[f] = next;
+					cum[f] += container_of(next, struct timer, node)->count;
+				}
+
+				/* value still within $step % of total */
+				printf("%d ", n[f]->key);
+			}
+			putchar('\n');
+			if (step >= 100 && step < 900)
+				step += 50;  // jump 5% by 5% between those steps.
+			else if (step >= 20 && step < 980)
+				step += 10;
+			else
+				step += 1;
 		}
 	}