[CONTRIB] halog: faster fgets() and add support for percentile reporting
A new fgets implementation saves about 25-50% of the time spent parsing
the logs.
Percentile calculation has been added for timers using -pct.
diff --git a/contrib/halog/Makefile b/contrib/halog/Makefile
index 911cae6..2215e03 100644
--- a/contrib/halog/Makefile
+++ b/contrib/halog/Makefile
@@ -1,10 +1,12 @@
INCLUDE = -I../../include
OPTIMIZE = -O3 -mtune=pentium-m
-CC = gcc
-OBJS = halog
+OBJS = halog halog64
+
+halog: halog.c fgets2.c
+ $(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
-halog: halog.c
+halog64: halog.c fgets2-64.c
$(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
clean:
diff --git a/contrib/halog/fgets2-64.c b/contrib/halog/fgets2-64.c
new file mode 100644
index 0000000..409d602
--- /dev/null
+++ b/contrib/halog/fgets2-64.c
@@ -0,0 +1,147 @@
+/*
+ * fast fgets() replacement for log parsing
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This function manages its own buffer and returns a pointer to that buffer
+ * in order to avoid expensive memory copies. It also checks for line breaks
+ * 32 bits at a time. It could be improved a lot using mmap() but we would
+ * not be allowed to replace trailing \n with zeroes and we would be limited
+ * to small log files on 32-bit machines.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+// return 1 if the integer contains at least one zero byte
+static inline unsigned int has_zero(unsigned int x)
+{
+ if (!(x & 0xFF000000U) ||
+ !(x & 0xFF0000U) ||
+ !(x & 0xFF00U) ||
+ !(x & 0xFFU))
+ return 1;
+ return 0;
+}
+
+static inline unsigned int has_zero64(unsigned long long x)
+{
+ unsigned long long x2;
+
+ x2 = x & (x >> 8);
+ /* no need to split it further */
+ if ((x2 & 0x00FF) && (x2 & 0x00FF0000) && (x2 & 0x00FF00000000ULL) && (x2 & 0x00FF000000000000ULL))
+ return 0; // approx 11/12 return here
+
+ if (!(x & 0xff00000000000000ULL) ||
+ !(x & 0xff000000000000ULL) ||
+ !(x & 0xff0000000000ULL) ||
+ !(x & 0xff00000000ULL) ||
+ !(x & 0xff000000UL) ||
+ !(x & 0xff0000UL) ||
+ !(x & 0xff00UL) ||
+ !(x & 0xffUL))
+ return 1; // approx 1/3 of the remaining return here
+
+ return 0;
+}
+
+#define FGETS2_BUFSIZE (256*1024)
+const char *fgets2(FILE *stream)
+{
+ static char buffer[FGETS2_BUFSIZE + 5];
+ static char *end = buffer;
+ static char *line = buffer;
+
+ char *next;
+ int ret;
+
+ next = line;
+
+ while (1) {
+ /* this is a speed-up, we read 32 bits at once and check for an
+ * LF character there. We stop if found then continue one at a
+ * time.
+ */
+ while (next < end && (((unsigned long)next) & 7) && *next != '\n')
+ next++;
+
+ /* now next is multiple of 4 or equal to end */
+ while (next <= (end-32)) {
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ }
+
+ /* we finish if needed. Note that next might be slightly higher
+ * than end here because we might have gone past it above.
+ */
+ while (next < end) {
+ if (*next == '\n') {
+ const char *start = line;
+
+ *next = '\0';
+ line = next + 1;
+ return start;
+ }
+ next++;
+ }
+
+ /* we found an incomplete line. First, let's move the
+ * remaining part of the buffer to the beginning, then
+ * try to complete the buffer with a new read.
+ */
+ if (line > buffer) {
+ if (end != line)
+ memmove(buffer, line, end - line);
+ end = buffer + (end - line);
+ next = end;
+ line = buffer;
+ } else {
+ if (end == buffer + FGETS2_BUFSIZE)
+ return NULL;
+ }
+
+ ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);
+
+ if (ret <= 0) {
+ if (end == line)
+ return NULL;
+
+ *end = '\0';
+ return line;
+ }
+
+ end += ret;
+ /* search for '\n' again */
+ }
+}
+
+#ifdef BENCHMARK
+int main() {
+ const char *p;
+ unsigned int lines = 0;
+
+ while ((p=fgets2(stdin)))
+ lines++;
+ printf("lines=%d\n", lines);
+ return 0;
+}
+#endif
diff --git a/contrib/halog/fgets2.c b/contrib/halog/fgets2.c
new file mode 100644
index 0000000..13918a7
--- /dev/null
+++ b/contrib/halog/fgets2.c
@@ -0,0 +1,138 @@
+/*
+ * fast fgets() replacement for log parsing
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This function manages its own buffer and returns a pointer to that buffer
+ * in order to avoid expensive memory copies. It also checks for line breaks
+ * 32 bits at a time. It could be improved a lot using mmap() but we would
+ * not be allowed to replace trailing \n with zeroes and we would be limited
+ * to small log files on 32-bit machines.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+// return 1 if the integer contains at least one zero byte
+static inline unsigned int has_zero(unsigned int x)
+{
+ if (!(x & 0xFF000000U) ||
+ !(x & 0xFF0000U) ||
+ !(x & 0xFF00U) ||
+ !(x & 0xFFU))
+ return 1;
+ return 0;
+}
+
+
+#define FGETS2_BUFSIZE (256*1024)
+const char *fgets2(FILE *stream)
+{
+ static char buffer[FGETS2_BUFSIZE + 5];
+ static char *end = buffer;
+ static char *line = buffer;
+
+ char *next;
+ int ret;
+
+ next = line;
+
+ while (1) {
+ /* this is a speed-up, we read 32 bits at once and check for an
+ * LF character there. We stop if found then continue one at a
+ * time.
+ */
+ while (next < end && (((unsigned long)next) & 3) && *next != '\n')
+ next++;
+
+ /* now next is multiple of 4 or equal to end */
+ while (next <= (end-32)) {
+ if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ }
+
+ /* we finish if needed. Note that next might be slightly higher
+ * than end here because we might have gone past it above.
+ */
+ while (next < end) {
+ if (*next == '\n') {
+ const char *start = line;
+
+ *next = '\0';
+ line = next + 1;
+ return start;
+ }
+ next++;
+ }
+
+ /* we found an incomplete line. First, let's move the
+ * remaining part of the buffer to the beginning, then
+ * try to complete the buffer with a new read.
+ */
+ if (line > buffer) {
+ if (end != line)
+ memmove(buffer, line, end - line);
+ end = buffer + (end - line);
+ next = end;
+ line = buffer;
+ } else {
+ if (end == buffer + FGETS2_BUFSIZE)
+ return NULL;
+ }
+
+ ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);
+
+ if (ret <= 0) {
+ if (end == line)
+ return NULL;
+
+ *end = '\0';
+ return line;
+ }
+
+ end += ret;
+ /* search for '\n' again */
+ }
+}
+
+#ifdef BENCHMARK
+int main() {
+ const char *p;
+ unsigned int lines = 0;
+
+ while ((p=fgets2(stdin)))
+ lines++;
+ printf("lines=%d\n", lines);
+ return 0;
+}
+#endif
diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c
index 6bfc76a..32b4a7f 100644
--- a/contrib/halog/halog.c
+++ b/contrib/halog/halog.c
@@ -11,7 +11,7 @@
*/
/*
- * gcc -O2 -o halog halog.c -Iinclude src/ebtree.c src/eb32tree.c
+ * gcc -O2 -o halog2 halog2.c -Iinclude src/ebtree.c src/eb32tree.c fgets2.c
*
* Usage:
* $0 [ min_delay [ min_count [ field_shift ]]] < haproxy.log
@@ -57,17 +57,19 @@
#define FILT_ACC_DELAY 0x10
#define FILT_ACC_COUNT 0x20
#define FILT_GRAPH_TIMERS 0x40
+#define FILT_PERCENTILE 0x80
unsigned int filter = 0;
unsigned int filter_invert = 0;
-char line[MAXLINE];
+const char *line;
+const char *fgets2(FILE *stream);
void die(const char *msg)
{
fprintf(stderr,
"%s"
- "Usage: halog [-c] [-v] [-gt] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
+ "Usage: halog [-c] [-v] [-gt] [-pct] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
"\n",
msg ? msg : ""
);
@@ -385,6 +387,8 @@
filter_invert = !filter_invert;
else if (strcmp(argv[0], "-gt") == 0)
filter |= FILT_GRAPH_TIMERS;
+ else if (strcmp(argv[0], "-pct") == 0)
+ filter |= FILT_PERCENTILE;
else if (strcmp(argv[0], "-o") == 0) {
if (output_file)
die("Fatal: output file name already specified.\n");
@@ -409,7 +413,7 @@
tot = 0;
parse_err = 0;
- while (fgets(line, MAXLINE, stdin) != NULL) {
+ while ((line = fgets2(stdin)) != NULL) {
linenum++;
if (filter & FILT_ERRORS_ONLY) {
@@ -429,7 +433,7 @@
if (test) {
tot++;
if (!(filter & FILT_COUNT_ONLY))
- printf("%s", line);
+ puts(line);
}
continue;
}
@@ -441,6 +445,7 @@
continue;
}
+ tot++;
val = convert_date(b);
//printf("date=%s => %d\n", b, val);
if (val < 0) {
@@ -453,7 +458,7 @@
continue;
}
- if (filter & FILT_GRAPH_TIMERS) {
+ if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE)) {
int f;
b = field_start(line, TIME_FIELD + skip_fields);
@@ -489,23 +494,47 @@
* that the total time is negative, hence the reason to reset
* it.
*/
- if (err) {
- if (array[4] < 0)
- array[4] = -1;
- t2 = insert_timer(&timers[0], &t, array[4]); // total time
- t2->count++;
- } else {
- int v;
+
+ if (filter & FILT_GRAPH_TIMERS) {
+ if (err) {
+ if (array[4] < 0)
+ array[4] = -1;
+ t2 = insert_timer(&timers[0], &t, array[4]); // total time
+ t2->count++;
+ } else {
+ int v;
- t2 = insert_timer(&timers[1], &t, array[0]); t2->count++; // req
- t2 = insert_timer(&timers[2], &t, array[2]); t2->count++; // conn
- t2 = insert_timer(&timers[3], &t, array[3]); t2->count++; // resp
+ t2 = insert_timer(&timers[1], &t, array[0]); t2->count++; // req
+ t2 = insert_timer(&timers[2], &t, array[2]); t2->count++; // conn
+ t2 = insert_timer(&timers[3], &t, array[3]); t2->count++; // resp
+
+ v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
+ if (v < 0 && !(filter & FILT_QUIET))
+ fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
+ line, array[0], array[1], array[2], array[3], array[4], v);
+ t2 = insert_timer(&timers[4], &t, v); t2->count++;
+ tot++;
+ }
+ } else { /* percentile */
+ if (err) {
+ if (array[4] < 0)
+ array[4] = -1;
+ t2 = insert_value(&timers[0], &t, array[4]); // total time
+ t2->count++;
+ } else {
+ int v;
- v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
- if (v < 0 && !(filter & FILT_QUIET))
- fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
- line, array[0], array[1], array[2], array[3], array[4], v);
- t2 = insert_timer(&timers[4], &t, v); t2->count++;
+ t2 = insert_value(&timers[1], &t, array[0]); t2->count++; // req
+ t2 = insert_value(&timers[2], &t, array[2]); t2->count++; // conn
+ t2 = insert_value(&timers[3], &t, array[3]); t2->count++; // resp
+
+ v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
+ if (v < 0 && !(filter & FILT_QUIET))
+ fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
+ line, array[0], array[1], array[2], array[3], array[4], v);
+ t2 = insert_value(&timers[4], &t, v); t2->count++;
+ tot++;
+ }
}
continue;
}
@@ -582,6 +611,47 @@
n = eb32_next(n);
}
+ }
+ }
+ else if (filter & FILT_PERCENTILE) {
+ /* report timers by percentile :
+ * <percent> <total> <max_req_time> <max_conn_time> <max_resp_time> <max_data_time>
+ * We don't count errs.
+ */
+ struct eb32_node *n[5];
+ unsigned long cum[5];
+ double step;
+
+ for (f = 1; f < 5; f++) {
+ n[f] = eb32_first(&timers[f]);
+ cum[f] = container_of(n[f], struct timer, node)->count;
+ }
+
+ for (step = 1; step <= 1000;) {
+ unsigned int thres = tot * (step / 1000.0);
+
+ printf("%3.1f %d ", step/10.0, thres);
+ for (f = 1; f < 5; f++) {
+ struct eb32_node *next;
+ while (cum[f] < thres) {
+ /* need to find other keys */
+ next = eb32_next(n[f]);
+ if (!next)
+ break;
+ n[f] = next;
+ cum[f] += container_of(next, struct timer, node)->count;
+ }
+
+ /* value still within $step % of total */
+ printf("%d ", n[f]->key);
+ }
+ putchar('\n');
+ if (step >= 100 && step < 900)
+ step += 50; // jump 5% by 5% between those steps.
+ else if (step >= 20 && step < 980)
+ step += 10;
+ else
+ step += 1;
}
}