[CONTRIB] halog: fast log parser for haproxy

halog can search errors, count lines, sort by accept date, look for
traffic holes and large connection counts at output graph plots of
timers.
diff --git a/contrib/halog/Makefile b/contrib/halog/Makefile
new file mode 100644
index 0000000..911cae6
--- /dev/null
+++ b/contrib/halog/Makefile
@@ -0,0 +1,11 @@
+INCLUDE  = -I../../include
+OPTIMIZE = -O3 -mtune=pentium-m
+CC       = gcc
+
+OBJS     = halog
+
+halog: halog.c
+	$(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
+
+clean:
+	rm -vf $(OBJS)
diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c
new file mode 100644
index 0000000..6bfc76a
--- /dev/null
+++ b/contrib/halog/halog.c
@@ -0,0 +1,599 @@
+/*
+ * haproxy log time reporter
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+/*
+ * gcc -O2 -o halog halog.c -Iinclude src/ebtree.c src/eb32tree.c
+ *
+ * Usage:
+ *    $0 [ min_delay [ min_count [ field_shift ]]] < haproxy.log
+ *    Note: if min_delay < 0, it only outputs lines with status codes 5xx.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include <common/eb32tree.h>
+
+#define ACCEPT_FIELD 6
+#define TIME_FIELD 9
+#define STATUS_FIELD 10
+#define CONN_FIELD 15
+#define MAXLINE 16384
+#define QBITS 4
+
+#define SKIP_CHAR(p,c) do { while (1) if (!*p) break; else if (*(p++) == c) break; } while (0)
+
+/* [0] = err/date, [1] = req, [2] = conn, [3] = resp, [4] = data */
+static struct eb_root timers[5] = {
+	EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
+	EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
+};
+
+struct timer {
+	struct eb32_node node;
+	unsigned int count;
+};
+
+
+#define FILT_COUNT_ONLY		0x01
+#define FILT_INVERT		0x02
+#define FILT_QUIET		0x04
+#define FILT_ERRORS_ONLY	0x08
+#define FILT_ACC_DELAY		0x10
+#define FILT_ACC_COUNT		0x20
+#define FILT_GRAPH_TIMERS	0x40
+
+unsigned int filter = 0;
+unsigned int filter_invert = 0;
+char line[MAXLINE];
+
+
+void die(const char *msg)
+{
+	fprintf(stderr,
+		"%s"
+		"Usage: halog [-c] [-v] [-gt] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
+		"\n",
+		msg ? msg : ""
+		);
+	exit(1);
+}
+
+
+/* return pointer to first char not part of current field starting at <p>. */
+const char *field_stop(const char *p)
+{
+	unsigned char c;
+
+	while (1) {
+		c = *(p++);
+		if (c > ' ')
+			continue;
+		if (c == ' ' || c == '\t' || c == 0)
+			break;
+	}
+	return p - 1;
+}
+
+/* return field <field> (starting from 1) in string <p>. Only consider
+ * contiguous spaces (or tabs) as one delimiter. May return pointer to
+ * last char if field is not found. Equivalent to awk '{print $field}'.
+ */
+const char *field_start(const char *p, int field)
+{
+	unsigned char c;
+	while (1) {
+		/* skip spaces */
+		while (1) {
+			c = *p;
+			if (c > ' ')
+				break;
+			if (c == ' ' || c == '\t')
+				goto next;
+			if (!c) /* end of line */
+				return p;
+			/* other char => new field */
+			break;
+		next:
+			p++;
+		}
+
+		/* start of field */
+		field--;
+		if (!field)
+			return p;
+
+		/* skip this field */
+		while (1) {
+			c = *(p++);
+			if (c > ' ')
+				continue;
+			if (c == ' ' || c == '\t')
+				break;
+			if (c == '\0')
+				return p;
+		}
+	}
+}
+
+/* keep only the <bits> higher bits of <i> */
+static inline unsigned int quantify_u32(unsigned int i, int bits)
+{
+	int high;
+
+	if (!bits)
+		return 0;
+
+	if (i)
+		high = fls_auto(i);    // 1 to 32
+	else
+		high = 0;
+
+	if (high <= bits)
+		return i;
+
+	return i & ~((1 << (high - bits)) - 1);
+}
+
+/* keep only the <bits> higher bits of the absolute value of <i>, as well as
+ * its sign. */
+static inline int quantify(int i, int bits)
+{
+	if (i >= 0)
+		return quantify_u32(i, bits);
+	else
+		return -quantify_u32(-i, bits);
+}
+
+/* Insert timer value <v> into tree <r>. A pre-allocated node must be passed
+ * in <alloc>. It may be NULL, in which case the function will allocate it
+ * itself. It will be reset to NULL once consumed. The caller is responsible
+ * for freeing the node once not used anymore. The node where the value was
+ * inserted is returned.
+ */
+struct timer *insert_timer(struct eb_root *r, struct timer **alloc, int v)
+{
+	struct timer *t = *alloc;
+	struct eb32_node *n;
+
+	if (!t) {
+		t = calloc(sizeof(*t), 1);
+		if (unlikely(!t)) {
+			fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
+			exit(1);
+		}
+	}
+	t->node.key = quantify(v, QBITS);         // keep only the higher QBITS bits
+
+	n = eb32i_insert(r, &t->node);
+	if (n == &t->node)
+		t = NULL;  /* node inserted, will malloc next time */
+
+	*alloc = t;
+	return container_of(n, struct timer, node);
+}
+
+/* Insert value value <v> into tree <r>. A pre-allocated node must be passed
+ * in <alloc>. It may be NULL, in which case the function will allocate it
+ * itself. It will be reset to NULL once consumed. The caller is responsible
+ * for freeing the node once not used anymore. The node where the value was
+ * inserted is returned.
+ */
+struct timer *insert_value(struct eb_root *r, struct timer **alloc, int v)
+{
+	struct timer *t = *alloc;
+	struct eb32_node *n;
+
+	if (!t) {
+		t = calloc(sizeof(*t), 1);
+		if (unlikely(!t)) {
+			fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
+			exit(1);
+		}
+	}
+	t->node.key = v;
+
+	n = eb32i_insert(r, &t->node);
+	if (n == &t->node)
+		t = NULL;  /* node inserted, will malloc next time */
+
+	*alloc = t;
+	return container_of(n, struct timer, node);
+}
+
+int str2ic(const char *s)
+{
+	int i = 0;
+	int j, k;
+
+	if (*s != '-') {
+		/* positive number */
+		while (1) {
+			j = (*s++) - '0';
+			k = i * 10;
+			if ((unsigned)j > 9)
+				break;
+			i = k + j;
+		}
+	} else {
+		/* negative number */
+		s++;
+		while (1) {
+			j = (*s++) - '0';
+			k = i * 10;
+			if ((unsigned)j > 9)
+				break;
+			i = k - j;
+		}
+	}
+
+	return i;
+}
+
+
+/* Equivalent to strtoul with a length. */
+static inline unsigned int __strl2ui(const char *s, int len)
+{
+	unsigned int i = 0;
+	while (len-- > 0) {
+		i = i * 10 - '0';
+		i += (unsigned char)*s++;
+	}
+	return i;
+}
+
+unsigned int strl2ui(const char *s, int len)
+{
+	return __strl2ui(s, len);
+}
+
+/* Convert "[04/Dec/2008:09:49:40.555]" to an integer equivalent to the time of
+ * the day in milliseconds. It returns -1 for all unparsable values. The parser
+ * looks ugly but gcc emits far better code that way.
+ */
+int convert_date(const char *field)
+{
+	unsigned int h, m, s, ms;
+	unsigned char c;
+	const char *b, *e;
+
+	h = m = s = ms = 0;
+	e = field;
+
+	/* skip the date */
+	while (1) {
+		c = *(e++);
+		if (c == ':')
+			break;
+		if (!c)
+			goto out_err;
+	}
+
+	/* hour + ':' */
+	b = e;
+	while (1) {
+		c = *(e++) - '0';
+		if (c > 9)
+			break;
+		h = h * 10 + c;
+	}
+	if (c == (unsigned char)(0 - '0'))
+		goto out_err;
+
+	/* minute + ':' */
+	b = e;
+	while (1) {
+		c = *(e++) - '0';
+		if (c > 9)
+			break;
+		m = m * 10 + c;
+	}
+	if (c == (unsigned char)(0 - '0'))
+		goto out_err;
+
+	/* second + '.' or ']' */
+	b = e;
+	while (1) {
+		c = *(e++) - '0';
+		if (c > 9)
+			break;
+		s = s * 10 + c;
+	}
+	if (c == (unsigned char)(0 - '0'))
+		goto out_err;
+
+	/* if there's a '.', we have milliseconds */
+	if (c == (unsigned char)('.' - '0')) {
+		/* millisecond second + ']' */
+		b = e;
+		while (1) {
+			c = *(e++) - '0';
+			if (c > 9)
+				break;
+			ms = ms * 10 + c;
+		}
+		if (c == (unsigned char)(0 - '0'))
+			goto out_err;
+	}
+	return (((h * 60) + m) * 60 + s) * 1000 + ms;
+ out_err:
+	return -1;
+}
+
+void truncated_line(int linenum, const char *line)
+{
+	if (!(filter & FILT_QUIET))
+		fprintf(stderr, "Truncated line %d: %s\n", linenum, line);
+}
+
+int main(int argc, char **argv)
+{
+	const char *b, *e, *p;
+	const char *output_file = NULL;
+	int f, tot, last, linenum, err, parse_err;
+	struct timer *t = NULL, *t2;
+	struct eb32_node *n;
+	int val, test;
+	int array[5];
+	int filter_acc_delay = 0, filter_acc_count = 0;
+	int skip_fields = 1;
+
+	argc--; argv++;
+	while (argc > 0) {
+		if (*argv[0] != '-')
+			break;
+
+		if (strcmp(argv[0], "-ad") == 0) {
+			if (argc < 2) die("missing option for -ad");
+			argc--; argv++;
+			filter |= FILT_ACC_DELAY;
+			filter_acc_delay = atol(*argv);
+		}
+		else if (strcmp(argv[0], "-ac") == 0) {
+			if (argc < 2) die("missing option for -ac");
+			argc--; argv++;
+			filter |= FILT_ACC_COUNT;
+			filter_acc_count = atol(*argv);
+		}
+		else if (strcmp(argv[0], "-s") == 0) {
+			if (argc < 2) die("missing option for -s");
+			argc--; argv++;
+			skip_fields = atol(*argv);
+		}
+		else if (strcmp(argv[0], "-e") == 0)
+			filter |= FILT_ERRORS_ONLY;
+		else if (strcmp(argv[0], "-c") == 0)
+			filter |= FILT_COUNT_ONLY;
+		else if (strcmp(argv[0], "-q") == 0)
+			filter |= FILT_QUIET;
+		else if (strcmp(argv[0], "-v") == 0)
+			filter_invert = !filter_invert;
+		else if (strcmp(argv[0], "-gt") == 0)
+			filter |= FILT_GRAPH_TIMERS;
+		else if (strcmp(argv[0], "-o") == 0) {
+			if (output_file)
+				die("Fatal: output file name already specified.\n");
+			if (argc < 2)
+				die("Fatal: missing output file name.\n");
+			output_file = argv[1];
+		}
+		argc--;
+		argv++;
+	}
+
+	if (!filter)
+		die("No action specified.\n");
+
+	if (filter & FILT_ACC_COUNT && !filter_acc_count)
+		filter_acc_count=1;
+
+	if (filter & FILT_ACC_DELAY && !filter_acc_delay)
+		filter_acc_delay = 1;
+
+	linenum = 0;
+	tot = 0;
+	parse_err = 0;
+
+	while (fgets(line, MAXLINE, stdin) != NULL) {
+		linenum++;
+
+		if (filter & FILT_ERRORS_ONLY) {
+			/* only report erroneous status codes */
+			b = field_start(line, STATUS_FIELD + skip_fields);
+			if (!*b) {
+				truncated_line(linenum, line);
+				continue;
+			}
+			if (*b == '-') {
+				test = 1;
+			} else {
+				val = strl2ui(b, 3);
+				test = (val >= 500 && val <= 599);
+			}
+			test ^= filter_invert;
+			if (test) {
+				tot++;
+				if (!(filter & FILT_COUNT_ONLY))
+					printf("%s", line);
+			}
+			continue;
+		}
+
+		if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) {
+			b = field_start(line, ACCEPT_FIELD + skip_fields);
+			if (!*b) {
+				truncated_line(linenum, line);
+				continue;
+			}
+
+			val = convert_date(b);
+			//printf("date=%s => %d\n", b, val);
+			if (val < 0) {
+				parse_err++;
+				continue;
+			}
+
+			t2 = insert_value(&timers[0], &t, val);
+			t2->count++;
+			continue;
+		}
+
+		if (filter & FILT_GRAPH_TIMERS) {
+			int f;
+
+			b = field_start(line, TIME_FIELD + skip_fields);
+			if (!*b) {
+				truncated_line(linenum, line);
+				continue;
+			}
+
+			e = field_stop(b + 1);
+			/* we have field TIME_FIELD in [b]..[e-1] */
+
+			p = b;
+			err = 0;
+			for (f = 0; f < 5 && *p; f++) {
+				array[f] = str2ic(p);
+				if (array[f] < 0) {
+					array[f] = -1;
+					err = 1;
+				}
+
+				SKIP_CHAR(p, '/');
+			}
+
+			if (f < 5) {
+				parse_err++;
+				continue;
+			}
+
+			/* if we find at least one negative time, we count one error
+			 * with a time equal to the total session time. This will
+			 * emphasize quantum timing effects associated to known
+			 * timeouts. Note that on some buggy machines, it is possible
+			 * that the total time is negative, hence the reason to reset
+			 * it.
+			 */
+			if (err) {
+				if (array[4] < 0)
+					array[4] = -1;
+				t2 = insert_timer(&timers[0], &t, array[4]);  // total time
+				t2->count++;
+			} else {
+				int v;
+
+				t2 = insert_timer(&timers[1], &t, array[0]); t2->count++;  // req
+				t2 = insert_timer(&timers[2], &t, array[2]); t2->count++;  // conn
+				t2 = insert_timer(&timers[3], &t, array[3]); t2->count++;  // resp
+
+				v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
+				if (v < 0 && !(filter & FILT_QUIET))
+					fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
+						line, array[0], array[1], array[2], array[3], array[4], v);
+				t2 = insert_timer(&timers[4], &t, v); t2->count++;
+			}
+			continue;
+		}
+
+		/* all other cases mean we just want to count lines */
+		tot++;
+	}
+
+	if (t)
+		free(t);
+
+	if (filter & FILT_COUNT_ONLY) {
+		printf("%d\n", tot);
+		exit(0);
+	}
+
+	if (filter & FILT_ERRORS_ONLY)
+		exit(0);
+
+	if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) {
+		/* sort and count all timers. Output will look like this :
+		 * <accept_date> <delta_ms from previous one> <nb entries>
+		 */
+		n = eb32_first(&timers[0]);
+
+		if (n)
+			last = n->key;
+		while (n) {
+			unsigned int d, h, m, s, ms;
+
+			t = container_of(n, struct timer, node);
+			h = n->key;
+			d = h - last;
+			last = h;
+
+			if (d >= filter_acc_delay && t->count >= filter_acc_count) {
+				ms = h % 1000; h = h / 1000;
+				s = h % 60; h = h / 60;
+				m = h % 60; h = h / 60;
+				tot++;
+				printf("%02d:%02d:%02d.%03d %d %d %d\n", h, m, s, ms, last, d, t->count);
+			}
+			n = eb32_next(n);
+		}
+	}
+	else if (filter & FILT_GRAPH_TIMERS) {
+		/* sort all timers */
+		for (f = 0; f < 5; f++) {
+			struct eb32_node *n;
+			int val;
+
+			val = 0;
+			n = eb32_first(&timers[f]);
+			while (n) {
+				int i;
+				double d;
+
+				t = container_of(n, struct timer, node);
+				last = n->key;
+				val = t->count;
+
+				i = (last < 0) ? -last : last;
+				i = fls_auto(i) - QBITS;
+
+				if (i > 0)
+					d = val / (double)(1 << i);
+				else
+					d = val;
+
+				if (d > 0.0) {
+					printf("%d %d %f\n", f, last, d+1.0);
+					tot++;
+				}
+
+				n = eb32_next(n);
+			}
+		}
+	}
+
+	if (!(filter & FILT_QUIET))
+		fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n",
+			linenum, tot, parse_err);
+	exit(0);
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */