[CONTRIB] halog: fast log parser for haproxy
halog can search errors, count lines, sort by accept date, look for
traffic holes and large connection counts at output graph plots of
timers.
diff --git a/contrib/halog/Makefile b/contrib/halog/Makefile
new file mode 100644
index 0000000..911cae6
--- /dev/null
+++ b/contrib/halog/Makefile
@@ -0,0 +1,11 @@
+INCLUDE = -I../../include
+OPTIMIZE = -O3 -mtune=pentium-m
+CC = gcc
+
+OBJS = halog
+
+halog: halog.c
+ $(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
+
+clean:
+ rm -vf $(OBJS)
diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c
new file mode 100644
index 0000000..6bfc76a
--- /dev/null
+++ b/contrib/halog/halog.c
@@ -0,0 +1,599 @@
+/*
+ * haproxy log time reporter
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+/*
+ * gcc -O2 -o halog halog.c -Iinclude src/ebtree.c src/eb32tree.c
+ *
+ * Usage:
+ * $0 [ min_delay [ min_count [ field_shift ]]] < haproxy.log
+ * Note: if min_delay < 0, it only outputs lines with status codes 5xx.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include <common/eb32tree.h>
+
+#define ACCEPT_FIELD 6
+#define TIME_FIELD 9
+#define STATUS_FIELD 10
+#define CONN_FIELD 15
+#define MAXLINE 16384
+#define QBITS 4
+
+#define SKIP_CHAR(p,c) do { while (1) if (!*p) break; else if (*(p++) == c) break; } while (0)
+
+/* [0] = err/date, [1] = req, [2] = conn, [3] = resp, [4] = data */
+static struct eb_root timers[5] = {
+ EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
+ EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
+};
+
+struct timer {
+ struct eb32_node node;
+ unsigned int count;
+};
+
+
+#define FILT_COUNT_ONLY 0x01
+#define FILT_INVERT 0x02
+#define FILT_QUIET 0x04
+#define FILT_ERRORS_ONLY 0x08
+#define FILT_ACC_DELAY 0x10
+#define FILT_ACC_COUNT 0x20
+#define FILT_GRAPH_TIMERS 0x40
+
+unsigned int filter = 0;
+unsigned int filter_invert = 0;
+char line[MAXLINE];
+
+
+void die(const char *msg)
+{
+ fprintf(stderr,
+ "%s"
+ "Usage: halog [-c] [-v] [-gt] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
+ "\n",
+ msg ? msg : ""
+ );
+ exit(1);
+}
+
+
+/* return pointer to first char not part of current field starting at <p>. */
+const char *field_stop(const char *p)
+{
+ unsigned char c;
+
+ while (1) {
+ c = *(p++);
+ if (c > ' ')
+ continue;
+ if (c == ' ' || c == '\t' || c == 0)
+ break;
+ }
+ return p - 1;
+}
+
+/* return field <field> (starting from 1) in string <p>. Only consider
+ * contiguous spaces (or tabs) as one delimiter. May return pointer to
+ * last char if field is not found. Equivalent to awk '{print $field}'.
+ */
+const char *field_start(const char *p, int field)
+{
+ unsigned char c;
+ while (1) {
+ /* skip spaces */
+ while (1) {
+ c = *p;
+ if (c > ' ')
+ break;
+ if (c == ' ' || c == '\t')
+ goto next;
+ if (!c) /* end of line */
+ return p;
+ /* other char => new field */
+ break;
+ next:
+ p++;
+ }
+
+ /* start of field */
+ field--;
+ if (!field)
+ return p;
+
+ /* skip this field */
+ while (1) {
+ c = *(p++);
+ if (c > ' ')
+ continue;
+ if (c == ' ' || c == '\t')
+ break;
+ if (c == '\0')
+ return p;
+ }
+ }
+}
+
+/* keep only the <bits> higher bits of <i> */
+static inline unsigned int quantify_u32(unsigned int i, int bits)
+{
+ int high;
+
+ if (!bits)
+ return 0;
+
+ if (i)
+ high = fls_auto(i); // 1 to 32
+ else
+ high = 0;
+
+ if (high <= bits)
+ return i;
+
+ return i & ~((1 << (high - bits)) - 1);
+}
+
+/* keep only the <bits> higher bits of the absolute value of <i>, as well as
+ * its sign. */
+static inline int quantify(int i, int bits)
+{
+ if (i >= 0)
+ return quantify_u32(i, bits);
+ else
+ return -quantify_u32(-i, bits);
+}
+
+/* Insert timer value <v> into tree <r>. A pre-allocated node must be passed
+ * in <alloc>. It may be NULL, in which case the function will allocate it
+ * itself. It will be reset to NULL once consumed. The caller is responsible
+ * for freeing the node once not used anymore. The node where the value was
+ * inserted is returned.
+ */
+struct timer *insert_timer(struct eb_root *r, struct timer **alloc, int v)
+{
+ struct timer *t = *alloc;
+ struct eb32_node *n;
+
+ if (!t) {
+ t = calloc(sizeof(*t), 1);
+ if (unlikely(!t)) {
+ fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
+ exit(1);
+ }
+ }
+ t->node.key = quantify(v, QBITS); // keep only the higher QBITS bits
+
+ n = eb32i_insert(r, &t->node);
+ if (n == &t->node)
+ t = NULL; /* node inserted, will malloc next time */
+
+ *alloc = t;
+ return container_of(n, struct timer, node);
+}
+
+/* Insert value value <v> into tree <r>. A pre-allocated node must be passed
+ * in <alloc>. It may be NULL, in which case the function will allocate it
+ * itself. It will be reset to NULL once consumed. The caller is responsible
+ * for freeing the node once not used anymore. The node where the value was
+ * inserted is returned.
+ */
+struct timer *insert_value(struct eb_root *r, struct timer **alloc, int v)
+{
+ struct timer *t = *alloc;
+ struct eb32_node *n;
+
+ if (!t) {
+ t = calloc(sizeof(*t), 1);
+ if (unlikely(!t)) {
+ fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
+ exit(1);
+ }
+ }
+ t->node.key = v;
+
+ n = eb32i_insert(r, &t->node);
+ if (n == &t->node)
+ t = NULL; /* node inserted, will malloc next time */
+
+ *alloc = t;
+ return container_of(n, struct timer, node);
+}
+
+int str2ic(const char *s)
+{
+ int i = 0;
+ int j, k;
+
+ if (*s != '-') {
+ /* positive number */
+ while (1) {
+ j = (*s++) - '0';
+ k = i * 10;
+ if ((unsigned)j > 9)
+ break;
+ i = k + j;
+ }
+ } else {
+ /* negative number */
+ s++;
+ while (1) {
+ j = (*s++) - '0';
+ k = i * 10;
+ if ((unsigned)j > 9)
+ break;
+ i = k - j;
+ }
+ }
+
+ return i;
+}
+
+
+/* Equivalent to strtoul with a length. */
+static inline unsigned int __strl2ui(const char *s, int len)
+{
+ unsigned int i = 0;
+ while (len-- > 0) {
+ i = i * 10 - '0';
+ i += (unsigned char)*s++;
+ }
+ return i;
+}
+
+unsigned int strl2ui(const char *s, int len)
+{
+ return __strl2ui(s, len);
+}
+
+/* Convert "[04/Dec/2008:09:49:40.555]" to an integer equivalent to the time of
+ * the day in milliseconds. It returns -1 for all unparsable values. The parser
+ * looks ugly but gcc emits far better code that way.
+ */
+int convert_date(const char *field)
+{
+ unsigned int h, m, s, ms;
+ unsigned char c;
+ const char *b, *e;
+
+ h = m = s = ms = 0;
+ e = field;
+
+ /* skip the date */
+ while (1) {
+ c = *(e++);
+ if (c == ':')
+ break;
+ if (!c)
+ goto out_err;
+ }
+
+ /* hour + ':' */
+ b = e;
+ while (1) {
+ c = *(e++) - '0';
+ if (c > 9)
+ break;
+ h = h * 10 + c;
+ }
+ if (c == (unsigned char)(0 - '0'))
+ goto out_err;
+
+ /* minute + ':' */
+ b = e;
+ while (1) {
+ c = *(e++) - '0';
+ if (c > 9)
+ break;
+ m = m * 10 + c;
+ }
+ if (c == (unsigned char)(0 - '0'))
+ goto out_err;
+
+ /* second + '.' or ']' */
+ b = e;
+ while (1) {
+ c = *(e++) - '0';
+ if (c > 9)
+ break;
+ s = s * 10 + c;
+ }
+ if (c == (unsigned char)(0 - '0'))
+ goto out_err;
+
+ /* if there's a '.', we have milliseconds */
+ if (c == (unsigned char)('.' - '0')) {
+ /* millisecond second + ']' */
+ b = e;
+ while (1) {
+ c = *(e++) - '0';
+ if (c > 9)
+ break;
+ ms = ms * 10 + c;
+ }
+ if (c == (unsigned char)(0 - '0'))
+ goto out_err;
+ }
+ return (((h * 60) + m) * 60 + s) * 1000 + ms;
+ out_err:
+ return -1;
+}
+
+void truncated_line(int linenum, const char *line)
+{
+ if (!(filter & FILT_QUIET))
+ fprintf(stderr, "Truncated line %d: %s\n", linenum, line);
+}
+
+int main(int argc, char **argv)
+{
+ const char *b, *e, *p;
+ const char *output_file = NULL;
+ int f, tot, last, linenum, err, parse_err;
+ struct timer *t = NULL, *t2;
+ struct eb32_node *n;
+ int val, test;
+ int array[5];
+ int filter_acc_delay = 0, filter_acc_count = 0;
+ int skip_fields = 1;
+
+ argc--; argv++;
+ while (argc > 0) {
+ if (*argv[0] != '-')
+ break;
+
+ if (strcmp(argv[0], "-ad") == 0) {
+ if (argc < 2) die("missing option for -ad");
+ argc--; argv++;
+ filter |= FILT_ACC_DELAY;
+ filter_acc_delay = atol(*argv);
+ }
+ else if (strcmp(argv[0], "-ac") == 0) {
+ if (argc < 2) die("missing option for -ac");
+ argc--; argv++;
+ filter |= FILT_ACC_COUNT;
+ filter_acc_count = atol(*argv);
+ }
+ else if (strcmp(argv[0], "-s") == 0) {
+ if (argc < 2) die("missing option for -s");
+ argc--; argv++;
+ skip_fields = atol(*argv);
+ }
+ else if (strcmp(argv[0], "-e") == 0)
+ filter |= FILT_ERRORS_ONLY;
+ else if (strcmp(argv[0], "-c") == 0)
+ filter |= FILT_COUNT_ONLY;
+ else if (strcmp(argv[0], "-q") == 0)
+ filter |= FILT_QUIET;
+ else if (strcmp(argv[0], "-v") == 0)
+ filter_invert = !filter_invert;
+ else if (strcmp(argv[0], "-gt") == 0)
+ filter |= FILT_GRAPH_TIMERS;
+ else if (strcmp(argv[0], "-o") == 0) {
+ if (output_file)
+ die("Fatal: output file name already specified.\n");
+ if (argc < 2)
+ die("Fatal: missing output file name.\n");
+ output_file = argv[1];
+ }
+ argc--;
+ argv++;
+ }
+
+ if (!filter)
+ die("No action specified.\n");
+
+ if (filter & FILT_ACC_COUNT && !filter_acc_count)
+ filter_acc_count=1;
+
+ if (filter & FILT_ACC_DELAY && !filter_acc_delay)
+ filter_acc_delay = 1;
+
+ linenum = 0;
+ tot = 0;
+ parse_err = 0;
+
+ while (fgets(line, MAXLINE, stdin) != NULL) {
+ linenum++;
+
+ if (filter & FILT_ERRORS_ONLY) {
+ /* only report erroneous status codes */
+ b = field_start(line, STATUS_FIELD + skip_fields);
+ if (!*b) {
+ truncated_line(linenum, line);
+ continue;
+ }
+ if (*b == '-') {
+ test = 1;
+ } else {
+ val = strl2ui(b, 3);
+ test = (val >= 500 && val <= 599);
+ }
+ test ^= filter_invert;
+ if (test) {
+ tot++;
+ if (!(filter & FILT_COUNT_ONLY))
+ printf("%s", line);
+ }
+ continue;
+ }
+
+ if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) {
+ b = field_start(line, ACCEPT_FIELD + skip_fields);
+ if (!*b) {
+ truncated_line(linenum, line);
+ continue;
+ }
+
+ val = convert_date(b);
+ //printf("date=%s => %d\n", b, val);
+ if (val < 0) {
+ parse_err++;
+ continue;
+ }
+
+ t2 = insert_value(&timers[0], &t, val);
+ t2->count++;
+ continue;
+ }
+
+ if (filter & FILT_GRAPH_TIMERS) {
+ int f;
+
+ b = field_start(line, TIME_FIELD + skip_fields);
+ if (!*b) {
+ truncated_line(linenum, line);
+ continue;
+ }
+
+ e = field_stop(b + 1);
+ /* we have field TIME_FIELD in [b]..[e-1] */
+
+ p = b;
+ err = 0;
+ for (f = 0; f < 5 && *p; f++) {
+ array[f] = str2ic(p);
+ if (array[f] < 0) {
+ array[f] = -1;
+ err = 1;
+ }
+
+ SKIP_CHAR(p, '/');
+ }
+
+ if (f < 5) {
+ parse_err++;
+ continue;
+ }
+
+ /* if we find at least one negative time, we count one error
+ * with a time equal to the total session time. This will
+ * emphasize quantum timing effects associated to known
+ * timeouts. Note that on some buggy machines, it is possible
+ * that the total time is negative, hence the reason to reset
+ * it.
+ */
+ if (err) {
+ if (array[4] < 0)
+ array[4] = -1;
+ t2 = insert_timer(&timers[0], &t, array[4]); // total time
+ t2->count++;
+ } else {
+ int v;
+
+ t2 = insert_timer(&timers[1], &t, array[0]); t2->count++; // req
+ t2 = insert_timer(&timers[2], &t, array[2]); t2->count++; // conn
+ t2 = insert_timer(&timers[3], &t, array[3]); t2->count++; // resp
+
+ v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
+ if (v < 0 && !(filter & FILT_QUIET))
+ fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
+ line, array[0], array[1], array[2], array[3], array[4], v);
+ t2 = insert_timer(&timers[4], &t, v); t2->count++;
+ }
+ continue;
+ }
+
+ /* all other cases mean we just want to count lines */
+ tot++;
+ }
+
+ if (t)
+ free(t);
+
+ if (filter & FILT_COUNT_ONLY) {
+ printf("%d\n", tot);
+ exit(0);
+ }
+
+ if (filter & FILT_ERRORS_ONLY)
+ exit(0);
+
+ if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) {
+ /* sort and count all timers. Output will look like this :
+ * <accept_date> <delta_ms from previous one> <nb entries>
+ */
+ n = eb32_first(&timers[0]);
+
+ if (n)
+ last = n->key;
+ while (n) {
+ unsigned int d, h, m, s, ms;
+
+ t = container_of(n, struct timer, node);
+ h = n->key;
+ d = h - last;
+ last = h;
+
+ if (d >= filter_acc_delay && t->count >= filter_acc_count) {
+ ms = h % 1000; h = h / 1000;
+ s = h % 60; h = h / 60;
+ m = h % 60; h = h / 60;
+ tot++;
+ printf("%02d:%02d:%02d.%03d %d %d %d\n", h, m, s, ms, last, d, t->count);
+ }
+ n = eb32_next(n);
+ }
+ }
+ else if (filter & FILT_GRAPH_TIMERS) {
+ /* sort all timers */
+ for (f = 0; f < 5; f++) {
+ struct eb32_node *n;
+ int val;
+
+ val = 0;
+ n = eb32_first(&timers[f]);
+ while (n) {
+ int i;
+ double d;
+
+ t = container_of(n, struct timer, node);
+ last = n->key;
+ val = t->count;
+
+ i = (last < 0) ? -last : last;
+ i = fls_auto(i) - QBITS;
+
+ if (i > 0)
+ d = val / (double)(1 << i);
+ else
+ d = val;
+
+ if (d > 0.0) {
+ printf("%d %d %f\n", f, last, d+1.0);
+ tot++;
+ }
+
+ n = eb32_next(n);
+ }
+ }
+ }
+
+ if (!(filter & FILT_QUIET))
+ fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n",
+ linenum, tot, parse_err);
+ exit(0);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */