CONTRIB: halog: Filter input lines by date and time through timestamp

I wanted to make a graph in Nagios of the average response time, computed
only from the last 5 minutes of the log. Filtering the log before passing
it to halog was too slow, so I added that filter to halog itself.

The patch attached to this mail is a proposal to add a new option: -time
[min][:max]

The values are the minimum and/or maximum Unix timestamps of the lines to be
used for stats. The date and time found between '[' and ']' in each log line
are converted to a timestamp and compared to these values.

Here is an example of usage:
cat /var/log/haproxy.log | ./halog -srv -H -q -time $(date --date '-5 min' +%s)
diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c
index 9552998..364bf90 100644
--- a/contrib/halog/halog.c
+++ b/contrib/halog/halog.c
@@ -18,6 +18,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <ctype.h>
+#include <time.h>
 
 #include <eb32tree.h>
 #include <eb64tree.h>
@@ -116,7 +117,10 @@
 #define FILT_COUNT_COOK_CODES 0x40000000
 #define FILT_COUNT_IP_COUNT   0x80000000
 
+#define FILT2_TIMESTAMP	0x01
+
 unsigned int filter = 0;
+unsigned int filter2 = 0;
 unsigned int filter_invert = 0;
 const char *line;
 int linenum = 0;
@@ -144,7 +148,7 @@
 		"       halog [-q] [-c] [-m <lines>]\n"
 		"       {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic}\n"
 		"       [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>]\n"
-		"       [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] < log\n"
+		"       [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] ] < log\n"
 		"\n",
 		msg ? msg : ""
 		);
@@ -170,6 +174,8 @@
 	       " -hs|-HS <[min][:][max]> only match requests with HTTP status codes within/not\n"
 	       "                         within min..max. Any of them may be omitted. Exact\n"
 	       "                         code is checked for if no ':' is specified.\n"
+	       " -time <[min][:max]>     only match requests recorded between timestamps.\n"
+	       "                         Any of them may be omitted.\n"
 	       "Modifiers\n"
 	       " -v                      invert the input filtering condition\n"
 	       " -q                      don't report errors/warnings\n"
@@ -521,6 +527,145 @@
 	return -1;
 }
 
+/* Convert "[04/Dec/2008:09:49:40.555]" to a Unix timestamp in local time.
+ * <field> must be NUL-terminated. Its first character (normally '[') is
+ * skipped, then day, month name, year, hour, minute and second are read.
+ * Fractional seconds are ignored. Separators are not checked, only their
+ * presence is. Returns the timestamp, or -1 when the field ends too early,
+ * the month name is unknown, a value is out of range, or mktime() fails.
+ * The result is narrowed from time_t to the int return type.
+ * The parser looks ugly but gcc emits far better code that way.
+ */
+int convert_date_to_timestamp(const char *field)
+{
+	unsigned int d, mo, y, h, m, s;
+	unsigned char c;
+	const char *e;
+	struct tm tm;
+
+	d = mo = y = h = m = s = 0;
+	e = field;
+	/* skip '[' but never step over the end of the string */
+	if (!*(e++))
+		goto out_err;
+
+	/* day + '/' */
+	while (1) {
+		c = *(e++) - '0';
+		if (c > 9)
+			break;
+		d = d * 10 + c;
+	}
+	/* a NUL also ends the loop, so it has to be detected after it */
+	if (c == (unsigned char)(0 - '0'))
+		goto out_err;
+
+	/* month + '/'. Each && stops at the first mismatch, so nothing is read
+	 * past a NUL; e[3] is only read once e[0..2] matched a month name.
+	 */
+	switch (e[0]) {
+	case 'J':
+		if (e[1] == 'a' && e[2] == 'n')
+			mo = 1;
+		else if (e[1] == 'u' && e[2] == 'n')
+			mo = 6;
+		else if (e[1] == 'u' && e[2] == 'l')
+			mo = 7;
+		break;
+	case 'F':
+		if (e[1] == 'e' && e[2] == 'b')
+			mo = 2;
+		break;
+	case 'M':
+		if (e[1] == 'a' && e[2] == 'r')
+			mo = 3;
+		else if (e[1] == 'a' && e[2] == 'y')
+			mo = 5;
+		break;
+	case 'A':
+		if (e[1] == 'p' && e[2] == 'r')
+			mo = 4;
+		else if (e[1] == 'u' && e[2] == 'g')
+			mo = 8;
+		break;
+	case 'S':
+		if (e[1] == 'e' && e[2] == 'p')
+			mo = 9;
+		break;
+	case 'O':
+		if (e[1] == 'c' && e[2] == 't')
+			mo = 10;
+		break;
+	case 'N':
+		if (e[1] == 'o' && e[2] == 'v')
+			mo = 11;
+		break;
+	case 'D':
+		if (e[1] == 'e' && e[2] == 'c')
+			mo = 12;
+		break;
+	default:
+		break;
+	}
+	if (!mo || !e[3])
+		goto out_err;
+	e += 4;
+
+	/* year + ':' */
+	while (1) {
+		c = *(e++) - '0';
+		if (c > 9)
+			break;
+		y = y * 10 + c;
+	}
+	if (c == (unsigned char)(0 - '0'))
+		goto out_err;
+
+	/* hour + ':' */
+	while (1) {
+		c = *(e++) - '0';
+		if (c > 9)
+			break;
+		h = h * 10 + c;
+	}
+	if (c == (unsigned char)(0 - '0'))
+		goto out_err;
+
+	/* minute + ':' */
+	while (1) {
+		c = *(e++) - '0';
+		if (c > 9)
+			break;
+		m = m * 10 + c;
+	}
+	if (c == (unsigned char)(0 - '0'))
+		goto out_err;
+
+	/* second + '.' or ']' (end of string is accepted here as well) */
+	while (1) {
+		c = *(e++) - '0';
+		if (c > 9)
+			break;
+		s = s * 10 + c;
+	}
+
+	/* reject values mktime() would otherwise silently normalize */
+	if (d < 1 || d > 31 || y < 1900 || y > 9999 || h > 23 || m > 59 || s > 60)
+		goto out_err;
+
+	memset(&tm, 0, sizeof(tm));
+	tm.tm_sec = s;
+	tm.tm_min = m;
+	tm.tm_hour = h;
+	tm.tm_mday = d;
+	tm.tm_mon = mo - 1;
+	tm.tm_year = y - 1900;
+	tm.tm_isdst = -1; /* let mktime() pick DST for the logged date */
+	return mktime(&tm);
+ out_err:
+	return -1;
+}
+
 void truncated_line(int linenum, const char *line)
 {
 	if (!(filter & FILT_QUIET))
@@ -537,9 +682,11 @@
 	struct eb32_node *n;
 	struct url_stat *ustat = NULL;
 	int val, test;
+	uint uval;
 	int filter_acc_delay = 0, filter_acc_count = 0;
 	int filter_time_resp = 0;
 	int filt_http_status_low = 0, filt_http_status_high = 0;
+	int filt2_timestamp_low = 0, filt2_timestamp_high = 0;
 	int skip_fields = 1;
 
 	void (*line_filter)(const char *accept_field, const char *time_field, struct timer **tptr) = NULL;
@@ -641,6 +788,21 @@
 			filt_http_status_low = *str ? atol(str) : 0;
 			filt_http_status_high = *sep ? atol(sep) : 65535;
 		}
+		else if (strcmp(argv[0], "-time") == 0) {
+			char *sep, *str;
+
+			if (argc < 2) die("missing option for -time ([min]:[max])");
+			filter2 |= FILT2_TIMESTAMP;
+
+			argc--; argv++;
+			str = *argv;
+			sep = strchr(str, ':');  /* [min]:[max] */
+			filt2_timestamp_low = *str ? atol(str) : 0;
+			if (!sep)
+				filt2_timestamp_high = 0xFFFFFFFF;
+			else
+				filt2_timestamp_high = atol(++sep);
+		}
 		else if (strcmp(argv[0], "-u") == 0)
 			filter |= FILT_COUNT_URL_ONLY;
 		else if (strcmp(argv[0], "-uc") == 0)
@@ -713,7 +875,8 @@
 #endif
 
 	if (!line_filter && /* FILT_COUNT_ONLY ( see above), and no input filter (see below) */
-	    !(filter & (FILT_HTTP_ONLY|FILT_TIME_RESP|FILT_ERRORS_ONLY|FILT_HTTP_STATUS|FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY|FILT_TERM_CODE_NAME))) {
+	    !(filter & (FILT_HTTP_ONLY|FILT_TIME_RESP|FILT_ERRORS_ONLY|FILT_HTTP_STATUS|FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY|FILT_TERM_CODE_NAME)) &&
+		!(filter2 & (FILT2_TIMESTAMP))) {
 		/* read the whole file at once first, ignore it if inverted output */
 		if (!filter_invert)
 			while ((lines_max < 0 || lines_out < lines_max) && fgets2(stdin) != NULL)
@@ -751,6 +914,11 @@
 			continue;
 		}
 
+		if (filter2 & FILT2_TIMESTAMP) {
+			uval = convert_date_to_timestamp(accept_field);
+			test &= (uval>=filt2_timestamp_low && uval<=filt2_timestamp_high) ;
+		}
+
 		if (filter & FILT_HTTP_ONLY) {
 			/* only report lines with at least 4 timers */
 			if (!time_field) {