MEDIUM: halog: add support for counting per source address (-ic)

This works like -uc, except that it counts source addresses instead
of URLs. The reported times are request times, not response times.

The code is getting quite ugly: the url struct is abused to store an
address, and no more filter bits are available. The code needs a
major revamp.
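
For illustration, a hypothetical run (the log name and all numbers
below are made up; only the header line comes from this patch). As
with -uc, entries come out sorted by request count, descending:

    $ halog -ic < haproxy.log
    #req err ttot tavg oktot okavg bavg btot src
    1542 2 77100 50 76900 49 1820 2806440 192.168.0.1
    37 0 1850 50 1850 50 1733 64121 10.0.0.3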
diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c
index 61034ec..9552998 100644
--- a/contrib/halog/halog.c
+++ b/contrib/halog/halog.c
@@ -114,6 +114,7 @@
 			      FILT_COUNT_URL_BAVG|FILT_COUNT_URL_BTOT)
 
 #define FILT_COUNT_COOK_CODES 0x40000000
+#define FILT_COUNT_IP_COUNT   0x80000000
 
 unsigned int filter = 0;
 unsigned int filter_invert = 0;
@@ -126,6 +127,7 @@
 const char *fgets2(FILE *stream);
 
 void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr);
+void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr);
 void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr);
 void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr);
 void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr);
@@ -140,7 +142,7 @@
 		"%s"
 		"Usage: halog [-h|--help] for long help\n"
 		"       halog [-q] [-c] [-m <lines>]\n"
-		"       {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt}\n"
+		"       {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic}\n"
 		"       [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>]\n"
 		"       [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] < log\n"
 		"\n",
@@ -527,7 +529,7 @@
 
 int main(int argc, char **argv)
 {
-	const char *b, *e, *p, *time_field, *accept_field;
+	const char *b, *e, *p, *time_field, *accept_field, *source_field;
 	const char *filter_term_code_name = NULL;
 	const char *output_file = NULL;
 	int f, last, err;
@@ -657,6 +659,8 @@
 			filter |= FILT_COUNT_URL_BAVG;
 		else if (strcmp(argv[0], "-ubt") == 0)
 			filter |= FILT_COUNT_URL_BTOT;
+		else if (strcmp(argv[0], "-ic") == 0)
+			filter |= FILT_COUNT_IP_COUNT;
 		else if (strcmp(argv[0], "-o") == 0) {
 			if (output_file)
 				die("Fatal: output file name already specified.\n");
@@ -721,13 +725,21 @@
 	while ((line = fgets2(stdin)) != NULL) {
 		linenum++;
 		time_field = NULL; accept_field = NULL;
+		source_field = NULL;
 
 		test = 1;
 
 		/* for any line we process, we first ensure that there is a field
 		 * looking like the accept date field (beginning with a '[').
 		 */
-		accept_field = field_start(line, ACCEPT_FIELD + skip_fields);
+		if (filter & FILT_COUNT_IP_COUNT) {
+			/* we need the IP first */
+			source_field = field_start(line, SOURCE_FIELD + skip_fields);
+			accept_field = field_start(source_field, ACCEPT_FIELD - SOURCE_FIELD + 1);
+		}
+		else
+			accept_field = field_start(line, ACCEPT_FIELD + skip_fields);
+
 		if (unlikely(*accept_field != '[')) {
 			parse_err++;
 			continue;
@@ -869,8 +881,12 @@
 
 		/************** here we process inputs *******************/
 
-		if (line_filter)
-			line_filter(accept_field, time_field, &t);
+		if (line_filter) {
+			if (filter & FILT_COUNT_IP_COUNT)
+				filter_count_ip(source_field, accept_field, time_field, &t);
+			else
+				line_filter(accept_field, time_field, &t);
+		}
 		else
 			lines_out++; /* FILT_COUNT_ONLY was used, so we're just counting lines */
 		if (lines_max >= 0 && lines_out >= lines_max)
@@ -1047,7 +1063,7 @@
 			n = eb32_next(n);
 		}
 	}
-	else if (filter & FILT_COUNT_URL_ANY) {
+	else if (filter & (FILT_COUNT_URL_ANY|FILT_COUNT_IP_COUNT)) {
 		struct eb_node *node, *next;
 
 		if (!(filter & FILT_COUNT_URL_ONLY)) {
@@ -1062,7 +1078,7 @@
 
 				ustat = container_of(node, struct url_stat, node.url.node);
 
-				if (filter & FILT_COUNT_URL_COUNT)
+				if (filter & (FILT_COUNT_URL_COUNT|FILT_COUNT_IP_COUNT))
 					ustat->node.val.key = ustat->nb_req;
 				else if (filter & FILT_COUNT_URL_ERR)
 					ustat->node.val.key = ustat->nb_err;
@@ -1087,7 +1103,10 @@
 			timers[0] = timers[1];
 		}
 
-		printf("#req err ttot tavg oktot okavg bavg btot url\n");
+		if (filter & FILT_COUNT_IP_COUNT)
+			printf("#req err ttot tavg oktot okavg bavg btot src\n");
+		else
+			printf("#req err ttot tavg oktot okavg bavg btot url\n");
 
 		/* scan the tree in its reverse sorting order */
 		node = eb_last(&timers[0]);
@@ -1410,6 +1429,95 @@
 	}
 }
 
+void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr)
+{
+	struct url_stat *ustat = NULL;
+	struct ebpt_node *ebpt_old;
+	const char *b, *e;
+	int f, err, array[5];
+	int val;
+
+	/* let's collect the request time */
+	if (!time_field) {
+		time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);  // avg 115 ns per line
+		if (unlikely(!*time_field)) {
+			truncated_line(linenum, line);
+			return;
+		}
+	}
+
+	/* we have the field TIME_FIELD starting at <time_field>. We'll
+	 * walk the 5 timers to detect errors; it takes avg 55 ns per line.
+	 */
+	e = time_field; err = 0; f = 0;
+	while (!SEP(*e)) {
+		if (f == 0 || f == 4) {
+			array[f] = str2ic(e);
+			if (array[f] < 0) {
+				array[f] = -1;
+				err = 1;
+			}
+		}
+		if (++f == 5)
+			break;
+		SKIP_CHAR(e, '/');
+	}
+	if (f < 5) {
+		parse_err++;
+		return;
+	}
+
+	/* OK we have our timers in array[0] and array[4], and err is >0 if at
+	 * least one -1 was seen. <e> points to the first char of
+	 * the last timer. Let's prepare a new node with that.
+	 */
+	if (unlikely(!ustat))
+		ustat = calloc(1, sizeof(*ustat));
+
+	ustat->nb_err = err;
+	ustat->nb_req = 1;
+
+	/* use array[4] = total time in case of error */
+	ustat->total_time = (array[0] >= 0) ? array[0] : array[4];
+	ustat->total_time_ok = (array[0] >= 0) ? array[0] : 0;
+
+	e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
+	val = str2ic(e);
+	ustat->total_bytes_sent = val;
+
+	/* the source might be IPv4 or IPv6, so we always strip the port by
+	 * removing the last colon.
+	 */
+	b = source_field;
+	e = field_stop(b + 1);
+	while (e > b && e[-1] != ':')
+		e--;
+	*(char *)(e - 1) = '\0';
+
+	/* instead of copying the src for a simple lookup, we link to it from
+	 * the node we're trying to insert. If ebis_insert() returns a different
+	 * node, the key was already there and we only update that entry;
+	 * otherwise the node was inserted and we make the key permanent with
+	 * strdup(). We abuse the node's <url> field to store the source address.
+	 */
+	ustat->node.url.key = (char *)b;
+	ebpt_old = ebis_insert(&timers[0], &ustat->node.url);
+
+	if (ebpt_old != &ustat->node.url) {
+		struct url_stat *ustat_old;
+		/* node was already there, let's update previous one */
+		ustat_old = container_of(ebpt_old, struct url_stat, node.url);
+		ustat_old->nb_req ++;
+		ustat_old->nb_err += ustat->nb_err;
+		ustat_old->total_time += ustat->total_time;
+		ustat_old->total_time_ok += ustat->total_time_ok;
+		ustat_old->total_bytes_sent += ustat->total_bytes_sent;
+	} else {
+		ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
+		ustat = NULL; /* node was used */
+	}
+}
+
 void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr)
 {
 	struct timer *t2;
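
Side note, outside the patch: the port-stripping step in
filter_count_ip() can be shown standalone. This is a minimal sketch,
not halog code; it assumes, as the patch does, that the source field
carries a trailing ':port', and the names (strip_port, the sample
addresses) are illustrative:

#include <stdio.h>
#include <string.h>

/* cut the address at the last colon; this works for both IPv4
 * ("1.2.3.4:5678") and IPv6 ("::ffff:1.2.3.4:5678") sources since
 * IPv6 addresses themselves contain colons.
 */
static void strip_port(char *addr)
{
	char *colon = strrchr(addr, ':');

	if (colon)                   /* the patch assumes a port is present */
		*colon = '\0';
}

int main(void)
{
	char v4[] = "192.168.1.10:51234";
	char v6[] = "::ffff:10.0.0.1:51234";

	strip_port(v4);
	strip_port(v6);
	printf("%s\n%s\n", v4, v6);  /* 192.168.1.10 and ::ffff:10.0.0.1 */
	return 0;
}

The patch scans backwards by hand instead of calling strrchr()
because at that point the field is only bounded by the separator
found via field_stop(); it is not NUL-terminated until the '\0' is
written over the colon.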
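
Similarly, the allocate/insert/strdup-on-success pattern reused from
filter_count_url() can be sketched in isolation. Below, a plain
linked list stands in for the ebtree: like ebis_insert(),
list_insert() returns the already-present node on a duplicate key
and the new node otherwise. All names are illustrative, not halog
code:

#define _POSIX_C_SOURCE 200809L  /* for strdup() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
	struct node *next;
	char *key;            /* borrowed for lookup, owned once inserted */
	unsigned nb_req;
};

static struct node *head;

static struct node *list_insert(struct node *n)
{
	struct node *cur;

	for (cur = head; cur; cur = cur->next)
		if (strcmp(cur->key, n->key) == 0)
			return cur;  /* duplicate key: keep existing node */
	n->next = head;
	head = n;
	return n;
}

static void count_src(const char *src)
{
	struct node *n = calloc(1, sizeof(*n));
	struct node *old;

	n->key = (char *)src;  /* borrow the caller's buffer for lookup */
	n->nb_req = 1;
	old = list_insert(n);
	if (old != n) {
		old->nb_req++;  /* already known: update the existing node */
		free(n);        /* drop the spare (the patch above skips this) */
	} else {
		n->key = strdup(src); /* inserted: make the key permanent */
	}
}

int main(void)
{
	struct node *n;

	count_src("192.168.1.10");
	count_src("10.0.0.3");
	count_src("192.168.1.10");

	for (n = head; n; n = n->next)
		printf("%u %s\n", n->nb_req, n->key);
	return 0;
}

Note that in the patch ustat is a plain local initialized to NULL,
so the !ustat test is always true and the spare node is simply
dropped, not freed, on the duplicate path; one more reason for the
revamp the commit message calls for.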