CONTRIB: halog: sort URLs by avg bytes_read or total bytes_read

The patch attached to this mail brings ability to sort URLs by
averaged bytes read and total bytes read in HALog tool.
In most cases, bytes read is also the object size.
The purpose of this patch is to know which URL consume the most
bandwith, in average or in total.
It may be interesting as well to know the standard deviation (ecart
type in french) for some counters (like bytes_read).

The results:
- Sorting by average bytes read per URL:
./halog -uba <~/tmp/haproxy.log | column -t | head
2246 lines in, 302 lines out, 194 parsing errors
18    0    5101     283    5101   283    126573  2278327  /lib/exe/js.php
1     0    1        1      1      1      106734  106734   /wp-admin/images/screenshots/theme-customizer.png
2     0    2        1      2      1      106511  213022   /wp-admin/css/wp-admin.css
1     0    1        1      1      1      96698   96698    /wp-admin/images/screenshots/captions-1.png
1     0    1        1      1      1      73165   73165    /wp-admin/images/screenshots/flex-header-1.png
4     0    0        0      0      0      64832   259328   /cuisine/wp-content/plugins/stats/open-flash-chart.swf
1     0    0        0      0      0      48647   48647    /wp-admin/images/screenshots/flex-header-3.png
1     0    0        0      0      0      44046   44046    /wp-admin/images/screenshots/captions-2.png
1     0    1        1      1      1      38830   38830    /wp-admin/images/screenshots/flex-header-2.png

- Sorting by total bytes read per URL:
./halog -ubt <~/tmp/haproxy.log | column -t | head
2246 lines in, 302 lines out, 194 parsing errors
18    0    5101     283    5101   283    126573  2278327  /lib/exe/js.php
60    0    14387    239    14387  239    10081   604865   /lib/exe/css.php
64    2    8820     137    8819   142    7742    495524   /doku.php
14    0    250      17     250    17     24045   336632   /wp-admin/load-scripts.php
71    0    6422     90     6422   90     4048    287419   /wp-admin/
4     0    0        0      0      0      64832   259328   /cuisine/wp-content/plugins/stats/open-flash-chart.swf
2     0    2        1      2      1      106511  213022   /wp-admin/css/wp-admin.css
31    3    5423     174    5040   180    6804    210931   /index
10    0    429      42     429    42     18009   180093   /cuisine/files/2011/10/tarte_figue_amande-e1318281546905-225x300.jpg
diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c
index 448e5c0..08444fe 100644
--- a/contrib/halog/halog.c
+++ b/contrib/halog/halog.c
@@ -29,6 +29,7 @@
 #define SERVER_FIELD 8
 #define TIME_FIELD 9
 #define STATUS_FIELD 10
+#define BYTES_SENT_FIELD 11
 #define TERM_CODES_FIELD 14
 #define CONN_FIELD 15
 #define QUEUE_LEN_FIELD 16
@@ -67,6 +68,7 @@
 	char *url;
 	unsigned long long total_time;    /* sum(all reqs' times) */
 	unsigned long long total_time_ok; /* sum(all OK reqs' times) */
+	unsigned long long total_bytes_sent; /* sum(all bytes sent) */
 	unsigned int nb_err, nb_req;
 };
 
@@ -94,8 +96,6 @@
 #define FILT_COUNT_URL_TAVG  0x040000
 #define FILT_COUNT_URL_TTOTO 0x080000
 #define FILT_COUNT_URL_TAVGO 0x100000
-#define FILT_COUNT_URL_ANY   (FILT_COUNT_URL_ONLY|FILT_COUNT_URL_COUNT|FILT_COUNT_URL_ERR| \
-			      FILT_COUNT_URL_TTOT|FILT_COUNT_URL_TAVG|FILT_COUNT_URL_TTOTO|FILT_COUNT_URL_TAVGO)
 
 #define FILT_HTTP_ONLY       0x200000
 #define FILT_TERM_CODE_NAME  0x400000
@@ -106,6 +106,13 @@
 #define FILT_QUEUE_ONLY            0x4000000
 #define FILT_QUEUE_SRV_ONLY        0x8000000
 
+#define FILT_COUNT_URL_BAVG       0x10000000
+#define FILT_COUNT_URL_BTOT       0x20000000
+
+#define FILT_COUNT_URL_ANY   (FILT_COUNT_URL_ONLY|FILT_COUNT_URL_COUNT|FILT_COUNT_URL_ERR| \
+			      FILT_COUNT_URL_TTOT|FILT_COUNT_URL_TAVG|FILT_COUNT_URL_TTOTO|FILT_COUNT_URL_TAVGO| \
+			      FILT_COUNT_URL_BAVG|FILT_COUNT_URL_BTOT)
+
 unsigned int filter = 0;
 unsigned int filter_invert = 0;
 const char *line;
@@ -128,9 +135,10 @@
 	fprintf(output,
 		"%s"
 		"Usage: halog [-h|--help] for long help\n"
-		"       halog [-q] [-c] [-v] {-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto}\n"
+		"       halog [-q] [-c]\n"
+		"       {-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt}\n"
 		"       [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>]\n"
-		"       [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] < log\n"
+		"       [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] < log\n"
 		"\n",
 		msg ? msg : ""
 		);
@@ -171,6 +179,7 @@
 	       "       -u : by URL, -uc : request count, -ue : error count\n"
 	       "       -ua : average response time, -uto : average total time\n"
 	       "       -uao, -uto: average times computed on valid ('OK') requests\n"
+	       "       -uba, -ubt: average bytes returned, total bytes returned\n"
 	       );
 	exit(0);
 }
@@ -632,6 +641,10 @@
 			filter |= FILT_COUNT_URL_TAVGO;
 		else if (strcmp(argv[0], "-uto") == 0)
 			filter |= FILT_COUNT_URL_TTOTO;
+		else if (strcmp(argv[0], "-uba") == 0)
+			filter |= FILT_COUNT_URL_BAVG;
+		else if (strcmp(argv[0], "-ubt") == 0)
+			filter |= FILT_COUNT_URL_BTOT;
 		else if (strcmp(argv[0], "-o") == 0) {
 			if (output_file)
 				die("Fatal: output file name already specified.\n");
@@ -1039,6 +1052,10 @@
 					ustat->node.val.key = ustat->total_time_ok;
 				else if (filter & FILT_COUNT_URL_TAVGO)
 					ustat->node.val.key = (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0;
+				else if (filter & FILT_COUNT_URL_BAVG)
+					ustat->node.val.key = ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0;
+				else if (filter & FILT_COUNT_URL_BTOT)
+					ustat->node.val.key = ustat->total_bytes_sent;
 				else
 					ustat->node.val.key = 0;
 
@@ -1048,19 +1065,21 @@
 			timers[0] = timers[1];
 		}
 
-		printf("#req err ttot tavg oktot okavg url\n");
+		printf("#req err ttot tavg oktot okavg bavg btot url\n");
 
 		/* scan the tree in its reverse sorting order */
 		node = eb_last(&timers[0]);
 		while (node) {
 			ustat = container_of(node, struct url_stat, node.url.node);
-			printf("%d %d %Ld %Ld %Ld %Ld %s\n",
+			printf("%d %d %Ld %Ld %Ld %Ld %Ld %Ld %s\n",
 			       ustat->nb_req,
 			       ustat->nb_err,
 			       ustat->total_time,
 			       ustat->nb_req ? ustat->total_time / ustat->nb_req : 0,
 			       ustat->total_time_ok,
 			       (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0,
+			       ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0,
+			       ustat->total_bytes_sent,
 			       ustat->url);
 
 			node = eb_prev(node);
@@ -1243,6 +1262,7 @@
 	struct ebpt_node *ebpt_old;
 	const char *b, *e;
 	int f, err, array[5];
+	int val;
 
 	/* let's collect the response time */
 	if (!time_field) {
@@ -1286,12 +1306,16 @@
 	ustat->total_time = (array[3] >= 0) ? array[3] : array[4];
 	ustat->total_time_ok = (array[3] >= 0) ? array[3] : 0;
 
+	e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
+	val = str2ic(e);
+	ustat->total_bytes_sent = val;
+
 	/* the line may be truncated because of a bad request or anything like this,
 	 * without a method. Also, if it does not begin with an quote, let's skip to
 	 * the next field because it's a capture. Let's fall back to the "method" itself
 	 * if there's nothing else.
 	 */
-	e = field_start(e, METH_FIELD - TIME_FIELD + 1); // avg 100 ns per line
+	e = field_start(e, METH_FIELD - BYTES_SENT_FIELD + 1);
 	while (*e != '"' && *e) {
 		/* Note: some syslog servers escape quotes ! */
 		if (*e == '\\' && e[1] == '"')
@@ -1334,6 +1358,7 @@
 		ustat_old->nb_err += ustat->nb_err;
 		ustat_old->total_time += ustat->total_time;
 		ustat_old->total_time_ok += ustat->total_time_ok;
+		ustat_old->total_bytes_sent += ustat->total_bytes_sent;
 	} else {
 		ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
 		ustat = NULL; /* node was used */