MINOR: activity: report the average loop time in "show activity"

Since we know the time it takes to process everything between two poll()
calls, we can use it as a measurement of the maximum latency any task
will experience, and average it.

This patch does so, and reports in "show activity" the average of this
loop time over the last 1024 poll() loops, for each thread. It will vary
quickly at high loads and slowly under low to moderate loads, depending
on the rate at which poll() is called. The latency a task experiences
is expected to be half of this on average.
diff --git a/include/proto/activity.h b/include/proto/activity.h
index 4cf9c8d..717d667 100644
--- a/include/proto/activity.h
+++ b/include/proto/activity.h
@@ -26,6 +26,7 @@
 #include <common/hathreads.h>
 #include <common/time.h>
 #include <types/activity.h>
+#include <proto/freq_ctr.h>
 
 extern struct activity activity[MAX_THREADS];
 
@@ -34,12 +35,15 @@
 
 /* Collect date and time information before calling poll(). This will be used
  * to count the run time of the past loop and the sleep time of the next poll.
+ * It also makes use of the just updated before_poll timer to count the loop's
+ * run time and feed the average loop time metric (in microseconds).
  */
 static inline void activity_count_runtime()
 {
 	uint64_t new_mono_time;
 	uint64_t new_cpu_time;
 	int64_t stolen;
+	uint32_t run_time;
 
 	new_cpu_time   = now_cpu_time();
 	new_mono_time  = now_mono_time();
@@ -56,6 +60,9 @@
 			report_stolen_time(stolen);
 		}
 	}
+
+	run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec);
+	swrate_add(&activity[tid].avg_loop_us, TIME_STATS_SAMPLES, run_time);
 }
 
 
diff --git a/include/types/activity.h b/include/types/activity.h
index 99922b4..f58d759 100644
--- a/include/types/activity.h
+++ b/include/types/activity.h
@@ -47,8 +47,10 @@
 	unsigned int empty_rq;     // calls to process_runnable_tasks() with nothing for the thread
 	unsigned int long_rq;      // process_runnable_tasks() left with tasks in the run queue
 	unsigned int cpust_total;  // sum of half-ms stolen per thread
+	/* one cache line */
 	struct freq_ctr cpust_1s;  // avg amount of half-ms stolen over last second
 	struct freq_ctr_period cpust_15s; // avg amount of half-ms stolen over last 15s
+	unsigned int avg_loop_us;  // average run time per loop over last 1024 runs
 	char __pad[0]; // unused except to check remaining room
 	char __end[0] __attribute__((aligned(64))); // align size to 64.
 };