REORG: clock: move the updates of cpu/mono time to clock.c
The entering_poll/leaving_poll/measure_idle functions, which were hard to
classify and kept moving between various locations, have now been placed in
clock.c since they are precisely about time-keeping. The functions were
renamed to clock_*. The samp_time and idle_time values are now static since
there is no reason for them to be read from outside.
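
For clarity, the per-loop sequence that a poller is expected to follow after
this change looks roughly like the sketch below, modeled on the ev_poll.c
hunk further down (the variable names are only illustrative, taken from that
poller):

    wait_time = wake ? 0 : compute_poll_timeout(exp);
    clock_entering_poll();                 /* record before_poll, report stolen time */
    activity_count_runtime();
    status = poll(poll_events, nbfd, wait_time);
    clock_update_date(wait_time, status);  /* refresh the clocks first... */
    clock_leaving_poll(wait_time, status); /* ...then account for the idle time and
                                            * save prev_cpu_time/prev_mono_time */
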
diff --git a/include/haproxy/clock.h b/include/haproxy/clock.h
index 045ab5a..448b0c8 100644
--- a/include/haproxy/clock.h
+++ b/include/haproxy/clock.h
@@ -41,5 +41,8 @@
void clock_init_process_date(void);
void clock_init_thread_date(void);
char *timeofday_as_iso_us(int pad);
+uint clock_report_idle(void);
+void clock_leaving_poll(int timeout, int interrupted);
+void clock_entering_poll(void);
#endif
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index 4acbcad..c1261c7 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -93,9 +93,6 @@
extern unsigned int grq_total; /* total number of entries in the global run queue, atomic */
extern unsigned int niced_tasks; /* number of niced tasks in the run queue */
-extern THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */
-extern THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */
-
extern struct pool_head *pool_head_task;
extern struct pool_head *pool_head_tasklet;
extern struct pool_head *pool_head_notification;
@@ -117,10 +114,6 @@
void __task_wakeup(struct task *t);
void __task_queue(struct task *task, struct eb_root *wq);
-uint sched_report_idle();
-void sched_leaving_poll(int timeout, int interrupted);
-void sched_entering_poll();
-
unsigned int run_tasks_from_lists(unsigned int budgets[]);
/*
diff --git a/src/clock.c b/src/clock.c
index ecaeca9..f68830e 100644
--- a/src/clock.c
+++ b/src/clock.c
@@ -14,6 +14,7 @@
#include <time.h>
#include <haproxy/api.h>
+#include <haproxy/activity.h>
#include <haproxy/clock.h>
#include <haproxy/time.h>
#include <haproxy/tinfo-t.h>
@@ -31,6 +32,8 @@
THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */
THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */
+static THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */
+static THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */
static THREAD_LOCAL unsigned int iso_time_sec; /* last iso time value for this thread */
static THREAD_LOCAL char iso_time_str[34]; /* ISO time representation of gettimeofday() */
@@ -212,6 +215,95 @@
clock_update_date(0, 1);
}
+/* report the average CPU idle percentage over all running threads, between 0 and 100 */
+uint clock_report_idle(void)
+{
+ uint total = 0;
+ uint rthr = 0;
+ uint thr;
+
+ for (thr = 0; thr < MAX_THREADS; thr++) {
+ if (!(all_threads_mask & (1UL << thr)))
+ continue;
+ total += HA_ATOMIC_LOAD(&ha_thread_info[thr].idle_pct);
+ rthr++;
+ }
+ return rthr ? total / rthr : 0;
+}
+
+/* Update the idle time value twice a second, to be called after
+ * clock_update_date() when called after poll(), and currently called only by
+ * clock_leaving_poll() below. It relies on <before_poll> to be updated to
+ * the system time before calling poll().
+ */
+static inline void clock_measure_idle(void)
+{
+ /* Let's compute the idle to work ratio. We worked between after_poll
+ * and before_poll, and slept between before_poll and date. The idle_pct
+ * is updated at most twice every second. Note that the current second
+ * rarely changes so we avoid a multiply when not needed.
+ */
+ int delta;
+
+ if ((delta = date.tv_sec - before_poll.tv_sec))
+ delta *= 1000000;
+ idle_time += delta + (date.tv_usec - before_poll.tv_usec);
+
+ if ((delta = date.tv_sec - after_poll.tv_sec))
+ delta *= 1000000;
+ samp_time += delta + (date.tv_usec - after_poll.tv_usec);
+
+ after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec;
+ if (samp_time < 500000)
+ return;
+
+ HA_ATOMIC_STORE(&ti->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time);
+ idle_time = samp_time = 0;
+}
+
+/* Collect date and time information after leaving poll(). <timeout> must be
+ * set to the maximum sleep time passed to poll (in milliseconds), and
+ * <interrupted> must be zero if the poller reached the timeout or non-zero
+ * otherwise, which generally is provided by the poller's return value.
+ */
+void clock_leaving_poll(int timeout, int interrupted)
+{
+ clock_measure_idle();
+ ti->prev_cpu_time = now_cpu_time();
+ ti->prev_mono_time = now_mono_time();
+}
+
+/* Collect date and time information before calling poll(). This will be used
+ * to count the run time of the past loop and the sleep time of the next poll.
+ * It also compares the elasped and cpu times during the activity period to
+ * estimate the amount of stolen time, which is reported if higher than half
+ * a millisecond.
+ */
+void clock_entering_poll(void)
+{
+ uint64_t new_mono_time;
+ uint64_t new_cpu_time;
+ int64_t stolen;
+
+ gettimeofday(&before_poll, NULL);
+
+ new_cpu_time = now_cpu_time();
+ new_mono_time = now_mono_time();
+
+ if (ti->prev_cpu_time && ti->prev_mono_time) {
+ new_cpu_time -= ti->prev_cpu_time;
+ new_mono_time -= ti->prev_mono_time;
+ stolen = new_mono_time - new_cpu_time;
+ if (unlikely(stolen >= 500000)) {
+ stolen /= 500000;
+ /* more than half a millisecond difference might
+ * indicate an undesired preemption.
+ */
+ report_stolen_time(stolen);
+ }
+ }
+}
+
/* returns the current date as returned by gettimeofday() in ISO+microsecond
* format. It uses a thread-local static variable that the reader can consume
* for as long as it wants until next call. Thus, do not call it from a signal
diff --git a/src/ev_epoll.c b/src/ev_epoll.c
index 03888d0..a9b572b 100644
--- a/src/ev_epoll.c
+++ b/src/ev_epoll.c
@@ -189,7 +189,7 @@
/* now let's wait for polled events */
wait_time = wake ? 0 : compute_poll_timeout(exp);
- sched_entering_poll();
+ clock_entering_poll();
activity_count_runtime();
do {
int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
@@ -209,7 +209,7 @@
break;
} while (1);
- sched_leaving_poll(wait_time, status);
+ clock_leaving_poll(wait_time, status);
thread_harmless_end();
thread_idle_end();
diff --git a/src/ev_evports.c b/src/ev_evports.c
index 23bf11f..dcc30a1 100644
--- a/src/ev_evports.c
+++ b/src/ev_evports.c
@@ -159,7 +159,7 @@
* Determine how long to wait for events to materialise on the port.
*/
wait_time = wake ? 0 : compute_poll_timeout(exp);
- sched_entering_poll();
+ clock_entering_poll();
activity_count_runtime();
do {
@@ -203,7 +203,7 @@
break;
} while(1);
- sched_leaving_poll(wait_time, nevlist);
+ clock_leaving_poll(wait_time, nevlist);
thread_harmless_end();
thread_idle_end();
diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c
index df8eae2..c9f355b 100644
--- a/src/ev_kqueue.c
+++ b/src/ev_kqueue.c
@@ -146,7 +146,7 @@
/* now let's wait for events */
wait_time = wake ? 0 : compute_poll_timeout(exp);
fd = global.tune.maxpollevents;
- sched_entering_poll();
+ clock_entering_poll();
activity_count_runtime();
do {
@@ -175,7 +175,7 @@
break;
} while (1);
- sched_leaving_poll(wait_time, status);
+ clock_leaving_poll(wait_time, status);
thread_harmless_end();
thread_idle_end();
diff --git a/src/ev_poll.c b/src/ev_poll.c
index 9b93c92..dbfa262 100644
--- a/src/ev_poll.c
+++ b/src/ev_poll.c
@@ -202,11 +202,11 @@
/* now let's wait for events */
wait_time = wake ? 0 : compute_poll_timeout(exp);
- sched_entering_poll();
+ clock_entering_poll();
activity_count_runtime();
status = poll(poll_events, nbfd, wait_time);
clock_update_date(wait_time, status);
- sched_leaving_poll(wait_time, status);
+ clock_leaving_poll(wait_time, status);
thread_harmless_end();
thread_idle_end();
diff --git a/src/ev_select.c b/src/ev_select.c
index 47da75d..87a926d 100644
--- a/src/ev_select.c
+++ b/src/ev_select.c
@@ -173,7 +173,7 @@
delta_ms = wake ? 0 : compute_poll_timeout(exp);
delta.tv_sec = (delta_ms / 1000);
delta.tv_usec = (delta_ms % 1000) * 1000;
- sched_entering_poll();
+ clock_entering_poll();
activity_count_runtime();
status = select(maxfd,
readnotnull ? tmp_evts[DIR_RD] : NULL,
@@ -181,7 +181,7 @@
NULL,
&delta);
clock_update_date(delta_ms, status);
- sched_leaving_poll(delta_ms, status);
+ clock_leaving_poll(delta_ms, status);
thread_harmless_end();
thread_idle_end();
diff --git a/src/stats.c b/src/stats.c
index da6fc36..bc083de 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -3448,7 +3448,7 @@
actconn, pipes_used, pipes_used+pipes_free, read_freq_ctr(&global.conn_per_sec),
bps >= 1000000000UL ? (bps / 1000000000.0) : bps >= 1000000UL ? (bps / 1000000.0) : (bps / 1000.0),
bps >= 1000000000UL ? 'G' : bps >= 1000000UL ? 'M' : 'k',
- total_run_queues(), total_allocated_tasks(), sched_report_idle()
+ total_run_queues(), total_allocated_tasks(), clock_report_idle()
);
/* scope_txt = search query, appctx->ctx.stats.scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
@@ -4481,7 +4481,7 @@
#endif
info[INF_TASKS] = mkf_u32(0, total_allocated_tasks());
info[INF_RUN_QUEUE] = mkf_u32(0, total_run_queues());
- info[INF_IDLE_PCT] = mkf_u32(FN_AVG, sched_report_idle());
+ info[INF_IDLE_PCT] = mkf_u32(FN_AVG, clock_report_idle());
info[INF_NODE] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.node);
if (global.desc)
info[INF_DESCRIPTION] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.desc);
diff --git a/src/task.c b/src/task.c
index d547359..45111f1 100644
--- a/src/task.c
+++ b/src/task.c
@@ -38,10 +38,6 @@
volatile unsigned long global_tasks_mask = 0; /* Mask of threads with tasks in the global runqueue */
unsigned int niced_tasks = 0; /* number of niced tasks in the run queue */
-/* used for idle time calculation */
-THREAD_LOCAL unsigned int samp_time = 0; /* total elapsed time over current sample */
-THREAD_LOCAL unsigned int idle_time = 0; /* total idle time over current sample */
-
THREAD_LOCAL struct task_per_thread *sched = &task_per_thread[0]; /* scheduler context for the current thread */
__decl_aligned_spinlock(rq_lock); /* spin lock related to run queue */
@@ -865,95 +861,6 @@
activity[tid].long_rq++;
}
-/* report the average CPU idle percentage over all running threads, between 0 and 100 */
-uint sched_report_idle()
-{
- uint total = 0;
- uint rthr = 0;
- uint thr;
-
- for (thr = 0; thr < MAX_THREADS; thr++) {
- if (!(all_threads_mask & (1UL << thr)))
- continue;
- total += HA_ATOMIC_LOAD(&ha_thread_info[thr].idle_pct);
- rthr++;
- }
- return rthr ? total / rthr : 0;
-}
-
-/* Update the idle time value twice a second, to be called after
- * clock_update_date() when called after poll(), and currently called only by
- * sched_leaving_poll() below. It relies on <before_poll> to be updated to
- * the system time before calling poll().
- */
-static inline void sched_measure_idle()
-{
- /* Let's compute the idle to work ratio. We worked between after_poll
- * and before_poll, and slept between before_poll and date. The idle_pct
- * is updated at most twice every second. Note that the current second
- * rarely changes so we avoid a multiply when not needed.
- */
- int delta;
-
- if ((delta = date.tv_sec - before_poll.tv_sec))
- delta *= 1000000;
- idle_time += delta + (date.tv_usec - before_poll.tv_usec);
-
- if ((delta = date.tv_sec - after_poll.tv_sec))
- delta *= 1000000;
- samp_time += delta + (date.tv_usec - after_poll.tv_usec);
-
- after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec;
- if (samp_time < 500000)
- return;
-
- HA_ATOMIC_STORE(&ti->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time);
- idle_time = samp_time = 0;
-}
-
-/* Collect date and time information after leaving poll(). <timeout> must be
- * set to the maximum sleep time passed to poll (in milliseconds), and
- * <interrupted> must be zero if the poller reached the timeout or non-zero
- * otherwise, which generally is provided by the poller's return value.
- */
-void sched_leaving_poll(int timeout, int interrupted)
-{
- sched_measure_idle();
- ti->prev_cpu_time = now_cpu_time();
- ti->prev_mono_time = now_mono_time();
-}
-
-/* Collect date and time information before calling poll(). This will be used
- * to count the run time of the past loop and the sleep time of the next poll.
- * It also compares the elasped and cpu times during the activity period to
- * estimate the amount of stolen time, which is reported if higher than half
- * a millisecond.
- */
-void sched_entering_poll()
-{
- uint64_t new_mono_time;
- uint64_t new_cpu_time;
- int64_t stolen;
-
- gettimeofday(&before_poll, NULL);
-
- new_cpu_time = now_cpu_time();
- new_mono_time = now_mono_time();
-
- if (ti->prev_cpu_time && ti->prev_mono_time) {
- new_cpu_time -= ti->prev_cpu_time;
- new_mono_time -= ti->prev_mono_time;
- stolen = new_mono_time - new_cpu_time;
- if (unlikely(stolen >= 500000)) {
- stolen /= 500000;
- /* more than half a millisecond difference might
- * indicate an undesired preemption.
- */
- report_stolen_time(stolen);
- }
- }
-}
-
/*
* Delete every tasks before running the master polling loop
*/