blob: 59ec173c186f4a0cfc8d33783a7ad788d0028f14 [file] [log] [blame]
/*
 * General time-keeping code and variables
 *
 * Copyright 2000-2021 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <sys/time.h>
Willy Tarreau6cb0c392021-10-08 14:48:30 +020014#include <signal.h>
Willy Tarreau55542642021-10-08 09:33:24 +020015#include <time.h>
16
Willy Tarreau44c58da2021-10-08 12:27:54 +020017#ifdef USE_THREAD
18#include <pthread.h>
19#endif
20
Willy Tarreau55542642021-10-08 09:33:24 +020021#include <haproxy/api.h>
Willy Tarreauf9d5e102021-10-08 10:43:59 +020022#include <haproxy/activity.h>
Willy Tarreau55542642021-10-08 09:33:24 +020023#include <haproxy/clock.h>
Willy Tarreau6cb0c392021-10-08 14:48:30 +020024#include <haproxy/signal-t.h>
Willy Tarreau55542642021-10-08 09:33:24 +020025#include <haproxy/time.h>
26#include <haproxy/tinfo-t.h>
27#include <haproxy/tools.h>
28
29struct timeval start_date; /* the process's start date in wall-clock time */
Willy Tarreau6093ba42023-02-07 15:52:14 +010030struct timeval start_time; /* the process's start date in internal monotonic time */
Willy Tarreau55542642021-10-08 09:33:24 +020031volatile ullong global_now; /* common monotonic date between all threads (32:32) */
32volatile uint global_now_ms; /* common monotonic date in milliseconds (may wrap) */
33
34THREAD_ALIGNED(64) static ullong now_offset; /* global offset between system time and global time */
35
36THREAD_LOCAL uint now_ms; /* internal monotonic date in milliseconds (may wrap) */
37THREAD_LOCAL struct timeval now; /* internal monotonic date derived from real clock */
38THREAD_LOCAL struct timeval date; /* the real current date (wall-clock time) */
Willy Tarreau55542642021-10-08 09:33:24 +020039
Willy Tarreau2c6a9982021-10-08 11:38:30 +020040static THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */
41static THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */
Willy Tarreauf9d5e102021-10-08 10:43:59 +020042static THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */
43static THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */
Willy Tarreau55542642021-10-08 09:33:24 +020044static THREAD_LOCAL unsigned int iso_time_sec; /* last iso time value for this thread */
45static THREAD_LOCAL char iso_time_str[34]; /* ISO time representation of gettimeofday() */
46
Willy Tarreau21694982021-10-08 15:09:17 +020047#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
48static clockid_t per_thread_clock_id[MAX_THREADS];
49#endif
50
/* Returns the system's monotonic time in nanoseconds if supported, otherwise
 * zero. Zero is also returned when the clock cannot be read.
 */
uint64_t now_mono_time(void)
{
	uint64_t ret = 0;
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_MONOTONIC_CLOCK)
	struct timespec ts;

	/* <ts> is only meaningful when the call succeeded */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		ret = (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
	return ret;
}
62
/* Returns the current thread's cumulated CPU time in nanoseconds if
 * supported, otherwise zero. Zero is also returned when the clock cannot be
 * read.
 */
uint64_t now_cpu_time(void)
{
	uint64_t ret = 0;
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
	struct timespec ts;

	/* <ts> is only meaningful when the call succeeded */
	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0)
		ret = (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
	return ret;
}
74
/* Returns another thread's cumulated CPU time in nanoseconds if supported,
 * otherwise zero. <thr> is the index of the thread in per_thread_clock_id[].
 * Zero is also returned when <thr> is out of range or when the clock cannot
 * be read.
 */
uint64_t now_cpu_time_thread(int thr)
{
	uint64_t ret = 0;
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
	struct timespec ts;

	/* never index the clock ID table outside of its bounds */
	if (thr < 0 || thr >= MAX_THREADS)
		return ret;

	/* <ts> is only meaningful when the call succeeded */
	if (clock_gettime(per_thread_clock_id[thr], &ts) == 0)
		ret = (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
	return ret;
}
86
/* Set the clock source for the local thread: the calling thread's CPU-time
 * clock ID is stored into per_thread_clock_id[tid]. Without thread support,
 * CLOCK_THREAD_CPUTIME_ID is stored instead. Does nothing when per-thread
 * CPU timers are not available at build time.
 */
void clock_set_local_source(void)
{
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
#ifdef USE_THREAD
	pthread_getcpuclockid(pthread_self(), &per_thread_clock_id[tid]);
#else
	per_thread_clock_id[tid] = CLOCK_THREAD_CPUTIME_ID;
#endif
#endif
}
98
/* Registers a timer <tmr> of type timer_t delivering signal <sig> with value
 * <val>. It tries on the current thread's clock ID first and falls back to
 * CLOCK_REALTIME. Returns 1 on success, or 0 on failure or when the required
 * features (USE_RT, POSIX timers and per-thread CPU clocks) are not available
 * at build time.
 */
int clock_setup_signal_timer(void *tmr, int sig, int val)
{
	int ret = 0;

#if defined(USE_RT) && defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
	struct sigevent sev = { };
	timer_t *timer = tmr;
	sigset_t set;

	/* unblock the signal we intend to use. This must be the very same
	 * signal as the one the timer delivers, hence <sig> and not a
	 * hard-coded one.
	 */
	sigemptyset(&set);
	sigaddset(&set, sig);
	ha_sigmask(SIG_UNBLOCK, &set, NULL);

	/* this timer will deliver <sig> when it fires, with <val> in the
	 * si_int field (important since any thread will receive the signal).
	 */
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = sig;
	sev.sigev_value.sival_int = val;

	/* prefer the thread's own clock, fall back to the wall clock */
	if (timer_create(per_thread_clock_id[tid], &sev, timer) != -1 ||
	    timer_create(CLOCK_REALTIME, &sev, timer) != -1)
		ret = 1;
#endif
	return ret;
}
129
Willy Tarreau55542642021-10-08 09:33:24 +0200130/* clock_update_date: sets <date> to system time, and sets <now> to something as
131 * close as possible to real time, following a monotonic function. The main
132 * principle consists in detecting backwards and forwards time jumps and adjust
133 * an offset to correct them. This function should be called once after each
134 * poll, and never farther apart than MAX_DELAY_MS*2. The poll's timeout should
135 * be passed in <max_wait>, and the return value in <interrupted> (a non-zero
136 * value means that we have not expired the timeout).
137 *
138 * clock_init_process_date() must have been called once first, and
139 * clock_init_thread_date() must also have been called once for each thread.
140 *
141 * An offset is used to adjust the current time (date), to figure a monotonic
142 * local time (now). The offset is not critical, as it is only updated after a
143 * clock jump is detected. From this point all threads will apply it to their
144 * locally measured time, and will then agree around a common monotonic
145 * global_now value that serves to further refine their local time. As it is
146 * not possible to atomically update a timeval, both global_now and the
147 * now_offset values are instead stored as 64-bit integers made of two 32 bit
148 * values for the tv_sec and tv_usec parts. The offset is made of two signed
149 * ints so that the clock can be adjusted in the two directions.
150 */
Willy Tarreaua7004202022-09-21 07:37:27 +0200151void clock_update_local_date(int max_wait, int interrupted)
Willy Tarreau55542642021-10-08 09:33:24 +0200152{
Willy Tarreaua7004202022-09-21 07:37:27 +0200153 struct timeval min_deadline, max_deadline;
154 ullong ofs;
Willy Tarreau55542642021-10-08 09:33:24 +0200155
156 gettimeofday(&date, NULL);
157
158 /* compute the minimum and maximum local date we may have reached based
159 * on our past date and the associated timeout. There are three possible
160 * extremities:
161 * - the new date cannot be older than before_poll
162 * - if not interrupted, the new date cannot be older than
163 * before_poll+max_wait
164 * - in any case the new date cannot be newer than
165 * before_poll+max_wait+some margin (100ms used here).
166 * In case of violation, we'll ignore the current date and instead
167 * restart from the last date we knew.
168 */
169 _tv_ms_add(&min_deadline, &before_poll, max_wait);
170 _tv_ms_add(&max_deadline, &before_poll, max_wait + 100);
171
172 ofs = HA_ATOMIC_LOAD(&now_offset);
173
174 if (unlikely(__tv_islt(&date, &before_poll) || // big jump backwards
175 (!interrupted && __tv_islt(&date, &min_deadline)) || // small jump backwards
176 __tv_islt(&max_deadline, &date))) { // big jump forwards
177 if (!interrupted)
178 _tv_ms_add(&now, &now, max_wait);
179 } else {
180 /* The date is still within expectations. Let's apply the
181 * now_offset to the system date. Note: ofs if made of two
182 * independent signed ints.
183 */
184 now.tv_sec = date.tv_sec + (int)(ofs >> 32); // note: may be positive or negative
185 now.tv_usec = date.tv_usec + (int)ofs; // note: may be positive or negative
186 if ((int)now.tv_usec < 0) {
187 now.tv_usec += 1000000;
188 now.tv_sec -= 1;
189 } else if (now.tv_usec >= 1000000) {
190 now.tv_usec -= 1000000;
191 now.tv_sec += 1;
192 }
193 }
Willy Tarreaua7004202022-09-21 07:37:27 +0200194 now_ms = __tv_to_ms(&now);
195}
196
197void clock_update_global_date()
198{
199 struct timeval tmp_now;
200 uint old_now_ms;
201 ullong old_now;
202 ullong new_now;
Willy Tarreau4eaf85f2022-09-21 08:21:45 +0200203 ullong ofs_new;
Willy Tarreaua7004202022-09-21 07:37:27 +0200204 uint sec_ofs, usec_ofs;
205
Willy Tarreau55542642021-10-08 09:33:24 +0200206 /* now that we have bounded the local time, let's check if it's
207 * realistic regarding the global date, which only moves forward,
208 * otherwise catch up.
209 */
210 old_now = global_now;
211 old_now_ms = global_now_ms;
212
213 do {
214 tmp_now.tv_sec = (unsigned int)(old_now >> 32);
215 tmp_now.tv_usec = old_now & 0xFFFFFFFFU;
216
217 if (__tv_islt(&now, &tmp_now))
218 now = tmp_now;
219
220 /* now <now> is expected to be the most accurate date,
Willy Tarreau4eaf85f2022-09-21 08:21:45 +0200221 * equal to <global_now> or newer. Updating the global
222 * date too often causes extreme contention and is not
223 * needed: it's only used to help threads run at the
224 * same date in case of local drift, and the global date,
225 * which changes, is only used by freq counters (a choice
226 * which is debatable by the way since it changes under us).
227 * Tests have seen that the contention can be reduced from
228 * 37% in this function to almost 0% when keeping clocks
229 * synchronized no better than 32 microseconds, so that's
230 * what we're doing here.
Willy Tarreau55542642021-10-08 09:33:24 +0200231 */
Willy Tarreau4eaf85f2022-09-21 08:21:45 +0200232
Willy Tarreau55542642021-10-08 09:33:24 +0200233 new_now = ((ullong)now.tv_sec << 32) + (uint)now.tv_usec;
234 now_ms = __tv_to_ms(&now);
235
Willy Tarreau4eaf85f2022-09-21 08:21:45 +0200236 if (!((new_now ^ old_now) & ~0x1FULL))
237 return;
238
Willy Tarreau55542642021-10-08 09:33:24 +0200239 /* let's try to update the global <now> (both in timeval
240 * and ms forms) or loop again.
241 */
Willy Tarreau4eaf85f2022-09-21 08:21:45 +0200242 } while ((!_HA_ATOMIC_CAS(&global_now, &old_now, new_now) ||
Willy Tarreau55542642021-10-08 09:33:24 +0200243 (now_ms != old_now_ms && !_HA_ATOMIC_CAS(&global_now_ms, &old_now_ms, now_ms))) &&
244 __ha_cpu_relax());
245
246 /* <now> and <now_ms> are now updated to the last value of global_now
247 * and global_now_ms, which were also monotonically updated. We can
248 * compute the latest offset, we don't care who writes it last, the
249 * variations will not break the monotonic property.
250 */
251
252 sec_ofs = now.tv_sec - date.tv_sec;
253 usec_ofs = now.tv_usec - date.tv_usec;
254 if ((int)usec_ofs < 0) {
255 usec_ofs += 1000000;
256 sec_ofs -= 1;
257 }
258 ofs_new = ((ullong)sec_ofs << 32) + usec_ofs;
Willy Tarreau4eaf85f2022-09-21 08:21:45 +0200259 HA_ATOMIC_STORE(&now_offset, ofs_new);
Willy Tarreau55542642021-10-08 09:33:24 +0200260}
261
262/* must be called once at boot to initialize some global variables */
263void clock_init_process_date(void)
264{
265 now_offset = 0;
266 gettimeofday(&date, NULL);
267 now = after_poll = before_poll = date;
268 global_now = ((ullong)date.tv_sec << 32) + (uint)date.tv_usec;
269 global_now_ms = now.tv_sec * 1000 + now.tv_usec / 1000;
Willy Tarreau28360dc2023-02-07 14:44:44 +0100270
271 /* force time to wrap 20s after boot: we first compute the time offset
272 * that once applied to the wall-clock date will make the local time
273 * wrap in 5 seconds. This offset is applied to the process-wide time,
274 * and will be used to recompute the local time, both of which will
275 * match and continue from this shifted date.
276 */
277 now_offset = (uint64_t)(-(global_now_ms / 1000U) - BOOT_TIME_WRAP_SEC) << 32;
278 global_now += now_offset;
279
Willy Tarreau45c38e22021-09-30 18:28:49 +0200280 th_ctx->idle_pct = 100;
Willy Tarreau55542642021-10-08 09:33:24 +0200281 clock_update_date(0, 1);
282}
283
284/* must be called once per thread to initialize their thread-local variables.
285 * Note that other threads might also be initializing and running in parallel.
286 */
287void clock_init_thread_date(void)
288{
289 ullong old_now;
290
291 gettimeofday(&date, NULL);
292 after_poll = before_poll = date;
293
294 old_now = _HA_ATOMIC_LOAD(&global_now);
295 now.tv_sec = old_now >> 32;
296 now.tv_usec = (uint)old_now;
Willy Tarreau45c38e22021-09-30 18:28:49 +0200297 th_ctx->idle_pct = 100;
Aurelien DARRAGON16d6c0c2022-11-10 11:47:47 +0100298 th_ctx->prev_cpu_time = now_cpu_time();
Willy Tarreau55542642021-10-08 09:33:24 +0200299 clock_update_date(0, 1);
300}
301
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200302/* report the average CPU idle percentage over all running threads, between 0 and 100 */
303uint clock_report_idle(void)
304{
305 uint total = 0;
306 uint rthr = 0;
307 uint thr;
308
309 for (thr = 0; thr < MAX_THREADS; thr++) {
Willy Tarreau1e7f0d62022-06-27 16:22:22 +0200310 if (!ha_thread_info[thr].tg ||
311 !(ha_thread_info[thr].tg->threads_enabled & ha_thread_info[thr].ltid_bit))
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200312 continue;
Willy Tarreau45c38e22021-09-30 18:28:49 +0200313 total += HA_ATOMIC_LOAD(&ha_thread_ctx[thr].idle_pct);
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200314 rthr++;
315 }
316 return rthr ? total / rthr : 0;
317}
318
319/* Update the idle time value twice a second, to be called after
320 * clock_update_date() when called after poll(), and currently called only by
321 * clock_leaving_poll() below. It relies on <before_poll> to be updated to
322 * the system time before calling poll().
323 */
324static inline void clock_measure_idle(void)
325{
326 /* Let's compute the idle to work ratio. We worked between after_poll
327 * and before_poll, and slept between before_poll and date. The idle_pct
328 * is updated at most twice every second. Note that the current second
329 * rarely changes so we avoid a multiply when not needed.
330 */
331 int delta;
332
333 if ((delta = date.tv_sec - before_poll.tv_sec))
334 delta *= 1000000;
335 idle_time += delta + (date.tv_usec - before_poll.tv_usec);
336
337 if ((delta = date.tv_sec - after_poll.tv_sec))
338 delta *= 1000000;
339 samp_time += delta + (date.tv_usec - after_poll.tv_usec);
340
341 after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec;
342 if (samp_time < 500000)
343 return;
344
Willy Tarreau45c38e22021-09-30 18:28:49 +0200345 HA_ATOMIC_STORE(&th_ctx->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time);
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200346 idle_time = samp_time = 0;
347}
348
349/* Collect date and time information after leaving poll(). <timeout> must be
350 * set to the maximum sleep time passed to poll (in milliseconds), and
351 * <interrupted> must be zero if the poller reached the timeout or non-zero
352 * otherwise, which generally is provided by the poller's return value.
353 */
354void clock_leaving_poll(int timeout, int interrupted)
355{
356 clock_measure_idle();
Willy Tarreau45c38e22021-09-30 18:28:49 +0200357 th_ctx->prev_cpu_time = now_cpu_time();
358 th_ctx->prev_mono_time = now_mono_time();
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200359}
360
361/* Collect date and time information before calling poll(). This will be used
362 * to count the run time of the past loop and the sleep time of the next poll.
Ilya Shipitsin4a689da2022-10-29 09:34:32 +0500363 * It also compares the elapsed and cpu times during the activity period to
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200364 * estimate the amount of stolen time, which is reported if higher than half
365 * a millisecond.
366 */
367void clock_entering_poll(void)
368{
369 uint64_t new_mono_time;
370 uint64_t new_cpu_time;
Willy Tarreau20adfde2021-10-08 11:34:46 +0200371 uint32_t run_time;
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200372 int64_t stolen;
373
374 gettimeofday(&before_poll, NULL);
375
Willy Tarreau20adfde2021-10-08 11:34:46 +0200376 run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec);
377
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200378 new_cpu_time = now_cpu_time();
379 new_mono_time = now_mono_time();
380
Willy Tarreau45c38e22021-09-30 18:28:49 +0200381 if (th_ctx->prev_cpu_time && th_ctx->prev_mono_time) {
382 new_cpu_time -= th_ctx->prev_cpu_time;
383 new_mono_time -= th_ctx->prev_mono_time;
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200384 stolen = new_mono_time - new_cpu_time;
385 if (unlikely(stolen >= 500000)) {
386 stolen /= 500000;
387 /* more than half a millisecond difference might
388 * indicate an undesired preemption.
389 */
390 report_stolen_time(stolen);
391 }
392 }
Willy Tarreau20adfde2021-10-08 11:34:46 +0200393
394 /* update the average runtime */
395 activity_count_runtime(run_time);
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200396}
397
Willy Tarreau55542642021-10-08 09:33:24 +0200398/* returns the current date as returned by gettimeofday() in ISO+microsecond
399 * format. It uses a thread-local static variable that the reader can consume
400 * for as long as it wants until next call. Thus, do not call it from a signal
401 * handler. If <pad> is non-0, a trailing space will be added. It will always
402 * return exactly 32 or 33 characters (depending on padding) and will always be
403 * zero-terminated, thus it will always fit into a 34 bytes buffer.
404 * This also always include the local timezone (in +/-HH:mm format) .
405 */
406char *timeofday_as_iso_us(int pad)
407{
408 struct timeval new_date;
409 struct tm tm;
410 const char *offset;
411 char c;
412
413 gettimeofday(&new_date, NULL);
414 if (new_date.tv_sec != iso_time_sec || !new_date.tv_sec) {
415 get_localtime(new_date.tv_sec, &tm);
416 offset = get_gmt_offset(new_date.tv_sec, &tm);
417 if (unlikely(strftime(iso_time_str, sizeof(iso_time_str), "%Y-%m-%dT%H:%M:%S.000000+00:00", &tm) != 32))
418 strcpy(iso_time_str, "YYYY-mm-ddTHH:MM:SS.000000-00:00"); // make the failure visible but respect format.
419 iso_time_str[26] = offset[0];
420 iso_time_str[27] = offset[1];
421 iso_time_str[28] = offset[2];
422 iso_time_str[30] = offset[3];
423 iso_time_str[31] = offset[4];
424 iso_time_sec = new_date.tv_sec;
425 }
426
427 /* utoa_pad adds a trailing 0 so we save the char for restore */
428 c = iso_time_str[26];
429 utoa_pad(new_date.tv_usec, iso_time_str + 20, 7);
430 iso_time_str[26] = c;
431 if (pad) {
432 iso_time_str[32] = ' ';
433 iso_time_str[33] = 0;
434 }
435 return iso_time_str;
436}