blob: 5a3447f2be79f2ae2c489be07d24dae257d2b2d2 [file] [log] [blame]
Willy Tarreau55542642021-10-08 09:33:24 +02001/*
2 * General time-keeping code and variables
3 *
4 * Copyright 2000-2021 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <sys/time.h>
14#include <time.h>
15
Willy Tarreau44c58da2021-10-08 12:27:54 +020016#ifdef USE_THREAD
17#include <pthread.h>
18#endif
19
Willy Tarreau55542642021-10-08 09:33:24 +020020#include <haproxy/api.h>
Willy Tarreauf9d5e102021-10-08 10:43:59 +020021#include <haproxy/activity.h>
Willy Tarreau55542642021-10-08 09:33:24 +020022#include <haproxy/clock.h>
23#include <haproxy/time.h>
24#include <haproxy/tinfo-t.h>
25#include <haproxy/tools.h>
26
27struct timeval start_date; /* the process's start date in wall-clock time */
28volatile ullong global_now; /* common monotonic date between all threads (32:32) */
29volatile uint global_now_ms; /* common monotonic date in milliseconds (may wrap) */
30
31THREAD_ALIGNED(64) static ullong now_offset; /* global offset between system time and global time */
32
33THREAD_LOCAL uint now_ms; /* internal monotonic date in milliseconds (may wrap) */
34THREAD_LOCAL struct timeval now; /* internal monotonic date derived from real clock */
35THREAD_LOCAL struct timeval date; /* the real current date (wall-clock time) */
Willy Tarreau55542642021-10-08 09:33:24 +020036
Willy Tarreau2c6a9982021-10-08 11:38:30 +020037static THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */
38static THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */
Willy Tarreauf9d5e102021-10-08 10:43:59 +020039static THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */
40static THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */
Willy Tarreau55542642021-10-08 09:33:24 +020041static THREAD_LOCAL unsigned int iso_time_sec; /* last iso time value for this thread */
42static THREAD_LOCAL char iso_time_str[34]; /* ISO time representation of gettimeofday() */
43
/* Returns the system's monotonic time in nanoseconds. Zero is returned when
 * the monotonic clock is not supported at build time, or when the clock could
 * not be read.
 */
uint64_t now_mono_time(void)
{
	uint64_t ret = 0;
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_MONOTONIC_CLOCK)
	struct timespec ts;

	/* <ts> is left unset when the call fails, so it must not be read then */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
#endif
	return ret;
}
55
/* Returns the calling thread's cumulated CPU time in nanoseconds. Zero is
 * returned when per-thread CPU clocks are not supported at build time, or when
 * the clock could not be read.
 */
uint64_t now_cpu_time(void)
{
	uint64_t ret = 0;
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
	struct timespec ts;

	/* <ts> is left unset when the call fails, so it must not be read then */
	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0)
		ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
#endif
	return ret;
}
67
/* Returns the cumulated CPU time, in nanoseconds, of the thread described by
 * <thr>, read from the clock designated by thr->clock_id. Zero is returned
 * when per-thread CPU clocks are not supported at build time, or when that
 * clock could not be read (e.g. because the clock id is not valid).
 */
uint64_t now_cpu_time_thread(const struct thread_info *thr)
{
	uint64_t ret = 0;
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
	struct timespec ts;

	/* <ts> is left unset when the call fails, so it must not be read then */
	if (clock_gettime(thr->clock_id, &ts) == 0)
		ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
#endif
	return ret;
}
79
/* Records in ti->clock_id the CPU-time clock to use for the local thread.
 * With threads enabled the id is asked to the pthread library for the calling
 * thread, otherwise CLOCK_THREAD_CPUTIME_ID is used. Nothing is done when
 * per-thread CPU clocks are not available at build time.
 */
void clock_set_local_source(void)
{
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
# ifndef USE_THREAD
	ti->clock_id = CLOCK_THREAD_CPUTIME_ID;
# else
	pthread_getcpuclockid(pthread_self(), &ti->clock_id);
# endif
#endif
}
91
Willy Tarreau55542642021-10-08 09:33:24 +020092/* clock_update_date: sets <date> to system time, and sets <now> to something as
93 * close as possible to real time, following a monotonic function. The main
94 * principle consists in detecting backwards and forwards time jumps and adjust
95 * an offset to correct them. This function should be called once after each
96 * poll, and never farther apart than MAX_DELAY_MS*2. The poll's timeout should
97 * be passed in <max_wait>, and the return value in <interrupted> (a non-zero
98 * value means that we have not expired the timeout).
99 *
100 * clock_init_process_date() must have been called once first, and
101 * clock_init_thread_date() must also have been called once for each thread.
102 *
103 * An offset is used to adjust the current time (date), to figure a monotonic
104 * local time (now). The offset is not critical, as it is only updated after a
105 * clock jump is detected. From this point all threads will apply it to their
106 * locally measured time, and will then agree around a common monotonic
107 * global_now value that serves to further refine their local time. As it is
108 * not possible to atomically update a timeval, both global_now and the
109 * now_offset values are instead stored as 64-bit integers made of two 32 bit
110 * values for the tv_sec and tv_usec parts. The offset is made of two signed
111 * ints so that the clock can be adjusted in the two directions.
112 */
113void clock_update_date(int max_wait, int interrupted)
114{
115 struct timeval min_deadline, max_deadline, tmp_now;
116 uint old_now_ms;
117 ullong old_now;
118 ullong new_now;
119 ullong ofs, ofs_new;
120 uint sec_ofs, usec_ofs;
121
122 gettimeofday(&date, NULL);
123
124 /* compute the minimum and maximum local date we may have reached based
125 * on our past date and the associated timeout. There are three possible
126 * extremities:
127 * - the new date cannot be older than before_poll
128 * - if not interrupted, the new date cannot be older than
129 * before_poll+max_wait
130 * - in any case the new date cannot be newer than
131 * before_poll+max_wait+some margin (100ms used here).
132 * In case of violation, we'll ignore the current date and instead
133 * restart from the last date we knew.
134 */
135 _tv_ms_add(&min_deadline, &before_poll, max_wait);
136 _tv_ms_add(&max_deadline, &before_poll, max_wait + 100);
137
138 ofs = HA_ATOMIC_LOAD(&now_offset);
139
140 if (unlikely(__tv_islt(&date, &before_poll) || // big jump backwards
141 (!interrupted && __tv_islt(&date, &min_deadline)) || // small jump backwards
142 __tv_islt(&max_deadline, &date))) { // big jump forwards
143 if (!interrupted)
144 _tv_ms_add(&now, &now, max_wait);
145 } else {
146 /* The date is still within expectations. Let's apply the
147 * now_offset to the system date. Note: ofs if made of two
148 * independent signed ints.
149 */
150 now.tv_sec = date.tv_sec + (int)(ofs >> 32); // note: may be positive or negative
151 now.tv_usec = date.tv_usec + (int)ofs; // note: may be positive or negative
152 if ((int)now.tv_usec < 0) {
153 now.tv_usec += 1000000;
154 now.tv_sec -= 1;
155 } else if (now.tv_usec >= 1000000) {
156 now.tv_usec -= 1000000;
157 now.tv_sec += 1;
158 }
159 }
160
161 /* now that we have bounded the local time, let's check if it's
162 * realistic regarding the global date, which only moves forward,
163 * otherwise catch up.
164 */
165 old_now = global_now;
166 old_now_ms = global_now_ms;
167
168 do {
169 tmp_now.tv_sec = (unsigned int)(old_now >> 32);
170 tmp_now.tv_usec = old_now & 0xFFFFFFFFU;
171
172 if (__tv_islt(&now, &tmp_now))
173 now = tmp_now;
174
175 /* now <now> is expected to be the most accurate date,
176 * equal to <global_now> or newer.
177 */
178 new_now = ((ullong)now.tv_sec << 32) + (uint)now.tv_usec;
179 now_ms = __tv_to_ms(&now);
180
181 /* let's try to update the global <now> (both in timeval
182 * and ms forms) or loop again.
183 */
184 } while (((new_now != old_now && !_HA_ATOMIC_CAS(&global_now, &old_now, new_now)) ||
185 (now_ms != old_now_ms && !_HA_ATOMIC_CAS(&global_now_ms, &old_now_ms, now_ms))) &&
186 __ha_cpu_relax());
187
188 /* <now> and <now_ms> are now updated to the last value of global_now
189 * and global_now_ms, which were also monotonically updated. We can
190 * compute the latest offset, we don't care who writes it last, the
191 * variations will not break the monotonic property.
192 */
193
194 sec_ofs = now.tv_sec - date.tv_sec;
195 usec_ofs = now.tv_usec - date.tv_usec;
196 if ((int)usec_ofs < 0) {
197 usec_ofs += 1000000;
198 sec_ofs -= 1;
199 }
200 ofs_new = ((ullong)sec_ofs << 32) + usec_ofs;
201 if (ofs_new != ofs)
202 HA_ATOMIC_STORE(&now_offset, ofs_new);
203}
204
205/* must be called once at boot to initialize some global variables */
206void clock_init_process_date(void)
207{
208 now_offset = 0;
209 gettimeofday(&date, NULL);
210 now = after_poll = before_poll = date;
211 global_now = ((ullong)date.tv_sec << 32) + (uint)date.tv_usec;
212 global_now_ms = now.tv_sec * 1000 + now.tv_usec / 1000;
213 ti->idle_pct = 100;
214 clock_update_date(0, 1);
215}
216
217/* must be called once per thread to initialize their thread-local variables.
218 * Note that other threads might also be initializing and running in parallel.
219 */
220void clock_init_thread_date(void)
221{
222 ullong old_now;
223
224 gettimeofday(&date, NULL);
225 after_poll = before_poll = date;
226
227 old_now = _HA_ATOMIC_LOAD(&global_now);
228 now.tv_sec = old_now >> 32;
229 now.tv_usec = (uint)old_now;
230 ti->idle_pct = 100;
231 clock_update_date(0, 1);
232}
233
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200234/* report the average CPU idle percentage over all running threads, between 0 and 100 */
235uint clock_report_idle(void)
236{
237 uint total = 0;
238 uint rthr = 0;
239 uint thr;
240
241 for (thr = 0; thr < MAX_THREADS; thr++) {
242 if (!(all_threads_mask & (1UL << thr)))
243 continue;
244 total += HA_ATOMIC_LOAD(&ha_thread_info[thr].idle_pct);
245 rthr++;
246 }
247 return rthr ? total / rthr : 0;
248}
249
250/* Update the idle time value twice a second, to be called after
251 * clock_update_date() when called after poll(), and currently called only by
252 * clock_leaving_poll() below. It relies on <before_poll> to be updated to
253 * the system time before calling poll().
254 */
255static inline void clock_measure_idle(void)
256{
257 /* Let's compute the idle to work ratio. We worked between after_poll
258 * and before_poll, and slept between before_poll and date. The idle_pct
259 * is updated at most twice every second. Note that the current second
260 * rarely changes so we avoid a multiply when not needed.
261 */
262 int delta;
263
264 if ((delta = date.tv_sec - before_poll.tv_sec))
265 delta *= 1000000;
266 idle_time += delta + (date.tv_usec - before_poll.tv_usec);
267
268 if ((delta = date.tv_sec - after_poll.tv_sec))
269 delta *= 1000000;
270 samp_time += delta + (date.tv_usec - after_poll.tv_usec);
271
272 after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec;
273 if (samp_time < 500000)
274 return;
275
276 HA_ATOMIC_STORE(&ti->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time);
277 idle_time = samp_time = 0;
278}
279
280/* Collect date and time information after leaving poll(). <timeout> must be
281 * set to the maximum sleep time passed to poll (in milliseconds), and
282 * <interrupted> must be zero if the poller reached the timeout or non-zero
283 * otherwise, which generally is provided by the poller's return value.
284 */
285void clock_leaving_poll(int timeout, int interrupted)
286{
287 clock_measure_idle();
288 ti->prev_cpu_time = now_cpu_time();
289 ti->prev_mono_time = now_mono_time();
290}
291
292/* Collect date and time information before calling poll(). This will be used
293 * to count the run time of the past loop and the sleep time of the next poll.
294 * It also compares the elasped and cpu times during the activity period to
295 * estimate the amount of stolen time, which is reported if higher than half
296 * a millisecond.
297 */
298void clock_entering_poll(void)
299{
300 uint64_t new_mono_time;
301 uint64_t new_cpu_time;
Willy Tarreau20adfde2021-10-08 11:34:46 +0200302 uint32_t run_time;
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200303 int64_t stolen;
304
305 gettimeofday(&before_poll, NULL);
306
Willy Tarreau20adfde2021-10-08 11:34:46 +0200307 run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec);
308
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200309 new_cpu_time = now_cpu_time();
310 new_mono_time = now_mono_time();
311
312 if (ti->prev_cpu_time && ti->prev_mono_time) {
313 new_cpu_time -= ti->prev_cpu_time;
314 new_mono_time -= ti->prev_mono_time;
315 stolen = new_mono_time - new_cpu_time;
316 if (unlikely(stolen >= 500000)) {
317 stolen /= 500000;
318 /* more than half a millisecond difference might
319 * indicate an undesired preemption.
320 */
321 report_stolen_time(stolen);
322 }
323 }
Willy Tarreau20adfde2021-10-08 11:34:46 +0200324
325 /* update the average runtime */
326 activity_count_runtime(run_time);
Willy Tarreauf9d5e102021-10-08 10:43:59 +0200327}
328
Willy Tarreau55542642021-10-08 09:33:24 +0200329/* returns the current date as returned by gettimeofday() in ISO+microsecond
330 * format. It uses a thread-local static variable that the reader can consume
331 * for as long as it wants until next call. Thus, do not call it from a signal
332 * handler. If <pad> is non-0, a trailing space will be added. It will always
333 * return exactly 32 or 33 characters (depending on padding) and will always be
334 * zero-terminated, thus it will always fit into a 34 bytes buffer.
335 * This also always include the local timezone (in +/-HH:mm format) .
336 */
337char *timeofday_as_iso_us(int pad)
338{
339 struct timeval new_date;
340 struct tm tm;
341 const char *offset;
342 char c;
343
344 gettimeofday(&new_date, NULL);
345 if (new_date.tv_sec != iso_time_sec || !new_date.tv_sec) {
346 get_localtime(new_date.tv_sec, &tm);
347 offset = get_gmt_offset(new_date.tv_sec, &tm);
348 if (unlikely(strftime(iso_time_str, sizeof(iso_time_str), "%Y-%m-%dT%H:%M:%S.000000+00:00", &tm) != 32))
349 strcpy(iso_time_str, "YYYY-mm-ddTHH:MM:SS.000000-00:00"); // make the failure visible but respect format.
350 iso_time_str[26] = offset[0];
351 iso_time_str[27] = offset[1];
352 iso_time_str[28] = offset[2];
353 iso_time_str[30] = offset[3];
354 iso_time_str[31] = offset[4];
355 iso_time_sec = new_date.tv_sec;
356 }
357
358 /* utoa_pad adds a trailing 0 so we save the char for restore */
359 c = iso_time_str[26];
360 utoa_pad(new_date.tv_usec, iso_time_str + 20, 7);
361 iso_time_str[26] = c;
362 if (pad) {
363 iso_time_str[32] = ' ';
364 iso_time_str[33] = 0;
365 }
366 return iso_time_str;
367}