Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 1 | /* |
| 2 | * General time-keeping code and variables |
| 3 | * |
| 4 | * Copyright 2000-2021 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
| 13 | #include <sys/time.h> |
Willy Tarreau | 6cb0c39 | 2021-10-08 14:48:30 +0200 | [diff] [blame] | 14 | #include <signal.h> |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 15 | #include <time.h> |
| 16 | |
Willy Tarreau | 44c58da | 2021-10-08 12:27:54 +0200 | [diff] [blame] | 17 | #ifdef USE_THREAD |
| 18 | #include <pthread.h> |
| 19 | #endif |
| 20 | |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 21 | #include <haproxy/api.h> |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 22 | #include <haproxy/activity.h> |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 23 | #include <haproxy/clock.h> |
Willy Tarreau | 6cb0c39 | 2021-10-08 14:48:30 +0200 | [diff] [blame] | 24 | #include <haproxy/signal-t.h> |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 25 | #include <haproxy/time.h> |
| 26 | #include <haproxy/tinfo-t.h> |
| 27 | #include <haproxy/tools.h> |
| 28 | |
| 29 | struct timeval start_date; /* the process's start date in wall-clock time */ |
Willy Tarreau | da4aa69 | 2023-05-17 09:02:21 +0200 | [diff] [blame] | 30 | struct timeval ready_date; /* date when the process was considered ready */ |
Willy Tarreau | c05d30e | 2023-04-28 14:50:29 +0200 | [diff] [blame] | 31 | ullong start_time_ns; /* the process's start date in internal monotonic time (ns) */ |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 32 | volatile ullong global_now_ns; /* common monotonic date between all threads, in ns (wraps every 585 yr) */ |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 33 | volatile uint global_now_ms; /* common monotonic date in milliseconds (may wrap) */ |
| 34 | |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 35 | THREAD_ALIGNED(64) static llong now_offset; /* global offset between system time and global time in ns */ |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 36 | |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 37 | THREAD_LOCAL ullong now_ns; /* internal monotonic date derived from real clock, in ns (wraps every 585 yr) */ |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 38 | THREAD_LOCAL uint now_ms; /* internal monotonic date in milliseconds (may wrap) */ |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 39 | THREAD_LOCAL struct timeval date; /* the real current date (wall-clock time) */ |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 40 | |
Willy Tarreau | 2c6a998 | 2021-10-08 11:38:30 +0200 | [diff] [blame] | 41 | static THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */ |
| 42 | static THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */ |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 43 | static THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */ |
| 44 | static THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */ |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 45 | static THREAD_LOCAL unsigned int iso_time_sec; /* last iso time value for this thread */ |
| 46 | static THREAD_LOCAL char iso_time_str[34]; /* ISO time representation of gettimeofday() */ |
| 47 | |
Willy Tarreau | 2169498 | 2021-10-08 15:09:17 +0200 | [diff] [blame] | 48 | #if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) |
| 49 | static clockid_t per_thread_clock_id[MAX_THREADS]; |
| 50 | #endif |
| 51 | |
/* Returns the system's monotonic time in nanoseconds if supported, otherwise
 * zero. Zero is also returned when the clock exists at build time but
 * clock_gettime() reports a failure at run time, so that the caller never
 * gets a value computed from an uninitialized timespec.
 */
uint64_t now_mono_time(void)
{
	uint64_t ret = 0;
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_MONOTONIC_CLOCK)
	struct timespec ts;

	/* only trust <ts> when the call succeeded */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		ret = (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
	return ret;
}
| 63 | |
/* Returns the system's monotonic time in nanoseconds.
 * Uses the coarse clock source if supported (for fast but less precise
 * queries with limited resource usage).
 * Falls back to now_mono_time() if the coarse source is not supported, either
 * because CLOCK_MONOTONIC_COARSE is not defined at build time or because
 * clock_gettime() rejects it at run time. now_mono_time() may itself return 0
 * if the regular monotonic clock is not supported either.
 */
uint64_t now_mono_time_fast(void)
{
#if defined(CLOCK_MONOTONIC_COARSE)
	struct timespec ts;

	/* the constant being defined does not guarantee the running system
	 * accepts it, so only use <ts> when the call succeeded.
	 */
	if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0)
		return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
	/* fallback to regular mono time, returns 0 if not supported */
	return now_mono_time();
}
| 84 | |
/* Returns the current thread's cumulated CPU time in nanoseconds if
 * supported, otherwise zero. Zero is also returned if clock_gettime() fails,
 * so that the result is never derived from an uninitialized timespec.
 */
uint64_t now_cpu_time(void)
{
	uint64_t ret = 0;
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
	struct timespec ts;

	/* only trust <ts> when the call succeeded */
	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0)
		ret = (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
	return ret;
}
| 96 | |
Aurelien DARRAGON | df188f1 | 2023-04-04 17:21:40 +0200 | [diff] [blame] | 97 | /* Returns the current thread's cumulated CPU time in nanoseconds. |
| 98 | * |
| 99 | * thread_local timer is cached so that call is less precise but also less |
| 100 | * expensive if heavily used. |
| 101 | * We use the mono time as a cache expiration hint since now_cpu_time() is |
| 102 | * known to be much more expensive than now_mono_time_fast() on systems |
| 103 | * supporting the COARSE clock source. |
| 104 | * |
| 105 | * Returns 0 if either now_mono_time_fast() or now_cpu_time() are not |
| 106 | * supported. |
| 107 | */ |
| 108 | uint64_t now_cpu_time_fast(void) |
| 109 | { |
| 110 | static THREAD_LOCAL uint64_t mono_cache = 0; |
| 111 | static THREAD_LOCAL uint64_t cpu_cache = 0; |
| 112 | uint64_t mono_cur; |
| 113 | |
| 114 | mono_cur = now_mono_time_fast(); |
| 115 | if (unlikely(mono_cur != mono_cache)) { |
| 116 | /* global mono clock was updated: local cache is outdated */ |
| 117 | cpu_cache = now_cpu_time(); |
| 118 | mono_cache = mono_cur; |
| 119 | } |
| 120 | return cpu_cache; |
| 121 | } |
| 122 | |
/* Returns thread <thr>'s cumulated CPU time in nanoseconds if supported,
 * otherwise zero. <thr> is used as an index into per_thread_clock_id[] and is
 * not range-checked here.
 *
 * The build-time guard matches the one used by clock_set_local_source(): the
 * per-thread clock IDs are only filled under that condition, so on other
 * platforms the function reports zero instead of querying an entry that was
 * never set. Zero is also returned when clock_gettime() fails.
 */
uint64_t now_cpu_time_thread(int thr)
{
	uint64_t ret = 0;
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) && (_POSIX_THREAD_CPUTIME >= 0)
	struct timespec ts;

	/* only trust <ts> when the call succeeded */
	if (clock_gettime(per_thread_clock_id[thr], &ts) == 0)
		ret = (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
	return ret;
}
| 134 | |
/* Records the CPU-time clock ID of the calling thread into its slot of
 * per_thread_clock_id[] (indexed by <tid>). With threads enabled the ID is
 * obtained from pthread_getcpuclockid(), otherwise CLOCK_THREAD_CPUTIME_ID is
 * used. Does nothing when per-thread CPU clocks are not available at build
 * time.
 */
void clock_set_local_source(void)
{
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) && (_POSIX_THREAD_CPUTIME >= 0)
	clockid_t *my_clock = &per_thread_clock_id[tid];

#ifdef USE_THREAD
	pthread_getcpuclockid(pthread_self(), my_clock);
#else
	*my_clock = CLOCK_THREAD_CPUTIME_ID;
#endif
#endif
}
| 146 | |
/* Registers a timer <tmr> (a pointer to a timer_t) delivering signal <sig>
 * with value <val> in the signal's sival_int field. The timer is first created
 * on the current thread's clock ID, and on CLOCK_REALTIME if that fails.
 * Returns 1 on success, 0 on failure or when the feature is not built in.
 *
 * Note that the signal being unblocked is always WDTSIG, regardless of <sig>.
 * NOTE(review): callers are presumably expected to pass WDTSIG as <sig>.
 */
int clock_setup_signal_timer(void *tmr, int sig, int val)
{
#if defined(USE_RT) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
	struct sigevent sev = { };
	timer_t *timer = tmr;
	sigset_t set;

	/* make sure WDTSIG can be delivered */
	sigemptyset(&set);
	sigaddset(&set, WDTSIG);
	ha_sigmask(SIG_UNBLOCK, &set, NULL);

	/* the timer signals <sig> when it fires and carries <val> in the
	 * si_int field (important since any thread will receive the signal).
	 */
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = sig;
	sev.sigev_value.sival_int = val;

	/* preferred source: the calling thread's own clock */
	if (timer_create(per_thread_clock_id[tid], &sev, timer) != -1)
		return 1;

	/* fallback source: the real-time clock */
	if (timer_create(CLOCK_REALTIME, &sev, timer) != -1)
		return 1;
#endif
	return 0;
}
| 177 | |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 178 | /* clock_update_date: sets <date> to system time, and sets <now_ns> to something |
| 179 | * as close as possible to real time, following a monotonic function. The main |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 180 | * principle consists in detecting backwards and forwards time jumps and adjust |
| 181 | * an offset to correct them. This function should be called once after each |
| 182 | * poll, and never farther apart than MAX_DELAY_MS*2. The poll's timeout should |
| 183 | * be passed in <max_wait>, and the return value in <interrupted> (a non-zero |
| 184 | * value means that we have not expired the timeout). |
| 185 | * |
| 186 | * clock_init_process_date() must have been called once first, and |
| 187 | * clock_init_thread_date() must also have been called once for each thread. |
| 188 | * |
| 189 | * An offset is used to adjust the current time (date), to figure a monotonic |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 190 | * local time (now_ns). The offset is not critical, as it is only updated after |
| 191 | * a clock jump is detected. From this point all threads will apply it to their |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 192 | * locally measured time, and will then agree around a common monotonic |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 193 | * global_now_ns value that serves to further refine their local time. Both |
| 194 | * now_ns and global_now_ns are 64-bit integers counting nanoseconds since a |
| 195 | * vague reference (it starts roughly 20s before the next wrap-around of the |
| 196 | * millisecond counter after boot). The offset is also an integral number of |
| 197 | * nanoseconds, but it's signed so that the clock can be adjusted in the two |
| 198 | * directions. |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 199 | */ |
Willy Tarreau | a700420 | 2022-09-21 07:37:27 +0200 | [diff] [blame] | 200 | void clock_update_local_date(int max_wait, int interrupted) |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 201 | { |
Willy Tarreau | a700420 | 2022-09-21 07:37:27 +0200 | [diff] [blame] | 202 | struct timeval min_deadline, max_deadline; |
Willy Tarreau | f0af384 | 2024-09-09 13:56:18 +0200 | [diff] [blame] | 203 | llong ofs = HA_ATOMIC_LOAD(&now_offset); |
| 204 | llong date_ns; |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 205 | |
| 206 | gettimeofday(&date, NULL); |
Willy Tarreau | f0af384 | 2024-09-09 13:56:18 +0200 | [diff] [blame] | 207 | date_ns = tv_to_ns(&date); |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 208 | |
| 209 | /* compute the minimum and maximum local date we may have reached based |
| 210 | * on our past date and the associated timeout. There are three possible |
| 211 | * extremities: |
| 212 | * - the new date cannot be older than before_poll |
| 213 | * - if not interrupted, the new date cannot be older than |
| 214 | * before_poll+max_wait |
| 215 | * - in any case the new date cannot be newer than |
| 216 | * before_poll+max_wait+some margin (100ms used here). |
| 217 | * In case of violation, we'll ignore the current date and instead |
| 218 | * restart from the last date we knew. |
| 219 | */ |
| 220 | _tv_ms_add(&min_deadline, &before_poll, max_wait); |
| 221 | _tv_ms_add(&max_deadline, &before_poll, max_wait + 100); |
| 222 | |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 223 | if (unlikely(__tv_islt(&date, &before_poll) || // big jump backwards |
| 224 | (!interrupted && __tv_islt(&date, &min_deadline)) || // small jump backwards |
Willy Tarreau | f0af384 | 2024-09-09 13:56:18 +0200 | [diff] [blame] | 225 | date_ns + ofs >= now_ns + ms_to_ns(max_wait + 100)|| // offset changed by another thread |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 226 | __tv_islt(&max_deadline, &date))) { // big jump forwards |
| 227 | if (!interrupted) |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 228 | now_ns += ms_to_ns(max_wait); |
Willy Tarreau | edbce2e | 2024-09-04 16:55:43 +0200 | [diff] [blame] | 229 | |
Willy Tarreau | 0f20117 | 2024-09-12 17:58:57 +0200 | [diff] [blame] | 230 | /* consider the most recent known date */ |
| 231 | now_ns = MAX(now_ns, HA_ATOMIC_LOAD(&global_now_ns)); |
| 232 | |
Willy Tarreau | edbce2e | 2024-09-04 16:55:43 +0200 | [diff] [blame] | 233 | /* this event is rare, but it requires proper handling because if |
| 234 | * we just left now_ns where it was, the date will not be updated |
| 235 | * by clock_update_global_date(). |
| 236 | */ |
Willy Tarreau | f0af384 | 2024-09-09 13:56:18 +0200 | [diff] [blame] | 237 | HA_ATOMIC_STORE(&now_offset, now_ns - date_ns); |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 238 | } else { |
| 239 | /* The date is still within expectations. Let's apply the |
| 240 | * now_offset to the system date. Note: ofs if made of two |
| 241 | * independent signed ints. |
| 242 | */ |
Willy Tarreau | f0af384 | 2024-09-09 13:56:18 +0200 | [diff] [blame] | 243 | now_ns = date_ns + ofs; |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 244 | } |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 245 | now_ms = ns_to_ms(now_ns); |
Willy Tarreau | a700420 | 2022-09-21 07:37:27 +0200 | [diff] [blame] | 246 | } |
| 247 | |
| 248 | void clock_update_global_date() |
| 249 | { |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 250 | ullong old_now_ns; |
Willy Tarreau | a700420 | 2022-09-21 07:37:27 +0200 | [diff] [blame] | 251 | uint old_now_ms; |
Willy Tarreau | a700420 | 2022-09-21 07:37:27 +0200 | [diff] [blame] | 252 | |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 253 | /* now that we have bounded the local time, let's check if it's |
| 254 | * realistic regarding the global date, which only moves forward, |
| 255 | * otherwise catch up. |
| 256 | */ |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 257 | old_now_ns = _HA_ATOMIC_LOAD(&global_now_ns); |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 258 | old_now_ms = global_now_ms; |
| 259 | |
| 260 | do { |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 261 | if (now_ns < old_now_ns) |
| 262 | now_ns = old_now_ns; |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 263 | |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 264 | /* now <now_ns> is expected to be the most accurate date, |
| 265 | * equal to <global_now_ns> or newer. Updating the global |
Willy Tarreau | 4eaf85f | 2022-09-21 08:21:45 +0200 | [diff] [blame] | 266 | * date too often causes extreme contention and is not |
| 267 | * needed: it's only used to help threads run at the |
| 268 | * same date in case of local drift, and the global date, |
| 269 | * which changes, is only used by freq counters (a choice |
| 270 | * which is debatable by the way since it changes under us). |
| 271 | * Tests have seen that the contention can be reduced from |
| 272 | * 37% in this function to almost 0% when keeping clocks |
| 273 | * synchronized no better than 32 microseconds, so that's |
| 274 | * what we're doing here. |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 275 | */ |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 276 | now_ms = ns_to_ms(now_ns); |
Willy Tarreau | 4eaf85f | 2022-09-21 08:21:45 +0200 | [diff] [blame] | 277 | |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 278 | if (!((now_ns ^ old_now_ns) & ~0x7FFFULL)) |
Willy Tarreau | 4eaf85f | 2022-09-21 08:21:45 +0200 | [diff] [blame] | 279 | return; |
| 280 | |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 281 | /* let's try to update the global_now_ns (both in nanoseconds |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 282 | * and ms forms) or loop again. |
| 283 | */ |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 284 | } while ((!_HA_ATOMIC_CAS(&global_now_ns, &old_now_ns, now_ns) || |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 285 | (now_ms != old_now_ms && !_HA_ATOMIC_CAS(&global_now_ms, &old_now_ms, now_ms))) && |
| 286 | __ha_cpu_relax()); |
| 287 | |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 288 | /* <now_ns> and <now_ms> are now updated to the last value of |
| 289 | * global_now_ns and global_now_ms, which were also monotonically |
| 290 | * updated. We can compute the latest offset, we don't care who writes |
| 291 | * it last, the variations will not break the monotonic property. |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 292 | */ |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 293 | HA_ATOMIC_STORE(&now_offset, now_ns - tv_to_ns(&date)); |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 294 | } |
| 295 | |
| 296 | /* must be called once at boot to initialize some global variables */ |
| 297 | void clock_init_process_date(void) |
| 298 | { |
| 299 | now_offset = 0; |
| 300 | gettimeofday(&date, NULL); |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 301 | after_poll = before_poll = date; |
| 302 | now_ns = global_now_ns = tv_to_ns(&date); |
| 303 | global_now_ms = ns_to_ms(now_ns); |
Willy Tarreau | 28360dc | 2023-02-07 14:44:44 +0100 | [diff] [blame] | 304 | |
| 305 | /* force time to wrap 20s after boot: we first compute the time offset |
| 306 | * that once applied to the wall-clock date will make the local time |
| 307 | * wrap in 5 seconds. This offset is applied to the process-wide time, |
| 308 | * and will be used to recompute the local time, both of which will |
| 309 | * match and continue from this shifted date. |
| 310 | */ |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 311 | now_offset = sec_to_ns((uint)((uint)(-global_now_ms) / 1000U - BOOT_TIME_WRAP_SEC)); |
| 312 | global_now_ns += now_offset; |
| 313 | now_ns = global_now_ns; |
| 314 | now_ms = global_now_ms = ns_to_ms(now_ns); |
Willy Tarreau | 28360dc | 2023-02-07 14:44:44 +0100 | [diff] [blame] | 315 | |
Willy Tarreau | 45c38e2 | 2021-09-30 18:28:49 +0200 | [diff] [blame] | 316 | th_ctx->idle_pct = 100; |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 317 | clock_update_date(0, 1); |
| 318 | } |
| 319 | |
Willy Tarreau | 5345490 | 2023-05-16 19:01:55 +0200 | [diff] [blame] | 320 | void clock_adjust_now_offset(void) |
| 321 | { |
| 322 | HA_ATOMIC_STORE(&now_offset, now_ns - tv_to_ns(&date)); |
| 323 | } |
| 324 | |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 325 | /* must be called once per thread to initialize their thread-local variables. |
| 326 | * Note that other threads might also be initializing and running in parallel. |
| 327 | */ |
| 328 | void clock_init_thread_date(void) |
| 329 | { |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 330 | gettimeofday(&date, NULL); |
| 331 | after_poll = before_poll = date; |
| 332 | |
Willy Tarreau | 69530f5 | 2023-04-28 09:16:15 +0200 | [diff] [blame] | 333 | now_ns = _HA_ATOMIC_LOAD(&global_now_ns); |
Willy Tarreau | 45c38e2 | 2021-09-30 18:28:49 +0200 | [diff] [blame] | 334 | th_ctx->idle_pct = 100; |
Aurelien DARRAGON | 16d6c0c | 2022-11-10 11:47:47 +0100 | [diff] [blame] | 335 | th_ctx->prev_cpu_time = now_cpu_time(); |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 336 | clock_update_date(0, 1); |
| 337 | } |
| 338 | |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 339 | /* report the average CPU idle percentage over all running threads, between 0 and 100 */ |
| 340 | uint clock_report_idle(void) |
| 341 | { |
| 342 | uint total = 0; |
| 343 | uint rthr = 0; |
| 344 | uint thr; |
| 345 | |
| 346 | for (thr = 0; thr < MAX_THREADS; thr++) { |
Willy Tarreau | 1e7f0d6 | 2022-06-27 16:22:22 +0200 | [diff] [blame] | 347 | if (!ha_thread_info[thr].tg || |
| 348 | !(ha_thread_info[thr].tg->threads_enabled & ha_thread_info[thr].ltid_bit)) |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 349 | continue; |
Willy Tarreau | 45c38e2 | 2021-09-30 18:28:49 +0200 | [diff] [blame] | 350 | total += HA_ATOMIC_LOAD(&ha_thread_ctx[thr].idle_pct); |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 351 | rthr++; |
| 352 | } |
| 353 | return rthr ? total / rthr : 0; |
| 354 | } |
| 355 | |
| 356 | /* Update the idle time value twice a second, to be called after |
| 357 | * clock_update_date() when called after poll(), and currently called only by |
| 358 | * clock_leaving_poll() below. It relies on <before_poll> to be updated to |
| 359 | * the system time before calling poll(). |
| 360 | */ |
| 361 | static inline void clock_measure_idle(void) |
| 362 | { |
| 363 | /* Let's compute the idle to work ratio. We worked between after_poll |
| 364 | * and before_poll, and slept between before_poll and date. The idle_pct |
| 365 | * is updated at most twice every second. Note that the current second |
| 366 | * rarely changes so we avoid a multiply when not needed. |
| 367 | */ |
| 368 | int delta; |
| 369 | |
| 370 | if ((delta = date.tv_sec - before_poll.tv_sec)) |
| 371 | delta *= 1000000; |
| 372 | idle_time += delta + (date.tv_usec - before_poll.tv_usec); |
| 373 | |
| 374 | if ((delta = date.tv_sec - after_poll.tv_sec)) |
| 375 | delta *= 1000000; |
| 376 | samp_time += delta + (date.tv_usec - after_poll.tv_usec); |
| 377 | |
| 378 | after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec; |
| 379 | if (samp_time < 500000) |
| 380 | return; |
| 381 | |
Willy Tarreau | 45c38e2 | 2021-09-30 18:28:49 +0200 | [diff] [blame] | 382 | HA_ATOMIC_STORE(&th_ctx->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time); |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 383 | idle_time = samp_time = 0; |
| 384 | } |
| 385 | |
| 386 | /* Collect date and time information after leaving poll(). <timeout> must be |
| 387 | * set to the maximum sleep time passed to poll (in milliseconds), and |
| 388 | * <interrupted> must be zero if the poller reached the timeout or non-zero |
| 389 | * otherwise, which generally is provided by the poller's return value. |
| 390 | */ |
| 391 | void clock_leaving_poll(int timeout, int interrupted) |
| 392 | { |
| 393 | clock_measure_idle(); |
Willy Tarreau | 45c38e2 | 2021-09-30 18:28:49 +0200 | [diff] [blame] | 394 | th_ctx->prev_cpu_time = now_cpu_time(); |
| 395 | th_ctx->prev_mono_time = now_mono_time(); |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 396 | } |
| 397 | |
| 398 | /* Collect date and time information before calling poll(). This will be used |
| 399 | * to count the run time of the past loop and the sleep time of the next poll. |
Ilya Shipitsin | 4a689da | 2022-10-29 09:34:32 +0500 | [diff] [blame] | 400 | * It also compares the elapsed and cpu times during the activity period to |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 401 | * estimate the amount of stolen time, which is reported if higher than half |
| 402 | * a millisecond. |
| 403 | */ |
| 404 | void clock_entering_poll(void) |
| 405 | { |
| 406 | uint64_t new_mono_time; |
| 407 | uint64_t new_cpu_time; |
Willy Tarreau | 20adfde | 2021-10-08 11:34:46 +0200 | [diff] [blame] | 408 | uint32_t run_time; |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 409 | int64_t stolen; |
| 410 | |
| 411 | gettimeofday(&before_poll, NULL); |
| 412 | |
Willy Tarreau | 023df62 | 2024-09-08 19:15:38 +0200 | [diff] [blame] | 413 | /* The time might have jumped either backwards or forwards during tasks |
| 414 | * processing. It's easy to detect a backwards jump, but a forward jump |
| 415 | * needs a marging. Here the upper limit of 2 seconds corresponds to a |
| 416 | * large margin at which the watchdog would already trigger so it looks |
| 417 | * sufficient to avoid false positives most of the time. The goal here |
| 418 | * is to make sure that before_poll can be trusted when entering |
| 419 | * clock_update_local_date() so that we can detect and fix time jumps. |
| 420 | * All this will also make sure we don't report idle/run times that are |
| 421 | * too much wrong during such jumps. |
| 422 | */ |
| 423 | |
| 424 | if (unlikely(__tv_islt(&before_poll, &after_poll))) |
| 425 | before_poll = after_poll; |
| 426 | else if (unlikely(__tv_ms_elapsed(&after_poll, &before_poll) >= 2000)) |
| 427 | tv_ms_add(&before_poll, &after_poll, 2000); |
| 428 | |
Willy Tarreau | 20adfde | 2021-10-08 11:34:46 +0200 | [diff] [blame] | 429 | run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec); |
| 430 | |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 431 | new_cpu_time = now_cpu_time(); |
| 432 | new_mono_time = now_mono_time(); |
| 433 | |
Willy Tarreau | 45c38e2 | 2021-09-30 18:28:49 +0200 | [diff] [blame] | 434 | if (th_ctx->prev_cpu_time && th_ctx->prev_mono_time) { |
| 435 | new_cpu_time -= th_ctx->prev_cpu_time; |
| 436 | new_mono_time -= th_ctx->prev_mono_time; |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 437 | stolen = new_mono_time - new_cpu_time; |
| 438 | if (unlikely(stolen >= 500000)) { |
| 439 | stolen /= 500000; |
| 440 | /* more than half a millisecond difference might |
| 441 | * indicate an undesired preemption. |
| 442 | */ |
| 443 | report_stolen_time(stolen); |
| 444 | } |
| 445 | } |
Willy Tarreau | 20adfde | 2021-10-08 11:34:46 +0200 | [diff] [blame] | 446 | |
| 447 | /* update the average runtime */ |
| 448 | activity_count_runtime(run_time); |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 449 | } |
| 450 | |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 451 | /* returns the current date as returned by gettimeofday() in ISO+microsecond |
| 452 | * format. It uses a thread-local static variable that the reader can consume |
| 453 | * for as long as it wants until next call. Thus, do not call it from a signal |
| 454 | * handler. If <pad> is non-0, a trailing space will be added. It will always |
| 455 | * return exactly 32 or 33 characters (depending on padding) and will always be |
| 456 | * zero-terminated, thus it will always fit into a 34 bytes buffer. |
| 457 | * This also always include the local timezone (in +/-HH:mm format) . |
| 458 | */ |
| 459 | char *timeofday_as_iso_us(int pad) |
| 460 | { |
| 461 | struct timeval new_date; |
| 462 | struct tm tm; |
| 463 | const char *offset; |
| 464 | char c; |
| 465 | |
| 466 | gettimeofday(&new_date, NULL); |
| 467 | if (new_date.tv_sec != iso_time_sec || !new_date.tv_sec) { |
| 468 | get_localtime(new_date.tv_sec, &tm); |
| 469 | offset = get_gmt_offset(new_date.tv_sec, &tm); |
| 470 | if (unlikely(strftime(iso_time_str, sizeof(iso_time_str), "%Y-%m-%dT%H:%M:%S.000000+00:00", &tm) != 32)) |
Willy Tarreau | fc458ec | 2023-04-07 18:11:39 +0200 | [diff] [blame] | 471 | strlcpy2(iso_time_str, "YYYY-mm-ddTHH:MM:SS.000000-00:00", sizeof(iso_time_str)); // make the failure visible but respect format. |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 472 | iso_time_str[26] = offset[0]; |
| 473 | iso_time_str[27] = offset[1]; |
| 474 | iso_time_str[28] = offset[2]; |
| 475 | iso_time_str[30] = offset[3]; |
| 476 | iso_time_str[31] = offset[4]; |
| 477 | iso_time_sec = new_date.tv_sec; |
| 478 | } |
| 479 | |
| 480 | /* utoa_pad adds a trailing 0 so we save the char for restore */ |
| 481 | c = iso_time_str[26]; |
| 482 | utoa_pad(new_date.tv_usec, iso_time_str + 20, 7); |
| 483 | iso_time_str[26] = c; |
| 484 | if (pad) { |
| 485 | iso_time_str[32] = ' '; |
| 486 | iso_time_str[33] = 0; |
| 487 | } |
| 488 | return iso_time_str; |
| 489 | } |