Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 1 | /* |
| 2 | * General time-keeping code and variables |
| 3 | * |
| 4 | * Copyright 2000-2021 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
| 13 | #include <sys/time.h> |
| 14 | #include <time.h> |
| 15 | |
| 16 | #include <haproxy/api.h> |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 17 | #include <haproxy/activity.h> |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 18 | #include <haproxy/clock.h> |
| 19 | #include <haproxy/time.h> |
| 20 | #include <haproxy/tinfo-t.h> |
| 21 | #include <haproxy/tools.h> |
| 22 | |
| 23 | struct timeval start_date; /* the process's start date in wall-clock time */ |
| 24 | volatile ullong global_now; /* common monotonic date between all threads (32:32) */ |
| 25 | volatile uint global_now_ms; /* common monotonic date in milliseconds (may wrap) */ |
| 26 | |
| 27 | THREAD_ALIGNED(64) static ullong now_offset; /* global offset between system time and global time */ |
| 28 | |
| 29 | THREAD_LOCAL uint now_ms; /* internal monotonic date in milliseconds (may wrap) */ |
| 30 | THREAD_LOCAL struct timeval now; /* internal monotonic date derived from real clock */ |
| 31 | THREAD_LOCAL struct timeval date; /* the real current date (wall-clock time) */ |
| 32 | THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */ |
| 33 | THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */ |
| 34 | |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 35 | static THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */ |
| 36 | static THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */ |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 37 | static THREAD_LOCAL unsigned int iso_time_sec; /* last iso time value for this thread */ |
| 38 | static THREAD_LOCAL char iso_time_str[34]; /* ISO time representation of gettimeofday() */ |
| 39 | |
/* Returns the reading of the system's monotonic clock expressed in
 * nanoseconds. When the build environment does not advertise POSIX timers
 * with a monotonic clock, zero is returned instead.
 */
uint64_t now_mono_time(void)
{
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_MONOTONIC_CLOCK)
	struct timespec tp;

	clock_gettime(CLOCK_MONOTONIC, &tp);
	return tp.tv_sec * 1000000000ULL + tp.tv_nsec;
#else
	return 0;
#endif
}
| 51 | |
/* Returns the CPU time consumed so far by the calling thread, expressed in
 * nanoseconds. When the build environment does not advertise POSIX timers
 * with per-thread CPU clocks, zero is returned instead.
 */
uint64_t now_cpu_time(void)
{
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
	struct timespec tp;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp);
	return tp.tv_sec * 1000000000ULL + tp.tv_nsec;
#else
	return 0;
#endif
}
| 63 | |
/* Returns the CPU time consumed so far by thread <thr>, expressed in
 * nanoseconds, by reading the clock designated by thr->clock_id. When the
 * build environment does not advertise POSIX timers with per-thread CPU
 * clocks, zero is returned instead.
 */
uint64_t now_cpu_time_thread(const struct thread_info *thr)
{
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
	struct timespec tp;

	clock_gettime(thr->clock_id, &tp);
	return tp.tv_sec * 1000000000ULL + tp.tv_nsec;
#else
	return 0;
#endif
}
| 75 | |
| 76 | /* clock_update_date: sets <date> to system time, and sets <now> to something as |
| 77 | * close as possible to real time, following a monotonic function. The main |
| 78 | * principle consists in detecting backwards and forwards time jumps and adjust |
| 79 | * an offset to correct them. This function should be called once after each |
| 80 | * poll, and never farther apart than MAX_DELAY_MS*2. The poll's timeout should |
| 81 | * be passed in <max_wait>, and the return value in <interrupted> (a non-zero |
| 82 | * value means that we have not expired the timeout). |
| 83 | * |
| 84 | * clock_init_process_date() must have been called once first, and |
| 85 | * clock_init_thread_date() must also have been called once for each thread. |
| 86 | * |
| 87 | * An offset is used to adjust the current time (date), to figure a monotonic |
| 88 | * local time (now). The offset is not critical, as it is only updated after a |
| 89 | * clock jump is detected. From this point all threads will apply it to their |
| 90 | * locally measured time, and will then agree around a common monotonic |
| 91 | * global_now value that serves to further refine their local time. As it is |
| 92 | * not possible to atomically update a timeval, both global_now and the |
| 93 | * now_offset values are instead stored as 64-bit integers made of two 32 bit |
| 94 | * values for the tv_sec and tv_usec parts. The offset is made of two signed |
| 95 | * ints so that the clock can be adjusted in the two directions. |
| 96 | */ |
| 97 | void clock_update_date(int max_wait, int interrupted) |
| 98 | { |
| 99 | struct timeval min_deadline, max_deadline, tmp_now; |
| 100 | uint old_now_ms; |
| 101 | ullong old_now; |
| 102 | ullong new_now; |
| 103 | ullong ofs, ofs_new; |
| 104 | uint sec_ofs, usec_ofs; |
| 105 | |
| 106 | gettimeofday(&date, NULL); |
| 107 | |
| 108 | /* compute the minimum and maximum local date we may have reached based |
| 109 | * on our past date and the associated timeout. There are three possible |
| 110 | * extremities: |
| 111 | * - the new date cannot be older than before_poll |
| 112 | * - if not interrupted, the new date cannot be older than |
| 113 | * before_poll+max_wait |
| 114 | * - in any case the new date cannot be newer than |
| 115 | * before_poll+max_wait+some margin (100ms used here). |
| 116 | * In case of violation, we'll ignore the current date and instead |
| 117 | * restart from the last date we knew. |
| 118 | */ |
| 119 | _tv_ms_add(&min_deadline, &before_poll, max_wait); |
| 120 | _tv_ms_add(&max_deadline, &before_poll, max_wait + 100); |
| 121 | |
| 122 | ofs = HA_ATOMIC_LOAD(&now_offset); |
| 123 | |
| 124 | if (unlikely(__tv_islt(&date, &before_poll) || // big jump backwards |
| 125 | (!interrupted && __tv_islt(&date, &min_deadline)) || // small jump backwards |
| 126 | __tv_islt(&max_deadline, &date))) { // big jump forwards |
| 127 | if (!interrupted) |
| 128 | _tv_ms_add(&now, &now, max_wait); |
| 129 | } else { |
| 130 | /* The date is still within expectations. Let's apply the |
| 131 | * now_offset to the system date. Note: ofs if made of two |
| 132 | * independent signed ints. |
| 133 | */ |
| 134 | now.tv_sec = date.tv_sec + (int)(ofs >> 32); // note: may be positive or negative |
| 135 | now.tv_usec = date.tv_usec + (int)ofs; // note: may be positive or negative |
| 136 | if ((int)now.tv_usec < 0) { |
| 137 | now.tv_usec += 1000000; |
| 138 | now.tv_sec -= 1; |
| 139 | } else if (now.tv_usec >= 1000000) { |
| 140 | now.tv_usec -= 1000000; |
| 141 | now.tv_sec += 1; |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | /* now that we have bounded the local time, let's check if it's |
| 146 | * realistic regarding the global date, which only moves forward, |
| 147 | * otherwise catch up. |
| 148 | */ |
| 149 | old_now = global_now; |
| 150 | old_now_ms = global_now_ms; |
| 151 | |
| 152 | do { |
| 153 | tmp_now.tv_sec = (unsigned int)(old_now >> 32); |
| 154 | tmp_now.tv_usec = old_now & 0xFFFFFFFFU; |
| 155 | |
| 156 | if (__tv_islt(&now, &tmp_now)) |
| 157 | now = tmp_now; |
| 158 | |
| 159 | /* now <now> is expected to be the most accurate date, |
| 160 | * equal to <global_now> or newer. |
| 161 | */ |
| 162 | new_now = ((ullong)now.tv_sec << 32) + (uint)now.tv_usec; |
| 163 | now_ms = __tv_to_ms(&now); |
| 164 | |
| 165 | /* let's try to update the global <now> (both in timeval |
| 166 | * and ms forms) or loop again. |
| 167 | */ |
| 168 | } while (((new_now != old_now && !_HA_ATOMIC_CAS(&global_now, &old_now, new_now)) || |
| 169 | (now_ms != old_now_ms && !_HA_ATOMIC_CAS(&global_now_ms, &old_now_ms, now_ms))) && |
| 170 | __ha_cpu_relax()); |
| 171 | |
| 172 | /* <now> and <now_ms> are now updated to the last value of global_now |
| 173 | * and global_now_ms, which were also monotonically updated. We can |
| 174 | * compute the latest offset, we don't care who writes it last, the |
| 175 | * variations will not break the monotonic property. |
| 176 | */ |
| 177 | |
| 178 | sec_ofs = now.tv_sec - date.tv_sec; |
| 179 | usec_ofs = now.tv_usec - date.tv_usec; |
| 180 | if ((int)usec_ofs < 0) { |
| 181 | usec_ofs += 1000000; |
| 182 | sec_ofs -= 1; |
| 183 | } |
| 184 | ofs_new = ((ullong)sec_ofs << 32) + usec_ofs; |
| 185 | if (ofs_new != ofs) |
| 186 | HA_ATOMIC_STORE(&now_offset, ofs_new); |
| 187 | } |
| 188 | |
| 189 | /* must be called once at boot to initialize some global variables */ |
| 190 | void clock_init_process_date(void) |
| 191 | { |
| 192 | now_offset = 0; |
| 193 | gettimeofday(&date, NULL); |
| 194 | now = after_poll = before_poll = date; |
| 195 | global_now = ((ullong)date.tv_sec << 32) + (uint)date.tv_usec; |
| 196 | global_now_ms = now.tv_sec * 1000 + now.tv_usec / 1000; |
| 197 | ti->idle_pct = 100; |
| 198 | clock_update_date(0, 1); |
| 199 | } |
| 200 | |
| 201 | /* must be called once per thread to initialize their thread-local variables. |
| 202 | * Note that other threads might also be initializing and running in parallel. |
| 203 | */ |
| 204 | void clock_init_thread_date(void) |
| 205 | { |
| 206 | ullong old_now; |
| 207 | |
| 208 | gettimeofday(&date, NULL); |
| 209 | after_poll = before_poll = date; |
| 210 | |
| 211 | old_now = _HA_ATOMIC_LOAD(&global_now); |
| 212 | now.tv_sec = old_now >> 32; |
| 213 | now.tv_usec = (uint)old_now; |
| 214 | ti->idle_pct = 100; |
| 215 | clock_update_date(0, 1); |
| 216 | } |
| 217 | |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 218 | /* report the average CPU idle percentage over all running threads, between 0 and 100 */ |
| 219 | uint clock_report_idle(void) |
| 220 | { |
| 221 | uint total = 0; |
| 222 | uint rthr = 0; |
| 223 | uint thr; |
| 224 | |
| 225 | for (thr = 0; thr < MAX_THREADS; thr++) { |
| 226 | if (!(all_threads_mask & (1UL << thr))) |
| 227 | continue; |
| 228 | total += HA_ATOMIC_LOAD(&ha_thread_info[thr].idle_pct); |
| 229 | rthr++; |
| 230 | } |
| 231 | return rthr ? total / rthr : 0; |
| 232 | } |
| 233 | |
| 234 | /* Update the idle time value twice a second, to be called after |
| 235 | * clock_update_date() when called after poll(), and currently called only by |
| 236 | * clock_leaving_poll() below. It relies on <before_poll> to be updated to |
| 237 | * the system time before calling poll(). |
| 238 | */ |
| 239 | static inline void clock_measure_idle(void) |
| 240 | { |
| 241 | /* Let's compute the idle to work ratio. We worked between after_poll |
| 242 | * and before_poll, and slept between before_poll and date. The idle_pct |
| 243 | * is updated at most twice every second. Note that the current second |
| 244 | * rarely changes so we avoid a multiply when not needed. |
| 245 | */ |
| 246 | int delta; |
| 247 | |
| 248 | if ((delta = date.tv_sec - before_poll.tv_sec)) |
| 249 | delta *= 1000000; |
| 250 | idle_time += delta + (date.tv_usec - before_poll.tv_usec); |
| 251 | |
| 252 | if ((delta = date.tv_sec - after_poll.tv_sec)) |
| 253 | delta *= 1000000; |
| 254 | samp_time += delta + (date.tv_usec - after_poll.tv_usec); |
| 255 | |
| 256 | after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec; |
| 257 | if (samp_time < 500000) |
| 258 | return; |
| 259 | |
| 260 | HA_ATOMIC_STORE(&ti->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time); |
| 261 | idle_time = samp_time = 0; |
| 262 | } |
| 263 | |
| 264 | /* Collect date and time information after leaving poll(). <timeout> must be |
| 265 | * set to the maximum sleep time passed to poll (in milliseconds), and |
| 266 | * <interrupted> must be zero if the poller reached the timeout or non-zero |
| 267 | * otherwise, which generally is provided by the poller's return value. |
| 268 | */ |
| 269 | void clock_leaving_poll(int timeout, int interrupted) |
| 270 | { |
| 271 | clock_measure_idle(); |
| 272 | ti->prev_cpu_time = now_cpu_time(); |
| 273 | ti->prev_mono_time = now_mono_time(); |
| 274 | } |
| 275 | |
| 276 | /* Collect date and time information before calling poll(). This will be used |
| 277 | * to count the run time of the past loop and the sleep time of the next poll. |
| 278 | * It also compares the elasped and cpu times during the activity period to |
| 279 | * estimate the amount of stolen time, which is reported if higher than half |
| 280 | * a millisecond. |
| 281 | */ |
| 282 | void clock_entering_poll(void) |
| 283 | { |
| 284 | uint64_t new_mono_time; |
| 285 | uint64_t new_cpu_time; |
Willy Tarreau | 20adfde | 2021-10-08 11:34:46 +0200 | [diff] [blame^] | 286 | uint32_t run_time; |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 287 | int64_t stolen; |
| 288 | |
| 289 | gettimeofday(&before_poll, NULL); |
| 290 | |
Willy Tarreau | 20adfde | 2021-10-08 11:34:46 +0200 | [diff] [blame^] | 291 | run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec); |
| 292 | |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 293 | new_cpu_time = now_cpu_time(); |
| 294 | new_mono_time = now_mono_time(); |
| 295 | |
| 296 | if (ti->prev_cpu_time && ti->prev_mono_time) { |
| 297 | new_cpu_time -= ti->prev_cpu_time; |
| 298 | new_mono_time -= ti->prev_mono_time; |
| 299 | stolen = new_mono_time - new_cpu_time; |
| 300 | if (unlikely(stolen >= 500000)) { |
| 301 | stolen /= 500000; |
| 302 | /* more than half a millisecond difference might |
| 303 | * indicate an undesired preemption. |
| 304 | */ |
| 305 | report_stolen_time(stolen); |
| 306 | } |
| 307 | } |
Willy Tarreau | 20adfde | 2021-10-08 11:34:46 +0200 | [diff] [blame^] | 308 | |
| 309 | /* update the average runtime */ |
| 310 | activity_count_runtime(run_time); |
Willy Tarreau | f9d5e10 | 2021-10-08 10:43:59 +0200 | [diff] [blame] | 311 | } |
| 312 | |
Willy Tarreau | 5554264 | 2021-10-08 09:33:24 +0200 | [diff] [blame] | 313 | /* returns the current date as returned by gettimeofday() in ISO+microsecond |
| 314 | * format. It uses a thread-local static variable that the reader can consume |
| 315 | * for as long as it wants until next call. Thus, do not call it from a signal |
| 316 | * handler. If <pad> is non-0, a trailing space will be added. It will always |
| 317 | * return exactly 32 or 33 characters (depending on padding) and will always be |
| 318 | * zero-terminated, thus it will always fit into a 34 bytes buffer. |
| 319 | * This also always include the local timezone (in +/-HH:mm format) . |
| 320 | */ |
| 321 | char *timeofday_as_iso_us(int pad) |
| 322 | { |
| 323 | struct timeval new_date; |
| 324 | struct tm tm; |
| 325 | const char *offset; |
| 326 | char c; |
| 327 | |
| 328 | gettimeofday(&new_date, NULL); |
| 329 | if (new_date.tv_sec != iso_time_sec || !new_date.tv_sec) { |
| 330 | get_localtime(new_date.tv_sec, &tm); |
| 331 | offset = get_gmt_offset(new_date.tv_sec, &tm); |
| 332 | if (unlikely(strftime(iso_time_str, sizeof(iso_time_str), "%Y-%m-%dT%H:%M:%S.000000+00:00", &tm) != 32)) |
| 333 | strcpy(iso_time_str, "YYYY-mm-ddTHH:MM:SS.000000-00:00"); // make the failure visible but respect format. |
| 334 | iso_time_str[26] = offset[0]; |
| 335 | iso_time_str[27] = offset[1]; |
| 336 | iso_time_str[28] = offset[2]; |
| 337 | iso_time_str[30] = offset[3]; |
| 338 | iso_time_str[31] = offset[4]; |
| 339 | iso_time_sec = new_date.tv_sec; |
| 340 | } |
| 341 | |
| 342 | /* utoa_pad adds a trailing 0 so we save the char for restore */ |
| 343 | c = iso_time_str[26]; |
| 344 | utoa_pad(new_date.tv_usec, iso_time_str + 20, 7); |
| 345 | iso_time_str[26] = c; |
| 346 | if (pad) { |
| 347 | iso_time_str[32] = ' '; |
| 348 | iso_time_str[33] = 0; |
| 349 | } |
| 350 | return iso_time_str; |
| 351 | } |