/*
 * FD polling functions for SunOS event ports.
 *
 * Copyright 2018 Joyent, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
| 11 | |
| 12 | #include <unistd.h> |
| 13 | #include <sys/time.h> |
| 14 | #include <sys/types.h> |
| 15 | |
| 16 | #include <poll.h> |
| 17 | #include <port.h> |
| 18 | #include <errno.h> |
| 19 | #include <syslog.h> |
| 20 | |
Willy Tarreau | b255105 | 2020-06-09 09:07:15 +0200 | [diff] [blame] | 21 | #include <haproxy/activity.h> |
Willy Tarreau | 4c7e4b7 | 2020-05-27 12:58:42 +0200 | [diff] [blame] | 22 | #include <haproxy/api.h> |
Willy Tarreau | b255105 | 2020-06-09 09:07:15 +0200 | [diff] [blame] | 23 | #include <haproxy/fd.h> |
| 24 | #include <haproxy/global.h> |
Willy Tarreau | 3727a8a | 2020-06-04 17:37:26 +0200 | [diff] [blame] | 25 | #include <haproxy/signal.h> |
Willy Tarreau | c2f7c58 | 2020-06-02 18:15:32 +0200 | [diff] [blame] | 26 | #include <haproxy/ticks.h> |
Willy Tarreau | 92b4f13 | 2020-06-01 11:05:15 +0200 | [diff] [blame] | 27 | #include <haproxy/time.h> |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 28 | |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 29 | /* |
| 30 | * Private data: |
| 31 | */ |
| 32 | static int evports_fd[MAX_THREADS]; // per-thread evports_fd |
| 33 | static THREAD_LOCAL port_event_t *evports_evlist = NULL; |
| 34 | static THREAD_LOCAL int evports_evlist_max = 0; |
| 35 | |
| 36 | /* |
| 37 | * Convert the "state" member of "fdtab" into an event ports event mask. |
| 38 | */ |
| 39 | static inline int evports_state_to_events(int state) |
| 40 | { |
| 41 | int events = 0; |
| 42 | |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 43 | if (state & FD_EV_ACTIVE_W) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 44 | events |= POLLOUT; |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 45 | if (state & FD_EV_ACTIVE_R) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 46 | events |= POLLIN; |
| 47 | |
| 48 | return (events); |
| 49 | } |
| 50 | |
| 51 | /* |
| 52 | * Associate or dissociate this file descriptor with the event port, using the |
| 53 | * specified event mask. |
| 54 | */ |
| 55 | static inline void evports_resync_fd(int fd, int events) |
| 56 | { |
| 57 | if (events == 0) |
| 58 | port_dissociate(evports_fd[tid], PORT_SOURCE_FD, fd); |
| 59 | else |
| 60 | port_associate(evports_fd[tid], PORT_SOURCE_FD, fd, events, NULL); |
| 61 | } |
| 62 | |
| 63 | static void _update_fd(int fd) |
| 64 | { |
| 65 | int en; |
| 66 | int events; |
| 67 | |
| 68 | en = fdtab[fd].state; |
| 69 | |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 70 | if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) { |
Olivier Houchard | 5305505 | 2019-07-25 14:00:18 +0000 | [diff] [blame] | 71 | if (!(polled_mask[fd].poll_recv & tid_bit) && |
| 72 | !(polled_mask[fd].poll_send & tid_bit)) { |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 73 | /* fd was not watched, it's still not */ |
| 74 | return; |
| 75 | } |
| 76 | /* fd totally removed from poll list */ |
| 77 | events = 0; |
Olivier Houchard | 5305505 | 2019-07-25 14:00:18 +0000 | [diff] [blame] | 78 | if (polled_mask[fd].poll_recv & tid_bit) |
| 79 | _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit); |
| 80 | if (polled_mask[fd].poll_send & tid_bit) |
| 81 | _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit); |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 82 | } |
| 83 | else { |
| 84 | /* OK fd has to be monitored, it was either added or changed */ |
| 85 | events = evports_state_to_events(en); |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 86 | if (en & FD_EV_ACTIVE_R) { |
Olivier Houchard | 5305505 | 2019-07-25 14:00:18 +0000 | [diff] [blame] | 87 | if (!(polled_mask[fd].poll_recv & tid_bit)) |
| 88 | _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); |
| 89 | } else { |
| 90 | if (polled_mask[fd].poll_recv & tid_bit) |
| 91 | _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit); |
| 92 | } |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 93 | if (en & FD_EV_ACTIVE_W) { |
Olivier Houchard | 5305505 | 2019-07-25 14:00:18 +0000 | [diff] [blame] | 94 | if (!(polled_mask[fd].poll_send & tid_bit)) |
| 95 | _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit); |
| 96 | } else { |
| 97 | if (polled_mask[fd].poll_send & tid_bit) |
| 98 | _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit); |
| 99 | } |
| 100 | |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 101 | } |
| 102 | evports_resync_fd(fd, events); |
| 103 | } |
| 104 | |
| 105 | /* |
| 106 | * Event Ports poller. This routine interacts with the file descriptor |
| 107 | * management data structures and routines; see the large block comment in |
| 108 | * "src/fd.c" for more information. |
| 109 | */ |
| 110 | |
Willy Tarreau | 03e7853 | 2020-02-25 07:38:05 +0100 | [diff] [blame] | 111 | static void _do_poll(struct poller *p, int exp, int wake) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 112 | { |
| 113 | int i; |
| 114 | int wait_time; |
| 115 | struct timespec timeout_ts; |
| 116 | unsigned int nevlist; |
| 117 | int fd, old_fd; |
| 118 | int status; |
| 119 | |
| 120 | /* |
| 121 | * Scan the list of file descriptors with an updated status: |
| 122 | */ |
| 123 | for (i = 0; i < fd_nbupdt; i++) { |
| 124 | fd = fd_updt[i]; |
| 125 | |
| 126 | _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit); |
| 127 | if (fdtab[fd].owner == NULL) { |
Willy Tarreau | e406386 | 2020-06-17 20:35:33 +0200 | [diff] [blame] | 128 | activity[tid].poll_drop_fd++; |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 129 | continue; |
| 130 | } |
| 131 | |
| 132 | _update_fd(fd); |
| 133 | } |
| 134 | fd_nbupdt = 0; |
| 135 | /* Scan the global update list */ |
| 136 | for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) { |
| 137 | if (fd == -2) { |
| 138 | fd = old_fd; |
| 139 | continue; |
| 140 | } |
| 141 | else if (fd <= -3) |
| 142 | fd = -fd -4; |
| 143 | if (fd == -1) |
| 144 | break; |
| 145 | if (fdtab[fd].update_mask & tid_bit) |
| 146 | done_update_polling(fd); |
| 147 | else |
| 148 | continue; |
| 149 | if (!fdtab[fd].owner) |
| 150 | continue; |
| 151 | _update_fd(fd); |
| 152 | } |
| 153 | |
| 154 | thread_harmless_now(); |
Olivier Houchard | 305d5ab | 2019-07-24 18:07:06 +0200 | [diff] [blame] | 155 | if (sleeping_thread_mask & tid_bit) |
| 156 | _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit); |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 157 | |
| 158 | /* |
| 159 | * Determine how long to wait for events to materialise on the port. |
| 160 | */ |
Willy Tarreau | 2ae84e4 | 2019-05-28 16:44:05 +0200 | [diff] [blame] | 161 | wait_time = wake ? 0 : compute_poll_timeout(exp); |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 162 | tv_entering_poll(); |
| 163 | activity_count_runtime(); |
| 164 | |
| 165 | do { |
| 166 | int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time; |
| 167 | int interrupted = 0; |
| 168 | nevlist = 1; /* desired number of events to be retrieved */ |
| 169 | timeout_ts.tv_sec = (timeout / 1000); |
| 170 | timeout_ts.tv_nsec = (timeout % 1000) * 1000000; |
| 171 | |
| 172 | status = port_getn(evports_fd[tid], |
| 173 | evports_evlist, |
| 174 | evports_evlist_max, |
| 175 | &nevlist, /* updated to the number of events retrieved */ |
| 176 | &timeout_ts); |
| 177 | if (status != 0) { |
| 178 | int e = errno; |
| 179 | switch (e) { |
| 180 | case ETIME: |
| 181 | /* |
| 182 | * Though the manual page has not historically made it |
| 183 | * clear, port_getn() can return -1 with an errno of |
| 184 | * ETIME and still have returned some number of events. |
| 185 | */ |
| 186 | /* nevlist >= 0 */ |
| 187 | break; |
| 188 | default: |
| 189 | nevlist = 0; |
| 190 | interrupted = 1; |
| 191 | break; |
| 192 | } |
| 193 | } |
| 194 | tv_update_date(timeout, nevlist); |
| 195 | |
| 196 | if (nevlist || interrupted) |
| 197 | break; |
| 198 | if (timeout || !wait_time) |
| 199 | break; |
Willy Tarreau | 2ae84e4 | 2019-05-28 16:44:05 +0200 | [diff] [blame] | 200 | if (signal_queue_len || wake) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 201 | break; |
| 202 | if (tick_isset(exp) && tick_is_expired(exp, now_ms)) |
| 203 | break; |
| 204 | } while(1); |
| 205 | |
| 206 | tv_leaving_poll(wait_time, nevlist); |
| 207 | |
| 208 | thread_harmless_end(); |
| 209 | |
Willy Tarreau | e545153 | 2020-06-17 20:25:18 +0200 | [diff] [blame] | 210 | if (nevlist > 0) |
| 211 | activity[tid].poll_io++; |
| 212 | |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 213 | for (i = 0; i < nevlist; i++) { |
| 214 | unsigned int n = 0; |
| 215 | int events, rebind_events; |
| 216 | fd = evports_evlist[i].portev_object; |
| 217 | events = evports_evlist[i].portev_events; |
| 218 | |
Willy Tarreau | 38e8a1c | 2020-06-23 10:04:54 +0200 | [diff] [blame] | 219 | #ifdef DEBUG_FD |
Willy Tarreau | 4781b15 | 2021-04-06 13:53:36 +0200 | [diff] [blame] | 220 | _HA_ATOMIC_INC(&fdtab[fd].event_count); |
Willy Tarreau | 38e8a1c | 2020-06-23 10:04:54 +0200 | [diff] [blame] | 221 | #endif |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 222 | if (fdtab[fd].owner == NULL) { |
Willy Tarreau | e406386 | 2020-06-17 20:35:33 +0200 | [diff] [blame] | 223 | activity[tid].poll_dead_fd++; |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 224 | continue; |
| 225 | } |
| 226 | |
| 227 | if (!(fdtab[fd].thread_mask & tid_bit)) { |
Willy Tarreau | e406386 | 2020-06-17 20:35:33 +0200 | [diff] [blame] | 228 | activity[tid].poll_skip_fd++; |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 229 | continue; |
| 230 | } |
| 231 | |
| 232 | /* |
| 233 | * By virtue of receiving an event for this file descriptor, it |
| 234 | * is no longer associated with the port in question. Store |
| 235 | * the previous event mask so that we may reassociate after |
| 236 | * processing is complete. |
| 237 | */ |
| 238 | rebind_events = evports_state_to_events(fdtab[fd].state); |
| 239 | /* rebind_events != 0 */ |
| 240 | |
| 241 | /* |
| 242 | * Set bits based on the events we received from the port: |
| 243 | */ |
Emmanuel Hocdet | 7ceb96b | 2019-09-19 11:08:26 +0000 | [diff] [blame] | 244 | n = ((events & POLLIN) ? FD_EV_READY_R : 0) | |
| 245 | ((events & POLLOUT) ? FD_EV_READY_W : 0) | |
| 246 | ((events & POLLHUP) ? FD_EV_SHUT_RW : 0) | |
| 247 | ((events & POLLERR) ? FD_EV_ERR_RW : 0); |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 248 | |
| 249 | /* |
| 250 | * Call connection processing callbacks. Note that it's |
| 251 | * possible for this processing to alter the required event |
Ilya Shipitsin | ce7b00f | 2020-03-23 22:28:40 +0500 | [diff] [blame] | 252 | * port association; i.e., the "state" member of the "fdtab" |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 253 | * entry. If it changes, the fd will be placed on the updated |
| 254 | * list for processing the next time we are called. |
| 255 | */ |
| 256 | fd_update_events(fd, n); |
| 257 | |
| 258 | /* |
| 259 | * This file descriptor was closed during the processing of |
| 260 | * polled events. No need to reassociate. |
| 261 | */ |
| 262 | if (fdtab[fd].owner == NULL) |
| 263 | continue; |
| 264 | |
| 265 | /* |
| 266 | * Reassociate with the port, using the same event mask as |
| 267 | * before. This call will not result in a dissociation as we |
| 268 | * asserted that _some_ events needed to be rebound above. |
| 269 | * |
| 270 | * Reassociating with the same mask allows us to mimic the |
| 271 | * level-triggered behaviour of poll(2). In the event that we |
| 272 | * are interested in the same events on the next turn of the |
| 273 | * loop, this represents no extra work. |
| 274 | * |
| 275 | * If this additional port_associate(3C) call becomes a |
| 276 | * performance problem, we would need to verify that we can |
| 277 | * correctly interact with the file descriptor cache and update |
| 278 | * list (see "src/fd.c") to avoid reassociating here, or to use |
| 279 | * a different events mask. |
| 280 | */ |
| 281 | evports_resync_fd(fd, rebind_events); |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | static int init_evports_per_thread() |
| 286 | { |
| 287 | int fd; |
| 288 | |
| 289 | evports_evlist_max = global.tune.maxpollevents; |
| 290 | evports_evlist = calloc(evports_evlist_max, sizeof (port_event_t)); |
| 291 | if (evports_evlist == NULL) { |
| 292 | goto fail_alloc; |
| 293 | } |
| 294 | |
| 295 | if (MAX_THREADS > 1 && tid) { |
| 296 | if ((evports_fd[tid] = port_create()) == -1) { |
| 297 | goto fail_fd; |
| 298 | } |
| 299 | } |
| 300 | |
| 301 | /* we may have to unregister some events initially registered on the |
| 302 | * original fd when it was alone, and/or to register events on the new |
| 303 | * fd for this thread. Let's just mark them as updated, the poller will |
| 304 | * do the rest. |
| 305 | */ |
| 306 | for (fd = 0; fd < global.maxsock; fd++) |
| 307 | updt_fd_polling(fd); |
| 308 | |
| 309 | return 1; |
| 310 | |
| 311 | fail_fd: |
Willy Tarreau | 61cfdf4 | 2021-02-20 10:46:51 +0100 | [diff] [blame] | 312 | ha_free(&evports_evlist); |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 313 | evports_evlist_max = 0; |
| 314 | fail_alloc: |
| 315 | return 0; |
| 316 | } |
| 317 | |
| 318 | static void deinit_evports_per_thread() |
| 319 | { |
| 320 | if (MAX_THREADS > 1 && tid) |
| 321 | close(evports_fd[tid]); |
| 322 | |
Willy Tarreau | 61cfdf4 | 2021-02-20 10:46:51 +0100 | [diff] [blame] | 323 | ha_free(&evports_evlist); |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 324 | evports_evlist_max = 0; |
| 325 | } |
| 326 | |
| 327 | /* |
| 328 | * Initialisation of the event ports poller. |
| 329 | * Returns 0 in case of failure, non-zero in case of success. |
| 330 | */ |
Willy Tarreau | 03e7853 | 2020-02-25 07:38:05 +0100 | [diff] [blame] | 331 | static int _do_init(struct poller *p) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 332 | { |
| 333 | p->private = NULL; |
| 334 | |
| 335 | if ((evports_fd[tid] = port_create()) == -1) { |
| 336 | goto fail; |
| 337 | } |
| 338 | |
| 339 | hap_register_per_thread_init(init_evports_per_thread); |
| 340 | hap_register_per_thread_deinit(deinit_evports_per_thread); |
| 341 | |
| 342 | return 1; |
| 343 | |
| 344 | fail: |
| 345 | p->pref = 0; |
| 346 | return 0; |
| 347 | } |
| 348 | |
| 349 | /* |
| 350 | * Termination of the event ports poller. |
| 351 | * All resources are released and the poller is marked as inoperative. |
| 352 | */ |
Willy Tarreau | 03e7853 | 2020-02-25 07:38:05 +0100 | [diff] [blame] | 353 | static void _do_term(struct poller *p) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 354 | { |
| 355 | if (evports_fd[tid] != -1) { |
| 356 | close(evports_fd[tid]); |
| 357 | evports_fd[tid] = -1; |
| 358 | } |
| 359 | |
| 360 | p->private = NULL; |
| 361 | p->pref = 0; |
| 362 | |
Willy Tarreau | 61cfdf4 | 2021-02-20 10:46:51 +0100 | [diff] [blame] | 363 | ha_free(&evports_evlist); |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 364 | evports_evlist_max = 0; |
| 365 | } |
| 366 | |
/*
 * Run-time check to make sure we can allocate the resources needed for
 * the poller to function correctly: a temporary event port is created and
 * immediately closed.
 * Returns 1 on success, otherwise 0.
 */
static int _do_test(struct poller *p)
{
	int port = port_create();

	if (port == -1)
		return 0;

	close(port);
	return 1;
}
| 383 | |
| 384 | /* |
| 385 | * Close and recreate the event port after fork(). Returns 1 on success, |
| 386 | * otherwise 0. If this function fails, "_do_term()" must be called to |
| 387 | * clean up the poller. |
| 388 | */ |
Willy Tarreau | 03e7853 | 2020-02-25 07:38:05 +0100 | [diff] [blame] | 389 | static int _do_fork(struct poller *p) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 390 | { |
| 391 | if (evports_fd[tid] != -1) { |
| 392 | close(evports_fd[tid]); |
| 393 | } |
| 394 | |
| 395 | if ((evports_fd[tid] = port_create()) == -1) { |
| 396 | return 0; |
| 397 | } |
| 398 | |
| 399 | return 1; |
| 400 | } |
| 401 | |
| 402 | /* |
| 403 | * This constructor must be called before main() to register the event ports |
| 404 | * poller. |
| 405 | */ |
| 406 | __attribute__((constructor)) |
| 407 | static void _do_register(void) |
| 408 | { |
| 409 | struct poller *p; |
| 410 | int i; |
| 411 | |
| 412 | if (nbpollers >= MAX_POLLERS) |
| 413 | return; |
| 414 | |
| 415 | for (i = 0; i < MAX_THREADS; i++) |
| 416 | evports_fd[i] = -1; |
| 417 | |
| 418 | p = &pollers[nbpollers++]; |
| 419 | |
| 420 | p->name = "evports"; |
| 421 | p->pref = 300; |
Willy Tarreau | 11ef083 | 2019-11-28 18:17:33 +0100 | [diff] [blame] | 422 | p->flags = HAP_POLL_F_ERRHUP; |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 423 | p->private = NULL; |
| 424 | |
| 425 | p->clo = NULL; |
| 426 | p->test = _do_test; |
| 427 | p->init = _do_init; |
| 428 | p->term = _do_term; |
| 429 | p->poll = _do_poll; |
| 430 | p->fork = _do_fork; |
| 431 | } |