/*
 * FD polling functions for SunOS event ports.
 *
 * Copyright 2018 Joyent, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>

#include <poll.h>
#include <port.h>
#include <errno.h>
#include <syslog.h>

#include <common/compat.h>
#include <common/config.h>
#include <common/hathreads.h>
#include <common/ticks.h>
#include <common/time.h>
#include <common/tools.h>

#include <types/global.h>

#include <proto/activity.h>
#include <proto/fd.h>
#include <proto/log.h>
#include <proto/signal.h>

/*
 * Private data:
 */
static int evports_fd[MAX_THREADS];                       // per-thread evports_fd
static THREAD_LOCAL port_event_t *evports_evlist = NULL;  // per-thread event buffer passed to port_getn()
static THREAD_LOCAL int evports_evlist_max = 0;           // number of entries in evports_evlist

/*
 * Convert the "state" member of "fdtab" into an event ports event mask.
 */
static inline int evports_state_to_events(int state)
{
	int events = 0;

	if (state & FD_EV_ACTIVE_W)
		events |= POLLOUT;
	if (state & FD_EV_ACTIVE_R)
		events |= POLLIN;

	return (events);
}

/*
 * Associate or dissociate this file descriptor with the event port, using the
 * specified event mask.
 */
static inline void evports_resync_fd(int fd, int events)
{
	if (events == 0)
		port_dissociate(evports_fd[tid], PORT_SOURCE_FD, fd);
	else
		port_associate(evports_fd[tid], PORT_SOURCE_FD, fd, events, NULL);
}

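/*
 * Update the event port association for a single file descriptor whose state
 * changed. Depending on the fd's state and thread mask, this either sets this
 * thread's bits in polled_mask and (re)associates the fd with this thread's
 * event port, or clears the bits and dissociates it entirely.
 */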
static void _update_fd(int fd)
{
	int en;
	int events;

	en = fdtab[fd].state;

	if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) {
		if (!(polled_mask[fd].poll_recv & tid_bit) &&
		    !(polled_mask[fd].poll_send & tid_bit)) {
			/* fd was not watched, it's still not */
			return;
		}
		/* fd totally removed from poll list */
		events = 0;
		if (polled_mask[fd].poll_recv & tid_bit)
			_HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
		if (polled_mask[fd].poll_send & tid_bit)
			_HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
	}
	else {
		/* OK fd has to be monitored, it was either added or changed */
		events = evports_state_to_events(en);
		if (en & FD_EV_ACTIVE_R) {
			if (!(polled_mask[fd].poll_recv & tid_bit))
				_HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
		} else {
			if (polled_mask[fd].poll_recv & tid_bit)
				_HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
		}
		if (en & FD_EV_ACTIVE_W) {
			if (!(polled_mask[fd].poll_send & tid_bit))
				_HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
		} else {
			if (polled_mask[fd].poll_send & tid_bit)
				_HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
		}

	}
	evports_resync_fd(fd, events);
}

/*
 * Event Ports poller. This routine interacts with the file descriptor
 * management data structures and routines; see the large block comment in
 * "src/fd.c" for more information.
 */

static void _do_poll(struct poller *p, int exp, int wake)
{
	int i;
	int wait_time;
	struct timespec timeout_ts;
	unsigned int nevlist;
	int fd, old_fd;
	int status;

	/*
	 * Scan the list of file descriptors with an updated status:
	 */
	for (i = 0; i < fd_nbupdt; i++) {
		fd = fd_updt[i];

		_HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
		if (fdtab[fd].owner == NULL) {
			activity[tid].poll_drop++;
			continue;
		}

		_update_fd(fd);
	}
	fd_nbupdt = 0;
	/* Scan the global update list */
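	/* The list chains fds through fdtab[].update.next using negative
	 * sentinels maintained by the update list code in src/fd.c: -1
	 * terminates the list, -2 marks an entry whose links are being
	 * modified concurrently (we restart from the previous fd), and
	 * values <= -3 encode the next fd as -fd-4 for entries removed
	 * while we were walking the list.
	 */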
	for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
		if (fd == -2) {
			fd = old_fd;
			continue;
		}
		else if (fd <= -3)
			fd = -fd -4;
		if (fd == -1)
			break;
		if (fdtab[fd].update_mask & tid_bit)
			done_update_polling(fd);
		else
			continue;
		if (!fdtab[fd].owner)
			continue;
		_update_fd(fd);
	}

	thread_harmless_now();
	if (sleeping_thread_mask & tid_bit)
		_HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);

	/*
	 * Determine how long to wait for events to materialise on the port.
	 */
	wait_time = wake ? 0 : compute_poll_timeout(exp);
	tv_entering_poll();
	activity_count_runtime();

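	/*
	 * Wait for events for at most <wait_time> ms, or with a zero timeout
	 * when busy polling is enabled. Without busy polling a single
	 * port_getn() call is made; otherwise we keep spinning until events
	 * were retrieved, the call was interrupted, a signal or wake-up
	 * request is pending, or the deadline expired.
	 */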
	do {
		int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
		int interrupted = 0;
		nevlist = 1; /* desired number of events to be retrieved */
		timeout_ts.tv_sec  = (timeout / 1000);
		timeout_ts.tv_nsec = (timeout % 1000) * 1000000;

		status = port_getn(evports_fd[tid],
				   evports_evlist,
				   evports_evlist_max,
				   &nevlist, /* updated to the number of events retrieved */
				   &timeout_ts);
		if (status != 0) {
			int e = errno;
			switch (e) {
			case ETIME:
				/*
				 * Though the manual page has not historically made it
				 * clear, port_getn() can return -1 with an errno of
				 * ETIME and still have returned some number of events.
				 */
				/* nevlist >= 0 */
				break;
			default:
				nevlist = 0;
				interrupted = 1;
				break;
			}
		}
		tv_update_date(timeout, nevlist);

		if (nevlist || interrupted)
			break;
		if (timeout || !wait_time)
			break;
		if (signal_queue_len || wake)
			break;
		if (tick_isset(exp) && tick_is_expired(exp, now_ms))
			break;
	} while(1);

	tv_leaving_poll(wait_time, nevlist);

	thread_harmless_end();

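	/*
	 * Process the events that were retrieved from the port above.
	 */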
	for (i = 0; i < nevlist; i++) {
		unsigned int n = 0;
		int events, rebind_events;
		fd = evports_evlist[i].portev_object;
		events = evports_evlist[i].portev_events;

		if (fdtab[fd].owner == NULL) {
			activity[tid].poll_dead++;
			continue;
		}

		if (!(fdtab[fd].thread_mask & tid_bit)) {
			activity[tid].poll_skip++;
			continue;
		}

		/*
		 * By virtue of receiving an event for this file descriptor, it
		 * is no longer associated with the port in question. Store
		 * the previous event mask so that we may reassociate after
		 * processing is complete.
		 */
		rebind_events = evports_state_to_events(fdtab[fd].state);
		/* rebind_events != 0 */

		/*
		 * Set bits based on the events we received from the port:
		 */
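		/*
		 * POLLHUP and POLLERR are folded into both directions at once;
		 * this poller sets HAP_POLL_F_ERRHUP in _do_register() to
		 * advertise that it reports these conditions.
		 */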
		n = ((events & POLLIN)  ? FD_EV_READY_R : 0) |
		    ((events & POLLOUT) ? FD_EV_READY_W : 0) |
		    ((events & POLLHUP) ? FD_EV_SHUT_RW : 0) |
		    ((events & POLLERR) ? FD_EV_ERR_RW  : 0);

		/*
		 * Call connection processing callbacks. Note that it's
		 * possible for this processing to alter the required event
		 * port association; i.e., the "state" member of the "fdtab"
		 * entry. If it changes, the fd will be placed on the update
		 * list for processing the next time we are called.
		 */
		fd_update_events(fd, n);

		/*
		 * This file descriptor was closed during the processing of
		 * polled events. No need to reassociate.
		 */
		if (fdtab[fd].owner == NULL)
			continue;

		/*
		 * Reassociate with the port, using the same event mask as
		 * before. This call will not result in a dissociation as we
		 * asserted that _some_ events needed to be rebound above.
		 *
		 * Reassociating with the same mask allows us to mimic the
		 * level-triggered behaviour of poll(2). In the event that we
		 * are interested in the same events on the next turn of the
		 * loop, this represents no extra work.
		 *
		 * If this additional port_associate(3C) call becomes a
		 * performance problem, we would need to verify that we can
		 * correctly interact with the file descriptor cache and update
		 * list (see "src/fd.c") to avoid reassociating here, or to use
		 * a different events mask.
		 */
		evports_resync_fd(fd, rebind_events);
	}
}

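/*
 * Per-thread initialisation of the event ports poller: allocate this thread's
 * event list, create a dedicated event port for every thread but the first
 * (which reuses the one created in _do_init()), and mark all file descriptors
 * for a polling update so that they get (re)associated with the right port.
 * Returns 1 on success, otherwise 0.
 */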
static int init_evports_per_thread()
{
	int fd;

	evports_evlist_max = global.tune.maxpollevents;
	evports_evlist = calloc(evports_evlist_max, sizeof (port_event_t));
	if (evports_evlist == NULL) {
		goto fail_alloc;
	}

	if (MAX_THREADS > 1 && tid) {
		if ((evports_fd[tid] = port_create()) == -1) {
			goto fail_fd;
		}
	}

	/* we may have to unregister some events initially registered on the
	 * original fd when it was alone, and/or to register events on the new
	 * fd for this thread. Let's just mark them as updated, the poller will
	 * do the rest.
	 */
	for (fd = 0; fd < global.maxsock; fd++)
		updt_fd_polling(fd);

	return 1;

 fail_fd:
	free(evports_evlist);
	evports_evlist = NULL;
	evports_evlist_max = 0;
 fail_alloc:
	return 0;
}

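/*
 * Per-thread cleanup of the event ports poller: close this thread's event
 * port (the first thread's port is closed by _do_term()) and release the
 * event list.
 */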
static void deinit_evports_per_thread()
{
	if (MAX_THREADS > 1 && tid)
		close(evports_fd[tid]);

	free(evports_evlist);
	evports_evlist = NULL;
	evports_evlist_max = 0;
}

/*
 * Initialisation of the event ports poller.
 * Returns 0 in case of failure, non-zero in case of success.
 */
static int _do_init(struct poller *p)
{
	p->private = NULL;

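	/*
	 * Create the event port for the thread running the initialisation;
	 * the other threads create theirs in init_evports_per_thread().
	 */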
	if ((evports_fd[tid] = port_create()) == -1) {
		goto fail;
	}

	hap_register_per_thread_init(init_evports_per_thread);
	hap_register_per_thread_deinit(deinit_evports_per_thread);

	return 1;

 fail:
	p->pref = 0;
	return 0;
}

/*
 * Termination of the event ports poller.
 * All resources are released and the poller is marked as inoperative.
 */
static void _do_term(struct poller *p)
{
	if (evports_fd[tid] != -1) {
		close(evports_fd[tid]);
		evports_fd[tid] = -1;
	}

	p->private = NULL;
	p->pref = 0;

	free(evports_evlist);
	evports_evlist = NULL;
	evports_evlist_max = 0;
}

/*
 * Run-time check to make sure we can allocate the resources needed for
 * the poller to function correctly.
 * Returns 1 on success, otherwise 0.
 */
static int _do_test(struct poller *p)
{
	int fd;

	if ((fd = port_create()) == -1) {
		return 0;
	}

	close(fd);
	return 1;
}

/*
 * Close and recreate the event port after fork(). Returns 1 on success,
 * otherwise 0. If this function fails, "_do_term()" must be called to
 * clean up the poller.
 */
static int _do_fork(struct poller *p)
{
	if (evports_fd[tid] != -1) {
		close(evports_fd[tid]);
	}

	if ((evports_fd[tid] = port_create()) == -1) {
		return 0;
	}

	return 1;
}

/*
 * Registration of the event ports poller. It is a constructor, which means it
 * is automatically called before main() so that the poller is known early.
 */
__attribute__((constructor))
static void _do_register(void)
{
	struct poller *p;
	int i;

	if (nbpollers >= MAX_POLLERS)
		return;

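	/* mark all per-thread event ports as not created yet */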
	for (i = 0; i < MAX_THREADS; i++)
		evports_fd[i] = -1;

	p = &pollers[nbpollers++];

	p->name = "evports";
	p->pref = 300;
	p->flags = HAP_POLL_F_ERRHUP;
	p->private = NULL;

	p->clo  = NULL;
	p->test = _do_test;
	p->init = _do_init;
	p->term = _do_term;
	p->poll = _do_poll;
	p->fork = _do_fork;
}