/*
 * FD polling functions for SunOS event ports.
 *
 * Copyright 2018 Joyent, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
| 11 | |
| 12 | #include <unistd.h> |
| 13 | #include <sys/time.h> |
| 14 | #include <sys/types.h> |
| 15 | |
| 16 | #include <poll.h> |
| 17 | #include <port.h> |
| 18 | #include <errno.h> |
| 19 | #include <syslog.h> |
| 20 | |
Willy Tarreau | 4c7e4b7 | 2020-05-27 12:58:42 +0200 | [diff] [blame] | 21 | #include <haproxy/api.h> |
Willy Tarreau | 3f567e4 | 2020-05-28 15:29:19 +0200 | [diff] [blame^] | 22 | #include <haproxy/thread-t.h> |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 23 | #include <common/ticks.h> |
| 24 | #include <common/time.h> |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 25 | |
| 26 | #include <types/global.h> |
| 27 | |
| 28 | #include <proto/activity.h> |
| 29 | #include <proto/fd.h> |
| 30 | #include <proto/log.h> |
| 31 | #include <proto/signal.h> |
| 32 | |
/*
 * Private data:
 */
/* Event port descriptors, one slot per thread, indexed by "tid". Every slot
 * is set to -1 by _do_register() until a port is created for it.
 */
static int evports_fd[MAX_THREADS]; // per-thread evports_fd
/* Thread-local array handed to port_getn() to receive the retrieved events;
 * allocated by init_evports_per_thread().
 */
static THREAD_LOCAL port_event_t *evports_evlist = NULL;
/* Number of entries in evports_evlist; passed to port_getn() as the maximum
 * number of events it may return.
 */
static THREAD_LOCAL int evports_evlist_max = 0;
| 39 | |
| 40 | /* |
| 41 | * Convert the "state" member of "fdtab" into an event ports event mask. |
| 42 | */ |
| 43 | static inline int evports_state_to_events(int state) |
| 44 | { |
| 45 | int events = 0; |
| 46 | |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 47 | if (state & FD_EV_ACTIVE_W) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 48 | events |= POLLOUT; |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 49 | if (state & FD_EV_ACTIVE_R) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 50 | events |= POLLIN; |
| 51 | |
| 52 | return (events); |
| 53 | } |
| 54 | |
| 55 | /* |
| 56 | * Associate or dissociate this file descriptor with the event port, using the |
| 57 | * specified event mask. |
| 58 | */ |
| 59 | static inline void evports_resync_fd(int fd, int events) |
| 60 | { |
| 61 | if (events == 0) |
| 62 | port_dissociate(evports_fd[tid], PORT_SOURCE_FD, fd); |
| 63 | else |
| 64 | port_associate(evports_fd[tid], PORT_SOURCE_FD, fd, events, NULL); |
| 65 | } |
| 66 | |
| 67 | static void _update_fd(int fd) |
| 68 | { |
| 69 | int en; |
| 70 | int events; |
| 71 | |
| 72 | en = fdtab[fd].state; |
| 73 | |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 74 | if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) { |
Olivier Houchard | 5305505 | 2019-07-25 14:00:18 +0000 | [diff] [blame] | 75 | if (!(polled_mask[fd].poll_recv & tid_bit) && |
| 76 | !(polled_mask[fd].poll_send & tid_bit)) { |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 77 | /* fd was not watched, it's still not */ |
| 78 | return; |
| 79 | } |
| 80 | /* fd totally removed from poll list */ |
| 81 | events = 0; |
Olivier Houchard | 5305505 | 2019-07-25 14:00:18 +0000 | [diff] [blame] | 82 | if (polled_mask[fd].poll_recv & tid_bit) |
| 83 | _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit); |
| 84 | if (polled_mask[fd].poll_send & tid_bit) |
| 85 | _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit); |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 86 | } |
| 87 | else { |
| 88 | /* OK fd has to be monitored, it was either added or changed */ |
| 89 | events = evports_state_to_events(en); |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 90 | if (en & FD_EV_ACTIVE_R) { |
Olivier Houchard | 5305505 | 2019-07-25 14:00:18 +0000 | [diff] [blame] | 91 | if (!(polled_mask[fd].poll_recv & tid_bit)) |
| 92 | _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); |
| 93 | } else { |
| 94 | if (polled_mask[fd].poll_recv & tid_bit) |
| 95 | _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit); |
| 96 | } |
Willy Tarreau | 5bee3e2 | 2019-09-04 09:52:57 +0200 | [diff] [blame] | 97 | if (en & FD_EV_ACTIVE_W) { |
Olivier Houchard | 5305505 | 2019-07-25 14:00:18 +0000 | [diff] [blame] | 98 | if (!(polled_mask[fd].poll_send & tid_bit)) |
| 99 | _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit); |
| 100 | } else { |
| 101 | if (polled_mask[fd].poll_send & tid_bit) |
| 102 | _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit); |
| 103 | } |
| 104 | |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 105 | } |
| 106 | evports_resync_fd(fd, events); |
| 107 | } |
| 108 | |
/*
 * Event Ports poller. This routine interacts with the file descriptor
 * management data structures and routines; see the large block comment in
 * "src/fd.c" for more information.
 *
 * The function works in three phases:
 *   1. apply pending polling changes, first from this thread's fd_updt[]
 *      array and then from the global update_list, through _update_fd();
 *   2. wait on this thread's event port with port_getn();
 *   3. report each retrieved event with fd_update_events() and reassociate
 *      the descriptor with the port.
 *
 * Arguments:
 *   p    : the poller descriptor; not used by this function.
 *   exp  : expiration tick, passed to compute_poll_timeout() to derive the
 *          wait time and checked with tick_is_expired() while busy polling.
 *   wake : when non-zero, the wait time is forced to 0 and the wait loop is
 *          left after a single pass.
 */

static void _do_poll(struct poller *p, int exp, int wake)
{
	int i;
	int wait_time;
	struct timespec timeout_ts;
	unsigned int nevlist;
	int fd, old_fd;
	int status;

	/*
	 * Scan the list of file descriptors with an updated status:
	 */
	for (i = 0; i < fd_nbupdt; i++) {
		fd = fd_updt[i];

		/* this thread's pending-update bit is cleared before the fd is handled */
		_HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
		if (fdtab[fd].owner == NULL) {
			/* no owner anymore: only account for the dropped update */
			activity[tid].poll_drop++;
			continue;
		}

		_update_fd(fd);
	}
	fd_nbupdt = 0;
	/* Scan the global update list */
	for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
		/* NOTE(review): -2 and values <= -3 look like special markers of
		 * the shared update list (a value <= -3 decodes to an fd number
		 * with -fd-4); their exact meaning is defined outside this file,
		 * confirm against "src/fd.c".
		 */
		if (fd == -2) {
			/* restart from the first entry that was read */
			fd = old_fd;
			continue;
		}
		else if (fd <= -3)
			fd = -fd -4;
		if (fd == -1)
			break;
		/* only entries flagged for this thread are processed */
		if (fdtab[fd].update_mask & tid_bit)
			done_update_polling(fd);
		else
			continue;
		if (!fdtab[fd].owner)
			continue;
		_update_fd(fd);
	}

	/* NOTE(review): presumably flags this thread as harmless for the whole
	 * wait below, until thread_harmless_end() is called - confirm against
	 * the thread API.
	 */
	thread_harmless_now();
	/* remove our bit from sleeping_thread_mask if it is set */
	if (sleeping_thread_mask & tid_bit)
		_HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);

	/*
	 * Determine how long to wait for events to materialise on the port.
	 */
	wait_time = wake ? 0 : compute_poll_timeout(exp);
	tv_entering_poll();
	activity_count_runtime();

	do {
		/* with busy polling enabled, port_getn() is always called with
		 * a zero timeout and this loop does the waiting instead.
		 */
		int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
		int interrupted = 0;
		nevlist = 1; /* desired number of events to be retrieved */
		/* <timeout> is split into seconds and nanoseconds, i.e. it is
		 * handled as a number of milliseconds.
		 */
		timeout_ts.tv_sec  = (timeout / 1000);
		timeout_ts.tv_nsec = (timeout % 1000) * 1000000;

		status = port_getn(evports_fd[tid],
		                   evports_evlist,
		                   evports_evlist_max,
		                   &nevlist, /* updated to the number of events retrieved */
		                   &timeout_ts);
		if (status != 0) {
			/* errno is saved right away, before any other call */
			int e = errno;
			switch (e) {
			case ETIME:
				/*
				 * Though the manual page has not historically made it
				 * clear, port_getn() can return -1 with an errno of
				 * ETIME and still have returned some number of events.
				 */
				/* nevlist >= 0 */
				break;
			default:
				/* any other error: report no event and leave the loop */
				nevlist = 0;
				interrupted = 1;
				break;
			}
		}
		tv_update_date(timeout, nevlist);

		/* got something to process, or the call failed */
		if (nevlist || interrupted)
			break;
		/* either we really waited (no busy polling), or no wait was
		 * requested at all: a single pass is enough.
		 */
		if (timeout || !wait_time)
			break;
		/* from here on we are busy polling with a non-zero wait time:
		 * keep spinning until a signal is queued, a wakeup was
		 * requested, or <exp> has expired.
		 */
		if (signal_queue_len || wake)
			break;
		if (tick_isset(exp) && tick_is_expired(exp, now_ms))
			break;
	} while(1);

	tv_leaving_poll(wait_time, nevlist);

	thread_harmless_end();

	/* process the <nevlist> events stored in evports_evlist[] */
	for (i = 0; i < nevlist; i++) {
		unsigned int n = 0;
		int events, rebind_events;
		/* for these events, portev_object carries the file descriptor */
		fd = evports_evlist[i].portev_object;
		events = evports_evlist[i].portev_events;

		if (fdtab[fd].owner == NULL) {
			/* the fd has no owner anymore: count it and ignore the event */
			activity[tid].poll_dead++;
			continue;
		}

		if (!(fdtab[fd].thread_mask & tid_bit)) {
			/* the fd is not assigned to this thread: count it and skip */
			activity[tid].poll_skip++;
			continue;
		}

		/*
		 * By virtue of receiving an event for this file descriptor, it
		 * is no longer associated with the port in question.  Store
		 * the previous event mask so that we may reassociate after
		 * processing is complete.
		 */
		rebind_events = evports_state_to_events(fdtab[fd].state);
		/* rebind_events != 0 */

		/*
		 * Set bits based on the events we received from the port:
		 */
		n = ((events & POLLIN)  ? FD_EV_READY_R : 0) |
		    ((events & POLLOUT) ? FD_EV_READY_W : 0) |
		    ((events & POLLHUP) ? FD_EV_SHUT_RW : 0) |
		    ((events & POLLERR) ? FD_EV_ERR_RW  : 0);

		/*
		 * Call connection processing callbacks.  Note that it's
		 * possible for this processing to alter the required event
		 * port association; i.e., the "state" member of the "fdtab"
		 * entry.  If it changes, the fd will be placed on the updated
		 * list for processing the next time we are called.
		 */
		fd_update_events(fd, n);

		/*
		 * This file descriptor was closed during the processing of
		 * polled events.  No need to reassociate.
		 */
		if (fdtab[fd].owner == NULL)
			continue;

		/*
		 * Reassociate with the port, using the same event mask as
		 * before.  This call will not result in a dissociation as we
		 * asserted that _some_ events needed to be rebound above.
		 *
		 * Reassociating with the same mask allows us to mimic the
		 * level-triggered behaviour of poll(2).  In the event that we
		 * are interested in the same events on the next turn of the
		 * loop, this represents no extra work.
		 *
		 * If this additional port_associate(3C) call becomes a
		 * performance problem, we would need to verify that we can
		 * correctly interact with the file descriptor cache and update
		 * list (see "src/fd.c") to avoid reassociating here, or to use
		 * a different events mask.
		 */
		evports_resync_fd(fd, rebind_events);
	}
}
| 282 | |
| 283 | static int init_evports_per_thread() |
| 284 | { |
| 285 | int fd; |
| 286 | |
| 287 | evports_evlist_max = global.tune.maxpollevents; |
| 288 | evports_evlist = calloc(evports_evlist_max, sizeof (port_event_t)); |
| 289 | if (evports_evlist == NULL) { |
| 290 | goto fail_alloc; |
| 291 | } |
| 292 | |
| 293 | if (MAX_THREADS > 1 && tid) { |
| 294 | if ((evports_fd[tid] = port_create()) == -1) { |
| 295 | goto fail_fd; |
| 296 | } |
| 297 | } |
| 298 | |
| 299 | /* we may have to unregister some events initially registered on the |
| 300 | * original fd when it was alone, and/or to register events on the new |
| 301 | * fd for this thread. Let's just mark them as updated, the poller will |
| 302 | * do the rest. |
| 303 | */ |
| 304 | for (fd = 0; fd < global.maxsock; fd++) |
| 305 | updt_fd_polling(fd); |
| 306 | |
| 307 | return 1; |
| 308 | |
| 309 | fail_fd: |
| 310 | free(evports_evlist); |
| 311 | evports_evlist = NULL; |
| 312 | evports_evlist_max = 0; |
| 313 | fail_alloc: |
| 314 | return 0; |
| 315 | } |
| 316 | |
| 317 | static void deinit_evports_per_thread() |
| 318 | { |
| 319 | if (MAX_THREADS > 1 && tid) |
| 320 | close(evports_fd[tid]); |
| 321 | |
| 322 | free(evports_evlist); |
| 323 | evports_evlist = NULL; |
| 324 | evports_evlist_max = 0; |
| 325 | } |
| 326 | |
| 327 | /* |
| 328 | * Initialisation of the event ports poller. |
| 329 | * Returns 0 in case of failure, non-zero in case of success. |
| 330 | */ |
Willy Tarreau | 03e7853 | 2020-02-25 07:38:05 +0100 | [diff] [blame] | 331 | static int _do_init(struct poller *p) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 332 | { |
| 333 | p->private = NULL; |
| 334 | |
| 335 | if ((evports_fd[tid] = port_create()) == -1) { |
| 336 | goto fail; |
| 337 | } |
| 338 | |
| 339 | hap_register_per_thread_init(init_evports_per_thread); |
| 340 | hap_register_per_thread_deinit(deinit_evports_per_thread); |
| 341 | |
| 342 | return 1; |
| 343 | |
| 344 | fail: |
| 345 | p->pref = 0; |
| 346 | return 0; |
| 347 | } |
| 348 | |
| 349 | /* |
| 350 | * Termination of the event ports poller. |
| 351 | * All resources are released and the poller is marked as inoperative. |
| 352 | */ |
Willy Tarreau | 03e7853 | 2020-02-25 07:38:05 +0100 | [diff] [blame] | 353 | static void _do_term(struct poller *p) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 354 | { |
| 355 | if (evports_fd[tid] != -1) { |
| 356 | close(evports_fd[tid]); |
| 357 | evports_fd[tid] = -1; |
| 358 | } |
| 359 | |
| 360 | p->private = NULL; |
| 361 | p->pref = 0; |
| 362 | |
| 363 | free(evports_evlist); |
| 364 | evports_evlist = NULL; |
| 365 | evports_evlist_max = 0; |
| 366 | } |
| 367 | |
/*
 * Run-time check to make sure an event port can actually be created, by
 * creating one and closing it immediately.
 * Returns 1 on success, otherwise 0.
 */
static int _do_test(struct poller *p)
{
	const int probe = port_create();

	if (probe != -1) {
		close(probe);
		return 1;
	}

	return 0;
}
| 384 | |
| 385 | /* |
| 386 | * Close and recreate the event port after fork(). Returns 1 on success, |
| 387 | * otherwise 0. If this function fails, "_do_term()" must be called to |
| 388 | * clean up the poller. |
| 389 | */ |
Willy Tarreau | 03e7853 | 2020-02-25 07:38:05 +0100 | [diff] [blame] | 390 | static int _do_fork(struct poller *p) |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 391 | { |
| 392 | if (evports_fd[tid] != -1) { |
| 393 | close(evports_fd[tid]); |
| 394 | } |
| 395 | |
| 396 | if ((evports_fd[tid] = port_create()) == -1) { |
| 397 | return 0; |
| 398 | } |
| 399 | |
| 400 | return 1; |
| 401 | } |
| 402 | |
| 403 | /* |
| 404 | * This constructor must be called before main() to register the event ports |
| 405 | * poller. |
| 406 | */ |
| 407 | __attribute__((constructor)) |
| 408 | static void _do_register(void) |
| 409 | { |
| 410 | struct poller *p; |
| 411 | int i; |
| 412 | |
| 413 | if (nbpollers >= MAX_POLLERS) |
| 414 | return; |
| 415 | |
| 416 | for (i = 0; i < MAX_THREADS; i++) |
| 417 | evports_fd[i] = -1; |
| 418 | |
| 419 | p = &pollers[nbpollers++]; |
| 420 | |
| 421 | p->name = "evports"; |
| 422 | p->pref = 300; |
Willy Tarreau | 11ef083 | 2019-11-28 18:17:33 +0100 | [diff] [blame] | 423 | p->flags = HAP_POLL_F_ERRHUP; |
Emmanuel Hocdet | 0ba4f48 | 2019-04-08 16:53:32 +0000 | [diff] [blame] | 424 | p->private = NULL; |
| 425 | |
| 426 | p->clo = NULL; |
| 427 | p->test = _do_test; |
| 428 | p->init = _do_init; |
| 429 | p->term = _do_term; |
| 430 | p->poll = _do_poll; |
| 431 | p->fork = _do_fork; |
| 432 | } |