/*
 * include/proto/fd.h
 * File descriptor states.
 *
 * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _PROTO_FD_H
#define _PROTO_FD_H

#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <haproxy/api.h>
#include <common/ticks.h>
#include <haproxy/time.h>
#include <types/fd.h>
#include <proto/activity.h>

/* public variables */

extern volatile struct fdlist update_list;

extern struct polled_mask {
        unsigned long poll_recv;
        unsigned long poll_send;
} *polled_mask;

extern THREAD_LOCAL int *fd_updt;  // FD updates list
extern THREAD_LOCAL int fd_nbupdt; // number of updates in the list

extern int poller_wr_pipe[MAX_THREADS];

extern volatile int ha_used_fds; // Number of FDs we're currently using

/* Deletes an FD from the fdsets.
 * The file descriptor is also closed.
 */
void fd_delete(int fd);

/* Deletes an FD from the fdsets.
 * The file descriptor is kept open.
 */
void fd_remove(int fd);

/*
 * Take over an FD belonging to another thread.
 * Returns 0 on success, and -1 on failure.
 */
int fd_takeover(int fd, void *expected_owner);

#ifndef HA_HAVE_CAS_DW
__decl_thread(extern HA_RWLOCK_T fd_mig_lock);
#endif

ssize_t fd_write_frag_line(int fd, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg, int nl);

/* close all FDs starting from <start> */
void my_closefrom(int start);

/* disable the specified poller */
void disable_poller(const char *poller_name);

void poller_pipe_io_handler(int fd);

/*
 * Initialize the pollers until the best one is found.
 * If none works, returns 0, otherwise 1.
 * The pollers register themselves just before main() is called.
 */
int init_pollers();

/*
 * Deinitialize the pollers.
 */
void deinit_pollers();

/*
 * Some pollers may lose their connection after a fork(). It may be necessary
 * to create and initialize part of them again. Returns 0 in case of failure,
 * otherwise 1. The fork() function may be NULL if unused. In case of error,
 * the current poller is destroyed and the caller is responsible for trying
 * another one by calling init_pollers() again.
 */
int fork_poller();

/*
 * Lists the known pollers on <out>.
 * Should be performed only before initialization.
 */
int list_pollers(FILE *out);

/*
 * Runs the polling loop.
 */
void run_poller();
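
/* Illustrative sketch (not part of this API): a typical startup sequence is
 * expected to look roughly like this, each poller having registered itself
 * through its constructor before main() runs:
 *
 *        if (!init_pollers()) {
 *                fprintf(stderr, "No polling mechanism available.\n");
 *                exit(1);
 *        }
 *        ...
 *        run_poller();        // enters the polling loop
 */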

void fd_add_to_fd_list(volatile struct fdlist *list, int fd, int off);
void fd_rm_from_fd_list(volatile struct fdlist *list, int fd, int off);
void updt_fd_polling(const int fd);

/* Called from the poller to acknowledge we read an entry from the global
 * update list, to remove our bit from the update_mask, and remove it from
 * the list if we were the last one.
 */
static inline void done_update_polling(int fd)
{
        unsigned long update_mask;

        update_mask = _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
        while ((update_mask & all_threads_mask) == 0) {
                /* If we were the last one that had to update that entry, remove it from the list */
                fd_rm_from_fd_list(&update_list, fd, offsetof(struct fdtab, update));
                update_mask = (volatile unsigned long)fdtab[fd].update_mask;
                if ((update_mask & all_threads_mask) != 0) {
                        /* Maybe it's been re-updated in the meanwhile, and we
                         * wrongly removed it from the list, if so, re-add it
                         */
                        fd_add_to_fd_list(&update_list, fd, offsetof(struct fdtab, update));
                        update_mask = (volatile unsigned long)(fdtab[fd].update_mask);
                        /* And then check again, just in case after all it
                         * should be removed, even if it's very unlikely, given
                         * the current thread wouldn't have been able to take
                         * care of it yet */
                } else
                        break;
        }
}
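
/* Illustrative sketch only (simplified compared to the real pollers, which
 * also handle list locking markers): a poller walking the shared update list
 * refreshes its own polling state for each FD it is responsible for, then
 * acknowledges the entry:
 *
 *        for (fd = update_list.first; fd >= 0; fd = next) {
 *                next = fdtab[fd].update.next;
 *                if (!(fdtab[fd].update_mask & tid_bit))
 *                        continue;
 *                // ... enable/disable <fd> in the OS poller as needed ...
 *                done_update_polling(fd);        // drop our bit, unlink if last
 *        }
 */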

/*
 * returns true if the FD is active for recv
 */
static inline int fd_recv_active(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_R;
}

/*
 * returns true if the FD is ready for recv
 */
static inline int fd_recv_ready(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_READY_R;
}

/*
 * returns true if the FD is active for send
 */
static inline int fd_send_active(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_W;
}

/*
 * returns true if the FD is ready for send
 */
static inline int fd_send_ready(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_READY_W;
}

/*
 * returns true if the FD is active for recv or send
 */
static inline int fd_active(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_RW;
}

/* Disable processing recv events on fd <fd> */
static inline void fd_stop_recv(int fd)
{
        if (!(fdtab[fd].state & FD_EV_ACTIVE_R) ||
            !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT))
                return;
}

/* Disable processing send events on fd <fd> */
static inline void fd_stop_send(int fd)
{
        if (!(fdtab[fd].state & FD_EV_ACTIVE_W) ||
            !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT))
                return;
}

/* Disable processing of events on fd <fd> for both directions. */
static inline void fd_stop_both(int fd)
{
        unsigned char old, new;

        old = fdtab[fd].state;
        do {
                if (!(old & FD_EV_ACTIVE_RW))
                        return;
                new = old & ~FD_EV_ACTIVE_RW;
        } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));
}

/* Report that FD <fd> cannot receive anymore without polling (EAGAIN detected). */
static inline void fd_cant_recv(const int fd)
{
        /* removing ready never changes polled status */
        if (!(fdtab[fd].state & FD_EV_READY_R) ||
            !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_R_BIT))
                return;
}

/* Report that FD <fd> may receive again without polling. */
static inline void fd_may_recv(const int fd)
{
        /* marking ready never changes polled status */
        if ((fdtab[fd].state & FD_EV_READY_R) ||
            HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT))
                return;
}

/* Report that FD <fd> may receive again without polling but only if it's not
 * active yet. This is in order to speculatively try to enable I/Os when it's
 * highly likely that these will succeed, but without interfering with polling.
 */
static inline void fd_cond_recv(const int fd)
{
        if ((fdtab[fd].state & (FD_EV_ACTIVE_R|FD_EV_READY_R)) == 0)
                HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT);
}

/* Report that FD <fd> may send again without polling but only if it's not
 * active yet. This is in order to speculatively try to enable I/Os when it's
 * highly likely that these will succeed, but without interfering with polling.
 */
static inline void fd_cond_send(const int fd)
{
        if ((fdtab[fd].state & (FD_EV_ACTIVE_W|FD_EV_READY_W)) == 0)
                HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT);
}

/* Report that FD <fd> may receive and send without polling. Used at FD
 * initialization.
 */
static inline void fd_may_both(const int fd)
{
        HA_ATOMIC_OR(&fdtab[fd].state, FD_EV_READY_RW);
}

/* Disable readiness when active. This is useful to interrupt reading when it
 * is suspected that the end of data might have been reached (eg: short read).
 * This can only be done using level-triggered pollers, so if any edge-triggered
 * is ever implemented, a test will have to be added here.
 */
static inline void fd_done_recv(const int fd)
{
        /* removing ready never changes polled status */
        if ((fdtab[fd].state & (FD_EV_ACTIVE_R|FD_EV_READY_R)) != (FD_EV_ACTIVE_R|FD_EV_READY_R) ||
            !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_R_BIT))
                return;
}

/* Report that FD <fd> cannot send anymore without polling (EAGAIN detected). */
static inline void fd_cant_send(const int fd)
{
        /* removing ready never changes polled status */
        if (!(fdtab[fd].state & FD_EV_READY_W) ||
            !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_W_BIT))
                return;
}

/* Report that FD <fd> may send again without polling (EAGAIN not detected). */
static inline void fd_may_send(const int fd)
{
        /* marking ready never changes polled status */
        if ((fdtab[fd].state & FD_EV_READY_W) ||
            HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT))
                return;
}

/* Prepare FD <fd> to try to receive */
static inline void fd_want_recv(int fd)
{
        if ((fdtab[fd].state & FD_EV_ACTIVE_R) ||
            HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT))
                return;
        updt_fd_polling(fd);
}

/* Prepare FD <fd> to try to send */
static inline void fd_want_send(int fd)
{
        if ((fdtab[fd].state & FD_EV_ACTIVE_W) ||
            HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT))
                return;
        updt_fd_polling(fd);
}
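
/* Illustrative sketch (hypothetical I/O handler, not part of this file): the
 * want/stop helpers enable or disable a direction, while cant/may/done report
 * the readiness observed on actual I/O attempts, e.g.:
 *
 *        fd_want_recv(fd);                        // subscribe for input events
 *        ...
 *        ret = recv(fd, buf, sizeof(buf), 0);
 *        if (ret < 0 && errno == EAGAIN)
 *                fd_cant_recv(fd);                // blocked: poll before retrying
 *        else if (ret > 0 && ret < sizeof(buf))
 *                fd_done_recv(fd);                // short read: likely no more data
 */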

/* Set the fd as currently running on the current thread.
 * Returns 0 if all goes well, or -1 if we no longer own the fd, and should
 * do nothing with it.
 */
static inline int fd_set_running(int fd)
{
#ifndef HA_HAVE_CAS_DW
        HA_RWLOCK_RDLOCK(OTHER_LOCK, &fd_mig_lock);
        if (!(fdtab[fd].thread_mask & tid_bit)) {
                HA_RWLOCK_RDUNLOCK(OTHER_LOCK, &fd_mig_lock);
                return -1;
        }
        _HA_ATOMIC_OR(&fdtab[fd].running_mask, tid_bit);
        HA_RWLOCK_RDUNLOCK(OTHER_LOCK, &fd_mig_lock);
        return 0;
#else
        unsigned long old_masks[2];
        unsigned long new_masks[2];

        old_masks[0] = fdtab[fd].running_mask;
        old_masks[1] = fdtab[fd].thread_mask;
        do {
                if (!(old_masks[1] & tid_bit))
                        return -1;
                new_masks[0] = fdtab[fd].running_mask | tid_bit;
                new_masks[1] = old_masks[1];
        } while (!(HA_ATOMIC_DWCAS(&fdtab[fd].running_mask, &old_masks, &new_masks)));
        return 0;
#endif
}

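/* Grab exclusive "running" ownership of the FD: spin until no other thread
 * has a bit set in running_mask, then set ours.
 */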
static inline void fd_set_running_excl(int fd)
{
        unsigned long old_mask = 0;

        /* the CAS updates <old_mask> on failure, so reset it to zero before
         * retrying, otherwise we could wrongly "win" while other threads are
         * still marked running.
         */
        while (!_HA_ATOMIC_CAS(&fdtab[fd].running_mask, &old_mask, tid_bit))
                old_mask = 0;
}

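/* Remove the calling thread's bit from the FD's running_mask. */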
static inline void fd_clr_running(int fd)
{
        _HA_ATOMIC_AND(&fdtab[fd].running_mask, ~tid_bit);
}

/* Update events seen for FD <fd> and its state if needed. This should be
 * called by the poller, passing FD_EV_*_{R,W,RW} in <evts>. FD_EV_ERR_*
 * doesn't need to also pass FD_EV_SHUT_*, it's implied. ERR and SHUT are
 * allowed to be reported regardless of R/W readiness.
 */
static inline void fd_update_events(int fd, unsigned char evts)
{
        unsigned long locked = atleast2(fdtab[fd].thread_mask);
        unsigned char old, new;
        int new_flags, must_stop;

        new_flags =
              ((evts & FD_EV_READY_R) ? FD_POLL_IN  : 0) |
              ((evts & FD_EV_READY_W) ? FD_POLL_OUT : 0) |
              ((evts & FD_EV_SHUT_R)  ? FD_POLL_HUP : 0) |
              ((evts & FD_EV_ERR_RW)  ? FD_POLL_ERR : 0);

        /* SHUTW reported while FD was active for writes is an error */
        if ((fdtab[fd].ev & FD_EV_ACTIVE_W) && (evts & FD_EV_SHUT_W))
                new_flags |= FD_POLL_ERR;

        /* compute the inactive events reported late that must be stopped */
        must_stop = 0;
        if (unlikely(!fd_active(fd))) {
                /* both sides stopped */
                must_stop = FD_POLL_IN | FD_POLL_OUT;
        }
        else if (unlikely(!fd_recv_active(fd) && (evts & (FD_EV_READY_R | FD_EV_SHUT_R | FD_EV_ERR_RW)))) {
                /* only send remains */
                must_stop = FD_POLL_IN;
        }
        else if (unlikely(!fd_send_active(fd) && (evts & (FD_EV_READY_W | FD_EV_SHUT_W | FD_EV_ERR_RW)))) {
                /* only recv remains */
                must_stop = FD_POLL_OUT;
        }

        old = fdtab[fd].ev;
        new = (old & FD_POLL_STICKY) | new_flags;

        if (unlikely(locked)) {
                /* Locked FDs (those shared by at least two threads) are atomically updated */
                while (unlikely(new != old && !_HA_ATOMIC_CAS(&fdtab[fd].ev, &old, new)))
                        new = (old & FD_POLL_STICKY) | new_flags;
        } else {
                if (new != old)
                        fdtab[fd].ev = new;
        }

        if (fdtab[fd].ev & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR))
                fd_may_recv(fd);

        if (fdtab[fd].ev & (FD_POLL_OUT | FD_POLL_ERR))
                fd_may_send(fd);

        if (fdtab[fd].iocb && fd_active(fd)) {
                if (fd_set_running(fd) == -1)
                        return;
                fdtab[fd].iocb(fd);
                fd_clr_running(fd);
        }

        /* we had to stop this FD and it still must be stopped after the I/O
         * cb's changes, so let's program an update for this.
         */
        if (must_stop && !(fdtab[fd].update_mask & tid_bit)) {
                if (((must_stop & FD_POLL_IN)  && !fd_recv_active(fd)) ||
                    ((must_stop & FD_POLL_OUT) && !fd_send_active(fd)))
                        if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid))
                                fd_updt[fd_nbupdt++] = fd;
        }

        ti->flags &= ~TI_FL_STUCK; // this thread is still running
}
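
/* Illustrative sketch (hypothetical epoll-based poller, simplified): OS events
 * are expected to be translated into FD_EV_* flags and handed to
 * fd_update_events(), for example:
 *
 *        e  = epoll_events[count].events;
 *        fd = epoll_events[count].data.fd;
 *        n  = ((e & EPOLLIN)    ? FD_EV_READY_R : 0) |
 *             ((e & EPOLLOUT)   ? FD_EV_READY_W : 0) |
 *             ((e & EPOLLRDHUP) ? FD_EV_SHUT_R  : 0) |
 *             ((e & EPOLLHUP)   ? (FD_EV_SHUT_R | FD_EV_SHUT_W) : 0) |
 *             ((e & EPOLLERR)   ? FD_EV_ERR_RW  : 0);
 *        fd_update_events(fd, n);
 */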

/* Prepares <fd> for being polled */
static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned long thread_mask)
{
        int locked = fdtab[fd].running_mask != tid_bit;

        if (locked)
                fd_set_running_excl(fd);
        fdtab[fd].owner = owner;
        fdtab[fd].iocb = iocb;
        fdtab[fd].ev = 0;
        fdtab[fd].linger_risk = 0;
        fdtab[fd].cloned = 0;
        fdtab[fd].thread_mask = thread_mask;
        /* note: do not reset polled_mask here as it indicates which poller
         * still knows this FD from a possible previous round.
         */
        if (locked)
                fd_clr_running(fd);
        /* the two directions are ready until proven otherwise */
        fd_may_both(fd);
        _HA_ATOMIC_ADD(&ha_used_fds, 1);
}
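
/* Illustrative sketch (hypothetical caller; <conn> and my_iocb() are made-up
 * names): a freshly accepted, non-blocking socket is typically registered and
 * subscribed for reads like this:
 *
 *        fd_insert(fd, conn, my_iocb, tid_bit);        // owned by the current thread
 *        fd_want_recv(fd);                             // ask to be notified of input
 */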

/* Computes the bounded poll() timeout based on the next expiration timer <next>
 * by bounding it to MAX_DELAY_MS. <next> may equal TICK_ETERNITY. The pollers
 * just need to call this function right before polling to get their timeout
 * value. Timeouts that are already expired (possibly due to a pending event)
 * are accounted for in activity.poll_exp.
 */
static inline int compute_poll_timeout(int next)
{
        int wait_time;

        if (!tick_isset(next))
                wait_time = MAX_DELAY_MS;
        else if (tick_is_expired(next, now_ms)) {
                activity[tid].poll_exp++;
                wait_time = 0;
        }
        else {
                wait_time = TICKS_TO_MS(tick_remain(now_ms, next)) + 1;
                if (wait_time > MAX_DELAY_MS)
                        wait_time = MAX_DELAY_MS;
        }
        return wait_time;
}
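
/* Illustrative sketch (simplified poller loop; names other than
 * compute_poll_timeout() are assumptions): the computed timeout is meant to be
 * passed straight to the polling syscall:
 *
 *        wait_time = compute_poll_timeout(next_expiration);
 *        status = epoll_wait(epoll_fd, epoll_events, max_events, wait_time);
 */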

/* These are replacements for FD_SET, FD_CLR, FD_ISSET, working on uints */
static inline void hap_fd_set(int fd, unsigned int *evts)
{
        _HA_ATOMIC_OR(&evts[fd / (8*sizeof(*evts))], 1U << (fd & (8*sizeof(*evts) - 1)));
}

static inline void hap_fd_clr(int fd, unsigned int *evts)
{
        _HA_ATOMIC_AND(&evts[fd / (8*sizeof(*evts))], ~(1U << (fd & (8*sizeof(*evts) - 1))));
}

static inline unsigned int hap_fd_isset(int fd, unsigned int *evts)
{
        return evts[fd / (8*sizeof(*evts))] & (1U << (fd & (8*sizeof(*evts) - 1)));
}
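
/* Illustrative sketch (fd_evts_recv and MAX_FDS are made-up names): these
 * helpers operate on plain arrays of unsigned ints used as bit fields, e.g. a
 * select()-style poller keeping its own view of the FDs watched for reading:
 *
 *        unsigned int fd_evts_recv[(MAX_FDS + 31) / 32];
 *
 *        hap_fd_set(fd, fd_evts_recv);                // watch <fd> for input
 *        if (hap_fd_isset(fd, fd_evts_recv))
 *                hap_fd_clr(fd, fd_evts_recv);        // stop watching it
 */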

static inline void wake_thread(int tid)
{
        char c = 'c';

        DISGUISE(write(poller_wr_pipe[tid], &c, 1));
}
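
/* Note (inferred from the declarations above): wake_thread() writes a single
 * byte to the target thread's poller pipe so that a thread sleeping in its
 * polling syscall wakes up immediately; poller_pipe_io_handler() is the
 * handler expected to drain those bytes on the read side.
 */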


#endif /* _PROTO_FD_H */

/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */