blob: f882b54a771d8973b27895b24245db4afb18e30a [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
Willy Tarreau49b046d2012-08-09 12:11:58 +02002 * include/proto/fd.h
3 * File descriptors states.
4 *
Willy Tarreauf817e9f2014-01-10 16:58:45 +01005 * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu
Willy Tarreau49b046d2012-08-09 12:11:58 +02006 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation, version 2.1
10 * exclusively.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
Willy Tarreaubaaee002006-06-26 02:48:02 +020021
22#ifndef _PROTO_FD_H
23#define _PROTO_FD_H
24
Willy Tarreau2ff76222007-04-09 19:29:56 +020025#include <stdio.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020026#include <sys/time.h>
27#include <sys/types.h>
28#include <unistd.h>
29
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020030#include <haproxy/api.h>
Willy Tarreauf37ba942018-10-17 11:25:54 +020031#include <common/ticks.h>
Willy Tarreau92b4f132020-06-01 11:05:15 +020032#include <haproxy/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020033#include <types/fd.h>
Willy Tarreau609aad92018-11-22 08:31:09 +010034#include <proto/activity.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020035
Willy Tarreau7be79a42012-11-11 15:02:54 +010036/* public variables */
Christopher Fauletd4604ad2017-05-29 10:40:41 +020037
Olivier Houchard6b96f722018-04-25 16:58:25 +020038extern volatile struct fdlist update_list;
39
Olivier Houchard53055052019-07-25 14:00:18 +000040
/* Pair of polling masks, one for each direction. The global <polled_mask>
 * pointer declared below refers to storage defined elsewhere.
 * NOTE(review): presumably one entry per FD, each bit designating a thread
 * whose poller knows the FD for that direction -- confirm against fd.c.
 */
struct polled_mask {
	unsigned long poll_recv;  /* mask for the receive direction */
	unsigned long poll_send;  /* mask for the send direction */
};

extern struct polled_mask *polled_mask;
Olivier Houchardcb92f5c2018-04-26 14:23:07 +020045
Christopher Fauletd4604ad2017-05-29 10:40:41 +020046extern THREAD_LOCAL int *fd_updt; // FD updates list
47extern THREAD_LOCAL int fd_nbupdt; // number of updates in the list
48
Olivier Houchard79321b92018-07-26 17:55:11 +020049extern int poller_wr_pipe[MAX_THREADS];
50
Olivier Houchard7c49d2e2019-04-16 18:37:05 +020051extern volatile int ha_used_fds; // Number of FDs we're currently using
52
Willy Tarreau173d9952018-01-26 21:48:23 +010053/* Deletes an FD from the fdsets.
Willy Tarreaubaaee002006-06-26 02:48:02 +020054 * The file descriptor is also closed.
55 */
56void fd_delete(int fd);
57
Willy Tarreau173d9952018-01-26 21:48:23 +010058/* Deletes an FD from the fdsets.
Olivier Houchard1fc05162017-04-06 01:05:05 +020059 * The file descriptor is kept open.
60 */
61void fd_remove(int fd);
62
Olivier Houchard88516642020-03-05 18:10:51 +010063/*
64 * Take over a FD belonging to another thread.
65 * Returns 0 on success, and -1 on failure.
66 */
67int fd_takeover(int fd, void *expected_owner);
68
69#ifndef HA_HAVE_CAS_DW
Willy Tarreauaf613e82020-06-05 08:40:51 +020070__decl_thread(extern HA_RWLOCK_T fd_mig_lock);
Olivier Houchard88516642020-03-05 18:10:51 +010071#endif
72
Willy Tarreau931d8b72019-08-27 11:08:17 +020073ssize_t fd_write_frag_line(int fd, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg, int nl);
74
Willy Tarreau2d7f81b2019-02-21 22:19:17 +010075/* close all FDs starting from <start> */
76void my_closefrom(int start);
77
Willy Tarreau4f60f162007-04-08 16:39:58 +020078/* disable the specified poller */
79void disable_poller(const char *poller_name);
Willy Tarreaubaaee002006-06-26 02:48:02 +020080
Olivier Houchard79321b92018-07-26 17:55:11 +020081void poller_pipe_io_handler(int fd);
82
/*
 * Initialize the pollers till the best one is found.
 * If none works, returns 0, otherwise 1.
 * The pollers register themselves just before main() is called.
 *
 * Declared with an explicit (void) so that the compiler checks that callers
 * pass no argument (an empty list would leave the parameters unspecified).
 */
int init_pollers(void);
Willy Tarreau2a429502006-10-15 14:52:29 +020089
/*
 * Deinitialize the pollers.
 *
 * Declared with an explicit (void) so that the compiler checks that callers
 * pass no argument.
 */
void deinit_pollers(void);
94
/*
 * Some pollers may lose their connection after a fork(). It may be necessary
 * to initialize part of them again. Returns 0 in case of failure, otherwise 1.
 * The fork() function may be NULL if unused. In case of error, the current
 * poller is destroyed and the caller is responsible for trying another one by
 * calling init_pollers() again.
 *
 * Declared with an explicit (void) so that the compiler checks that callers
 * pass no argument.
 */
int fork_poller(void);
103
104/*
105 * Lists the known pollers on <out>.
106 * Should be performed only before initialization.
107 */
108int list_pollers(FILE *out);
109
/*
 * Runs the polling loop.
 *
 * Declared with an explicit (void) so that the compiler checks that callers
 * pass no argument.
 */
void run_poller(void);
Willy Tarreau2a429502006-10-15 14:52:29 +0200114
Olivier Houchard6a2cf872018-04-25 15:10:30 +0200115void fd_add_to_fd_list(volatile struct fdlist *list, int fd, int off);
116void fd_rm_from_fd_list(volatile struct fdlist *list, int fd, int off);
Willy Tarreaudbe30602019-09-04 13:25:41 +0200117void updt_fd_polling(const int fd);
Willy Tarreau4d841862018-01-17 22:57:54 +0100118
Ilya Shipitsin77e3b4a2020-03-10 12:06:11 +0500119/* Called from the poller to acknowledge we read an entry from the global
Olivier Houchard6b96f722018-04-25 16:58:25 +0200120 * update list, to remove our bit from the update_mask, and remove it from
121 * the list if we were the last one.
122 */
123static inline void done_update_polling(int fd)
124{
125 unsigned long update_mask;
126
Olivier Houchardd3608792019-03-08 18:47:42 +0100127 update_mask = _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
Olivier Houchard6b96f722018-04-25 16:58:25 +0200128 while ((update_mask & all_threads_mask)== 0) {
129 /* If we were the last one that had to update that entry, remove it from the list */
130 fd_rm_from_fd_list(&update_list, fd, offsetof(struct fdtab, update));
Olivier Houchard6b96f722018-04-25 16:58:25 +0200131 update_mask = (volatile unsigned long)fdtab[fd].update_mask;
132 if ((update_mask & all_threads_mask) != 0) {
133 /* Maybe it's been re-updated in the meanwhile, and we
134 * wrongly removed it from the list, if so, re-add it
135 */
136 fd_add_to_fd_list(&update_list, fd, offsetof(struct fdtab, update));
137 update_mask = (volatile unsigned long)(fdtab[fd].update_mask);
138 /* And then check again, just in case after all it
139 * should be removed, even if it's very unlikely, given
140 * the current thread wouldn't have been able to take
141 * care of it yet */
142 } else
143 break;
Willy Tarreau4d841862018-01-17 22:57:54 +0100144
Olivier Houchard6b96f722018-04-25 16:58:25 +0200145 }
Willy Tarreau7be79a42012-11-11 15:02:54 +0100146}
147
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100148/*
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100149 * returns true if the FD is active for recv
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100150 */
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100151static inline int fd_recv_active(const int fd)
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100152{
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100153 return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_R;
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100154}
155
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100156/*
157 * returns true if the FD is ready for recv
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100158 */
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100159static inline int fd_recv_ready(const int fd)
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100160{
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100161 return (unsigned)fdtab[fd].state & FD_EV_READY_R;
162}
163
164/*
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100165 * returns true if the FD is active for send
166 */
167static inline int fd_send_active(const int fd)
168{
169 return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_W;
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100170}
171
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100172/*
173 * returns true if the FD is ready for send
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100174 */
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100175static inline int fd_send_ready(const int fd)
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100176{
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100177 return (unsigned)fdtab[fd].state & FD_EV_READY_W;
178}
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100179
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100180/*
Christopher Faulet8db2fdf2017-08-30 09:59:38 +0200181 * returns true if the FD is active for recv or send
182 */
183static inline int fd_active(const int fd)
184{
185 return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_RW;
186}
187
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100188/* Disable processing recv events on fd <fd> */
189static inline void fd_stop_recv(int fd)
190{
Willy Tarreauf8ecc7f2019-09-04 13:22:50 +0200191 if (!(fdtab[fd].state & FD_EV_ACTIVE_R) ||
192 !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT))
193 return;
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100194}
195
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100196/* Disable processing send events on fd <fd> */
197static inline void fd_stop_send(int fd)
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100198{
Willy Tarreauf8ecc7f2019-09-04 13:22:50 +0200199 if (!(fdtab[fd].state & FD_EV_ACTIVE_W) ||
200 !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT))
201 return;
Willy Tarreau6ea20b12012-11-11 16:05:19 +0100202}
203
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100204/* Disable processing of events on fd <fd> for both directions. */
205static inline void fd_stop_both(int fd)
Willy Tarreau49b046d2012-08-09 12:11:58 +0200206{
Willy Tarreau7ac0e352018-01-17 21:25:57 +0100207 unsigned char old, new;
208
209 old = fdtab[fd].state;
210 do {
211 if (!(old & FD_EV_ACTIVE_RW))
212 return;
213 new = old & ~FD_EV_ACTIVE_RW;
Olivier Houchardd3608792019-03-08 18:47:42 +0100214 } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));
Willy Tarreau49b046d2012-08-09 12:11:58 +0200215}
216
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100217/* Report that FD <fd> cannot receive anymore without polling (EAGAIN detected). */
218static inline void fd_cant_recv(const int fd)
Willy Tarreau49b046d2012-08-09 12:11:58 +0200219{
Willy Tarreauf8ecc7f2019-09-04 13:22:50 +0200220 /* marking ready never changes polled status */
221 if (!(fdtab[fd].state & FD_EV_READY_R) ||
222 !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_R_BIT))
223 return;
Willy Tarreau49b046d2012-08-09 12:11:58 +0200224}
225
Willy Tarreau1dad3842019-07-08 23:09:03 +0200226/* Report that FD <fd> may receive again without polling. */
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100227static inline void fd_may_recv(const int fd)
Willy Tarreaubabd05a2012-08-09 12:14:03 +0200228{
Willy Tarreau7ac0e352018-01-17 21:25:57 +0100229 /* marking ready never changes polled status */
Willy Tarreau1dad3842019-07-08 23:09:03 +0200230 if ((fdtab[fd].state & FD_EV_READY_R) ||
231 HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT))
232 return;
Willy Tarreaubabd05a2012-08-09 12:14:03 +0200233}
234
Willy Tarreau8f2825f2019-09-05 16:39:21 +0200235/* Report that FD <fd> may receive again without polling but only if its not
236 * active yet. This is in order to speculatively try to enable I/Os when it's
237 * highly likely that these will succeed, but without interfering with polling.
238 */
239static inline void fd_cond_recv(const int fd)
240{
241 if ((fdtab[fd].state & (FD_EV_ACTIVE_R|FD_EV_READY_R)) == 0)
242 HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT);
243}
244
245/* Report that FD <fd> may send again without polling but only if its not
246 * active yet. This is in order to speculatively try to enable I/Os when it's
247 * highly likely that these will succeed, but without interfering with polling.
248 */
249static inline void fd_cond_send(const int fd)
250{
251 if ((fdtab[fd].state & (FD_EV_ACTIVE_W|FD_EV_READY_W)) == 0)
252 HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT);
253}
254
Willy Tarreau4ac9d062019-09-05 16:30:39 +0200255/* Report that FD <fd> may receive and send without polling. Used at FD
256 * initialization.
257 */
258static inline void fd_may_both(const int fd)
259{
260 HA_ATOMIC_OR(&fdtab[fd].state, FD_EV_READY_RW);
261}
262
Willy Tarreauf8ecc7f2019-09-04 13:22:50 +0200263/* Disable readiness when active. This is useful to interrupt reading when it
Willy Tarreau6c11bd22014-01-24 00:54:27 +0100264 * is suspected that the end of data might have been reached (eg: short read).
265 * This can only be done using level-triggered pollers, so if any edge-triggered
266 * is ever implemented, a test will have to be added here.
267 */
268static inline void fd_done_recv(const int fd)
269{
Willy Tarreauf8ecc7f2019-09-04 13:22:50 +0200270 /* removing ready never changes polled status */
271 if ((fdtab[fd].state & (FD_EV_ACTIVE_R|FD_EV_READY_R)) != (FD_EV_ACTIVE_R|FD_EV_READY_R) ||
272 !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_R_BIT))
273 return;
Willy Tarreau6c11bd22014-01-24 00:54:27 +0100274}
275
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100276/* Report that FD <fd> cannot send anymore without polling (EAGAIN detected). */
277static inline void fd_cant_send(const int fd)
Willy Tarreau49b046d2012-08-09 12:11:58 +0200278{
Willy Tarreauf8ecc7f2019-09-04 13:22:50 +0200279 /* removing ready never changes polled status */
280 if (!(fdtab[fd].state & FD_EV_READY_W) ||
281 !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_W_BIT))
282 return;
Willy Tarreau49b046d2012-08-09 12:11:58 +0200283}
284
Willy Tarreau1dad3842019-07-08 23:09:03 +0200285/* Report that FD <fd> may send again without polling (EAGAIN not detected). */
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100286static inline void fd_may_send(const int fd)
Willy Tarreau49b046d2012-08-09 12:11:58 +0200287{
Willy Tarreau7ac0e352018-01-17 21:25:57 +0100288 /* marking ready never changes polled status */
Willy Tarreau1dad3842019-07-08 23:09:03 +0200289 if ((fdtab[fd].state & FD_EV_READY_W) ||
290 HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT))
291 return;
Willy Tarreau49b046d2012-08-09 12:11:58 +0200292}
Willy Tarreau2a429502006-10-15 14:52:29 +0200293
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100294/* Prepare FD <fd> to try to receive */
295static inline void fd_want_recv(int fd)
Willy Tarreaubabd05a2012-08-09 12:14:03 +0200296{
Willy Tarreauf8ecc7f2019-09-04 13:22:50 +0200297 if ((fdtab[fd].state & FD_EV_ACTIVE_R) ||
298 HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT))
299 return;
Willy Tarreau5bee3e22019-09-04 09:52:57 +0200300 updt_fd_polling(fd);
Willy Tarreaubabd05a2012-08-09 12:14:03 +0200301}
302
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100303/* Prepare FD <fd> to try to send */
304static inline void fd_want_send(int fd)
Willy Tarreau49b046d2012-08-09 12:11:58 +0200305{
Willy Tarreauf8ecc7f2019-09-04 13:22:50 +0200306 if ((fdtab[fd].state & FD_EV_ACTIVE_W) ||
307 HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT))
308 return;
Willy Tarreau5bee3e22019-09-04 09:52:57 +0200309 updt_fd_polling(fd);
Willy Tarreau49b046d2012-08-09 12:11:58 +0200310}
Willy Tarreau2a429502006-10-15 14:52:29 +0200311
Olivier Houchard88516642020-03-05 18:10:51 +0100312/* Set the fd as currently running on the current thread.
Ilya Shipitsin856aabc2020-04-16 23:51:34 +0500313 * Returns 0 if all goes well, or -1 if we no longer own the fd, and should
Olivier Houchard88516642020-03-05 18:10:51 +0100314 * do nothing with it.
315 */
316static inline int fd_set_running(int fd)
Olivier Houcharda7bf5732020-02-27 17:26:13 +0100317{
Olivier Houchard88516642020-03-05 18:10:51 +0100318#ifndef HA_HAVE_CAS_DW
319 HA_RWLOCK_RDLOCK(OTHER_LOCK, &fd_mig_lock);
320 if (!(fdtab[fd].thread_mask & tid_bit)) {
321 HA_RWLOCK_RDUNLOCK(OTHER_LOCK, &fd_mig_lock);
322 return -1;
323 }
Olivier Houcharda7bf5732020-02-27 17:26:13 +0100324 _HA_ATOMIC_OR(&fdtab[fd].running_mask, tid_bit);
Olivier Houchard88516642020-03-05 18:10:51 +0100325 HA_RWLOCK_RDUNLOCK(OTHER_LOCK, &fd_mig_lock);
326 return 0;
327#else
328 unsigned long old_masks[2];
329 unsigned long new_masks[2];
330 old_masks[0] = fdtab[fd].running_mask;
331 old_masks[1] = fdtab[fd].thread_mask;
332 do {
333 if (!(old_masks[1] & tid_bit))
334 return -1;
335 new_masks[0] = fdtab[fd].running_mask | tid_bit;
336 new_masks[1] = old_masks[1];
337
338 } while (!(HA_ATOMIC_DWCAS(&fdtab[fd].running_mask, &old_masks, &new_masks)));
339 return 0;
340#endif
Olivier Houcharda7bf5732020-02-27 17:26:13 +0100341}
342
343static inline void fd_set_running_excl(int fd)
344{
345 unsigned long old_mask = 0;
346 while (!_HA_ATOMIC_CAS(&fdtab[fd].running_mask, &old_mask, tid_bit));
347}
348
349
350static inline void fd_clr_running(int fd)
351{
352 _HA_ATOMIC_AND(&fdtab[fd].running_mask, ~tid_bit);
353}
354
Willy Tarreau6b308982019-09-06 19:05:50 +0200355/* Update events seen for FD <fd> and its state if needed. This should be
356 * called by the poller, passing FD_EV_*_{R,W,RW} in <evts>. FD_EV_ERR_*
357 * doesn't need to also pass FD_EV_SHUT_*, it's implied. ERR and SHUT are
358 * allowed to be reported regardless of R/W readiness.
359 */
360static inline void fd_update_events(int fd, unsigned char evts)
Christopher Faulet21e92672017-08-30 10:30:04 +0200361{
Richard Russobc9d9842019-02-20 12:43:45 -0800362 unsigned long locked = atleast2(fdtab[fd].thread_mask);
Willy Tarreau1dad3842019-07-08 23:09:03 +0200363 unsigned char old, new;
Willy Tarreaueaf05be2019-12-27 15:52:34 +0100364 int new_flags, must_stop;
Willy Tarreau6b308982019-09-06 19:05:50 +0200365
366 new_flags =
367 ((evts & FD_EV_READY_R) ? FD_POLL_IN : 0) |
368 ((evts & FD_EV_READY_W) ? FD_POLL_OUT : 0) |
369 ((evts & FD_EV_SHUT_R) ? FD_POLL_HUP : 0) |
Willy Tarreau1ed37812020-02-26 16:12:45 +0100370 ((evts & FD_EV_ERR_RW) ? FD_POLL_ERR : 0);
Richard Russobc9d9842019-02-20 12:43:45 -0800371
Willy Tarreau2aaeee32019-10-01 11:46:40 +0200372 /* SHUTW reported while FD was active for writes is an error */
373 if ((fdtab[fd].ev & FD_EV_ACTIVE_W) && (evts & FD_EV_SHUT_W))
374 new_flags |= FD_POLL_ERR;
375
Willy Tarreaueaf05be2019-12-27 15:52:34 +0100376 /* compute the inactive events reported late that must be stopped */
377 must_stop = 0;
378 if (unlikely(!fd_active(fd))) {
379 /* both sides stopped */
380 must_stop = FD_POLL_IN | FD_POLL_OUT;
381 }
Willy Tarreau1ed37812020-02-26 16:12:45 +0100382 else if (unlikely(!fd_recv_active(fd) && (evts & (FD_EV_READY_R | FD_EV_SHUT_R | FD_EV_ERR_RW)))) {
Willy Tarreaueaf05be2019-12-27 15:52:34 +0100383 /* only send remains */
384 must_stop = FD_POLL_IN;
385 }
Willy Tarreau1ed37812020-02-26 16:12:45 +0100386 else if (unlikely(!fd_send_active(fd) && (evts & (FD_EV_READY_W | FD_EV_SHUT_W | FD_EV_ERR_RW)))) {
Willy Tarreaueaf05be2019-12-27 15:52:34 +0100387 /* only recv remains */
388 must_stop = FD_POLL_OUT;
389 }
390
Willy Tarreau1dad3842019-07-08 23:09:03 +0200391 old = fdtab[fd].ev;
Willy Tarreau6b308982019-09-06 19:05:50 +0200392 new = (old & FD_POLL_STICKY) | new_flags;
Willy Tarreau1dad3842019-07-08 23:09:03 +0200393
394 if (unlikely(locked)) {
395 /* Locked FDs (those with more than 2 threads) are atomically updated */
396 while (unlikely(new != old && !_HA_ATOMIC_CAS(&fdtab[fd].ev, &old, new)))
Willy Tarreau6b308982019-09-06 19:05:50 +0200397 new = (old & FD_POLL_STICKY) | new_flags;
Willy Tarreau1dad3842019-07-08 23:09:03 +0200398 } else {
399 if (new != old)
400 fdtab[fd].ev = new;
401 }
Christopher Faulet21e92672017-08-30 10:30:04 +0200402
403 if (fdtab[fd].ev & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR))
404 fd_may_recv(fd);
405
406 if (fdtab[fd].ev & (FD_POLL_OUT | FD_POLL_ERR))
407 fd_may_send(fd);
Olivier Houchard305d5ab2019-07-24 18:07:06 +0200408
Olivier Houcharda7bf5732020-02-27 17:26:13 +0100409 if (fdtab[fd].iocb && fd_active(fd)) {
Olivier Houchard88516642020-03-05 18:10:51 +0100410 if (fd_set_running(fd) == -1)
411 return;
Olivier Houchard305d5ab2019-07-24 18:07:06 +0200412 fdtab[fd].iocb(fd);
Olivier Houcharda7bf5732020-02-27 17:26:13 +0100413 fd_clr_running(fd);
414 }
Willy Tarreauf5cab822019-08-16 16:06:14 +0200415
Willy Tarreaueaf05be2019-12-27 15:52:34 +0100416 /* we had to stop this FD and it still must be stopped after the I/O
417 * cb's changes, so let's program an update for this.
418 */
419 if (must_stop && !(fdtab[fd].update_mask & tid_bit)) {
420 if (((must_stop & FD_POLL_IN) && !fd_recv_active(fd)) ||
421 ((must_stop & FD_POLL_OUT) && !fd_send_active(fd)))
422 if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid))
423 fd_updt[fd_nbupdt++] = fd;
424 }
425
Willy Tarreauf5cab822019-08-16 16:06:14 +0200426 ti->flags &= ~TI_FL_STUCK; // this thread is still running
Christopher Faulet21e92672017-08-30 10:30:04 +0200427}
428
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100429/* Prepares <fd> for being polled */
Willy Tarreaua9786b62018-01-25 07:22:13 +0100430static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned long thread_mask)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200431{
Olivier Houcharda7bf5732020-02-27 17:26:13 +0100432 int locked = fdtab[fd].running_mask != tid_bit;
Richard Russobc9d9842019-02-20 12:43:45 -0800433
434 if (locked)
Olivier Houcharda7bf5732020-02-27 17:26:13 +0100435 fd_set_running_excl(fd);
Willy Tarreaua9786b62018-01-25 07:22:13 +0100436 fdtab[fd].owner = owner;
437 fdtab[fd].iocb = iocb;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100438 fdtab[fd].ev = 0;
Willy Tarreauad38ace2013-12-15 14:19:38 +0100439 fdtab[fd].linger_risk = 0;
Conrad Hoffmann041751c2014-05-20 14:28:24 +0200440 fdtab[fd].cloned = 0;
Willy Tarreauf65610a2017-10-31 16:06:06 +0100441 fdtab[fd].thread_mask = thread_mask;
Willy Tarreauc9c83782018-01-17 18:44:46 +0100442 /* note: do not reset polled_mask here as it indicates which poller
443 * still knows this FD from a possible previous round.
444 */
Richard Russobc9d9842019-02-20 12:43:45 -0800445 if (locked)
Olivier Houcharda7bf5732020-02-27 17:26:13 +0100446 fd_clr_running(fd);
Willy Tarreau4ac9d062019-09-05 16:30:39 +0200447 /* the two directions are ready until proven otherwise */
448 fd_may_both(fd);
Olivier Houchard7c49d2e2019-04-16 18:37:05 +0200449 _HA_ATOMIC_ADD(&ha_used_fds, 1);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200450}
451
Willy Tarreauf37ba942018-10-17 11:25:54 +0200452/* Computes the bounded poll() timeout based on the next expiration timer <next>
453 * by bounding it to MAX_DELAY_MS. <next> may equal TICK_ETERNITY. The pollers
454 * just needs to call this function right before polling to get their timeout
455 * value. Timeouts that are already expired (possibly due to a pending event)
456 * are accounted for in activity.poll_exp.
457 */
458static inline int compute_poll_timeout(int next)
459{
460 int wait_time;
461
462 if (!tick_isset(next))
463 wait_time = MAX_DELAY_MS;
464 else if (tick_is_expired(next, now_ms)) {
465 activity[tid].poll_exp++;
466 wait_time = 0;
467 }
468 else {
469 wait_time = TICKS_TO_MS(tick_remain(now_ms, next)) + 1;
470 if (wait_time > MAX_DELAY_MS)
471 wait_time = MAX_DELAY_MS;
472 }
473 return wait_time;
474}
475
/* Replacement for FD_SET working on an array of unsigned ints: atomically sets
 * the bit corresponding to <fd> in the bitmap <evts>.
 */
static inline void hap_fd_set(int fd, unsigned int *evts)
{
	const size_t word_bits = 8 * sizeof(*evts);

	_HA_ATOMIC_OR(&evts[fd / word_bits], 1U << (fd & (word_bits - 1)));
}
481
/* Replacement for FD_CLR working on an array of unsigned ints: atomically
 * clears the bit corresponding to <fd> in the bitmap <evts>.
 */
static inline void hap_fd_clr(int fd, unsigned int *evts)
{
	const size_t word_bits = 8 * sizeof(*evts);

	_HA_ATOMIC_AND(&evts[fd / word_bits], ~(1U << (fd & (word_bits - 1))));
}
486
/* Replacement for FD_ISSET working on an array of unsigned ints: returns a
 * non-zero value (the isolated bit itself) if the bit corresponding to <fd> is
 * set in the bitmap <evts>, otherwise zero. The bitmap is only read, hence the
 * const qualifier, which also allows read-only bitmaps to be tested.
 */
static inline unsigned int hap_fd_isset(int fd, const unsigned int *evts)
{
	return evts[fd / (8*sizeof(*evts))] & (1U << (fd & (8*sizeof(*evts) - 1)));
}
491
Olivier Houchard79321b92018-07-26 17:55:11 +0200492static inline void wake_thread(int tid)
493{
494 char c = 'c';
495
Willy Tarreau2e8ab6b2020-03-14 11:03:20 +0100496 DISGUISE(write(poller_wr_pipe[tid], &c, 1));
Olivier Houchard79321b92018-07-26 17:55:11 +0200497}
498
Willy Tarreaubaaee002006-06-26 02:48:02 +0200499
500#endif /* _PROTO_FD_H */
501
502/*
503 * Local variables:
504 * c-indent-level: 8
505 * c-basic-offset: 8
506 * End:
507 */