| /* |
| * include/haproxy/fd.h |
| * File descriptors states - exported variables and functions |
| * |
| * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation, version 2.1 |
| * exclusively. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #ifndef _HAPROXY_FD_H |
| #define _HAPROXY_FD_H |
| |
| #include <sys/time.h> |
| #include <sys/types.h> |
| #include <stdio.h> |
| #include <unistd.h> |
| #include <import/ist.h> |
| #include <haproxy/api.h> |
| #include <haproxy/atomic.h> |
| #include <haproxy/fd-t.h> |
| #include <haproxy/global.h> |
| #include <haproxy/thread.h> |
| |
| /* public variables */ |
| |
| extern struct poller cur_poller; /* the current poller */ |
| extern int nbpollers; |
| extern struct poller pollers[MAX_POLLERS]; /* all registered pollers */ |
| extern struct fdtab *fdtab; /* array of all the file descriptors */ |
| extern struct fdinfo *fdinfo; /* less-often used infos for file descriptors */ |
| extern int totalconn; /* total # of terminated sessions */ |
| extern int actconn; /* # of active sessions */ |
| |
| extern volatile struct fdlist update_list[MAX_TGROUPS]; |
| extern struct polled_mask *polled_mask; |
| |
| extern THREAD_LOCAL int *fd_updt; // FD updates list |
| extern THREAD_LOCAL int fd_nbupdt; // number of updates in the list |
| |
| extern int poller_wr_pipe[MAX_THREADS]; |
| |
| extern volatile int ha_used_fds; // Number of FDs we're currently using |
| |
| /* Deletes an FD from the fdsets. |
| * The file descriptor is also closed. |
| */ |
void fd_delete(int fd);

/* Variant of fd_delete() for an FD that no longer belongs to any thread
 * (i.e. its thread_mask and running_mask are empty), so that no
 * synchronization with other threads is needed.
 */
void _fd_delete_orphan(int fd);
| |
| /* makes the new fd non-blocking and clears all other O_* flags; |
| * this is meant to be used on new FDs. Returns -1 on failure. |
| */ |
| int fd_set_nonblock(int fd); |
| |
| /* makes the fd close-on-exec; returns -1 on failure. */ |
| int fd_set_cloexec(int fd); |
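
/* Illustrative sketch (not part of the API): a typical pattern for preparing
 * a freshly accepted socket, where <lfd> is assumed to be a listening FD:
 *
 *   int cfd = accept(lfd, NULL, NULL);
 *
 *   if (cfd >= 0 &&
 *       (fd_set_nonblock(cfd) == -1 || fd_set_cloexec(cfd) == -1)) {
 *       close(cfd);
 *       cfd = -1;
 *   }
 */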
| |
/* Migrate an FD to a new thread <new_tid>. */
| void fd_migrate_on(int fd, uint new_tid); |
| |
| /* |
| * Take over a FD belonging to another thread. |
| * Returns 0 on success, and -1 on failure. |
| */ |
| int fd_takeover(int fd, void *expected_owner); |
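
/* Illustrative sketch (the connection pointer is an assumption about typical
 * callers, not part of this API): reclaiming an idle connection owned by
 * another thread, where the expected owner guards against races:
 *
 *   if (fd_takeover(conn->handle.fd, conn) < 0) {
 *       // still in use by its owner thread, leave it alone
 *   }
 */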
| |
| ssize_t fd_write_frag_line(int fd, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg, int nl); |
| |
| /* close all FDs starting from <start> */ |
| void my_closefrom(int start); |
| |
| struct rlimit; |
| int raise_rlim_nofile(struct rlimit *old_limit, struct rlimit *new_limit); |
| |
| int compute_poll_timeout(int next); |
| void fd_leaving_poll(int wait_time, int status); |
| |
| /* disable the specified poller */ |
| void disable_poller(const char *poller_name); |
| |
| void poller_pipe_io_handler(int fd); |
| |
| /* |
| * Initialize the pollers till the best one is found. |
| * If none works, returns 0, otherwise 1. |
| * The pollers register themselves just before main() is called. |
| */ |
| int init_pollers(void); |
| |
| /* |
| * Deinitialize the pollers. |
| */ |
| void deinit_pollers(void); |
| |
| /* |
| * Some pollers may lose their connection after a fork(). It may be necessary |
| * to create initialize part of them again. Returns 0 in case of failure, |
| * otherwise 1. The fork() function may be NULL if unused. In case of error, |
| * the the current poller is destroyed and the caller is responsible for trying |
| * another one by calling init_pollers() again. |
| */ |
| int fork_poller(void); |
| |
| /* |
| * Lists the known pollers on <out>. |
| * Should be performed only before initialization. |
| */ |
| int list_pollers(FILE *out); |
| |
| /* |
| * Runs the polling loop |
| */ |
| void run_poller(); |
| |
| void fd_add_to_fd_list(volatile struct fdlist *list, int fd); |
| void fd_rm_from_fd_list(volatile struct fdlist *list, int fd); |
| void updt_fd_polling(const int fd); |
| int fd_update_events(int fd, uint evts); |
| void fd_reregister_all(int tgrp, ulong mask); |
| |
| /* Called from the poller to acknowledge we read an entry from the global |
| * update list, to remove our bit from the update_mask, and remove it from |
| * the list if we were the last one. |
| */ |
| static inline void done_update_polling(int fd) |
| { |
| unsigned long update_mask; |
| |
| update_mask = _HA_ATOMIC_AND_FETCH(&fdtab[fd].update_mask, ~ti->ltid_bit); |
| while ((update_mask & _HA_ATOMIC_LOAD(&tg->threads_enabled)) == 0) { |
| /* If we were the last one that had to update that entry, remove it from the list */ |
| fd_rm_from_fd_list(&update_list[tgid - 1], fd); |
| update_mask = _HA_ATOMIC_LOAD(&fdtab[fd].update_mask); |
| if ((update_mask & _HA_ATOMIC_LOAD(&tg->threads_enabled)) != 0) { |
			/* Maybe it's been re-updated in the meantime and we
			 * wrongly removed it from the list; if so, re-add it.
			 */
| fd_add_to_fd_list(&update_list[tgid - 1], fd); |
| update_mask = _HA_ATOMIC_LOAD(&fdtab[fd].update_mask); |
			/* Then check again, in case it should be removed
			 * after all; this is very unlikely, given that the
			 * current thread couldn't have taken care of it yet. */
| } else |
| break; |
| } |
| } |
| |
| /* |
| * returns true if the FD is active for recv |
| */ |
| static inline int fd_recv_active(const int fd) |
| { |
| return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_R; |
| } |
| |
| /* |
| * returns true if the FD is ready for recv |
| */ |
| static inline int fd_recv_ready(const int fd) |
| { |
| return (unsigned)fdtab[fd].state & FD_EV_READY_R; |
| } |
| |
| /* |
| * returns true if the FD is active for send |
| */ |
| static inline int fd_send_active(const int fd) |
| { |
| return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_W; |
| } |
| |
| /* |
| * returns true if the FD is ready for send |
| */ |
| static inline int fd_send_ready(const int fd) |
| { |
| return (unsigned)fdtab[fd].state & FD_EV_READY_W; |
| } |
| |
| /* |
| * returns true if the FD is active for recv or send |
| */ |
| static inline int fd_active(const int fd) |
| { |
| return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_RW; |
| } |
| |
| /* Disable processing recv events on fd <fd> */ |
| static inline void fd_stop_recv(int fd) |
| { |
| if (!(fdtab[fd].state & FD_EV_ACTIVE_R) || |
| !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT)) |
| return; |
| } |
| |
| /* Disable processing send events on fd <fd> */ |
| static inline void fd_stop_send(int fd) |
| { |
| if (!(fdtab[fd].state & FD_EV_ACTIVE_W) || |
| !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT)) |
| return; |
| } |
| |
| /* Disable processing of events on fd <fd> for both directions. */ |
| static inline void fd_stop_both(int fd) |
| { |
| uint old, new; |
| |
| old = fdtab[fd].state; |
| do { |
| if (!(old & FD_EV_ACTIVE_RW)) |
| return; |
| new = old & ~FD_EV_ACTIVE_RW; |
| } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); |
| } |
| |
| /* Report that FD <fd> cannot receive anymore without polling (EAGAIN detected). */ |
| static inline void fd_cant_recv(const int fd) |
| { |
| /* marking ready never changes polled status */ |
| if (!(fdtab[fd].state & FD_EV_READY_R) || |
| !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_R_BIT)) |
| return; |
| } |
| |
| /* Report that FD <fd> may receive again without polling. */ |
| static inline void fd_may_recv(const int fd) |
| { |
| /* marking ready never changes polled status */ |
| if ((fdtab[fd].state & FD_EV_READY_R) || |
| HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT)) |
| return; |
| } |
| |
/* Report that FD <fd> may receive again without polling, but only if it's not
 * active yet. This is in order to speculatively try to enable I/Os when it's
 * highly likely that these will succeed, but without interfering with polling.
 */
| static inline void fd_cond_recv(const int fd) |
| { |
| if ((fdtab[fd].state & (FD_EV_ACTIVE_R|FD_EV_READY_R)) == 0) |
| HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT); |
| } |
| |
/* Report that FD <fd> may send again without polling, but only if it's not
 * active yet. This is in order to speculatively try to enable I/Os when it's
 * highly likely that these will succeed, but without interfering with polling.
 */
| static inline void fd_cond_send(const int fd) |
| { |
| if ((fdtab[fd].state & (FD_EV_ACTIVE_W|FD_EV_READY_W)) == 0) |
| HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT); |
| } |
| |
| /* Report that FD <fd> may receive and send without polling. Used at FD |
| * initialization. |
| */ |
| static inline void fd_may_both(const int fd) |
| { |
| HA_ATOMIC_OR(&fdtab[fd].state, FD_EV_READY_RW); |
| } |
| |
| /* Report that FD <fd> cannot send anymore without polling (EAGAIN detected). */ |
| static inline void fd_cant_send(const int fd) |
| { |
| /* removing ready never changes polled status */ |
| if (!(fdtab[fd].state & FD_EV_READY_W) || |
| !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_W_BIT)) |
| return; |
| } |
| |
| /* Report that FD <fd> may send again without polling (EAGAIN not detected). */ |
| static inline void fd_may_send(const int fd) |
| { |
| /* marking ready never changes polled status */ |
| if ((fdtab[fd].state & FD_EV_READY_W) || |
| HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT)) |
| return; |
| } |
| |
| /* Prepare FD <fd> to try to receive */ |
| static inline void fd_want_recv(int fd) |
| { |
| if ((fdtab[fd].state & FD_EV_ACTIVE_R) || |
| HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT)) |
| return; |
| updt_fd_polling(fd); |
| } |
| |
/* Prepare FD <fd> to try to receive, and only create an update entry if
 * fd_updt exists (essentially for receivers during early boot).
 */
| static inline void fd_want_recv_safe(int fd) |
| { |
| if ((fdtab[fd].state & FD_EV_ACTIVE_R) || |
| HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT)) |
| return; |
| if (fd_updt) |
| updt_fd_polling(fd); |
| } |
| |
| /* Prepare FD <fd> to try to send */ |
| static inline void fd_want_send(int fd) |
| { |
| if ((fdtab[fd].state & FD_EV_ACTIVE_W) || |
| HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT)) |
| return; |
| updt_fd_polling(fd); |
| } |
| |
| /* returns the tgid from an fd (masks the refcount) */ |
| static forceinline int fd_tgid(int fd) |
| { |
| return _HA_ATOMIC_LOAD(&fdtab[fd].refc_tgid) & 0xFFFF; |
| } |
| |
| /* Release a tgid previously taken by fd_grab_tgid() */ |
| static forceinline void fd_drop_tgid(int fd) |
| { |
| HA_ATOMIC_SUB(&fdtab[fd].refc_tgid, 0x10000); |
| } |
| |
| /* Unlock a tgid currently locked by fd_lock_tgid(). This will effectively |
| * allow threads from the FD's tgid to check the masks and manipulate the FD. |
| */ |
| static forceinline void fd_unlock_tgid(int fd) |
| { |
| HA_ATOMIC_AND(&fdtab[fd].refc_tgid, 0xffff7fffU); |
| } |
| |
/* Switch the FD's TGID to the new value with a refcount of 1 and the lock bit
 * set. It doesn't care about the current TGID, except that it will wait for
 * the FD to no longer be switching, i.e. for its lock bit and refcount to be
 * cleared. After the function returns, the caller is free to manipulate the
 * masks, and it must call fd_unlock_tgid() to drop the lock, allowing threads
 * from the designated group to use the FD. Finally a call to fd_drop_tgid()
 * will be needed to drop the reference.
 */
| static inline void fd_lock_tgid(int fd, uint desired_tgid) |
| { |
| uint old; |
| |
| BUG_ON(!desired_tgid); |
| |
| old = tgid; // assume we start from the caller's tgid |
| desired_tgid |= 0x18000; // refcount=1, lock bit=1. |
| |
| while (1) { |
| old &= 0x7fff; // expect no lock and refcount==0 |
| if (_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &old, desired_tgid)) |
| break; |
| __ha_cpu_relax(); |
| } |
| } |
| |
/* Grab a reference to the FD's TGID and return the tgid. Note that a TGID of
 * zero indicates the FD was closed, thus the call also fails (i.e. there is
 * no need to drop the reference). On non-zero (success), the caller must
 * release it using fd_drop_tgid().
 */
| static inline uint fd_take_tgid(int fd) |
| { |
| uint old; |
| |
| old = _HA_ATOMIC_FETCH_ADD(&fdtab[fd].refc_tgid, 0x10000) & 0xffff; |
| if (likely(old)) |
| return old; |
| HA_ATOMIC_SUB(&fdtab[fd].refc_tgid, 0x10000); |
| return 0; |
| } |
| |
| /* Reset a tgid without affecting the refcount */ |
| static forceinline void fd_reset_tgid(int fd) |
| { |
| HA_ATOMIC_AND(&fdtab[fd].refc_tgid, 0xffff0000U); |
| } |
| |
| /* Try to grab a reference to the FD's TGID, but only if it matches the |
| * requested one (i.e. it succeeds with TGID refcnt held, or fails). Note that |
| * a TGID of zero indicates the FD was closed, thus also fails. It returns |
| * non-zero on success, in which case the caller must then release it using |
| * fd_drop_tgid(), or zero on failure. The function is optimized for use |
| * when it's likely that the tgid matches the desired one as it's by far |
| * the most common. |
| */ |
| static inline uint fd_grab_tgid(int fd, uint desired_tgid) |
| { |
| uint old; |
| |
| old = _HA_ATOMIC_FETCH_ADD(&fdtab[fd].refc_tgid, 0x10000) & 0xffff; |
| if (likely(old == desired_tgid)) |
| return 1; |
| HA_ATOMIC_SUB(&fdtab[fd].refc_tgid, 0x10000); |
| return 0; |
| } |
| |
/* Set the FD's TGID to the new value with a refcount of 1, waiting for the
 * current refcount to become 0, to cover the rare possibility that a late
 * competing thread would be touching the tgid or the running mask in parallel.
 * The caller must call fd_drop_tgid() once done.
 */
| static inline void fd_claim_tgid(int fd, uint desired_tgid) |
| { |
| uint old; |
| |
| BUG_ON(!desired_tgid); |
| |
| desired_tgid += 0x10000; // refcount=1 |
| old = 0; // assume unused (most likely) |
| while (1) { |
| if (_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &old, desired_tgid)) |
| break; |
| __ha_cpu_relax(); |
| old &= 0x7fff; // keep only the tgid and drop the lock |
| } |
| } |
| |
/* Atomically reads the running mask if the tgid matches, or returns zero if
 * it does not match. This is meant for use in code paths where the bit is
 * expected to be present and will be sufficient to protect against a
 * short-term group migration (e.g. takeover and return from iocb).
 */
| static inline ulong fd_get_running(int fd, uint desired_tgid) |
| { |
| ulong ret = 0; |
| uint old; |
| |
| /* TODO: may also be checked using an atomic double-load from a DWCAS |
| * on compatible architectures, which wouldn't require to modify nor |
| * restore the original value. |
| */ |
| old = _HA_ATOMIC_ADD_FETCH(&fdtab[fd].refc_tgid, 0x10000); |
| if (likely((old & 0xffff) == desired_tgid)) |
| ret = _HA_ATOMIC_LOAD(&fdtab[fd].running_mask); |
| _HA_ATOMIC_SUB(&fdtab[fd].refc_tgid, 0x10000); |
| return ret; |
| } |
| |
/* Removes the calling thread's ltid_bit from the fd's running mask and
 * returns the value before the atomic operation, so that the caller can know
 * if it was present.
 */
| static inline long fd_clr_running(int fd) |
| { |
| return _HA_ATOMIC_FETCH_AND(&fdtab[fd].running_mask, ~ti->ltid_bit); |
| } |
| |
| /* Prepares <fd> for being polled on all permitted threads of this group ID |
| * (these will then be refined to only cover running ones). |
| */ |
| static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), int tgid, unsigned long thread_mask) |
| { |
| extern void sock_conn_iocb(int); |
| int newstate; |
| |
	/* only sock_conn_iocb is known to support edge-triggered FDs */
| newstate = 0; |
| if ((global.tune.options & GTUNE_FD_ET) && iocb == sock_conn_iocb) |
| newstate |= FD_ET_POSSIBLE; |
| |
| /* This must never happen and would definitely indicate a bug, in |
| * addition to overwriting some unexpected memory areas. |
| */ |
| BUG_ON(fd < 0); |
| BUG_ON(fd >= global.maxsock); |
| BUG_ON(fdtab[fd].owner != NULL); |
| BUG_ON(fdtab[fd].state != 0); |
| BUG_ON(tgid < 1 || tgid > MAX_TGROUPS); |
| |
| thread_mask &= tg->threads_enabled; |
| BUG_ON(thread_mask == 0); |
| |
| fd_claim_tgid(fd, tgid); |
| |
| BUG_ON(fdtab[fd].running_mask); |
| |
| fdtab[fd].owner = owner; |
| fdtab[fd].iocb = iocb; |
| fdtab[fd].state = newstate; |
| fdtab[fd].thread_mask = thread_mask; |
| fd_drop_tgid(fd); |
| |
| #ifdef DEBUG_FD |
| fdtab[fd].event_count = 0; |
| #endif |
| |
| /* note: do not reset polled_mask here as it indicates which poller |
| * still knows this FD from a possible previous round. |
| */ |
| |
| /* the two directions are ready until proven otherwise */ |
| fd_may_both(fd); |
| _HA_ATOMIC_INC(&ha_used_fds); |
| } |
| |
| /* These are replacements for FD_SET, FD_CLR, FD_ISSET, working on uints */ |
| static inline void hap_fd_set(int fd, unsigned int *evts) |
| { |
| _HA_ATOMIC_OR(&evts[fd / (8*sizeof(*evts))], 1U << (fd & (8*sizeof(*evts) - 1))); |
| } |
| |
| static inline void hap_fd_clr(int fd, unsigned int *evts) |
| { |
| _HA_ATOMIC_AND(&evts[fd / (8*sizeof(*evts))], ~(1U << (fd & (8*sizeof(*evts) - 1)))); |
| } |
| |
| static inline unsigned int hap_fd_isset(int fd, unsigned int *evts) |
| { |
| return evts[fd / (8*sizeof(*evts))] & (1U << (fd & (8*sizeof(*evts) - 1))); |
| } |
| |
| /* send a wake-up event to this thread, only if it's asleep and not notified yet */ |
| static inline void wake_thread(int thr) |
| { |
| struct thread_ctx *ctx = &ha_thread_ctx[thr]; |
| |
| if ((_HA_ATOMIC_FETCH_OR(&ctx->flags, TH_FL_NOTIFIED) & (TH_FL_SLEEPING|TH_FL_NOTIFIED)) == TH_FL_SLEEPING) { |
| char c = 'c'; |
| DISGUISE(write(poller_wr_pipe[thr], &c, 1)); |
| } |
| } |
| |
| |
| #endif /* _HAPROXY_FD_H */ |
| |
| /* |
| * Local variables: |
| * c-indent-level: 8 |
| * c-basic-offset: 8 |
| * End: |
| */ |