blob: 8f0b7c90725206339f76a16b7de54ef0f81b7157 [file] [log] [blame]
/*
 * FD polling functions for FreeBSD kqueue()
 *
 * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreau1e63130a2007-04-09 12:03:06 +020013#include <unistd.h>
14#include <sys/time.h>
15#include <sys/types.h>
16
17#include <sys/event.h>
18#include <sys/time.h>
19
20#include <common/compat.h>
21#include <common/config.h>
Willy Tarreau60b639c2018-08-02 10:16:17 +020022#include <common/hathreads.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020023#include <common/ticks.h>
Willy Tarreau1e63130a2007-04-09 12:03:06 +020024#include <common/time.h>
Willy Tarreau1db37712007-06-03 17:16:49 +020025#include <common/tools.h>
Willy Tarreau1e63130a2007-04-09 12:03:06 +020026
Willy Tarreau1e63130a2007-04-09 12:03:06 +020027#include <types/global.h>
28
Willy Tarreau609aad92018-11-22 08:31:09 +010029#include <proto/activity.h>
Willy Tarreau1e63130a2007-04-09 12:03:06 +020030#include <proto/fd.h>
Willy Tarreaubeb859a2018-11-22 18:07:59 +010031#include <proto/signal.h>
Willy Tarreau10146c92015-04-13 20:44:19 +020032
Willy Tarreau1e63130a2007-04-09 12:03:06 +020033
34/* private data */
Willy Tarreau7a2364d2018-01-19 08:56:14 +010035static int kqueue_fd[MAX_THREADS]; // per-thread kqueue_fd
Christopher Fauletd4604ad2017-05-29 10:40:41 +020036static THREAD_LOCAL struct kevent *kev = NULL;
Olivier Houchardebaba752018-04-16 13:24:48 +020037static struct kevent *kev_out = NULL; // Trash buffer for kevent() to write the eventlist in
Willy Tarreau1e63130a2007-04-09 12:03:06 +020038
PiBa-NLc55b88e2018-05-10 01:01:28 +020039static int _update_fd(int fd, int start)
Olivier Houchard6b96f722018-04-25 16:58:25 +020040{
41 int en;
PiBa-NLc55b88e2018-05-10 01:01:28 +020042 int changes = start;
Olivier Houchard6b96f722018-04-25 16:58:25 +020043
44 en = fdtab[fd].state;
45
Willy Tarreau5bee3e22019-09-04 09:52:57 +020046 if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) {
Olivier Houchard53055052019-07-25 14:00:18 +000047 if (!(polled_mask[fd].poll_recv & tid_bit) &&
48 !(polled_mask[fd].poll_send & tid_bit)) {
Olivier Houchard6b96f722018-04-25 16:58:25 +020049 /* fd was not watched, it's still not */
Olivier Houchard5ab33942018-09-11 14:44:51 +020050 return changes;
Olivier Houchard6b96f722018-04-25 16:58:25 +020051 }
52 /* fd totally removed from poll list */
53 EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
54 EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
Olivier Houchard53055052019-07-25 14:00:18 +000055 if (polled_mask[fd].poll_recv & tid_bit)
56 _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
57 if (polled_mask[fd].poll_send & tid_bit)
58 _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
Olivier Houchard6b96f722018-04-25 16:58:25 +020059 }
60 else {
61 /* OK fd has to be monitored, it was either added or changed */
62
Willy Tarreau5bee3e22019-09-04 09:52:57 +020063 if (en & FD_EV_ACTIVE_R) {
Olivier Houchard53055052019-07-25 14:00:18 +000064 if (!(polled_mask[fd].poll_recv & tid_bit)) {
65 EV_SET(&kev[changes++], fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
66 _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
67 }
68 }
69 else if (polled_mask[fd].poll_recv & tid_bit) {
Olivier Houchard6b96f722018-04-25 16:58:25 +020070 EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
Olivier Houchard53055052019-07-25 14:00:18 +000071 HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
72 }
Olivier Houchard6b96f722018-04-25 16:58:25 +020073
Willy Tarreau5bee3e22019-09-04 09:52:57 +020074 if (en & FD_EV_ACTIVE_W) {
Olivier Houchard53055052019-07-25 14:00:18 +000075 if (!(polled_mask[fd].poll_send & tid_bit)) {
76 EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
77 _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
78 }
79 }
80 else if (polled_mask[fd].poll_send & tid_bit) {
Olivier Houchard6b96f722018-04-25 16:58:25 +020081 EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
Olivier Houchard53055052019-07-25 14:00:18 +000082 _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
83 }
Olivier Houchard6b96f722018-04-25 16:58:25 +020084
Olivier Houchard6b96f722018-04-25 16:58:25 +020085 }
86 return changes;
87}
88
Willy Tarreau1e63130a2007-04-09 12:03:06 +020089/*
Willy Tarreau4a226272012-11-11 20:49:49 +010090 * kqueue() poller
Willy Tarreau1e63130a2007-04-09 12:03:06 +020091 */
Willy Tarreau2ae84e42019-05-28 16:44:05 +020092REGPRM3 static void _do_poll(struct poller *p, int exp, int wake)
Willy Tarreau1e63130a2007-04-09 12:03:06 +020093{
Willy Tarreau4a226272012-11-11 20:49:49 +010094 int status;
Willy Tarreaubeb859a2018-11-22 18:07:59 +010095 int count, fd, wait_time;
96 struct timespec timeout_ts;
Olivier Houchard6b96f722018-04-25 16:58:25 +020097 int updt_idx;
Willy Tarreau4a226272012-11-11 20:49:49 +010098 int changes = 0;
Olivier Houchard6b96f722018-04-25 16:58:25 +020099 int old_fd;
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200100
Willy Tarreaubeb859a2018-11-22 18:07:59 +0100101 timeout_ts.tv_sec = 0;
102 timeout_ts.tv_nsec = 0;
Willy Tarreau4a226272012-11-11 20:49:49 +0100103 /* first, scan the update list to find changes */
104 for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
105 fd = fd_updt[updt_idx];
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100106
Olivier Houchardcb6c9272019-03-08 18:49:54 +0100107 _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
Willy Tarreaud80cb4e2018-01-20 19:30:13 +0100108 if (!fdtab[fd].owner) {
109 activity[tid].poll_drop++;
Willy Tarreauf817e9f2014-01-10 16:58:45 +0100110 continue;
Willy Tarreaud80cb4e2018-01-20 19:30:13 +0100111 }
PiBa-NLc55b88e2018-05-10 01:01:28 +0200112 changes = _update_fd(fd, changes);
Olivier Houchard6b96f722018-04-25 16:58:25 +0200113 }
114 /* Scan the global update list */
115 for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
116 if (fd == -2) {
117 fd = old_fd;
118 continue;
Willy Tarreau4a226272012-11-11 20:49:49 +0100119 }
Olivier Houchard6b96f722018-04-25 16:58:25 +0200120 else if (fd <= -3)
121 fd = -fd -4;
122 if (fd == -1)
123 break;
124 if (fdtab[fd].update_mask & tid_bit)
125 done_update_polling(fd);
126 else
127 continue;
128 if (!fdtab[fd].owner)
129 continue;
PiBa-NLc55b88e2018-05-10 01:01:28 +0200130 changes = _update_fd(fd, changes);
Willy Tarreau4a226272012-11-11 20:49:49 +0100131 }
Olivier Houchard6b96f722018-04-25 16:58:25 +0200132
Willy Tarreau60b639c2018-08-02 10:16:17 +0200133 thread_harmless_now();
134
Olivier Houchardebaba752018-04-16 13:24:48 +0200135 if (changes) {
136#ifdef EV_RECEIPT
137 kev[0].flags |= EV_RECEIPT;
138#else
139 /* If EV_RECEIPT isn't defined, just add an invalid entry,
140 * so that we get an error and kevent() stops before scanning
141 * the kqueue.
142 */
143 EV_SET(&kev[changes++], -1, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
144#endif
Willy Tarreaubeb859a2018-11-22 18:07:59 +0100145 kevent(kqueue_fd[tid], kev, changes, kev_out, changes, &timeout_ts);
Olivier Houchardebaba752018-04-16 13:24:48 +0200146 }
Willy Tarreau4a226272012-11-11 20:49:49 +0100147 fd_nbupdt = 0;
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200148
Willy Tarreauf37ba942018-10-17 11:25:54 +0200149 /* now let's wait for events */
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200150 wait_time = wake ? 0 : compute_poll_timeout(exp);
Willy Tarreauce036bc2018-01-29 14:58:02 +0100151 fd = global.tune.maxpollevents;
Willy Tarreau7e9c4ae2018-10-17 14:31:19 +0200152 tv_entering_poll();
Willy Tarreau609aad92018-11-22 08:31:09 +0100153 activity_count_runtime();
Willy Tarreaubeb859a2018-11-22 18:07:59 +0100154
155 do {
156 int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
157
158 timeout_ts.tv_sec = (timeout / 1000);
159 timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
160
161 status = kevent(kqueue_fd[tid], // int kq
162 NULL, // const struct kevent *changelist
163 0, // int nchanges
164 kev, // struct kevent *eventlist
165 fd, // int nevents
166 &timeout_ts); // const struct timespec *timeout
167 tv_update_date(timeout, status);
168
169 if (status)
170 break;
171 if (timeout || !wait_time)
172 break;
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200173 if (signal_queue_len || wake)
Willy Tarreaubeb859a2018-11-22 18:07:59 +0100174 break;
175 if (tick_isset(exp) && tick_is_expired(exp, now_ms))
176 break;
177 } while (1);
178
179 tv_leaving_poll(wait_time, status);
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200180
Willy Tarreau60b639c2018-08-02 10:16:17 +0200181 thread_harmless_end();
Olivier Houchard305d5ab2019-07-24 18:07:06 +0200182 if (sleeping_thread_mask & tid_bit)
183 _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
Willy Tarreau60b639c2018-08-02 10:16:17 +0200184
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200185 for (count = 0; count < status; count++) {
Christopher Fauletab62f512017-08-30 10:34:36 +0200186 unsigned int n = 0;
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200187 fd = kev[count].ident;
Willy Tarreau9845e752012-07-06 11:44:28 +0200188
Willy Tarreaud80cb4e2018-01-20 19:30:13 +0100189 if (!fdtab[fd].owner) {
190 activity[tid].poll_dead++;
Willy Tarreau076be252012-07-06 16:02:29 +0200191 continue;
Willy Tarreaud80cb4e2018-01-20 19:30:13 +0100192 }
193
194 if (!(fdtab[fd].thread_mask & tid_bit)) {
195 activity[tid].poll_skip++;
196 continue;
197 }
Willy Tarreau076be252012-07-06 16:02:29 +0200198
Willy Tarreau6b308982019-09-06 19:05:50 +0200199 if (kev[count].filter == EVFILT_READ) {
Willy Tarreaufa061762017-03-13 20:49:56 +0100200 if (kev[count].data)
Willy Tarreau6b308982019-09-06 19:05:50 +0200201 n |= FD_EV_READY_R;
Willy Tarreau19c4ab92017-03-13 20:36:48 +0100202 if (kev[count].flags & EV_EOF)
Willy Tarreau6b308982019-09-06 19:05:50 +0200203 n |= FD_EV_SHUT_R;
Willy Tarreau4a226272012-11-11 20:49:49 +0100204 }
Willy Tarreau6b308982019-09-06 19:05:50 +0200205 else if (kev[count].filter == EVFILT_WRITE) {
206 n |= FD_EV_READY_W;
Willy Tarreau19c4ab92017-03-13 20:36:48 +0100207 if (kev[count].flags & EV_EOF)
Willy Tarreau6b308982019-09-06 19:05:50 +0200208 n |= FD_EV_ERR_RW;
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200209 }
Willy Tarreau4a226272012-11-11 20:49:49 +0100210
Christopher Fauletab62f512017-08-30 10:34:36 +0200211 fd_update_events(fd, n);
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200212 }
213}
214
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200215
216static int init_kqueue_per_thread()
217{
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100218 int fd;
219
Olivier Houchardebaba752018-04-16 13:24:48 +0200220 /* we can have up to two events per fd, so allocate enough to store
221 * 2*fd event, and an extra one, in case EV_RECEIPT isn't defined,
222 * so that we can add an invalid entry and get an error, to avoid
223 * scanning the kqueue uselessly.
224 */
225 kev = calloc(1, sizeof(struct kevent) * (2 * global.maxsock + 1));
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200226 if (kev == NULL)
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100227 goto fail_alloc;
228
Christopher Faulet727c89b2018-01-25 16:40:35 +0100229 if (MAX_THREADS > 1 && tid) {
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100230 kqueue_fd[tid] = kqueue();
231 if (kqueue_fd[tid] < 0)
232 goto fail_fd;
233 }
234
235 /* we may have to unregister some events initially registered on the
236 * original fd when it was alone, and/or to register events on the new
237 * fd for this thread. Let's just mark them as updated, the poller will
238 * do the rest.
239 */
Willy Tarreauce036bc2018-01-29 14:58:02 +0100240 for (fd = 0; fd < global.maxsock; fd++)
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100241 updt_fd_polling(fd);
242
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200243 return 1;
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100244 fail_fd:
245 free(kev);
246 fail_alloc:
247 return 0;
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200248}
249
250static void deinit_kqueue_per_thread()
251{
Christopher Faulet727c89b2018-01-25 16:40:35 +0100252 if (MAX_THREADS > 1 && tid)
Christopher Faulet13b007d2018-01-25 16:32:18 +0100253 close(kqueue_fd[tid]);
254
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200255 free(kev);
Christopher Fauletcd7879a2017-10-27 13:53:47 +0200256 kev = NULL;
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200257}
258
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200259/*
260 * Initialization of the kqueue() poller.
261 * Returns 0 in case of failure, non-zero in case of success. If it fails, it
262 * disables the poller by setting its pref to 0.
263 */
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200264REGPRM1 static int _do_init(struct poller *p)
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200265{
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200266 p->private = NULL;
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200267
Olivier Houchardebaba752018-04-16 13:24:48 +0200268 /* we can have up to two events per fd, so allocate enough to store
269 * 2*fd event, and an extra one, in case EV_RECEIPT isn't defined,
270 * so that we can add an invalid entry and get an error, to avoid
271 * scanning the kqueue uselessly.
272 */
273 kev_out = calloc(1, sizeof(struct kevent) * (2 * global.maxsock + 1));
274 if (!kev_out)
275 goto fail_alloc;
276
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100277 kqueue_fd[tid] = kqueue();
278 if (kqueue_fd[tid] < 0)
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200279 goto fail_fd;
280
Christopher Fauletcd7879a2017-10-27 13:53:47 +0200281 hap_register_per_thread_init(init_kqueue_per_thread);
282 hap_register_per_thread_deinit(deinit_kqueue_per_thread);
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200283 return 1;
284
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200285 fail_fd:
Olivier Houchardebaba752018-04-16 13:24:48 +0200286 free(kev_out);
287 kev_out = NULL;
288fail_alloc:
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200289 p->pref = 0;
290 return 0;
291}
292
293/*
294 * Termination of the kqueue() poller.
295 * Memory is released and the poller is marked as unselectable.
296 */
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200297REGPRM1 static void _do_term(struct poller *p)
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200298{
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100299 if (kqueue_fd[tid] >= 0) {
300 close(kqueue_fd[tid]);
301 kqueue_fd[tid] = -1;
Willy Tarreaud79e79b2009-05-10 10:18:54 +0200302 }
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200303
304 p->private = NULL;
305 p->pref = 0;
Olivier Houchardebaba752018-04-16 13:24:48 +0200306 if (kev_out) {
307 free(kev_out);
308 kev_out = NULL;
309 }
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200310}
311
312/*
Willy Tarreau2ff76222007-04-09 19:29:56 +0200313 * Check that the poller works.
314 * Returns 1 if OK, otherwise 0.
315 */
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200316REGPRM1 static int _do_test(struct poller *p)
Willy Tarreau2ff76222007-04-09 19:29:56 +0200317{
318 int fd;
319
320 fd = kqueue();
321 if (fd < 0)
322 return 0;
323 close(fd);
324 return 1;
325}
326
327/*
328 * Recreate the kqueue file descriptor after a fork(). Returns 1 if OK,
329 * otherwise 0. Note that some pollers need to be reopened after a fork()
330 * (such as kqueue), and some others may fail to do so in a chroot.
331 */
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200332REGPRM1 static int _do_fork(struct poller *p)
Willy Tarreau2ff76222007-04-09 19:29:56 +0200333{
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100334 kqueue_fd[tid] = kqueue();
335 if (kqueue_fd[tid] < 0)
Willy Tarreau2ff76222007-04-09 19:29:56 +0200336 return 0;
337 return 1;
338}
339
340/*
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200341 * It is a constructor, which means that it will automatically be called before
342 * main(). This is GCC-specific but it works at least since 2.95.
343 * Special care must be taken so that it does not need any uninitialized data.
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200344 */
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200345__attribute__((constructor))
346static void _do_register(void)
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200347{
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200348 struct poller *p;
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100349 int i;
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200350
351 if (nbpollers >= MAX_POLLERS)
352 return;
Willy Tarreaud79e79b2009-05-10 10:18:54 +0200353
Willy Tarreau7a2364d2018-01-19 08:56:14 +0100354 for (i = 0; i < MAX_THREADS; i++)
355 kqueue_fd[i] = -1;
356
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200357 p = &pollers[nbpollers++];
358
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200359 p->name = "kqueue";
360 p->pref = 300;
Willy Tarreau19c4ab92017-03-13 20:36:48 +0100361 p->flags = HAP_POLL_F_RDHUP;
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200362 p->private = NULL;
363
Willy Tarreau70c6fd82012-11-11 21:02:34 +0100364 p->clo = NULL;
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200365 p->test = _do_test;
366 p->init = _do_init;
367 p->term = _do_term;
368 p->poll = _do_poll;
369 p->fork = _do_fork;
Willy Tarreau1e63130a2007-04-09 12:03:06 +0200370}
371
372
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */