blob: f8c7f34720a0e59e67d7302a8ce043997f4b62c5 [file] [log] [blame]
/*
 * FD polling functions for SunOS event ports.
 *
 * Copyright 2018 Joyent, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
11
12#include <unistd.h>
13#include <sys/time.h>
14#include <sys/types.h>
15
16#include <poll.h>
17#include <port.h>
18#include <errno.h>
19#include <syslog.h>
20
Willy Tarreaub2551052020-06-09 09:07:15 +020021#include <haproxy/activity.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020022#include <haproxy/api.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020023#include <haproxy/fd.h>
24#include <haproxy/global.h>
Willy Tarreau3727a8a2020-06-04 17:37:26 +020025#include <haproxy/signal.h>
Willy Tarreau6dfab112021-09-30 17:53:22 +020026#include <haproxy/task.h>
Willy Tarreauc2f7c582020-06-02 18:15:32 +020027#include <haproxy/ticks.h>
Willy Tarreau92b4f132020-06-01 11:05:15 +020028#include <haproxy/time.h>
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000029
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000030/*
31 * Private data:
32 */
33static int evports_fd[MAX_THREADS]; // per-thread evports_fd
34static THREAD_LOCAL port_event_t *evports_evlist = NULL;
35static THREAD_LOCAL int evports_evlist_max = 0;
36
37/*
38 * Convert the "state" member of "fdtab" into an event ports event mask.
39 */
40static inline int evports_state_to_events(int state)
41{
42 int events = 0;
43
Willy Tarreau5bee3e22019-09-04 09:52:57 +020044 if (state & FD_EV_ACTIVE_W)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000045 events |= POLLOUT;
Willy Tarreau5bee3e22019-09-04 09:52:57 +020046 if (state & FD_EV_ACTIVE_R)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000047 events |= POLLIN;
48
49 return (events);
50}
51
52/*
53 * Associate or dissociate this file descriptor with the event port, using the
54 * specified event mask.
55 */
56static inline void evports_resync_fd(int fd, int events)
57{
58 if (events == 0)
59 port_dissociate(evports_fd[tid], PORT_SOURCE_FD, fd);
60 else
61 port_associate(evports_fd[tid], PORT_SOURCE_FD, fd, events, NULL);
62}
63
64static void _update_fd(int fd)
65{
66 int en;
67 int events;
68
69 en = fdtab[fd].state;
70
Willy Tarreau5bee3e22019-09-04 09:52:57 +020071 if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) {
Olivier Houchard53055052019-07-25 14:00:18 +000072 if (!(polled_mask[fd].poll_recv & tid_bit) &&
73 !(polled_mask[fd].poll_send & tid_bit)) {
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000074 /* fd was not watched, it's still not */
75 return;
76 }
77 /* fd totally removed from poll list */
78 events = 0;
Olivier Houchard53055052019-07-25 14:00:18 +000079 if (polled_mask[fd].poll_recv & tid_bit)
80 _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
81 if (polled_mask[fd].poll_send & tid_bit)
82 _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000083 }
84 else {
85 /* OK fd has to be monitored, it was either added or changed */
86 events = evports_state_to_events(en);
Willy Tarreau5bee3e22019-09-04 09:52:57 +020087 if (en & FD_EV_ACTIVE_R) {
Olivier Houchard53055052019-07-25 14:00:18 +000088 if (!(polled_mask[fd].poll_recv & tid_bit))
89 _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
90 } else {
91 if (polled_mask[fd].poll_recv & tid_bit)
92 _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
93 }
Willy Tarreau5bee3e22019-09-04 09:52:57 +020094 if (en & FD_EV_ACTIVE_W) {
Olivier Houchard53055052019-07-25 14:00:18 +000095 if (!(polled_mask[fd].poll_send & tid_bit))
96 _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
97 } else {
98 if (polled_mask[fd].poll_send & tid_bit)
99 _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
100 }
101
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000102 }
103 evports_resync_fd(fd, events);
104}
105
106/*
107 * Event Ports poller. This routine interacts with the file descriptor
108 * management data structures and routines; see the large block comment in
109 * "src/fd.c" for more information.
110 */
111
Willy Tarreau03e78532020-02-25 07:38:05 +0100112static void _do_poll(struct poller *p, int exp, int wake)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000113{
114 int i;
115 int wait_time;
116 struct timespec timeout_ts;
117 unsigned int nevlist;
118 int fd, old_fd;
119 int status;
120
121 /*
122 * Scan the list of file descriptors with an updated status:
123 */
124 for (i = 0; i < fd_nbupdt; i++) {
125 fd = fd_updt[i];
126
127 _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
128 if (fdtab[fd].owner == NULL) {
Willy Tarreaue4063862020-06-17 20:35:33 +0200129 activity[tid].poll_drop_fd++;
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000130 continue;
131 }
132
133 _update_fd(fd);
134 }
135 fd_nbupdt = 0;
136 /* Scan the global update list */
137 for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
138 if (fd == -2) {
139 fd = old_fd;
140 continue;
141 }
142 else if (fd <= -3)
143 fd = -fd -4;
144 if (fd == -1)
145 break;
146 if (fdtab[fd].update_mask & tid_bit)
147 done_update_polling(fd);
148 else
149 continue;
150 if (!fdtab[fd].owner)
151 continue;
152 _update_fd(fd);
153 }
154
Willy Tarreau88d1c5d2021-08-04 11:44:17 +0200155 thread_idle_now();
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000156 thread_harmless_now();
157
158 /*
159 * Determine how long to wait for events to materialise on the port.
160 */
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200161 wait_time = wake ? 0 : compute_poll_timeout(exp);
Willy Tarreau6dfab112021-09-30 17:53:22 +0200162 sched_entering_poll();
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000163 activity_count_runtime();
164
165 do {
166 int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
167 int interrupted = 0;
168 nevlist = 1; /* desired number of events to be retrieved */
169 timeout_ts.tv_sec = (timeout / 1000);
170 timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
171
172 status = port_getn(evports_fd[tid],
173 evports_evlist,
174 evports_evlist_max,
175 &nevlist, /* updated to the number of events retrieved */
176 &timeout_ts);
177 if (status != 0) {
178 int e = errno;
179 switch (e) {
180 case ETIME:
181 /*
182 * Though the manual page has not historically made it
183 * clear, port_getn() can return -1 with an errno of
184 * ETIME and still have returned some number of events.
185 */
186 /* nevlist >= 0 */
187 break;
188 default:
189 nevlist = 0;
190 interrupted = 1;
191 break;
192 }
193 }
194 tv_update_date(timeout, nevlist);
195
196 if (nevlist || interrupted)
197 break;
198 if (timeout || !wait_time)
199 break;
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200200 if (signal_queue_len || wake)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000201 break;
202 if (tick_isset(exp) && tick_is_expired(exp, now_ms))
203 break;
204 } while(1);
205
Willy Tarreau6dfab112021-09-30 17:53:22 +0200206 sched_leaving_poll(wait_time, nevlist);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000207
208 thread_harmless_end();
Willy Tarreau88d1c5d2021-08-04 11:44:17 +0200209 thread_idle_end();
210
Willy Tarreauc37ccd72021-07-30 10:57:09 +0200211 if (sleeping_thread_mask & tid_bit)
212 _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000213
Willy Tarreaue5451532020-06-17 20:25:18 +0200214 if (nevlist > 0)
215 activity[tid].poll_io++;
216
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000217 for (i = 0; i < nevlist; i++) {
218 unsigned int n = 0;
219 int events, rebind_events;
Willy Tarreau200bd502021-07-29 16:57:19 +0200220 int ret;
221
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000222 fd = evports_evlist[i].portev_object;
223 events = evports_evlist[i].portev_events;
224
Willy Tarreau38e8a1c2020-06-23 10:04:54 +0200225#ifdef DEBUG_FD
Willy Tarreau4781b152021-04-06 13:53:36 +0200226 _HA_ATOMIC_INC(&fdtab[fd].event_count);
Willy Tarreau38e8a1c2020-06-23 10:04:54 +0200227#endif
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000228 /*
229 * By virtue of receiving an event for this file descriptor, it
230 * is no longer associated with the port in question. Store
231 * the previous event mask so that we may reassociate after
232 * processing is complete.
233 */
234 rebind_events = evports_state_to_events(fdtab[fd].state);
235 /* rebind_events != 0 */
236
237 /*
238 * Set bits based on the events we received from the port:
239 */
Emmanuel Hocdet7ceb96b2019-09-19 11:08:26 +0000240 n = ((events & POLLIN) ? FD_EV_READY_R : 0) |
241 ((events & POLLOUT) ? FD_EV_READY_W : 0) |
242 ((events & POLLHUP) ? FD_EV_SHUT_RW : 0) |
243 ((events & POLLERR) ? FD_EV_ERR_RW : 0);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000244
245 /*
246 * Call connection processing callbacks. Note that it's
247 * possible for this processing to alter the required event
Ilya Shipitsince7b00f2020-03-23 22:28:40 +0500248 * port association; i.e., the "state" member of the "fdtab"
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000249 * entry. If it changes, the fd will be placed on the updated
250 * list for processing the next time we are called.
251 */
Willy Tarreau200bd502021-07-29 16:57:19 +0200252 ret = fd_update_events(fd, n);
253
254 /* If the FD was already dead , skip it */
255 if (ret == FD_UPDT_DEAD)
256 continue;
257
258 /* disable polling on this instance if the FD was migrated */
259 if (ret == FD_UPDT_MIGRATED) {
260 if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid))
261 fd_updt[fd_nbupdt++] = fd;
262 continue;
263 }
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000264
265 /*
266 * This file descriptor was closed during the processing of
267 * polled events. No need to reassociate.
268 */
Willy Tarreau200bd502021-07-29 16:57:19 +0200269 if (ret == FD_UPDT_CLOSED)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000270 continue;
271
272 /*
273 * Reassociate with the port, using the same event mask as
274 * before. This call will not result in a dissociation as we
275 * asserted that _some_ events needed to be rebound above.
276 *
277 * Reassociating with the same mask allows us to mimic the
278 * level-triggered behaviour of poll(2). In the event that we
279 * are interested in the same events on the next turn of the
280 * loop, this represents no extra work.
281 *
282 * If this additional port_associate(3C) call becomes a
283 * performance problem, we would need to verify that we can
284 * correctly interact with the file descriptor cache and update
285 * list (see "src/fd.c") to avoid reassociating here, or to use
286 * a different events mask.
287 */
288 evports_resync_fd(fd, rebind_events);
289 }
290}
291
292static int init_evports_per_thread()
293{
294 int fd;
295
296 evports_evlist_max = global.tune.maxpollevents;
297 evports_evlist = calloc(evports_evlist_max, sizeof (port_event_t));
298 if (evports_evlist == NULL) {
299 goto fail_alloc;
300 }
301
302 if (MAX_THREADS > 1 && tid) {
303 if ((evports_fd[tid] = port_create()) == -1) {
304 goto fail_fd;
305 }
306 }
307
308 /* we may have to unregister some events initially registered on the
309 * original fd when it was alone, and/or to register events on the new
310 * fd for this thread. Let's just mark them as updated, the poller will
311 * do the rest.
312 */
313 for (fd = 0; fd < global.maxsock; fd++)
314 updt_fd_polling(fd);
315
316 return 1;
317
318 fail_fd:
Willy Tarreau61cfdf42021-02-20 10:46:51 +0100319 ha_free(&evports_evlist);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000320 evports_evlist_max = 0;
321 fail_alloc:
322 return 0;
323}
324
325static void deinit_evports_per_thread()
326{
327 if (MAX_THREADS > 1 && tid)
328 close(evports_fd[tid]);
329
Willy Tarreau61cfdf42021-02-20 10:46:51 +0100330 ha_free(&evports_evlist);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000331 evports_evlist_max = 0;
332}
333
334/*
335 * Initialisation of the event ports poller.
336 * Returns 0 in case of failure, non-zero in case of success.
337 */
Willy Tarreau03e78532020-02-25 07:38:05 +0100338static int _do_init(struct poller *p)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000339{
340 p->private = NULL;
341
342 if ((evports_fd[tid] = port_create()) == -1) {
343 goto fail;
344 }
345
346 hap_register_per_thread_init(init_evports_per_thread);
347 hap_register_per_thread_deinit(deinit_evports_per_thread);
348
349 return 1;
350
351fail:
352 p->pref = 0;
353 return 0;
354}
355
356/*
357 * Termination of the event ports poller.
358 * All resources are released and the poller is marked as inoperative.
359 */
Willy Tarreau03e78532020-02-25 07:38:05 +0100360static void _do_term(struct poller *p)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000361{
362 if (evports_fd[tid] != -1) {
363 close(evports_fd[tid]);
364 evports_fd[tid] = -1;
365 }
366
367 p->private = NULL;
368 p->pref = 0;
369
Willy Tarreau61cfdf42021-02-20 10:46:51 +0100370 ha_free(&evports_evlist);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000371 evports_evlist_max = 0;
372}
373
/*
 * Run-time check to make sure an event port can be created, which the
 * poller needs in order to function. The probe port is closed right away.
 * Returns 1 on success, otherwise 0.
 */
static int _do_test(struct poller *p)
{
	int probe = port_create();

	if (probe == -1)
		return 0;

	close(probe);
	return 1;
}
390
391/*
392 * Close and recreate the event port after fork(). Returns 1 on success,
393 * otherwise 0. If this function fails, "_do_term()" must be called to
394 * clean up the poller.
395 */
Willy Tarreau03e78532020-02-25 07:38:05 +0100396static int _do_fork(struct poller *p)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000397{
398 if (evports_fd[tid] != -1) {
399 close(evports_fd[tid]);
400 }
401
402 if ((evports_fd[tid] = port_create()) == -1) {
403 return 0;
404 }
405
406 return 1;
407}
408
409/*
410 * This constructor must be called before main() to register the event ports
411 * poller.
412 */
413__attribute__((constructor))
414static void _do_register(void)
415{
416 struct poller *p;
417 int i;
418
419 if (nbpollers >= MAX_POLLERS)
420 return;
421
422 for (i = 0; i < MAX_THREADS; i++)
423 evports_fd[i] = -1;
424
425 p = &pollers[nbpollers++];
426
427 p->name = "evports";
428 p->pref = 300;
Willy Tarreau11ef0832019-11-28 18:17:33 +0100429 p->flags = HAP_POLL_F_ERRHUP;
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000430 p->private = NULL;
431
432 p->clo = NULL;
433 p->test = _do_test;
434 p->init = _do_init;
435 p->term = _do_term;
436 p->poll = _do_poll;
437 p->fork = _do_fork;
438}