blob: d9d1637d416cdd289dee5c4db21d5d27b66fb1ba [file] [log] [blame]
/*
 * FD polling functions for SunOS event ports.
 *
 * Copyright 2018 Joyent, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
11
12#include <unistd.h>
13#include <sys/time.h>
14#include <sys/types.h>
15
16#include <poll.h>
17#include <port.h>
18#include <errno.h>
19#include <syslog.h>
20
21#include <common/compat.h>
22#include <common/config.h>
23#include <common/hathreads.h>
24#include <common/ticks.h>
25#include <common/time.h>
26#include <common/tools.h>
27
28#include <types/global.h>
29
30#include <proto/activity.h>
31#include <proto/fd.h>
32#include <proto/log.h>
33#include <proto/signal.h>
34
/*
 * Private data:
 *
 * evports_fd is indexed by thread id ("tid") and holds the event port
 * descriptor used by each thread; unused slots hold -1. The event list and
 * its capacity are thread-local: the list is the buffer handed to
 * port_getn() and evports_evlist_max is the number of entries it can hold.
 */
static int evports_fd[MAX_THREADS]; // per-thread evports_fd
static THREAD_LOCAL port_event_t *evports_evlist = NULL;
static THREAD_LOCAL int evports_evlist_max = 0;
41
42/*
43 * Convert the "state" member of "fdtab" into an event ports event mask.
44 */
45static inline int evports_state_to_events(int state)
46{
47 int events = 0;
48
49 if (state & FD_EV_POLLED_W)
50 events |= POLLOUT;
51 if (state & FD_EV_POLLED_R)
52 events |= POLLIN;
53
54 return (events);
55}
56
57/*
58 * Associate or dissociate this file descriptor with the event port, using the
59 * specified event mask.
60 */
61static inline void evports_resync_fd(int fd, int events)
62{
63 if (events == 0)
64 port_dissociate(evports_fd[tid], PORT_SOURCE_FD, fd);
65 else
66 port_associate(evports_fd[tid], PORT_SOURCE_FD, fd, events, NULL);
67}
68
69static void _update_fd(int fd)
70{
71 int en;
72 int events;
73
74 en = fdtab[fd].state;
75
76 if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) {
Olivier Houchard53055052019-07-25 14:00:18 +000077 if (!(polled_mask[fd].poll_recv & tid_bit) &&
78 !(polled_mask[fd].poll_send & tid_bit)) {
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000079 /* fd was not watched, it's still not */
80 return;
81 }
82 /* fd totally removed from poll list */
83 events = 0;
Olivier Houchard53055052019-07-25 14:00:18 +000084 if (polled_mask[fd].poll_recv & tid_bit)
85 _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
86 if (polled_mask[fd].poll_send & tid_bit)
87 _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000088 }
89 else {
90 /* OK fd has to be monitored, it was either added or changed */
91 events = evports_state_to_events(en);
Olivier Houchard53055052019-07-25 14:00:18 +000092 if (en & FD_EV_POLLED_R) {
93 if (!(polled_mask[fd].poll_recv & tid_bit))
94 _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
95 } else {
96 if (polled_mask[fd].poll_recv & tid_bit)
97 _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
98 }
99 if (en & FD_EV_POLLED_W) {
100 if (!(polled_mask[fd].poll_send & tid_bit))
101 _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
102 } else {
103 if (polled_mask[fd].poll_send & tid_bit)
104 _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
105 }
106
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000107 }
108 evports_resync_fd(fd, events);
109}
110
111/*
112 * Event Ports poller. This routine interacts with the file descriptor
113 * management data structures and routines; see the large block comment in
114 * "src/fd.c" for more information.
115 */
116
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200117REGPRM3 static void _do_poll(struct poller *p, int exp, int wake)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000118{
119 int i;
120 int wait_time;
121 struct timespec timeout_ts;
122 unsigned int nevlist;
123 int fd, old_fd;
124 int status;
125
126 /*
127 * Scan the list of file descriptors with an updated status:
128 */
129 for (i = 0; i < fd_nbupdt; i++) {
130 fd = fd_updt[i];
131
132 _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
133 if (fdtab[fd].owner == NULL) {
134 activity[tid].poll_drop++;
135 continue;
136 }
137
138 _update_fd(fd);
139 }
140 fd_nbupdt = 0;
141 /* Scan the global update list */
142 for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
143 if (fd == -2) {
144 fd = old_fd;
145 continue;
146 }
147 else if (fd <= -3)
148 fd = -fd -4;
149 if (fd == -1)
150 break;
151 if (fdtab[fd].update_mask & tid_bit)
152 done_update_polling(fd);
153 else
154 continue;
155 if (!fdtab[fd].owner)
156 continue;
157 _update_fd(fd);
158 }
159
160 thread_harmless_now();
Olivier Houchard305d5ab2019-07-24 18:07:06 +0200161 if (sleeping_thread_mask & tid_bit)
162 _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000163
164 /*
165 * Determine how long to wait for events to materialise on the port.
166 */
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200167 wait_time = wake ? 0 : compute_poll_timeout(exp);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000168 tv_entering_poll();
169 activity_count_runtime();
170
171 do {
172 int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
173 int interrupted = 0;
174 nevlist = 1; /* desired number of events to be retrieved */
175 timeout_ts.tv_sec = (timeout / 1000);
176 timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
177
178 status = port_getn(evports_fd[tid],
179 evports_evlist,
180 evports_evlist_max,
181 &nevlist, /* updated to the number of events retrieved */
182 &timeout_ts);
183 if (status != 0) {
184 int e = errno;
185 switch (e) {
186 case ETIME:
187 /*
188 * Though the manual page has not historically made it
189 * clear, port_getn() can return -1 with an errno of
190 * ETIME and still have returned some number of events.
191 */
192 /* nevlist >= 0 */
193 break;
194 default:
195 nevlist = 0;
196 interrupted = 1;
197 break;
198 }
199 }
200 tv_update_date(timeout, nevlist);
201
202 if (nevlist || interrupted)
203 break;
204 if (timeout || !wait_time)
205 break;
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200206 if (signal_queue_len || wake)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000207 break;
208 if (tick_isset(exp) && tick_is_expired(exp, now_ms))
209 break;
210 } while(1);
211
212 tv_leaving_poll(wait_time, nevlist);
213
214 thread_harmless_end();
215
216 for (i = 0; i < nevlist; i++) {
217 unsigned int n = 0;
218 int events, rebind_events;
219 fd = evports_evlist[i].portev_object;
220 events = evports_evlist[i].portev_events;
221
222 if (fdtab[fd].owner == NULL) {
223 activity[tid].poll_dead++;
224 continue;
225 }
226
227 if (!(fdtab[fd].thread_mask & tid_bit)) {
228 activity[tid].poll_skip++;
229 continue;
230 }
231
232 /*
233 * By virtue of receiving an event for this file descriptor, it
234 * is no longer associated with the port in question. Store
235 * the previous event mask so that we may reassociate after
236 * processing is complete.
237 */
238 rebind_events = evports_state_to_events(fdtab[fd].state);
239 /* rebind_events != 0 */
240
241 /*
242 * Set bits based on the events we received from the port:
243 */
244 if (events & POLLIN)
245 n |= FD_POLL_IN;
246 if (events & POLLOUT)
247 n |= FD_POLL_OUT;
248 if (events & POLLERR)
249 n |= FD_POLL_ERR;
250 if (events & POLLHUP)
251 n |= FD_POLL_HUP;
252
253 /*
254 * Call connection processing callbacks. Note that it's
255 * possible for this processing to alter the required event
256 * port assocation; i.e., the "state" member of the "fdtab"
257 * entry. If it changes, the fd will be placed on the updated
258 * list for processing the next time we are called.
259 */
260 fd_update_events(fd, n);
261
262 /*
263 * This file descriptor was closed during the processing of
264 * polled events. No need to reassociate.
265 */
266 if (fdtab[fd].owner == NULL)
267 continue;
268
269 /*
270 * Reassociate with the port, using the same event mask as
271 * before. This call will not result in a dissociation as we
272 * asserted that _some_ events needed to be rebound above.
273 *
274 * Reassociating with the same mask allows us to mimic the
275 * level-triggered behaviour of poll(2). In the event that we
276 * are interested in the same events on the next turn of the
277 * loop, this represents no extra work.
278 *
279 * If this additional port_associate(3C) call becomes a
280 * performance problem, we would need to verify that we can
281 * correctly interact with the file descriptor cache and update
282 * list (see "src/fd.c") to avoid reassociating here, or to use
283 * a different events mask.
284 */
285 evports_resync_fd(fd, rebind_events);
286 }
287}
288
289static int init_evports_per_thread()
290{
291 int fd;
292
293 evports_evlist_max = global.tune.maxpollevents;
294 evports_evlist = calloc(evports_evlist_max, sizeof (port_event_t));
295 if (evports_evlist == NULL) {
296 goto fail_alloc;
297 }
298
299 if (MAX_THREADS > 1 && tid) {
300 if ((evports_fd[tid] = port_create()) == -1) {
301 goto fail_fd;
302 }
303 }
304
305 /* we may have to unregister some events initially registered on the
306 * original fd when it was alone, and/or to register events on the new
307 * fd for this thread. Let's just mark them as updated, the poller will
308 * do the rest.
309 */
310 for (fd = 0; fd < global.maxsock; fd++)
311 updt_fd_polling(fd);
312
313 return 1;
314
315 fail_fd:
316 free(evports_evlist);
317 evports_evlist = NULL;
318 evports_evlist_max = 0;
319 fail_alloc:
320 return 0;
321}
322
323static void deinit_evports_per_thread()
324{
325 if (MAX_THREADS > 1 && tid)
326 close(evports_fd[tid]);
327
328 free(evports_evlist);
329 evports_evlist = NULL;
330 evports_evlist_max = 0;
331}
332
333/*
334 * Initialisation of the event ports poller.
335 * Returns 0 in case of failure, non-zero in case of success.
336 */
337REGPRM1 static int _do_init(struct poller *p)
338{
339 p->private = NULL;
340
341 if ((evports_fd[tid] = port_create()) == -1) {
342 goto fail;
343 }
344
345 hap_register_per_thread_init(init_evports_per_thread);
346 hap_register_per_thread_deinit(deinit_evports_per_thread);
347
348 return 1;
349
350fail:
351 p->pref = 0;
352 return 0;
353}
354
355/*
356 * Termination of the event ports poller.
357 * All resources are released and the poller is marked as inoperative.
358 */
359REGPRM1 static void _do_term(struct poller *p)
360{
361 if (evports_fd[tid] != -1) {
362 close(evports_fd[tid]);
363 evports_fd[tid] = -1;
364 }
365
366 p->private = NULL;
367 p->pref = 0;
368
369 free(evports_evlist);
370 evports_evlist = NULL;
371 evports_evlist_max = 0;
372}
373
374/*
375 * Run-time check to make sure we can allocate the resources needed for
376 * the poller to function correctly.
377 * Returns 1 on success, otherwise 0.
378 */
379REGPRM1 static int _do_test(struct poller *p)
380{
381 int fd;
382
383 if ((fd = port_create()) == -1) {
384 return 0;
385 }
386
387 close(fd);
388 return 1;
389}
390
391/*
392 * Close and recreate the event port after fork(). Returns 1 on success,
393 * otherwise 0. If this function fails, "_do_term()" must be called to
394 * clean up the poller.
395 */
396REGPRM1 static int _do_fork(struct poller *p)
397{
398 if (evports_fd[tid] != -1) {
399 close(evports_fd[tid]);
400 }
401
402 if ((evports_fd[tid] = port_create()) == -1) {
403 return 0;
404 }
405
406 return 1;
407}
408
409/*
410 * This constructor must be called before main() to register the event ports
411 * poller.
412 */
413__attribute__((constructor))
414static void _do_register(void)
415{
416 struct poller *p;
417 int i;
418
419 if (nbpollers >= MAX_POLLERS)
420 return;
421
422 for (i = 0; i < MAX_THREADS; i++)
423 evports_fd[i] = -1;
424
425 p = &pollers[nbpollers++];
426
427 p->name = "evports";
428 p->pref = 300;
429 p->flags = 0;
430 p->private = NULL;
431
432 p->clo = NULL;
433 p->test = _do_test;
434 p->init = _do_init;
435 p->term = _do_term;
436 p->poll = _do_poll;
437 p->fork = _do_fork;
438}