blob: 7842bf242b81f85146606f69c82d91bc724bc220 [file] [log] [blame]
/*
 * FD polling functions for SunOS event ports.
 *
 * Copyright 2018 Joyent, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
11
12#include <unistd.h>
13#include <sys/time.h>
14#include <sys/types.h>
15
16#include <poll.h>
17#include <port.h>
18#include <errno.h>
19#include <syslog.h>
20
21#include <common/compat.h>
22#include <common/config.h>
23#include <common/hathreads.h>
24#include <common/ticks.h>
25#include <common/time.h>
26#include <common/tools.h>
27
28#include <types/global.h>
29
30#include <proto/activity.h>
31#include <proto/fd.h>
32#include <proto/log.h>
33#include <proto/signal.h>
34
35/*
36 * Private data:
37 */
38static int evports_fd[MAX_THREADS]; // per-thread evports_fd
39static THREAD_LOCAL port_event_t *evports_evlist = NULL;
40static THREAD_LOCAL int evports_evlist_max = 0;
41
42/*
43 * Convert the "state" member of "fdtab" into an event ports event mask.
44 */
45static inline int evports_state_to_events(int state)
46{
47 int events = 0;
48
49 if (state & FD_EV_POLLED_W)
50 events |= POLLOUT;
51 if (state & FD_EV_POLLED_R)
52 events |= POLLIN;
53
54 return (events);
55}
56
57/*
58 * Associate or dissociate this file descriptor with the event port, using the
59 * specified event mask.
60 */
61static inline void evports_resync_fd(int fd, int events)
62{
63 if (events == 0)
64 port_dissociate(evports_fd[tid], PORT_SOURCE_FD, fd);
65 else
66 port_associate(evports_fd[tid], PORT_SOURCE_FD, fd, events, NULL);
67}
68
69static void _update_fd(int fd)
70{
71 int en;
72 int events;
73
74 en = fdtab[fd].state;
75
76 if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) {
77 if (!(polled_mask[fd] & tid_bit)) {
78 /* fd was not watched, it's still not */
79 return;
80 }
81 /* fd totally removed from poll list */
82 events = 0;
83 _HA_ATOMIC_AND(&polled_mask[fd], ~tid_bit);
84 }
85 else {
86 /* OK fd has to be monitored, it was either added or changed */
87 events = evports_state_to_events(en);
88 _HA_ATOMIC_OR(&polled_mask[fd], tid_bit);
89 }
90 evports_resync_fd(fd, events);
91}
92
93/*
94 * Event Ports poller. This routine interacts with the file descriptor
95 * management data structures and routines; see the large block comment in
96 * "src/fd.c" for more information.
97 */
98
Willy Tarreau2ae84e42019-05-28 16:44:05 +020099REGPRM3 static void _do_poll(struct poller *p, int exp, int wake)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000100{
101 int i;
102 int wait_time;
103 struct timespec timeout_ts;
104 unsigned int nevlist;
105 int fd, old_fd;
106 int status;
107
108 /*
109 * Scan the list of file descriptors with an updated status:
110 */
111 for (i = 0; i < fd_nbupdt; i++) {
112 fd = fd_updt[i];
113
114 _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
115 if (fdtab[fd].owner == NULL) {
116 activity[tid].poll_drop++;
117 continue;
118 }
119
120 _update_fd(fd);
121 }
122 fd_nbupdt = 0;
123 /* Scan the global update list */
124 for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
125 if (fd == -2) {
126 fd = old_fd;
127 continue;
128 }
129 else if (fd <= -3)
130 fd = -fd -4;
131 if (fd == -1)
132 break;
133 if (fdtab[fd].update_mask & tid_bit)
134 done_update_polling(fd);
135 else
136 continue;
137 if (!fdtab[fd].owner)
138 continue;
139 _update_fd(fd);
140 }
141
142 thread_harmless_now();
Olivier Houchard305d5ab2019-07-24 18:07:06 +0200143 if (sleeping_thread_mask & tid_bit)
144 _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000145
146 /*
147 * Determine how long to wait for events to materialise on the port.
148 */
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200149 wait_time = wake ? 0 : compute_poll_timeout(exp);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000150 tv_entering_poll();
151 activity_count_runtime();
152
153 do {
154 int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
155 int interrupted = 0;
156 nevlist = 1; /* desired number of events to be retrieved */
157 timeout_ts.tv_sec = (timeout / 1000);
158 timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
159
160 status = port_getn(evports_fd[tid],
161 evports_evlist,
162 evports_evlist_max,
163 &nevlist, /* updated to the number of events retrieved */
164 &timeout_ts);
165 if (status != 0) {
166 int e = errno;
167 switch (e) {
168 case ETIME:
169 /*
170 * Though the manual page has not historically made it
171 * clear, port_getn() can return -1 with an errno of
172 * ETIME and still have returned some number of events.
173 */
174 /* nevlist >= 0 */
175 break;
176 default:
177 nevlist = 0;
178 interrupted = 1;
179 break;
180 }
181 }
182 tv_update_date(timeout, nevlist);
183
184 if (nevlist || interrupted)
185 break;
186 if (timeout || !wait_time)
187 break;
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200188 if (signal_queue_len || wake)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000189 break;
190 if (tick_isset(exp) && tick_is_expired(exp, now_ms))
191 break;
192 } while(1);
193
194 tv_leaving_poll(wait_time, nevlist);
195
196 thread_harmless_end();
197
198 for (i = 0; i < nevlist; i++) {
199 unsigned int n = 0;
200 int events, rebind_events;
201 fd = evports_evlist[i].portev_object;
202 events = evports_evlist[i].portev_events;
203
204 if (fdtab[fd].owner == NULL) {
205 activity[tid].poll_dead++;
206 continue;
207 }
208
209 if (!(fdtab[fd].thread_mask & tid_bit)) {
210 activity[tid].poll_skip++;
211 continue;
212 }
213
214 /*
215 * By virtue of receiving an event for this file descriptor, it
216 * is no longer associated with the port in question. Store
217 * the previous event mask so that we may reassociate after
218 * processing is complete.
219 */
220 rebind_events = evports_state_to_events(fdtab[fd].state);
221 /* rebind_events != 0 */
222
223 /*
224 * Set bits based on the events we received from the port:
225 */
226 if (events & POLLIN)
227 n |= FD_POLL_IN;
228 if (events & POLLOUT)
229 n |= FD_POLL_OUT;
230 if (events & POLLERR)
231 n |= FD_POLL_ERR;
232 if (events & POLLHUP)
233 n |= FD_POLL_HUP;
234
235 /*
236 * Call connection processing callbacks. Note that it's
237 * possible for this processing to alter the required event
238 * port assocation; i.e., the "state" member of the "fdtab"
239 * entry. If it changes, the fd will be placed on the updated
240 * list for processing the next time we are called.
241 */
242 fd_update_events(fd, n);
243
244 /*
245 * This file descriptor was closed during the processing of
246 * polled events. No need to reassociate.
247 */
248 if (fdtab[fd].owner == NULL)
249 continue;
250
251 /*
252 * Reassociate with the port, using the same event mask as
253 * before. This call will not result in a dissociation as we
254 * asserted that _some_ events needed to be rebound above.
255 *
256 * Reassociating with the same mask allows us to mimic the
257 * level-triggered behaviour of poll(2). In the event that we
258 * are interested in the same events on the next turn of the
259 * loop, this represents no extra work.
260 *
261 * If this additional port_associate(3C) call becomes a
262 * performance problem, we would need to verify that we can
263 * correctly interact with the file descriptor cache and update
264 * list (see "src/fd.c") to avoid reassociating here, or to use
265 * a different events mask.
266 */
267 evports_resync_fd(fd, rebind_events);
268 }
269}
270
271static int init_evports_per_thread()
272{
273 int fd;
274
275 evports_evlist_max = global.tune.maxpollevents;
276 evports_evlist = calloc(evports_evlist_max, sizeof (port_event_t));
277 if (evports_evlist == NULL) {
278 goto fail_alloc;
279 }
280
281 if (MAX_THREADS > 1 && tid) {
282 if ((evports_fd[tid] = port_create()) == -1) {
283 goto fail_fd;
284 }
285 }
286
287 /* we may have to unregister some events initially registered on the
288 * original fd when it was alone, and/or to register events on the new
289 * fd for this thread. Let's just mark them as updated, the poller will
290 * do the rest.
291 */
292 for (fd = 0; fd < global.maxsock; fd++)
293 updt_fd_polling(fd);
294
295 return 1;
296
297 fail_fd:
298 free(evports_evlist);
299 evports_evlist = NULL;
300 evports_evlist_max = 0;
301 fail_alloc:
302 return 0;
303}
304
305static void deinit_evports_per_thread()
306{
307 if (MAX_THREADS > 1 && tid)
308 close(evports_fd[tid]);
309
310 free(evports_evlist);
311 evports_evlist = NULL;
312 evports_evlist_max = 0;
313}
314
315/*
316 * Initialisation of the event ports poller.
317 * Returns 0 in case of failure, non-zero in case of success.
318 */
319REGPRM1 static int _do_init(struct poller *p)
320{
321 p->private = NULL;
322
323 if ((evports_fd[tid] = port_create()) == -1) {
324 goto fail;
325 }
326
327 hap_register_per_thread_init(init_evports_per_thread);
328 hap_register_per_thread_deinit(deinit_evports_per_thread);
329
330 return 1;
331
332fail:
333 p->pref = 0;
334 return 0;
335}
336
337/*
338 * Termination of the event ports poller.
339 * All resources are released and the poller is marked as inoperative.
340 */
341REGPRM1 static void _do_term(struct poller *p)
342{
343 if (evports_fd[tid] != -1) {
344 close(evports_fd[tid]);
345 evports_fd[tid] = -1;
346 }
347
348 p->private = NULL;
349 p->pref = 0;
350
351 free(evports_evlist);
352 evports_evlist = NULL;
353 evports_evlist_max = 0;
354}
355
356/*
357 * Run-time check to make sure we can allocate the resources needed for
358 * the poller to function correctly.
359 * Returns 1 on success, otherwise 0.
360 */
361REGPRM1 static int _do_test(struct poller *p)
362{
363 int fd;
364
365 if ((fd = port_create()) == -1) {
366 return 0;
367 }
368
369 close(fd);
370 return 1;
371}
372
373/*
374 * Close and recreate the event port after fork(). Returns 1 on success,
375 * otherwise 0. If this function fails, "_do_term()" must be called to
376 * clean up the poller.
377 */
378REGPRM1 static int _do_fork(struct poller *p)
379{
380 if (evports_fd[tid] != -1) {
381 close(evports_fd[tid]);
382 }
383
384 if ((evports_fd[tid] = port_create()) == -1) {
385 return 0;
386 }
387
388 return 1;
389}
390
391/*
392 * This constructor must be called before main() to register the event ports
393 * poller.
394 */
395__attribute__((constructor))
396static void _do_register(void)
397{
398 struct poller *p;
399 int i;
400
401 if (nbpollers >= MAX_POLLERS)
402 return;
403
404 for (i = 0; i < MAX_THREADS; i++)
405 evports_fd[i] = -1;
406
407 p = &pollers[nbpollers++];
408
409 p->name = "evports";
410 p->pref = 300;
411 p->flags = 0;
412 p->private = NULL;
413
414 p->clo = NULL;
415 p->test = _do_test;
416 p->init = _do_init;
417 p->term = _do_term;
418 p->poll = _do_poll;
419 p->fork = _do_fork;
420}