blob: 3d60247125e9ee02c716851386224c7ca0dd0ed4 [file] [log] [blame]
/*
 * FD polling functions for SunOS event ports.
 *
 * Copyright 2018 Joyent, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
11
12#include <unistd.h>
13#include <sys/time.h>
14#include <sys/types.h>
15
16#include <poll.h>
17#include <port.h>
18#include <errno.h>
19#include <syslog.h>
20
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020021#include <haproxy/api.h>
Willy Tarreau3727a8a2020-06-04 17:37:26 +020022#include <haproxy/signal.h>
Willy Tarreau3f567e42020-05-28 15:29:19 +020023#include <haproxy/thread-t.h>
Willy Tarreauc2f7c582020-06-02 18:15:32 +020024#include <haproxy/ticks.h>
Willy Tarreau92b4f132020-06-01 11:05:15 +020025#include <haproxy/time.h>
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000026
Willy Tarreauf268ee82020-06-04 17:05:57 +020027#include <haproxy/global.h>
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000028
Willy Tarreaua04ded52020-06-02 10:29:48 +020029#include <haproxy/activity.h>
Willy Tarreau0f6ffd62020-06-03 19:33:00 +020030#include <haproxy/fd.h>
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000031#include <proto/log.h>
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000032
/*
 * Poller-private state.
 *
 * evports_fd holds one event port descriptor per thread and is indexed by
 * "tid". evports_evlist is the calling thread's buffer handed to port_getn(),
 * and evports_evlist_max is the number of port_event_t entries it was
 * allocated for.
 */
static int evports_fd[MAX_THREADS];
static THREAD_LOCAL port_event_t *evports_evlist = NULL;
static THREAD_LOCAL int evports_evlist_max = 0;
39
40/*
41 * Convert the "state" member of "fdtab" into an event ports event mask.
42 */
43static inline int evports_state_to_events(int state)
44{
45 int events = 0;
46
Willy Tarreau5bee3e22019-09-04 09:52:57 +020047 if (state & FD_EV_ACTIVE_W)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000048 events |= POLLOUT;
Willy Tarreau5bee3e22019-09-04 09:52:57 +020049 if (state & FD_EV_ACTIVE_R)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000050 events |= POLLIN;
51
52 return (events);
53}
54
/*
 * Synchronise the calling thread's event port with the wanted event mask for
 * <fd>: a non-zero <events> mask (re)associates the descriptor with the port,
 * while a zero mask dissociates it. The result of the port call is not
 * checked.
 */
static inline void evports_resync_fd(int fd, int events)
{
	const int port = evports_fd[tid];

	if (events != 0) {
		port_associate(port, PORT_SOURCE_FD, fd, events, NULL);
		return;
	}

	port_dissociate(port, PORT_SOURCE_FD, fd);
}
66
/*
 * Bring the calling thread's polling of <fd> in line with fdtab[fd].state.
 *
 * polled_mask[fd].poll_recv and .poll_send carry one bit per thread telling
 * whether that thread currently polls the fd for reads / writes. This function
 * updates the calling thread's bit in each of them (only touching a mask when
 * the bit actually changes) and then associates or dissociates the fd with the
 * thread's event port accordingly. Nothing is done when the fd is neither
 * wanted nor currently polled by this thread.
 */
static void _update_fd(int fd)
{
	const int state = fdtab[fd].state;
	int events;

	if ((fdtab[fd].thread_mask & tid_bit) && (state & FD_EV_ACTIVE_RW)) {
		/* the fd must be monitored: it was either added or changed */
		events = evports_state_to_events(state);

		if (state & FD_EV_ACTIVE_R) {
			if (!(polled_mask[fd].poll_recv & tid_bit))
				_HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
		}
		else if (polled_mask[fd].poll_recv & tid_bit) {
			_HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
		}

		if (state & FD_EV_ACTIVE_W) {
			if (!(polled_mask[fd].poll_send & tid_bit))
				_HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
		}
		else if (polled_mask[fd].poll_send & tid_bit) {
			_HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
		}
	}
	else {
		if (!((polled_mask[fd].poll_recv & tid_bit) ||
		      (polled_mask[fd].poll_send & tid_bit))) {
			/* the fd was not watched by this thread, and still is not */
			return;
		}

		/* the fd is completely removed from this thread's poll list */
		events = 0;
		if (polled_mask[fd].poll_recv & tid_bit)
			_HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
		if (polled_mask[fd].poll_send & tid_bit)
			_HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
	}

	evports_resync_fd(fd, events);
}
108
/*
 * Event Ports poller. This routine interacts with the file descriptor
 * management data structures and routines; see the large block comment in
 * "src/fd.c" for more information.
 *
 * <p> is not used here. <exp> is the expiration tick used to compute how long
 * to wait. A non-zero <wake> forces a zero wait time.
 *
 * The function runs in three steps: apply the pending fd updates (this
 * thread's own list, then the global update list), wait on the thread's event
 * port with port_getn(), then report the received events through
 * fd_update_events() and re-associate each fd with the port.
 */
static void _do_poll(struct poller *p, int exp, int wake)
{
	struct timespec timeout_ts;
	unsigned int nevlist;
	int wait_time;
	int fd, old_fd;
	int i;

	/* apply the updates queued in this thread's own update list */
	for (i = 0; i < fd_nbupdt; i++) {
		fd = fd_updt[i];

		_HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
		if (fdtab[fd].owner != NULL)
			_update_fd(fd);
		else
			activity[tid].poll_drop++;
	}
	fd_nbupdt = 0;

	/* Scan the global update list. -1 terminates the list; the other
	 * negative values are special markers handled below (their meaning is
	 * defined by the update list code, which is not part of this file).
	 */
	for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
		if (fd == -2) {
			fd = old_fd;
			continue;
		}
		if (fd <= -3)
			fd = -fd - 4;
		if (fd == -1)
			break;

		/* only entries flagged for this thread are of interest */
		if (!(fdtab[fd].update_mask & tid_bit))
			continue;
		done_update_polling(fd);

		if (fdtab[fd].owner)
			_update_fd(fd);
	}

	thread_harmless_now();
	if (sleeping_thread_mask & tid_bit)
		_HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);

	/* determine how long to wait for events to materialise on the port */
	wait_time = wake ? 0 : compute_poll_timeout(exp);
	tv_entering_poll();
	activity_count_runtime();

	for (;;) {
		/* with busy polling enabled, each call is made non-blocking */
		int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
		int interrupted = 0;

		nevlist = 1; /* in: minimum number of events wanted, out: number retrieved */
		timeout_ts.tv_sec  = timeout / 1000;
		timeout_ts.tv_nsec = (timeout % 1000) * 1000000;

		/*
		 * Though the manual page has not historically made it clear,
		 * port_getn() can return -1 with an errno of ETIME and still
		 * have returned some number of events, so nevlist is kept
		 * as-is in that case. Any other failure is handled as an
		 * interruption with no event.
		 */
		if (port_getn(evports_fd[tid], evports_evlist, evports_evlist_max,
		              &nevlist, &timeout_ts) != 0 && errno != ETIME) {
			nevlist = 0;
			interrupted = 1;
		}
		tv_update_date(timeout, nevlist);

		/* leave on events, on error, after a real (non-zero) wait or
		 * when no wait was requested at all.
		 */
		if (nevlist || interrupted || timeout || !wait_time)
			break;

		/* busy polling: also leave on pending signals, on a wake-up
		 * request, or once the expiration date is reached.
		 */
		if (signal_queue_len || wake ||
		    (tick_isset(exp) && tick_is_expired(exp, now_ms)))
			break;
	}

	tv_leaving_poll(wait_time, nevlist);

	thread_harmless_end();

	/* process the events that were retrieved */
	for (i = 0; i < nevlist; i++) {
		const port_event_t *ev = &evports_evlist[i];
		unsigned int n = 0;
		int events, rebind_events;

		fd = ev->portev_object;
		events = ev->portev_events;

		if (fdtab[fd].owner == NULL) {
			activity[tid].poll_dead++;
			continue;
		}

		if (!(fdtab[fd].thread_mask & tid_bit)) {
			activity[tid].poll_skip++;
			continue;
		}

		/*
		 * Receiving an event for this file descriptor means it is no
		 * longer associated with the port. Remember the event mask
		 * matching its current state so that it can be re-associated
		 * once processing is complete. This mask is expected to be
		 * non-zero.
		 */
		rebind_events = evports_state_to_events(fdtab[fd].state);

		/* convert the events received from the port into FD_EV_* bits */
		if (events & POLLIN)
			n |= FD_EV_READY_R;
		if (events & POLLOUT)
			n |= FD_EV_READY_W;
		if (events & POLLHUP)
			n |= FD_EV_SHUT_RW;
		if (events & POLLERR)
			n |= FD_EV_ERR_RW;

		/*
		 * Call connection processing callbacks. Note that it is
		 * possible for this processing to alter the required event
		 * port association, i.e. the "state" member of the "fdtab"
		 * entry. If it changes, the fd will be placed on the updated
		 * list for processing the next time we are called.
		 */
		fd_update_events(fd, n);

		/*
		 * The file descriptor was closed during the processing of
		 * polled events: there is nothing to re-associate.
		 */
		if (fdtab[fd].owner == NULL)
			continue;

		/*
		 * Re-associate with the port using the mask saved above. As
		 * that mask is expected to be non-zero, this call should not
		 * result in a dissociation.
		 *
		 * Re-associating with the same mask mimics the level-triggered
		 * behaviour of poll(2). If the same events are still wanted on
		 * the next turn of the loop, this represents no extra work.
		 *
		 * Should this additional port_associate(3C) call become a
		 * performance problem, we would need to verify that we can
		 * correctly interact with the file descriptor cache and update
		 * list (see "src/fd.c") to avoid re-associating here, or to
		 * use a different events mask.
		 */
		evports_resync_fd(fd, rebind_events);
	}
}
282
/*
 * Per-thread initialisation: allocates the calling thread's event list
 * (global.tune.maxpollevents entries) and, for every thread but the first
 * one, creates a dedicated event port. Returns 1 on success, 0 on failure.
 */
static int init_evports_per_thread()
{
	int fd;

	evports_evlist_max = global.tune.maxpollevents;
	evports_evlist = calloc(evports_evlist_max, sizeof(*evports_evlist));
	if (evports_evlist == NULL)
		return 0;

	/* the port of the first thread (tid 0) is created by _do_init() */
	if (MAX_THREADS > 1 && tid) {
		evports_fd[tid] = port_create();
		if (evports_fd[tid] == -1) {
			free(evports_evlist);
			evports_evlist = NULL;
			evports_evlist_max = 0;
			return 0;
		}
	}

	/* We may have to unregister some events initially registered on the
	 * original fd when it was alone, and/or to register events on the new
	 * fd for this thread. Let's just mark them all as updated, the poller
	 * will do the rest.
	 */
	for (fd = 0; fd < global.maxsock; fd++)
		updt_fd_polling(fd);

	return 1;
}
316
317static void deinit_evports_per_thread()
318{
319 if (MAX_THREADS > 1 && tid)
320 close(evports_fd[tid]);
321
322 free(evports_evlist);
323 evports_evlist = NULL;
324 evports_evlist_max = 0;
325}
326
/*
 * Initialisation of the event ports poller: creates the event port of the
 * calling thread and registers the per-thread init and deinit callbacks.
 * Returns 0 in case of failure (the poller's preference is then reset to 0),
 * non-zero in case of success.
 */
static int _do_init(struct poller *p)
{
	p->private = NULL;

	evports_fd[tid] = port_create();
	if (evports_fd[tid] == -1) {
		p->pref = 0;
		return 0;
	}

	hap_register_per_thread_init(init_evports_per_thread);
	hap_register_per_thread_deinit(deinit_evports_per_thread);
	return 1;
}
348
349/*
350 * Termination of the event ports poller.
351 * All resources are released and the poller is marked as inoperative.
352 */
Willy Tarreau03e78532020-02-25 07:38:05 +0100353static void _do_term(struct poller *p)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000354{
355 if (evports_fd[tid] != -1) {
356 close(evports_fd[tid]);
357 evports_fd[tid] = -1;
358 }
359
360 p->private = NULL;
361 p->pref = 0;
362
363 free(evports_evlist);
364 evports_evlist = NULL;
365 evports_evlist_max = 0;
366}
367
/*
 * Run-time check to make sure an event port can be created at all. The
 * temporary port is closed immediately.
 * Returns 1 on success, otherwise 0.
 */
static int _do_test(struct poller *p)
{
	const int port = port_create();

	if (port == -1)
		return 0;

	close(port);
	return 1;
}
384
/*
 * Close and recreate the calling thread's event port after fork(). Returns 1
 * on success, otherwise 0. If this function fails, "_do_term()" must be
 * called to clean up the poller.
 */
static int _do_fork(struct poller *p)
{
	if (evports_fd[tid] != -1)
		close(evports_fd[tid]);

	evports_fd[tid] = port_create();

	return (evports_fd[tid] == -1) ? 0 : 1;
}
402
403/*
404 * This constructor must be called before main() to register the event ports
405 * poller.
406 */
407__attribute__((constructor))
408static void _do_register(void)
409{
410 struct poller *p;
411 int i;
412
413 if (nbpollers >= MAX_POLLERS)
414 return;
415
416 for (i = 0; i < MAX_THREADS; i++)
417 evports_fd[i] = -1;
418
419 p = &pollers[nbpollers++];
420
421 p->name = "evports";
422 p->pref = 300;
Willy Tarreau11ef0832019-11-28 18:17:33 +0100423 p->flags = HAP_POLL_F_ERRHUP;
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000424 p->private = NULL;
425
426 p->clo = NULL;
427 p->test = _do_test;
428 p->init = _do_init;
429 p->term = _do_term;
430 p->poll = _do_poll;
431 p->fork = _do_fork;
432}