blob: 81cbe5138f01d5ec8001cf20ed06603ca87a9c4b [file] [log] [blame]
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +00001/*
2 * FD polling functions for SunOS event ports.
3 *
4 * Copyright 2018 Joyent, Inc.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <unistd.h>
13#include <sys/time.h>
14#include <sys/types.h>
15
16#include <poll.h>
17#include <port.h>
18#include <errno.h>
19#include <syslog.h>
20
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020021#include <haproxy/api.h>
Willy Tarreau3727a8a2020-06-04 17:37:26 +020022#include <haproxy/signal.h>
Willy Tarreau3f567e42020-05-28 15:29:19 +020023#include <haproxy/thread-t.h>
Willy Tarreauc2f7c582020-06-02 18:15:32 +020024#include <haproxy/ticks.h>
Willy Tarreau92b4f132020-06-01 11:05:15 +020025#include <haproxy/time.h>
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000026
Willy Tarreauf268ee82020-06-04 17:05:57 +020027#include <haproxy/global.h>
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000028
Willy Tarreaua04ded52020-06-02 10:29:48 +020029#include <haproxy/activity.h>
Willy Tarreau0f6ffd62020-06-03 19:33:00 +020030#include <haproxy/fd.h>
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000031
32/*
33 * Private data:
34 */
35static int evports_fd[MAX_THREADS]; // per-thread evports_fd
36static THREAD_LOCAL port_event_t *evports_evlist = NULL;
37static THREAD_LOCAL int evports_evlist_max = 0;
38
39/*
40 * Convert the "state" member of "fdtab" into an event ports event mask.
41 */
42static inline int evports_state_to_events(int state)
43{
44 int events = 0;
45
Willy Tarreau5bee3e22019-09-04 09:52:57 +020046 if (state & FD_EV_ACTIVE_W)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000047 events |= POLLOUT;
Willy Tarreau5bee3e22019-09-04 09:52:57 +020048 if (state & FD_EV_ACTIVE_R)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000049 events |= POLLIN;
50
51 return (events);
52}
53
54/*
55 * Associate or dissociate this file descriptor with the event port, using the
56 * specified event mask.
57 */
58static inline void evports_resync_fd(int fd, int events)
59{
60 if (events == 0)
61 port_dissociate(evports_fd[tid], PORT_SOURCE_FD, fd);
62 else
63 port_associate(evports_fd[tid], PORT_SOURCE_FD, fd, events, NULL);
64}
65
66static void _update_fd(int fd)
67{
68 int en;
69 int events;
70
71 en = fdtab[fd].state;
72
Willy Tarreau5bee3e22019-09-04 09:52:57 +020073 if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) {
Olivier Houchard53055052019-07-25 14:00:18 +000074 if (!(polled_mask[fd].poll_recv & tid_bit) &&
75 !(polled_mask[fd].poll_send & tid_bit)) {
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000076 /* fd was not watched, it's still not */
77 return;
78 }
79 /* fd totally removed from poll list */
80 events = 0;
Olivier Houchard53055052019-07-25 14:00:18 +000081 if (polled_mask[fd].poll_recv & tid_bit)
82 _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
83 if (polled_mask[fd].poll_send & tid_bit)
84 _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +000085 }
86 else {
87 /* OK fd has to be monitored, it was either added or changed */
88 events = evports_state_to_events(en);
Willy Tarreau5bee3e22019-09-04 09:52:57 +020089 if (en & FD_EV_ACTIVE_R) {
Olivier Houchard53055052019-07-25 14:00:18 +000090 if (!(polled_mask[fd].poll_recv & tid_bit))
91 _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
92 } else {
93 if (polled_mask[fd].poll_recv & tid_bit)
94 _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit);
95 }
Willy Tarreau5bee3e22019-09-04 09:52:57 +020096 if (en & FD_EV_ACTIVE_W) {
Olivier Houchard53055052019-07-25 14:00:18 +000097 if (!(polled_mask[fd].poll_send & tid_bit))
98 _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
99 } else {
100 if (polled_mask[fd].poll_send & tid_bit)
101 _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit);
102 }
103
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000104 }
105 evports_resync_fd(fd, events);
106}
107
108/*
109 * Event Ports poller. This routine interacts with the file descriptor
110 * management data structures and routines; see the large block comment in
111 * "src/fd.c" for more information.
112 */
113
Willy Tarreau03e78532020-02-25 07:38:05 +0100114static void _do_poll(struct poller *p, int exp, int wake)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000115{
116 int i;
117 int wait_time;
118 struct timespec timeout_ts;
119 unsigned int nevlist;
120 int fd, old_fd;
121 int status;
122
123 /*
124 * Scan the list of file descriptors with an updated status:
125 */
126 for (i = 0; i < fd_nbupdt; i++) {
127 fd = fd_updt[i];
128
129 _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
130 if (fdtab[fd].owner == NULL) {
131 activity[tid].poll_drop++;
132 continue;
133 }
134
135 _update_fd(fd);
136 }
137 fd_nbupdt = 0;
138 /* Scan the global update list */
139 for (old_fd = fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
140 if (fd == -2) {
141 fd = old_fd;
142 continue;
143 }
144 else if (fd <= -3)
145 fd = -fd -4;
146 if (fd == -1)
147 break;
148 if (fdtab[fd].update_mask & tid_bit)
149 done_update_polling(fd);
150 else
151 continue;
152 if (!fdtab[fd].owner)
153 continue;
154 _update_fd(fd);
155 }
156
157 thread_harmless_now();
Olivier Houchard305d5ab2019-07-24 18:07:06 +0200158 if (sleeping_thread_mask & tid_bit)
159 _HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000160
161 /*
162 * Determine how long to wait for events to materialise on the port.
163 */
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200164 wait_time = wake ? 0 : compute_poll_timeout(exp);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000165 tv_entering_poll();
166 activity_count_runtime();
167
168 do {
169 int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
170 int interrupted = 0;
171 nevlist = 1; /* desired number of events to be retrieved */
172 timeout_ts.tv_sec = (timeout / 1000);
173 timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
174
175 status = port_getn(evports_fd[tid],
176 evports_evlist,
177 evports_evlist_max,
178 &nevlist, /* updated to the number of events retrieved */
179 &timeout_ts);
180 if (status != 0) {
181 int e = errno;
182 switch (e) {
183 case ETIME:
184 /*
185 * Though the manual page has not historically made it
186 * clear, port_getn() can return -1 with an errno of
187 * ETIME and still have returned some number of events.
188 */
189 /* nevlist >= 0 */
190 break;
191 default:
192 nevlist = 0;
193 interrupted = 1;
194 break;
195 }
196 }
197 tv_update_date(timeout, nevlist);
198
199 if (nevlist || interrupted)
200 break;
201 if (timeout || !wait_time)
202 break;
Willy Tarreau2ae84e42019-05-28 16:44:05 +0200203 if (signal_queue_len || wake)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000204 break;
205 if (tick_isset(exp) && tick_is_expired(exp, now_ms))
206 break;
207 } while(1);
208
209 tv_leaving_poll(wait_time, nevlist);
210
211 thread_harmless_end();
212
213 for (i = 0; i < nevlist; i++) {
214 unsigned int n = 0;
215 int events, rebind_events;
216 fd = evports_evlist[i].portev_object;
217 events = evports_evlist[i].portev_events;
218
219 if (fdtab[fd].owner == NULL) {
220 activity[tid].poll_dead++;
221 continue;
222 }
223
224 if (!(fdtab[fd].thread_mask & tid_bit)) {
225 activity[tid].poll_skip++;
226 continue;
227 }
228
229 /*
230 * By virtue of receiving an event for this file descriptor, it
231 * is no longer associated with the port in question. Store
232 * the previous event mask so that we may reassociate after
233 * processing is complete.
234 */
235 rebind_events = evports_state_to_events(fdtab[fd].state);
236 /* rebind_events != 0 */
237
238 /*
239 * Set bits based on the events we received from the port:
240 */
Emmanuel Hocdet7ceb96b2019-09-19 11:08:26 +0000241 n = ((events & POLLIN) ? FD_EV_READY_R : 0) |
242 ((events & POLLOUT) ? FD_EV_READY_W : 0) |
243 ((events & POLLHUP) ? FD_EV_SHUT_RW : 0) |
244 ((events & POLLERR) ? FD_EV_ERR_RW : 0);
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000245
246 /*
247 * Call connection processing callbacks. Note that it's
248 * possible for this processing to alter the required event
Ilya Shipitsince7b00f2020-03-23 22:28:40 +0500249 * port association; i.e., the "state" member of the "fdtab"
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000250 * entry. If it changes, the fd will be placed on the updated
251 * list for processing the next time we are called.
252 */
253 fd_update_events(fd, n);
254
255 /*
256 * This file descriptor was closed during the processing of
257 * polled events. No need to reassociate.
258 */
259 if (fdtab[fd].owner == NULL)
260 continue;
261
262 /*
263 * Reassociate with the port, using the same event mask as
264 * before. This call will not result in a dissociation as we
265 * asserted that _some_ events needed to be rebound above.
266 *
267 * Reassociating with the same mask allows us to mimic the
268 * level-triggered behaviour of poll(2). In the event that we
269 * are interested in the same events on the next turn of the
270 * loop, this represents no extra work.
271 *
272 * If this additional port_associate(3C) call becomes a
273 * performance problem, we would need to verify that we can
274 * correctly interact with the file descriptor cache and update
275 * list (see "src/fd.c") to avoid reassociating here, or to use
276 * a different events mask.
277 */
278 evports_resync_fd(fd, rebind_events);
279 }
280}
281
282static int init_evports_per_thread()
283{
284 int fd;
285
286 evports_evlist_max = global.tune.maxpollevents;
287 evports_evlist = calloc(evports_evlist_max, sizeof (port_event_t));
288 if (evports_evlist == NULL) {
289 goto fail_alloc;
290 }
291
292 if (MAX_THREADS > 1 && tid) {
293 if ((evports_fd[tid] = port_create()) == -1) {
294 goto fail_fd;
295 }
296 }
297
298 /* we may have to unregister some events initially registered on the
299 * original fd when it was alone, and/or to register events on the new
300 * fd for this thread. Let's just mark them as updated, the poller will
301 * do the rest.
302 */
303 for (fd = 0; fd < global.maxsock; fd++)
304 updt_fd_polling(fd);
305
306 return 1;
307
308 fail_fd:
309 free(evports_evlist);
310 evports_evlist = NULL;
311 evports_evlist_max = 0;
312 fail_alloc:
313 return 0;
314}
315
316static void deinit_evports_per_thread()
317{
318 if (MAX_THREADS > 1 && tid)
319 close(evports_fd[tid]);
320
321 free(evports_evlist);
322 evports_evlist = NULL;
323 evports_evlist_max = 0;
324}
325
326/*
327 * Initialisation of the event ports poller.
328 * Returns 0 in case of failure, non-zero in case of success.
329 */
Willy Tarreau03e78532020-02-25 07:38:05 +0100330static int _do_init(struct poller *p)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000331{
332 p->private = NULL;
333
334 if ((evports_fd[tid] = port_create()) == -1) {
335 goto fail;
336 }
337
338 hap_register_per_thread_init(init_evports_per_thread);
339 hap_register_per_thread_deinit(deinit_evports_per_thread);
340
341 return 1;
342
343fail:
344 p->pref = 0;
345 return 0;
346}
347
348/*
349 * Termination of the event ports poller.
350 * All resources are released and the poller is marked as inoperative.
351 */
Willy Tarreau03e78532020-02-25 07:38:05 +0100352static void _do_term(struct poller *p)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000353{
354 if (evports_fd[tid] != -1) {
355 close(evports_fd[tid]);
356 evports_fd[tid] = -1;
357 }
358
359 p->private = NULL;
360 p->pref = 0;
361
362 free(evports_evlist);
363 evports_evlist = NULL;
364 evports_evlist_max = 0;
365}
366
/*
 * Run-time check to make sure we can allocate the resources needed for
 * the poller to function correctly: an event port is created and closed
 * again at once. <p> is not used.
 * Returns 1 on success, otherwise 0.
 */
static int _do_test(struct poller *p)
{
	int port = port_create();

	if (port == -1)
		return 0;

	close(port);
	return 1;
}
383
384/*
385 * Close and recreate the event port after fork(). Returns 1 on success,
386 * otherwise 0. If this function fails, "_do_term()" must be called to
387 * clean up the poller.
388 */
Willy Tarreau03e78532020-02-25 07:38:05 +0100389static int _do_fork(struct poller *p)
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000390{
391 if (evports_fd[tid] != -1) {
392 close(evports_fd[tid]);
393 }
394
395 if ((evports_fd[tid] = port_create()) == -1) {
396 return 0;
397 }
398
399 return 1;
400}
401
402/*
403 * This constructor must be called before main() to register the event ports
404 * poller.
405 */
406__attribute__((constructor))
407static void _do_register(void)
408{
409 struct poller *p;
410 int i;
411
412 if (nbpollers >= MAX_POLLERS)
413 return;
414
415 for (i = 0; i < MAX_THREADS; i++)
416 evports_fd[i] = -1;
417
418 p = &pollers[nbpollers++];
419
420 p->name = "evports";
421 p->pref = 300;
Willy Tarreau11ef0832019-11-28 18:17:33 +0100422 p->flags = HAP_POLL_F_ERRHUP;
Emmanuel Hocdet0ba4f482019-04-08 16:53:32 +0000423 p->private = NULL;
424
425 p->clo = NULL;
426 p->test = _do_test;
427 p->init = _do_init;
428 p->term = _do_term;
429 p->poll = _do_poll;
430 p->fork = _do_fork;
431}