blob: 4f5a2d1126c520e0b7b56f883268e8b2cbd9ee1b [file] [log] [blame]
/*
 * FD polling functions for generic select()
 *
 * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <unistd.h>
14#include <sys/time.h>
15#include <sys/types.h>
16
17#include <common/compat.h>
18#include <common/config.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020019#include <common/ticks.h>
Willy Tarreau4f60f162007-04-08 16:39:58 +020020#include <common/time.h>
21
Willy Tarreau4f60f162007-04-08 16:39:58 +020022#include <types/global.h>
23
24#include <proto/fd.h>
Willy Tarreau4f60f162007-04-08 16:39:58 +020025
26
Christopher Fauletd4604ad2017-05-29 10:40:41 +020027/* private data */
Willy Tarreau28d86862007-04-08 17:42:27 +020028static fd_set *fd_evts[2];
Christopher Fauletd4604ad2017-05-29 10:40:41 +020029static THREAD_LOCAL fd_set *tmp_evts[2];
Willy Tarreau4f60f162007-04-08 16:39:58 +020030
Willy Tarreau4d31fb22012-11-11 16:53:50 +010031/* Immediately remove the entry upon close() */
32REGPRM1 static void __fd_clo(int fd)
Willy Tarreau4f60f162007-04-08 16:39:58 +020033{
Christopher Fauletd4604ad2017-05-29 10:40:41 +020034 SPIN_LOCK(POLL_LOCK, &poll_lock);
Willy Tarreau28d86862007-04-08 17:42:27 +020035 FD_CLR(fd, fd_evts[DIR_RD]);
36 FD_CLR(fd, fd_evts[DIR_WR]);
Christopher Fauletd4604ad2017-05-29 10:40:41 +020037 SPIN_UNLOCK(POLL_LOCK, &poll_lock);
Willy Tarreau4f60f162007-04-08 16:39:58 +020038}
39
Willy Tarreau4f60f162007-04-08 16:39:58 +020040/*
41 * Select() poller
42 */
Willy Tarreau0c303ee2008-07-07 00:09:58 +020043REGPRM2 static void _do_poll(struct poller *p, int exp)
Willy Tarreau4f60f162007-04-08 16:39:58 +020044{
45 int status;
46 int fd, i;
47 struct timeval delta;
Willy Tarreaub0b37bc2008-06-23 14:00:57 +020048 int delta_ms;
Willy Tarreau4f60f162007-04-08 16:39:58 +020049 int fds;
Willy Tarreau4d31fb22012-11-11 16:53:50 +010050 int updt_idx, en, eo;
Willy Tarreau4f60f162007-04-08 16:39:58 +020051 char count;
Christopher Fauletd4604ad2017-05-29 10:40:41 +020052 int readnotnull, writenotnull;
53
Willy Tarreau4d31fb22012-11-11 16:53:50 +010054 /* first, scan the update list to find changes */
55 for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
56 fd = fd_updt[updt_idx];
Willy Tarreau4d31fb22012-11-11 16:53:50 +010057
Willy Tarreauf817e9f2014-01-10 16:58:45 +010058 if (!fdtab[fd].owner)
59 continue;
Willy Tarreau4d31fb22012-11-11 16:53:50 +010060
Christopher Fauletd4604ad2017-05-29 10:40:41 +020061 SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
62 fdtab[fd].updated = 0;
63 fdtab[fd].new = 0;
64
Willy Tarreau25002d22014-01-25 10:32:56 +010065 eo = fdtab[fd].state;
66 en = fd_compute_new_polled_status(eo);
Christopher Fauletd4604ad2017-05-29 10:40:41 +020067 fdtab[fd].state = en;
68 SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
Willy Tarreauf817e9f2014-01-10 16:58:45 +010069
70 if ((eo ^ en) & FD_EV_POLLED_RW) {
71 /* poll status changed, update the lists */
Christopher Fauletd4604ad2017-05-29 10:40:41 +020072 SPIN_LOCK(POLL_LOCK, &poll_lock);
Willy Tarreauf817e9f2014-01-10 16:58:45 +010073 if ((eo & ~en) & FD_EV_POLLED_R)
74 FD_CLR(fd, fd_evts[DIR_RD]);
75 else if ((en & ~eo) & FD_EV_POLLED_R)
76 FD_SET(fd, fd_evts[DIR_RD]);
77
78 if ((eo & ~en) & FD_EV_POLLED_W)
79 FD_CLR(fd, fd_evts[DIR_WR]);
80 else if ((en & ~eo) & FD_EV_POLLED_W)
81 FD_SET(fd, fd_evts[DIR_WR]);
Christopher Fauletd4604ad2017-05-29 10:40:41 +020082 SPIN_UNLOCK(POLL_LOCK, &poll_lock);
Willy Tarreauf817e9f2014-01-10 16:58:45 +010083 }
Willy Tarreau4d31fb22012-11-11 16:53:50 +010084 }
85 fd_nbupdt = 0;
86
Christopher Fauletd4604ad2017-05-29 10:40:41 +020087 /* let's restore fdset state */
88 readnotnull = 0; writenotnull = 0;
89 for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) {
90 readnotnull |= (*(((int*)tmp_evts[DIR_RD])+i) = *(((int*)fd_evts[DIR_RD])+i)) != 0;
91 writenotnull |= (*(((int*)tmp_evts[DIR_WR])+i) = *(((int*)fd_evts[DIR_WR])+i)) != 0;
92 }
93
94#if 0
95 /* just a verification code, needs to be removed for performance */
96 for (i=0; i<maxfd; i++) {
97 if (FD_ISSET(i, tmp_evts[DIR_RD]) != FD_ISSET(i, fd_evts[DIR_RD]))
98 abort();
99 if (FD_ISSET(i, tmp_evts[DIR_WR]) != FD_ISSET(i, fd_evts[DIR_WR]))
100 abort();
101 }
102#endif
103
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200104 delta_ms = 0;
105 delta.tv_sec = 0;
106 delta.tv_usec = 0;
107
Willy Tarreau10146c92015-04-13 20:44:19 +0200108 if (!exp) {
109 delta_ms = MAX_DELAY_MS;
110 delta.tv_sec = (MAX_DELAY_MS / 1000);
111 delta.tv_usec = (MAX_DELAY_MS % 1000) * 1000;
112 }
113 else if (!tick_is_expired(exp, now_ms)) {
114 delta_ms = TICKS_TO_MS(tick_remain(now_ms, exp)) + SCHEDULER_RESOLUTION;
115 if (delta_ms > MAX_DELAY_MS)
116 delta_ms = MAX_DELAY_MS;
117 delta.tv_sec = (delta_ms / 1000);
118 delta.tv_usec = (delta_ms % 1000) * 1000;
Willy Tarreau4f60f162007-04-08 16:39:58 +0200119 }
120
Willy Tarreau45a12512011-09-10 16:56:42 +0200121 gettimeofday(&before_poll, NULL);
Willy Tarreau4f60f162007-04-08 16:39:58 +0200122 status = select(maxfd,
Willy Tarreau28d86862007-04-08 17:42:27 +0200123 readnotnull ? tmp_evts[DIR_RD] : NULL,
124 writenotnull ? tmp_evts[DIR_WR] : NULL,
Willy Tarreau4f60f162007-04-08 16:39:58 +0200125 NULL,
Willy Tarreaub0b37bc2008-06-23 14:00:57 +0200126 &delta);
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200127
Willy Tarreaub0b37bc2008-06-23 14:00:57 +0200128 tv_update_date(delta_ms, status);
Willy Tarreau45a12512011-09-10 16:56:42 +0200129 measure_idle();
Willy Tarreau4f60f162007-04-08 16:39:58 +0200130
131 if (status <= 0)
132 return;
133
Willy Tarreau177e2b02008-07-15 00:36:31 +0200134 for (fds = 0; (fds * BITS_PER_INT) < maxfd; fds++) {
Willy Tarreau28d86862007-04-08 17:42:27 +0200135 if ((((int *)(tmp_evts[DIR_RD]))[fds] | ((int *)(tmp_evts[DIR_WR]))[fds]) == 0)
Willy Tarreau4f60f162007-04-08 16:39:58 +0200136 continue;
137
Willy Tarreau177e2b02008-07-15 00:36:31 +0200138 for (count = BITS_PER_INT, fd = fds * BITS_PER_INT; count && fd < maxfd; count--, fd++) {
Christopher Fauletab62f512017-08-30 10:34:36 +0200139 unsigned int n = 0;
140
Willy Tarreau4f60f162007-04-08 16:39:58 +0200141 /* if we specify read first, the accepts and zero reads will be
142 * seen first. Moreover, system buffers will be flushed faster.
143 */
Christopher Faulet63e2ce62017-06-02 14:36:39 +0200144 if (!fdtab[fd].owner || !(fdtab[fd].process_mask & tid_bit))
Willy Tarreau076be252012-07-06 16:02:29 +0200145 continue;
146
Willy Tarreau076be252012-07-06 16:02:29 +0200147 if (FD_ISSET(fd, tmp_evts[DIR_RD]))
Christopher Fauletab62f512017-08-30 10:34:36 +0200148 n |= FD_POLL_IN;
Willy Tarreau4f60f162007-04-08 16:39:58 +0200149
Willy Tarreau076be252012-07-06 16:02:29 +0200150 if (FD_ISSET(fd, tmp_evts[DIR_WR]))
Christopher Fauletab62f512017-08-30 10:34:36 +0200151 n |= FD_POLL_OUT;
Willy Tarreau5be2f352014-11-19 19:43:05 +0100152
Christopher Fauletab62f512017-08-30 10:34:36 +0200153 fd_update_events(fd, n);
Willy Tarreau4f60f162007-04-08 16:39:58 +0200154 }
155 }
156}
157
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200158static int init_select_per_thread()
159{
160 int fd_set_bytes;
161
162 fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE;
163 if ((tmp_evts[DIR_RD] = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
164 goto fail;
165 if ((tmp_evts[DIR_WR] = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
166 goto fail;
167 return 1;
168 fail:
169 free(tmp_evts[DIR_RD]);
170 free(tmp_evts[DIR_WR]);
171 return 0;
172}
173
174static void deinit_select_per_thread()
175{
Christopher Fauletcd7879a2017-10-27 13:53:47 +0200176 free(tmp_evts[DIR_WR]); tmp_evts[DIR_WR] = NULL;
177 free(tmp_evts[DIR_RD]); tmp_evts[DIR_RD] = NULL;
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200178}
179
Willy Tarreau4f60f162007-04-08 16:39:58 +0200180/*
Willy Tarreaue54e9172007-04-09 09:23:31 +0200181 * Initialization of the select() poller.
182 * Returns 0 in case of failure, non-zero in case of success. If it fails, it
183 * disables the poller by setting its pref to 0.
184 */
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200185REGPRM1 static int _do_init(struct poller *p)
Willy Tarreaue54e9172007-04-09 09:23:31 +0200186{
Christopher Fauletd4604ad2017-05-29 10:40:41 +0200187 __label__ fail_swevt, fail_srevt, fail_revt;
Willy Tarreaue54e9172007-04-09 09:23:31 +0200188 int fd_set_bytes;
189
190 p->private = NULL;
Willy Tarreau3fa87b12013-03-31 14:41:15 +0200191
192 if (global.maxsock > FD_SETSIZE)
193 goto fail_revt;
194
Willy Tarreaue54e9172007-04-09 09:23:31 +0200195 fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE;
Willy Tarreaue54e9172007-04-09 09:23:31 +0200196
197 if ((fd_evts[DIR_RD] = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
198 goto fail_srevt;
Willy Tarreaue54e9172007-04-09 09:23:31 +0200199 if ((fd_evts[DIR_WR] = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
200 goto fail_swevt;
201
Christopher Fauletcd7879a2017-10-27 13:53:47 +0200202 hap_register_per_thread_init(init_select_per_thread);
203 hap_register_per_thread_deinit(deinit_select_per_thread);
204
Willy Tarreaue54e9172007-04-09 09:23:31 +0200205 return 1;
206
207 fail_swevt:
208 free(fd_evts[DIR_RD]);
209 fail_srevt:
210 free(tmp_evts[DIR_WR]);
Willy Tarreaue54e9172007-04-09 09:23:31 +0200211 free(tmp_evts[DIR_RD]);
212 fail_revt:
213 p->pref = 0;
214 return 0;
215}
216
217/*
218 * Termination of the select() poller.
219 * Memory is released and the poller is marked as unselectable.
220 */
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200221REGPRM1 static void _do_term(struct poller *p)
Willy Tarreaue54e9172007-04-09 09:23:31 +0200222{
Willy Tarreaua534fea2008-08-03 12:19:50 +0200223 free(fd_evts[DIR_WR]);
224 free(fd_evts[DIR_RD]);
Willy Tarreaue54e9172007-04-09 09:23:31 +0200225 p->private = NULL;
226 p->pref = 0;
227}
228
229/*
Willy Tarreau2ff76222007-04-09 19:29:56 +0200230 * Check that the poller works.
231 * Returns 1 if OK, otherwise 0.
232 */
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200233REGPRM1 static int _do_test(struct poller *p)
Willy Tarreau2ff76222007-04-09 19:29:56 +0200234{
Willy Tarreau3fa87b12013-03-31 14:41:15 +0200235 if (global.maxsock > FD_SETSIZE)
236 return 0;
237
Willy Tarreau2ff76222007-04-09 19:29:56 +0200238 return 1;
239}
240
241/*
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200242 * It is a constructor, which means that it will automatically be called before
243 * main(). This is GCC-specific but it works at least since 2.95.
244 * Special care must be taken so that it does not need any uninitialized data.
Willy Tarreau4f60f162007-04-08 16:39:58 +0200245 */
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200246__attribute__((constructor))
247static void _do_register(void)
Willy Tarreau4f60f162007-04-08 16:39:58 +0200248{
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200249 struct poller *p;
250
251 if (nbpollers >= MAX_POLLERS)
252 return;
253 p = &pollers[nbpollers++];
254
Willy Tarreau4f60f162007-04-08 16:39:58 +0200255 p->name = "select";
256 p->pref = 150;
Willy Tarreau5a767692017-03-13 11:38:28 +0100257 p->flags = 0;
Willy Tarreau4f60f162007-04-08 16:39:58 +0200258 p->private = NULL;
259
Willy Tarreau70c6fd82012-11-11 21:02:34 +0100260 p->clo = __fd_clo;
Willy Tarreauef1d1f82007-04-16 00:25:25 +0200261 p->test = _do_test;
262 p->init = _do_init;
263 p->term = _do_term;
264 p->poll = _do_poll;
Willy Tarreau4f60f162007-04-08 16:39:58 +0200265}
266
267
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */