blob: 5b2495db28d4535799d5c8a7ca8bb4ef276ef251 [file] [log] [blame]
Willy Tarreaudd815982007-10-16 12:25:14 +02001/*
Willy Tarreaud1d54542012-09-12 22:58:11 +02002 * Listener management functions.
Willy Tarreaudd815982007-10-16 12:25:14 +02003 *
Willy Tarreau0ccb7442013-01-07 22:54:17 +01004 * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
Willy Tarreaudd815982007-10-16 12:25:14 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau44489252014-01-14 17:52:01 +010013#define _GNU_SOURCE
Willy Tarreau6ae1ba62014-05-07 19:01:58 +020014#include <ctype.h>
Willy Tarreaubbebbbf2012-05-07 21:22:09 +020015#include <errno.h>
Willy Tarreaudd815982007-10-16 12:25:14 +020016#include <stdio.h>
17#include <string.h>
Willy Tarreau95ccdde2014-02-01 09:28:36 +010018#include <unistd.h>
19#include <fcntl.h>
Willy Tarreaudd815982007-10-16 12:25:14 +020020
Willy Tarreau1bc4aab2012-10-08 20:11:03 +020021#include <common/accept4.h>
Christopher Fauletf1f0c5f2017-11-22 12:06:43 +010022#include <common/cfgparse.h>
Willy Tarreaudd815982007-10-16 12:25:14 +020023#include <common/config.h>
Willy Tarreaudabf2e22007-10-28 21:59:24 +010024#include <common/errors.h>
Willy Tarreau0108d902018-11-25 19:14:37 +010025#include <common/initcall.h>
Willy Tarreaudd815982007-10-16 12:25:14 +020026#include <common/mini-clist.h>
27#include <common/standard.h>
Willy Tarreaubbebbbf2012-05-07 21:22:09 +020028#include <common/time.h>
29
30#include <types/global.h>
Willy Tarreaud1d54542012-09-12 22:58:11 +020031#include <types/protocol.h>
Willy Tarreaudd815982007-10-16 12:25:14 +020032
Willy Tarreau645513a2010-05-24 20:55:15 +020033#include <proto/acl.h>
Christopher Fauleta717b992018-04-10 14:43:00 +020034#include <proto/connection.h>
Willy Tarreaub648d632007-10-28 22:13:50 +010035#include <proto/fd.h>
Willy Tarreaubbebbbf2012-05-07 21:22:09 +020036#include <proto/freq_ctr.h>
37#include <proto/log.h>
Willy Tarreau7a798e52016-04-14 11:13:20 +020038#include <proto/listener.h>
Willy Tarreau0de59fd2017-09-15 08:10:44 +020039#include <proto/protocol.h>
William Lallemand2fe7dd02018-09-11 16:51:29 +020040#include <proto/proto_sockpair.h>
Willy Tarreau0ccb7442013-01-07 22:54:17 +010041#include <proto/sample.h>
Willy Tarreaufb0afa72015-04-03 14:46:27 +020042#include <proto/stream.h>
Willy Tarreaubbebbbf2012-05-07 21:22:09 +020043#include <proto/task.h>
Willy Tarreaub648d632007-10-28 22:13:50 +010044
Willy Tarreau26982662012-09-12 23:17:10 +020045/* List head of all known bind keywords */
46static struct bind_kw_list bind_keywords = {
47 .list = LIST_HEAD_INIT(bind_keywords.list)
48};
49
Olivier Houchardf73629d2017-04-05 22:33:04 +020050struct xfer_sock_list *xfer_sock_list = NULL;
51
/* There is one listener queue per thread so that a thread unblocking the
 * global queue can wake up listeners bound only to foreign threads by
 * moving them to the remote queues and waking up the associated tasklet.
 */
56static struct work_list *local_listener_queue;
57
Willy Tarreaua1d97f82019-12-10 11:18:41 +010058/* list of the temporarily limited listeners because of lack of resource */
59static struct mt_list global_listener_queue = MT_LIST_HEAD_INIT(global_listener_queue);
60static struct task *global_listener_queue_task;
61static struct task *manage_global_listener_queue(struct task *t, void *context, unsigned short state);
62
63
Willy Tarreau1efafce2019-01-27 15:37:19 +010064#if defined(USE_THREAD)
65
66struct accept_queue_ring accept_queue_rings[MAX_THREADS] __attribute__((aligned(64))) = { };
67
68/* dequeue and process a pending connection from the local accept queue (single
69 * consumer). Returns the accepted fd or -1 if none was found. The listener is
70 * placed into *li. The address is copied into *addr for no more than *addr_len
71 * bytes, and the address length is returned into *addr_len.
72 */
73int accept_queue_pop_sc(struct accept_queue_ring *ring, struct listener **li, void *addr, int *addr_len)
74{
75 struct accept_queue_entry *e;
76 unsigned int pos, next;
77 struct listener *ptr;
78 int len;
79 int fd;
80
81 pos = ring->head;
82
83 if (pos == ring->tail)
84 return -1;
85
86 next = pos + 1;
87 if (next >= ACCEPT_QUEUE_SIZE)
88 next = 0;
89
90 e = &ring->entry[pos];
91
92 /* wait for the producer to update the listener's pointer */
93 while (1) {
94 ptr = e->listener;
95 __ha_barrier_load();
96 if (ptr)
97 break;
98 pl_cpu_relax();
99 }
100
101 fd = e->fd;
102 len = e->addr_len;
103 if (len > *addr_len)
104 len = *addr_len;
105
106 if (likely(len > 0))
107 memcpy(addr, &e->addr, len);
108
109 /* release the entry */
110 e->listener = NULL;
111
112 __ha_barrier_store();
113 ring->head = next;
114
115 *addr_len = len;
116 *li = ptr;
117
118 return fd;
119}
120
121
122/* tries to push a new accepted connection <fd> into ring <ring> for listener
123 * <li>, from address <addr> whose length is <addr_len>. Returns non-zero if it
124 * succeeds, or zero if the ring is full. Supports multiple producers.
125 */
126int accept_queue_push_mp(struct accept_queue_ring *ring, int fd,
127 struct listener *li, const void *addr, int addr_len)
128{
129 struct accept_queue_entry *e;
130 unsigned int pos, next;
131
132 pos = ring->tail;
133 do {
134 next = pos + 1;
135 if (next >= ACCEPT_QUEUE_SIZE)
136 next = 0;
137 if (next == ring->head)
138 return 0; // ring full
Olivier Houchard64213e92019-03-08 18:52:57 +0100139 } while (unlikely(!_HA_ATOMIC_CAS(&ring->tail, &pos, next)));
Willy Tarreau1efafce2019-01-27 15:37:19 +0100140
141
142 e = &ring->entry[pos];
143
144 if (addr_len > sizeof(e->addr))
145 addr_len = sizeof(e->addr);
146
147 if (addr_len)
148 memcpy(&e->addr, addr, addr_len);
149
150 e->addr_len = addr_len;
151 e->fd = fd;
152
153 __ha_barrier_store();
154 /* now commit the change */
155
156 e->listener = li;
157 return 1;
158}
159
160/* proceed with accepting new connections */
161static struct task *accept_queue_process(struct task *t, void *context, unsigned short state)
162{
163 struct accept_queue_ring *ring = context;
164 struct listener *li;
165 struct sockaddr_storage addr;
Christopher Faulet102854c2019-04-30 12:17:13 +0200166 unsigned int max_accept;
Willy Tarreau1efafce2019-01-27 15:37:19 +0100167 int addr_len;
168 int ret;
169 int fd;
170
Christopher Faulet102854c2019-04-30 12:17:13 +0200171 /* if global.tune.maxaccept is -1, then max_accept is UINT_MAX. It
172 * is not really illimited, but it is probably enough.
173 */
174 max_accept = global.tune.maxaccept ? global.tune.maxaccept : 64;
175 for (; max_accept; max_accept--) {
Willy Tarreau1efafce2019-01-27 15:37:19 +0100176 addr_len = sizeof(addr);
177 fd = accept_queue_pop_sc(ring, &li, &addr, &addr_len);
178 if (fd < 0)
179 break;
180
Olivier Houchard64213e92019-03-08 18:52:57 +0100181 _HA_ATOMIC_ADD(&li->thr_conn[tid], 1);
Willy Tarreau1efafce2019-01-27 15:37:19 +0100182 ret = li->accept(li, fd, &addr);
183 if (ret <= 0) {
184 /* connection was terminated by the application */
185 continue;
186 }
187
188 /* increase the per-process number of cumulated sessions, this
189 * may only be done once l->accept() has accepted the connection.
190 */
191 if (!(li->options & LI_O_UNLIMITED)) {
192 HA_ATOMIC_UPDATE_MAX(&global.sps_max,
193 update_freq_ctr(&global.sess_per_sec, 1));
194 if (li->bind_conf && li->bind_conf->is_ssl) {
195 HA_ATOMIC_UPDATE_MAX(&global.ssl_max,
196 update_freq_ctr(&global.ssl_per_sec, 1));
197 }
198 }
199 }
200
201 /* ran out of budget ? Let's come here ASAP */
Christopher Faulet102854c2019-04-30 12:17:13 +0200202 if (!max_accept)
Willy Tarreau2bd65a72019-09-24 06:55:18 +0200203 tasklet_wakeup(ring->tasklet);
Willy Tarreau1efafce2019-01-27 15:37:19 +0100204
Willy Tarreau2bd65a72019-09-24 06:55:18 +0200205 return NULL;
Willy Tarreau1efafce2019-01-27 15:37:19 +0100206}
207
208/* Initializes the accept-queues. Returns 0 on success, otherwise ERR_* flags */
209static int accept_queue_init()
210{
Willy Tarreau2bd65a72019-09-24 06:55:18 +0200211 struct tasklet *t;
Willy Tarreau1efafce2019-01-27 15:37:19 +0100212 int i;
213
214 for (i = 0; i < global.nbthread; i++) {
Willy Tarreau2bd65a72019-09-24 06:55:18 +0200215 t = tasklet_new();
Willy Tarreau1efafce2019-01-27 15:37:19 +0100216 if (!t) {
217 ha_alert("Out of memory while initializing accept queue for thread %d\n", i);
218 return ERR_FATAL|ERR_ABORT;
219 }
Willy Tarreau2bd65a72019-09-24 06:55:18 +0200220 t->tid = i;
Willy Tarreau1efafce2019-01-27 15:37:19 +0100221 t->process = accept_queue_process;
222 t->context = &accept_queue_rings[i];
Willy Tarreau2bd65a72019-09-24 06:55:18 +0200223 accept_queue_rings[i].tasklet = t;
Willy Tarreau1efafce2019-01-27 15:37:19 +0100224 }
225 return 0;
226}
227
228REGISTER_CONFIG_POSTPARSER("multi-threaded accept queue", accept_queue_init);
229
230#endif // USE_THREAD
231
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100232/* This function adds the specified listener's file descriptor to the polling
233 * lists if it is in the LI_LISTEN state. The listener enters LI_READY or
Willy Tarreauae302532014-05-07 19:22:24 +0200234 * LI_FULL state depending on its number of connections. In deamon mode, we
235 * also support binding only the relevant processes to their respective
236 * listeners. We don't do that in debug mode however.
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100237 */
Christopher Fauletf5b8adc2017-06-02 10:00:35 +0200238static void enable_listener(struct listener *listener)
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100239{
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100240 HA_SPIN_LOCK(LISTENER_LOCK, &listener->lock);
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100241 if (listener->state == LI_LISTEN) {
William Lallemand095ba4c2017-06-01 17:38:50 +0200242 if ((global.mode & (MODE_DAEMON | MODE_MWORKER)) &&
Willy Tarreau6daac192019-02-02 17:39:53 +0100243 !(proc_mask(listener->bind_conf->bind_proc) & pid_bit)) {
Willy Tarreauae302532014-05-07 19:22:24 +0200244 /* we don't want to enable this listener and don't
245 * want any fd event to reach it.
246 */
Olivier Houchard1fc05162017-04-06 01:05:05 +0200247 if (!(global.tune.options & GTUNE_SOCKET_TRANSFER))
Christopher Faulet510c0d62018-03-16 10:04:47 +0100248 do_unbind_listener(listener, 1);
Olivier Houchard1fc05162017-04-06 01:05:05 +0200249 else {
Christopher Faulet510c0d62018-03-16 10:04:47 +0100250 do_unbind_listener(listener, 0);
Olivier Houchard1fc05162017-04-06 01:05:05 +0200251 listener->state = LI_LISTEN;
252 }
Willy Tarreauae302532014-05-07 19:22:24 +0200253 }
Willy Tarreaua8cf66b2019-02-27 16:49:00 +0100254 else if (!listener->maxconn || listener->nbconn < listener->maxconn) {
Willy Tarreau49b046d2012-08-09 12:11:58 +0200255 fd_want_recv(listener->fd);
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100256 listener->state = LI_READY;
Willy Tarreauae302532014-05-07 19:22:24 +0200257 }
258 else {
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100259 listener->state = LI_FULL;
260 }
261 }
William Lallemande22f11f2018-09-11 10:06:27 +0200262 /* if this listener is supposed to be only in the master, close it in the workers */
263 if ((global.mode & MODE_MWORKER) &&
264 (listener->options & LI_O_MWORKER) &&
265 master == 0) {
266 do_unbind_listener(listener, 1);
267 }
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100268 HA_SPIN_UNLOCK(LISTENER_LOCK, &listener->lock);
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100269}
270
271/* This function removes the specified listener's file descriptor from the
272 * polling lists if it is in the LI_READY or in the LI_FULL state. The listener
273 * enters LI_LISTEN.
274 */
Christopher Fauletf5b8adc2017-06-02 10:00:35 +0200275static void disable_listener(struct listener *listener)
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100276{
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100277 HA_SPIN_LOCK(LISTENER_LOCK, &listener->lock);
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100278 if (listener->state < LI_READY)
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200279 goto end;
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100280 if (listener->state == LI_READY)
Willy Tarreau49b046d2012-08-09 12:11:58 +0200281 fd_stop_recv(listener->fd);
Olivier Houchard859dc802019-08-08 15:47:21 +0200282 MT_LIST_DEL(&listener->wait_queue);
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100283 listener->state = LI_LISTEN;
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200284 end:
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100285 HA_SPIN_UNLOCK(LISTENER_LOCK, &listener->lock);
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100286}
287
Willy Tarreaube58c382011-07-24 18:28:10 +0200288/* This function tries to temporarily disable a listener, depending on the OS
289 * capabilities. Linux unbinds the listen socket after a SHUT_RD, and ignores
290 * SHUT_WR. Solaris refuses either shutdown(). OpenBSD ignores SHUT_RD but
291 * closes upon SHUT_WR and refuses to rebind. So a common validation path
292 * involves SHUT_WR && listen && SHUT_RD. In case of success, the FD's polling
293 * is disabled. It normally returns non-zero, unless an error is reported.
294 */
295int pause_listener(struct listener *l)
296{
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200297 int ret = 1;
298
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100299 HA_SPIN_LOCK(LISTENER_LOCK, &l->lock);
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200300
Olivier Houchard1fc05162017-04-06 01:05:05 +0200301 if (l->state <= LI_ZOMBIE)
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200302 goto end;
Willy Tarreaube58c382011-07-24 18:28:10 +0200303
Willy Tarreau092d8652014-07-07 20:22:12 +0200304 if (l->proto->pause) {
305 /* Returns < 0 in case of failure, 0 if the listener
306 * was totally stopped, or > 0 if correctly paused.
307 */
308 int ret = l->proto->pause(l);
Willy Tarreaube58c382011-07-24 18:28:10 +0200309
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200310 if (ret < 0) {
311 ret = 0;
312 goto end;
313 }
Willy Tarreau092d8652014-07-07 20:22:12 +0200314 else if (ret == 0)
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200315 goto end;
Willy Tarreaub3fb60b2012-10-04 08:56:31 +0200316 }
Willy Tarreaube58c382011-07-24 18:28:10 +0200317
Olivier Houchard859dc802019-08-08 15:47:21 +0200318 MT_LIST_DEL(&l->wait_queue);
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200319
Willy Tarreau49b046d2012-08-09 12:11:58 +0200320 fd_stop_recv(l->fd);
Willy Tarreaube58c382011-07-24 18:28:10 +0200321 l->state = LI_PAUSED;
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200322 end:
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100323 HA_SPIN_UNLOCK(LISTENER_LOCK, &l->lock);
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200324 return ret;
Willy Tarreaube58c382011-07-24 18:28:10 +0200325}
326
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200327/* This function tries to resume a temporarily disabled listener. Paused, full,
328 * limited and disabled listeners are handled, which means that this function
329 * may replace enable_listener(). The resulting state will either be LI_READY
330 * or LI_FULL. 0 is returned in case of failure to resume (eg: dead socket).
Willy Tarreauae302532014-05-07 19:22:24 +0200331 * Listeners bound to a different process are not woken up unless we're in
Willy Tarreauaf2fd582015-04-14 12:07:16 +0200332 * foreground mode, and are ignored. If the listener was only in the assigned
333 * state, it's totally rebound. This can happen if a pause() has completely
334 * stopped it. If the resume fails, 0 is returned and an error might be
335 * displayed.
Willy Tarreaube58c382011-07-24 18:28:10 +0200336 */
Willy Tarreau01abd022019-02-28 10:27:18 +0100337int resume_listener(struct listener *l)
Willy Tarreaube58c382011-07-24 18:28:10 +0200338{
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200339 int ret = 1;
340
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100341 HA_SPIN_LOCK(LISTENER_LOCK, &l->lock);
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200342
Willy Tarreauf2cb1692019-07-11 10:08:31 +0200343 /* check that another thread didn't to the job in parallel (e.g. at the
344 * end of listen_accept() while we'd come from dequeue_all_listeners().
345 */
Olivier Houchard859dc802019-08-08 15:47:21 +0200346 if (MT_LIST_ADDED(&l->wait_queue))
Willy Tarreauf2cb1692019-07-11 10:08:31 +0200347 goto end;
348
William Lallemand095ba4c2017-06-01 17:38:50 +0200349 if ((global.mode & (MODE_DAEMON | MODE_MWORKER)) &&
Willy Tarreau6daac192019-02-02 17:39:53 +0100350 !(proc_mask(l->bind_conf->bind_proc) & pid_bit))
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200351 goto end;
Willy Tarreau3569df32017-03-15 12:47:46 +0100352
Willy Tarreau1c4b8142014-07-07 21:06:24 +0200353 if (l->state == LI_ASSIGNED) {
354 char msg[100];
355 int err;
356
357 err = l->proto->bind(l, msg, sizeof(msg));
358 if (err & ERR_ALERT)
Christopher Faulet767a84b2017-11-24 16:50:31 +0100359 ha_alert("Resuming listener: %s\n", msg);
Willy Tarreau1c4b8142014-07-07 21:06:24 +0200360 else if (err & ERR_WARN)
Christopher Faulet767a84b2017-11-24 16:50:31 +0100361 ha_warning("Resuming listener: %s\n", msg);
Willy Tarreau1c4b8142014-07-07 21:06:24 +0200362
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200363 if (err & (ERR_FATAL | ERR_ABORT)) {
364 ret = 0;
365 goto end;
366 }
Willy Tarreau1c4b8142014-07-07 21:06:24 +0200367 }
368
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200369 if (l->state < LI_PAUSED || l->state == LI_ZOMBIE) {
370 ret = 0;
371 goto end;
372 }
Willy Tarreaube58c382011-07-24 18:28:10 +0200373
Willy Tarreaub3fb60b2012-10-04 08:56:31 +0200374 if (l->proto->sock_prot == IPPROTO_TCP &&
375 l->state == LI_PAUSED &&
Willy Tarreaue2711c72019-02-27 15:39:41 +0100376 listen(l->fd, listener_backlog(l)) != 0) {
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200377 ret = 0;
378 goto end;
379 }
Willy Tarreaube58c382011-07-24 18:28:10 +0200380
381 if (l->state == LI_READY)
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200382 goto end;
Willy Tarreaube58c382011-07-24 18:28:10 +0200383
Olivier Houchard859dc802019-08-08 15:47:21 +0200384 MT_LIST_DEL(&l->wait_queue);
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200385
Willy Tarreaua8cf66b2019-02-27 16:49:00 +0100386 if (l->maxconn && l->nbconn >= l->maxconn) {
Willy Tarreaube58c382011-07-24 18:28:10 +0200387 l->state = LI_FULL;
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200388 goto end;
Willy Tarreaube58c382011-07-24 18:28:10 +0200389 }
390
Willy Tarreauf2cb1692019-07-11 10:08:31 +0200391 if (!(thread_mask(l->bind_conf->bind_thread) & tid_bit)) {
392 /* we're not allowed to touch this listener's FD, let's requeue
393 * the listener into one of its owning thread's queue instead.
394 */
395 int first_thread = my_flsl(thread_mask(l->bind_conf->bind_thread)) - 1;
396 work_list_add(&local_listener_queue[first_thread], &l->wait_queue);
397 goto end;
398 }
399
Willy Tarreau49b046d2012-08-09 12:11:58 +0200400 fd_want_recv(l->fd);
Willy Tarreaube58c382011-07-24 18:28:10 +0200401 l->state = LI_READY;
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200402 end:
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100403 HA_SPIN_UNLOCK(LISTENER_LOCK, &l->lock);
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200404 return ret;
405}
406
Willy Tarreau87b09662015-04-03 00:22:06 +0200407/* Marks a ready listener as full so that the stream code tries to re-enable
Willy Tarreau62793712011-07-24 19:23:38 +0200408 * it upon next close() using resume_listener().
409 */
Christopher Faulet5580ba22017-08-28 15:29:20 +0200410static void listener_full(struct listener *l)
Willy Tarreau62793712011-07-24 19:23:38 +0200411{
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100412 HA_SPIN_LOCK(LISTENER_LOCK, &l->lock);
Willy Tarreau62793712011-07-24 19:23:38 +0200413 if (l->state >= LI_READY) {
Olivier Houchard859dc802019-08-08 15:47:21 +0200414 MT_LIST_DEL(&l->wait_queue);
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100415 if (l->state != LI_FULL) {
416 fd_stop_recv(l->fd);
417 l->state = LI_FULL;
418 }
Willy Tarreau62793712011-07-24 19:23:38 +0200419 }
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100420 HA_SPIN_UNLOCK(LISTENER_LOCK, &l->lock);
Willy Tarreau62793712011-07-24 19:23:38 +0200421}
422
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200423/* Marks a ready listener as limited so that we only try to re-enable it when
424 * resources are free again. It will be queued into the specified queue.
425 */
Olivier Houchard859dc802019-08-08 15:47:21 +0200426static void limit_listener(struct listener *l, struct mt_list *list)
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200427{
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100428 HA_SPIN_LOCK(LISTENER_LOCK, &l->lock);
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200429 if (l->state == LI_READY) {
Olivier Houchard859dc802019-08-08 15:47:21 +0200430 MT_LIST_ADDQ(list, &l->wait_queue);
Willy Tarreau49b046d2012-08-09 12:11:58 +0200431 fd_stop_recv(l->fd);
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200432 l->state = LI_LIMITED;
433 }
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100434 HA_SPIN_UNLOCK(LISTENER_LOCK, &l->lock);
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200435}
436
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100437/* This function adds all of the protocol's listener's file descriptors to the
438 * polling lists when they are in the LI_LISTEN state. It is intended to be
439 * used as a protocol's generic enable_all() primitive, for use after the
440 * fork(). It puts the listeners into LI_READY or LI_FULL states depending on
441 * their number of connections. It always returns ERR_NONE.
Willy Tarreaudaacf362019-07-24 16:45:02 +0200442 *
443 * Must be called with proto_lock held.
444 *
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100445 */
446int enable_all_listeners(struct protocol *proto)
447{
448 struct listener *listener;
449
450 list_for_each_entry(listener, &proto->listeners, proto_list)
451 enable_listener(listener);
452 return ERR_NONE;
453}
454
455/* This function removes all of the protocol's listener's file descriptors from
456 * the polling lists when they are in the LI_READY or LI_FULL states. It is
457 * intended to be used as a protocol's generic disable_all() primitive. It puts
458 * the listeners into LI_LISTEN, and always returns ERR_NONE.
Willy Tarreaudaacf362019-07-24 16:45:02 +0200459 *
460 * Must be called with proto_lock held.
461 *
Willy Tarreaudabf2e22007-10-28 21:59:24 +0100462 */
463int disable_all_listeners(struct protocol *proto)
464{
465 struct listener *listener;
466
467 list_for_each_entry(listener, &proto->listeners, proto_list)
468 disable_listener(listener);
469 return ERR_NONE;
470}
471
Willy Tarreau241797a2019-12-10 14:10:52 +0100472/* Dequeues all listeners waiting for a resource the global wait queue */
473void dequeue_all_listeners()
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200474{
Willy Tarreau01abd022019-02-28 10:27:18 +0100475 struct listener *listener;
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200476
Willy Tarreau241797a2019-12-10 14:10:52 +0100477 while ((listener = MT_LIST_POP(&global_listener_queue, struct listener *, wait_queue))) {
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200478 /* This cannot fail because the listeners are by definition in
Willy Tarreau01abd022019-02-28 10:27:18 +0100479 * the LI_LIMITED state.
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200480 */
Willy Tarreau01abd022019-02-28 10:27:18 +0100481 resume_listener(listener);
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200482 }
483}
484
Willy Tarreau241797a2019-12-10 14:10:52 +0100485/* Dequeues all listeners waiting for a resource in proxy <px>'s queue */
486void dequeue_proxy_listeners(struct proxy *px)
487{
488 struct listener *listener;
489
490 while ((listener = MT_LIST_POP(&px->listener_queue, struct listener *, wait_queue))) {
491 /* This cannot fail because the listeners are by definition in
492 * the LI_LIMITED state.
493 */
494 resume_listener(listener);
495 }
496}
497
Christopher Faulet510c0d62018-03-16 10:04:47 +0100498/* Must be called with the lock held. Depending on <do_close> value, it does
499 * what unbind_listener or unbind_listener_no_close should do.
500 */
501void do_unbind_listener(struct listener *listener, int do_close)
Willy Tarreaub648d632007-10-28 22:13:50 +0100502{
Olivier Houcharda5188562019-03-08 15:35:42 +0100503 if (listener->state == LI_READY && fd_updt)
Willy Tarreau49b046d2012-08-09 12:11:58 +0200504 fd_stop_recv(listener->fd);
Willy Tarreaub648d632007-10-28 22:13:50 +0100505
Olivier Houchard859dc802019-08-08 15:47:21 +0200506 MT_LIST_DEL(&listener->wait_queue);
Willy Tarreaue6ca1fc2011-07-24 22:03:52 +0200507
Willy Tarreaube58c382011-07-24 18:28:10 +0200508 if (listener->state >= LI_PAUSED) {
Olivier Houchard1fc05162017-04-06 01:05:05 +0200509 if (do_close) {
510 fd_delete(listener->fd);
511 listener->fd = -1;
512 }
513 else
514 fd_remove(listener->fd);
Willy Tarreaub648d632007-10-28 22:13:50 +0100515 listener->state = LI_ASSIGNED;
516 }
Willy Tarreaubbd09b92017-11-05 11:38:44 +0100517}
518
Olivier Houchard1fc05162017-04-06 01:05:05 +0200519/* This function closes the listening socket for the specified listener,
520 * provided that it's already in a listening state. The listener enters the
Willy Tarreaubbd09b92017-11-05 11:38:44 +0100521 * LI_ASSIGNED state. This function is intended to be used as a generic
522 * function for standard protocols.
Olivier Houchard1fc05162017-04-06 01:05:05 +0200523 */
Willy Tarreaubbd09b92017-11-05 11:38:44 +0100524void unbind_listener(struct listener *listener)
Olivier Houchard1fc05162017-04-06 01:05:05 +0200525{
Christopher Faulet510c0d62018-03-16 10:04:47 +0100526 HA_SPIN_LOCK(LISTENER_LOCK, &listener->lock);
Willy Tarreaubbd09b92017-11-05 11:38:44 +0100527 do_unbind_listener(listener, 1);
Christopher Faulet510c0d62018-03-16 10:04:47 +0100528 HA_SPIN_UNLOCK(LISTENER_LOCK, &listener->lock);
Olivier Houchard1fc05162017-04-06 01:05:05 +0200529}
530
531/* This function pretends the listener is dead, but keeps the FD opened, so
532 * that we can provide it, for conf reloading.
533 */
Willy Tarreaubbd09b92017-11-05 11:38:44 +0100534void unbind_listener_no_close(struct listener *listener)
Olivier Houchard1fc05162017-04-06 01:05:05 +0200535{
Christopher Faulet510c0d62018-03-16 10:04:47 +0100536 HA_SPIN_LOCK(LISTENER_LOCK, &listener->lock);
Willy Tarreaubbd09b92017-11-05 11:38:44 +0100537 do_unbind_listener(listener, 0);
Christopher Faulet510c0d62018-03-16 10:04:47 +0100538 HA_SPIN_UNLOCK(LISTENER_LOCK, &listener->lock);
Olivier Houchard1fc05162017-04-06 01:05:05 +0200539}
540
Willy Tarreau3acf8c32007-10-28 22:35:41 +0100541/* This function closes all listening sockets bound to the protocol <proto>,
542 * and the listeners end in LI_ASSIGNED state if they were higher. It does not
543 * detach them from the protocol. It always returns ERR_NONE.
Willy Tarreaudaacf362019-07-24 16:45:02 +0200544 *
545 * Must be called with proto_lock held.
546 *
Willy Tarreau3acf8c32007-10-28 22:35:41 +0100547 */
548int unbind_all_listeners(struct protocol *proto)
549{
550 struct listener *listener;
551
552 list_for_each_entry(listener, &proto->listeners, proto_list)
553 unbind_listener(listener);
554 return ERR_NONE;
555}
556
Willy Tarreau0de59fd2017-09-15 08:10:44 +0200557/* creates one or multiple listeners for bind_conf <bc> on sockaddr <ss> on port
558 * range <portl> to <porth>, and possibly attached to fd <fd> (or -1 for auto
559 * allocation). The address family is taken from ss->ss_family. The number of
560 * jobs and listeners is automatically increased by the number of listeners
William Lallemand75ea0a02017-11-15 19:02:58 +0100561 * created. If the <inherited> argument is set to 1, it specifies that the FD
562 * was obtained from a parent process.
563 * It returns non-zero on success, zero on error with the error message
Willy Tarreau0de59fd2017-09-15 08:10:44 +0200564 * set in <err>.
565 */
566int create_listeners(struct bind_conf *bc, const struct sockaddr_storage *ss,
William Lallemand75ea0a02017-11-15 19:02:58 +0100567 int portl, int porth, int fd, int inherited, char **err)
Willy Tarreau0de59fd2017-09-15 08:10:44 +0200568{
569 struct protocol *proto = protocol_by_family(ss->ss_family);
570 struct listener *l;
571 int port;
572
573 if (!proto) {
574 memprintf(err, "unsupported protocol family %d", ss->ss_family);
575 return 0;
576 }
577
578 for (port = portl; port <= porth; port++) {
579 l = calloc(1, sizeof(*l));
580 if (!l) {
581 memprintf(err, "out of memory");
582 return 0;
583 }
584 l->obj_type = OBJ_TYPE_LISTENER;
585 LIST_ADDQ(&bc->frontend->conf.listeners, &l->by_fe);
586 LIST_ADDQ(&bc->listeners, &l->by_bind);
587 l->bind_conf = bc;
588
589 l->fd = fd;
590 memcpy(&l->addr, ss, sizeof(*ss));
Olivier Houchard859dc802019-08-08 15:47:21 +0200591 MT_LIST_INIT(&l->wait_queue);
Willy Tarreau0de59fd2017-09-15 08:10:44 +0200592 l->state = LI_INIT;
593
594 proto->add(l, port);
595
William Lallemand75ea0a02017-11-15 19:02:58 +0100596 if (inherited)
597 l->options |= LI_O_INHERITED;
598
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100599 HA_SPIN_INIT(&l->lock);
Olivier Houchard64213e92019-03-08 18:52:57 +0100600 _HA_ATOMIC_ADD(&jobs, 1);
601 _HA_ATOMIC_ADD(&listeners, 1);
Willy Tarreau0de59fd2017-09-15 08:10:44 +0200602 }
603 return 1;
604}
605
Willy Tarreau1a64d162007-10-28 22:26:05 +0100606/* Delete a listener from its protocol's list of listeners. The listener's
607 * state is automatically updated from LI_ASSIGNED to LI_INIT. The protocol's
Willy Tarreau2cc5bae2017-09-15 08:18:11 +0200608 * number of listeners is updated, as well as the global number of listeners
609 * and jobs. Note that the listener must have previously been unbound. This
610 * is the generic function to use to remove a listener.
Willy Tarreaudaacf362019-07-24 16:45:02 +0200611 *
612 * Will grab the proto_lock.
613 *
Willy Tarreau1a64d162007-10-28 22:26:05 +0100614 */
615void delete_listener(struct listener *listener)
616{
Willy Tarreau6ee9f8d2019-08-26 10:55:52 +0200617 HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100618 HA_SPIN_LOCK(LISTENER_LOCK, &listener->lock);
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100619 if (listener->state == LI_ASSIGNED) {
620 listener->state = LI_INIT;
621 LIST_DEL(&listener->proto_list);
622 listener->proto->nb_listeners--;
Olivier Houchard64213e92019-03-08 18:52:57 +0100623 _HA_ATOMIC_SUB(&jobs, 1);
624 _HA_ATOMIC_SUB(&listeners, 1);
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100625 }
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100626 HA_SPIN_UNLOCK(LISTENER_LOCK, &listener->lock);
Willy Tarreau6ee9f8d2019-08-26 10:55:52 +0200627 HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
Willy Tarreau1a64d162007-10-28 22:26:05 +0100628}
629
Willy Tarreaue2711c72019-02-27 15:39:41 +0100630/* Returns a suitable value for a listener's backlog. It uses the listener's,
631 * otherwise the frontend's backlog, otherwise the listener's maxconn,
632 * otherwise the frontend's maxconn, otherwise 1024.
633 */
634int listener_backlog(const struct listener *l)
635{
636 if (l->backlog)
637 return l->backlog;
638
639 if (l->bind_conf->frontend->backlog)
640 return l->bind_conf->frontend->backlog;
641
642 if (l->maxconn)
643 return l->maxconn;
644
645 if (l->bind_conf->frontend->maxconn)
646 return l->bind_conf->frontend->maxconn;
647
648 return 1024;
649}
650
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200651/* This function is called on a read event from a listening socket, corresponding
652 * to an accept. It tries to accept as many connections as possible, and for each
653 * calls the listener's accept handler (generally the frontend's accept handler).
654 */
Willy Tarreauafad0e02012-08-09 14:45:22 +0200655void listener_accept(int fd)
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200656{
657 struct listener *l = fdtab[fd].owner;
Olivier Houchardd16a9df2019-02-25 16:18:16 +0100658 struct proxy *p;
Christopher Faulet102854c2019-04-30 12:17:13 +0200659 unsigned int max_accept;
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100660 int next_conn = 0;
Willy Tarreau82c97892019-02-27 19:32:32 +0100661 int next_feconn = 0;
662 int next_actconn = 0;
Willy Tarreaubb660302014-05-07 19:47:02 +0200663 int expire;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200664 int cfd;
665 int ret;
Willy Tarreau818dca52014-01-31 19:40:19 +0100666#ifdef USE_ACCEPT4
667 static int accept4_broken;
668#endif
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200669
Olivier Houchardd16a9df2019-02-25 16:18:16 +0100670 if (!l)
671 return;
672 p = l->bind_conf->frontend;
Christopher Faulet102854c2019-04-30 12:17:13 +0200673
674 /* if l->maxaccept is -1, then max_accept is UINT_MAX. It is not really
675 * illimited, but it is probably enough.
676 */
Olivier Houchardd16a9df2019-02-25 16:18:16 +0100677 max_accept = l->maxaccept ? l->maxaccept : 1;
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200678
Willy Tarreau93e7c002013-10-07 18:51:07 +0200679 if (!(l->options & LI_O_UNLIMITED) && global.sps_lim) {
680 int max = freq_ctr_remain(&global.sess_per_sec, global.sps_lim, 0);
Willy Tarreau93e7c002013-10-07 18:51:07 +0200681
682 if (unlikely(!max)) {
683 /* frontend accept rate limit was reached */
Willy Tarreau93e7c002013-10-07 18:51:07 +0200684 expire = tick_add(now_ms, next_event_delay(&global.sess_per_sec, global.sps_lim, 0));
Willy Tarreau0591bf72019-12-10 12:01:21 +0100685 goto limit_global;
Willy Tarreau93e7c002013-10-07 18:51:07 +0200686 }
687
688 if (max_accept > max)
689 max_accept = max;
690 }
691
692 if (!(l->options & LI_O_UNLIMITED) && global.cps_lim) {
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200693 int max = freq_ctr_remain(&global.conn_per_sec, global.cps_lim, 0);
694
695 if (unlikely(!max)) {
696 /* frontend accept rate limit was reached */
Willy Tarreau93e7c002013-10-07 18:51:07 +0200697 expire = tick_add(now_ms, next_event_delay(&global.conn_per_sec, global.cps_lim, 0));
Willy Tarreau0591bf72019-12-10 12:01:21 +0100698 goto limit_global;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200699 }
700
701 if (max_accept > max)
702 max_accept = max;
703 }
Willy Tarreaue43d5322013-10-07 20:01:52 +0200704#ifdef USE_OPENSSL
705 if (!(l->options & LI_O_UNLIMITED) && global.ssl_lim && l->bind_conf && l->bind_conf->is_ssl) {
706 int max = freq_ctr_remain(&global.ssl_per_sec, global.ssl_lim, 0);
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200707
Willy Tarreaue43d5322013-10-07 20:01:52 +0200708 if (unlikely(!max)) {
709 /* frontend accept rate limit was reached */
Willy Tarreaue43d5322013-10-07 20:01:52 +0200710 expire = tick_add(now_ms, next_event_delay(&global.ssl_per_sec, global.ssl_lim, 0));
Willy Tarreau0591bf72019-12-10 12:01:21 +0100711 goto limit_global;
Willy Tarreaue43d5322013-10-07 20:01:52 +0200712 }
713
714 if (max_accept > max)
715 max_accept = max;
716 }
717#endif
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200718 if (p && p->fe_sps_lim) {
719 int max = freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0);
720
721 if (unlikely(!max)) {
722 /* frontend accept rate limit was reached */
Willy Tarreau0591bf72019-12-10 12:01:21 +0100723 expire = tick_add(now_ms, next_event_delay(&p->fe_sess_per_sec, p->fe_sps_lim, 0));
724 goto limit_proxy;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200725 }
726
727 if (max_accept > max)
728 max_accept = max;
729 }
730
731 /* Note: if we fail to allocate a connection because of configured
732 * limits, we'll schedule a new attempt worst 1 second later in the
733 * worst case. If we fail due to system limits or temporary resource
734 * shortage, we try again 100ms later in the worst case.
735 */
Christopher Faulet102854c2019-04-30 12:17:13 +0200736 for (; max_accept; next_conn = next_feconn = next_actconn = 0, max_accept--) {
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200737 struct sockaddr_storage addr;
738 socklen_t laddr = sizeof(addr);
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200739 unsigned int count;
Willy Tarreau57cb5062019-03-15 17:16:34 +0100740 __decl_hathreads(unsigned long mask);
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200741
Willy Tarreau82c97892019-02-27 19:32:32 +0100742 /* pre-increase the number of connections without going too far.
743 * We process the listener, then the proxy, then the process.
744 * We know which ones to unroll based on the next_xxx value.
745 */
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100746 do {
747 count = l->nbconn;
Willy Tarreau93604ed2019-11-15 10:20:07 +0100748 if (unlikely(l->maxconn && count >= l->maxconn)) {
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100749 /* the listener was marked full or another
750 * thread is going to do it.
751 */
752 next_conn = 0;
Willy Tarreau93604ed2019-11-15 10:20:07 +0100753 listener_full(l);
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100754 goto end;
755 }
756 next_conn = count + 1;
David Carlier56716622019-03-27 16:08:42 +0000757 } while (!_HA_ATOMIC_CAS(&l->nbconn, (int *)(&count), next_conn));
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100758
Willy Tarreau82c97892019-02-27 19:32:32 +0100759 if (p) {
760 do {
761 count = p->feconn;
Willy Tarreau93604ed2019-11-15 10:20:07 +0100762 if (unlikely(count >= p->maxconn)) {
Willy Tarreau82c97892019-02-27 19:32:32 +0100763 /* the frontend was marked full or another
764 * thread is going to do it.
765 */
766 next_feconn = 0;
Willy Tarreau0591bf72019-12-10 12:01:21 +0100767 expire = TICK_ETERNITY;
768 goto limit_proxy;
Willy Tarreau82c97892019-02-27 19:32:32 +0100769 }
770 next_feconn = count + 1;
Olivier Houchard64213e92019-03-08 18:52:57 +0100771 } while (!_HA_ATOMIC_CAS(&p->feconn, &count, next_feconn));
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200772 }
773
Willy Tarreau82c97892019-02-27 19:32:32 +0100774 if (!(l->options & LI_O_UNLIMITED)) {
775 do {
776 count = actconn;
Willy Tarreau93604ed2019-11-15 10:20:07 +0100777 if (unlikely(count >= global.maxconn)) {
Willy Tarreau82c97892019-02-27 19:32:32 +0100778 /* the process was marked full or another
779 * thread is going to do it.
780 */
781 next_actconn = 0;
Willy Tarreau0591bf72019-12-10 12:01:21 +0100782 expire = tick_add(now_ms, 1000); /* try again in 1 second */
783 goto limit_global;
Willy Tarreau82c97892019-02-27 19:32:32 +0100784 }
785 next_actconn = count + 1;
David Carlier56716622019-03-27 16:08:42 +0000786 } while (!_HA_ATOMIC_CAS(&actconn, (int *)(&count), next_actconn));
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200787 }
788
William Lallemand2fe7dd02018-09-11 16:51:29 +0200789 /* with sockpair@ we don't want to do an accept */
790 if (unlikely(l->addr.ss_family == AF_CUST_SOCKPAIR)) {
791 if ((cfd = recv_fd_uxst(fd)) != -1)
William Lallemandd9138002018-11-27 12:02:39 +0100792 fcntl(cfd, F_SETFL, O_NONBLOCK);
Willy Tarreau888d5672019-01-27 18:34:12 +0100793 /* just like with UNIX sockets, only the family is filled */
794 addr.ss_family = AF_UNIX;
795 laddr = sizeof(addr.ss_family);
William Lallemand2fe7dd02018-09-11 16:51:29 +0200796 } else
797
Willy Tarreau1bc4aab2012-10-08 20:11:03 +0200798#ifdef USE_ACCEPT4
Willy Tarreau818dca52014-01-31 19:40:19 +0100799 /* only call accept4() if it's known to be safe, otherwise
800 * fallback to the legacy accept() + fcntl().
801 */
802 if (unlikely(accept4_broken ||
William Lallemandd9138002018-11-27 12:02:39 +0100803 ((cfd = accept4(fd, (struct sockaddr *)&addr, &laddr, SOCK_NONBLOCK)) == -1 &&
Willy Tarreau818dca52014-01-31 19:40:19 +0100804 (errno == ENOSYS || errno == EINVAL || errno == EBADF) &&
805 (accept4_broken = 1))))
806#endif
Willy Tarreau6b3b0d42012-10-22 19:32:55 +0200807 if ((cfd = accept(fd, (struct sockaddr *)&addr, &laddr)) != -1)
William Lallemandd9138002018-11-27 12:02:39 +0100808 fcntl(cfd, F_SETFL, O_NONBLOCK);
Willy Tarreau818dca52014-01-31 19:40:19 +0100809
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200810 if (unlikely(cfd == -1)) {
811 switch (errno) {
812 case EAGAIN:
Willy Tarreau20aeb1c2019-12-10 08:42:21 +0100813 if (fdtab[fd].ev & (FD_POLL_HUP|FD_POLL_ERR)) {
Willy Tarreaubb660302014-05-07 19:47:02 +0200814 /* the listening socket might have been disabled in a shared
815 * process and we're a collateral victim. We'll just pause for
816 * a while in case it comes back. In the mean time, we need to
817 * clear this sticky flag.
818 */
Willy Tarreau20aeb1c2019-12-10 08:42:21 +0100819 _HA_ATOMIC_AND(&fdtab[fd].ev, ~(FD_POLL_HUP|FD_POLL_ERR));
Willy Tarreaubb660302014-05-07 19:47:02 +0200820 goto transient_error;
821 }
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +0200822 goto end; /* nothing more to accept */
Willy Tarreaubb660302014-05-07 19:47:02 +0200823 case EINVAL:
824 /* might be trying to accept on a shut fd (eg: soft stop) */
825 goto transient_error;
Willy Tarreaua593ec52014-01-20 21:21:30 +0100826 case EINTR:
827 case ECONNABORTED:
Olivier Houchard64213e92019-03-08 18:52:57 +0100828 _HA_ATOMIC_SUB(&l->nbconn, 1);
Willy Tarreau82c97892019-02-27 19:32:32 +0100829 if (p)
Olivier Houchard64213e92019-03-08 18:52:57 +0100830 _HA_ATOMIC_SUB(&p->feconn, 1);
Willy Tarreau82c97892019-02-27 19:32:32 +0100831 if (!(l->options & LI_O_UNLIMITED))
Olivier Houchard64213e92019-03-08 18:52:57 +0100832 _HA_ATOMIC_SUB(&actconn, 1);
Willy Tarreaua593ec52014-01-20 21:21:30 +0100833 continue;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200834 case ENFILE:
835 if (p)
836 send_log(p, LOG_EMERG,
Willy Tarreauc5532ac2018-01-29 15:06:04 +0100837 "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
838 p->id, global.maxsock);
Willy Tarreaubb660302014-05-07 19:47:02 +0200839 goto transient_error;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200840 case EMFILE:
841 if (p)
842 send_log(p, LOG_EMERG,
Willy Tarreauc5532ac2018-01-29 15:06:04 +0100843 "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
844 p->id, global.maxsock);
Willy Tarreaubb660302014-05-07 19:47:02 +0200845 goto transient_error;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200846 case ENOBUFS:
847 case ENOMEM:
848 if (p)
849 send_log(p, LOG_EMERG,
Willy Tarreauc5532ac2018-01-29 15:06:04 +0100850 "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
851 p->id, global.maxsock);
Willy Tarreaubb660302014-05-07 19:47:02 +0200852 goto transient_error;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200853 default:
Willy Tarreaua593ec52014-01-20 21:21:30 +0100854 /* unexpected result, let's give up and let other tasks run */
Willy Tarreau92079932019-12-10 09:30:05 +0100855 max_accept = 0;
856 goto end;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200857 }
858 }
859
William Lallemandd9138002018-11-27 12:02:39 +0100860 /* we don't want to leak the FD upon reload if it's in the master */
861 if (unlikely(master == 1))
862 fcntl(cfd, F_SETFD, FD_CLOEXEC);
863
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100864 /* The connection was accepted, it must be counted as such */
865 if (l->counters)
866 HA_ATOMIC_UPDATE_MAX(&l->counters->conn_max, next_conn);
867
Willy Tarreau82c97892019-02-27 19:32:32 +0100868 if (p)
869 HA_ATOMIC_UPDATE_MAX(&p->fe_counters.conn_max, next_feconn);
870
871 proxy_inc_fe_conn_ctr(l, p);
872
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100873 if (!(l->options & LI_O_UNLIMITED)) {
874 count = update_freq_ctr(&global.conn_per_sec, 1);
875 HA_ATOMIC_UPDATE_MAX(&global.cps_max, count);
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100876 }
877
Willy Tarreau64a9c052019-04-12 15:27:17 +0200878 _HA_ATOMIC_ADD(&activity[tid].accepted, 1);
879
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200880 if (unlikely(cfd >= global.maxsock)) {
881 send_log(p, LOG_EMERG,
882 "Proxy %s reached the configured maximum connection limit. Please check the global 'maxconn' value.\n",
883 p->id);
884 close(cfd);
Willy Tarreau0591bf72019-12-10 12:01:21 +0100885 expire = tick_add(now_ms, 1000); /* try again in 1 second */
886 goto limit_global;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200887 }
888
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100889 /* past this point, l->accept() will automatically decrement
Willy Tarreau82c97892019-02-27 19:32:32 +0100890 * l->nbconn, feconn and actconn once done. Setting next_*conn=0
891 * allows the error path not to rollback on nbconn. It's more
892 * convenient than duplicating all exit labels.
Willy Tarreau3f0d02b2019-02-25 19:23:37 +0100893 */
894 next_conn = 0;
Willy Tarreau82c97892019-02-27 19:32:32 +0100895 next_feconn = 0;
896 next_actconn = 0;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +0200897
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100898#if defined(USE_THREAD)
Willy Tarreau897e2c52019-03-13 15:03:53 +0100899 mask = thread_mask(l->bind_conf->bind_thread) & all_threads_mask;
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100900 if (atleast2(mask) && (global.tune.options & GTUNE_LISTENER_MQ)) {
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100901 struct accept_queue_ring *ring;
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100902 unsigned int t, t0, t1, t2;
Willy Tarreaufc630bd2019-03-04 19:57:34 +0100903
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100904 /* The principle is that we have two running indexes,
905 * each visiting in turn all threads bound to this
906 * listener. The connection will be assigned to the one
907 * with the least connections, and the other one will
908 * be updated. This provides a good fairness on short
Willy Tarreaufc630bd2019-03-04 19:57:34 +0100909 * connections (round robin) and on long ones (conn
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100910 * count), without ever missing any idle thread.
Willy Tarreaufc630bd2019-03-04 19:57:34 +0100911 */
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100912
913 /* keep a copy for the final update. thr_idx is composite
914 * and made of (t2<<16) + t1.
915 */
Willy Tarreau0cf33172019-03-06 15:26:33 +0100916 t0 = l->thr_idx;
Willy Tarreaufc630bd2019-03-04 19:57:34 +0100917 do {
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100918 unsigned long m1, m2;
919 int q1, q2;
920
921 t2 = t1 = t0;
922 t2 >>= 16;
923 t1 &= 0xFFFF;
924
925 /* t1 walks low to high bits ;
926 * t2 walks high to low.
927 */
928 m1 = mask >> t1;
929 m2 = mask & (t2 ? nbits(t2 + 1) : ~0UL);
930
Willy Tarreau85d04242019-04-16 18:09:13 +0200931 if (unlikely(!(m1 & 1))) {
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100932 m1 &= ~1UL;
933 if (!m1) {
934 m1 = mask;
935 t1 = 0;
936 }
937 t1 += my_ffsl(m1) - 1;
938 }
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100939
Willy Tarreau85d04242019-04-16 18:09:13 +0200940 if (unlikely(!(m2 & (1UL << t2)) || t1 == t2)) {
941 /* highest bit not set */
942 if (!m2)
943 m2 = mask;
944
945 t2 = my_flsl(m2) - 1;
946 }
947
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100948 /* now we have two distinct thread IDs belonging to the mask */
949 q1 = accept_queue_rings[t1].tail - accept_queue_rings[t1].head + ACCEPT_QUEUE_SIZE;
950 if (q1 >= ACCEPT_QUEUE_SIZE)
951 q1 -= ACCEPT_QUEUE_SIZE;
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100952
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100953 q2 = accept_queue_rings[t2].tail - accept_queue_rings[t2].head + ACCEPT_QUEUE_SIZE;
954 if (q2 >= ACCEPT_QUEUE_SIZE)
955 q2 -= ACCEPT_QUEUE_SIZE;
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100956
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100957 /* we have 3 possibilities now :
958 * q1 < q2 : t1 is less loaded than t2, so we pick it
959 * and update t2 (since t1 might still be
960 * lower than another thread)
961 * q1 > q2 : t2 is less loaded than t1, so we pick it
962 * and update t1 (since t2 might still be
963 * lower than another thread)
964 * q1 = q2 : both are equally loaded, thus we pick t1
965 * and update t1 as it will become more loaded
966 * than t2.
967 */
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100968
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100969 q1 += l->thr_conn[t1];
970 q2 += l->thr_conn[t2];
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100971
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100972 if (q1 - q2 < 0) {
973 t = t1;
974 t2 = t2 ? t2 - 1 : LONGBITS - 1;
975 }
976 else if (q1 - q2 > 0) {
977 t = t2;
978 t1++;
979 if (t1 >= LONGBITS)
980 t1 = 0;
981 }
982 else {
983 t = t1;
984 t1++;
985 if (t1 >= LONGBITS)
986 t1 = 0;
987 }
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100988
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100989 /* new value for thr_idx */
990 t1 += (t2 << 16);
Olivier Houchard64213e92019-03-08 18:52:57 +0100991 } while (unlikely(!_HA_ATOMIC_CAS(&l->thr_idx, &t0, t1)));
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100992
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100993 /* We successfully selected the best thread "t" for this
994 * connection. We use deferred accepts even if it's the
995 * local thread because tests show that it's the best
996 * performing model, likely due to better cache locality
997 * when processing this loop.
Willy Tarreaue0e9c482019-01-27 15:37:19 +0100998 */
Willy Tarreau0fe703b2019-03-05 08:46:28 +0100999 ring = &accept_queue_rings[t];
Willy Tarreaue0e9c482019-01-27 15:37:19 +01001000 if (accept_queue_push_mp(ring, cfd, l, &addr, laddr)) {
Olivier Houchard64213e92019-03-08 18:52:57 +01001001 _HA_ATOMIC_ADD(&activity[t].accq_pushed, 1);
Willy Tarreau2bd65a72019-09-24 06:55:18 +02001002 tasklet_wakeup(ring->tasklet);
Willy Tarreaue0e9c482019-01-27 15:37:19 +01001003 continue;
1004 }
1005 /* If the ring is full we do a synchronous accept on
1006 * the local thread here.
Willy Tarreaue0e9c482019-01-27 15:37:19 +01001007 */
Olivier Houchard64213e92019-03-08 18:52:57 +01001008 _HA_ATOMIC_ADD(&activity[t].accq_full, 1);
Willy Tarreaue0e9c482019-01-27 15:37:19 +01001009 }
1010#endif // USE_THREAD
1011
Olivier Houchard64213e92019-03-08 18:52:57 +01001012 _HA_ATOMIC_ADD(&l->thr_conn[tid], 1);
Willy Tarreaubbebbbf2012-05-07 21:22:09 +02001013 ret = l->accept(l, cfd, &addr);
1014 if (unlikely(ret <= 0)) {
Willy Tarreau87b09662015-04-03 00:22:06 +02001015 /* The connection was closed by stream_accept(). Either
Willy Tarreaubbebbbf2012-05-07 21:22:09 +02001016 * we just have to ignore it (ret == 0) or it's a critical
1017 * error due to a resource shortage, and we must stop the
1018 * listener (ret < 0).
1019 */
Willy Tarreaubbebbbf2012-05-07 21:22:09 +02001020 if (ret == 0) /* successful termination */
1021 continue;
1022
Willy Tarreaubb660302014-05-07 19:47:02 +02001023 goto transient_error;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +02001024 }
1025
Willy Tarreau3f0d02b2019-02-25 19:23:37 +01001026 /* increase the per-process number of cumulated sessions, this
1027 * may only be done once l->accept() has accepted the connection.
1028 */
Willy Tarreau93e7c002013-10-07 18:51:07 +02001029 if (!(l->options & LI_O_UNLIMITED)) {
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +02001030 count = update_freq_ctr(&global.sess_per_sec, 1);
1031 HA_ATOMIC_UPDATE_MAX(&global.sps_max, count);
Willy Tarreau93e7c002013-10-07 18:51:07 +02001032 }
Willy Tarreaue43d5322013-10-07 20:01:52 +02001033#ifdef USE_OPENSSL
1034 if (!(l->options & LI_O_UNLIMITED) && l->bind_conf && l->bind_conf->is_ssl) {
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +02001035 count = update_freq_ctr(&global.ssl_per_sec, 1);
1036 HA_ATOMIC_UPDATE_MAX(&global.ssl_max, count);
Willy Tarreaue43d5322013-10-07 20:01:52 +02001037 }
1038#endif
Willy Tarreau93e7c002013-10-07 18:51:07 +02001039
Willy Tarreau3f0d02b2019-02-25 19:23:37 +01001040 } /* end of for (max_accept--) */
Willy Tarreaubbebbbf2012-05-07 21:22:09 +02001041
Christopher Faulet8d8aa0d2017-05-30 15:36:50 +02001042 end:
Willy Tarreau3f0d02b2019-02-25 19:23:37 +01001043 if (next_conn)
Olivier Houchard64213e92019-03-08 18:52:57 +01001044 _HA_ATOMIC_SUB(&l->nbconn, 1);
Willy Tarreau741b4d62019-02-25 15:02:04 +01001045
Willy Tarreau82c97892019-02-27 19:32:32 +01001046 if (p && next_feconn)
Olivier Houchard64213e92019-03-08 18:52:57 +01001047 _HA_ATOMIC_SUB(&p->feconn, 1);
Willy Tarreau82c97892019-02-27 19:32:32 +01001048
1049 if (next_actconn)
Olivier Houchard64213e92019-03-08 18:52:57 +01001050 _HA_ATOMIC_SUB(&actconn, 1);
Willy Tarreau82c97892019-02-27 19:32:32 +01001051
Willy Tarreaua8cf66b2019-02-27 16:49:00 +01001052 if ((l->state == LI_FULL && (!l->maxconn || l->nbconn < l->maxconn)) ||
Willy Tarreaucdcba112019-12-11 15:06:30 +01001053 (l->state == LI_LIMITED &&
1054 ((!p || p->feconn < p->maxconn) && (actconn < global.maxconn) &&
1055 (!tick_isset(global_listener_queue_task->expire) ||
1056 tick_is_expired(global_listener_queue_task->expire, now_ms))))) {
Willy Tarreau3f0d02b2019-02-25 19:23:37 +01001057 /* at least one thread has to this when quitting */
1058 resume_listener(l);
1059
1060 /* Dequeues all of the listeners waiting for a resource */
Willy Tarreau241797a2019-12-10 14:10:52 +01001061 dequeue_all_listeners();
Willy Tarreau3f0d02b2019-02-25 19:23:37 +01001062
Olivier Houchard859dc802019-08-08 15:47:21 +02001063 if (p && !MT_LIST_ISEMPTY(&p->listener_queue) &&
Willy Tarreau3f0d02b2019-02-25 19:23:37 +01001064 (!p->fe_sps_lim || freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0) > 0))
Willy Tarreau241797a2019-12-10 14:10:52 +01001065 dequeue_proxy_listeners(p);
Willy Tarreau3f0d02b2019-02-25 19:23:37 +01001066 }
Willy Tarreau4c044e22019-12-05 07:40:32 +01001067
Willy Tarreau92079932019-12-10 09:30:05 +01001068 /* Now it's getting tricky. The listener was supposed to be in LI_READY
1069 * state but in the mean time we might have changed it to LI_FULL or
1070 * LI_LIMITED, and another thread might also have turned it to
1071 * LI_PAUSED, LI_LISTEN or even LI_INI when stopping a proxy. We must
1072 * be certain to keep the FD enabled when in the READY state but we
1073 * must also stop it for other states that we might have switched to
1074 * while others re-enabled polling.
1075 */
1076 HA_SPIN_LOCK(LISTENER_LOCK, &l->lock);
1077 if (l->state == LI_READY) {
1078 if (max_accept > 0)
1079 fd_cant_recv(fd);
1080 else
1081 fd_done_recv(fd);
1082 } else if (l->state > LI_ASSIGNED) {
Willy Tarreau4c044e22019-12-05 07:40:32 +01001083 fd_stop_recv(l->fd);
Willy Tarreau92079932019-12-10 09:30:05 +01001084 }
1085 HA_SPIN_UNLOCK(LISTENER_LOCK, &l->lock);
Willy Tarreau0591bf72019-12-10 12:01:21 +01001086 return;
1087
1088 transient_error:
1089 /* pause the listener for up to 100 ms */
1090 expire = tick_add(now_ms, 100);
1091
1092 limit_global:
1093 /* (re-)queue the listener to the global queue and set it to expire no
1094 * later than <expire> ahead. The listener turns to LI_LIMITED.
1095 */
1096 limit_listener(l, &global_listener_queue);
1097 task_schedule(global_listener_queue_task, expire);
1098 goto end;
1099
1100 limit_proxy:
1101 /* (re-)queue the listener to the proxy's queue and set it to expire no
1102 * later than <expire> ahead. The listener turns to LI_LIMITED.
1103 */
1104 limit_listener(l, &p->listener_queue);
Willy Tarreaueeea8082020-01-08 19:15:07 +01001105 if (p->task && tick_isset(expire))
1106 task_schedule(p->task, expire);
Willy Tarreau0591bf72019-12-10 12:01:21 +01001107 goto end;
Willy Tarreaubbebbbf2012-05-07 21:22:09 +02001108}
1109
Willy Tarreau05f50472017-09-15 09:19:58 +02001110/* Notify the listener that a connection initiated from it was released. This
1111 * is used to keep the connection count consistent and to possibly re-open
1112 * listening when it was limited.
1113 */
1114void listener_release(struct listener *l)
1115{
1116 struct proxy *fe = l->bind_conf->frontend;
1117
1118 if (!(l->options & LI_O_UNLIMITED))
Olivier Houchard64213e92019-03-08 18:52:57 +01001119 _HA_ATOMIC_SUB(&actconn, 1);
Willy Tarreau82c97892019-02-27 19:32:32 +01001120 if (fe)
Olivier Houchard64213e92019-03-08 18:52:57 +01001121 _HA_ATOMIC_SUB(&fe->feconn, 1);
1122 _HA_ATOMIC_SUB(&l->nbconn, 1);
1123 _HA_ATOMIC_SUB(&l->thr_conn[tid], 1);
Willy Tarreau82c97892019-02-27 19:32:32 +01001124
1125 if (l->state == LI_FULL || l->state == LI_LIMITED)
Willy Tarreau05f50472017-09-15 09:19:58 +02001126 resume_listener(l);
1127
1128 /* Dequeues all of the listeners waiting for a resource */
Willy Tarreau241797a2019-12-10 14:10:52 +01001129 dequeue_all_listeners();
Willy Tarreau05f50472017-09-15 09:19:58 +02001130
Olivier Houchard859dc802019-08-08 15:47:21 +02001131 if (!MT_LIST_ISEMPTY(&fe->listener_queue) &&
Willy Tarreau05f50472017-09-15 09:19:58 +02001132 (!fe->fe_sps_lim || freq_ctr_remain(&fe->fe_sess_per_sec, fe->fe_sps_lim, 0) > 0))
Willy Tarreau241797a2019-12-10 14:10:52 +01001133 dequeue_proxy_listeners(fe);
Willy Tarreau05f50472017-09-15 09:19:58 +02001134}
1135
Willy Tarreauf2cb1692019-07-11 10:08:31 +02001136/* resume listeners waiting in the local listener queue. They are still in LI_LIMITED state */
1137static struct task *listener_queue_process(struct task *t, void *context, unsigned short state)
1138{
1139 struct work_list *wl = context;
1140 struct listener *l;
1141
Olivier Houchard859dc802019-08-08 15:47:21 +02001142 while ((l = MT_LIST_POP(&wl->head, struct listener *, wait_queue))) {
Willy Tarreauf2cb1692019-07-11 10:08:31 +02001143 /* The listeners are still in the LI_LIMITED state */
1144 resume_listener(l);
1145 }
1146 return t;
1147}
1148
1149/* Initializes the listener queues. Returns 0 on success, otherwise ERR_* flags */
1150static int listener_queue_init()
1151{
1152 local_listener_queue = work_list_create(global.nbthread, listener_queue_process, NULL);
1153 if (!local_listener_queue) {
1154 ha_alert("Out of memory while initializing listener queues.\n");
1155 return ERR_FATAL|ERR_ABORT;
1156 }
Willy Tarreaua1d97f82019-12-10 11:18:41 +01001157
1158 global_listener_queue_task = task_new(MAX_THREADS_MASK);
1159 if (!global_listener_queue_task) {
1160 ha_alert("Out of memory when initializing global listener queue\n");
1161 return ERR_FATAL|ERR_ABORT;
1162 }
1163 /* very simple initialization, users will queue the task if needed */
1164 global_listener_queue_task->context = NULL; /* not even a context! */
1165 global_listener_queue_task->process = manage_global_listener_queue;
1166
Willy Tarreauf2cb1692019-07-11 10:08:31 +02001167 return 0;
1168}
1169
1170static void listener_queue_deinit()
1171{
1172 work_list_destroy(local_listener_queue, global.nbthread);
Willy Tarreaua1d97f82019-12-10 11:18:41 +01001173 task_destroy(global_listener_queue_task);
1174 global_listener_queue_task = NULL;
Willy Tarreauf2cb1692019-07-11 10:08:31 +02001175}
1176
/* Hook the queue setup into the config post-parsing stage and its teardown
 * into the post-deinit stage (as the macro names indicate; see their
 * definitions for the exact calling point).
 */
REGISTER_CONFIG_POSTPARSER("multi-threaded listener queue", listener_queue_init);
REGISTER_POST_DEINIT(listener_queue_deinit);
1179
Willy Tarreaua1d97f82019-12-10 11:18:41 +01001180
1181/* This is the global management task for listeners. It enables listeners waiting
1182 * for global resources when there are enough free resource, or at least once in
1183 * a while. It is designed to be called as a task.
1184 */
1185static struct task *manage_global_listener_queue(struct task *t, void *context, unsigned short state)
1186{
1187 /* If there are still too many concurrent connections, let's wait for
1188 * some of them to go away. We don't need to re-arm the timer because
1189 * each of them will scan the queue anyway.
1190 */
1191 if (unlikely(actconn >= global.maxconn))
1192 goto out;
1193
1194 /* We should periodically try to enable listeners waiting for a global
1195 * resource here, because it is possible, though very unlikely, that
1196 * they have been blocked by a temporary lack of global resource such
1197 * as a file descriptor or memory and that the temporary condition has
1198 * disappeared.
1199 */
1200 dequeue_all_listeners();
1201
1202 out:
1203 t->expire = TICK_ETERNITY;
1204 task_queue(t);
1205 return t;
1206}
1207
Willy Tarreau26982662012-09-12 23:17:10 +02001208/*
1209 * Registers the bind keyword list <kwl> as a list of valid keywords for next
1210 * parsing sessions.
1211 */
1212void bind_register_keywords(struct bind_kw_list *kwl)
1213{
1214 LIST_ADDQ(&bind_keywords.list, &kwl->list);
1215}
1216
1217/* Return a pointer to the bind keyword <kw>, or NULL if not found. If the
1218 * keyword is found with a NULL ->parse() function, then an attempt is made to
1219 * find one with a valid ->parse() function. This way it is possible to declare
1220 * platform-dependant, known keywords as NULL, then only declare them as valid
1221 * if some options are met. Note that if the requested keyword contains an
1222 * opening parenthesis, everything from this point is ignored.
1223 */
1224struct bind_kw *bind_find_kw(const char *kw)
1225{
1226 int index;
1227 const char *kwend;
1228 struct bind_kw_list *kwl;
1229 struct bind_kw *ret = NULL;
1230
1231 kwend = strchr(kw, '(');
1232 if (!kwend)
1233 kwend = kw + strlen(kw);
1234
1235 list_for_each_entry(kwl, &bind_keywords.list, list) {
1236 for (index = 0; kwl->kw[index].kw != NULL; index++) {
1237 if ((strncmp(kwl->kw[index].kw, kw, kwend - kw) == 0) &&
1238 kwl->kw[index].kw[kwend-kw] == 0) {
1239 if (kwl->kw[index].parse)
1240 return &kwl->kw[index]; /* found it !*/
1241 else
1242 ret = &kwl->kw[index]; /* may be OK */
1243 }
1244 }
1245 }
1246 return ret;
1247}
1248
Willy Tarreau8638f482012-09-18 18:01:17 +02001249/* Dumps all registered "bind" keywords to the <out> string pointer. The
1250 * unsupported keywords are only dumped if their supported form was not
1251 * found.
1252 */
1253void bind_dump_kws(char **out)
1254{
1255 struct bind_kw_list *kwl;
1256 int index;
1257
1258 *out = NULL;
1259 list_for_each_entry(kwl, &bind_keywords.list, list) {
1260 for (index = 0; kwl->kw[index].kw != NULL; index++) {
1261 if (kwl->kw[index].parse ||
1262 bind_find_kw(kwl->kw[index].kw) == &kwl->kw[index]) {
Willy Tarreau51fb7652012-09-18 18:24:39 +02001263 memprintf(out, "%s[%4s] %s%s%s\n", *out ? *out : "",
1264 kwl->scope,
Willy Tarreau8638f482012-09-18 18:01:17 +02001265 kwl->kw[index].kw,
Willy Tarreau51fb7652012-09-18 18:24:39 +02001266 kwl->kw[index].skip ? " <arg>" : "",
1267 kwl->kw[index].parse ? "" : " (not supported)");
Willy Tarreau8638f482012-09-18 18:01:17 +02001268 }
1269 }
1270 }
1271}
1272
Willy Tarreau645513a2010-05-24 20:55:15 +02001273/************************************************************************/
Willy Tarreau0ccb7442013-01-07 22:54:17 +01001274/* All supported sample and ACL keywords must be declared here. */
Willy Tarreau645513a2010-05-24 20:55:15 +02001275/************************************************************************/
1276
Willy Tarreaua5e37562011-12-16 17:06:15 +01001277/* set temp integer to the number of connexions to the same listening socket */
Willy Tarreau645513a2010-05-24 20:55:15 +02001278static int
Thierry FOURNIER0786d052015-05-11 15:42:45 +02001279smp_fetch_dconn(const struct arg *args, struct sample *smp, const char *kw, void *private)
Willy Tarreau645513a2010-05-24 20:55:15 +02001280{
Thierry FOURNIER8c542ca2015-08-19 09:00:18 +02001281 smp->data.type = SMP_T_SINT;
Thierry FOURNIER136f9d32015-08-19 09:07:19 +02001282 smp->data.u.sint = smp->sess->listener->nbconn;
Willy Tarreau645513a2010-05-24 20:55:15 +02001283 return 1;
1284}
1285
Willy Tarreaua5e37562011-12-16 17:06:15 +01001286/* set temp integer to the id of the socket (listener) */
Willy Tarreau645513a2010-05-24 20:55:15 +02001287static int
Thierry FOURNIER0786d052015-05-11 15:42:45 +02001288smp_fetch_so_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
Willy Tarreau37406352012-04-23 16:16:37 +02001289{
Thierry FOURNIER8c542ca2015-08-19 09:00:18 +02001290 smp->data.type = SMP_T_SINT;
Thierry FOURNIER136f9d32015-08-19 09:07:19 +02001291 smp->data.u.sint = smp->sess->listener->luid;
Willy Tarreau645513a2010-05-24 20:55:15 +02001292 return 1;
1293}
1294
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001295/* parse the "accept-proxy" bind keyword */
Willy Tarreau4348fad2012-09-20 16:48:07 +02001296static int bind_parse_accept_proxy(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001297{
1298 struct listener *l;
1299
Willy Tarreau4348fad2012-09-20 16:48:07 +02001300 list_for_each_entry(l, &conf->listeners, by_bind)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001301 l->options |= LI_O_ACC_PROXY;
1302
1303 return 0;
1304}
1305
Bertrand Jacquin93b227d2016-06-04 15:11:10 +01001306/* parse the "accept-netscaler-cip" bind keyword */
1307static int bind_parse_accept_netscaler_cip(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1308{
1309 struct listener *l;
1310 uint32_t val;
1311
1312 if (!*args[cur_arg + 1]) {
1313 memprintf(err, "'%s' : missing value", args[cur_arg]);
1314 return ERR_ALERT | ERR_FATAL;
1315 }
1316
1317 val = atol(args[cur_arg + 1]);
1318 if (val <= 0) {
Willy Tarreaue2711c72019-02-27 15:39:41 +01001319 memprintf(err, "'%s' : invalid value %d, must be >= 0", args[cur_arg], val);
Bertrand Jacquin93b227d2016-06-04 15:11:10 +01001320 return ERR_ALERT | ERR_FATAL;
1321 }
1322
1323 list_for_each_entry(l, &conf->listeners, by_bind) {
1324 l->options |= LI_O_ACC_CIP;
1325 conf->ns_cip_magic = val;
1326 }
1327
1328 return 0;
1329}
1330
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001331/* parse the "backlog" bind keyword */
Willy Tarreau4348fad2012-09-20 16:48:07 +02001332static int bind_parse_backlog(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001333{
1334 struct listener *l;
1335 int val;
1336
1337 if (!*args[cur_arg + 1]) {
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001338 memprintf(err, "'%s' : missing value", args[cur_arg]);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001339 return ERR_ALERT | ERR_FATAL;
1340 }
1341
1342 val = atol(args[cur_arg + 1]);
Willy Tarreaue2711c72019-02-27 15:39:41 +01001343 if (val < 0) {
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001344 memprintf(err, "'%s' : invalid value %d, must be > 0", args[cur_arg], val);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001345 return ERR_ALERT | ERR_FATAL;
1346 }
1347
Willy Tarreau4348fad2012-09-20 16:48:07 +02001348 list_for_each_entry(l, &conf->listeners, by_bind)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001349 l->backlog = val;
1350
1351 return 0;
1352}
1353
1354/* parse the "id" bind keyword */
Willy Tarreau4348fad2012-09-20 16:48:07 +02001355static int bind_parse_id(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001356{
1357 struct eb32_node *node;
Willy Tarreau4348fad2012-09-20 16:48:07 +02001358 struct listener *l, *new;
Thierry Fourniere7fe8eb2016-02-26 08:45:58 +01001359 char *error;
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001360
Willy Tarreau4348fad2012-09-20 16:48:07 +02001361 if (conf->listeners.n != conf->listeners.p) {
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001362 memprintf(err, "'%s' can only be used with a single socket", args[cur_arg]);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001363 return ERR_ALERT | ERR_FATAL;
1364 }
1365
1366 if (!*args[cur_arg + 1]) {
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001367 memprintf(err, "'%s' : expects an integer argument", args[cur_arg]);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001368 return ERR_ALERT | ERR_FATAL;
1369 }
1370
Willy Tarreau4348fad2012-09-20 16:48:07 +02001371 new = LIST_NEXT(&conf->listeners, struct listener *, by_bind);
Thierry Fourniere7fe8eb2016-02-26 08:45:58 +01001372 new->luid = strtol(args[cur_arg + 1], &error, 10);
1373 if (*error != '\0') {
1374 memprintf(err, "'%s' : expects an integer argument, found '%s'", args[cur_arg], args[cur_arg + 1]);
1375 return ERR_ALERT | ERR_FATAL;
1376 }
Willy Tarreau4348fad2012-09-20 16:48:07 +02001377 new->conf.id.key = new->luid;
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001378
Willy Tarreau4348fad2012-09-20 16:48:07 +02001379 if (new->luid <= 0) {
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001380 memprintf(err, "'%s' : custom id has to be > 0", args[cur_arg]);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001381 return ERR_ALERT | ERR_FATAL;
1382 }
1383
Willy Tarreau4348fad2012-09-20 16:48:07 +02001384 node = eb32_lookup(&px->conf.used_listener_id, new->luid);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001385 if (node) {
1386 l = container_of(node, struct listener, conf.id);
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001387 memprintf(err, "'%s' : custom id %d already used at %s:%d ('bind %s')",
1388 args[cur_arg], l->luid, l->bind_conf->file, l->bind_conf->line,
1389 l->bind_conf->arg);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001390 return ERR_ALERT | ERR_FATAL;
1391 }
1392
Willy Tarreau4348fad2012-09-20 16:48:07 +02001393 eb32_insert(&px->conf.used_listener_id, &new->conf.id);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001394 return 0;
1395}
1396
1397/* parse the "maxconn" bind keyword */
Willy Tarreau4348fad2012-09-20 16:48:07 +02001398static int bind_parse_maxconn(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001399{
1400 struct listener *l;
1401 int val;
1402
1403 if (!*args[cur_arg + 1]) {
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001404 memprintf(err, "'%s' : missing value", args[cur_arg]);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001405 return ERR_ALERT | ERR_FATAL;
1406 }
1407
1408 val = atol(args[cur_arg + 1]);
Willy Tarreaua8cf66b2019-02-27 16:49:00 +01001409 if (val < 0) {
1410 memprintf(err, "'%s' : invalid value %d, must be >= 0", args[cur_arg], val);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001411 return ERR_ALERT | ERR_FATAL;
1412 }
1413
Willy Tarreau4348fad2012-09-20 16:48:07 +02001414 list_for_each_entry(l, &conf->listeners, by_bind)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001415 l->maxconn = val;
1416
1417 return 0;
1418}
1419
1420/* parse the "name" bind keyword */
Willy Tarreau4348fad2012-09-20 16:48:07 +02001421static int bind_parse_name(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001422{
1423 struct listener *l;
1424
1425 if (!*args[cur_arg + 1]) {
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001426 memprintf(err, "'%s' : missing name", args[cur_arg]);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001427 return ERR_ALERT | ERR_FATAL;
1428 }
1429
Willy Tarreau4348fad2012-09-20 16:48:07 +02001430 list_for_each_entry(l, &conf->listeners, by_bind)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001431 l->name = strdup(args[cur_arg + 1]);
1432
1433 return 0;
1434}
1435
1436/* parse the "nice" bind keyword */
Willy Tarreau4348fad2012-09-20 16:48:07 +02001437static int bind_parse_nice(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001438{
1439 struct listener *l;
1440 int val;
1441
1442 if (!*args[cur_arg + 1]) {
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001443 memprintf(err, "'%s' : missing value", args[cur_arg]);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001444 return ERR_ALERT | ERR_FATAL;
1445 }
1446
1447 val = atol(args[cur_arg + 1]);
1448 if (val < -1024 || val > 1024) {
Willy Tarreaueb6cead2012-09-20 19:43:14 +02001449 memprintf(err, "'%s' : invalid value %d, allowed range is -1024..1024", args[cur_arg], val);
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001450 return ERR_ALERT | ERR_FATAL;
1451 }
1452
Willy Tarreau4348fad2012-09-20 16:48:07 +02001453 list_for_each_entry(l, &conf->listeners, by_bind)
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001454 l->nice = val;
1455
1456 return 0;
1457}
1458
Willy Tarreau6ae1ba62014-05-07 19:01:58 +02001459/* parse the "process" bind keyword */
1460static int bind_parse_process(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1461{
Christopher Fauletc644fa92017-11-23 22:44:11 +01001462 char *slash;
1463 unsigned long proc = 0, thread = 0;
Willy Tarreau6ae1ba62014-05-07 19:01:58 +02001464
Christopher Fauletc644fa92017-11-23 22:44:11 +01001465 if ((slash = strchr(args[cur_arg + 1], '/')) != NULL)
1466 *slash = 0;
1467
Willy Tarreauff9c9142019-02-07 10:39:36 +01001468 if (parse_process_number(args[cur_arg + 1], &proc, MAX_PROCS, NULL, err)) {
Christopher Fauletf1f0c5f2017-11-22 12:06:43 +01001469 memprintf(err, "'%s' : %s", args[cur_arg], *err);
Willy Tarreau6ae1ba62014-05-07 19:01:58 +02001470 return ERR_ALERT | ERR_FATAL;
1471 }
1472
Christopher Fauletc644fa92017-11-23 22:44:11 +01001473 if (slash) {
Willy Tarreauc9a82e42019-01-26 13:25:14 +01001474 if (parse_process_number(slash+1, &thread, MAX_THREADS, NULL, err)) {
Christopher Fauletc644fa92017-11-23 22:44:11 +01001475 memprintf(err, "'%s' : %s", args[cur_arg], *err);
1476 return ERR_ALERT | ERR_FATAL;
1477 }
1478 *slash = '/';
1479 }
1480
1481 conf->bind_proc |= proc;
Willy Tarreaua36b3242019-02-02 13:14:34 +01001482 conf->bind_thread |= thread;
Willy Tarreau6ae1ba62014-05-07 19:01:58 +02001483 return 0;
1484}
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001485
Christopher Fauleta717b992018-04-10 14:43:00 +02001486/* parse the "proto" bind keyword */
1487static int bind_parse_proto(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1488{
1489 struct ist proto;
1490
1491 if (!*args[cur_arg + 1]) {
1492 memprintf(err, "'%s' : missing value", args[cur_arg]);
1493 return ERR_ALERT | ERR_FATAL;
1494 }
1495
1496 proto = ist2(args[cur_arg + 1], strlen(args[cur_arg + 1]));
1497 conf->mux_proto = get_mux_proto(proto);
1498 if (!conf->mux_proto) {
1499 memprintf(err, "'%s' : unknown MUX protocol '%s'", args[cur_arg], args[cur_arg+1]);
1500 return ERR_ALERT | ERR_FATAL;
1501 }
Christopher Fauleta717b992018-04-10 14:43:00 +02001502 return 0;
1503}
1504
Willy Tarreau7ac908b2019-02-27 12:02:18 +01001505/* config parser for global "tune.listener.multi-queue", accepts "on" or "off" */
1506static int cfg_parse_tune_listener_mq(char **args, int section_type, struct proxy *curpx,
1507 struct proxy *defpx, const char *file, int line,
1508 char **err)
1509{
1510 if (too_many_args(1, args, err, NULL))
1511 return -1;
1512
1513 if (strcmp(args[1], "on") == 0)
1514 global.tune.options |= GTUNE_LISTENER_MQ;
1515 else if (strcmp(args[1], "off") == 0)
1516 global.tune.options &= ~GTUNE_LISTENER_MQ;
1517 else {
1518 memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
1519 return -1;
1520 }
1521 return 0;
1522}
1523
Willy Tarreau61612d42012-04-19 18:42:05 +02001524/* Note: must not be declared <const> as its list will be overwritten.
1525 * Please take care of keeping this list alphabetically sorted.
1526 */
Willy Tarreaudc13c112013-06-21 23:16:39 +02001527static struct sample_fetch_kw_list smp_kws = {ILH, {
Thierry FOURNIER07ee64e2015-07-06 23:43:03 +02001528 { "dst_conn", smp_fetch_dconn, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
1529 { "so_id", smp_fetch_so_id, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
Willy Tarreau0ccb7442013-01-07 22:54:17 +01001530 { /* END */ },
1531}};
1532
Willy Tarreau0108d902018-11-25 19:14:37 +01001533INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
1534
Willy Tarreau0ccb7442013-01-07 22:54:17 +01001535/* Note: must not be declared <const> as its list will be overwritten.
1536 * Please take care of keeping this list alphabetically sorted.
1537 */
Willy Tarreaudc13c112013-06-21 23:16:39 +02001538static struct acl_kw_list acl_kws = {ILH, {
Willy Tarreau0ccb7442013-01-07 22:54:17 +01001539 { /* END */ },
Willy Tarreau645513a2010-05-24 20:55:15 +02001540}};
1541
Willy Tarreau0108d902018-11-25 19:14:37 +01001542INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
1543
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001544/* Note: must not be declared <const> as its list will be overwritten.
1545 * Please take care of keeping this list alphabetically sorted, doing so helps
1546 * all code contributors.
1547 * Optional keywords are also declared with a NULL ->parse() function so that
1548 * the config parser can report an appropriate error when a known keyword was
1549 * not enabled.
1550 */
Willy Tarreau51fb7652012-09-18 18:24:39 +02001551static struct bind_kw_list bind_kws = { "ALL", { }, {
Bertrand Jacquin93b227d2016-06-04 15:11:10 +01001552 { "accept-netscaler-cip", bind_parse_accept_netscaler_cip, 1 }, /* enable NetScaler Client IP insertion protocol */
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001553 { "accept-proxy", bind_parse_accept_proxy, 0 }, /* enable PROXY protocol */
1554 { "backlog", bind_parse_backlog, 1 }, /* set backlog of listening socket */
1555 { "id", bind_parse_id, 1 }, /* set id of listening socket */
1556 { "maxconn", bind_parse_maxconn, 1 }, /* set maxconn of listening socket */
1557 { "name", bind_parse_name, 1 }, /* set name of listening socket */
1558 { "nice", bind_parse_nice, 1 }, /* set nice of listening socket */
Willy Tarreau6ae1ba62014-05-07 19:01:58 +02001559 { "process", bind_parse_process, 1 }, /* set list of allowed process for this socket */
Christopher Fauleta717b992018-04-10 14:43:00 +02001560 { "proto", bind_parse_proto, 1 }, /* set the proto to use for all incoming connections */
Willy Tarreau0ccb7442013-01-07 22:54:17 +01001561 { /* END */ },
Willy Tarreau3dcc3412012-09-18 17:17:28 +02001562}};
1563
Willy Tarreau0108d902018-11-25 19:14:37 +01001564INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
1565
Willy Tarreau7ac908b2019-02-27 12:02:18 +01001566/* config keyword parsers */
1567static struct cfg_kw_list cfg_kws = {ILH, {
1568 { CFG_GLOBAL, "tune.listener.multi-queue", cfg_parse_tune_listener_mq },
1569 { 0, NULL, NULL }
1570}};
1571
1572INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
1573
Willy Tarreau645513a2010-05-24 20:55:15 +02001574/*
1575 * Local variables:
1576 * c-indent-level: 8
1577 * c-basic-offset: 8
1578 * End:
1579 */