blob: 751fc49aac625c4ae6c0e1e763d60b2d042aaca4 [file] [log] [blame]
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +01001/*
2 * AF_INET/AF_INET6 QUIC protocol layer.
3 *
4 * Copyright 2020 Frédéric Lécaille <flecaille@haproxy.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <ctype.h>
14#include <errno.h>
15#include <fcntl.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <time.h>
20
21#include <sys/param.h>
22#include <sys/socket.h>
23#include <sys/types.h>
24
25#include <netinet/udp.h>
26#include <netinet/in.h>
27
28#include <haproxy/api.h>
29#include <haproxy/arg.h>
30#include <haproxy/connection.h>
31#include <haproxy/errors.h>
32#include <haproxy/fd.h>
33#include <haproxy/global.h>
34#include <haproxy/list.h>
35#include <haproxy/listener.h>
36#include <haproxy/log.h>
37#include <haproxy/namespace.h>
38#include <haproxy/port_range.h>
39#include <haproxy/protocol.h>
40#include <haproxy/proto_quic.h>
41#include <haproxy/proto_udp.h>
42#include <haproxy/proxy-t.h>
43#include <haproxy/sock.h>
Frédéric Lécaille70da8892020-11-06 15:49:49 +010044#include <haproxy/quic_sock.h>
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +010045#include <haproxy/sock_inet.h>
46#include <haproxy/tools.h>
47
48
/* Forward declarations of the file-local callbacks referenced by the
 * protocol descriptors below.
 */

/* listener management */
static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen);
static void quic_enable_listener(struct listener *listener);
static void quic_disable_listener(struct listener *listener);

/* outgoing connections */
static int quic_connect_server(struct connection *conn, int flags);
53
54/* Note: must not be declared <const> as its list will be overwritten */
55struct protocol proto_quic4 = {
56 .name = "quic4",
57
58 /* connection layer */
59 .ctrl_type = SOCK_STREAM,
60 .listen = quic_bind_listener,
61 .enable = quic_enable_listener,
62 .disable = quic_disable_listener,
63 .add = default_add_listener,
64 .unbind = default_unbind_listener,
65 .suspend = default_suspend_listener,
66 .resume = default_resume_listener,
Frédéric Lécaille70da8892020-11-06 15:49:49 +010067 .accept_conn = quic_sock_accept_conn,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +010068 .connect = quic_connect_server,
69
70 /* binding layer */
71 .rx_suspend = udp_suspend_receiver,
72 .rx_resume = udp_resume_receiver,
73
74 /* address family */
75 .fam = &proto_fam_inet4,
76
77 /* socket layer */
78 .sock_type = SOCK_DGRAM,
79 .sock_prot = IPPROTO_UDP,
80 .rx_enable = sock_enable,
81 .rx_disable = sock_disable,
82 .rx_unbind = sock_unbind,
Frédéric Lécaille70da8892020-11-06 15:49:49 +010083 .rx_listening = quic_sock_accepting_conn,
84 .default_iocb = quic_sock_fd_iocb,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +010085 .receivers = LIST_HEAD_INIT(proto_quic4.receivers),
86 .nb_receivers = 0,
87};
88
89INITCALL1(STG_REGISTER, protocol_register, &proto_quic4);
90
91/* Note: must not be declared <const> as its list will be overwritten */
92struct protocol proto_quic6 = {
93 .name = "quic6",
94
95 /* connection layer */
96 .ctrl_type = SOCK_STREAM,
97 .listen = quic_bind_listener,
98 .enable = quic_enable_listener,
99 .disable = quic_disable_listener,
100 .add = default_add_listener,
101 .unbind = default_unbind_listener,
102 .suspend = default_suspend_listener,
103 .resume = default_resume_listener,
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100104 .accept_conn = quic_sock_accept_conn,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +0100105 .connect = quic_connect_server,
106
107 /* binding layer */
108 .rx_suspend = udp_suspend_receiver,
109 .rx_resume = udp_resume_receiver,
110
111 /* address family */
112 .fam = &proto_fam_inet6,
113
114 /* socket layer */
115 .sock_type = SOCK_DGRAM,
116 .sock_prot = IPPROTO_UDP,
117 .rx_enable = sock_enable,
118 .rx_disable = sock_disable,
119 .rx_unbind = sock_unbind,
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100120 .rx_listening = quic_sock_accepting_conn,
121 .default_iocb = quic_sock_fd_iocb,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +0100122 .receivers = LIST_HEAD_INIT(proto_quic6.receivers),
123 .nb_receivers = 0,
124};
125
126INITCALL1(STG_REGISTER, protocol_register, &proto_quic6);
127
128/* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
129 * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
130 * - 0 : ignore remote address (may even be a NULL pointer)
131 * - 1 : use provided address
132 * - 2 : use provided port
133 * - 3 : use both
134 *
135 * The function supports multiple foreign binding methods :
136 * - linux_tproxy: we directly bind to the foreign address
137 * The second one can be used as a fallback for the first one.
138 * This function returns 0 when everything's OK, 1 if it could not bind, to the
139 * local address, 2 if it could not bind to the foreign address.
140 */
141int quic_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
142{
143 struct sockaddr_storage bind_addr;
144 int foreign_ok = 0;
145 int ret;
146 static THREAD_LOCAL int ip_transp_working = 1;
147 static THREAD_LOCAL int ip6_transp_working = 1;
148
149 switch (local->ss_family) {
150 case AF_INET:
151 if (flags && ip_transp_working) {
152 /* This deserves some explanation. Some platforms will support
153 * multiple combinations of certain methods, so we try the
154 * supported ones until one succeeds.
155 */
156 if (sock_inet4_make_foreign(fd))
157 foreign_ok = 1;
158 else
159 ip_transp_working = 0;
160 }
161 break;
162 case AF_INET6:
163 if (flags && ip6_transp_working) {
164 if (sock_inet6_make_foreign(fd))
165 foreign_ok = 1;
166 else
167 ip6_transp_working = 0;
168 }
169 break;
170 }
171
172 if (flags) {
173 memset(&bind_addr, 0, sizeof(bind_addr));
174 bind_addr.ss_family = remote->ss_family;
175 switch (remote->ss_family) {
176 case AF_INET:
177 if (flags & 1)
178 ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
179 if (flags & 2)
180 ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
181 break;
182 case AF_INET6:
183 if (flags & 1)
184 ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
185 if (flags & 2)
186 ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
187 break;
188 default:
189 /* we don't want to try to bind to an unknown address family */
190 foreign_ok = 0;
191 }
192 }
193
194 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
195 if (foreign_ok) {
196 if (is_inet_addr(&bind_addr)) {
197 ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
198 if (ret < 0)
199 return 2;
200 }
201 }
202 else {
203 if (is_inet_addr(local)) {
204 ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
205 if (ret < 0)
206 return 1;
207 }
208 }
209
210 if (!flags)
211 return 0;
212
213 if (!foreign_ok)
214 /* we could not bind to a foreign address */
215 return 2;
216
217 return 0;
218}
219
220/*
221 * This function initiates a QUIC connection establishment to the target assigned
222 * to connection <conn> using (si->{target,dst}). A source address may be
223 * pointed to by conn->src in case of transparent proxying. Normal source
224 * bind addresses are still determined locally (due to the possible need of a
225 * source port). conn->target may point either to a valid server or to a backend,
226 * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
227 * supported. The <data> parameter is a boolean indicating whether there are data
228 * waiting for being sent or not, in order to adjust data write polling and on
229 * some platforms, the ability to avoid an empty initial ACK. The <flags> argument
230 * is not used.
231 *
232 * Note that a pending send_proxy message accounts for data.
233 *
234 * It can return one of :
235 * - SF_ERR_NONE if everything's OK
236 * - SF_ERR_SRVTO if there are no more servers
237 * - SF_ERR_SRVCL if the connection was refused by the server
238 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
239 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
240 * - SF_ERR_INTERNAL for any other purely internal errors
241 * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
242 *
243 * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
244 * it's invalid and the caller has nothing to do.
245 */
246
247int quic_connect_server(struct connection *conn, int flags)
248{
249 int fd;
250 struct server *srv;
251 struct proxy *be;
252 struct conn_src *src;
253 struct sockaddr_storage *addr;
254
255 conn->flags |= CO_FL_WAIT_L4_CONN; /* connection in progress */
256
257 switch (obj_type(conn->target)) {
258 case OBJ_TYPE_PROXY:
259 be = objt_proxy(conn->target);
260 srv = NULL;
261 break;
262 case OBJ_TYPE_SERVER:
263 srv = objt_server(conn->target);
264 be = srv->proxy;
265 break;
266 default:
267 conn->flags |= CO_FL_ERROR;
268 return SF_ERR_INTERNAL;
269 }
270
271 if (!conn->dst) {
272 conn->flags |= CO_FL_ERROR;
273 return SF_ERR_INTERNAL;
274 }
275
276 fd = conn->handle.fd = sock_create_server_socket(conn);
277
278 if (fd == -1) {
279 qfprintf(stderr, "Cannot get a server socket.\n");
280
281 if (errno == ENFILE) {
282 conn->err_code = CO_ER_SYS_FDLIM;
283 send_log(be, LOG_EMERG,
284 "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
285 be->id, global.maxsock);
286 }
287 else if (errno == EMFILE) {
288 conn->err_code = CO_ER_PROC_FDLIM;
289 send_log(be, LOG_EMERG,
290 "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
291 be->id, global.maxsock);
292 }
293 else if (errno == ENOBUFS || errno == ENOMEM) {
294 conn->err_code = CO_ER_SYS_MEMLIM;
295 send_log(be, LOG_EMERG,
296 "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
297 be->id, global.maxsock);
298 }
299 else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
300 conn->err_code = CO_ER_NOPROTO;
301 }
302 else
303 conn->err_code = CO_ER_SOCK_ERR;
304
305 /* this is a resource error */
306 conn->flags |= CO_FL_ERROR;
307 return SF_ERR_RESOURCE;
308 }
309
310 if (fd >= global.maxsock) {
311 /* do not log anything there, it's a normal condition when this option
312 * is used to serialize connections to a server !
313 */
314 ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
315 close(fd);
316 conn->err_code = CO_ER_CONF_FDLIM;
317 conn->flags |= CO_FL_ERROR;
318 return SF_ERR_PRXCOND; /* it is a configuration limit */
319 }
320
321 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1)) {
322 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
323 close(fd);
324 conn->err_code = CO_ER_SOCK_ERR;
325 conn->flags |= CO_FL_ERROR;
326 return SF_ERR_INTERNAL;
327 }
328
329 if (master == 1 && (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)) {
330 ha_alert("Cannot set CLOEXEC on client socket.\n");
331 close(fd);
332 conn->err_code = CO_ER_SOCK_ERR;
333 conn->flags |= CO_FL_ERROR;
334 return SF_ERR_INTERNAL;
335 }
336
337 /* allow specific binding :
338 * - server-specific at first
339 * - proxy-specific next
340 */
341 if (srv && srv->conn_src.opts & CO_SRC_BIND)
342 src = &srv->conn_src;
343 else if (be->conn_src.opts & CO_SRC_BIND)
344 src = &be->conn_src;
345 else
346 src = NULL;
347
348 if (src) {
349 int ret, flags = 0;
350
351 if (conn->src && is_inet_addr(conn->src)) {
352 switch (src->opts & CO_SRC_TPROXY_MASK) {
353 case CO_SRC_TPROXY_CLI:
354 conn_set_private(conn);
355 /* fall through */
356 case CO_SRC_TPROXY_ADDR:
357 flags = 3;
358 break;
359 case CO_SRC_TPROXY_CIP:
360 case CO_SRC_TPROXY_DYN:
361 conn_set_private(conn);
362 flags = 1;
363 break;
364 }
365 }
366
367#ifdef SO_BINDTODEVICE
368 /* Note: this might fail if not CAP_NET_RAW */
369 if (src->iface_name)
370 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
371#endif
372
373 if (src->sport_range) {
374 int attempts = 10; /* should be more than enough to find a spare port */
375 struct sockaddr_storage sa;
376
377 ret = 1;
378 memcpy(&sa, &src->source_addr, sizeof(sa));
379
380 do {
381 /* note: in case of retry, we may have to release a previously
382 * allocated port, hence this loop's construct.
383 */
384 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
385 fdinfo[fd].port_range = NULL;
386
387 if (!attempts)
388 break;
389 attempts--;
390
391 fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
392 if (!fdinfo[fd].local_port) {
393 conn->err_code = CO_ER_PORT_RANGE;
394 break;
395 }
396
397 fdinfo[fd].port_range = src->sport_range;
398 set_host_port(&sa, fdinfo[fd].local_port);
399
400 ret = quic_bind_socket(fd, flags, &sa, conn->src);
401 if (ret != 0)
402 conn->err_code = CO_ER_CANT_BIND;
403 } while (ret != 0); /* binding NOK */
404 }
405 else {
406#ifdef IP_BIND_ADDRESS_NO_PORT
407 static THREAD_LOCAL int bind_address_no_port = 1;
408 setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
409#endif
410 ret = quic_bind_socket(fd, flags, &src->source_addr, conn->src);
411 if (ret != 0)
412 conn->err_code = CO_ER_CANT_BIND;
413 }
414
415 if (unlikely(ret != 0)) {
416 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
417 fdinfo[fd].port_range = NULL;
418 close(fd);
419
420 if (ret == 1) {
421 ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
422 be->id);
423 send_log(be, LOG_EMERG,
424 "Cannot bind to source address before connect() for backend %s.\n",
425 be->id);
426 } else {
427 ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
428 be->id);
429 send_log(be, LOG_EMERG,
430 "Cannot bind to tproxy source address before connect() for backend %s.\n",
431 be->id);
432 }
433 conn->flags |= CO_FL_ERROR;
434 return SF_ERR_RESOURCE;
435 }
436 }
437
438 if (global.tune.server_sndbuf)
439 setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
440
441 if (global.tune.server_rcvbuf)
442 setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
443
444 addr = (conn->flags & CO_FL_SOCKS4) ? &srv->socks4_addr : conn->dst;
445 if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
446 if (errno == EINPROGRESS || errno == EALREADY) {
447 /* common case, let's wait for connect status */
448 conn->flags |= CO_FL_WAIT_L4_CONN;
449 }
450 else if (errno == EISCONN) {
451 /* should normally not happen but if so, indicates that it's OK */
452 conn->flags &= ~CO_FL_WAIT_L4_CONN;
453 }
454 else if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
455 char *msg;
456 if (errno == EAGAIN || errno == EADDRNOTAVAIL) {
457 msg = "no free ports";
458 conn->err_code = CO_ER_FREE_PORTS;
459 }
460 else {
461 msg = "local address already in use";
462 conn->err_code = CO_ER_ADDR_INUSE;
463 }
464
465 qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
466 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
467 fdinfo[fd].port_range = NULL;
468 close(fd);
469 send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
470 conn->flags |= CO_FL_ERROR;
471 return SF_ERR_RESOURCE;
472 } else if (errno == ETIMEDOUT) {
473 //qfprintf(stderr,"Connect(): ETIMEDOUT");
474 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
475 fdinfo[fd].port_range = NULL;
476 close(fd);
477 conn->err_code = CO_ER_SOCK_ERR;
478 conn->flags |= CO_FL_ERROR;
479 return SF_ERR_SRVTO;
480 } else {
481 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
482 //qfprintf(stderr,"Connect(): %d", errno);
483 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
484 fdinfo[fd].port_range = NULL;
485 close(fd);
486 conn->err_code = CO_ER_SOCK_ERR;
487 conn->flags |= CO_FL_ERROR;
488 return SF_ERR_SRVCL;
489 }
490 }
491 else {
492 /* connect() == 0, this is great! */
493 conn->flags &= ~CO_FL_WAIT_L4_CONN;
494 }
495
496 conn->flags |= CO_FL_ADDR_TO_SET;
497
498 conn_ctrl_init(conn); /* registers the FD */
499 fdtab[fd].linger_risk = 1; /* close hard if needed */
500
501 if (conn->flags & CO_FL_WAIT_L4_CONN) {
502 fd_want_send(fd);
503 fd_cant_send(fd);
504 fd_cant_recv(fd);
505 }
506
507 if (conn_xprt_init(conn) < 0) {
508 conn_full_close(conn);
509 conn->flags |= CO_FL_ERROR;
510 return SF_ERR_RESOURCE;
511 }
512
513 return SF_ERR_NONE; /* connection is OK */
514}
515
516/* This function tries to bind a QUIC4/6 listener. It may return a warning or
517 * an error message in <errmsg> if the message is at most <errlen> bytes long
518 * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
519 * The return value is composed from ERR_ABORT, ERR_WARN,
520 * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
521 * was alright and that no message was returned. ERR_RETRYABLE means that an
522 * error occurred but that it may vanish after a retry (eg: port in use), and
523 * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
524 * the meaning of the error, but just indicate that a message is present which
525 * should be displayed with the respective level. Last, ERR_ABORT indicates
526 * that it's pointless to try to start other listeners. No error message is
527 * returned if errlen is NULL.
528 */
529static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen)
530{
531 int err = ERR_NONE;
532 char *msg = NULL;
533
534 /* ensure we never return garbage */
535 if (errlen)
536 *errmsg = 0;
537
538 if (listener->state != LI_ASSIGNED)
539 return ERR_NONE; /* already bound */
540
541 if (!(listener->rx.flags & RX_F_BOUND)) {
542 msg = "receiving socket not bound";
543 goto udp_return;
544 }
545
546 listener_set_state(listener, LI_LISTEN);
547
548 udp_return:
549 if (msg && errlen) {
550 char pn[INET6_ADDRSTRLEN];
551
552 addr_to_str(&listener->rx.addr, pn, sizeof(pn));
553 snprintf(errmsg, errlen, "%s [%s:%d]", msg, pn, get_host_port(&listener->rx.addr));
554 }
555 return err;
556}
557
558/* Enable receipt of incoming connections for listener <l>. The receiver must
559 * still be valid. Does nothing in early boot (needs fd_updt).
560 */
561static void quic_enable_listener(struct listener *l)
562{
563 /* FIXME: The following statements are incorrect. This
564 * is the responsability of the QUIC xprt to stop accepting new
565 * connections.
566 */
567 if (fd_updt)
568 fd_want_recv(l->rx.fd);
569}
570
571/* Disable receipt of incoming connections for listener <l>. The receiver must
572 * still be valid. Does nothing in early boot (needs fd_updt).
573 */
574static void quic_disable_listener(struct listener *l)
575{
576 /* FIXME: The following statements are incorrect. This
577 * is the responsability of the QUIC xprt to start accepting new
578 * connections again.
579 */
580 if (fd_updt)
581 fd_stop_recv(l->rx.fd);
582}
583
584/*
585 * Local variables:
586 * c-indent-level: 8
587 * c-basic-offset: 8
588 * End:
589 */