/*
 * AF_INET/AF_INET6 QUIC protocol layer.
 *
 * Copyright 2020 Frédéric Lécaille <flecaille@haproxy.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/types.h>

#include <netinet/udp.h>
#include <netinet/in.h>

#include <haproxy/api.h>
#include <haproxy/arg.h>
#include <haproxy/connection.h>
#include <haproxy/errors.h>
#include <haproxy/fd.h>
#include <haproxy/global.h>
#include <haproxy/list.h>
#include <haproxy/listener.h>
#include <haproxy/log.h>
#include <haproxy/namespace.h>
#include <haproxy/port_range.h>
#include <haproxy/protocol.h>
#include <haproxy/proto_quic.h>
#include <haproxy/proto_udp.h>
#include <haproxy/proxy-t.h>
#include <haproxy/sock.h>
#include <haproxy/quic_sock.h>
#include <haproxy/sock_inet.h>
#include <haproxy/tools.h>


/* Forward declarations of the callbacks referenced by the protocol
 * descriptors below; their definitions come later in this file.
 */
static void quic_add_listener(struct protocol *proto, struct listener *listener);
static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen);
static int quic_connect_server(struct connection *conn, int flags);
static void quic_enable_listener(struct listener *listener);
static void quic_disable_listener(struct listener *listener);

55/* Note: must not be declared <const> as its list will be overwritten */
56struct protocol proto_quic4 = {
57 .name = "quic4",
58
59 /* connection layer */
60 .ctrl_type = SOCK_STREAM,
61 .listen = quic_bind_listener,
62 .enable = quic_enable_listener,
63 .disable = quic_disable_listener,
Frédéric Lécaille884f2e92020-11-23 14:23:21 +010064 .add = quic_add_listener,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +010065 .unbind = default_unbind_listener,
66 .suspend = default_suspend_listener,
67 .resume = default_resume_listener,
Frédéric Lécaille70da8892020-11-06 15:49:49 +010068 .accept_conn = quic_sock_accept_conn,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +010069 .connect = quic_connect_server,
70
71 /* binding layer */
72 .rx_suspend = udp_suspend_receiver,
73 .rx_resume = udp_resume_receiver,
74
75 /* address family */
76 .fam = &proto_fam_inet4,
77
78 /* socket layer */
79 .sock_type = SOCK_DGRAM,
80 .sock_prot = IPPROTO_UDP,
81 .rx_enable = sock_enable,
82 .rx_disable = sock_disable,
83 .rx_unbind = sock_unbind,
Frédéric Lécaille70da8892020-11-06 15:49:49 +010084 .rx_listening = quic_sock_accepting_conn,
85 .default_iocb = quic_sock_fd_iocb,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +010086 .receivers = LIST_HEAD_INIT(proto_quic4.receivers),
87 .nb_receivers = 0,
88};
89
90INITCALL1(STG_REGISTER, protocol_register, &proto_quic4);
91
92/* Note: must not be declared <const> as its list will be overwritten */
93struct protocol proto_quic6 = {
94 .name = "quic6",
95
96 /* connection layer */
97 .ctrl_type = SOCK_STREAM,
98 .listen = quic_bind_listener,
99 .enable = quic_enable_listener,
100 .disable = quic_disable_listener,
Frédéric Lécaille884f2e92020-11-23 14:23:21 +0100101 .add = quic_add_listener,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +0100102 .unbind = default_unbind_listener,
103 .suspend = default_suspend_listener,
104 .resume = default_resume_listener,
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100105 .accept_conn = quic_sock_accept_conn,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +0100106 .connect = quic_connect_server,
107
108 /* binding layer */
109 .rx_suspend = udp_suspend_receiver,
110 .rx_resume = udp_resume_receiver,
111
112 /* address family */
113 .fam = &proto_fam_inet6,
114
115 /* socket layer */
116 .sock_type = SOCK_DGRAM,
117 .sock_prot = IPPROTO_UDP,
118 .rx_enable = sock_enable,
119 .rx_disable = sock_disable,
120 .rx_unbind = sock_unbind,
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100121 .rx_listening = quic_sock_accepting_conn,
122 .default_iocb = quic_sock_fd_iocb,
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +0100123 .receivers = LIST_HEAD_INIT(proto_quic6.receivers),
124 .nb_receivers = 0,
125};
126
127INITCALL1(STG_REGISTER, protocol_register, &proto_quic6);
128
129/* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
130 * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
131 * - 0 : ignore remote address (may even be a NULL pointer)
132 * - 1 : use provided address
133 * - 2 : use provided port
134 * - 3 : use both
135 *
136 * The function supports multiple foreign binding methods :
137 * - linux_tproxy: we directly bind to the foreign address
138 * The second one can be used as a fallback for the first one.
139 * This function returns 0 when everything's OK, 1 if it could not bind, to the
140 * local address, 2 if it could not bind to the foreign address.
141 */
142int quic_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
143{
144 struct sockaddr_storage bind_addr;
145 int foreign_ok = 0;
146 int ret;
147 static THREAD_LOCAL int ip_transp_working = 1;
148 static THREAD_LOCAL int ip6_transp_working = 1;
149
150 switch (local->ss_family) {
151 case AF_INET:
152 if (flags && ip_transp_working) {
153 /* This deserves some explanation. Some platforms will support
154 * multiple combinations of certain methods, so we try the
155 * supported ones until one succeeds.
156 */
157 if (sock_inet4_make_foreign(fd))
158 foreign_ok = 1;
159 else
160 ip_transp_working = 0;
161 }
162 break;
163 case AF_INET6:
164 if (flags && ip6_transp_working) {
165 if (sock_inet6_make_foreign(fd))
166 foreign_ok = 1;
167 else
168 ip6_transp_working = 0;
169 }
170 break;
171 }
172
173 if (flags) {
174 memset(&bind_addr, 0, sizeof(bind_addr));
175 bind_addr.ss_family = remote->ss_family;
176 switch (remote->ss_family) {
177 case AF_INET:
178 if (flags & 1)
179 ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
180 if (flags & 2)
181 ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
182 break;
183 case AF_INET6:
184 if (flags & 1)
185 ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
186 if (flags & 2)
187 ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
188 break;
189 default:
190 /* we don't want to try to bind to an unknown address family */
191 foreign_ok = 0;
192 }
193 }
194
195 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
196 if (foreign_ok) {
197 if (is_inet_addr(&bind_addr)) {
198 ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
199 if (ret < 0)
200 return 2;
201 }
202 }
203 else {
204 if (is_inet_addr(local)) {
205 ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
206 if (ret < 0)
207 return 1;
208 }
209 }
210
211 if (!flags)
212 return 0;
213
214 if (!foreign_ok)
215 /* we could not bind to a foreign address */
216 return 2;
217
218 return 0;
219}
220
221/*
222 * This function initiates a QUIC connection establishment to the target assigned
223 * to connection <conn> using (si->{target,dst}). A source address may be
224 * pointed to by conn->src in case of transparent proxying. Normal source
225 * bind addresses are still determined locally (due to the possible need of a
226 * source port). conn->target may point either to a valid server or to a backend,
227 * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
228 * supported. The <data> parameter is a boolean indicating whether there are data
229 * waiting for being sent or not, in order to adjust data write polling and on
230 * some platforms, the ability to avoid an empty initial ACK. The <flags> argument
231 * is not used.
232 *
233 * Note that a pending send_proxy message accounts for data.
234 *
235 * It can return one of :
236 * - SF_ERR_NONE if everything's OK
237 * - SF_ERR_SRVTO if there are no more servers
238 * - SF_ERR_SRVCL if the connection was refused by the server
239 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
240 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
241 * - SF_ERR_INTERNAL for any other purely internal errors
242 * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
243 *
244 * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
245 * it's invalid and the caller has nothing to do.
246 */
247
248int quic_connect_server(struct connection *conn, int flags)
249{
250 int fd;
251 struct server *srv;
252 struct proxy *be;
253 struct conn_src *src;
254 struct sockaddr_storage *addr;
255
256 conn->flags |= CO_FL_WAIT_L4_CONN; /* connection in progress */
257
258 switch (obj_type(conn->target)) {
259 case OBJ_TYPE_PROXY:
260 be = objt_proxy(conn->target);
261 srv = NULL;
262 break;
263 case OBJ_TYPE_SERVER:
264 srv = objt_server(conn->target);
265 be = srv->proxy;
266 break;
267 default:
268 conn->flags |= CO_FL_ERROR;
269 return SF_ERR_INTERNAL;
270 }
271
272 if (!conn->dst) {
273 conn->flags |= CO_FL_ERROR;
274 return SF_ERR_INTERNAL;
275 }
276
277 fd = conn->handle.fd = sock_create_server_socket(conn);
278
279 if (fd == -1) {
280 qfprintf(stderr, "Cannot get a server socket.\n");
281
282 if (errno == ENFILE) {
283 conn->err_code = CO_ER_SYS_FDLIM;
284 send_log(be, LOG_EMERG,
285 "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
286 be->id, global.maxsock);
287 }
288 else if (errno == EMFILE) {
289 conn->err_code = CO_ER_PROC_FDLIM;
290 send_log(be, LOG_EMERG,
291 "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
292 be->id, global.maxsock);
293 }
294 else if (errno == ENOBUFS || errno == ENOMEM) {
295 conn->err_code = CO_ER_SYS_MEMLIM;
296 send_log(be, LOG_EMERG,
297 "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
298 be->id, global.maxsock);
299 }
300 else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
301 conn->err_code = CO_ER_NOPROTO;
302 }
303 else
304 conn->err_code = CO_ER_SOCK_ERR;
305
306 /* this is a resource error */
307 conn->flags |= CO_FL_ERROR;
308 return SF_ERR_RESOURCE;
309 }
310
311 if (fd >= global.maxsock) {
312 /* do not log anything there, it's a normal condition when this option
313 * is used to serialize connections to a server !
314 */
315 ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
316 close(fd);
317 conn->err_code = CO_ER_CONF_FDLIM;
318 conn->flags |= CO_FL_ERROR;
319 return SF_ERR_PRXCOND; /* it is a configuration limit */
320 }
321
322 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1)) {
323 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
324 close(fd);
325 conn->err_code = CO_ER_SOCK_ERR;
326 conn->flags |= CO_FL_ERROR;
327 return SF_ERR_INTERNAL;
328 }
329
330 if (master == 1 && (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)) {
331 ha_alert("Cannot set CLOEXEC on client socket.\n");
332 close(fd);
333 conn->err_code = CO_ER_SOCK_ERR;
334 conn->flags |= CO_FL_ERROR;
335 return SF_ERR_INTERNAL;
336 }
337
338 /* allow specific binding :
339 * - server-specific at first
340 * - proxy-specific next
341 */
342 if (srv && srv->conn_src.opts & CO_SRC_BIND)
343 src = &srv->conn_src;
344 else if (be->conn_src.opts & CO_SRC_BIND)
345 src = &be->conn_src;
346 else
347 src = NULL;
348
349 if (src) {
350 int ret, flags = 0;
351
352 if (conn->src && is_inet_addr(conn->src)) {
353 switch (src->opts & CO_SRC_TPROXY_MASK) {
354 case CO_SRC_TPROXY_CLI:
355 conn_set_private(conn);
356 /* fall through */
357 case CO_SRC_TPROXY_ADDR:
358 flags = 3;
359 break;
360 case CO_SRC_TPROXY_CIP:
361 case CO_SRC_TPROXY_DYN:
362 conn_set_private(conn);
363 flags = 1;
364 break;
365 }
366 }
367
368#ifdef SO_BINDTODEVICE
369 /* Note: this might fail if not CAP_NET_RAW */
370 if (src->iface_name)
371 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
372#endif
373
374 if (src->sport_range) {
375 int attempts = 10; /* should be more than enough to find a spare port */
376 struct sockaddr_storage sa;
377
378 ret = 1;
379 memcpy(&sa, &src->source_addr, sizeof(sa));
380
381 do {
382 /* note: in case of retry, we may have to release a previously
383 * allocated port, hence this loop's construct.
384 */
385 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
386 fdinfo[fd].port_range = NULL;
387
388 if (!attempts)
389 break;
390 attempts--;
391
392 fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
393 if (!fdinfo[fd].local_port) {
394 conn->err_code = CO_ER_PORT_RANGE;
395 break;
396 }
397
398 fdinfo[fd].port_range = src->sport_range;
399 set_host_port(&sa, fdinfo[fd].local_port);
400
401 ret = quic_bind_socket(fd, flags, &sa, conn->src);
402 if (ret != 0)
403 conn->err_code = CO_ER_CANT_BIND;
404 } while (ret != 0); /* binding NOK */
405 }
406 else {
407#ifdef IP_BIND_ADDRESS_NO_PORT
408 static THREAD_LOCAL int bind_address_no_port = 1;
409 setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
410#endif
411 ret = quic_bind_socket(fd, flags, &src->source_addr, conn->src);
412 if (ret != 0)
413 conn->err_code = CO_ER_CANT_BIND;
414 }
415
416 if (unlikely(ret != 0)) {
417 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
418 fdinfo[fd].port_range = NULL;
419 close(fd);
420
421 if (ret == 1) {
422 ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
423 be->id);
424 send_log(be, LOG_EMERG,
425 "Cannot bind to source address before connect() for backend %s.\n",
426 be->id);
427 } else {
428 ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
429 be->id);
430 send_log(be, LOG_EMERG,
431 "Cannot bind to tproxy source address before connect() for backend %s.\n",
432 be->id);
433 }
434 conn->flags |= CO_FL_ERROR;
435 return SF_ERR_RESOURCE;
436 }
437 }
438
439 if (global.tune.server_sndbuf)
440 setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
441
442 if (global.tune.server_rcvbuf)
443 setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
444
445 addr = (conn->flags & CO_FL_SOCKS4) ? &srv->socks4_addr : conn->dst;
446 if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
447 if (errno == EINPROGRESS || errno == EALREADY) {
448 /* common case, let's wait for connect status */
449 conn->flags |= CO_FL_WAIT_L4_CONN;
450 }
451 else if (errno == EISCONN) {
452 /* should normally not happen but if so, indicates that it's OK */
453 conn->flags &= ~CO_FL_WAIT_L4_CONN;
454 }
455 else if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
456 char *msg;
457 if (errno == EAGAIN || errno == EADDRNOTAVAIL) {
458 msg = "no free ports";
459 conn->err_code = CO_ER_FREE_PORTS;
460 }
461 else {
462 msg = "local address already in use";
463 conn->err_code = CO_ER_ADDR_INUSE;
464 }
465
466 qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
467 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
468 fdinfo[fd].port_range = NULL;
469 close(fd);
470 send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
471 conn->flags |= CO_FL_ERROR;
472 return SF_ERR_RESOURCE;
473 } else if (errno == ETIMEDOUT) {
474 //qfprintf(stderr,"Connect(): ETIMEDOUT");
475 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
476 fdinfo[fd].port_range = NULL;
477 close(fd);
478 conn->err_code = CO_ER_SOCK_ERR;
479 conn->flags |= CO_FL_ERROR;
480 return SF_ERR_SRVTO;
481 } else {
482 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
483 //qfprintf(stderr,"Connect(): %d", errno);
484 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
485 fdinfo[fd].port_range = NULL;
486 close(fd);
487 conn->err_code = CO_ER_SOCK_ERR;
488 conn->flags |= CO_FL_ERROR;
489 return SF_ERR_SRVCL;
490 }
491 }
492 else {
493 /* connect() == 0, this is great! */
494 conn->flags &= ~CO_FL_WAIT_L4_CONN;
495 }
496
497 conn->flags |= CO_FL_ADDR_TO_SET;
498
499 conn_ctrl_init(conn); /* registers the FD */
500 fdtab[fd].linger_risk = 1; /* close hard if needed */
501
502 if (conn->flags & CO_FL_WAIT_L4_CONN) {
503 fd_want_send(fd);
504 fd_cant_send(fd);
505 fd_cant_recv(fd);
506 }
507
508 if (conn_xprt_init(conn) < 0) {
509 conn_full_close(conn);
510 conn->flags |= CO_FL_ERROR;
511 return SF_ERR_RESOURCE;
512 }
513
514 return SF_ERR_NONE; /* connection is OK */
515}
516
Frédéric Lécaille884f2e92020-11-23 14:23:21 +0100517/* Add listener <listener> to protocol <proto>. Technically speaking we just
518 * initialize a few entries which should be doable during quic_bind_listener().
519 * The end of the initialization goes on with the default function.
520 */
521static void quic_add_listener(struct protocol *proto, struct listener *listener)
522{
523 LIST_INIT(&listener->rx.qpkts);
524 listener->rx.odcids = EB_ROOT_UNIQUE;
525 listener->rx.cids = EB_ROOT_UNIQUE;
526 default_add_listener(proto, listener);
527}
528
Frédéric Lécailleca42b2c2020-11-02 14:27:08 +0100529/* This function tries to bind a QUIC4/6 listener. It may return a warning or
530 * an error message in <errmsg> if the message is at most <errlen> bytes long
531 * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
532 * The return value is composed from ERR_ABORT, ERR_WARN,
533 * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
534 * was alright and that no message was returned. ERR_RETRYABLE means that an
535 * error occurred but that it may vanish after a retry (eg: port in use), and
536 * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
537 * the meaning of the error, but just indicate that a message is present which
538 * should be displayed with the respective level. Last, ERR_ABORT indicates
539 * that it's pointless to try to start other listeners. No error message is
540 * returned if errlen is NULL.
541 */
542static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen)
543{
544 int err = ERR_NONE;
545 char *msg = NULL;
546
547 /* ensure we never return garbage */
548 if (errlen)
549 *errmsg = 0;
550
551 if (listener->state != LI_ASSIGNED)
552 return ERR_NONE; /* already bound */
553
554 if (!(listener->rx.flags & RX_F_BOUND)) {
555 msg = "receiving socket not bound";
556 goto udp_return;
557 }
558
559 listener_set_state(listener, LI_LISTEN);
560
561 udp_return:
562 if (msg && errlen) {
563 char pn[INET6_ADDRSTRLEN];
564
565 addr_to_str(&listener->rx.addr, pn, sizeof(pn));
566 snprintf(errmsg, errlen, "%s [%s:%d]", msg, pn, get_host_port(&listener->rx.addr));
567 }
568 return err;
569}
570
571/* Enable receipt of incoming connections for listener <l>. The receiver must
572 * still be valid. Does nothing in early boot (needs fd_updt).
573 */
574static void quic_enable_listener(struct listener *l)
575{
576 /* FIXME: The following statements are incorrect. This
577 * is the responsability of the QUIC xprt to stop accepting new
578 * connections.
579 */
580 if (fd_updt)
581 fd_want_recv(l->rx.fd);
582}
583
584/* Disable receipt of incoming connections for listener <l>. The receiver must
585 * still be valid. Does nothing in early boot (needs fd_updt).
586 */
587static void quic_disable_listener(struct listener *l)
588{
589 /* FIXME: The following statements are incorrect. This
590 * is the responsability of the QUIC xprt to start accepting new
591 * connections again.
592 */
593 if (fd_updt)
594 fd_stop_recv(l->rx.fd);
595}
596
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */