/*
 * AF_INET/AF_INET6 QUIC protocol layer.
 *
 * Copyright 2020 Frédéric Lécaille <flecaille@haproxy.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <ctype.h>
14#include <errno.h>
15#include <fcntl.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <time.h>
20
21#include <sys/param.h>
22#include <sys/socket.h>
23#include <sys/types.h>
24
25#include <netinet/udp.h>
26#include <netinet/in.h>
27
28#include <haproxy/api.h>
29#include <haproxy/arg.h>
30#include <haproxy/connection.h>
31#include <haproxy/errors.h>
32#include <haproxy/fd.h>
33#include <haproxy/global.h>
34#include <haproxy/list.h>
35#include <haproxy/listener.h>
36#include <haproxy/log.h>
37#include <haproxy/namespace.h>
38#include <haproxy/port_range.h>
39#include <haproxy/protocol.h>
40#include <haproxy/proto_quic.h>
41#include <haproxy/proto_udp.h>
42#include <haproxy/proxy-t.h>
43#include <haproxy/sock.h>
44#include <haproxy/sock_inet.h>
45#include <haproxy/tools.h>
46
47
/* Callbacks referenced by the protocol descriptors below and defined later
 * in this file.
 */
static int quic_connect_server(struct connection *conn, int flags);
static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen);
static void quic_enable_listener(struct listener *listener);
static void quic_disable_listener(struct listener *listener);
52
53/* Note: must not be declared <const> as its list will be overwritten */
54struct protocol proto_quic4 = {
55 .name = "quic4",
56
57 /* connection layer */
58 .ctrl_type = SOCK_STREAM,
59 .listen = quic_bind_listener,
60 .enable = quic_enable_listener,
61 .disable = quic_disable_listener,
62 .add = default_add_listener,
63 .unbind = default_unbind_listener,
64 .suspend = default_suspend_listener,
65 .resume = default_resume_listener,
66 .accept_conn = sock_accept_conn,
67 .connect = quic_connect_server,
68
69 /* binding layer */
70 .rx_suspend = udp_suspend_receiver,
71 .rx_resume = udp_resume_receiver,
72
73 /* address family */
74 .fam = &proto_fam_inet4,
75
76 /* socket layer */
77 .sock_type = SOCK_DGRAM,
78 .sock_prot = IPPROTO_UDP,
79 .rx_enable = sock_enable,
80 .rx_disable = sock_disable,
81 .rx_unbind = sock_unbind,
82 .rx_listening = sock_accepting_conn,
83 .default_iocb = sock_accept_iocb,
84 .receivers = LIST_HEAD_INIT(proto_quic4.receivers),
85 .nb_receivers = 0,
86};
87
88INITCALL1(STG_REGISTER, protocol_register, &proto_quic4);
89
90/* Note: must not be declared <const> as its list will be overwritten */
91struct protocol proto_quic6 = {
92 .name = "quic6",
93
94 /* connection layer */
95 .ctrl_type = SOCK_STREAM,
96 .listen = quic_bind_listener,
97 .enable = quic_enable_listener,
98 .disable = quic_disable_listener,
99 .add = default_add_listener,
100 .unbind = default_unbind_listener,
101 .suspend = default_suspend_listener,
102 .resume = default_resume_listener,
103 .accept_conn = sock_accept_conn,
104 .connect = quic_connect_server,
105
106 /* binding layer */
107 .rx_suspend = udp_suspend_receiver,
108 .rx_resume = udp_resume_receiver,
109
110 /* address family */
111 .fam = &proto_fam_inet6,
112
113 /* socket layer */
114 .sock_type = SOCK_DGRAM,
115 .sock_prot = IPPROTO_UDP,
116 .rx_enable = sock_enable,
117 .rx_disable = sock_disable,
118 .rx_unbind = sock_unbind,
119 .rx_listening = sock_accepting_conn,
120 .default_iocb = sock_accept_iocb,
121 .receivers = LIST_HEAD_INIT(proto_quic6.receivers),
122 .nb_receivers = 0,
123};
124
125INITCALL1(STG_REGISTER, protocol_register, &proto_quic6);
126
127/* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
128 * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
129 * - 0 : ignore remote address (may even be a NULL pointer)
130 * - 1 : use provided address
131 * - 2 : use provided port
132 * - 3 : use both
133 *
134 * The function supports multiple foreign binding methods :
135 * - linux_tproxy: we directly bind to the foreign address
136 * The second one can be used as a fallback for the first one.
137 * This function returns 0 when everything's OK, 1 if it could not bind, to the
138 * local address, 2 if it could not bind to the foreign address.
139 */
140int quic_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
141{
142 struct sockaddr_storage bind_addr;
143 int foreign_ok = 0;
144 int ret;
145 static THREAD_LOCAL int ip_transp_working = 1;
146 static THREAD_LOCAL int ip6_transp_working = 1;
147
148 switch (local->ss_family) {
149 case AF_INET:
150 if (flags && ip_transp_working) {
151 /* This deserves some explanation. Some platforms will support
152 * multiple combinations of certain methods, so we try the
153 * supported ones until one succeeds.
154 */
155 if (sock_inet4_make_foreign(fd))
156 foreign_ok = 1;
157 else
158 ip_transp_working = 0;
159 }
160 break;
161 case AF_INET6:
162 if (flags && ip6_transp_working) {
163 if (sock_inet6_make_foreign(fd))
164 foreign_ok = 1;
165 else
166 ip6_transp_working = 0;
167 }
168 break;
169 }
170
171 if (flags) {
172 memset(&bind_addr, 0, sizeof(bind_addr));
173 bind_addr.ss_family = remote->ss_family;
174 switch (remote->ss_family) {
175 case AF_INET:
176 if (flags & 1)
177 ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
178 if (flags & 2)
179 ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
180 break;
181 case AF_INET6:
182 if (flags & 1)
183 ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
184 if (flags & 2)
185 ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
186 break;
187 default:
188 /* we don't want to try to bind to an unknown address family */
189 foreign_ok = 0;
190 }
191 }
192
193 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
194 if (foreign_ok) {
195 if (is_inet_addr(&bind_addr)) {
196 ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
197 if (ret < 0)
198 return 2;
199 }
200 }
201 else {
202 if (is_inet_addr(local)) {
203 ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
204 if (ret < 0)
205 return 1;
206 }
207 }
208
209 if (!flags)
210 return 0;
211
212 if (!foreign_ok)
213 /* we could not bind to a foreign address */
214 return 2;
215
216 return 0;
217}
218
219/*
220 * This function initiates a QUIC connection establishment to the target assigned
221 * to connection <conn> using (si->{target,dst}). A source address may be
222 * pointed to by conn->src in case of transparent proxying. Normal source
223 * bind addresses are still determined locally (due to the possible need of a
224 * source port). conn->target may point either to a valid server or to a backend,
225 * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
226 * supported. The <data> parameter is a boolean indicating whether there are data
227 * waiting for being sent or not, in order to adjust data write polling and on
228 * some platforms, the ability to avoid an empty initial ACK. The <flags> argument
229 * is not used.
230 *
231 * Note that a pending send_proxy message accounts for data.
232 *
233 * It can return one of :
234 * - SF_ERR_NONE if everything's OK
235 * - SF_ERR_SRVTO if there are no more servers
236 * - SF_ERR_SRVCL if the connection was refused by the server
237 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
238 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
239 * - SF_ERR_INTERNAL for any other purely internal errors
240 * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
241 *
242 * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
243 * it's invalid and the caller has nothing to do.
244 */
245
246int quic_connect_server(struct connection *conn, int flags)
247{
248 int fd;
249 struct server *srv;
250 struct proxy *be;
251 struct conn_src *src;
252 struct sockaddr_storage *addr;
253
254 conn->flags |= CO_FL_WAIT_L4_CONN; /* connection in progress */
255
256 switch (obj_type(conn->target)) {
257 case OBJ_TYPE_PROXY:
258 be = objt_proxy(conn->target);
259 srv = NULL;
260 break;
261 case OBJ_TYPE_SERVER:
262 srv = objt_server(conn->target);
263 be = srv->proxy;
264 break;
265 default:
266 conn->flags |= CO_FL_ERROR;
267 return SF_ERR_INTERNAL;
268 }
269
270 if (!conn->dst) {
271 conn->flags |= CO_FL_ERROR;
272 return SF_ERR_INTERNAL;
273 }
274
275 fd = conn->handle.fd = sock_create_server_socket(conn);
276
277 if (fd == -1) {
278 qfprintf(stderr, "Cannot get a server socket.\n");
279
280 if (errno == ENFILE) {
281 conn->err_code = CO_ER_SYS_FDLIM;
282 send_log(be, LOG_EMERG,
283 "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
284 be->id, global.maxsock);
285 }
286 else if (errno == EMFILE) {
287 conn->err_code = CO_ER_PROC_FDLIM;
288 send_log(be, LOG_EMERG,
289 "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
290 be->id, global.maxsock);
291 }
292 else if (errno == ENOBUFS || errno == ENOMEM) {
293 conn->err_code = CO_ER_SYS_MEMLIM;
294 send_log(be, LOG_EMERG,
295 "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
296 be->id, global.maxsock);
297 }
298 else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
299 conn->err_code = CO_ER_NOPROTO;
300 }
301 else
302 conn->err_code = CO_ER_SOCK_ERR;
303
304 /* this is a resource error */
305 conn->flags |= CO_FL_ERROR;
306 return SF_ERR_RESOURCE;
307 }
308
309 if (fd >= global.maxsock) {
310 /* do not log anything there, it's a normal condition when this option
311 * is used to serialize connections to a server !
312 */
313 ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
314 close(fd);
315 conn->err_code = CO_ER_CONF_FDLIM;
316 conn->flags |= CO_FL_ERROR;
317 return SF_ERR_PRXCOND; /* it is a configuration limit */
318 }
319
320 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1)) {
321 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
322 close(fd);
323 conn->err_code = CO_ER_SOCK_ERR;
324 conn->flags |= CO_FL_ERROR;
325 return SF_ERR_INTERNAL;
326 }
327
328 if (master == 1 && (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)) {
329 ha_alert("Cannot set CLOEXEC on client socket.\n");
330 close(fd);
331 conn->err_code = CO_ER_SOCK_ERR;
332 conn->flags |= CO_FL_ERROR;
333 return SF_ERR_INTERNAL;
334 }
335
336 /* allow specific binding :
337 * - server-specific at first
338 * - proxy-specific next
339 */
340 if (srv && srv->conn_src.opts & CO_SRC_BIND)
341 src = &srv->conn_src;
342 else if (be->conn_src.opts & CO_SRC_BIND)
343 src = &be->conn_src;
344 else
345 src = NULL;
346
347 if (src) {
348 int ret, flags = 0;
349
350 if (conn->src && is_inet_addr(conn->src)) {
351 switch (src->opts & CO_SRC_TPROXY_MASK) {
352 case CO_SRC_TPROXY_CLI:
353 conn_set_private(conn);
354 /* fall through */
355 case CO_SRC_TPROXY_ADDR:
356 flags = 3;
357 break;
358 case CO_SRC_TPROXY_CIP:
359 case CO_SRC_TPROXY_DYN:
360 conn_set_private(conn);
361 flags = 1;
362 break;
363 }
364 }
365
366#ifdef SO_BINDTODEVICE
367 /* Note: this might fail if not CAP_NET_RAW */
368 if (src->iface_name)
369 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
370#endif
371
372 if (src->sport_range) {
373 int attempts = 10; /* should be more than enough to find a spare port */
374 struct sockaddr_storage sa;
375
376 ret = 1;
377 memcpy(&sa, &src->source_addr, sizeof(sa));
378
379 do {
380 /* note: in case of retry, we may have to release a previously
381 * allocated port, hence this loop's construct.
382 */
383 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
384 fdinfo[fd].port_range = NULL;
385
386 if (!attempts)
387 break;
388 attempts--;
389
390 fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
391 if (!fdinfo[fd].local_port) {
392 conn->err_code = CO_ER_PORT_RANGE;
393 break;
394 }
395
396 fdinfo[fd].port_range = src->sport_range;
397 set_host_port(&sa, fdinfo[fd].local_port);
398
399 ret = quic_bind_socket(fd, flags, &sa, conn->src);
400 if (ret != 0)
401 conn->err_code = CO_ER_CANT_BIND;
402 } while (ret != 0); /* binding NOK */
403 }
404 else {
405#ifdef IP_BIND_ADDRESS_NO_PORT
406 static THREAD_LOCAL int bind_address_no_port = 1;
407 setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
408#endif
409 ret = quic_bind_socket(fd, flags, &src->source_addr, conn->src);
410 if (ret != 0)
411 conn->err_code = CO_ER_CANT_BIND;
412 }
413
414 if (unlikely(ret != 0)) {
415 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
416 fdinfo[fd].port_range = NULL;
417 close(fd);
418
419 if (ret == 1) {
420 ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
421 be->id);
422 send_log(be, LOG_EMERG,
423 "Cannot bind to source address before connect() for backend %s.\n",
424 be->id);
425 } else {
426 ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
427 be->id);
428 send_log(be, LOG_EMERG,
429 "Cannot bind to tproxy source address before connect() for backend %s.\n",
430 be->id);
431 }
432 conn->flags |= CO_FL_ERROR;
433 return SF_ERR_RESOURCE;
434 }
435 }
436
437 if (global.tune.server_sndbuf)
438 setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
439
440 if (global.tune.server_rcvbuf)
441 setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
442
443 addr = (conn->flags & CO_FL_SOCKS4) ? &srv->socks4_addr : conn->dst;
444 if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
445 if (errno == EINPROGRESS || errno == EALREADY) {
446 /* common case, let's wait for connect status */
447 conn->flags |= CO_FL_WAIT_L4_CONN;
448 }
449 else if (errno == EISCONN) {
450 /* should normally not happen but if so, indicates that it's OK */
451 conn->flags &= ~CO_FL_WAIT_L4_CONN;
452 }
453 else if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
454 char *msg;
455 if (errno == EAGAIN || errno == EADDRNOTAVAIL) {
456 msg = "no free ports";
457 conn->err_code = CO_ER_FREE_PORTS;
458 }
459 else {
460 msg = "local address already in use";
461 conn->err_code = CO_ER_ADDR_INUSE;
462 }
463
464 qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
465 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
466 fdinfo[fd].port_range = NULL;
467 close(fd);
468 send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
469 conn->flags |= CO_FL_ERROR;
470 return SF_ERR_RESOURCE;
471 } else if (errno == ETIMEDOUT) {
472 //qfprintf(stderr,"Connect(): ETIMEDOUT");
473 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
474 fdinfo[fd].port_range = NULL;
475 close(fd);
476 conn->err_code = CO_ER_SOCK_ERR;
477 conn->flags |= CO_FL_ERROR;
478 return SF_ERR_SRVTO;
479 } else {
480 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
481 //qfprintf(stderr,"Connect(): %d", errno);
482 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
483 fdinfo[fd].port_range = NULL;
484 close(fd);
485 conn->err_code = CO_ER_SOCK_ERR;
486 conn->flags |= CO_FL_ERROR;
487 return SF_ERR_SRVCL;
488 }
489 }
490 else {
491 /* connect() == 0, this is great! */
492 conn->flags &= ~CO_FL_WAIT_L4_CONN;
493 }
494
495 conn->flags |= CO_FL_ADDR_TO_SET;
496
497 conn_ctrl_init(conn); /* registers the FD */
498 fdtab[fd].linger_risk = 1; /* close hard if needed */
499
500 if (conn->flags & CO_FL_WAIT_L4_CONN) {
501 fd_want_send(fd);
502 fd_cant_send(fd);
503 fd_cant_recv(fd);
504 }
505
506 if (conn_xprt_init(conn) < 0) {
507 conn_full_close(conn);
508 conn->flags |= CO_FL_ERROR;
509 return SF_ERR_RESOURCE;
510 }
511
512 return SF_ERR_NONE; /* connection is OK */
513}
514
515/* This function tries to bind a QUIC4/6 listener. It may return a warning or
516 * an error message in <errmsg> if the message is at most <errlen> bytes long
517 * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
518 * The return value is composed from ERR_ABORT, ERR_WARN,
519 * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
520 * was alright and that no message was returned. ERR_RETRYABLE means that an
521 * error occurred but that it may vanish after a retry (eg: port in use), and
522 * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
523 * the meaning of the error, but just indicate that a message is present which
524 * should be displayed with the respective level. Last, ERR_ABORT indicates
525 * that it's pointless to try to start other listeners. No error message is
526 * returned if errlen is NULL.
527 */
528static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen)
529{
530 int err = ERR_NONE;
531 char *msg = NULL;
532
533 /* ensure we never return garbage */
534 if (errlen)
535 *errmsg = 0;
536
537 if (listener->state != LI_ASSIGNED)
538 return ERR_NONE; /* already bound */
539
540 if (!(listener->rx.flags & RX_F_BOUND)) {
541 msg = "receiving socket not bound";
542 goto udp_return;
543 }
544
545 listener_set_state(listener, LI_LISTEN);
546
547 udp_return:
548 if (msg && errlen) {
549 char pn[INET6_ADDRSTRLEN];
550
551 addr_to_str(&listener->rx.addr, pn, sizeof(pn));
552 snprintf(errmsg, errlen, "%s [%s:%d]", msg, pn, get_host_port(&listener->rx.addr));
553 }
554 return err;
555}
556
557/* Enable receipt of incoming connections for listener <l>. The receiver must
558 * still be valid. Does nothing in early boot (needs fd_updt).
559 */
560static void quic_enable_listener(struct listener *l)
561{
562 /* FIXME: The following statements are incorrect. This
563 * is the responsability of the QUIC xprt to stop accepting new
564 * connections.
565 */
566 if (fd_updt)
567 fd_want_recv(l->rx.fd);
568}
569
570/* Disable receipt of incoming connections for listener <l>. The receiver must
571 * still be valid. Does nothing in early boot (needs fd_updt).
572 */
573static void quic_disable_listener(struct listener *l)
574{
575 /* FIXME: The following statements are incorrect. This
576 * is the responsability of the QUIC xprt to start accepting new
577 * connections again.
578 */
579 if (fd_updt)
580 fd_stop_recv(l->rx.fd);
581}
582
583/*
584 * Local variables:
585 * c-indent-level: 8
586 * c-basic-offset: 8
587 * End:
588 */