/*
 * QUIC socket management.
 *
 * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +020013#define _GNU_SOURCE /* required for struct in6_pktinfo */
Frédéric Lécaille70da8892020-11-06 15:49:49 +010014#include <errno.h>
Amaury Denoyelle5c25dc52022-09-30 17:44:15 +020015#include <stdlib.h>
16#include <string.h>
Frédéric Lécaille70da8892020-11-06 15:49:49 +010017
Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +020018#include <netinet/in.h>
Frédéric Lécaille70da8892020-11-06 15:49:49 +010019#include <sys/socket.h>
20#include <sys/types.h>
21
Amaury Denoyelle5c25dc52022-09-30 17:44:15 +020022#include <haproxy/api.h>
23#include <haproxy/buf.h>
Frédéric Lécaille70da8892020-11-06 15:49:49 +010024#include <haproxy/connection.h>
Amaury Denoyelle7c9fdd92022-11-16 11:01:02 +010025#include <haproxy/dynbuf.h>
Amaury Denoyelle5c25dc52022-09-30 17:44:15 +020026#include <haproxy/fd.h>
27#include <haproxy/freq_ctr.h>
28#include <haproxy/global-t.h>
29#include <haproxy/list.h>
Frédéric Lécaille70da8892020-11-06 15:49:49 +010030#include <haproxy/listener.h>
Amaury Denoyelle40909df2022-10-24 17:08:43 +020031#include <haproxy/log.h>
Amaury Denoyelle5c25dc52022-09-30 17:44:15 +020032#include <haproxy/pool.h>
Frédéric Lécaille6492e662022-05-17 17:23:16 +020033#include <haproxy/proto_quic.h>
Amaury Denoyelle5c25dc52022-09-30 17:44:15 +020034#include <haproxy/proxy-t.h>
Amaury Denoyelle92fa63f2022-09-30 18:11:13 +020035#include <haproxy/quic_conn.h>
Amaury Denoyelle4d295042022-01-19 16:18:44 +010036#include <haproxy/quic_sock.h>
Amaury Denoyelle5c25dc52022-09-30 17:44:15 +020037#include <haproxy/quic_tp-t.h>
Amaury Denoyelleeb01f592021-10-07 16:44:05 +020038#include <haproxy/session.h>
Amaury Denoyelle5c25dc52022-09-30 17:44:15 +020039#include <haproxy/stats-t.h>
40#include <haproxy/task.h>
Amaury Denoyelle8687b632022-09-27 14:22:09 +020041#include <haproxy/trace.h>
Amaury Denoyelle777969c2022-03-24 16:06:26 +010042#include <haproxy/tools.h>
Amaury Denoyelle5b414862022-10-24 17:40:37 +020043#include <haproxy/trace.h>
44
45#define TRACE_SOURCE &trace_quic
Frédéric Lécaille026a7922020-11-23 15:46:36 +010046
Amaury Denoyelle8687b632022-09-27 14:22:09 +020047#define TRACE_SOURCE &trace_quic
48
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +020049/* Retrieve a connection's source address. Returns -1 on failure. */
50int quic_sock_get_src(struct connection *conn, struct sockaddr *addr, socklen_t len)
51{
52 struct quic_conn *qc;
53
Willy Tarreau784b8682022-04-11 14:18:10 +020054 if (!conn || !conn->handle.qc)
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +020055 return -1;
56
Willy Tarreau784b8682022-04-11 14:18:10 +020057 qc = conn->handle.qc;
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +020058 if (conn_is_back(conn)) {
59 /* no source address defined for outgoing connections for now */
60 return -1;
61 } else {
62 /* front connection, return the peer's address */
63 if (len > sizeof(qc->peer_addr))
64 len = sizeof(qc->peer_addr);
65 memcpy(addr, &qc->peer_addr, len);
66 return 0;
67 }
68}
69
70/* Retrieve a connection's destination address. Returns -1 on failure. */
71int quic_sock_get_dst(struct connection *conn, struct sockaddr *addr, socklen_t len)
72{
73 struct quic_conn *qc;
74
Willy Tarreau784b8682022-04-11 14:18:10 +020075 if (!conn || !conn->handle.qc)
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +020076 return -1;
77
Willy Tarreau784b8682022-04-11 14:18:10 +020078 qc = conn->handle.qc;
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +020079 if (conn_is_back(conn)) {
80 /* back connection, return the peer's address */
81 if (len > sizeof(qc->peer_addr))
82 len = sizeof(qc->peer_addr);
83 memcpy(addr, &qc->peer_addr, len);
84 } else {
Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +020085 struct sockaddr_storage *from;
86
87 /* Return listener address if IP_PKTINFO or friends are not
88 * supported by the socket.
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +020089 */
90 BUG_ON(!qc->li);
Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +020091 from = is_addr(&qc->local_addr) ? &qc->local_addr :
92 &qc->li->rx.addr;
93 if (len > sizeof(*from))
94 len = sizeof(*from);
95 memcpy(addr, from, len);
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +020096 }
97 return 0;
98}
99
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100100/*
101 * Inspired from session_accept_fd().
102 * Instantiate a new connection (connection struct) to be attached to <qc>
103 * QUIC connection of <l> listener.
104 * Returns 1 if succeeded, 0 if not.
105 */
106static int new_quic_cli_conn(struct quic_conn *qc, struct listener *l,
107 struct sockaddr_storage *saddr)
108{
109 struct connection *cli_conn;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100110
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100111 if (unlikely((cli_conn = conn_new(&l->obj_type)) == NULL))
112 goto out;
113
Willy Tarreau9cc88c32022-04-08 14:34:31 +0200114 if (!sockaddr_alloc(&cli_conn->src, saddr, sizeof *saddr))
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100115 goto out_free_conn;
116
Willy Tarreau030b3e62022-05-02 17:47:46 +0200117 cli_conn->flags |= CO_FL_FDLESS;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100118 qc->conn = cli_conn;
Willy Tarreau784b8682022-04-11 14:18:10 +0200119 cli_conn->handle.qc = qc;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100120
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100121 cli_conn->target = &l->obj_type;
122
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100123 return 1;
124
125 out_free_conn:
Frédéric Lécaille01ab6612021-06-14 10:31:43 +0200126 qc->conn = NULL;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100127 conn_stop_tracking(cli_conn);
128 conn_xprt_close(cli_conn);
129 conn_free(cli_conn);
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100130 out:
131
132 return 0;
133}
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100134
/* Tests if the receiver supports accepting connections. Returns positive on
 * success, 0 if not possible. QUIC receivers always accept.
 */
int quic_sock_accepting_conn(const struct receiver *rx)
{
	return 1;
}
142
143/* Accept an incoming connection from listener <l>, and return it, as well as
144 * a CO_AC_* status code into <status> if not null. Null is returned on error.
145 * <l> must be a valid listener with a valid frontend.
146 */
147struct connection *quic_sock_accept_conn(struct listener *l, int *status)
148{
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100149 struct quic_conn *qc;
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100150 struct li_per_thread *lthr = &l->per_thr[tid];
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100151
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100152 qc = MT_LIST_POP(&lthr->quic_accept.conns, struct quic_conn *, accept_list);
153 if (!qc)
154 goto done;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100155
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100156 if (!new_quic_cli_conn(qc, l, &qc->peer_addr))
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100157 goto err;
158
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100159 done:
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100160 *status = CO_AC_DONE;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100161 return qc ? qc->conn : NULL;
162
163 err:
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100164 /* in case of error reinsert the element to process it later. */
165 MT_LIST_INSERT(&lthr->quic_accept.conns, &qc->accept_list);
166
167 *status = CO_AC_PAUSE;
168 return NULL;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100169}
170
Amaury Denoyelle8687b632022-09-27 14:22:09 +0200171/* QUIC datagrams handler task. */
172struct task *quic_lstnr_dghdlr(struct task *t, void *ctx, unsigned int state)
173{
174 struct quic_dghdlr *dghdlr = ctx;
175 struct quic_dgram *dgram;
176 int max_dgrams = global.tune.maxpollevents;
177
178 TRACE_ENTER(QUIC_EV_CONN_LPKT);
179
180 while ((dgram = MT_LIST_POP(&dghdlr->dgrams, typeof(dgram), handler_list))) {
181 if (quic_dgram_parse(dgram, NULL, dgram->owner)) {
182 /* TODO should we requeue the datagram ? */
183 break;
184 }
185
186 if (--max_dgrams <= 0)
187 goto stop_here;
188 }
189
190 TRACE_LEAVE(QUIC_EV_CONN_LPKT);
191 return t;
192
193 stop_here:
194 /* too much work done at once, come back here later */
195 if (!MT_LIST_ISEMPTY(&dghdlr->dgrams))
196 tasklet_wakeup((struct tasklet *)t);
197
198 TRACE_LEAVE(QUIC_EV_CONN_LPKT);
199 return t;
200}
201
Frédéric Lécaille6492e662022-05-17 17:23:16 +0200202/* Retrieve the DCID from the datagram found in <buf> and deliver it to the
203 * correct datagram handler.
204 * Return 1 if a correct datagram could be found, 0 if not.
205 */
206static int quic_lstnr_dgram_dispatch(unsigned char *buf, size_t len, void *owner,
207 struct sockaddr_storage *saddr,
Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +0200208 struct sockaddr_storage *daddr,
Frédéric Lécaille6492e662022-05-17 17:23:16 +0200209 struct quic_dgram *new_dgram, struct list *dgrams)
210{
211 struct quic_dgram *dgram;
Willy Tarreauf5a0c8a2022-10-13 16:14:11 +0200212 const struct listener *l = owner;
Frédéric Lécaille6492e662022-05-17 17:23:16 +0200213 unsigned char *dcid;
214 size_t dcid_len;
215 int cid_tid;
216
217 if (!len || !quic_get_dgram_dcid(buf, buf + len, &dcid, &dcid_len))
218 goto err;
219
220 dgram = new_dgram ? new_dgram : pool_alloc(pool_head_quic_dgram);
221 if (!dgram)
222 goto err;
223
Willy Tarreaueed78262022-12-21 09:09:19 +0100224 cid_tid = quic_get_cid_tid(dcid, &l->rx);
Frédéric Lécaille6492e662022-05-17 17:23:16 +0200225
226 /* All the members must be initialized! */
227 dgram->owner = owner;
228 dgram->buf = buf;
229 dgram->len = len;
230 dgram->dcid = dcid;
231 dgram->dcid_len = dcid_len;
232 dgram->saddr = *saddr;
Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +0200233 dgram->daddr = *daddr;
Frédéric Lécaille6492e662022-05-17 17:23:16 +0200234 dgram->qc = NULL;
Amaury Denoyelle1cba8d62022-10-06 15:16:22 +0200235
236 /* Attached datagram to its quic_receiver_buf and quic_dghdlrs. */
237 LIST_APPEND(dgrams, &dgram->recv_list);
238 MT_LIST_APPEND(&quic_dghdlrs[cid_tid].dgrams, &dgram->handler_list);
Frédéric Lécaille6492e662022-05-17 17:23:16 +0200239
Willy Tarreauf9d4a7d2022-08-05 08:45:56 +0200240 /* typically quic_lstnr_dghdlr() */
Frédéric Lécaille6492e662022-05-17 17:23:16 +0200241 tasklet_wakeup(quic_dghdlrs[cid_tid].task);
242
243 return 1;
244
245 err:
Frédéric Lécaillebfb077a2022-08-12 11:55:20 +0200246 pool_free(pool_head_quic_dgram, new_dgram);
Frédéric Lécaille6492e662022-05-17 17:23:16 +0200247 return 0;
248}
249
Amaury Denoyelle91b23052022-10-06 14:45:09 +0200250/* This function is responsible to remove unused datagram attached in front of
251 * <buf>. Each instances will be freed until a not yet consumed datagram is
252 * found or end of the list is hit. The last unused datagram found is not freed
253 * and is instead returned so that the caller can reuse it if needed.
254 *
Ilya Shipitsin4a689da2022-10-29 09:34:32 +0500255 * Returns the last unused datagram or NULL if no occurrence found.
Amaury Denoyelle91b23052022-10-06 14:45:09 +0200256 */
257static struct quic_dgram *quic_rxbuf_purge_dgrams(struct quic_receiver_buf *buf)
258{
259 struct quic_dgram *cur, *prev = NULL;
260
261 while (!LIST_ISEMPTY(&buf->dgram_list)) {
262 cur = LIST_ELEM(buf->dgram_list.n, struct quic_dgram *, recv_list);
263
264 /* Loop until a not yet consumed datagram is found. */
Amaury Denoyelle0b13e942022-10-25 11:38:21 +0200265 if (HA_ATOMIC_LOAD(&cur->buf))
Amaury Denoyelle91b23052022-10-06 14:45:09 +0200266 break;
267
268 /* Clear buffer of current unused datagram. */
269 LIST_DELETE(&cur->recv_list);
270 b_del(&buf->buf, cur->len);
271
272 /* Free last found unused datagram. */
273 if (prev)
274 pool_free(pool_head_quic_dgram, prev);
275 prev = cur;
276 }
277
278 /* Return last unused datagram found. */
279 return prev;
280}
281
Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +0200282/* Receive data from datagram socket <fd>. Data are placed in <out> buffer of
283 * length <len>.
284 *
285 * Datagram addresses will be returned via the next arguments. <from> will be
286 * the peer address and <to> the reception one. Note that <to> can only be
287 * retrieved if the socket supports IP_PKTINFO or affiliated options. If not,
288 * <to> will be set as AF_UNSPEC. The caller must specify <to_port> to ensure
289 * that <to> address is completely filled.
290 *
291 * Returns value from recvmsg syscall.
292 */
293static ssize_t quic_recv(int fd, void *out, size_t len,
294 struct sockaddr *from, socklen_t from_len,
295 struct sockaddr *to, socklen_t to_len,
296 uint16_t dst_port)
297{
298 union pktinfo {
299#ifdef IP_PKTINFO
300 struct in_pktinfo in;
301#else /* !IP_PKTINFO */
302 struct in_addr addr;
303#endif
304#ifdef IPV6_RECVPKTINFO
305 struct in6_pktinfo in6;
306#endif
307 };
308 char cdata[CMSG_SPACE(sizeof(union pktinfo))];
309 struct msghdr msg;
310 struct iovec vec;
311 struct cmsghdr *cmsg;
312 ssize_t ret;
313
314 vec.iov_base = out;
315 vec.iov_len = len;
316
317 memset(&msg, 0, sizeof(msg));
318 msg.msg_name = from;
319 msg.msg_namelen = from_len;
320 msg.msg_iov = &vec;
321 msg.msg_iovlen = 1;
322 msg.msg_control = &cdata;
323 msg.msg_controllen = sizeof(cdata);
324
325 clear_addr((struct sockaddr_storage *)to);
326
327 do {
328 ret = recvmsg(fd, &msg, 0);
329 } while (ret < 0 && errno == EINTR);
330
331 /* TODO handle errno. On EAGAIN/EWOULDBLOCK use fd_cant_recv() if
332 * using dedicated connection socket.
333 */
334
335 if (ret < 0)
336 goto end;
337
338 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
339 switch (cmsg->cmsg_level) {
340 case IPPROTO_IP:
341#if defined(IP_PKTINFO)
342 if (cmsg->cmsg_type == IP_PKTINFO) {
343 struct sockaddr_in *in = (struct sockaddr_in *)to;
344 struct in_pktinfo *info = (struct in_pktinfo *)CMSG_DATA(cmsg);
345
346 if (to_len >= sizeof(struct sockaddr_in)) {
347 in->sin_family = AF_INET;
348 in->sin_addr = info->ipi_addr;
349 in->sin_port = dst_port;
350 }
351 }
352#elif defined(IP_RECVDSTADDR)
353 if (cmsg->cmsg_type == IP_RECVDSTADDR) {
354 struct sockaddr_in *in = (struct sockaddr_in *)to;
355 struct in_addr *info = (struct in_addr *)CMSG_DATA(cmsg);
356
357 if (to_len >= sizeof(struct sockaddr_in)) {
358 in->sin_family = AF_INET;
359 in->sin_addr.s_addr = info->s_addr;
360 in->sin_port = dst_port;
361 }
362 }
363#endif /* IP_PKTINFO || IP_RECVDSTADDR */
364 break;
365
366 case IPPROTO_IPV6:
367#ifdef IPV6_RECVPKTINFO
368 if (cmsg->cmsg_type == IPV6_PKTINFO) {
369 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)to;
370 struct in6_pktinfo *info6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
371
372 if (to_len >= sizeof(struct sockaddr_in6)) {
373 in6->sin6_family = AF_INET6;
374 memcpy(&in6->sin6_addr, &info6->ipi6_addr, sizeof(in6->sin6_addr));
375 in6->sin6_port = dst_port;
376 }
377 }
378#endif
379 break;
380 }
381 }
382
383 end:
384 return ret;
385}
386
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100387/* Function called on a read event from a listening socket. It tries
388 * to handle as many connections as possible.
389 */
Amaury Denoyelle5b414862022-10-24 17:40:37 +0200390void quic_lstnr_sock_fd_iocb(int fd)
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100391{
392 ssize_t ret;
Amaury Denoyelle1cba8d62022-10-06 15:16:22 +0200393 struct quic_receiver_buf *rxbuf;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100394 struct buffer *buf;
395 struct listener *l = objt_listener(fdtab[fd].owner);
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100396 struct quic_transport_params *params;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100397 /* Source address */
Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +0200398 struct sockaddr_storage saddr = {0}, daddr = {0};
Frédéric Lécaille320744b2022-01-27 12:19:28 +0100399 size_t max_sz, cspace;
Frédéric Lécaille2bed1f12022-06-23 21:05:05 +0200400 struct quic_dgram *new_dgram;
Frédéric Lécaillef6f75202022-02-02 09:44:22 +0100401 unsigned char *dgram_buf;
Frédéric Lécaille1b0707f2022-06-30 11:28:56 +0200402 int max_dgrams;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100403
Tim Duesterhus16554242021-09-15 13:58:49 +0200404 BUG_ON(!l);
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100405
Frédéric Lécaille19ef6362022-06-23 18:00:37 +0200406 new_dgram = NULL;
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100407 if (!l)
408 return;
409
Willy Tarreauf5090652021-04-06 17:23:40 +0200410 if (!(fdtab[fd].state & FD_POLL_IN) || !fd_recv_ready(fd))
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100411 return;
412
Amaury Denoyelle1cba8d62022-10-06 15:16:22 +0200413 rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), rxbuf_el);
Amaury Denoyelleee72a432021-11-19 15:49:29 +0100414 if (!rxbuf)
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100415 goto out;
Frédéric Lécaille37ae5052022-01-27 11:31:50 +0100416
Amaury Denoyelleee72a432021-11-19 15:49:29 +0100417 buf = &rxbuf->buf;
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100418
Frédéric Lécaille1b0707f2022-06-30 11:28:56 +0200419 max_dgrams = global.tune.maxpollevents;
420 start:
Ilya Shipitsin3b64a282022-07-29 22:26:53 +0500421 /* Try to reuse an existing dgram. Note that there is always at
Frédéric Lécaille2bed1f12022-06-23 21:05:05 +0200422 * least one datagram to pick, except the first time we enter
423 * this function for this <rxbuf> buffer.
424 */
Amaury Denoyelle91b23052022-10-06 14:45:09 +0200425 new_dgram = quic_rxbuf_purge_dgrams(rxbuf);
Frédéric Lécaille37ae5052022-01-27 11:31:50 +0100426
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100427 params = &l->bind_conf->quic_params;
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100428 max_sz = params->max_udp_payload_size;
Frédéric Lécaille320744b2022-01-27 12:19:28 +0100429 cspace = b_contig_space(buf);
430 if (cspace < max_sz) {
Amaury Denoyelle735b44f2022-10-27 17:56:27 +0200431 struct proxy *px = l->bind_conf->frontend;
432 struct quic_counters *prx_counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, &quic_stats_module);
Frédéric Lécaille1712b1d2022-01-28 13:10:24 +0100433 struct quic_dgram *dgram;
434
Frédéric Lécaille0c535682022-06-23 17:47:10 +0200435 /* Do no mark <buf> as full, and do not try to consume it
Frédéric Lécailleba19acd2022-08-08 21:10:58 +0200436 * if the contiguous remaining space is not at the end
Frédéric Lécaille0c535682022-06-23 17:47:10 +0200437 */
Amaury Denoyelle735b44f2022-10-27 17:56:27 +0200438 if (b_tail(buf) + cspace < b_wrap(buf)) {
439 HA_ATOMIC_INC(&prx_counters->rxbuf_full);
Frédéric Lécaille0c535682022-06-23 17:47:10 +0200440 goto out;
Amaury Denoyelle735b44f2022-10-27 17:56:27 +0200441 }
Frédéric Lécaille0c535682022-06-23 17:47:10 +0200442
Frédéric Lécaille1712b1d2022-01-28 13:10:24 +0100443 /* Allocate a fake datagram, without data to locate
444 * the end of the RX buffer (required during purging).
445 */
Frédéric Lécailleba19acd2022-08-08 21:10:58 +0200446 dgram = pool_alloc(pool_head_quic_dgram);
Frédéric Lécaille1712b1d2022-01-28 13:10:24 +0100447 if (!dgram)
448 goto out;
449
Frédéric Lécailleba19acd2022-08-08 21:10:58 +0200450 /* Initialize only the useful members of this fake datagram. */
451 dgram->buf = NULL;
Frédéric Lécaille1712b1d2022-01-28 13:10:24 +0100452 dgram->len = cspace;
Frédéric Lécailleba19acd2022-08-08 21:10:58 +0200453 /* Append this datagram only to the RX buffer list. It will
454 * not be treated by any datagram handler.
455 */
Amaury Denoyelle1cba8d62022-10-06 15:16:22 +0200456 LIST_APPEND(&rxbuf->dgram_list, &dgram->recv_list);
Frédéric Lécaille0c535682022-06-23 17:47:10 +0200457
Frédéric Lécaille320744b2022-01-27 12:19:28 +0100458 /* Consume the remaining space */
459 b_add(buf, cspace);
Amaury Denoyelle735b44f2022-10-27 17:56:27 +0200460 if (b_contig_space(buf) < max_sz) {
461 HA_ATOMIC_INC(&prx_counters->rxbuf_full);
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100462 goto out;
Amaury Denoyelle735b44f2022-10-27 17:56:27 +0200463 }
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100464 }
465
Frédéric Lécaillef6f75202022-02-02 09:44:22 +0100466 dgram_buf = (unsigned char *)b_tail(buf);
Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +0200467 ret = quic_recv(fd, dgram_buf, max_sz,
468 (struct sockaddr *)&saddr, sizeof(saddr),
469 (struct sockaddr *)&daddr, sizeof(daddr),
470 get_net_port(&l->rx.addr));
471 if (ret <= 0)
472 goto out;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100473
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100474 b_add(buf, ret);
Amaury Denoyelle97ecc7a2022-09-23 17:15:58 +0200475 if (!quic_lstnr_dgram_dispatch(dgram_buf, ret, l, &saddr, &daddr,
Amaury Denoyelle1cba8d62022-10-06 15:16:22 +0200476 new_dgram, &rxbuf->dgram_list)) {
Frédéric Lécaille37ae5052022-01-27 11:31:50 +0100477 /* If wrong, consume this datagram */
Amaury Denoyelle9875f022022-11-24 15:24:38 +0100478 b_sub(buf, ret);
Frédéric Lécaille37ae5052022-01-27 11:31:50 +0100479 }
Frédéric Lécaille19ef6362022-06-23 18:00:37 +0200480 new_dgram = NULL;
Frédéric Lécaille1b0707f2022-06-30 11:28:56 +0200481 if (--max_dgrams > 0)
482 goto start;
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100483 out:
Frédéric Lécaille19ef6362022-06-23 18:00:37 +0200484 pool_free(pool_head_quic_dgram, new_dgram);
Amaury Denoyelle1cba8d62022-10-06 15:16:22 +0200485 MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
Amaury Denoyelle5b414862022-10-24 17:40:37 +0200486}
487
488/* FD-owned quic-conn socket callback. */
489static void quic_conn_sock_fd_iocb(int fd)
490{
Amaury Denoyelle7c9fdd92022-11-16 11:01:02 +0100491 struct quic_conn *qc = fdtab[fd].owner;
Amaury Denoyelle5b414862022-10-24 17:40:37 +0200492
Amaury Denoyelle5b414862022-10-24 17:40:37 +0200493 TRACE_ENTER(QUIC_EV_CONN_RCV, qc);
494
Amaury Denoyelle7c9fdd92022-11-16 11:01:02 +0100495 tasklet_wakeup_after(NULL, qc->wait_event.tasklet);
496 fd_stop_recv(fd);
Amaury Denoyelle5b414862022-10-24 17:40:37 +0200497
Amaury Denoyelle5b414862022-10-24 17:40:37 +0200498 TRACE_LEAVE(QUIC_EV_CONN_RCV, qc);
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100499}
Amaury Denoyelle2ce99fe2022-01-19 15:46:11 +0100500
Frédéric Lécaille48bb8752022-08-03 20:52:20 +0200501/* Send a datagram stored into <buf> buffer with <sz> as size.
502 * The caller must ensure there is at least <sz> bytes in this buffer.
Amaury Denoyelle6715cbf2022-08-05 11:56:36 +0200503 *
504 * Returns 0 on success else non-zero.
505 *
Frédéric Lécaille48bb8752022-08-03 20:52:20 +0200506 * TODO standardize this function for a generic UDP sendto wrapper. This can be
Amaury Denoyelle58a77042022-02-09 15:43:07 +0100507 * done by removing the <qc> arg and replace it with address/port.
508 */
Amaury Denoyelle6715cbf2022-08-05 11:56:36 +0200509int qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t sz,
510 int flags)
Amaury Denoyelle58a77042022-02-09 15:43:07 +0100511{
512 ssize_t ret;
Amaury Denoyelle58a77042022-02-09 15:43:07 +0100513
Frédéric Lécaille48bb8752022-08-03 20:52:20 +0200514 do {
Amaury Denoyelledc0dcb32022-11-21 14:48:57 +0100515 if (qc_test_fd(qc)) {
516 ret = send(qc->fd, b_peek(buf, b_head_ofs(buf)), sz,
517 MSG_DONTWAIT | MSG_NOSIGNAL);
518 }
519 else {
520 ret = sendto(qc->li->rx.fd, b_peek(buf, b_head_ofs(buf)), sz,
521 MSG_DONTWAIT|MSG_NOSIGNAL,
522 (struct sockaddr *)&qc->peer_addr,
523 get_addr_len(&qc->peer_addr));
524 }
Frédéric Lécaille48bb8752022-08-03 20:52:20 +0200525 } while (ret < 0 && errno == EINTR);
Amaury Denoyelle58a77042022-02-09 15:43:07 +0100526
Amaury Denoyelle6715cbf2022-08-05 11:56:36 +0200527 if (ret < 0 || ret != sz) {
Amaury Denoyelle1d9f1702022-10-24 10:03:33 +0200528 struct proxy *prx = qc->li->bind_conf->frontend;
529 struct quic_counters *prx_counters =
530 EXTRA_COUNTERS_GET(prx->extra_counters_fe,
531 &quic_stats_module);
532
Amaury Denoyelle6715cbf2022-08-05 11:56:36 +0200533 /* TODO adjust errno for UDP context. */
534 if (errno == EAGAIN || errno == EWOULDBLOCK ||
535 errno == ENOTCONN || errno == EINPROGRESS || errno == EBADF) {
Amaury Denoyelle6715cbf2022-08-05 11:56:36 +0200536 if (errno == EAGAIN || errno == EWOULDBLOCK)
537 HA_ATOMIC_INC(&prx_counters->socket_full);
538 else
539 HA_ATOMIC_INC(&prx_counters->sendto_err);
540 }
541 else if (errno) {
Amaury Denoyelledc0dcb32022-11-21 14:48:57 +0100542 /* TODO unlisted errno : handle it explicitly.
543 * ECONNRESET may be encounter on quic-conn socket.
544 */
Amaury Denoyelle1d9f1702022-10-24 10:03:33 +0200545 HA_ATOMIC_INC(&prx_counters->sendto_err_unknown);
Amaury Denoyelle6715cbf2022-08-05 11:56:36 +0200546 }
547
548 return 1;
Frédéric Lécaille48bb8752022-08-03 20:52:20 +0200549 }
550
Amaury Denoyelle6715cbf2022-08-05 11:56:36 +0200551 /* we count the total bytes sent, and the send rate for 32-byte blocks.
552 * The reason for the latter is that freq_ctr are limited to 4GB and
553 * that it's not enough per second.
554 */
555 _HA_ATOMIC_ADD(&global.out_bytes, ret);
556 update_freq_ctr(&global.out_32bps, (ret + 16) / 32);
557
558 return 0;
Amaury Denoyelle58a77042022-02-09 15:43:07 +0100559}
560
Amaury Denoyelle7c9fdd92022-11-16 11:01:02 +0100561/* Receive datagram on <qc> FD-owned socket.
562 *
563 * Returns the total number of bytes read or a negative value on error.
564 */
565int qc_rcv_buf(struct quic_conn *qc)
566{
567 struct sockaddr_storage saddr = {0}, daddr = {0};
568 struct quic_transport_params *params;
569 struct quic_dgram *new_dgram = NULL;
570 struct buffer buf = BUF_NULL;
571 size_t max_sz;
572 unsigned char *dgram_buf;
573 struct listener *l;
574 ssize_t ret = 0;
575
576 /* Do not call this if quic-conn FD is uninitialized. */
577 BUG_ON(qc->fd < 0);
578
579 TRACE_ENTER(QUIC_EV_CONN_RCV, qc);
580 l = qc->li;
581
582 params = &l->bind_conf->quic_params;
583 max_sz = params->max_udp_payload_size;
584
585 do {
586 if (!b_alloc(&buf))
587 break; /* TODO subscribe for memory again available. */
588
589 b_reset(&buf);
590 BUG_ON(b_contig_space(&buf) < max_sz);
591
592 /* Allocate datagram on first loop or after requeuing. */
593 if (!new_dgram && !(new_dgram = pool_alloc(pool_head_quic_dgram)))
594 break; /* TODO subscribe for memory again available. */
595
596 dgram_buf = (unsigned char *)b_tail(&buf);
597 ret = quic_recv(qc->fd, dgram_buf, max_sz,
598 (struct sockaddr *)&saddr, sizeof(saddr),
599 (struct sockaddr *)&daddr, sizeof(daddr),
600 get_net_port(&qc->local_addr));
601 if (ret <= 0) {
602 /* Subscribe FD for future reception. */
603 fd_want_recv(qc->fd);
604 break;
605 }
606
607 b_add(&buf, ret);
608
609 new_dgram->buf = dgram_buf;
610 new_dgram->len = ret;
611 new_dgram->dcid_len = 0;
612 new_dgram->dcid = NULL;
613 new_dgram->saddr = saddr;
614 new_dgram->daddr = daddr;
615 new_dgram->qc = NULL; /* set later via quic_dgram_parse() */
616
617 TRACE_DEVEL("read datagram", QUIC_EV_CONN_RCV, qc, new_dgram);
618
619 if (!quic_get_dgram_dcid(new_dgram->buf,
620 new_dgram->buf + new_dgram->len,
621 &new_dgram->dcid, &new_dgram->dcid_len)) {
622 continue;
623 }
624
625 if (!qc_check_dcid(qc, new_dgram->dcid, new_dgram->dcid_len)) {
626 /* Datagram received by error on the connection FD, dispatch it
627 * to its associated quic-conn.
628 *
629 * TODO count redispatch datagrams.
630 */
Amaury Denoyelleb2bd8392022-10-05 17:56:08 +0200631 struct quic_receiver_buf *rxbuf;
632 struct quic_dgram *tmp_dgram;
633 unsigned char *rxbuf_tail;
634
635 TRACE_STATE("datagram for other connection on quic-conn socket, requeue it", QUIC_EV_CONN_RCV, qc);
636
637 rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), rxbuf_el);
638
639 tmp_dgram = quic_rxbuf_purge_dgrams(rxbuf);
640 pool_free(pool_head_quic_dgram, tmp_dgram);
641
642 if (b_contig_space(&rxbuf->buf) < new_dgram->len) {
643 /* TODO count lost datagrams */
644 MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
645 continue;
646 }
647
648 rxbuf_tail = (unsigned char *)b_tail(&rxbuf->buf);
649 __b_putblk(&rxbuf->buf, (char *)dgram_buf, new_dgram->len);
650 if (!quic_lstnr_dgram_dispatch(rxbuf_tail, ret, l, &qc->peer_addr, &daddr,
651 new_dgram, &rxbuf->dgram_list)) {
652 /* TODO count lost datagrams. */
653 b_sub(&buf, ret);
654 }
655 else {
656 /* datagram must not be freed as it was requeued. */
657 new_dgram = NULL;
658 }
659
660 MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
661 continue;
Amaury Denoyelle7c9fdd92022-11-16 11:01:02 +0100662 }
663
664 quic_dgram_parse(new_dgram, qc, qc->li);
665 /* A datagram must always be consumed after quic_parse_dgram(). */
666 BUG_ON(new_dgram->buf);
667 } while (ret > 0);
668
669 pool_free(pool_head_quic_dgram, new_dgram);
670
671 if (b_size(&buf)) {
672 b_free(&buf);
673 offer_buffers(NULL, 1);
674 }
675
676 TRACE_LEAVE(QUIC_EV_CONN_RCV, qc);
677 return ret;
678}
679
Amaury Denoyelle40909df2022-10-24 17:08:43 +0200680/* Allocate a socket file-descriptor specific for QUIC connection <qc>.
681 * Endpoint addresses are specified by the two following arguments : <src> is
682 * the local address and <dst> is the remote one.
683 *
684 * Return the socket FD or a negative error code. On error, socket is marked as
685 * uninitialized.
686 */
687void qc_alloc_fd(struct quic_conn *qc, const struct sockaddr_storage *src,
688 const struct sockaddr_storage *dst)
689{
690 struct proxy *p = qc->li->bind_conf->frontend;
691 int fd = -1;
692 int ret;
693
694 /* Must not happen. */
695 BUG_ON(src->ss_family != dst->ss_family);
696
697 qc_init_fd(qc);
698
699 fd = socket(src->ss_family, SOCK_DGRAM, 0);
700 if (fd < 0)
701 goto err;
702
703 if (fd >= global.maxsock) {
704 send_log(p, LOG_EMERG,
705 "Proxy %s reached the configured maximum connection limit. Please check the global 'maxconn' value.\n",
706 p->id);
707 goto err;
708 }
709
710 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
711 if (ret < 0)
712 goto err;
713
714 switch (src->ss_family) {
715 case AF_INET:
716#if defined(IP_PKTINFO)
717 ret = setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));
718#elif defined(IP_RECVDSTADDR)
719 ret = setsockopt(fd, IPPROTO_IP, IP_RECVDSTADDR, &one, sizeof(one));
720#endif /* IP_PKTINFO || IP_RECVDSTADDR */
721 break;
722 case AF_INET6:
723#ifdef IPV6_RECVPKTINFO
724 ret = setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one));
725#endif
726 break;
727 }
728 if (ret < 0)
729 goto err;
730
731 ret = bind(fd, (struct sockaddr *)src, get_addr_len(src));
732 if (ret < 0)
733 goto err;
734
735 ret = connect(fd, (struct sockaddr *)dst, get_addr_len(dst));
736 if (ret < 0)
737 goto err;
738
739 qc->fd = fd;
740 fd_set_nonblock(fd);
Amaury Denoyelle5b414862022-10-24 17:40:37 +0200741 fd_insert(fd, qc, quic_conn_sock_fd_iocb, tgid, ti->ltid_bit);
742 fd_want_recv(fd);
Amaury Denoyelle40909df2022-10-24 17:08:43 +0200743
744 return;
745
746 err:
747 if (fd >= 0)
748 close(fd);
749}
750
Amaury Denoyelled3083c92022-12-01 16:20:06 +0100751/* Release socket file-descriptor specific for QUIC connection <qc>. Set
752 * <reinit> if socket should be reinitialized after address migration.
753 */
754void qc_release_fd(struct quic_conn *qc, int reinit)
Amaury Denoyelle40909df2022-10-24 17:08:43 +0200755{
Amaury Denoyelle5b414862022-10-24 17:40:37 +0200756 if (qc_test_fd(qc)) {
757 fd_delete(qc->fd);
Amaury Denoyelle40909df2022-10-24 17:08:43 +0200758 qc->fd = DEAD_FD_MAGIC;
Amaury Denoyelled3083c92022-12-01 16:20:06 +0100759
760 if (reinit)
761 qc_init_fd(qc);
Amaury Denoyelle5b414862022-10-24 17:40:37 +0200762 }
Amaury Denoyelle40909df2022-10-24 17:08:43 +0200763}
Amaury Denoyelle2ce99fe2022-01-19 15:46:11 +0100764
/*********************** QUIC accept queue management ***********************/
/* per-thread accept queues, allocated by quic_alloc_accept_queues() */
struct quic_accept_queue *quic_accept_queues;
768
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100769/* Install <qc> on the queue ready to be accepted. The queue task is then woken
Frédéric Lécaille91f083a2022-01-28 21:43:48 +0100770 * up. If <qc> accept is already scheduled or done, nothing is done.
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100771 */
772void quic_accept_push_qc(struct quic_conn *qc)
773{
774 struct quic_accept_queue *queue = &quic_accept_queues[qc->tid];
775 struct li_per_thread *lthr = &qc->li->per_thr[qc->tid];
776
Frédéric Lécaille91f083a2022-01-28 21:43:48 +0100777 /* early return if accept is already in progress/done for this
778 * connection
779 */
Frédéric Lécaillefc790062022-03-28 17:10:31 +0200780 if (qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED)
Frédéric Lécaille91f083a2022-01-28 21:43:48 +0100781 return;
782
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100783 BUG_ON(MT_LIST_INLIST(&qc->accept_list));
784
Frédéric Lécaillefc790062022-03-28 17:10:31 +0200785 qc->flags |= QUIC_FL_CONN_ACCEPT_REGISTERED;
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100786 /* 1. insert the listener in the accept queue
787 *
788 * Use TRY_APPEND as there is a possible race even with INLIST if
789 * multiple threads try to add the same listener instance from several
790 * quic_conn.
791 */
792 if (!MT_LIST_INLIST(&(lthr->quic_accept.list)))
793 MT_LIST_TRY_APPEND(&queue->listeners, &(lthr->quic_accept.list));
794
795 /* 2. insert the quic_conn in the listener per-thread queue. */
796 MT_LIST_APPEND(&lthr->quic_accept.conns, &qc->accept_list);
797
798 /* 3. wake up the queue tasklet */
799 tasklet_wakeup(quic_accept_queues[qc->tid].tasklet);
800}
801
Amaury Denoyelle2ce99fe2022-01-19 15:46:11 +0100802/* Tasklet handler to accept QUIC connections. Call listener_accept on every
803 * listener instances registered in the accept queue.
804 */
Willy Tarreau41e701e2022-09-08 15:12:59 +0200805struct task *quic_accept_run(struct task *t, void *ctx, unsigned int i)
Amaury Denoyelle2ce99fe2022-01-19 15:46:11 +0100806{
807 struct li_per_thread *lthr;
808 struct mt_list *elt1, elt2;
809 struct quic_accept_queue *queue = &quic_accept_queues[tid];
810
811 mt_list_for_each_entry_safe(lthr, &queue->listeners, quic_accept.list, elt1, elt2) {
812 listener_accept(lthr->li);
813 MT_LIST_DELETE_SAFE(elt1);
814 }
815
816 return NULL;
817}
818
819static int quic_alloc_accept_queues(void)
820{
821 int i;
822
Tim Duesterhus9fb57e82022-06-01 21:58:37 +0200823 quic_accept_queues = calloc(global.nbthread,
824 sizeof(*quic_accept_queues));
Amaury Denoyelle2ce99fe2022-01-19 15:46:11 +0100825 if (!quic_accept_queues) {
826 ha_alert("Failed to allocate the quic accept queues.\n");
827 return 0;
828 }
829
830 for (i = 0; i < global.nbthread; ++i) {
831 struct tasklet *task;
832 if (!(task = tasklet_new())) {
833 ha_alert("Failed to allocate the quic accept queue on thread %d.\n", i);
834 return 0;
835 }
836
837 tasklet_set_tid(task, i);
838 task->process = quic_accept_run;
839 quic_accept_queues[i].tasklet = task;
840
841 MT_LIST_INIT(&quic_accept_queues[i].listeners);
842 }
843
844 return 1;
845}
846REGISTER_POST_CHECK(quic_alloc_accept_queues);
847
848static int quic_deallocate_accept_queues(void)
849{
850 int i;
851
852 if (quic_accept_queues) {
853 for (i = 0; i < global.nbthread; ++i)
854 tasklet_free(quic_accept_queues[i].tasklet);
855 free(quic_accept_queues);
856 }
857
858 return 1;
859}
860REGISTER_POST_DEINIT(quic_deallocate_accept_queues);