blob: e56372ed60facce709f23baba806d3e6488507aa [file] [log] [blame]
Frédéric Lécaille70da8892020-11-06 15:49:49 +01001/*
2 * QUIC socket management.
3 *
Willy Tarreau3dfb7da2022-03-02 22:33:39 +01004 * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
Frédéric Lécaille70da8892020-11-06 15:49:49 +01005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <errno.h>
14
15#include <sys/socket.h>
16#include <sys/types.h>
17
18#include <haproxy/connection.h>
19#include <haproxy/listener.h>
Amaury Denoyelle4d295042022-01-19 16:18:44 +010020#include <haproxy/quic_sock.h>
Amaury Denoyelleeb01f592021-10-07 16:44:05 +020021#include <haproxy/session.h>
Amaury Denoyelle777969c2022-03-24 16:06:26 +010022#include <haproxy/tools.h>
Frédéric Lécaille026a7922020-11-23 15:46:36 +010023#include <haproxy/xprt_quic.h>
24
25/* This function is called from the protocol layer accept() in order to
26 * instantiate a new session on behalf of a given listener and frontend. It
27 * returns a positive value upon success, 0 if the connection can be ignored,
28 * or a negative value upon critical failure. The accepted connection is
29 * closed if we return <= 0. If no handshake is needed, it immediately tries
30 * to instantiate a new stream. The connection must already have been filled
31 * with the incoming connection handle (a fd), a target (the listener) and a
32 * source address.
33 */
34int quic_session_accept(struct connection *cli_conn)
35{
36 struct listener *l = __objt_listener(cli_conn->target);
37 struct proxy *p = l->bind_conf->frontend;
38 struct session *sess;
39
40 cli_conn->proxy_netns = l->rx.settings->netns;
Frédéric Lécaille026a7922020-11-23 15:46:36 +010041 /* This flag is ordinarily set by conn_ctrl_init() which cannot
42 * be called for now.
43 */
44 cli_conn->flags |= CO_FL_CTRL_READY;
45
46 /* wait for a PROXY protocol header */
47 if (l->options & LI_O_ACC_PROXY)
48 cli_conn->flags |= CO_FL_ACCEPT_PROXY;
49
50 /* wait for a NetScaler client IP insertion protocol header */
51 if (l->options & LI_O_ACC_CIP)
52 cli_conn->flags |= CO_FL_ACCEPT_CIP;
53
Frédéric Lécaille026a7922020-11-23 15:46:36 +010054 /* Add the handshake pseudo-XPRT */
55 if (cli_conn->flags & (CO_FL_ACCEPT_PROXY | CO_FL_ACCEPT_CIP)) {
56 if (xprt_add_hs(cli_conn) != 0)
57 goto out_free_conn;
58 }
Olivier Houchard1b3c9312021-03-05 23:37:48 +010059
Frédéric Lécaille026a7922020-11-23 15:46:36 +010060 sess = session_new(p, l, &cli_conn->obj_type);
61 if (!sess)
62 goto out_free_conn;
63
64 conn_set_owner(cli_conn, sess, NULL);
65
Frédéric Lécailleecb58722021-05-27 17:12:36 +020066 if (conn_complete_session(cli_conn) < 0)
67 goto out_free_sess;
68
Amaury Denoyelle622ec412022-04-13 16:58:26 +020069 if (conn_xprt_start(cli_conn) < 0) {
70 /* conn_complete_session has succeeded : conn is the owner of
71 * the session and the MUX is initialized.
72 * Let the MUX free all resources on error.
73 */
74 cli_conn->mux->destroy(cli_conn->ctx);
75 return -1;
76 }
77
78 return 1;
Frédéric Lécaille27faba72021-03-03 16:21:00 +010079
Frédéric Lécaille026a7922020-11-23 15:46:36 +010080 out_free_sess:
81 /* prevent call to listener_release during session_free. It will be
82 * done below, for all errors. */
83 sess->listener = NULL;
84 session_free(sess);
85 out_free_conn:
Willy Tarreau784b8682022-04-11 14:18:10 +020086 cli_conn->handle.qc->conn = NULL;
Frédéric Lécaille026a7922020-11-23 15:46:36 +010087 conn_stop_tracking(cli_conn);
88 conn_xprt_close(cli_conn);
89 conn_free(cli_conn);
90 out:
91
Frédéric Lécaillee8139f32021-03-11 17:06:30 +010092 return -1;
Frédéric Lécaille026a7922020-11-23 15:46:36 +010093}
94
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +020095/* Retrieve a connection's source address. Returns -1 on failure. */
96int quic_sock_get_src(struct connection *conn, struct sockaddr *addr, socklen_t len)
97{
98 struct quic_conn *qc;
99
Willy Tarreau784b8682022-04-11 14:18:10 +0200100 if (!conn || !conn->handle.qc)
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +0200101 return -1;
102
Willy Tarreau784b8682022-04-11 14:18:10 +0200103 qc = conn->handle.qc;
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +0200104 if (conn_is_back(conn)) {
105 /* no source address defined for outgoing connections for now */
106 return -1;
107 } else {
108 /* front connection, return the peer's address */
109 if (len > sizeof(qc->peer_addr))
110 len = sizeof(qc->peer_addr);
111 memcpy(addr, &qc->peer_addr, len);
112 return 0;
113 }
114}
115
116/* Retrieve a connection's destination address. Returns -1 on failure. */
117int quic_sock_get_dst(struct connection *conn, struct sockaddr *addr, socklen_t len)
118{
119 struct quic_conn *qc;
120
Willy Tarreau784b8682022-04-11 14:18:10 +0200121 if (!conn || !conn->handle.qc)
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +0200122 return -1;
123
Willy Tarreau784b8682022-04-11 14:18:10 +0200124 qc = conn->handle.qc;
Willy Tarreaucdf7c8e2022-04-11 16:20:00 +0200125 if (conn_is_back(conn)) {
126 /* back connection, return the peer's address */
127 if (len > sizeof(qc->peer_addr))
128 len = sizeof(qc->peer_addr);
129 memcpy(addr, &qc->peer_addr, len);
130 } else {
131 /* FIXME: front connection, no local address for now, we'll
132 * return the listener's address instead.
133 */
134 BUG_ON(!qc->li);
135
136 if (len > sizeof(qc->li->rx.addr))
137 len = sizeof(qc->li->rx.addr);
138 memcpy(addr, &qc->li->rx.addr, len);
139 }
140 return 0;
141}
142
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100143/*
144 * Inspired from session_accept_fd().
145 * Instantiate a new connection (connection struct) to be attached to <qc>
146 * QUIC connection of <l> listener.
147 * Returns 1 if succeeded, 0 if not.
148 */
149static int new_quic_cli_conn(struct quic_conn *qc, struct listener *l,
150 struct sockaddr_storage *saddr)
151{
152 struct connection *cli_conn;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100153
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100154 if (unlikely((cli_conn = conn_new(&l->obj_type)) == NULL))
155 goto out;
156
Willy Tarreau9cc88c32022-04-08 14:34:31 +0200157 if (!sockaddr_alloc(&cli_conn->src, saddr, sizeof *saddr))
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100158 goto out_free_conn;
159
Willy Tarreau030b3e62022-05-02 17:47:46 +0200160 cli_conn->flags |= CO_FL_FDLESS;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100161 qc->conn = cli_conn;
Willy Tarreau784b8682022-04-11 14:18:10 +0200162 cli_conn->handle.qc = qc;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100163
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100164 cli_conn->target = &l->obj_type;
165
Frédéric Lécaille01ab6612021-06-14 10:31:43 +0200166 /* We need the xprt context before accepting (->accept()) the connection:
167 * we may receive packet before this connection acception.
168 */
169 if (conn_prepare(cli_conn, l->rx.proto, l->bind_conf->xprt) < 0)
170 goto out_free_conn;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100171
172 return 1;
173
174 out_free_conn:
Frédéric Lécaille01ab6612021-06-14 10:31:43 +0200175 qc->conn = NULL;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100176 conn_stop_tracking(cli_conn);
177 conn_xprt_close(cli_conn);
178 conn_free(cli_conn);
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100179 out:
180
181 return 0;
182}
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100183
/* Report whether receiver <rx> is able to accept connections. The receiver is
 * not inspected: the answer is always positive (1).
 */
int quic_sock_accepting_conn(const struct receiver *rx)
{
	(void)rx;

	return 1;
}
191
192/* Accept an incoming connection from listener <l>, and return it, as well as
193 * a CO_AC_* status code into <status> if not null. Null is returned on error.
194 * <l> must be a valid listener with a valid frontend.
195 */
196struct connection *quic_sock_accept_conn(struct listener *l, int *status)
197{
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100198 struct quic_conn *qc;
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100199 struct li_per_thread *lthr = &l->per_thr[tid];
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100200
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100201 qc = MT_LIST_POP(&lthr->quic_accept.conns, struct quic_conn *, accept_list);
202 if (!qc)
203 goto done;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100204
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100205 if (!new_quic_cli_conn(qc, l, &qc->peer_addr))
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100206 goto err;
207
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100208 done:
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100209 *status = CO_AC_DONE;
Frédéric Lécaille026a7922020-11-23 15:46:36 +0100210 return qc ? qc->conn : NULL;
211
212 err:
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100213 /* in case of error reinsert the element to process it later. */
214 MT_LIST_INSERT(&lthr->quic_accept.conns, &qc->accept_list);
215
216 *status = CO_AC_PAUSE;
217 return NULL;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100218}
219
220/* Function called on a read event from a listening socket. It tries
221 * to handle as many connections as possible.
222 */
223void quic_sock_fd_iocb(int fd)
224{
225 ssize_t ret;
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100226 struct rxbuf *rxbuf;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100227 struct buffer *buf;
228 struct listener *l = objt_listener(fdtab[fd].owner);
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100229 struct quic_transport_params *params;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100230 /* Source address */
231 struct sockaddr_storage saddr = {0};
Frédéric Lécaille320744b2022-01-27 12:19:28 +0100232 size_t max_sz, cspace;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100233 socklen_t saddrlen;
Frédéric Lécaille37ae5052022-01-27 11:31:50 +0100234 struct quic_dgram *dgram, *dgramp, *new_dgram;
Frédéric Lécaillef6f75202022-02-02 09:44:22 +0100235 unsigned char *dgram_buf;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100236
Tim Duesterhus16554242021-09-15 13:58:49 +0200237 BUG_ON(!l);
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100238
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100239 if (!l)
240 return;
241
Willy Tarreauf5090652021-04-06 17:23:40 +0200242 if (!(fdtab[fd].state & FD_POLL_IN) || !fd_recv_ready(fd))
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100243 return;
244
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100245 rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), mt_list);
Amaury Denoyelleee72a432021-11-19 15:49:29 +0100246 if (!rxbuf)
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100247 goto out;
Frédéric Lécaille37ae5052022-01-27 11:31:50 +0100248
Amaury Denoyelleee72a432021-11-19 15:49:29 +0100249 buf = &rxbuf->buf;
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100250
Frédéric Lécaille37ae5052022-01-27 11:31:50 +0100251 new_dgram = NULL;
252 /* Remove all consumed datagrams of this buffer */
253 list_for_each_entry_safe(dgram, dgramp, &rxbuf->dgrams, list) {
254 if (HA_ATOMIC_LOAD(&dgram->buf))
255 break;
256
257 LIST_DELETE(&dgram->list);
258 b_del(buf, dgram->len);
259 if (!new_dgram)
260 new_dgram = dgram;
261 else
262 pool_free(pool_head_quic_dgram, dgram);
263 }
264
Frédéric Lécaillec4becf52021-11-08 11:23:17 +0100265 params = &l->bind_conf->quic_params;
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100266 max_sz = params->max_udp_payload_size;
Frédéric Lécaille320744b2022-01-27 12:19:28 +0100267 cspace = b_contig_space(buf);
268 if (cspace < max_sz) {
Frédéric Lécaille1712b1d2022-01-28 13:10:24 +0100269 struct quic_dgram *dgram;
270
271 /* Allocate a fake datagram, without data to locate
272 * the end of the RX buffer (required during purging).
273 */
274 dgram = pool_zalloc(pool_head_quic_dgram);
275 if (!dgram)
276 goto out;
277
278 dgram->len = cspace;
279 LIST_APPEND(&rxbuf->dgrams, &dgram->list);
Frédéric Lécaille320744b2022-01-27 12:19:28 +0100280 /* Consume the remaining space */
281 b_add(buf, cspace);
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100282 if (b_contig_space(buf) < max_sz)
283 goto out;
Frédéric Lécaillef6f75202022-02-02 09:44:22 +0100284
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100285 }
286
Frédéric Lécaillef6f75202022-02-02 09:44:22 +0100287 dgram_buf = (unsigned char *)b_tail(buf);
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100288 saddrlen = sizeof saddr;
289 do {
Frédéric Lécaillef6f75202022-02-02 09:44:22 +0100290 ret = recvfrom(fd, dgram_buf, max_sz, 0,
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100291 (struct sockaddr *)&saddr, &saddrlen);
Willy Tarreauacef5e22022-04-25 20:32:15 +0200292 if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
Frédéric Lécaille439c4642022-02-02 14:33:10 +0100293 fd_cant_recv(fd);
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100294 goto out;
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100295 }
Frédéric Lécaille439c4642022-02-02 14:33:10 +0100296 } while (ret < 0 && errno == EINTR);
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100297
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100298 b_add(buf, ret);
Frédéric Lécaillef6f75202022-02-02 09:44:22 +0100299 if (!quic_lstnr_dgram_dispatch(dgram_buf, ret, l, &saddr,
300 new_dgram, &rxbuf->dgrams)) {
Frédéric Lécaille37ae5052022-01-27 11:31:50 +0100301 /* If wrong, consume this datagram */
302 b_del(buf, ret);
303 }
Frédéric Lécaille324ecda2021-11-02 10:14:44 +0100304 out:
305 MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->mt_list);
Frédéric Lécaille70da8892020-11-06 15:49:49 +0100306}
Amaury Denoyelle2ce99fe2022-01-19 15:46:11 +0100307
Amaury Denoyelle58a77042022-02-09 15:43:07 +0100308/* TODO standardize this function for a generic UDP sendto wrapper. This can be
309 * done by removing the <qc> arg and replace it with address/port.
310 */
311size_t qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t count,
312 int flags)
313{
314 ssize_t ret;
315 size_t try, done;
316 int send_flag;
317
318 done = 0;
319 /* send the largest possible block. For this we perform only one call
320 * to send() unless the buffer wraps and we exactly fill the first hunk,
321 * in which case we accept to do it once again.
322 */
323 while (count) {
324 try = b_contig_data(buf, done);
325 if (try > count)
326 try = count;
327
328 send_flag = MSG_DONTWAIT | MSG_NOSIGNAL;
329 if (try < count || flags & CO_SFL_MSG_MORE)
330 send_flag |= MSG_MORE;
331
332 ret = sendto(qc->li->rx.fd, b_peek(buf, done), try, send_flag,
333 (struct sockaddr *)&qc->peer_addr, get_addr_len(&qc->peer_addr));
334 if (ret > 0) {
335 /* TODO remove partial sending support for UDP */
336 count -= ret;
337 done += ret;
338
339 if (ret < try)
340 break;
341 }
Amaury Denoyellead5df382022-05-18 18:26:13 +0200342 else if (errno == EINTR) {
343 /* try again */
344 continue;
345 }
Willy Tarreauacef5e22022-04-25 20:32:15 +0200346 else if (ret == 0 || errno == EAGAIN || errno == EWOULDBLOCK || errno == ENOTCONN || errno == EINPROGRESS) {
Amaury Denoyelle58a77042022-02-09 15:43:07 +0100347 /* TODO must be handle properly. It is justified for UDP ? */
Frédéric Lécaille8726d632022-05-03 10:32:21 +0200348 qc->sendto_err++;
Amaury Denoyelle8fa66662022-05-18 18:14:12 +0200349 break;
Amaury Denoyelle58a77042022-02-09 15:43:07 +0100350 }
Amaury Denoyellead5df382022-05-18 18:26:13 +0200351 else if (errno) {
352 /* TODO unlisted errno : handle it explicitely. */
353 ABORT_NOW();
Amaury Denoyelle58a77042022-02-09 15:43:07 +0100354 }
355 }
356
357 if (done > 0) {
358 /* we count the total bytes sent, and the send rate for 32-byte
359 * blocks. The reason for the latter is that freq_ctr are
360 * limited to 4GB and that it's not enough per second.
361 */
362 _HA_ATOMIC_ADD(&global.out_bytes, done);
363 update_freq_ctr(&global.out_32bps, (done + 16) / 32);
364 }
365 return done;
366}
367
Amaury Denoyelle2ce99fe2022-01-19 15:46:11 +0100368
369/*********************** QUIC accept queue management ***********************/
/* per-thread accept queues: array indexed by thread ID, allocated with
 * global.nbthread entries by quic_alloc_accept_queues() and released by
 * quic_deallocate_accept_queues().
 */
struct quic_accept_queue *quic_accept_queues;
372
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100373/* Install <qc> on the queue ready to be accepted. The queue task is then woken
Frédéric Lécaille91f083a2022-01-28 21:43:48 +0100374 * up. If <qc> accept is already scheduled or done, nothing is done.
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100375 */
376void quic_accept_push_qc(struct quic_conn *qc)
377{
378 struct quic_accept_queue *queue = &quic_accept_queues[qc->tid];
379 struct li_per_thread *lthr = &qc->li->per_thr[qc->tid];
380
Frédéric Lécaille91f083a2022-01-28 21:43:48 +0100381 /* early return if accept is already in progress/done for this
382 * connection
383 */
Frédéric Lécaillefc790062022-03-28 17:10:31 +0200384 if (qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED)
Frédéric Lécaille91f083a2022-01-28 21:43:48 +0100385 return;
386
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100387 BUG_ON(MT_LIST_INLIST(&qc->accept_list));
388
Frédéric Lécaillefc790062022-03-28 17:10:31 +0200389 qc->flags |= QUIC_FL_CONN_ACCEPT_REGISTERED;
Amaury Denoyellecfa2d562022-01-19 16:01:05 +0100390 /* 1. insert the listener in the accept queue
391 *
392 * Use TRY_APPEND as there is a possible race even with INLIST if
393 * multiple threads try to add the same listener instance from several
394 * quic_conn.
395 */
396 if (!MT_LIST_INLIST(&(lthr->quic_accept.list)))
397 MT_LIST_TRY_APPEND(&queue->listeners, &(lthr->quic_accept.list));
398
399 /* 2. insert the quic_conn in the listener per-thread queue. */
400 MT_LIST_APPEND(&lthr->quic_accept.conns, &qc->accept_list);
401
402 /* 3. wake up the queue tasklet */
403 tasklet_wakeup(quic_accept_queues[qc->tid].tasklet);
404}
405
Amaury Denoyelle2ce99fe2022-01-19 15:46:11 +0100406/* Tasklet handler to accept QUIC connections. Call listener_accept on every
407 * listener instances registered in the accept queue.
408 */
409static struct task *quic_accept_run(struct task *t, void *ctx, unsigned int i)
410{
411 struct li_per_thread *lthr;
412 struct mt_list *elt1, elt2;
413 struct quic_accept_queue *queue = &quic_accept_queues[tid];
414
415 mt_list_for_each_entry_safe(lthr, &queue->listeners, quic_accept.list, elt1, elt2) {
416 listener_accept(lthr->li);
417 MT_LIST_DELETE_SAFE(elt1);
418 }
419
420 return NULL;
421}
422
423static int quic_alloc_accept_queues(void)
424{
425 int i;
426
427 quic_accept_queues = calloc(global.nbthread, sizeof(struct quic_accept_queue));
428 if (!quic_accept_queues) {
429 ha_alert("Failed to allocate the quic accept queues.\n");
430 return 0;
431 }
432
433 for (i = 0; i < global.nbthread; ++i) {
434 struct tasklet *task;
435 if (!(task = tasklet_new())) {
436 ha_alert("Failed to allocate the quic accept queue on thread %d.\n", i);
437 return 0;
438 }
439
440 tasklet_set_tid(task, i);
441 task->process = quic_accept_run;
442 quic_accept_queues[i].tasklet = task;
443
444 MT_LIST_INIT(&quic_accept_queues[i].listeners);
445 }
446
447 return 1;
448}
449REGISTER_POST_CHECK(quic_alloc_accept_queues);
450
451static int quic_deallocate_accept_queues(void)
452{
453 int i;
454
455 if (quic_accept_queues) {
456 for (i = 0; i < global.nbthread; ++i)
457 tasklet_free(quic_accept_queues[i].tasklet);
458 free(quic_accept_queues);
459 }
460
461 return 1;
462}
463REGISTER_POST_DEINIT(quic_deallocate_accept_queues);