/*
 * QUIC socket management.
 *
 * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <errno.h>

#include <sys/socket.h>
#include <sys/types.h>

#include <haproxy/connection.h>
#include <haproxy/listener.h>
#include <haproxy/quic_sock.h>
#include <haproxy/session.h>
#include <haproxy/xprt_quic.h>

/* This function is called from the protocol layer accept() in order to
 * instantiate a new session on behalf of a given listener and frontend. It
 * returns a positive value upon success, 0 if the connection can be ignored,
 * or a negative value upon critical failure. The accepted connection is
 * closed if we return <= 0. If no handshake is needed, it immediately tries
 * to instantiate a new stream. The connection must already have been filled
 * with the incoming connection handle (an fd), a target (the listener) and a
 * source address.
 */
int quic_session_accept(struct connection *cli_conn)
{
	struct listener *l = __objt_listener(cli_conn->target);
	struct proxy *p = l->bind_conf->frontend;
	struct session *sess;

	cli_conn->proxy_netns = l->rx.settings->netns;
	/* This flag is ordinarily set by conn_ctrl_init() which cannot
	 * be called for now.
	 */
	cli_conn->flags |= CO_FL_CTRL_READY;

	/* wait for a PROXY protocol header */
	if (l->options & LI_O_ACC_PROXY)
		cli_conn->flags |= CO_FL_ACCEPT_PROXY;

	/* wait for a NetScaler client IP insertion protocol header */
	if (l->options & LI_O_ACC_CIP)
		cli_conn->flags |= CO_FL_ACCEPT_CIP;

	/* Add the handshake pseudo-XPRT */
	if (cli_conn->flags & (CO_FL_ACCEPT_PROXY | CO_FL_ACCEPT_CIP)) {
		if (xprt_add_hs(cli_conn) != 0)
			goto out_free_conn;
	}

	sess = session_new(p, l, &cli_conn->obj_type);
	if (!sess)
		goto out_free_conn;

	conn_set_owner(cli_conn, sess, NULL);

	if (conn_complete_session(cli_conn) < 0)
		goto out_free_sess;

	if (conn_xprt_start(cli_conn) >= 0)
		return 1;

 out_free_sess:
	/* prevent call to listener_release() during session_free(). It will
	 * be done below, for all errors.
	 */
	sess->listener = NULL;
	session_free(sess);
 out_free_conn:
	cli_conn->qc->conn = NULL;
	conn_stop_tracking(cli_conn);
	conn_xprt_close(cli_conn);
	conn_free(cli_conn);
 out:

	return -1;
}

/*
 * Inspired by session_accept_fd().
 * Instantiate a new connection (connection struct) to be attached to the
 * <qc> QUIC connection of the <l> listener.
 * Returns 1 on success, 0 on failure.
 */
static int new_quic_cli_conn(struct quic_conn *qc, struct listener *l,
                             struct sockaddr_storage *saddr)
{
	struct connection *cli_conn;

	if (unlikely((cli_conn = conn_new(&l->obj_type)) == NULL))
		goto out;

	if (!sockaddr_alloc(&cli_conn->dst, saddr, sizeof *saddr))
		goto out_free_conn;

	cli_conn->flags |= CO_FL_ADDR_TO_SET;
	qc->conn = cli_conn;
	cli_conn->qc = qc;

	cli_conn->handle.fd = l->rx.fd;
	cli_conn->target = &l->obj_type;

	/* We need the xprt context before accepting (->accept()) the connection:
	 * packets may be received before this connection is accepted.
	 */
	if (conn_prepare(cli_conn, l->rx.proto, l->bind_conf->xprt) < 0)
		goto out_free_conn;

	return 1;

 out_free_conn:
	qc->conn = NULL;
	conn_stop_tracking(cli_conn);
	conn_xprt_close(cli_conn);
	conn_free(cli_conn);
 out:

	return 0;
}

/* Tests if the receiver supports accepting connections. Returns positive on
 * success, 0 if not possible.
 */
int quic_sock_accepting_conn(const struct receiver *rx)
{
	return 1;
}
135
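/* Note: a QUIC receiver is always reported as ready to accept: acceptation
 * is driven by the per-thread accept queues filled by quic_accept_push_qc()
 * (see the accept queue management section below), not by socket readiness
 * as with TCP.
 */
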
/* Accept an incoming connection from listener <l>, and return it, as well as
 * a CO_AC_* status code into <status>, which must not be NULL. NULL is
 * returned on error. <l> must be a valid listener with a valid frontend.
 */
struct connection *quic_sock_accept_conn(struct listener *l, int *status)
{
	struct quic_conn *qc;
	struct li_per_thread *lthr = &l->per_thr[tid];

	qc = MT_LIST_POP(&lthr->quic_accept.conns, struct quic_conn *, accept_list);
	if (!qc)
		goto done;

	if (!new_quic_cli_conn(qc, l, &qc->peer_addr))
		goto err;

 done:
	*status = CO_AC_DONE;
	return qc ? qc->conn : NULL;

 err:
	/* In case of error, reinsert the element to process it later. */
	MT_LIST_INSERT(&lthr->quic_accept.conns, &qc->accept_list);

	*status = CO_AC_PAUSE;
	return NULL;
}
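
/* A minimal sketch of the expected consumption pattern (illustrative only:
 * the real consumer is listener_accept(), this loop merely demonstrates the
 * <status> contract of quic_sock_accept_conn()):
 *
 *	struct connection *conn;
 *	int status;
 *
 *	while ((conn = quic_sock_accept_conn(l, &status)) != NULL) {
 *		// hand <conn> over to the session layer,
 *		// e.g. via quic_session_accept(conn)
 *	}
 *	if (status == CO_AC_PAUSE) {
 *		// a connection could not be instantiated and was requeued;
 *		// come back later
 *	}
 *	// otherwise status == CO_AC_DONE: the accept queue is empty
 */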

/* Function called on a read event from a listening socket. It tries to read
 * an incoming datagram into the listener RX buffer and to dispatch it to the
 * QUIC connection it belongs to.
 */
void quic_sock_fd_iocb(int fd)
{
	ssize_t ret;
	struct rxbuf *rxbuf;
	struct buffer *buf;
	struct listener *l = objt_listener(fdtab[fd].owner);
	struct quic_transport_params *params;
	/* Source address */
	struct sockaddr_storage saddr = {0};
	size_t max_sz, cspace;
	socklen_t saddrlen;
	struct quic_dgram *dgram, *dgramp, *new_dgram;
	unsigned char *dgram_buf;

	BUG_ON(!l);

	if (!l)
		return;

	if (!(fdtab[fd].state & FD_POLL_IN) || !fd_recv_ready(fd))
		return;

	rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), mt_list);
	if (!rxbuf)
		return; /* do not jump to <out>: there is no buffer to reinsert */

	buf = &rxbuf->buf;

	new_dgram = NULL;
	/* Remove all consumed datagrams from this buffer, keeping the first
	 * released one so it can be recycled for the datagram to come.
	 */
	list_for_each_entry_safe(dgram, dgramp, &rxbuf->dgrams, list) {
		if (HA_ATOMIC_LOAD(&dgram->buf))
			break;

		LIST_DELETE(&dgram->list);
		b_del(buf, dgram->len);
		if (!new_dgram)
			new_dgram = dgram;
		else
			pool_free(pool_head_quic_dgram, dgram);
	}

	params = &l->bind_conf->quic_params;
	max_sz = params->max_udp_payload_size;
	cspace = b_contig_space(buf);
	if (cspace < max_sz) {
		struct quic_dgram *dgram;

		/* Allocate a fake datagram, without data, to mark the end of
		 * the RX buffer (required during purging).
		 */
		dgram = pool_zalloc(pool_head_quic_dgram);
		if (!dgram)
			goto out;

		dgram->len = cspace;
		LIST_APPEND(&rxbuf->dgrams, &dgram->list);
		/* Consume the remaining space */
		b_add(buf, cspace);
		if (b_contig_space(buf) < max_sz)
			goto out;
	}

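	/* At this point the buffer offers at least <max_sz> contiguous bytes
	 * at its tail. After the wrap above, the area looks like this (sketch):
	 *
	 *     |--free--|--dgram1--|--dgram2--|...|--fake dgram--|
	 *     ^        ^                                        ^
	 *   new tail   head (oldest unconsumed datagram)     buffer end
	 *
	 * The fake datagram carries no data but accounts for the <cspace>
	 * bytes skipped at the end of the buffer, so that the purge loop
	 * above can release them with b_del() like any consumed datagram.
	 */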
	dgram_buf = (unsigned char *)b_tail(buf);
	saddrlen = sizeof saddr;
	do {
		ret = recvfrom(fd, dgram_buf, max_sz, 0,
		               (struct sockaddr *)&saddr, &saddrlen);
		if (ret < 0 && errno == EAGAIN) {
			fd_cant_recv(fd);
			goto out;
		}
	} while (ret < 0 && errno == EINTR);

	/* Do not add a negative length to the buffer on any other error. */
	if (ret < 0)
		goto out;

	b_add(buf, ret);
	if (!quic_lstnr_dgram_dispatch(dgram_buf, ret, l, &saddr,
	                               new_dgram, &rxbuf->dgrams)) {
		/* If the datagram could not be dispatched, consume it. */
		b_del(buf, ret);
	}
 out:
	MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->mt_list);
}
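
/* Summary of the datagram life cycle in the RX buffer (descriptive note):
 *   1. quic_sock_fd_iocb() reads a datagram at the buffer tail;
 *   2. quic_lstnr_dgram_dispatch() attaches it to <rxbuf->dgrams> and hands
 *      it over to the thread owning the matching QUIC connection;
 *   3. once processed, the owner releases the datagram by resetting its
 *      <buf> pointer (the condition tested by the purge loop above);
 *   4. the next call to quic_sock_fd_iocb() removes such released datagrams
 *      and reclaims their bytes with b_del().
 */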

/* TODO: standardize this function as a generic UDP sendto wrapper. This can
 * be done by removing the <qc> arg and replacing it with an address/port.
 */
size_t qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t count,
                  int flags)
{
	ssize_t ret;
	size_t try, done;
	int send_flag;

	done = 0;
	/* send the largest possible block. For this we perform only one call
	 * to send() unless the buffer wraps and we exactly fill the first hunk,
	 * in which case we accept to do it once again.
	 */
	while (count) {
		try = b_contig_data(buf, done);
		if (try > count)
			try = count;

		send_flag = MSG_DONTWAIT | MSG_NOSIGNAL;
		if (try < count || flags & CO_SFL_MSG_MORE)
			send_flag |= MSG_MORE;

		ret = sendto(qc->li->rx.fd, b_peek(buf, done), try, send_flag,
		             (struct sockaddr *)&qc->peer_addr, get_addr_len(&qc->peer_addr));
		if (ret > 0) {
			/* TODO remove partial sending support for UDP */
			count -= ret;
			done += ret;

			if (ret < try)
				break;
		}
		else if (ret == 0 || errno == EAGAIN || errno == ENOTCONN || errno == EINPROGRESS) {
			/* TODO: must be handled properly. Is this justified for UDP? */
			ABORT_NOW();
		}
		else if (errno != EINTR) {
			/* TODO: must be handled properly. Is this justified for UDP? */
			ABORT_NOW();
		}
	}

	if (done > 0) {
		/* we count the total bytes sent, and the send rate for 32-byte
		 * blocks. The reason for the latter is that freq_ctr values are
		 * limited to 4G per second, which is not enough when counting
		 * bytes on fast links; counting 32-byte blocks raises the
		 * measurable rate to 128 GB/s. The +16 rounds to the nearest
		 * block.
		 */
		_HA_ATOMIC_ADD(&global.out_bytes, done);
		update_freq_ctr(&global.out_32bps, (done + 16) / 32);
	}
	return done;
}
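
/* A minimal sketch of the generic wrapper suggested by the TODO above. The
 * udp_snd_buf() name and signature are hypothetical (not an existing HAProxy
 * API): the <qc> argument is replaced with an explicit fd and destination
 * address, the rest of the loop being the same as in qc_snd_buf():
 *
 *	static size_t udp_snd_buf(int fd, const struct sockaddr_storage *dst,
 *	                          const struct buffer *buf, size_t count)
 *	{
 *		size_t try, done = 0;
 *		ssize_t ret;
 *
 *		while (count) {
 *			try = b_contig_data(buf, done);
 *			if (try > count)
 *				try = count;
 *
 *			ret = sendto(fd, b_peek(buf, done), try,
 *			             MSG_DONTWAIT | MSG_NOSIGNAL,
 *			             (struct sockaddr *)dst, get_addr_len(dst));
 *			if (ret > 0) {
 *				count -= ret;
 *				done += ret;
 *			}
 *			else if (ret == 0 || errno != EINTR)
 *				break; // retry only on EINTR
 *		}
 *		return done;
 *	}
 */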


/*********************** QUIC accept queue management ***********************/
/* per-thread accept queues */
struct quic_accept_queue *quic_accept_queues;

/* Install <qc> on the accept queue of its thread, ready to be accepted, and
 * wake the queue tasklet up. If the accept of <qc> is already scheduled or
 * done, nothing is done.
 */
void quic_accept_push_qc(struct quic_conn *qc)
{
	struct quic_accept_queue *queue = &quic_accept_queues[qc->tid];
	struct li_per_thread *lthr = &qc->li->per_thr[qc->tid];

	/* early return if accept is already in progress/done for this
	 * connection
	 */
	if (HA_ATOMIC_BTS(&qc->flags, QUIC_FL_ACCEPT_REGISTERED_BIT))
		return;

	BUG_ON(MT_LIST_INLIST(&qc->accept_list));

	/* 1. insert the listener in the accept queue
	 *
	 * Use TRY_APPEND as there is a possible race even with INLIST if
	 * multiple threads try to add the same listener instance from several
	 * quic_conn instances.
	 */
	if (!MT_LIST_INLIST(&(lthr->quic_accept.list)))
		MT_LIST_TRY_APPEND(&queue->listeners, &(lthr->quic_accept.list));

	/* 2. insert the quic_conn in the listener per-thread queue. */
	MT_LIST_APPEND(&lthr->quic_accept.conns, &qc->accept_list);

	/* 3. wake up the queue tasklet */
	tasklet_wakeup(quic_accept_queues[qc->tid].tasklet);
}
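
/* Descriptive sketch of the resulting cross-component flow (the exact call
 * sites live outside this file):
 *
 *   quic_accept_push_qc(qc)
 *       |  appends <qc> to lthr->quic_accept.conns and wakes the tasklet
 *       v
 *   quic_accept_run()          (tasklet, runs on thread qc->tid)
 *       |  calls listener_accept() for each registered listener
 *       v
 *   quic_sock_accept_conn()    pops <qc> and builds its struct connection
 */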

/* Tasklet handler to accept QUIC connections. Call listener_accept() on
 * every listener instance registered in the accept queue.
 */
static struct task *quic_accept_run(struct task *t, void *ctx, unsigned int i)
{
	struct li_per_thread *lthr;
	struct mt_list *elt1, elt2;
	struct quic_accept_queue *queue = &quic_accept_queues[tid];

	mt_list_for_each_entry_safe(lthr, &queue->listeners, quic_accept.list, elt1, elt2) {
		listener_accept(lthr->li);
		MT_LIST_DELETE_SAFE(elt1);
	}

	return NULL;
}

static int quic_alloc_accept_queues(void)
{
	int i;

	quic_accept_queues = calloc(global.nbthread, sizeof(struct quic_accept_queue));
	if (!quic_accept_queues) {
		ha_alert("Failed to allocate the quic accept queues.\n");
		return 0;
	}

	for (i = 0; i < global.nbthread; ++i) {
		struct tasklet *task;
		if (!(task = tasklet_new())) {
			ha_alert("Failed to allocate the quic accept queue on thread %d.\n", i);
			return 0;
		}

		tasklet_set_tid(task, i);
		task->process = quic_accept_run;
		quic_accept_queues[i].tasklet = task;

		MT_LIST_INIT(&quic_accept_queues[i].listeners);
	}

	return 1;
}
REGISTER_POST_CHECK(quic_alloc_accept_queues);

static int quic_deallocate_accept_queues(void)
{
	int i;

	if (quic_accept_queues) {
		for (i = 0; i < global.nbthread; ++i)
			tasklet_free(quic_accept_queues[i].tasklet);
		free(quic_accept_queues);
	}

	return 1;
}
REGISTER_POST_DEINIT(quic_deallocate_accept_queues);