Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 1 | /* |
| 2 | * QUIC socket management. |
| 3 | * |
Willy Tarreau | 3dfb7da | 2022-03-02 22:33:39 +0100 | [diff] [blame] | 4 | * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 13 | #define _GNU_SOURCE /* required for struct in6_pktinfo */ |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 14 | #include <errno.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 15 | #include <stdlib.h> |
| 16 | #include <string.h> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 17 | |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 18 | #include <netinet/in.h> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 19 | #include <sys/socket.h> |
| 20 | #include <sys/types.h> |
| 21 | |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 22 | #include <haproxy/api.h> |
| 23 | #include <haproxy/buf.h> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 24 | #include <haproxy/connection.h> |
Amaury Denoyelle | 7c9fdd9 | 2022-11-16 11:01:02 +0100 | [diff] [blame] | 25 | #include <haproxy/dynbuf.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 26 | #include <haproxy/fd.h> |
| 27 | #include <haproxy/freq_ctr.h> |
| 28 | #include <haproxy/global-t.h> |
| 29 | #include <haproxy/list.h> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 30 | #include <haproxy/listener.h> |
Amaury Denoyelle | 40909df | 2022-10-24 17:08:43 +0200 | [diff] [blame] | 31 | #include <haproxy/log.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 32 | #include <haproxy/pool.h> |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 33 | #include <haproxy/proto_quic.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 34 | #include <haproxy/proxy-t.h> |
Amaury Denoyelle | 92fa63f | 2022-09-30 18:11:13 +0200 | [diff] [blame] | 35 | #include <haproxy/quic_conn.h> |
Amaury Denoyelle | 4d29504 | 2022-01-19 16:18:44 +0100 | [diff] [blame] | 36 | #include <haproxy/quic_sock.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 37 | #include <haproxy/quic_tp-t.h> |
Amaury Denoyelle | eb01f59 | 2021-10-07 16:44:05 +0200 | [diff] [blame] | 38 | #include <haproxy/session.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 39 | #include <haproxy/stats-t.h> |
| 40 | #include <haproxy/task.h> |
Amaury Denoyelle | 8687b63 | 2022-09-27 14:22:09 +0200 | [diff] [blame] | 41 | #include <haproxy/trace.h> |
Amaury Denoyelle | 777969c | 2022-03-24 16:06:26 +0100 | [diff] [blame] | 42 | #include <haproxy/tools.h> |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 43 | #include <haproxy/trace.h> |
| 44 | |
| 45 | #define TRACE_SOURCE &trace_quic |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 46 | |
Amaury Denoyelle | 8687b63 | 2022-09-27 14:22:09 +0200 | [diff] [blame] | 47 | #define TRACE_SOURCE &trace_quic |
| 48 | |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 49 | /* Retrieve a connection's source address. Returns -1 on failure. */ |
| 50 | int quic_sock_get_src(struct connection *conn, struct sockaddr *addr, socklen_t len) |
| 51 | { |
| 52 | struct quic_conn *qc; |
| 53 | |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 54 | if (!conn || !conn->handle.qc) |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 55 | return -1; |
| 56 | |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 57 | qc = conn->handle.qc; |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 58 | if (conn_is_back(conn)) { |
| 59 | /* no source address defined for outgoing connections for now */ |
| 60 | return -1; |
| 61 | } else { |
| 62 | /* front connection, return the peer's address */ |
| 63 | if (len > sizeof(qc->peer_addr)) |
| 64 | len = sizeof(qc->peer_addr); |
| 65 | memcpy(addr, &qc->peer_addr, len); |
| 66 | return 0; |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | /* Retrieve a connection's destination address. Returns -1 on failure. */ |
| 71 | int quic_sock_get_dst(struct connection *conn, struct sockaddr *addr, socklen_t len) |
| 72 | { |
| 73 | struct quic_conn *qc; |
| 74 | |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 75 | if (!conn || !conn->handle.qc) |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 76 | return -1; |
| 77 | |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 78 | qc = conn->handle.qc; |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 79 | if (conn_is_back(conn)) { |
| 80 | /* back connection, return the peer's address */ |
| 81 | if (len > sizeof(qc->peer_addr)) |
| 82 | len = sizeof(qc->peer_addr); |
| 83 | memcpy(addr, &qc->peer_addr, len); |
| 84 | } else { |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 85 | struct sockaddr_storage *from; |
| 86 | |
| 87 | /* Return listener address if IP_PKTINFO or friends are not |
| 88 | * supported by the socket. |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 89 | */ |
| 90 | BUG_ON(!qc->li); |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 91 | from = is_addr(&qc->local_addr) ? &qc->local_addr : |
| 92 | &qc->li->rx.addr; |
| 93 | if (len > sizeof(*from)) |
| 94 | len = sizeof(*from); |
| 95 | memcpy(addr, from, len); |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 96 | } |
| 97 | return 0; |
| 98 | } |
| 99 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 100 | /* |
| 101 | * Inspired from session_accept_fd(). |
| 102 | * Instantiate a new connection (connection struct) to be attached to <qc> |
| 103 | * QUIC connection of <l> listener. |
| 104 | * Returns 1 if succeeded, 0 if not. |
| 105 | */ |
| 106 | static int new_quic_cli_conn(struct quic_conn *qc, struct listener *l, |
| 107 | struct sockaddr_storage *saddr) |
| 108 | { |
| 109 | struct connection *cli_conn; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 110 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 111 | if (unlikely((cli_conn = conn_new(&l->obj_type)) == NULL)) |
| 112 | goto out; |
| 113 | |
Willy Tarreau | 9cc88c3 | 2022-04-08 14:34:31 +0200 | [diff] [blame] | 114 | if (!sockaddr_alloc(&cli_conn->src, saddr, sizeof *saddr)) |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 115 | goto out_free_conn; |
| 116 | |
Willy Tarreau | 030b3e6 | 2022-05-02 17:47:46 +0200 | [diff] [blame] | 117 | cli_conn->flags |= CO_FL_FDLESS; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 118 | qc->conn = cli_conn; |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 119 | cli_conn->handle.qc = qc; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 120 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 121 | cli_conn->target = &l->obj_type; |
| 122 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 123 | return 1; |
| 124 | |
| 125 | out_free_conn: |
Frédéric Lécaille | 01ab661 | 2021-06-14 10:31:43 +0200 | [diff] [blame] | 126 | qc->conn = NULL; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 127 | conn_stop_tracking(cli_conn); |
| 128 | conn_xprt_close(cli_conn); |
| 129 | conn_free(cli_conn); |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 130 | out: |
| 131 | |
| 132 | return 0; |
| 133 | } |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 134 | |
/* Tell whether receiver <rx> is able to accept connections. The receiver is
 * not inspected: a positive value (1) is always returned.
 */
int quic_sock_accepting_conn(const struct receiver *rx)
{
	(void)rx;

	return 1;
}
| 142 | |
| 143 | /* Accept an incoming connection from listener <l>, and return it, as well as |
| 144 | * a CO_AC_* status code into <status> if not null. Null is returned on error. |
| 145 | * <l> must be a valid listener with a valid frontend. |
| 146 | */ |
| 147 | struct connection *quic_sock_accept_conn(struct listener *l, int *status) |
| 148 | { |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 149 | struct quic_conn *qc; |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 150 | struct li_per_thread *lthr = &l->per_thr[tid]; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 151 | |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 152 | qc = MT_LIST_POP(<hr->quic_accept.conns, struct quic_conn *, accept_list); |
| 153 | if (!qc) |
| 154 | goto done; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 155 | |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 156 | if (!new_quic_cli_conn(qc, l, &qc->peer_addr)) |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 157 | goto err; |
| 158 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 159 | done: |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 160 | *status = CO_AC_DONE; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 161 | return qc ? qc->conn : NULL; |
| 162 | |
| 163 | err: |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 164 | /* in case of error reinsert the element to process it later. */ |
| 165 | MT_LIST_INSERT(<hr->quic_accept.conns, &qc->accept_list); |
| 166 | |
| 167 | *status = CO_AC_PAUSE; |
| 168 | return NULL; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 169 | } |
| 170 | |
Amaury Denoyelle | 8687b63 | 2022-09-27 14:22:09 +0200 | [diff] [blame] | 171 | /* QUIC datagrams handler task. */ |
| 172 | struct task *quic_lstnr_dghdlr(struct task *t, void *ctx, unsigned int state) |
| 173 | { |
| 174 | struct quic_dghdlr *dghdlr = ctx; |
| 175 | struct quic_dgram *dgram; |
| 176 | int max_dgrams = global.tune.maxpollevents; |
| 177 | |
| 178 | TRACE_ENTER(QUIC_EV_CONN_LPKT); |
| 179 | |
| 180 | while ((dgram = MT_LIST_POP(&dghdlr->dgrams, typeof(dgram), handler_list))) { |
| 181 | if (quic_dgram_parse(dgram, NULL, dgram->owner)) { |
| 182 | /* TODO should we requeue the datagram ? */ |
| 183 | break; |
| 184 | } |
| 185 | |
| 186 | if (--max_dgrams <= 0) |
| 187 | goto stop_here; |
| 188 | } |
| 189 | |
| 190 | TRACE_LEAVE(QUIC_EV_CONN_LPKT); |
| 191 | return t; |
| 192 | |
| 193 | stop_here: |
| 194 | /* too much work done at once, come back here later */ |
| 195 | if (!MT_LIST_ISEMPTY(&dghdlr->dgrams)) |
| 196 | tasklet_wakeup((struct tasklet *)t); |
| 197 | |
| 198 | TRACE_LEAVE(QUIC_EV_CONN_LPKT); |
| 199 | return t; |
| 200 | } |
| 201 | |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 202 | /* Retrieve the DCID from the datagram found in <buf> and deliver it to the |
| 203 | * correct datagram handler. |
| 204 | * Return 1 if a correct datagram could be found, 0 if not. |
| 205 | */ |
| 206 | static int quic_lstnr_dgram_dispatch(unsigned char *buf, size_t len, void *owner, |
| 207 | struct sockaddr_storage *saddr, |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 208 | struct sockaddr_storage *daddr, |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 209 | struct quic_dgram *new_dgram, struct list *dgrams) |
| 210 | { |
| 211 | struct quic_dgram *dgram; |
Willy Tarreau | f5a0c8a | 2022-10-13 16:14:11 +0200 | [diff] [blame] | 212 | const struct listener *l = owner; |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 213 | unsigned char *dcid; |
| 214 | size_t dcid_len; |
| 215 | int cid_tid; |
| 216 | |
| 217 | if (!len || !quic_get_dgram_dcid(buf, buf + len, &dcid, &dcid_len)) |
| 218 | goto err; |
| 219 | |
| 220 | dgram = new_dgram ? new_dgram : pool_alloc(pool_head_quic_dgram); |
| 221 | if (!dgram) |
| 222 | goto err; |
| 223 | |
Willy Tarreau | eed7826 | 2022-12-21 09:09:19 +0100 | [diff] [blame] | 224 | cid_tid = quic_get_cid_tid(dcid, &l->rx); |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 225 | |
| 226 | /* All the members must be initialized! */ |
| 227 | dgram->owner = owner; |
| 228 | dgram->buf = buf; |
| 229 | dgram->len = len; |
| 230 | dgram->dcid = dcid; |
| 231 | dgram->dcid_len = dcid_len; |
| 232 | dgram->saddr = *saddr; |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 233 | dgram->daddr = *daddr; |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 234 | dgram->qc = NULL; |
Amaury Denoyelle | 1cba8d6 | 2022-10-06 15:16:22 +0200 | [diff] [blame] | 235 | |
| 236 | /* Attached datagram to its quic_receiver_buf and quic_dghdlrs. */ |
| 237 | LIST_APPEND(dgrams, &dgram->recv_list); |
| 238 | MT_LIST_APPEND(&quic_dghdlrs[cid_tid].dgrams, &dgram->handler_list); |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 239 | |
Willy Tarreau | f9d4a7d | 2022-08-05 08:45:56 +0200 | [diff] [blame] | 240 | /* typically quic_lstnr_dghdlr() */ |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 241 | tasklet_wakeup(quic_dghdlrs[cid_tid].task); |
| 242 | |
| 243 | return 1; |
| 244 | |
| 245 | err: |
Frédéric Lécaille | bfb077a | 2022-08-12 11:55:20 +0200 | [diff] [blame] | 246 | pool_free(pool_head_quic_dgram, new_dgram); |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 247 | return 0; |
| 248 | } |
| 249 | |
Amaury Denoyelle | 91b2305 | 2022-10-06 14:45:09 +0200 | [diff] [blame] | 250 | /* This function is responsible to remove unused datagram attached in front of |
| 251 | * <buf>. Each instances will be freed until a not yet consumed datagram is |
| 252 | * found or end of the list is hit. The last unused datagram found is not freed |
| 253 | * and is instead returned so that the caller can reuse it if needed. |
| 254 | * |
Ilya Shipitsin | 4a689da | 2022-10-29 09:34:32 +0500 | [diff] [blame] | 255 | * Returns the last unused datagram or NULL if no occurrence found. |
Amaury Denoyelle | 91b2305 | 2022-10-06 14:45:09 +0200 | [diff] [blame] | 256 | */ |
| 257 | static struct quic_dgram *quic_rxbuf_purge_dgrams(struct quic_receiver_buf *buf) |
| 258 | { |
| 259 | struct quic_dgram *cur, *prev = NULL; |
| 260 | |
| 261 | while (!LIST_ISEMPTY(&buf->dgram_list)) { |
| 262 | cur = LIST_ELEM(buf->dgram_list.n, struct quic_dgram *, recv_list); |
| 263 | |
| 264 | /* Loop until a not yet consumed datagram is found. */ |
Amaury Denoyelle | 0b13e94 | 2022-10-25 11:38:21 +0200 | [diff] [blame] | 265 | if (HA_ATOMIC_LOAD(&cur->buf)) |
Amaury Denoyelle | 91b2305 | 2022-10-06 14:45:09 +0200 | [diff] [blame] | 266 | break; |
| 267 | |
| 268 | /* Clear buffer of current unused datagram. */ |
| 269 | LIST_DELETE(&cur->recv_list); |
| 270 | b_del(&buf->buf, cur->len); |
| 271 | |
| 272 | /* Free last found unused datagram. */ |
| 273 | if (prev) |
| 274 | pool_free(pool_head_quic_dgram, prev); |
| 275 | prev = cur; |
| 276 | } |
| 277 | |
| 278 | /* Return last unused datagram found. */ |
| 279 | return prev; |
| 280 | } |
| 281 | |
/* Receive data from datagram socket <fd>. Data are placed in <out> buffer of
 * length <len>.
 *
 * Datagram addresses will be returned via the next arguments. <from> (of size
 * <from_len>) will be the peer address and <to> (of size <to_len>) the
 * reception one. Note that <to> can only be retrieved if the socket supports
 * IP_PKTINFO or affiliated options and if <to_len> is large enough for the
 * address family; otherwise <to> is left as reset by clear_addr(). The caller
 * must specify <dst_port> to ensure that <to> address is completely filled: it
 * is stored verbatim as the port of <to>, hence expected in network byte
 * order.
 *
 * recvmsg() is retried as long as it is interrupted by a signal (EINTR).
 *
 * Returns value from recvmsg syscall.
 */
static ssize_t quic_recv(int fd, void *out, size_t len,
                         struct sockaddr *from, socklen_t from_len,
                         struct sockaddr *to, socklen_t to_len,
                         uint16_t dst_port)
{
	union pktinfo {
#ifdef IP_PKTINFO
		struct in_pktinfo in;
#else /* !IP_PKTINFO */
		struct in_addr addr;
#endif
#ifdef IPV6_RECVPKTINFO
		struct in6_pktinfo in6;
#endif
	};
	/* Ancillary data buffer, wrapped in a union so that it is suitably
	 * aligned to be accessed through struct cmsghdr.
	 */
	union {
		char buf[CMSG_SPACE(sizeof(union pktinfo))];
		struct cmsghdr align;
	} cdata;
	struct msghdr msg;
	struct iovec vec;
	struct cmsghdr *cmsg;
	ssize_t ret;

	vec.iov_base = out;
	vec.iov_len  = len;

	memset(&msg, 0, sizeof(msg));
	msg.msg_name       = from;
	msg.msg_namelen    = from_len;
	msg.msg_iov        = &vec;
	msg.msg_iovlen     = 1;
	msg.msg_control    = cdata.buf;
	msg.msg_controllen = sizeof(cdata.buf);

	/* <to> stays cleared unless a matching control message is found */
	clear_addr((struct sockaddr_storage *)to);

	do {
		ret = recvmsg(fd, &msg, 0);
	} while (ret < 0 && errno == EINTR);

	/* TODO handle errno. On EAGAIN/EWOULDBLOCK use fd_cant_recv() if
	 * using dedicated connection socket.
	 */

	if (ret < 0)
		goto end;

	/* Look for the destination address among the control messages. */
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		switch (cmsg->cmsg_level) {
		case IPPROTO_IP:
#if defined(IP_PKTINFO)
			if (cmsg->cmsg_type == IP_PKTINFO) {
				struct sockaddr_in *in = (struct sockaddr_in *)to;
				struct in_pktinfo *info = (struct in_pktinfo *)CMSG_DATA(cmsg);

				if (to_len >= sizeof(struct sockaddr_in)) {
					in->sin_family = AF_INET;
					in->sin_addr = info->ipi_addr;
					in->sin_port = dst_port;
				}
			}
#elif defined(IP_RECVDSTADDR)
			if (cmsg->cmsg_type == IP_RECVDSTADDR) {
				struct sockaddr_in *in = (struct sockaddr_in *)to;
				struct in_addr *info = (struct in_addr *)CMSG_DATA(cmsg);

				if (to_len >= sizeof(struct sockaddr_in)) {
					in->sin_family = AF_INET;
					in->sin_addr.s_addr = info->s_addr;
					in->sin_port = dst_port;
				}
			}
#endif /* IP_PKTINFO || IP_RECVDSTADDR */
			break;

		case IPPROTO_IPV6:
#ifdef IPV6_RECVPKTINFO
			if (cmsg->cmsg_type == IPV6_PKTINFO) {
				struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)to;
				struct in6_pktinfo *info6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);

				if (to_len >= sizeof(struct sockaddr_in6)) {
					in6->sin6_family = AF_INET6;
					memcpy(&in6->sin6_addr, &info6->ipi6_addr, sizeof(in6->sin6_addr));
					in6->sin6_port = dst_port;
				}
			}
#endif
			break;
		}
	}

 end:
	return ret;
}
| 386 | |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 387 | /* Function called on a read event from a listening socket. It tries |
| 388 | * to handle as many connections as possible. |
| 389 | */ |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 390 | void quic_lstnr_sock_fd_iocb(int fd) |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 391 | { |
| 392 | ssize_t ret; |
Amaury Denoyelle | 1cba8d6 | 2022-10-06 15:16:22 +0200 | [diff] [blame] | 393 | struct quic_receiver_buf *rxbuf; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 394 | struct buffer *buf; |
| 395 | struct listener *l = objt_listener(fdtab[fd].owner); |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 396 | struct quic_transport_params *params; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 397 | /* Source address */ |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 398 | struct sockaddr_storage saddr = {0}, daddr = {0}; |
Frédéric Lécaille | 320744b | 2022-01-27 12:19:28 +0100 | [diff] [blame] | 399 | size_t max_sz, cspace; |
Frédéric Lécaille | 2bed1f1 | 2022-06-23 21:05:05 +0200 | [diff] [blame] | 400 | struct quic_dgram *new_dgram; |
Frédéric Lécaille | f6f7520 | 2022-02-02 09:44:22 +0100 | [diff] [blame] | 401 | unsigned char *dgram_buf; |
Frédéric Lécaille | 1b0707f | 2022-06-30 11:28:56 +0200 | [diff] [blame] | 402 | int max_dgrams; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 403 | |
Tim Duesterhus | 1655424 | 2021-09-15 13:58:49 +0200 | [diff] [blame] | 404 | BUG_ON(!l); |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 405 | |
Frédéric Lécaille | 19ef636 | 2022-06-23 18:00:37 +0200 | [diff] [blame] | 406 | new_dgram = NULL; |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 407 | if (!l) |
| 408 | return; |
| 409 | |
Willy Tarreau | f509065 | 2021-04-06 17:23:40 +0200 | [diff] [blame] | 410 | if (!(fdtab[fd].state & FD_POLL_IN) || !fd_recv_ready(fd)) |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 411 | return; |
| 412 | |
Amaury Denoyelle | 1cba8d6 | 2022-10-06 15:16:22 +0200 | [diff] [blame] | 413 | rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), rxbuf_el); |
Amaury Denoyelle | ee72a43 | 2021-11-19 15:49:29 +0100 | [diff] [blame] | 414 | if (!rxbuf) |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 415 | goto out; |
Frédéric Lécaille | 37ae505 | 2022-01-27 11:31:50 +0100 | [diff] [blame] | 416 | |
Amaury Denoyelle | ee72a43 | 2021-11-19 15:49:29 +0100 | [diff] [blame] | 417 | buf = &rxbuf->buf; |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 418 | |
Frédéric Lécaille | 1b0707f | 2022-06-30 11:28:56 +0200 | [diff] [blame] | 419 | max_dgrams = global.tune.maxpollevents; |
| 420 | start: |
Ilya Shipitsin | 3b64a28 | 2022-07-29 22:26:53 +0500 | [diff] [blame] | 421 | /* Try to reuse an existing dgram. Note that there is always at |
Frédéric Lécaille | 2bed1f1 | 2022-06-23 21:05:05 +0200 | [diff] [blame] | 422 | * least one datagram to pick, except the first time we enter |
| 423 | * this function for this <rxbuf> buffer. |
| 424 | */ |
Amaury Denoyelle | 91b2305 | 2022-10-06 14:45:09 +0200 | [diff] [blame] | 425 | new_dgram = quic_rxbuf_purge_dgrams(rxbuf); |
Frédéric Lécaille | 37ae505 | 2022-01-27 11:31:50 +0100 | [diff] [blame] | 426 | |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 427 | params = &l->bind_conf->quic_params; |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 428 | max_sz = params->max_udp_payload_size; |
Frédéric Lécaille | 320744b | 2022-01-27 12:19:28 +0100 | [diff] [blame] | 429 | cspace = b_contig_space(buf); |
| 430 | if (cspace < max_sz) { |
Amaury Denoyelle | 735b44f | 2022-10-27 17:56:27 +0200 | [diff] [blame] | 431 | struct proxy *px = l->bind_conf->frontend; |
| 432 | struct quic_counters *prx_counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, &quic_stats_module); |
Frédéric Lécaille | 1712b1d | 2022-01-28 13:10:24 +0100 | [diff] [blame] | 433 | struct quic_dgram *dgram; |
| 434 | |
Frédéric Lécaille | 0c53568 | 2022-06-23 17:47:10 +0200 | [diff] [blame] | 435 | /* Do no mark <buf> as full, and do not try to consume it |
Frédéric Lécaille | ba19acd | 2022-08-08 21:10:58 +0200 | [diff] [blame] | 436 | * if the contiguous remaining space is not at the end |
Frédéric Lécaille | 0c53568 | 2022-06-23 17:47:10 +0200 | [diff] [blame] | 437 | */ |
Amaury Denoyelle | 735b44f | 2022-10-27 17:56:27 +0200 | [diff] [blame] | 438 | if (b_tail(buf) + cspace < b_wrap(buf)) { |
| 439 | HA_ATOMIC_INC(&prx_counters->rxbuf_full); |
Frédéric Lécaille | 0c53568 | 2022-06-23 17:47:10 +0200 | [diff] [blame] | 440 | goto out; |
Amaury Denoyelle | 735b44f | 2022-10-27 17:56:27 +0200 | [diff] [blame] | 441 | } |
Frédéric Lécaille | 0c53568 | 2022-06-23 17:47:10 +0200 | [diff] [blame] | 442 | |
Frédéric Lécaille | 1712b1d | 2022-01-28 13:10:24 +0100 | [diff] [blame] | 443 | /* Allocate a fake datagram, without data to locate |
| 444 | * the end of the RX buffer (required during purging). |
| 445 | */ |
Frédéric Lécaille | ba19acd | 2022-08-08 21:10:58 +0200 | [diff] [blame] | 446 | dgram = pool_alloc(pool_head_quic_dgram); |
Frédéric Lécaille | 1712b1d | 2022-01-28 13:10:24 +0100 | [diff] [blame] | 447 | if (!dgram) |
| 448 | goto out; |
| 449 | |
Frédéric Lécaille | ba19acd | 2022-08-08 21:10:58 +0200 | [diff] [blame] | 450 | /* Initialize only the useful members of this fake datagram. */ |
| 451 | dgram->buf = NULL; |
Frédéric Lécaille | 1712b1d | 2022-01-28 13:10:24 +0100 | [diff] [blame] | 452 | dgram->len = cspace; |
Frédéric Lécaille | ba19acd | 2022-08-08 21:10:58 +0200 | [diff] [blame] | 453 | /* Append this datagram only to the RX buffer list. It will |
| 454 | * not be treated by any datagram handler. |
| 455 | */ |
Amaury Denoyelle | 1cba8d6 | 2022-10-06 15:16:22 +0200 | [diff] [blame] | 456 | LIST_APPEND(&rxbuf->dgram_list, &dgram->recv_list); |
Frédéric Lécaille | 0c53568 | 2022-06-23 17:47:10 +0200 | [diff] [blame] | 457 | |
Frédéric Lécaille | 320744b | 2022-01-27 12:19:28 +0100 | [diff] [blame] | 458 | /* Consume the remaining space */ |
| 459 | b_add(buf, cspace); |
Amaury Denoyelle | 735b44f | 2022-10-27 17:56:27 +0200 | [diff] [blame] | 460 | if (b_contig_space(buf) < max_sz) { |
| 461 | HA_ATOMIC_INC(&prx_counters->rxbuf_full); |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 462 | goto out; |
Amaury Denoyelle | 735b44f | 2022-10-27 17:56:27 +0200 | [diff] [blame] | 463 | } |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 464 | } |
| 465 | |
Frédéric Lécaille | f6f7520 | 2022-02-02 09:44:22 +0100 | [diff] [blame] | 466 | dgram_buf = (unsigned char *)b_tail(buf); |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 467 | ret = quic_recv(fd, dgram_buf, max_sz, |
| 468 | (struct sockaddr *)&saddr, sizeof(saddr), |
| 469 | (struct sockaddr *)&daddr, sizeof(daddr), |
| 470 | get_net_port(&l->rx.addr)); |
| 471 | if (ret <= 0) |
| 472 | goto out; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 473 | |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 474 | b_add(buf, ret); |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 475 | if (!quic_lstnr_dgram_dispatch(dgram_buf, ret, l, &saddr, &daddr, |
Amaury Denoyelle | 1cba8d6 | 2022-10-06 15:16:22 +0200 | [diff] [blame] | 476 | new_dgram, &rxbuf->dgram_list)) { |
Frédéric Lécaille | 37ae505 | 2022-01-27 11:31:50 +0100 | [diff] [blame] | 477 | /* If wrong, consume this datagram */ |
Amaury Denoyelle | 9875f02 | 2022-11-24 15:24:38 +0100 | [diff] [blame] | 478 | b_sub(buf, ret); |
Frédéric Lécaille | 37ae505 | 2022-01-27 11:31:50 +0100 | [diff] [blame] | 479 | } |
Frédéric Lécaille | 19ef636 | 2022-06-23 18:00:37 +0200 | [diff] [blame] | 480 | new_dgram = NULL; |
Frédéric Lécaille | 1b0707f | 2022-06-30 11:28:56 +0200 | [diff] [blame] | 481 | if (--max_dgrams > 0) |
| 482 | goto start; |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 483 | out: |
Frédéric Lécaille | 19ef636 | 2022-06-23 18:00:37 +0200 | [diff] [blame] | 484 | pool_free(pool_head_quic_dgram, new_dgram); |
Amaury Denoyelle | 1cba8d6 | 2022-10-06 15:16:22 +0200 | [diff] [blame] | 485 | MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el); |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 486 | } |
| 487 | |
| 488 | /* FD-owned quic-conn socket callback. */ |
Willy Tarreau | 8f6da64 | 2023-03-10 12:04:02 +0100 | [diff] [blame] | 489 | void quic_conn_sock_fd_iocb(int fd) |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 490 | { |
Amaury Denoyelle | 7c9fdd9 | 2022-11-16 11:01:02 +0100 | [diff] [blame] | 491 | struct quic_conn *qc = fdtab[fd].owner; |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 492 | |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 493 | TRACE_ENTER(QUIC_EV_CONN_RCV, qc); |
| 494 | |
Amaury Denoyelle | e1a0ee3 | 2023-02-28 15:11:09 +0100 | [diff] [blame] | 495 | if (fd_send_active(fd) && fd_send_ready(fd)) { |
| 496 | TRACE_DEVEL("send ready", QUIC_EV_CONN_RCV, qc); |
| 497 | fd_stop_send(fd); |
| 498 | tasklet_wakeup_after(NULL, qc->wait_event.tasklet); |
Amaury Denoyelle | caa1654 | 2023-02-28 15:11:26 +0100 | [diff] [blame] | 499 | qc_notify_send(qc); |
Amaury Denoyelle | e1a0ee3 | 2023-02-28 15:11:09 +0100 | [diff] [blame] | 500 | } |
| 501 | |
| 502 | if (fd_recv_ready(fd)) { |
| 503 | tasklet_wakeup_after(NULL, qc->wait_event.tasklet); |
| 504 | fd_stop_recv(fd); |
| 505 | } |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 506 | |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 507 | TRACE_LEAVE(QUIC_EV_CONN_RCV, qc); |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 508 | } |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 509 | |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 510 | /* Send a datagram stored into <buf> buffer with <sz> as size. |
| 511 | * The caller must ensure there is at least <sz> bytes in this buffer. |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 512 | * |
Amaury Denoyelle | 1febc2d | 2023-02-23 11:18:38 +0100 | [diff] [blame] | 513 | * Returns the total bytes sent over the socket. 0 is returned if a transient |
| 514 | * error is encountered which allows send to be retry later. A negative value |
| 515 | * is used for a fatal error which guarantee that all future send operation for |
| 516 | * this connection will fail. |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 517 | * |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 518 | * TODO standardize this function for a generic UDP sendto wrapper. This can be |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 519 | * done by removing the <qc> arg and replace it with address/port. |
| 520 | */ |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 521 | int qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t sz, |
Amaury Denoyelle | 1febc2d | 2023-02-23 11:18:38 +0100 | [diff] [blame] | 522 | int flags) |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 523 | { |
| 524 | ssize_t ret; |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 525 | |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 526 | do { |
Amaury Denoyelle | dc0dcb3 | 2022-11-21 14:48:57 +0100 | [diff] [blame] | 527 | if (qc_test_fd(qc)) { |
Amaury Denoyelle | e1a0ee3 | 2023-02-28 15:11:09 +0100 | [diff] [blame] | 528 | if (!fd_send_ready(qc->fd)) |
| 529 | return 0; |
| 530 | |
Amaury Denoyelle | dc0dcb3 | 2022-11-21 14:48:57 +0100 | [diff] [blame] | 531 | ret = send(qc->fd, b_peek(buf, b_head_ofs(buf)), sz, |
| 532 | MSG_DONTWAIT | MSG_NOSIGNAL); |
| 533 | } |
Amaury Denoyelle | 2d38092 | 2023-01-19 18:05:54 +0100 | [diff] [blame] | 534 | #if defined(IP_PKTINFO) || defined(IP_RECVDSTADDR) || defined(IPV6_RECVPKTINFO) |
| 535 | else if (is_addr(&qc->local_addr)) { |
| 536 | struct msghdr msg = { 0 }; |
| 537 | struct iovec vec; |
| 538 | struct cmsghdr *cmsg; |
| 539 | #ifdef IP_PKTINFO |
| 540 | struct in_pktinfo in; |
| 541 | #endif /* IP_PKTINFO */ |
| 542 | #ifdef IPV6_RECVPKTINFO |
| 543 | struct in6_pktinfo in6; |
| 544 | #endif /* IPV6_RECVPKTINFO */ |
| 545 | union { |
| 546 | #ifdef IP_PKTINFO |
| 547 | char buf[CMSG_SPACE(sizeof(in))]; |
| 548 | #endif /* IP_PKTINFO */ |
| 549 | #ifdef IPV6_RECVPKTINFO |
| 550 | char buf6[CMSG_SPACE(sizeof(in6))]; |
| 551 | #endif /* IPV6_RECVPKTINFO */ |
| 552 | char bufaddr[CMSG_SPACE(sizeof(struct in_addr))]; |
| 553 | struct cmsghdr align; |
| 554 | } u; |
| 555 | |
| 556 | vec.iov_base = b_peek(buf, b_head_ofs(buf)); |
| 557 | vec.iov_len = sz; |
| 558 | msg.msg_name = &qc->peer_addr; |
| 559 | msg.msg_namelen = get_addr_len(&qc->peer_addr); |
| 560 | msg.msg_iov = &vec; |
| 561 | msg.msg_iovlen = 1; |
| 562 | |
| 563 | switch (qc->local_addr.ss_family) { |
| 564 | case AF_INET: |
| 565 | #if defined(IP_PKTINFO) |
| 566 | memset(&in, 0, sizeof(in)); |
| 567 | memcpy(&in.ipi_spec_dst, |
| 568 | &((struct sockaddr_in *)&qc->local_addr)->sin_addr, |
| 569 | sizeof(struct in_addr)); |
| 570 | |
| 571 | msg.msg_control = u.buf; |
| 572 | msg.msg_controllen = sizeof(u.buf); |
| 573 | |
| 574 | cmsg = CMSG_FIRSTHDR(&msg); |
| 575 | cmsg->cmsg_level = IPPROTO_IP; |
| 576 | cmsg->cmsg_type = IP_PKTINFO; |
| 577 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); |
| 578 | memcpy(CMSG_DATA(cmsg), &in, sizeof(in)); |
| 579 | #elif defined(IP_RECVDSTADDR) |
| 580 | msg.msg_control = u.bufaddr; |
| 581 | msg.msg_controllen = sizeof(u.bufaddr); |
| 582 | |
| 583 | cmsg = CMSG_FIRSTHDR(&msg); |
| 584 | cmsg->cmsg_level = IPPROTO_IP; |
| 585 | cmsg->cmsg_type = IP_SENDSRCADDR; |
| 586 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); |
| 587 | memcpy(CMSG_DATA(cmsg), |
| 588 | &((struct sockaddr_in *)&qc->local_addr)->sin_addr, |
| 589 | sizeof(struct in_addr)); |
| 590 | #endif /* IP_PKTINFO || IP_RECVDSTADDR */ |
| 591 | break; |
| 592 | |
| 593 | case AF_INET6: |
| 594 | #ifdef IPV6_RECVPKTINFO |
| 595 | memset(&in6, 0, sizeof(in6)); |
| 596 | memcpy(&in6.ipi6_addr, |
| 597 | &((struct sockaddr_in6 *)&qc->local_addr)->sin6_addr, |
| 598 | sizeof(struct in6_addr)); |
| 599 | |
| 600 | msg.msg_control = u.buf6; |
| 601 | msg.msg_controllen = sizeof(u.buf6); |
| 602 | |
| 603 | cmsg = CMSG_FIRSTHDR(&msg); |
| 604 | cmsg->cmsg_level = IPPROTO_IPV6; |
| 605 | cmsg->cmsg_type = IPV6_PKTINFO; |
| 606 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); |
| 607 | memcpy(CMSG_DATA(cmsg), &in6, sizeof(in6)); |
| 608 | #endif /* IPV6_RECVPKTINFO */ |
| 609 | break; |
| 610 | |
| 611 | default: |
| 612 | break; |
| 613 | } |
| 614 | |
| 615 | ret = sendmsg(qc->li->rx.fd, &msg, |
| 616 | MSG_DONTWAIT|MSG_NOSIGNAL); |
| 617 | } |
| 618 | #endif /* IP_PKTINFO || IP_RECVDSTADDR || IPV6_RECVPKTINFO */ |
Amaury Denoyelle | dc0dcb3 | 2022-11-21 14:48:57 +0100 | [diff] [blame] | 619 | else { |
| 620 | ret = sendto(qc->li->rx.fd, b_peek(buf, b_head_ofs(buf)), sz, |
| 621 | MSG_DONTWAIT|MSG_NOSIGNAL, |
| 622 | (struct sockaddr *)&qc->peer_addr, |
| 623 | get_addr_len(&qc->peer_addr)); |
| 624 | } |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 625 | } while (ret < 0 && errno == EINTR); |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 626 | |
Frédéric Lécaille | 9fc10af | 2023-02-09 20:37:26 +0100 | [diff] [blame] | 627 | if (ret < 0) { |
Amaury Denoyelle | 1d9f170 | 2022-10-24 10:03:33 +0200 | [diff] [blame] | 628 | struct proxy *prx = qc->li->bind_conf->frontend; |
| 629 | struct quic_counters *prx_counters = |
| 630 | EXTRA_COUNTERS_GET(prx->extra_counters_fe, |
| 631 | &quic_stats_module); |
| 632 | |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 633 | if (errno == EAGAIN || errno == EWOULDBLOCK || |
Amaury Denoyelle | 4bdd069 | 2023-02-27 17:31:55 +0100 | [diff] [blame] | 634 | errno == ENOTCONN || errno == EINPROGRESS) { |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 635 | if (errno == EAGAIN || errno == EWOULDBLOCK) |
| 636 | HA_ATOMIC_INC(&prx_counters->socket_full); |
| 637 | else |
| 638 | HA_ATOMIC_INC(&prx_counters->sendto_err); |
Amaury Denoyelle | 1febc2d | 2023-02-23 11:18:38 +0100 | [diff] [blame] | 639 | |
| 640 | /* transient error */ |
Amaury Denoyelle | e1a0ee3 | 2023-02-28 15:11:09 +0100 | [diff] [blame] | 641 | fd_want_send(qc->fd); |
| 642 | fd_cant_send(qc->fd); |
Amaury Denoyelle | 1febc2d | 2023-02-23 11:18:38 +0100 | [diff] [blame] | 643 | TRACE_PRINTF(TRACE_LEVEL_USER, QUIC_EV_CONN_SPPKTS, qc, 0, 0, 0, |
| 644 | "UDP send failure errno=%d (%s)", errno, strerror(errno)); |
| 645 | return 0; |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 646 | } |
Amaury Denoyelle | 1febc2d | 2023-02-23 11:18:38 +0100 | [diff] [blame] | 647 | else { |
| 648 | /* unrecoverable error */ |
Amaury Denoyelle | 1d9f170 | 2022-10-24 10:03:33 +0200 | [diff] [blame] | 649 | HA_ATOMIC_INC(&prx_counters->sendto_err_unknown); |
Amaury Denoyelle | 1febc2d | 2023-02-23 11:18:38 +0100 | [diff] [blame] | 650 | TRACE_PRINTF(TRACE_LEVEL_USER, QUIC_EV_CONN_SPPKTS, qc, 0, 0, 0, |
| 651 | "UDP send failure errno=%d (%s)", errno, strerror(errno)); |
| 652 | return -1; |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 653 | } |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 654 | } |
| 655 | |
Frédéric Lécaille | 9fc10af | 2023-02-09 20:37:26 +0100 | [diff] [blame] | 656 | if (ret != sz) |
Amaury Denoyelle | 1febc2d | 2023-02-23 11:18:38 +0100 | [diff] [blame] | 657 | return 0; |
Frédéric Lécaille | 9fc10af | 2023-02-09 20:37:26 +0100 | [diff] [blame] | 658 | |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 659 | /* we count the total bytes sent, and the send rate for 32-byte blocks. |
| 660 | * The reason for the latter is that freq_ctr are limited to 4GB and |
| 661 | * that it's not enough per second. |
| 662 | */ |
Willy Tarreau | 6be8d09 | 2023-01-12 16:08:41 +0100 | [diff] [blame] | 663 | _HA_ATOMIC_ADD(&th_ctx->out_bytes, ret); |
| 664 | update_freq_ctr(&th_ctx->out_32bps, (ret + 16) / 32); |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 665 | |
Amaury Denoyelle | 1febc2d | 2023-02-23 11:18:38 +0100 | [diff] [blame] | 666 | return ret; |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 667 | } |
| 668 | |
Amaury Denoyelle | 7c9fdd9 | 2022-11-16 11:01:02 +0100 | [diff] [blame] | 669 | /* Receive datagram on <qc> FD-owned socket. |
| 670 | * |
| 671 | * Returns the total number of bytes read or a negative value on error. |
| 672 | */ |
| 673 | int qc_rcv_buf(struct quic_conn *qc) |
| 674 | { |
| 675 | struct sockaddr_storage saddr = {0}, daddr = {0}; |
| 676 | struct quic_transport_params *params; |
| 677 | struct quic_dgram *new_dgram = NULL; |
| 678 | struct buffer buf = BUF_NULL; |
| 679 | size_t max_sz; |
| 680 | unsigned char *dgram_buf; |
| 681 | struct listener *l; |
| 682 | ssize_t ret = 0; |
| 683 | |
| 684 | /* Do not call this if quic-conn FD is uninitialized. */ |
| 685 | BUG_ON(qc->fd < 0); |
| 686 | |
| 687 | TRACE_ENTER(QUIC_EV_CONN_RCV, qc); |
| 688 | l = qc->li; |
| 689 | |
| 690 | params = &l->bind_conf->quic_params; |
| 691 | max_sz = params->max_udp_payload_size; |
| 692 | |
| 693 | do { |
| 694 | if (!b_alloc(&buf)) |
| 695 | break; /* TODO subscribe for memory again available. */ |
| 696 | |
| 697 | b_reset(&buf); |
| 698 | BUG_ON(b_contig_space(&buf) < max_sz); |
| 699 | |
| 700 | /* Allocate datagram on first loop or after requeuing. */ |
| 701 | if (!new_dgram && !(new_dgram = pool_alloc(pool_head_quic_dgram))) |
| 702 | break; /* TODO subscribe for memory again available. */ |
| 703 | |
| 704 | dgram_buf = (unsigned char *)b_tail(&buf); |
| 705 | ret = quic_recv(qc->fd, dgram_buf, max_sz, |
| 706 | (struct sockaddr *)&saddr, sizeof(saddr), |
| 707 | (struct sockaddr *)&daddr, sizeof(daddr), |
| 708 | get_net_port(&qc->local_addr)); |
| 709 | if (ret <= 0) { |
| 710 | /* Subscribe FD for future reception. */ |
| 711 | fd_want_recv(qc->fd); |
| 712 | break; |
| 713 | } |
| 714 | |
| 715 | b_add(&buf, ret); |
| 716 | |
| 717 | new_dgram->buf = dgram_buf; |
| 718 | new_dgram->len = ret; |
| 719 | new_dgram->dcid_len = 0; |
| 720 | new_dgram->dcid = NULL; |
| 721 | new_dgram->saddr = saddr; |
| 722 | new_dgram->daddr = daddr; |
| 723 | new_dgram->qc = NULL; /* set later via quic_dgram_parse() */ |
| 724 | |
| 725 | TRACE_DEVEL("read datagram", QUIC_EV_CONN_RCV, qc, new_dgram); |
| 726 | |
| 727 | if (!quic_get_dgram_dcid(new_dgram->buf, |
| 728 | new_dgram->buf + new_dgram->len, |
| 729 | &new_dgram->dcid, &new_dgram->dcid_len)) { |
| 730 | continue; |
| 731 | } |
| 732 | |
| 733 | if (!qc_check_dcid(qc, new_dgram->dcid, new_dgram->dcid_len)) { |
| 734 | /* Datagram received by error on the connection FD, dispatch it |
| 735 | * to its associated quic-conn. |
| 736 | * |
| 737 | * TODO count redispatch datagrams. |
| 738 | */ |
Amaury Denoyelle | b2bd839 | 2022-10-05 17:56:08 +0200 | [diff] [blame] | 739 | struct quic_receiver_buf *rxbuf; |
| 740 | struct quic_dgram *tmp_dgram; |
| 741 | unsigned char *rxbuf_tail; |
| 742 | |
| 743 | TRACE_STATE("datagram for other connection on quic-conn socket, requeue it", QUIC_EV_CONN_RCV, qc); |
| 744 | |
| 745 | rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), rxbuf_el); |
| 746 | |
| 747 | tmp_dgram = quic_rxbuf_purge_dgrams(rxbuf); |
| 748 | pool_free(pool_head_quic_dgram, tmp_dgram); |
| 749 | |
| 750 | if (b_contig_space(&rxbuf->buf) < new_dgram->len) { |
| 751 | /* TODO count lost datagrams */ |
| 752 | MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el); |
| 753 | continue; |
| 754 | } |
| 755 | |
| 756 | rxbuf_tail = (unsigned char *)b_tail(&rxbuf->buf); |
| 757 | __b_putblk(&rxbuf->buf, (char *)dgram_buf, new_dgram->len); |
| 758 | if (!quic_lstnr_dgram_dispatch(rxbuf_tail, ret, l, &qc->peer_addr, &daddr, |
| 759 | new_dgram, &rxbuf->dgram_list)) { |
| 760 | /* TODO count lost datagrams. */ |
| 761 | b_sub(&buf, ret); |
| 762 | } |
| 763 | else { |
| 764 | /* datagram must not be freed as it was requeued. */ |
| 765 | new_dgram = NULL; |
| 766 | } |
| 767 | |
| 768 | MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el); |
| 769 | continue; |
Amaury Denoyelle | 7c9fdd9 | 2022-11-16 11:01:02 +0100 | [diff] [blame] | 770 | } |
| 771 | |
| 772 | quic_dgram_parse(new_dgram, qc, qc->li); |
| 773 | /* A datagram must always be consumed after quic_parse_dgram(). */ |
| 774 | BUG_ON(new_dgram->buf); |
| 775 | } while (ret > 0); |
| 776 | |
| 777 | pool_free(pool_head_quic_dgram, new_dgram); |
| 778 | |
| 779 | if (b_size(&buf)) { |
| 780 | b_free(&buf); |
| 781 | offer_buffers(NULL, 1); |
| 782 | } |
| 783 | |
| 784 | TRACE_LEAVE(QUIC_EV_CONN_RCV, qc); |
| 785 | return ret; |
| 786 | } |
| 787 | |
Amaury Denoyelle | 40909df | 2022-10-24 17:08:43 +0200 | [diff] [blame] | 788 | /* Allocate a socket file-descriptor specific for QUIC connection <qc>. |
| 789 | * Endpoint addresses are specified by the two following arguments : <src> is |
| 790 | * the local address and <dst> is the remote one. |
| 791 | * |
| 792 | * Return the socket FD or a negative error code. On error, socket is marked as |
| 793 | * uninitialized. |
| 794 | */ |
| 795 | void qc_alloc_fd(struct quic_conn *qc, const struct sockaddr_storage *src, |
| 796 | const struct sockaddr_storage *dst) |
| 797 | { |
| 798 | struct proxy *p = qc->li->bind_conf->frontend; |
| 799 | int fd = -1; |
| 800 | int ret; |
| 801 | |
| 802 | /* Must not happen. */ |
| 803 | BUG_ON(src->ss_family != dst->ss_family); |
| 804 | |
| 805 | qc_init_fd(qc); |
| 806 | |
| 807 | fd = socket(src->ss_family, SOCK_DGRAM, 0); |
| 808 | if (fd < 0) |
| 809 | goto err; |
| 810 | |
| 811 | if (fd >= global.maxsock) { |
| 812 | send_log(p, LOG_EMERG, |
| 813 | "Proxy %s reached the configured maximum connection limit. Please check the global 'maxconn' value.\n", |
| 814 | p->id); |
| 815 | goto err; |
| 816 | } |
| 817 | |
| 818 | ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); |
| 819 | if (ret < 0) |
| 820 | goto err; |
| 821 | |
| 822 | switch (src->ss_family) { |
| 823 | case AF_INET: |
| 824 | #if defined(IP_PKTINFO) |
| 825 | ret = setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one)); |
| 826 | #elif defined(IP_RECVDSTADDR) |
| 827 | ret = setsockopt(fd, IPPROTO_IP, IP_RECVDSTADDR, &one, sizeof(one)); |
| 828 | #endif /* IP_PKTINFO || IP_RECVDSTADDR */ |
| 829 | break; |
| 830 | case AF_INET6: |
| 831 | #ifdef IPV6_RECVPKTINFO |
| 832 | ret = setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); |
| 833 | #endif |
| 834 | break; |
| 835 | } |
| 836 | if (ret < 0) |
| 837 | goto err; |
| 838 | |
| 839 | ret = bind(fd, (struct sockaddr *)src, get_addr_len(src)); |
| 840 | if (ret < 0) |
| 841 | goto err; |
| 842 | |
| 843 | ret = connect(fd, (struct sockaddr *)dst, get_addr_len(dst)); |
| 844 | if (ret < 0) |
| 845 | goto err; |
| 846 | |
| 847 | qc->fd = fd; |
| 848 | fd_set_nonblock(fd); |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 849 | fd_insert(fd, qc, quic_conn_sock_fd_iocb, tgid, ti->ltid_bit); |
| 850 | fd_want_recv(fd); |
Amaury Denoyelle | 40909df | 2022-10-24 17:08:43 +0200 | [diff] [blame] | 851 | |
| 852 | return; |
| 853 | |
| 854 | err: |
| 855 | if (fd >= 0) |
| 856 | close(fd); |
| 857 | } |
| 858 | |
Amaury Denoyelle | d3083c9 | 2022-12-01 16:20:06 +0100 | [diff] [blame] | 859 | /* Release socket file-descriptor specific for QUIC connection <qc>. Set |
| 860 | * <reinit> if socket should be reinitialized after address migration. |
| 861 | */ |
| 862 | void qc_release_fd(struct quic_conn *qc, int reinit) |
Amaury Denoyelle | 40909df | 2022-10-24 17:08:43 +0200 | [diff] [blame] | 863 | { |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 864 | if (qc_test_fd(qc)) { |
| 865 | fd_delete(qc->fd); |
Amaury Denoyelle | 40909df | 2022-10-24 17:08:43 +0200 | [diff] [blame] | 866 | qc->fd = DEAD_FD_MAGIC; |
Amaury Denoyelle | d3083c9 | 2022-12-01 16:20:06 +0100 | [diff] [blame] | 867 | |
| 868 | if (reinit) |
| 869 | qc_init_fd(qc); |
Amaury Denoyelle | 5b41486 | 2022-10-24 17:40:37 +0200 | [diff] [blame] | 870 | } |
Amaury Denoyelle | 40909df | 2022-10-24 17:08:43 +0200 | [diff] [blame] | 871 | } |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 872 | |
/*********************** QUIC accept queue management ***********************/
/* Per-thread accept queues. This array is indexed by thread ID and is
 * allocated by quic_alloc_accept_queues() with one entry per configured
 * thread.
 */
struct quic_accept_queue *quic_accept_queues;
| 876 | |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 877 | /* Install <qc> on the queue ready to be accepted. The queue task is then woken |
Frédéric Lécaille | 91f083a | 2022-01-28 21:43:48 +0100 | [diff] [blame] | 878 | * up. If <qc> accept is already scheduled or done, nothing is done. |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 879 | */ |
| 880 | void quic_accept_push_qc(struct quic_conn *qc) |
| 881 | { |
| 882 | struct quic_accept_queue *queue = &quic_accept_queues[qc->tid]; |
| 883 | struct li_per_thread *lthr = &qc->li->per_thr[qc->tid]; |
| 884 | |
Frédéric Lécaille | 91f083a | 2022-01-28 21:43:48 +0100 | [diff] [blame] | 885 | /* early return if accept is already in progress/done for this |
| 886 | * connection |
| 887 | */ |
Frédéric Lécaille | fc79006 | 2022-03-28 17:10:31 +0200 | [diff] [blame] | 888 | if (qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED) |
Frédéric Lécaille | 91f083a | 2022-01-28 21:43:48 +0100 | [diff] [blame] | 889 | return; |
| 890 | |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 891 | BUG_ON(MT_LIST_INLIST(&qc->accept_list)); |
| 892 | |
Frédéric Lécaille | fc79006 | 2022-03-28 17:10:31 +0200 | [diff] [blame] | 893 | qc->flags |= QUIC_FL_CONN_ACCEPT_REGISTERED; |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 894 | /* 1. insert the listener in the accept queue |
| 895 | * |
| 896 | * Use TRY_APPEND as there is a possible race even with INLIST if |
| 897 | * multiple threads try to add the same listener instance from several |
| 898 | * quic_conn. |
| 899 | */ |
| 900 | if (!MT_LIST_INLIST(&(lthr->quic_accept.list))) |
| 901 | MT_LIST_TRY_APPEND(&queue->listeners, &(lthr->quic_accept.list)); |
| 902 | |
| 903 | /* 2. insert the quic_conn in the listener per-thread queue. */ |
| 904 | MT_LIST_APPEND(<hr->quic_accept.conns, &qc->accept_list); |
| 905 | |
| 906 | /* 3. wake up the queue tasklet */ |
| 907 | tasklet_wakeup(quic_accept_queues[qc->tid].tasklet); |
| 908 | } |
| 909 | |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 910 | /* Tasklet handler to accept QUIC connections. Call listener_accept on every |
| 911 | * listener instances registered in the accept queue. |
| 912 | */ |
Willy Tarreau | 41e701e | 2022-09-08 15:12:59 +0200 | [diff] [blame] | 913 | struct task *quic_accept_run(struct task *t, void *ctx, unsigned int i) |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 914 | { |
| 915 | struct li_per_thread *lthr; |
| 916 | struct mt_list *elt1, elt2; |
| 917 | struct quic_accept_queue *queue = &quic_accept_queues[tid]; |
| 918 | |
| 919 | mt_list_for_each_entry_safe(lthr, &queue->listeners, quic_accept.list, elt1, elt2) { |
| 920 | listener_accept(lthr->li); |
Frédéric Lécaille | 4377dbd | 2023-03-10 13:34:30 +0100 | [diff] [blame] | 921 | if (!MT_LIST_ISEMPTY(<hr->quic_accept.conns)) |
| 922 | tasklet_wakeup((struct tasklet*)t); |
| 923 | else |
| 924 | MT_LIST_DELETE_SAFE(elt1); |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 925 | } |
| 926 | |
| 927 | return NULL; |
| 928 | } |
| 929 | |
| 930 | static int quic_alloc_accept_queues(void) |
| 931 | { |
| 932 | int i; |
| 933 | |
Tim Duesterhus | 9fb57e8 | 2022-06-01 21:58:37 +0200 | [diff] [blame] | 934 | quic_accept_queues = calloc(global.nbthread, |
| 935 | sizeof(*quic_accept_queues)); |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 936 | if (!quic_accept_queues) { |
| 937 | ha_alert("Failed to allocate the quic accept queues.\n"); |
| 938 | return 0; |
| 939 | } |
| 940 | |
| 941 | for (i = 0; i < global.nbthread; ++i) { |
| 942 | struct tasklet *task; |
| 943 | if (!(task = tasklet_new())) { |
| 944 | ha_alert("Failed to allocate the quic accept queue on thread %d.\n", i); |
| 945 | return 0; |
| 946 | } |
| 947 | |
| 948 | tasklet_set_tid(task, i); |
| 949 | task->process = quic_accept_run; |
| 950 | quic_accept_queues[i].tasklet = task; |
| 951 | |
| 952 | MT_LIST_INIT(&quic_accept_queues[i].listeners); |
| 953 | } |
| 954 | |
| 955 | return 1; |
| 956 | } |
| 957 | REGISTER_POST_CHECK(quic_alloc_accept_queues); |
| 958 | |
| 959 | static int quic_deallocate_accept_queues(void) |
| 960 | { |
| 961 | int i; |
| 962 | |
| 963 | if (quic_accept_queues) { |
| 964 | for (i = 0; i < global.nbthread; ++i) |
| 965 | tasklet_free(quic_accept_queues[i].tasklet); |
| 966 | free(quic_accept_queues); |
| 967 | } |
| 968 | |
| 969 | return 1; |
| 970 | } |
| 971 | REGISTER_POST_DEINIT(quic_deallocate_accept_queues); |