Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 1 | /* |
| 2 | * QUIC socket management. |
| 3 | * |
Willy Tarreau | 3dfb7da | 2022-03-02 22:33:39 +0100 | [diff] [blame] | 4 | * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 13 | #define _GNU_SOURCE /* required for struct in6_pktinfo */ |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 14 | #include <errno.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 15 | #include <stdlib.h> |
| 16 | #include <string.h> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 17 | |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 18 | #include <netinet/in.h> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 19 | #include <sys/socket.h> |
| 20 | #include <sys/types.h> |
| 21 | |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 22 | #include <haproxy/api.h> |
| 23 | #include <haproxy/buf.h> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 24 | #include <haproxy/connection.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 25 | #include <haproxy/fd.h> |
| 26 | #include <haproxy/freq_ctr.h> |
| 27 | #include <haproxy/global-t.h> |
| 28 | #include <haproxy/list.h> |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 29 | #include <haproxy/listener.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 30 | #include <haproxy/pool.h> |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 31 | #include <haproxy/proto_quic.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 32 | #include <haproxy/proxy-t.h> |
Amaury Denoyelle | 92fa63f | 2022-09-30 18:11:13 +0200 | [diff] [blame] | 33 | #include <haproxy/quic_conn.h> |
Amaury Denoyelle | 4d29504 | 2022-01-19 16:18:44 +0100 | [diff] [blame] | 34 | #include <haproxy/quic_sock.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 35 | #include <haproxy/quic_tp-t.h> |
Amaury Denoyelle | eb01f59 | 2021-10-07 16:44:05 +0200 | [diff] [blame] | 36 | #include <haproxy/session.h> |
Amaury Denoyelle | 5c25dc5 | 2022-09-30 17:44:15 +0200 | [diff] [blame] | 37 | #include <haproxy/stats-t.h> |
| 38 | #include <haproxy/task.h> |
Amaury Denoyelle | 777969c | 2022-03-24 16:06:26 +0100 | [diff] [blame] | 39 | #include <haproxy/tools.h> |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 40 | |
| 41 | /* This function is called from the protocol layer accept() in order to |
| 42 | * instantiate a new session on behalf of a given listener and frontend. It |
| 43 | * returns a positive value upon success, 0 if the connection can be ignored, |
| 44 | * or a negative value upon critical failure. The accepted connection is |
| 45 | * closed if we return <= 0. If no handshake is needed, it immediately tries |
| 46 | * to instantiate a new stream. The connection must already have been filled |
| 47 | * with the incoming connection handle (a fd), a target (the listener) and a |
| 48 | * source address. |
| 49 | */ |
| 50 | int quic_session_accept(struct connection *cli_conn) |
| 51 | { |
| 52 | struct listener *l = __objt_listener(cli_conn->target); |
| 53 | struct proxy *p = l->bind_conf->frontend; |
| 54 | struct session *sess; |
| 55 | |
| 56 | cli_conn->proxy_netns = l->rx.settings->netns; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 57 | /* This flag is ordinarily set by conn_ctrl_init() which cannot |
| 58 | * be called for now. |
| 59 | */ |
| 60 | cli_conn->flags |= CO_FL_CTRL_READY; |
| 61 | |
| 62 | /* wait for a PROXY protocol header */ |
| 63 | if (l->options & LI_O_ACC_PROXY) |
| 64 | cli_conn->flags |= CO_FL_ACCEPT_PROXY; |
| 65 | |
| 66 | /* wait for a NetScaler client IP insertion protocol header */ |
| 67 | if (l->options & LI_O_ACC_CIP) |
| 68 | cli_conn->flags |= CO_FL_ACCEPT_CIP; |
| 69 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 70 | /* Add the handshake pseudo-XPRT */ |
| 71 | if (cli_conn->flags & (CO_FL_ACCEPT_PROXY | CO_FL_ACCEPT_CIP)) { |
| 72 | if (xprt_add_hs(cli_conn) != 0) |
| 73 | goto out_free_conn; |
| 74 | } |
Olivier Houchard | 1b3c931 | 2021-03-05 23:37:48 +0100 | [diff] [blame] | 75 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 76 | sess = session_new(p, l, &cli_conn->obj_type); |
| 77 | if (!sess) |
| 78 | goto out_free_conn; |
| 79 | |
| 80 | conn_set_owner(cli_conn, sess, NULL); |
| 81 | |
Frédéric Lécaille | ecb5872 | 2021-05-27 17:12:36 +0200 | [diff] [blame] | 82 | if (conn_complete_session(cli_conn) < 0) |
| 83 | goto out_free_sess; |
| 84 | |
Amaury Denoyelle | 622ec41 | 2022-04-13 16:58:26 +0200 | [diff] [blame] | 85 | if (conn_xprt_start(cli_conn) < 0) { |
| 86 | /* conn_complete_session has succeeded : conn is the owner of |
| 87 | * the session and the MUX is initialized. |
| 88 | * Let the MUX free all resources on error. |
| 89 | */ |
| 90 | cli_conn->mux->destroy(cli_conn->ctx); |
| 91 | return -1; |
| 92 | } |
| 93 | |
| 94 | return 1; |
Frédéric Lécaille | 27faba7 | 2021-03-03 16:21:00 +0100 | [diff] [blame] | 95 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 96 | out_free_sess: |
| 97 | /* prevent call to listener_release during session_free. It will be |
| 98 | * done below, for all errors. */ |
| 99 | sess->listener = NULL; |
| 100 | session_free(sess); |
| 101 | out_free_conn: |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 102 | cli_conn->handle.qc->conn = NULL; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 103 | conn_stop_tracking(cli_conn); |
| 104 | conn_xprt_close(cli_conn); |
| 105 | conn_free(cli_conn); |
| 106 | out: |
| 107 | |
Frédéric Lécaille | e8139f3 | 2021-03-11 17:06:30 +0100 | [diff] [blame] | 108 | return -1; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 109 | } |
| 110 | |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 111 | /* Retrieve a connection's source address. Returns -1 on failure. */ |
| 112 | int quic_sock_get_src(struct connection *conn, struct sockaddr *addr, socklen_t len) |
| 113 | { |
| 114 | struct quic_conn *qc; |
| 115 | |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 116 | if (!conn || !conn->handle.qc) |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 117 | return -1; |
| 118 | |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 119 | qc = conn->handle.qc; |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 120 | if (conn_is_back(conn)) { |
| 121 | /* no source address defined for outgoing connections for now */ |
| 122 | return -1; |
| 123 | } else { |
| 124 | /* front connection, return the peer's address */ |
| 125 | if (len > sizeof(qc->peer_addr)) |
| 126 | len = sizeof(qc->peer_addr); |
| 127 | memcpy(addr, &qc->peer_addr, len); |
| 128 | return 0; |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | /* Retrieve a connection's destination address. Returns -1 on failure. */ |
| 133 | int quic_sock_get_dst(struct connection *conn, struct sockaddr *addr, socklen_t len) |
| 134 | { |
| 135 | struct quic_conn *qc; |
| 136 | |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 137 | if (!conn || !conn->handle.qc) |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 138 | return -1; |
| 139 | |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 140 | qc = conn->handle.qc; |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 141 | if (conn_is_back(conn)) { |
| 142 | /* back connection, return the peer's address */ |
| 143 | if (len > sizeof(qc->peer_addr)) |
| 144 | len = sizeof(qc->peer_addr); |
| 145 | memcpy(addr, &qc->peer_addr, len); |
| 146 | } else { |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 147 | struct sockaddr_storage *from; |
| 148 | |
| 149 | /* Return listener address if IP_PKTINFO or friends are not |
| 150 | * supported by the socket. |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 151 | */ |
| 152 | BUG_ON(!qc->li); |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 153 | from = is_addr(&qc->local_addr) ? &qc->local_addr : |
| 154 | &qc->li->rx.addr; |
| 155 | if (len > sizeof(*from)) |
| 156 | len = sizeof(*from); |
| 157 | memcpy(addr, from, len); |
Willy Tarreau | cdf7c8e | 2022-04-11 16:20:00 +0200 | [diff] [blame] | 158 | } |
| 159 | return 0; |
| 160 | } |
| 161 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 162 | /* |
| 163 | * Inspired from session_accept_fd(). |
| 164 | * Instantiate a new connection (connection struct) to be attached to <qc> |
| 165 | * QUIC connection of <l> listener. |
| 166 | * Returns 1 if succeeded, 0 if not. |
| 167 | */ |
| 168 | static int new_quic_cli_conn(struct quic_conn *qc, struct listener *l, |
| 169 | struct sockaddr_storage *saddr) |
| 170 | { |
| 171 | struct connection *cli_conn; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 172 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 173 | if (unlikely((cli_conn = conn_new(&l->obj_type)) == NULL)) |
| 174 | goto out; |
| 175 | |
Willy Tarreau | 9cc88c3 | 2022-04-08 14:34:31 +0200 | [diff] [blame] | 176 | if (!sockaddr_alloc(&cli_conn->src, saddr, sizeof *saddr)) |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 177 | goto out_free_conn; |
| 178 | |
Willy Tarreau | 030b3e6 | 2022-05-02 17:47:46 +0200 | [diff] [blame] | 179 | cli_conn->flags |= CO_FL_FDLESS; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 180 | qc->conn = cli_conn; |
Willy Tarreau | 784b868 | 2022-04-11 14:18:10 +0200 | [diff] [blame] | 181 | cli_conn->handle.qc = qc; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 182 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 183 | cli_conn->target = &l->obj_type; |
| 184 | |
Frédéric Lécaille | 01ab661 | 2021-06-14 10:31:43 +0200 | [diff] [blame] | 185 | /* We need the xprt context before accepting (->accept()) the connection: |
| 186 | * we may receive packet before this connection acception. |
| 187 | */ |
| 188 | if (conn_prepare(cli_conn, l->rx.proto, l->bind_conf->xprt) < 0) |
| 189 | goto out_free_conn; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 190 | |
| 191 | return 1; |
| 192 | |
| 193 | out_free_conn: |
Frédéric Lécaille | 01ab661 | 2021-06-14 10:31:43 +0200 | [diff] [blame] | 194 | qc->conn = NULL; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 195 | conn_stop_tracking(cli_conn); |
| 196 | conn_xprt_close(cli_conn); |
| 197 | conn_free(cli_conn); |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 198 | out: |
| 199 | |
| 200 | return 0; |
| 201 | } |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 202 | |
/* Tells whether receiver <rx> supports accepting connections: a positive
 * value means yes, 0 means it is not possible. This implementation does not
 * look at <rx> and always answers 1.
 */
int quic_sock_accepting_conn(const struct receiver *rx)
{
	(void)rx;
	return 1;
}
| 210 | |
| 211 | /* Accept an incoming connection from listener <l>, and return it, as well as |
| 212 | * a CO_AC_* status code into <status> if not null. Null is returned on error. |
| 213 | * <l> must be a valid listener with a valid frontend. |
| 214 | */ |
| 215 | struct connection *quic_sock_accept_conn(struct listener *l, int *status) |
| 216 | { |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 217 | struct quic_conn *qc; |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 218 | struct li_per_thread *lthr = &l->per_thr[tid]; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 219 | |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 220 | qc = MT_LIST_POP(<hr->quic_accept.conns, struct quic_conn *, accept_list); |
| 221 | if (!qc) |
| 222 | goto done; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 223 | |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 224 | if (!new_quic_cli_conn(qc, l, &qc->peer_addr)) |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 225 | goto err; |
| 226 | |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 227 | done: |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 228 | *status = CO_AC_DONE; |
Frédéric Lécaille | 026a792 | 2020-11-23 15:46:36 +0100 | [diff] [blame] | 229 | return qc ? qc->conn : NULL; |
| 230 | |
| 231 | err: |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 232 | /* in case of error reinsert the element to process it later. */ |
| 233 | MT_LIST_INSERT(<hr->quic_accept.conns, &qc->accept_list); |
| 234 | |
| 235 | *status = CO_AC_PAUSE; |
| 236 | return NULL; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 237 | } |
| 238 | |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 239 | /* Retrieve the DCID from the datagram found in <buf> and deliver it to the |
| 240 | * correct datagram handler. |
| 241 | * Return 1 if a correct datagram could be found, 0 if not. |
| 242 | */ |
| 243 | static int quic_lstnr_dgram_dispatch(unsigned char *buf, size_t len, void *owner, |
| 244 | struct sockaddr_storage *saddr, |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 245 | struct sockaddr_storage *daddr, |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 246 | struct quic_dgram *new_dgram, struct list *dgrams) |
| 247 | { |
| 248 | struct quic_dgram *dgram; |
| 249 | unsigned char *dcid; |
| 250 | size_t dcid_len; |
| 251 | int cid_tid; |
| 252 | |
| 253 | if (!len || !quic_get_dgram_dcid(buf, buf + len, &dcid, &dcid_len)) |
| 254 | goto err; |
| 255 | |
| 256 | dgram = new_dgram ? new_dgram : pool_alloc(pool_head_quic_dgram); |
| 257 | if (!dgram) |
| 258 | goto err; |
| 259 | |
| 260 | cid_tid = quic_get_cid_tid(dcid); |
| 261 | |
| 262 | /* All the members must be initialized! */ |
| 263 | dgram->owner = owner; |
| 264 | dgram->buf = buf; |
| 265 | dgram->len = len; |
| 266 | dgram->dcid = dcid; |
| 267 | dgram->dcid_len = dcid_len; |
| 268 | dgram->saddr = *saddr; |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 269 | dgram->daddr = *daddr; |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 270 | dgram->qc = NULL; |
| 271 | LIST_APPEND(dgrams, &dgram->list); |
| 272 | MT_LIST_APPEND(&quic_dghdlrs[cid_tid].dgrams, &dgram->mt_list); |
| 273 | |
Willy Tarreau | f9d4a7d | 2022-08-05 08:45:56 +0200 | [diff] [blame] | 274 | /* typically quic_lstnr_dghdlr() */ |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 275 | tasklet_wakeup(quic_dghdlrs[cid_tid].task); |
| 276 | |
| 277 | return 1; |
| 278 | |
| 279 | err: |
Frédéric Lécaille | bfb077a | 2022-08-12 11:55:20 +0200 | [diff] [blame] | 280 | pool_free(pool_head_quic_dgram, new_dgram); |
Frédéric Lécaille | 6492e66 | 2022-05-17 17:23:16 +0200 | [diff] [blame] | 281 | return 0; |
| 282 | } |
| 283 | |
/* Receive data from datagram socket <fd>. Data are placed in <out> buffer of
 * length <len>.
 *
 * Datagram addresses will be returned via the next arguments. <from> (of size
 * <from_len>) will be the peer address and <to> (of size <to_len>) the
 * reception one. Note that <to> can only be retrieved if the socket supports
 * IP_PKTINFO or affiliated options. If not, <to> is left as reset by
 * clear_addr(). The caller must specify <dst_port> to ensure that <to>
 * address is completely filled; it is stored as is into the port field of
 * <to>, so it must already be in network byte order.
 *
 * Returns value from recvmsg syscall. The call is transparently restarted
 * when interrupted by a signal (EINTR).
 */
static ssize_t quic_recv(int fd, void *out, size_t len,
                         struct sockaddr *from, socklen_t from_len,
                         struct sockaddr *to, socklen_t to_len,
                         uint16_t dst_port)
{
	union pktinfo {
#ifdef IP_PKTINFO
		struct in_pktinfo in;
#else /* !IP_PKTINFO */
		struct in_addr addr;
#endif
#ifdef IPV6_RECVPKTINFO
		struct in6_pktinfo in6;
#endif
	};
	/* Ancillary data buffer. The union guarantees an alignment suitable
	 * for struct cmsghdr, which a plain char array does not.
	 */
	union {
		char buf[CMSG_SPACE(sizeof(union pktinfo))];
		struct cmsghdr align;
	} cdata;
	struct msghdr msg;
	struct iovec vec;
	struct cmsghdr *cmsg;
	ssize_t ret;

	vec.iov_base = out;
	vec.iov_len  = len;

	memset(&msg, 0, sizeof(msg));
	msg.msg_name       = from;
	msg.msg_namelen    = from_len;
	msg.msg_iov        = &vec;
	msg.msg_iovlen     = 1;
	msg.msg_control    = cdata.buf;
	msg.msg_controllen = sizeof(cdata.buf);

	/* <to> stays cleared unless a destination address is reported below */
	clear_addr((struct sockaddr_storage *)to);

	do {
		ret = recvmsg(fd, &msg, 0);
	} while (ret < 0 && errno == EINTR);

	/* TODO handle errno. On EAGAIN/EWOULDBLOCK use fd_cant_recv() if
	 * using dedicated connection socket.
	 */

	if (ret < 0)
		goto end;

	/* Look for the destination address among the ancillary data. The
	 * payload returned by CMSG_DATA() is copied with memcpy() since it is
	 * not guaranteed to be aligned for direct access.
	 */
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		switch (cmsg->cmsg_level) {
		case IPPROTO_IP:
#if defined(IP_PKTINFO)
			if (cmsg->cmsg_type == IP_PKTINFO) {
				struct sockaddr_in *in = (struct sockaddr_in *)to;
				struct in_pktinfo info;

				if (to_len >= sizeof(struct sockaddr_in)) {
					memcpy(&info, CMSG_DATA(cmsg), sizeof(info));
					in->sin_family = AF_INET;
					in->sin_addr = info.ipi_addr;
					in->sin_port = dst_port;
				}
			}
#elif defined(IP_RECVDSTADDR)
			if (cmsg->cmsg_type == IP_RECVDSTADDR) {
				struct sockaddr_in *in = (struct sockaddr_in *)to;
				struct in_addr info;

				if (to_len >= sizeof(struct sockaddr_in)) {
					memcpy(&info, CMSG_DATA(cmsg), sizeof(info));
					in->sin_family = AF_INET;
					in->sin_addr.s_addr = info.s_addr;
					in->sin_port = dst_port;
				}
			}
#endif /* IP_PKTINFO || IP_RECVDSTADDR */
			break;

		case IPPROTO_IPV6:
#ifdef IPV6_RECVPKTINFO
			if (cmsg->cmsg_type == IPV6_PKTINFO) {
				struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)to;
				struct in6_pktinfo info6;

				if (to_len >= sizeof(struct sockaddr_in6)) {
					memcpy(&info6, CMSG_DATA(cmsg), sizeof(info6));
					in6->sin6_family = AF_INET6;
					memcpy(&in6->sin6_addr, &info6.ipi6_addr, sizeof(in6->sin6_addr));
					in6->sin6_port = dst_port;
				}
			}
#endif
			break;

		default:
			/* other ancillary data are ignored */
			break;
		}
	}

 end:
	return ret;
}
| 388 | |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 389 | /* Function called on a read event from a listening socket. It tries |
| 390 | * to handle as many connections as possible. |
| 391 | */ |
| 392 | void quic_sock_fd_iocb(int fd) |
| 393 | { |
| 394 | ssize_t ret; |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 395 | struct rxbuf *rxbuf; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 396 | struct buffer *buf; |
| 397 | struct listener *l = objt_listener(fdtab[fd].owner); |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 398 | struct quic_transport_params *params; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 399 | /* Source address */ |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 400 | struct sockaddr_storage saddr = {0}, daddr = {0}; |
Frédéric Lécaille | 320744b | 2022-01-27 12:19:28 +0100 | [diff] [blame] | 401 | size_t max_sz, cspace; |
Frédéric Lécaille | 2bed1f1 | 2022-06-23 21:05:05 +0200 | [diff] [blame] | 402 | struct quic_dgram *new_dgram; |
Frédéric Lécaille | f6f7520 | 2022-02-02 09:44:22 +0100 | [diff] [blame] | 403 | unsigned char *dgram_buf; |
Frédéric Lécaille | 1b0707f | 2022-06-30 11:28:56 +0200 | [diff] [blame] | 404 | int max_dgrams; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 405 | |
Tim Duesterhus | 1655424 | 2021-09-15 13:58:49 +0200 | [diff] [blame] | 406 | BUG_ON(!l); |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 407 | |
Frédéric Lécaille | 19ef636 | 2022-06-23 18:00:37 +0200 | [diff] [blame] | 408 | new_dgram = NULL; |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 409 | if (!l) |
| 410 | return; |
| 411 | |
Willy Tarreau | f509065 | 2021-04-06 17:23:40 +0200 | [diff] [blame] | 412 | if (!(fdtab[fd].state & FD_POLL_IN) || !fd_recv_ready(fd)) |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 413 | return; |
| 414 | |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 415 | rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), mt_list); |
Amaury Denoyelle | ee72a43 | 2021-11-19 15:49:29 +0100 | [diff] [blame] | 416 | if (!rxbuf) |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 417 | goto out; |
Frédéric Lécaille | 37ae505 | 2022-01-27 11:31:50 +0100 | [diff] [blame] | 418 | |
Amaury Denoyelle | ee72a43 | 2021-11-19 15:49:29 +0100 | [diff] [blame] | 419 | buf = &rxbuf->buf; |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 420 | |
Frédéric Lécaille | 1b0707f | 2022-06-30 11:28:56 +0200 | [diff] [blame] | 421 | max_dgrams = global.tune.maxpollevents; |
| 422 | start: |
Ilya Shipitsin | 3b64a28 | 2022-07-29 22:26:53 +0500 | [diff] [blame] | 423 | /* Try to reuse an existing dgram. Note that there is always at |
Frédéric Lécaille | 2bed1f1 | 2022-06-23 21:05:05 +0200 | [diff] [blame] | 424 | * least one datagram to pick, except the first time we enter |
| 425 | * this function for this <rxbuf> buffer. |
| 426 | */ |
| 427 | if (!LIST_ISEMPTY(&rxbuf->dgrams)) { |
| 428 | struct quic_dgram *dg = |
| 429 | LIST_ELEM(rxbuf->dgrams.n, struct quic_dgram *, list); |
Frédéric Lécaille | 37ae505 | 2022-01-27 11:31:50 +0100 | [diff] [blame] | 430 | |
Frédéric Lécaille | 2bed1f1 | 2022-06-23 21:05:05 +0200 | [diff] [blame] | 431 | if (!dg->buf) { |
| 432 | LIST_DELETE(&dg->list); |
| 433 | b_del(buf, dg->len); |
| 434 | new_dgram = dg; |
| 435 | } |
Frédéric Lécaille | 37ae505 | 2022-01-27 11:31:50 +0100 | [diff] [blame] | 436 | } |
| 437 | |
Frédéric Lécaille | c4becf5 | 2021-11-08 11:23:17 +0100 | [diff] [blame] | 438 | params = &l->bind_conf->quic_params; |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 439 | max_sz = params->max_udp_payload_size; |
Frédéric Lécaille | 320744b | 2022-01-27 12:19:28 +0100 | [diff] [blame] | 440 | cspace = b_contig_space(buf); |
| 441 | if (cspace < max_sz) { |
Frédéric Lécaille | 1712b1d | 2022-01-28 13:10:24 +0100 | [diff] [blame] | 442 | struct quic_dgram *dgram; |
| 443 | |
Frédéric Lécaille | 0c53568 | 2022-06-23 17:47:10 +0200 | [diff] [blame] | 444 | /* Do no mark <buf> as full, and do not try to consume it |
Frédéric Lécaille | ba19acd | 2022-08-08 21:10:58 +0200 | [diff] [blame] | 445 | * if the contiguous remaining space is not at the end |
Frédéric Lécaille | 0c53568 | 2022-06-23 17:47:10 +0200 | [diff] [blame] | 446 | */ |
| 447 | if (b_tail(buf) + cspace < b_wrap(buf)) |
| 448 | goto out; |
| 449 | |
Frédéric Lécaille | 1712b1d | 2022-01-28 13:10:24 +0100 | [diff] [blame] | 450 | /* Allocate a fake datagram, without data to locate |
| 451 | * the end of the RX buffer (required during purging). |
| 452 | */ |
Frédéric Lécaille | ba19acd | 2022-08-08 21:10:58 +0200 | [diff] [blame] | 453 | dgram = pool_alloc(pool_head_quic_dgram); |
Frédéric Lécaille | 1712b1d | 2022-01-28 13:10:24 +0100 | [diff] [blame] | 454 | if (!dgram) |
| 455 | goto out; |
| 456 | |
Frédéric Lécaille | ba19acd | 2022-08-08 21:10:58 +0200 | [diff] [blame] | 457 | /* Initialize only the useful members of this fake datagram. */ |
| 458 | dgram->buf = NULL; |
Frédéric Lécaille | 1712b1d | 2022-01-28 13:10:24 +0100 | [diff] [blame] | 459 | dgram->len = cspace; |
Frédéric Lécaille | ba19acd | 2022-08-08 21:10:58 +0200 | [diff] [blame] | 460 | /* Append this datagram only to the RX buffer list. It will |
| 461 | * not be treated by any datagram handler. |
| 462 | */ |
Frédéric Lécaille | 1712b1d | 2022-01-28 13:10:24 +0100 | [diff] [blame] | 463 | LIST_APPEND(&rxbuf->dgrams, &dgram->list); |
Frédéric Lécaille | 0c53568 | 2022-06-23 17:47:10 +0200 | [diff] [blame] | 464 | |
Frédéric Lécaille | 320744b | 2022-01-27 12:19:28 +0100 | [diff] [blame] | 465 | /* Consume the remaining space */ |
| 466 | b_add(buf, cspace); |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 467 | if (b_contig_space(buf) < max_sz) |
| 468 | goto out; |
| 469 | } |
| 470 | |
Frédéric Lécaille | f6f7520 | 2022-02-02 09:44:22 +0100 | [diff] [blame] | 471 | dgram_buf = (unsigned char *)b_tail(buf); |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 472 | ret = quic_recv(fd, dgram_buf, max_sz, |
| 473 | (struct sockaddr *)&saddr, sizeof(saddr), |
| 474 | (struct sockaddr *)&daddr, sizeof(daddr), |
| 475 | get_net_port(&l->rx.addr)); |
| 476 | if (ret <= 0) |
| 477 | goto out; |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 478 | |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 479 | b_add(buf, ret); |
Amaury Denoyelle | 97ecc7a | 2022-09-23 17:15:58 +0200 | [diff] [blame] | 480 | if (!quic_lstnr_dgram_dispatch(dgram_buf, ret, l, &saddr, &daddr, |
Frédéric Lécaille | f6f7520 | 2022-02-02 09:44:22 +0100 | [diff] [blame] | 481 | new_dgram, &rxbuf->dgrams)) { |
Frédéric Lécaille | 37ae505 | 2022-01-27 11:31:50 +0100 | [diff] [blame] | 482 | /* If wrong, consume this datagram */ |
| 483 | b_del(buf, ret); |
| 484 | } |
Frédéric Lécaille | 19ef636 | 2022-06-23 18:00:37 +0200 | [diff] [blame] | 485 | new_dgram = NULL; |
Frédéric Lécaille | 1b0707f | 2022-06-30 11:28:56 +0200 | [diff] [blame] | 486 | if (--max_dgrams > 0) |
| 487 | goto start; |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 488 | out: |
Frédéric Lécaille | 19ef636 | 2022-06-23 18:00:37 +0200 | [diff] [blame] | 489 | pool_free(pool_head_quic_dgram, new_dgram); |
Frédéric Lécaille | 324ecda | 2021-11-02 10:14:44 +0100 | [diff] [blame] | 490 | MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->mt_list); |
Frédéric Lécaille | 70da889 | 2020-11-06 15:49:49 +0100 | [diff] [blame] | 491 | } |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 492 | |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 493 | /* Send a datagram stored into <buf> buffer with <sz> as size. |
| 494 | * The caller must ensure there is at least <sz> bytes in this buffer. |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 495 | * |
| 496 | * Returns 0 on success else non-zero. |
| 497 | * |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 498 | * TODO standardize this function for a generic UDP sendto wrapper. This can be |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 499 | * done by removing the <qc> arg and replace it with address/port. |
| 500 | */ |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 501 | int qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t sz, |
| 502 | int flags) |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 503 | { |
| 504 | ssize_t ret; |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 505 | |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 506 | do { |
| 507 | ret = sendto(qc->li->rx.fd, b_peek(buf, b_head_ofs(buf)), sz, |
| 508 | MSG_DONTWAIT | MSG_NOSIGNAL, |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 509 | (struct sockaddr *)&qc->peer_addr, get_addr_len(&qc->peer_addr)); |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 510 | } while (ret < 0 && errno == EINTR); |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 511 | |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 512 | if (ret < 0 || ret != sz) { |
| 513 | /* TODO adjust errno for UDP context. */ |
| 514 | if (errno == EAGAIN || errno == EWOULDBLOCK || |
| 515 | errno == ENOTCONN || errno == EINPROGRESS || errno == EBADF) { |
| 516 | struct proxy *prx = qc->li->bind_conf->frontend; |
| 517 | struct quic_counters *prx_counters = |
| 518 | EXTRA_COUNTERS_GET(prx->extra_counters_fe, |
| 519 | &quic_stats_module); |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 520 | |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 521 | if (errno == EAGAIN || errno == EWOULDBLOCK) |
| 522 | HA_ATOMIC_INC(&prx_counters->socket_full); |
| 523 | else |
| 524 | HA_ATOMIC_INC(&prx_counters->sendto_err); |
| 525 | } |
| 526 | else if (errno) { |
Ilya Shipitsin | 3b64a28 | 2022-07-29 22:26:53 +0500 | [diff] [blame] | 527 | /* TODO unlisted errno : handle it explicitly. */ |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 528 | ABORT_NOW(); |
| 529 | } |
| 530 | |
| 531 | return 1; |
Frédéric Lécaille | 48bb875 | 2022-08-03 20:52:20 +0200 | [diff] [blame] | 532 | } |
| 533 | |
Amaury Denoyelle | 6715cbf | 2022-08-05 11:56:36 +0200 | [diff] [blame] | 534 | /* we count the total bytes sent, and the send rate for 32-byte blocks. |
| 535 | * The reason for the latter is that freq_ctr are limited to 4GB and |
| 536 | * that it's not enough per second. |
| 537 | */ |
| 538 | _HA_ATOMIC_ADD(&global.out_bytes, ret); |
| 539 | update_freq_ctr(&global.out_32bps, (ret + 16) / 32); |
| 540 | |
| 541 | return 0; |
Amaury Denoyelle | 58a7704 | 2022-02-09 15:43:07 +0100 | [diff] [blame] | 542 | } |
| 543 | |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 544 | |
/*********************** QUIC accept queue management ***********************/
/* accept queues, one entry per thread, indexed by thread ID */
struct quic_accept_queue *quic_accept_queues;
| 548 | |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 549 | /* Install <qc> on the queue ready to be accepted. The queue task is then woken |
Frédéric Lécaille | 91f083a | 2022-01-28 21:43:48 +0100 | [diff] [blame] | 550 | * up. If <qc> accept is already scheduled or done, nothing is done. |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 551 | */ |
| 552 | void quic_accept_push_qc(struct quic_conn *qc) |
| 553 | { |
| 554 | struct quic_accept_queue *queue = &quic_accept_queues[qc->tid]; |
| 555 | struct li_per_thread *lthr = &qc->li->per_thr[qc->tid]; |
| 556 | |
Frédéric Lécaille | 91f083a | 2022-01-28 21:43:48 +0100 | [diff] [blame] | 557 | /* early return if accept is already in progress/done for this |
| 558 | * connection |
| 559 | */ |
Frédéric Lécaille | fc79006 | 2022-03-28 17:10:31 +0200 | [diff] [blame] | 560 | if (qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED) |
Frédéric Lécaille | 91f083a | 2022-01-28 21:43:48 +0100 | [diff] [blame] | 561 | return; |
| 562 | |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 563 | BUG_ON(MT_LIST_INLIST(&qc->accept_list)); |
| 564 | |
Frédéric Lécaille | fc79006 | 2022-03-28 17:10:31 +0200 | [diff] [blame] | 565 | qc->flags |= QUIC_FL_CONN_ACCEPT_REGISTERED; |
Amaury Denoyelle | cfa2d56 | 2022-01-19 16:01:05 +0100 | [diff] [blame] | 566 | /* 1. insert the listener in the accept queue |
| 567 | * |
| 568 | * Use TRY_APPEND as there is a possible race even with INLIST if |
| 569 | * multiple threads try to add the same listener instance from several |
| 570 | * quic_conn. |
| 571 | */ |
| 572 | if (!MT_LIST_INLIST(&(lthr->quic_accept.list))) |
| 573 | MT_LIST_TRY_APPEND(&queue->listeners, &(lthr->quic_accept.list)); |
| 574 | |
| 575 | /* 2. insert the quic_conn in the listener per-thread queue. */ |
| 576 | MT_LIST_APPEND(<hr->quic_accept.conns, &qc->accept_list); |
| 577 | |
| 578 | /* 3. wake up the queue tasklet */ |
| 579 | tasklet_wakeup(quic_accept_queues[qc->tid].tasklet); |
| 580 | } |
| 581 | |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 582 | /* Tasklet handler to accept QUIC connections. Call listener_accept on every |
| 583 | * listener instances registered in the accept queue. |
| 584 | */ |
Willy Tarreau | 41e701e | 2022-09-08 15:12:59 +0200 | [diff] [blame] | 585 | struct task *quic_accept_run(struct task *t, void *ctx, unsigned int i) |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 586 | { |
| 587 | struct li_per_thread *lthr; |
| 588 | struct mt_list *elt1, elt2; |
| 589 | struct quic_accept_queue *queue = &quic_accept_queues[tid]; |
| 590 | |
| 591 | mt_list_for_each_entry_safe(lthr, &queue->listeners, quic_accept.list, elt1, elt2) { |
| 592 | listener_accept(lthr->li); |
| 593 | MT_LIST_DELETE_SAFE(elt1); |
| 594 | } |
| 595 | |
| 596 | return NULL; |
| 597 | } |
| 598 | |
| 599 | static int quic_alloc_accept_queues(void) |
| 600 | { |
| 601 | int i; |
| 602 | |
Tim Duesterhus | 9fb57e8 | 2022-06-01 21:58:37 +0200 | [diff] [blame] | 603 | quic_accept_queues = calloc(global.nbthread, |
| 604 | sizeof(*quic_accept_queues)); |
Amaury Denoyelle | 2ce99fe | 2022-01-19 15:46:11 +0100 | [diff] [blame] | 605 | if (!quic_accept_queues) { |
| 606 | ha_alert("Failed to allocate the quic accept queues.\n"); |
| 607 | return 0; |
| 608 | } |
| 609 | |
| 610 | for (i = 0; i < global.nbthread; ++i) { |
| 611 | struct tasklet *task; |
| 612 | if (!(task = tasklet_new())) { |
| 613 | ha_alert("Failed to allocate the quic accept queue on thread %d.\n", i); |
| 614 | return 0; |
| 615 | } |
| 616 | |
| 617 | tasklet_set_tid(task, i); |
| 618 | task->process = quic_accept_run; |
| 619 | quic_accept_queues[i].tasklet = task; |
| 620 | |
| 621 | MT_LIST_INIT(&quic_accept_queues[i].listeners); |
| 622 | } |
| 623 | |
| 624 | return 1; |
| 625 | } |
| 626 | REGISTER_POST_CHECK(quic_alloc_accept_queues); |
| 627 | |
| 628 | static int quic_deallocate_accept_queues(void) |
| 629 | { |
| 630 | int i; |
| 631 | |
| 632 | if (quic_accept_queues) { |
| 633 | for (i = 0; i < global.nbthread; ++i) |
| 634 | tasklet_free(quic_accept_queues[i].tasklet); |
| 635 | free(quic_accept_queues); |
| 636 | } |
| 637 | |
| 638 | return 1; |
| 639 | } |
| 640 | REGISTER_POST_DEINIT(quic_deallocate_accept_queues); |