MEDIUM: quic: retrieve frontend destination address
Retrieve the frontend destination address for a QUIC connection. This
address is retrieved from the first received datagram and then stored in
the associated quic-conn.
This feature relies on IP_PKTINFO or affiliated flags support on the
socket. This flag is set for each QUIC listener in
sock_inet_bind_receiver(). To retrieve the destination address,
recvfrom() has been replaced by recvmsg() syscall. This operation and the
parsing of msghdr structure have been extracted in a wrapper quic_recv().
This change is useful to finalize the implementation of 'dst' sample
fetch. As such, quic_sock_get_dst() has been edited to return local
address from the quic-conn. As a best effort, if local address is not
available due to kernel non-support of IP_PKTINFO, address of the
listener is returned instead.
This should be backported up to 2.6.
diff --git a/include/haproxy/quic_conn-t.h b/include/haproxy/quic_conn-t.h
index 8384c01..0e3e273 100644
--- a/include/haproxy/quic_conn-t.h
+++ b/include/haproxy/quic_conn-t.h
@@ -374,6 +374,7 @@
unsigned char *dcid;
size_t dcid_len;
struct sockaddr_storage saddr;
+ struct sockaddr_storage daddr;
struct quic_conn *qc;
struct list list;
struct mt_list mt_list;
@@ -636,6 +637,7 @@
struct ssl_sock_ctx *xprt_ctx;
+ struct sockaddr_storage local_addr;
struct sockaddr_storage peer_addr;
/* Used only to reach the tasklet for the I/O handler from this quic_conn object. */
diff --git a/src/quic_conn.c b/src/quic_conn.c
index 34809c3..65c584d 100644
--- a/src/quic_conn.c
+++ b/src/quic_conn.c
@@ -4590,13 +4590,14 @@
* for QUIC servers (or haproxy listeners).
* <dcid> is the destination connection ID, <scid> is the source connection ID,
* <token> the token found to be used for this connection with <token_len> as
- * length. <saddr> is the source address.
+ * length. Endpoint addresses are specified via <local_addr> and <peer_addr>.
* Returns the connection if succeeded, NULL if not.
*/
static struct quic_conn *qc_new_conn(const struct quic_version *qv, int ipv4,
struct quic_cid *dcid, struct quic_cid *scid,
const struct quic_cid *token_odcid,
- struct sockaddr_storage *saddr,
+ struct sockaddr_storage *local_addr,
+ struct sockaddr_storage *peer_addr,
int server, int token, void *owner)
{
int i;
@@ -4715,7 +4716,8 @@
qc->streams_by_id = EB_ROOT_UNIQUE;
qc->stream_buf_count = 0;
- memcpy(&qc->peer_addr, saddr, sizeof qc->peer_addr);
+ memcpy(&qc->local_addr, local_addr, sizeof(qc->local_addr));
+ memcpy(&qc->peer_addr, peer_addr, sizeof qc->peer_addr);
if (server && !qc_lstnr_params_init(qc, &l->bind_conf->quic_params,
icid->stateless_reset_token,
@@ -6028,7 +6030,8 @@
ipv4 = dgram->saddr.ss_family == AF_INET;
qc = qc_new_conn(qv, ipv4, &pkt->dcid, &pkt->scid, token_odcid,
- &pkt->saddr, 1, !!pkt->token_len, l);
+ &dgram->daddr, &pkt->saddr, 1,
+ !!pkt->token_len, l);
if (qc == NULL)
goto drop;
diff --git a/src/quic_sock.c b/src/quic_sock.c
index 6845c0c..cbb88e8 100644
--- a/src/quic_sock.c
+++ b/src/quic_sock.c
@@ -10,10 +10,12 @@
*
*/
+#define _GNU_SOURCE /* required for struct in6_pktinfo */
#include <errno.h>
#include <stdlib.h>
#include <string.h>
+#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
@@ -142,14 +144,17 @@
len = sizeof(qc->peer_addr);
memcpy(addr, &qc->peer_addr, len);
} else {
- /* FIXME: front connection, no local address for now, we'll
- * return the listener's address instead.
+ struct sockaddr_storage *from;
+
+ /* Return listener address if IP_PKTINFO or friends are not
+ * supported by the socket.
*/
BUG_ON(!qc->li);
-
- if (len > sizeof(qc->li->rx.addr))
- len = sizeof(qc->li->rx.addr);
- memcpy(addr, &qc->li->rx.addr, len);
+ from = is_addr(&qc->local_addr) ? &qc->local_addr :
+ &qc->li->rx.addr;
+ if (len > sizeof(*from))
+ len = sizeof(*from);
+ memcpy(addr, from, len);
}
return 0;
}
@@ -237,6 +242,7 @@
*/
static int quic_lstnr_dgram_dispatch(unsigned char *buf, size_t len, void *owner,
struct sockaddr_storage *saddr,
+ struct sockaddr_storage *daddr,
struct quic_dgram *new_dgram, struct list *dgrams)
{
struct quic_dgram *dgram;
@@ -260,6 +266,7 @@
dgram->dcid = dcid;
dgram->dcid_len = dcid_len;
dgram->saddr = *saddr;
+ dgram->daddr = *daddr;
dgram->qc = NULL;
LIST_APPEND(dgrams, &dgram->list);
MT_LIST_APPEND(&quic_dghdlrs[cid_tid].dgrams, &dgram->mt_list);
@@ -274,6 +281,111 @@
return 0;
}
+/* Receive one datagram from socket <fd> into buffer <out> of length <len> using
+ * recvmsg(). The syscall is retried as long as it fails with EINTR.
+ *
+ * <from> (size <from_len>) is handed to recvmsg() as the name buffer and thus
+ * receives the peer address. <to> (size <to_len>) is first reset by
+ * clear_addr(), then filled only if a matching IP_PKTINFO, IP_RECVDSTADDR or
+ * IPV6_PKTINFO control message is received and <to_len> is large enough. The
+ * caller must supply <dst_port>, copied unchanged as the port of <to>.
+ *
+ * Returns the value from the recvmsg() syscall.
+ */
+static ssize_t quic_recv(int fd, void *out, size_t len,
+ struct sockaddr *from, socklen_t from_len,
+ struct sockaddr *to, socklen_t to_len,
+ uint16_t dst_port)
+{
+ union pktinfo { /* only used to size <cdata> for the largest expected payload */
+#ifdef IP_PKTINFO
+ struct in_pktinfo in;
+#else /* !IP_PKTINFO */
+ struct in_addr addr;
+#endif
+#ifdef IPV6_RECVPKTINFO
+ struct in6_pktinfo in6;
+#endif
+ };
+ char cdata[CMSG_SPACE(sizeof(union pktinfo))]; /* control message buffer given to recvmsg() */
+ struct msghdr msg;
+ struct iovec vec;
+ struct cmsghdr *cmsg;
+ ssize_t ret;
+
+ vec.iov_base = out;
+ vec.iov_len = len;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_name = from;
+ msg.msg_namelen = from_len;
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &cdata;
+ msg.msg_controllen = sizeof(cdata);
+
+ clear_addr((struct sockaddr_storage *)to); /* <to> stays cleared unless a control message fills it below */
+
+ do {
+ ret = recvmsg(fd, &msg, 0);
+ } while (ret < 0 && errno == EINTR);
+
+ /* TODO handle errno. On EAGAIN/EWOULDBLOCK use fd_cant_recv() if
+ * using dedicated connection socket.
+ */
+
+ if (ret < 0)
+ goto end;
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { /* scan every received control message */
+ switch (cmsg->cmsg_level) {
+ case IPPROTO_IP:
+#if defined(IP_PKTINFO)
+ if (cmsg->cmsg_type == IP_PKTINFO) {
+ struct sockaddr_in *in = (struct sockaddr_in *)to;
+ struct in_pktinfo *info = (struct in_pktinfo *)CMSG_DATA(cmsg);
+
+ if (to_len >= sizeof(struct sockaddr_in)) { /* never write past the caller's <to> storage */
+ in->sin_family = AF_INET;
+ in->sin_addr = info->ipi_addr;
+ in->sin_port = dst_port;
+ }
+ }
+#elif defined(IP_RECVDSTADDR)
+ if (cmsg->cmsg_type == IP_RECVDSTADDR) {
+ struct sockaddr_in *in = (struct sockaddr_in *)to;
+ struct in_addr *info = (struct in_addr *)CMSG_DATA(cmsg);
+
+ if (to_len >= sizeof(struct sockaddr_in)) { /* never write past the caller's <to> storage */
+ in->sin_family = AF_INET;
+ in->sin_addr.s_addr = info->s_addr;
+ in->sin_port = dst_port;
+ }
+ }
+#endif /* IP_PKTINFO || IP_RECVDSTADDR */
+ break;
+
+ case IPPROTO_IPV6:
+#ifdef IPV6_RECVPKTINFO
+ if (cmsg->cmsg_type == IPV6_PKTINFO) {
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)to;
+ struct in6_pktinfo *info6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+
+ if (to_len >= sizeof(struct sockaddr_in6)) { /* never write past the caller's <to> storage */
+ in6->sin6_family = AF_INET6;
+ memcpy(&in6->sin6_addr, &info6->ipi6_addr, sizeof(in6->sin6_addr));
+ in6->sin6_port = dst_port;
+ }
+ }
+#endif
+ break;
+ }
+ }
+
+ end:
+ return ret;
+}
+
/* Function called on a read event from a listening socket. It tries
* to handle as many connections as possible.
*/
@@ -285,9 +397,8 @@
struct listener *l = objt_listener(fdtab[fd].owner);
struct quic_transport_params *params;
/* Source address */
- struct sockaddr_storage saddr = {0};
+ struct sockaddr_storage saddr = {0}, daddr = {0};
size_t max_sz, cspace;
- socklen_t saddrlen;
struct quic_dgram *new_dgram;
unsigned char *dgram_buf;
int max_dgrams;
@@ -358,18 +469,15 @@
}
dgram_buf = (unsigned char *)b_tail(buf);
- saddrlen = sizeof saddr;
- do {
- ret = recvfrom(fd, dgram_buf, max_sz, 0,
- (struct sockaddr *)&saddr, &saddrlen);
- if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
- fd_cant_recv(fd);
- goto out;
- }
- } while (ret < 0 && errno == EINTR);
+ ret = quic_recv(fd, dgram_buf, max_sz,
+ (struct sockaddr *)&saddr, sizeof(saddr),
+ (struct sockaddr *)&daddr, sizeof(daddr),
+ get_net_port(&l->rx.addr));
+ if (ret <= 0)
+ goto out;
b_add(buf, ret);
- if (!quic_lstnr_dgram_dispatch(dgram_buf, ret, l, &saddr,
+ if (!quic_lstnr_dgram_dispatch(dgram_buf, ret, l, &saddr, &daddr,
new_dgram, &rxbuf->dgrams)) {
/* If wrong, consume this datagram */
b_del(buf, ret);
diff --git a/src/sock_inet.c b/src/sock_inet.c
index d517e4c..f8e872c 100644
--- a/src/sock_inet.c
+++ b/src/sock_inet.c
@@ -381,6 +381,25 @@
}
#endif
+#ifdef USE_QUIC
+ if (rx->proto->proto_type == PROTO_TYPE_DGRAM) {
+ switch (addr_inet.ss_family) {
+ case AF_INET:
+#if defined(IP_PKTINFO)
+ setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));
+#elif defined(IP_RECVDSTADDR)
+ setsockopt(fd, IPPROTO_IP, IP_RECVDSTADDR, &one, sizeof(one));
+#endif /* IP_PKTINFO || IP_RECVDSTADDR */
+ break;
+ case AF_INET6:
+#ifdef IPV6_RECVPKTINFO
+ setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one));
+#endif
+ break;
+ }
+ }
+#endif /* USE_QUIC */
+
if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) {
err |= ERR_RETRYABLE | ERR_ALERT;
memprintf(errmsg, "cannot bind socket (%s)", strerror(errno));