MEDIUM: quic: use a global CID trees list
Previously, quic_connection_id instances were stored in per-thread tree
lists. Datagrams were first dispatched to the correct thread using the
encoded TID before a tree lookup was done.
Remove these trees and replace them with a global list of 256 trees. A
CID uses the list index corresponding to its first byte. On datagram
dispatch, the CID is looked up in its tree and the TID is retrieved
using the new member quic_connection_id.tid. A read-write lock protects
each tree instance. With 256 entries, it is expected that contention
will be reduced.
A new structure quic_cid_tree serves as a tree container associated with
its read-write lock. An API is implemented to ensure lock safety for
insert/lookup/delete operations.
This patch is a step forward to be able to break the affinity between a
CID and its encoded TID. This is required to be able to migrate a
quic_conn after accept, selecting a thread based on its load.
This should be backported up to 2.7 after a period of observation.
diff --git a/src/proto_quic.c b/src/proto_quic.c
index b1708eb..0961fc2 100644
--- a/src/proto_quic.c
+++ b/src/proto_quic.c
@@ -51,6 +51,10 @@
/* per-thread quic datagram handlers */
struct quic_dghdlr *quic_dghdlrs;
+
+/* global CID trees */
+#define QUIC_CID_TREES_CNT 256
+struct quic_cid_tree *quic_cid_trees;
/* Size of the internal buffer of QUIC RX buffer at the fd level */
#define QUIC_RX_BUFSZ (1UL << 18)
@@ -734,11 +739,20 @@
dghdlr->task->context = dghdlr;
dghdlr->task->process = quic_lstnr_dghdlr;
- dghdlr->cids = EB_ROOT_UNIQUE;
-
MT_LIST_INIT(&dghdlr->dgrams);
}
+ quic_cid_trees = calloc(QUIC_CID_TREES_CNT, sizeof(struct quic_cid_tree));
+ if (!quic_cid_trees) {
+ ha_alert("Failed to allocate global CIDs trees.\n");
+ return 0;
+ }
+
+ for (i = 0; i < QUIC_CID_TREES_CNT; ++i) {
+ HA_RWLOCK_INIT(&quic_cid_trees[i].lock);
+ quic_cid_trees[i].root = EB_ROOT_UNIQUE;
+ }
+
return 1;
}
REGISTER_POST_CHECK(quic_alloc_dghdlrs);
@@ -753,6 +767,8 @@
free(quic_dghdlrs);
}
+ ha_free(&quic_cid_trees);
+
return 1;
}
REGISTER_POST_DEINIT(quic_deallocate_dghdlrs);
diff --git a/src/quic_conn.c b/src/quic_conn.c
index e1725e5..1a6d117 100644
--- a/src/quic_conn.c
+++ b/src/quic_conn.c
@@ -3253,8 +3253,7 @@
TRACE_ERROR("CID allocation error", QUIC_EV_CONN_IO_CB, qc);
}
else {
- /* insert the allocated CID in the receiver datagram handler tree */
- ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len);
+ quic_cid_insert(conn_id);
qc_build_new_connection_id_frm(qc, conn_id);
}
break;
@@ -3969,6 +3968,59 @@
return cid;
}
+/* Retrieve the thread ID associated to QUIC connection ID <cid> of length
+ * <cid_len>. CID may be not found on the CID tree because it is an ODCID. In
+ * this case, it will be derived using client address <cli_addr> as hash
+ * parameter. However, this is done only if <buf> points to an INITIAL or 0RTT
+ * packet of length <len>.
+ *
+ * Returns the thread ID or a negative error code.
+ */
+int quic_get_cid_tid(const unsigned char *cid, size_t cid_len,
+                     const struct sockaddr_storage *cli_addr,
+                     unsigned char *buf, size_t buf_len)
+{
+	struct quic_cid_tree *tree;
+	struct quic_connection_id *conn_id;
+	struct ebmb_node *node;
+
+	tree = &quic_cid_trees[_quic_cid_tree_idx(cid)];
+	HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+	node = ebmb_lookup(&tree->root, cid, cid_len);
+	HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+
+	if (!node) {
+		struct quic_cid orig, derive_cid;
+		struct quic_rx_packet pkt;
+
+		if (!qc_parse_hd_form(&pkt, &buf, buf + buf_len))
+			goto not_found;
+
+		if (pkt.type != QUIC_PACKET_TYPE_INITIAL &&
+		    pkt.type != QUIC_PACKET_TYPE_0RTT) {
+			goto not_found;
+		}
+
+		memcpy(orig.data, cid, cid_len);
+		orig.len = cid_len;
+		derive_cid = quic_derive_cid(&orig, cli_addr);
+
+		tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)];
+		HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+		node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len);
+		HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+	}
+
+	if (!node)
+		goto not_found;
+
+	conn_id = ebmb_entry(node, struct quic_connection_id, node);
+	return HA_ATOMIC_LOAD(&conn_id->tid);
+
+ not_found:
+	return -1;
+}
+
/* Allocate a new CID and attach it to <root> ebtree.
*
* If <orig> and <addr> params are non null, the new CID value is directly
@@ -4016,6 +4068,7 @@
}
conn_id->qc = qc;
+ HA_ATOMIC_STORE(&conn_id->tid, tid);
conn_id->seq_num.key = qc->next_cid_seq_num++;
conn_id->retire_prior_to = 0;
@@ -4082,8 +4135,10 @@
goto err;
}
- /* insert the allocated CID in the receiver datagram handler tree */
- ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len);
+ /* TODO To prevent CID tree locking, all CIDs created here
+ * could be allocated at the same time as the first one.
+ */
+ quic_cid_insert(conn_id);
quic_connection_id_to_frm_cpy(frm, conn_id);
LIST_APPEND(&frm_list, &frm->list);
@@ -4114,7 +4169,8 @@
break;
node = eb64_next(node);
- ebmb_delete(&conn_id->node);
+ quic_cid_delete(conn_id);
+
eb64_delete(&conn_id->seq_num);
pool_free(pool_head_quic_connection_id, conn_id);
}
@@ -5504,7 +5560,7 @@
/* insert the allocated CID in the receiver datagram handler tree */
if (server)
- ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len);
+ quic_cid_insert(conn_id);
/* Select our SCID which is the first CID with 0 as sequence number. */
qc->scid = conn_id->cid;
@@ -6511,11 +6567,15 @@
struct quic_conn *qc = NULL;
struct ebmb_node *node;
struct quic_connection_id *conn_id;
+ struct quic_cid_tree *tree;
TRACE_ENTER(QUIC_EV_CONN_RXPKT);
/* First look into DCID tree. */
- node = ebmb_lookup(&quic_dghdlrs[tid].cids, pkt->dcid.data, pkt->dcid.len);
+ tree = &quic_cid_trees[_quic_cid_tree_idx(pkt->dcid.data)];
+ HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+ node = ebmb_lookup(&tree->root, pkt->dcid.data, pkt->dcid.len);
+ HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
/* If not found on an Initial/0-RTT packet, it could be because an
* ODCID is reused by the client. Calculate the derived CID value to
@@ -6524,7 +6584,11 @@
if (!node && (pkt->type == QUIC_PACKET_TYPE_INITIAL ||
pkt->type == QUIC_PACKET_TYPE_0RTT)) {
const struct quic_cid derive_cid = quic_derive_cid(&pkt->dcid, saddr);
- node = ebmb_lookup(&quic_dghdlrs[tid].cids, derive_cid.data, derive_cid.len);
+
+ tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)];
+ HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+ node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len);
+ HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
}
if (!node)
@@ -8196,9 +8260,12 @@
*/
int qc_check_dcid(struct quic_conn *qc, unsigned char *dcid, size_t dcid_len)
{
- struct ebmb_node *node;
+ const uchar idx = _quic_cid_tree_idx(dcid);
struct quic_connection_id *conn_id;
+ struct ebmb_node *node = NULL;
+ struct quic_cid_tree *tree = &quic_cid_trees[idx];
+ /* Test against our default CID or client ODCID. */
if ((qc->scid.len == dcid_len &&
memcmp(qc->scid.data, dcid, dcid_len) == 0) ||
(qc->odcid.len == dcid_len &&
@@ -8206,7 +8273,17 @@
return 1;
}
+ /* Test against our other CIDs. This can happen if the client has
+ * decided to switch to a new one.
+ *
+ * TODO to avoid locking, loop through qc.cids as an alternative.
+ *
+ * TODO set it to our default CID to avoid this operation next time.
+ */
+ HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+ node = ebmb_lookup(&tree->root, dcid, dcid_len);
+ HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+
- node = ebmb_lookup(&quic_dghdlrs[tid].cids, dcid, dcid_len);
if (node) {
conn_id = ebmb_entry(node, struct quic_connection_id, node);
if (qc == conn_id->qc)
diff --git a/src/quic_sock.c b/src/quic_sock.c
index 905ecab..3b13426 100644
--- a/src/quic_sock.c
+++ b/src/quic_sock.c
@@ -209,7 +209,6 @@
struct quic_dgram *new_dgram, struct list *dgrams)
{
struct quic_dgram *dgram;
- const struct listener *l = owner;
unsigned char *dcid;
size_t dcid_len;
int cid_tid;
@@ -221,7 +220,18 @@
if (!dgram)
goto err;
- cid_tid = quic_get_cid_tid(dcid, &l->rx);
+ if ((cid_tid = quic_get_cid_tid(dcid, dcid_len, saddr, buf, len)) < 0) {
+ /* CID not found. Ensure only one thread will handle this CID
+ * to avoid duplicating connection creation. This code may not
+ * work if using thread-mask on the listener but is only here
+ * for a short time.
+ *
+ * TODO this code implies the thread used for QUIC handshake is
+ * directly derived from client chosen CID. This association
+ * must be broken.
+ */
+ cid_tid = dcid[0] % global.nbthread;
+ }
/* All the members must be initialized! */
dgram->owner = owner;
diff --git a/src/thread.c b/src/thread.c
index 6001f8c..d712825 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -443,6 +443,7 @@
case SFT_LOCK: return "SFT";
case IDLE_CONNS_LOCK: return "IDLE_CONNS";
case OCSP_LOCK: return "OCSP";
+ case QC_CID_LOCK: return "QC_CID";
case OTHER_LOCK: return "OTHER";
case DEBUG1_LOCK: return "DEBUG1";
case DEBUG2_LOCK: return "DEBUG2";