MEDIUM: quic: use a global CID trees list

Previously, quic_connection_id were stored in a per-thread tree list.
Datagram were first dispatched to the correct thread using the encoded
TID before a tree lookup was done.

Remove these trees and replace it with a global trees list of 256
entries. A CID is using the list index corresponding to its first byte.
On datagram dispatch, CID is lookup on its tree and TID is retrieved
using new member quic_connection_id.tid. As such, a read-write lock
protects each list instances. With 256 entries, it is expected that
contention should be reduced.

A new structure quic_cid_tree served as a tree container associated with
its read-write lock. An API is implemented to ensure lock safety for
insert/lookup/delete operation.

This patch is a step forward to be able to break the affinity between a
CID and a TID encoded thread. This is required to be able to migrate a
quic_conn after accept to select thread based on their load.

This should be backported up to 2.7 after a period of observation.
diff --git a/include/haproxy/proto_quic.h b/include/haproxy/proto_quic.h
index 8677cbf..a0e2b98 100644
--- a/include/haproxy/proto_quic.h
+++ b/include/haproxy/proto_quic.h
@@ -24,6 +24,12 @@
 extern struct protocol proto_quic4;
 extern struct protocol proto_quic6;
 
+struct quic_cid_tree {
+	struct eb_root root;
+	__decl_thread(HA_RWLOCK_T lock);
+};
+
 extern struct quic_dghdlr *quic_dghdlrs;
+extern struct quic_cid_tree *quic_cid_trees;
 
 #endif /* _HAPROXY_PROTO_QUIC_H  */
diff --git a/include/haproxy/quic_conn-t.h b/include/haproxy/quic_conn-t.h
index d366a0b..c8c6f48 100644
--- a/include/haproxy/quic_conn-t.h
+++ b/include/haproxy/quic_conn-t.h
@@ -303,6 +303,7 @@
 	struct quic_cid cid;   /* CID data */
 
 	struct quic_conn *qc;  /* QUIC connection using this CID */
+	uint tid;              /* Attached Thread ID for the connection. */
 };
 
 /* Structure to hold a range of ACKs sent in ACK frames. */
@@ -444,7 +445,6 @@
 struct quic_dghdlr {
 	struct mt_list dgrams;
 	struct tasklet *task;
-	struct eb_root cids;
 };
 
 /* Structure to store enough information about the RX CRYPTO frames. */
diff --git a/include/haproxy/quic_conn.h b/include/haproxy/quic_conn.h
index e1b333e..b52535f 100644
--- a/include/haproxy/quic_conn.h
+++ b/include/haproxy/quic_conn.h
@@ -38,6 +38,7 @@
 #include <haproxy/ticks.h>
 
 #include <haproxy/listener.h>
+#include <haproxy/proto_quic.h>
 #include <haproxy/quic_cc.h>
 #include <haproxy/quic_conn-t.h>
 #include <haproxy/quic_enc.h>
@@ -137,9 +138,41 @@
 	chunk_appendf(buf, ")");
 }
 
-/* Free the CIDs attached to <conn> QUIC connection. This must be called under
- * the CID lock.
- */
+/* Return tree index where <cid> is stored. */
+static inline uchar _quic_cid_tree_idx(const unsigned char *cid)
+{
+	return cid[0];
+}
+
+/* Return tree index where <cid> is stored. */
+static inline uchar quic_cid_tree_idx(const struct quic_cid *cid)
+{
+	return _quic_cid_tree_idx(cid->data);
+}
+
+/* Insert <conn_id> into global CID tree as a thread-safe operation. */
+static inline void quic_cid_insert(struct quic_connection_id *conn_id)
+{
+	const uchar idx = quic_cid_tree_idx(&conn_id->cid);
+	struct quic_cid_tree *tree = &quic_cid_trees[idx];
+
+	HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock);
+	ebmb_insert(&tree->root, &conn_id->node, conn_id->cid.len);
+	HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock);
+}
+
+/* Remove <conn_id> from global CID tree as a thread-safe operation. */
+static inline void quic_cid_delete(struct quic_connection_id *conn_id)
+{
+	const uchar idx = quic_cid_tree_idx(&conn_id->cid);
+	struct quic_cid_tree *tree = &quic_cid_trees[idx];
+
+	HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock);
+	ebmb_delete(&conn_id->node);
+	HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock);
+}
+
+/* Free the CIDs attached to <conn> QUIC connection. */
 static inline void free_quic_conn_cids(struct quic_conn *conn)
 {
 	struct eb64_node *node;
@@ -151,7 +184,7 @@
 		conn_id = eb64_entry(node, struct quic_connection_id, seq_num);
 
 		/* remove the CID from the receiver tree */
-		ebmb_delete(&conn_id->node);
+		quic_cid_delete(conn_id);
 
 		/* remove the CID from the quic_conn tree */
 		node = eb64_next(node);
@@ -175,39 +208,6 @@
 	to->stateless_reset_token = src->stateless_reset_token;
 }
 
-/* extract a TID from a CID for receiver <rx>, from 0 to global.nbthread-1 and
- * in any case no more than 4095. It takes into account the bind_conf's thread
- * group and the bind_conf's thread mask. The algorithm is the following: most
- * packets contain a valid thread ID for the bind_conf, which means that the
- * retrieved ID directly maps to a bound thread ID. If that's not the case,
- * then we have to remap it. The resulting thread ID will then differ but will
- * be correctly encoded and decoded.
- */
-static inline uint quic_get_cid_tid(const unsigned char *cid, const struct receiver *rx)
-{
-	uint id, grp;
-	uint base, count;
-
-	id = read_n16(cid) & 4095;
-	grp = rx->bind_tgroup;
-	base  = ha_tgroup_info[grp - 1].base;
-	count = ha_tgroup_info[grp - 1].count;
-
-	if (base <= id && id < base + count &&
-	    rx->bind_thread & ha_thread_info[id].ltid_bit)
-		return id; // part of the group and bound: valid
-
-	/* The thread number isn't valid, it doesn't map to a thread bound on
-	 * this receiver. Let's reduce it to one of the thread(s) valid for
-	 * that receiver.
-	 */
-	count = my_popcountl(rx->bind_thread);
-	id = count - 1 - id % count;
-	id = mask_find_rank_bit(id, rx->bind_thread);
-	id += base;
-	return id;
-}
-
 /* Modify <cid> to have a CID linked to the thread ID <target_tid> that
  * quic_get_cid_tid() will be able to extract return.
  */
@@ -676,6 +676,9 @@
                         unsigned char **dcid, size_t *dcid_len);
 struct quic_cid quic_derive_cid(const struct quic_cid *orig,
                                 const struct sockaddr_storage *addr);
+int quic_get_cid_tid(const unsigned char *cid, size_t cid_len,
+                     const struct sockaddr_storage *cli_addr,
+                     unsigned char *buf, size_t buf_len);
 int qc_send_mux(struct quic_conn *qc, struct list *frms);
 
 void qc_notify_close(struct quic_conn *qc);
diff --git a/include/haproxy/thread.h b/include/haproxy/thread.h
index b504ab6..30ff782 100644
--- a/include/haproxy/thread.h
+++ b/include/haproxy/thread.h
@@ -424,6 +424,7 @@
 	SFT_LOCK, /* sink forward target */
 	IDLE_CONNS_LOCK,
 	OCSP_LOCK,
+	QC_CID_LOCK,
 	OTHER_LOCK,
 	/* WT: make sure never to use these ones outside of development,
 	 * we need them for lock profiling!
diff --git a/src/proto_quic.c b/src/proto_quic.c
index b1708eb..0961fc2 100644
--- a/src/proto_quic.c
+++ b/src/proto_quic.c
@@ -51,6 +51,11 @@
 
 /* per-thread quic datagram handlers */
 struct quic_dghdlr *quic_dghdlrs;
+struct eb_root *quic_cid_tree;
+
+/* global CID trees */
+#define QUIC_CID_TREES_CNT 256
+struct quic_cid_tree *quic_cid_trees;
 
 /* Size of the internal buffer of QUIC RX buffer at the fd level */
 #define QUIC_RX_BUFSZ  (1UL << 18)
@@ -734,11 +739,20 @@
 		dghdlr->task->context = dghdlr;
 		dghdlr->task->process = quic_lstnr_dghdlr;
 
-		dghdlr->cids = EB_ROOT_UNIQUE;
-
 		MT_LIST_INIT(&dghdlr->dgrams);
 	}
 
+	quic_cid_trees = calloc(QUIC_CID_TREES_CNT, sizeof(struct quic_cid_tree));
+	if (!quic_cid_trees) {
+		ha_alert("Failed to allocate global CIDs trees.\n");
+		return 0;
+	}
+
+	for (i = 0; i < QUIC_CID_TREES_CNT; ++i) {
+		HA_RWLOCK_INIT(&quic_cid_trees[i].lock);
+		quic_cid_trees[i].root = EB_ROOT_UNIQUE;
+	}
+
 	return 1;
 }
 REGISTER_POST_CHECK(quic_alloc_dghdlrs);
@@ -753,6 +767,8 @@
 		free(quic_dghdlrs);
 	}
 
+	ha_free(&quic_cid_trees);
+
 	return 1;
 }
 REGISTER_POST_DEINIT(quic_deallocate_dghdlrs);
diff --git a/src/quic_conn.c b/src/quic_conn.c
index e1725e5..1a6d117 100644
--- a/src/quic_conn.c
+++ b/src/quic_conn.c
@@ -3253,8 +3253,7 @@
 				TRACE_ERROR("CID allocation error", QUIC_EV_CONN_IO_CB, qc);
 			}
 			else {
-				/* insert the allocated CID in the receiver datagram handler tree */
-				ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len);
+				quic_cid_insert(conn_id);
 				qc_build_new_connection_id_frm(qc, conn_id);
 			}
 			break;
@@ -3969,6 +3968,59 @@
 	return cid;
 }
 
+/* Retrieve the thread ID associated to QUIC connection ID <cid> of length
+ * <cid_len>. CID may be not found on the CID tree because it is an ODCID. In
+ * this case, it will derived using client address <cli_addr> as hash
+ * parameter. However, this is done only if <buf> points to an INITIAL or 0RTT
+ * packet of length <len>.
+ *
+ * Returns the thread ID or a negative error code.
+ */
+int quic_get_cid_tid(const unsigned char *cid, size_t cid_len,
+                     const struct sockaddr_storage *cli_addr,
+                     unsigned char *buf, size_t buf_len)
+{
+	struct quic_cid_tree *tree;
+	struct quic_connection_id *conn_id;
+	struct ebmb_node *node;
+
+	tree = &quic_cid_trees[_quic_cid_tree_idx(cid)];
+	HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+	node = ebmb_lookup(&tree->root, cid, cid_len);
+	HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+
+	if (!node) {
+		struct quic_cid orig, derive_cid;
+		struct quic_rx_packet pkt;
+
+		if (!qc_parse_hd_form(&pkt, &buf, buf + buf_len))
+			goto not_found;
+
+		if (pkt.type != QUIC_PACKET_TYPE_INITIAL &&
+		    pkt.type != QUIC_PACKET_TYPE_0RTT) {
+			goto not_found;
+		}
+
+		memcpy(orig.data, cid, cid_len);
+		orig.len = cid_len;
+		derive_cid = quic_derive_cid(&orig, cli_addr);
+
+		tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)];
+		HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+		node = ebmb_lookup(&tree->root, cid, cid_len);
+		HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+	}
+
+	if (!node)
+		goto not_found;
+
+	conn_id = ebmb_entry(node, struct quic_connection_id, node);
+	return HA_ATOMIC_LOAD(&conn_id->tid);
+
+ not_found:
+	return -1;
+}
+
 /* Allocate a new CID and attach it to <root> ebtree.
  *
  * If <orig> and <addr> params are non null, the new CID value is directly
@@ -4016,6 +4068,7 @@
 	}
 
 	conn_id->qc = qc;
+	HA_ATOMIC_STORE(&conn_id->tid, tid);
 
 	conn_id->seq_num.key = qc->next_cid_seq_num++;
 	conn_id->retire_prior_to = 0;
@@ -4082,8 +4135,10 @@
 			goto err;
 		}
 
-		/* insert the allocated CID in the receiver datagram handler tree */
-		ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len);
+		/* TODO To prevent CID tree locking, all CIDs created here
+		 * could be allocated at the same time as the first one.
+		 */
+		quic_cid_insert(conn_id);
 
 		quic_connection_id_to_frm_cpy(frm, conn_id);
 		LIST_APPEND(&frm_list, &frm->list);
@@ -4114,7 +4169,8 @@
 			break;
 
 		node = eb64_next(node);
-		ebmb_delete(&conn_id->node);
+		quic_cid_delete(conn_id);
+
 		eb64_delete(&conn_id->seq_num);
 		pool_free(pool_head_quic_connection_id, conn_id);
 	}
@@ -5504,7 +5560,7 @@
 
 	/* insert the allocated CID in the receiver datagram handler tree */
 	if (server)
-		ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len);
+		quic_cid_insert(conn_id);
 
 	/* Select our SCID which is the first CID with 0 as sequence number. */
 	qc->scid = conn_id->cid;
@@ -6511,11 +6567,15 @@
 	struct quic_conn *qc = NULL;
 	struct ebmb_node *node;
 	struct quic_connection_id *conn_id;
+	struct quic_cid_tree *tree;
 
 	TRACE_ENTER(QUIC_EV_CONN_RXPKT);
 
 	/* First look into DCID tree. */
-	node = ebmb_lookup(&quic_dghdlrs[tid].cids, pkt->dcid.data, pkt->dcid.len);
+	tree = &quic_cid_trees[_quic_cid_tree_idx(pkt->dcid.data)];
+	HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+	node = ebmb_lookup(&tree->root, pkt->dcid.data, pkt->dcid.len);
+	HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
 
 	/* If not found on an Initial/0-RTT packet, it could be because an
 	 * ODCID is reused by the client. Calculate the derived CID value to
@@ -6524,7 +6584,11 @@
 	if (!node && (pkt->type == QUIC_PACKET_TYPE_INITIAL ||
 	     pkt->type == QUIC_PACKET_TYPE_0RTT)) {
 		const struct quic_cid derive_cid = quic_derive_cid(&pkt->dcid, saddr);
-		node = ebmb_lookup(&quic_dghdlrs[tid].cids, derive_cid.data, derive_cid.len);
+
+		tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)];
+		HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+		node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len);
+		HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
 	}
 
 	if (!node)
@@ -8196,9 +8260,12 @@
  */
 int qc_check_dcid(struct quic_conn *qc, unsigned char *dcid, size_t dcid_len)
 {
-	struct ebmb_node *node;
+	const uchar idx = _quic_cid_tree_idx(dcid);
 	struct quic_connection_id *conn_id;
+	struct ebmb_node *node = NULL;
+	struct quic_cid_tree *tree = &quic_cid_trees[idx];
 
+	/* Test against our default CID or client ODCID. */
 	if ((qc->scid.len == dcid_len &&
 	     memcmp(qc->scid.data, dcid, dcid_len) == 0) ||
 	    (qc->odcid.len == dcid_len &&
@@ -8206,7 +8273,17 @@
 		return 1;
 	}
 
+	/* Test against our other CIDs. This can happen if the client has
+	 * decided to switch to a new one.
+	 *
+	 * TODO to avoid locking, loop through qc.cids as an alternative.
+	 *
+	 * TODO set it to our default CID to avoid this operation next time.
+	 */
+	HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+	node = ebmb_lookup(&tree->root, dcid, dcid_len);
+	HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+
-	node = ebmb_lookup(&quic_dghdlrs[tid].cids, dcid, dcid_len);
 	if (node) {
 		conn_id = ebmb_entry(node, struct quic_connection_id, node);
 		if (qc == conn_id->qc)
diff --git a/src/quic_sock.c b/src/quic_sock.c
index 905ecab..3b13426 100644
--- a/src/quic_sock.c
+++ b/src/quic_sock.c
@@ -209,7 +209,6 @@
                                      struct quic_dgram *new_dgram, struct list *dgrams)
 {
 	struct quic_dgram *dgram;
-	const struct listener *l = owner;
 	unsigned char *dcid;
 	size_t dcid_len;
 	int cid_tid;
@@ -221,7 +220,18 @@
 	if (!dgram)
 		goto err;
 
-	cid_tid = quic_get_cid_tid(dcid, &l->rx);
+	if ((cid_tid = quic_get_cid_tid(dcid, dcid_len, saddr, buf, len)) < 0) {
+		/* CID not found. Ensure only one thread will handle this CID
+		 * to avoid duplicating connection creation. This code may not
+		 * work if using thread-mask on the listener but is only here
+		 * for a short time.
+		 *
+		 * TODO this code implies the thread used for QUIC handshake is
+		 * directly derived from client chosen CID. This association
+		 * must be broken.
+		 */
+		cid_tid = dcid[0] % global.nbthread;
+	}
 
 	/* All the members must be initialized! */
 	dgram->owner = owner;
diff --git a/src/thread.c b/src/thread.c
index 6001f8c..d712825 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -443,6 +443,7 @@
 	case SFT_LOCK:             return "SFT";
 	case IDLE_CONNS_LOCK:      return "IDLE_CONNS";
 	case OCSP_LOCK:            return "OCSP";
+	case QC_CID_LOCK:          return "QC_CID";
 	case OTHER_LOCK:           return "OTHER";
 	case DEBUG1_LOCK:          return "DEBUG1";
 	case DEBUG2_LOCK:          return "DEBUG2";