MEDIUM: quic: respect the threads assigned to a bind line

Right now the QUIC thread mapping derives the thread ID from the CID
by taking its first byte modulo global.nbthread. This is a problem
because it makes QUIC work on all threads and ignores the "thread"
directive on the bind lines. In addition, only 8 bits are used, which
is no longer compatible with the up to 4096 threads we may have in a
configuration.

Let's modify it this way:
  - the CID now dedicates 12 bits to the thread ID
  - on output we continue to place the TID directly there.
  - on input, the value is extracted. If it corresponds to a valid
    thread number of the bind_conf, it's used as-is.
  - otherwise it's used as a rank within the current bind_conf's
    thread mask so that in the end we still get a valid thread ID
    for this bind_conf.

The extraction function now requires a bind_conf in order to get the
thread group and thread mask. A bind_conf is passed rather than a
listener because the goal is to make bind_confs support multiple
listeners sooner or later.
diff --git a/include/haproxy/quic_conn.h b/include/haproxy/quic_conn.h
index 2a962f4..a2292d0 100644
--- a/include/haproxy/quic_conn.h
+++ b/include/haproxy/quic_conn.h
@@ -213,19 +213,48 @@
 	to->stateless_reset_token = src->stateless_reset_token;
 }
 
-/* Retrieve the associated thread ID for <cid>. */
-static inline unsigned long quic_get_cid_tid(const unsigned char *cid)
+/* extract a TID from a CID for bind_conf <bc>, from 0 to global.nbthread-1 and
+ * in any case no more than 4095. It takes into account the bind_conf's thread
+ * group and the bind_conf's thread mask. The algorithm is the following: most
+ * packets contain a valid thread ID for the bind_conf, which means that the
+ * retrieved ID directly maps to a bound thread ID. If that's not the case,
+ * then we have to remap it. The resulting thread ID will then differ but will
+ * be correctly encoded and decoded.
+ */
+static inline uint quic_get_cid_tid(const unsigned char *cid, const struct bind_conf *bc)
 {
-	return *cid % global.nbthread;
+	uint id, grp;
+	uint base, count;
+
+	id = read_n16(cid) & 4095;
+	grp = bc->bind_tgroup;
+	base  = ha_tgroup_info[grp - 1].base;
+	count = ha_tgroup_info[grp - 1].count;
+
+	if (base <= id && id < base + count &&
+	    bc->bind_thread & ha_thread_info[id].ltid_bit)
+		return id; // part of the group and bound: valid
+
+	/* The thread number isn't valid, it doesn't map to a thread bound on
+	 * this receiver. Let's reduce it to one of the thread(s) valid for
+	 * that receiver.
+	 */
+	count = my_popcountl(bc->bind_thread);
+	id = count - 1 - id % count;
+	id = mask_find_rank_bit(id, bc->bind_thread);
+	id += base;
+	return id;
 }
 
-/* Modify <cid> to have a CID linked to the thread ID <target_tid>. This is
- * based on quic_get_cid_tid.
+/* Modify <cid> to have a CID linked to the thread ID <target_tid> that
+ * quic_get_cid_tid() will be able to extract and return.
  */
-static inline void quic_pin_cid_to_tid(unsigned char *cid, int target_tid)
+static inline void quic_pin_cid_to_tid(unsigned char *cid, uint target_tid)
 {
-	cid[0] = MIN(cid[0], 255 - target_tid);
-	cid[0] = cid[0] - (cid[0] % global.nbthread) + target_tid;
+	uint16_t prev_id;
+
+	prev_id = read_n16(cid);
+	write_n16(cid, (prev_id & ~4095) | target_tid);
 }
 
 /* Return a 32-bits integer in <val> from QUIC packet with <buf> as address.
diff --git a/src/quic_conn.c b/src/quic_conn.c
index b0ee04f..88993dc 100644
--- a/src/quic_conn.c
+++ b/src/quic_conn.c
@@ -4863,7 +4863,8 @@
 	/* Set tasklet tid based on the SCID selected by us for this
 	 * connection. The upper layer will also be binded on the same thread.
 	 */
-	qc->tid = qc->wait_event.tasklet->tid = quic_get_cid_tid(qc->scid.data);
+	qc->tid = quic_get_cid_tid(qc->scid.data, l->bind_conf);
+	qc->wait_event.tasklet->tid = qc->tid;
 
 	if (qc_conn_alloc_ssl_ctx(qc) ||
 	    !quic_conn_init_timer(qc) ||
diff --git a/src/quic_sock.c b/src/quic_sock.c
index 0bb6673..8617489 100644
--- a/src/quic_sock.c
+++ b/src/quic_sock.c
@@ -246,6 +246,7 @@
                                      struct quic_dgram *new_dgram, struct list *dgrams)
 {
 	struct quic_dgram *dgram;
+	const struct listener *l = owner;
 	unsigned char *dcid;
 	size_t dcid_len;
 	int cid_tid;
@@ -257,7 +258,7 @@
 	if (!dgram)
 		goto err;
 
-	cid_tid = quic_get_cid_tid(dcid);
+	cid_tid = quic_get_cid_tid(dcid, l->bind_conf);
 
 	/* All the members must be initialized! */
 	dgram->owner = owner;