MINOR: fd: add a lock bit with the tgid

In order to allow FDs to be migrated from one thread group to another,
we'll need to be able to set a TGID that is compatible with no other
thread group. Either we use a special value or we dedicate a special
bit. Given that we already have way more bits than needed, let's just
sacrifice the topmost one to serve as a lock bit, indicating the tgid
is not valid anymore. This will make every fd_grab_tgid() call fail to
grab it.

The new fd_lock_tgid() function now tries to assign a locked tgid to
an idle FD, and fd_unlock_tgid() simply drops the lock bit, revealing
the target tgid.

For now it's still unused so it must not have any effect.
diff --git a/include/haproxy/fd-t.h b/include/haproxy/fd-t.h
index d0c3134..c5e94cb 100644
--- a/include/haproxy/fd-t.h
+++ b/include/haproxy/fd-t.h
@@ -179,9 +179,10 @@
  * 32-bit small archs can put everything in 32-bytes when threads are disabled.
  * refc_tgid is an atomic 32-bit composite value made of 16 higher bits
  * containing a refcount on tgid and the running_mask, and 16 lower bits
- * containing a thread group ID. The tgid may only be changed when refc is zero
- * and running may only be checked/changed when refc is held and shows the
- * reader is alone. An FD with tgid zero belongs to nobody.
+ * containing a thread group ID and a lock bit on the topmost one (bit 15). The
+ * tgid may only be changed when refc is zero and running may only be
+ * checked/changed when refc is held and shows the reader is alone. An FD with
+ * tgid zero belongs to nobody.
  */
 struct fdtab {
 	unsigned long running_mask;          /* mask of thread IDs currently using the fd */
diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h
index 9c07680..682aa94 100644
--- a/include/haproxy/fd.h
+++ b/include/haproxy/fd.h
@@ -330,6 +330,39 @@
 	HA_ATOMIC_SUB(&fdtab[fd].refc_tgid, 0x10000);
 }
 
+/* Unlock a tgid currently locked by fd_lock_tgid() by atomically clearing the
+ * lock bit. This allows threads from the FD's tgid to check the masks and use it.
+ */
+static forceinline void fd_unlock_tgid(int fd)
+{
+	HA_ATOMIC_AND(&fdtab[fd].refc_tgid, 0xffff7fffU); // clear only bit 15 (lock); refcount and tgid are kept
+}
+
+/* Switch the FD's TGID to <desired_tgid> with a refcount of 1 and the lock bit
+ * set. It doesn't care about the current TGID, but it spins until the FD is
+ * neither locked (i.e. already switching) nor referenced (refcount is zero).
+ * After the function returns, the caller is free to manipulate the masks, and
+ * it must call fd_unlock_tgid() to drop the lock, allowing threads from the
+ * designated group to use the FD. Finally a call to fd_drop_tgid() will be
+ * needed to drop the reference. <desired_tgid> must not be zero.
+ */
+static inline void fd_lock_tgid(int fd, uint desired_tgid)
+{
+	uint old; // expected value of refc_tgid for the CAS below
+
+	BUG_ON(!desired_tgid); // a tgid of zero means the FD belongs to nobody
+
+	old = tgid;  // assume we start from the caller's tgid
+	desired_tgid |= 0x18000; // refcount=1, lock bit=1.
+
+	while (1) {
+		old &= 0x7fff; // expect no lock and refcount==0
+		if (_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &old, desired_tgid))
+			break; // NOTE(review): on failure <old> presumably holds the value seen - confirm
+		__ha_cpu_relax(); // busy-wait before retrying
+	}
+}
+
 /* Grab a reference to the FD's TGID, and return the tgid. Note that a TGID of
  * zero indicates the FD was closed, thus also fails (i.e. no need to drop it).
  * On non-zero (success), the caller must release it using fd_drop_tgid().
@@ -387,7 +420,7 @@
 		if (_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &old, desired_tgid))
 			break;
 		__ha_cpu_relax();
-		old &= 0xffff;
+		old &= 0x7fff;   // keep only the tgid and drop the lock
 	}
 }