MINOR: fd: slightly optimize the fd_takeover double-CAS loop

The loop in fd_takeover() around the double-CAS is conditioned on
a previous value of old_masks[0] that always matches tid_bit on the
first iteration because it does not result from the atomic op but
from a pre-loaded value. Let's set the result of the atomic op there
instead so that the conflict between threads can be detected earlier
and before performing the double-word CAS.
diff --git a/src/fd.c b/src/fd.c
index cd792a1..b028b4c 100644
--- a/src/fd.c
+++ b/src/fd.c
@@ -367,14 +367,15 @@
 	unsigned long old_masks[2];
 	unsigned long new_masks[2];
 
-	old_masks[0] = tid_bit;
-	old_masks[1] = fdtab[fd].thread_mask;
 	new_masks[0] = new_masks[1] = tid_bit;
+
+	old_masks[0] = _HA_ATOMIC_OR(&fdtab[fd].running_mask, tid_bit);
+	old_masks[1] = fdtab[fd].thread_mask;
+
 	/* protect ourself against a delete then an insert for the same fd,
 	 * if it happens, then the owner will no longer be the expected
 	 * connection.
 	 */
-	_HA_ATOMIC_OR(&fdtab[fd].running_mask, tid_bit);
 	if (fdtab[fd].owner != expected_owner) {
 		_HA_ATOMIC_AND(&fdtab[fd].running_mask, ~tid_bit);
 		return -1;