MINOR: fd: make fd_clr_running() return the previous value instead

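fd_clr_running() performs an atomic AND and returns the resulting mask,
which destroys information: the caller cannot tell whether its own bit
was set beforehand. Because of this, there is one call site which first
has to read the running mask to check for the bit, and only then performs
the atomic operation to clear it. By using a fetch-and-and instead, a
single operation both clears the bit and reports whether it was
previously present, which is more efficient.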
diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h
index 9c56fc9..9fbc358 100644
--- a/include/haproxy/fd.h
+++ b/include/haproxy/fd.h
@@ -408,12 +408,12 @@
 	return ret;
 }
 
-/* remove tid_bit from the fd's running mask and returns the bits that remain
- * after the atomic operation.
+/* removes ti->ltid_bit from the fd's running mask and returns the mask as it
+ * was before the atomic operation, so that the caller can know if it was set.
  */
 static inline long fd_clr_running(int fd)
 {
-	return _HA_ATOMIC_AND_FETCH(&fdtab[fd].running_mask, ~ti->ltid_bit);
+	return _HA_ATOMIC_FETCH_AND(&fdtab[fd].running_mask, ~ti->ltid_bit);
 }
 
 /* Prepares <fd> for being polled on all permitted threads of this group ID
diff --git a/src/fd.c b/src/fd.c
index 5782349..56a7711 100644
--- a/src/fd.c
+++ b/src/fd.c
@@ -378,7 +378,7 @@
 
 	HA_ATOMIC_OR(&fdtab[fd].running_mask, ti->ltid_bit);
 	HA_ATOMIC_STORE(&fdtab[fd].thread_mask, 0);
-	if (fd_clr_running(fd) == 0)
+	if (fd_clr_running(fd) == ti->ltid_bit)
 		_fd_delete_orphan(fd);
 }
 
@@ -594,8 +594,7 @@
 	 * This is detected by both thread_mask and running_mask being 0 after
 	 * we remove ourselves last.
 	 */
-	if ((fdtab[fd].running_mask & ti->ltid_bit) &&
-	    fd_clr_running(fd) == 0 && !fdtab[fd].thread_mask) {
+	if (fd_clr_running(fd) == ti->ltid_bit && !fdtab[fd].thread_mask) {
 		_fd_delete_orphan(fd);
 		return FD_UPDT_CLOSED;
 	}