MAJOR: fd: compute the new fd polling state out of the fd lock

Each fd_{may|cant|stop|want}_{recv|send} function sets or resets a
single bit at once, then recomputes the need for updates, and then
the new cache state. Later, pollers will compute the new polling
state based on the resulting operations here. In fact the conditions
are so simple that they can be performed by a single "if", or sometimes
even optimized away.

This means that in practice a simple compare-and-swap operation is often
enough to set the new value including the new polling state, and that only
the cache and fdupdt have to be performed under the lock. Better, for the
most common operations (fd_may_{recv,send}, used by the pollers), only a
simple atomic OR is needed.

This patch does this for the fd_* functions above and it doesn't yet
remove the now useless fd_compute_new_polling_status() because it's still
used by other pollers. A pure connection rate test shows a 1% performance
increase.
diff --git a/include/proto/fd.h b/include/proto/fd.h
index 535e691..be55f8a 100644
--- a/include/proto/fd.h
+++ b/include/proto/fd.h
@@ -334,18 +334,10 @@
 }
 
 /* This function automatically enables/disables caching for an entry depending
- * on its state, and also possibly creates an update entry so that the poller
- * does its job as well. It is only called on state changes.
+ * on its state. It is only called on state changes.
  */
 static inline void fd_update_cache(int fd)
 {
-	/* 3 states for each direction require a polling update */
-	if ((fdtab[fd].state & (FD_EV_POLLED_R |                 FD_EV_ACTIVE_R)) == FD_EV_POLLED_R ||
-	    (fdtab[fd].state & (FD_EV_POLLED_R | FD_EV_READY_R | FD_EV_ACTIVE_R)) == FD_EV_ACTIVE_R ||
-	    (fdtab[fd].state & (FD_EV_POLLED_W |                 FD_EV_ACTIVE_W)) == FD_EV_POLLED_W ||
-	    (fdtab[fd].state & (FD_EV_POLLED_W | FD_EV_READY_W | FD_EV_ACTIVE_W)) == FD_EV_ACTIVE_W)
-		updt_fd_polling(fd);
-
 	/* only READY and ACTIVE states (the two with both flags set) require a cache entry */
 	if (((fdtab[fd].state & (FD_EV_READY_R | FD_EV_ACTIVE_R)) == (FD_EV_READY_R | FD_EV_ACTIVE_R)) ||
 	    ((fdtab[fd].state & (FD_EV_READY_W | FD_EV_ACTIVE_W)) == (FD_EV_READY_W | FD_EV_ACTIVE_W))) {
@@ -431,55 +423,96 @@
 /* Disable processing recv events on fd <fd> */
 static inline void fd_stop_recv(int fd)
 {
+	unsigned char old, new;
+
+	old = fdtab[fd].state;
+	do {
+		if (!(old & FD_EV_ACTIVE_R))
+			return; /* recv already inactive: nothing to change */
+		new = old & ~FD_EV_ACTIVE_R;
+		new &= ~FD_EV_POLLED_R; /* an inactive direction is not left polled */
+	} while (unlikely(!HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); /* loop again while the CAS fails */
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (fd_recv_active(fd)) {
-		fdtab[fd].state &= ~FD_EV_ACTIVE_R;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	if ((old ^ new) & FD_EV_POLLED_R)
+		updt_fd_polling(fd); /* polled bit changed: create an update entry for the poller */
+
+	fd_update_cache(fd); /* enable/disable caching for the entry according to the new state */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }
 
 /* Disable processing send events on fd <fd> */
 static inline void fd_stop_send(int fd)
 {
+	unsigned char old, new;
+
+	old = fdtab[fd].state;
+	do {
+		if (!(old & FD_EV_ACTIVE_W))
+			return; /* send already inactive: nothing to change */
+		new = old & ~FD_EV_ACTIVE_W;
+		new &= ~FD_EV_POLLED_W; /* an inactive direction is not left polled */
+	} while (unlikely(!HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); /* loop again while the CAS fails */
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (fd_send_active(fd)) {
-		fdtab[fd].state &= ~FD_EV_ACTIVE_W;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	if ((old ^ new) & FD_EV_POLLED_W)
+		updt_fd_polling(fd); /* polled bit changed: create an update entry for the poller */
+
+	fd_update_cache(fd); /* enable/disable caching for the entry according to the new state */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }
 
 /* Disable processing of events on fd <fd> for both directions. */
 static inline void fd_stop_both(int fd)
 {
+	unsigned char old, new;
+
+	old = fdtab[fd].state;
+	do {
+		if (!(old & FD_EV_ACTIVE_RW))
+			return; /* neither direction is active: nothing to change */
+		new = old & ~FD_EV_ACTIVE_RW;
+		new &= ~FD_EV_POLLED_RW; /* inactive directions are not left polled */
+	} while (unlikely(!HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); /* loop again while the CAS fails */
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (fd_active(fd)) {
-		fdtab[fd].state &= ~FD_EV_ACTIVE_RW;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	if ((old ^ new) & FD_EV_POLLED_RW)
+		updt_fd_polling(fd); /* a polled bit changed: create an update entry for the poller */
+
+	fd_update_cache(fd); /* enable/disable caching for the entry according to the new state */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }
 
 /* Report that FD <fd> cannot receive anymore without polling (EAGAIN detected). */
 static inline void fd_cant_recv(const int fd)
 {
+	unsigned char old, new;
+
+	old = fdtab[fd].state;
+	do {
+		if (!(old & FD_EV_READY_R))
+			return; /* already marked not ready: nothing to change */
+		new = old & ~FD_EV_READY_R;
+		if (new & FD_EV_ACTIVE_R)
+			new |= FD_EV_POLLED_R; /* active but no longer ready: mark it polled */
+	} while (unlikely(!HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); /* loop again while the CAS fails */
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (fd_recv_ready(fd)) {
-		fdtab[fd].state &= ~FD_EV_READY_R;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	if ((old ^ new) & FD_EV_POLLED_R)
+		updt_fd_polling(fd); /* polled bit changed: create an update entry for the poller */
+
+	fd_update_cache(fd); /* enable/disable caching for the entry according to the new state */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }
 
 /* Report that FD <fd> can receive anymore without polling. */
 static inline void fd_may_recv(const int fd)
 {
+	/* marking ready never changes polled status, so a plain atomic OR is enough */
+	HA_ATOMIC_OR(&fdtab[fd].state, FD_EV_READY_R);
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (!fd_recv_ready(fd)) {
-		fdtab[fd].state |= FD_EV_READY_R;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	fd_update_cache(fd); /* enable/disable caching for the entry; now called even if the bit was already set */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }
 
@@ -490,55 +523,99 @@
  */
 static inline void fd_done_recv(const int fd)
 {
+	unsigned char old, new;
+
+	old = fdtab[fd].state;
+	do {
+		if ((old & (FD_EV_POLLED_R|FD_EV_READY_R)) != (FD_EV_POLLED_R|FD_EV_READY_R))
+			return; /* only act when recv is both polled and ready */
+		new = old & ~FD_EV_READY_R;
+		if (new & FD_EV_ACTIVE_R)
+			new |= FD_EV_POLLED_R; /* no effect here: POLLED_R is already set in old (see the test above) */
+	} while (unlikely(!HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); /* loop again while the CAS fails */
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (fd_recv_polled(fd) && fd_recv_ready(fd)) {
-		fdtab[fd].state &= ~FD_EV_READY_R;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	if ((old ^ new) & FD_EV_POLLED_R) /* NOTE(review): POLLED_R is set in both old and new, so this cannot be true here */
+		updt_fd_polling(fd);
+
+	fd_update_cache(fd); /* enable/disable caching for the entry according to the new state */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }
 
 /* Report that FD <fd> cannot send anymore without polling (EAGAIN detected). */
 static inline void fd_cant_send(const int fd)
 {
+	unsigned char old, new;
+
+	old = fdtab[fd].state;
+	do {
+		if (!(old & FD_EV_READY_W))
+			return; /* already marked not ready: nothing to change */
+		new = old & ~FD_EV_READY_W;
+		if (new & FD_EV_ACTIVE_W)
+			new |= FD_EV_POLLED_W; /* active but no longer ready: mark it polled */
+	} while (unlikely(!HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); /* loop again while the CAS fails */
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (fd_send_ready(fd)) {
-		fdtab[fd].state &= ~FD_EV_READY_W;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	if ((old ^ new) & FD_EV_POLLED_W)
+		updt_fd_polling(fd); /* polled bit changed: create an update entry for the poller */
+
+	fd_update_cache(fd); /* enable/disable caching for the entry according to the new state */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }
 
 /* Report that FD <fd> can send anymore without polling (EAGAIN detected). */
 static inline void fd_may_send(const int fd)
 {
+	/* marking ready never changes polled status, so a plain atomic OR is enough */
+	HA_ATOMIC_OR(&fdtab[fd].state, FD_EV_READY_W);
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (!fd_send_ready(fd)) {
-		fdtab[fd].state |= FD_EV_READY_W;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	fd_update_cache(fd); /* enable/disable caching for the entry; now called even if the bit was already set */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }
 
 /* Prepare FD <fd> to try to receive */
 static inline void fd_want_recv(int fd)
 {
+	unsigned char old, new;
+
+	old = fdtab[fd].state;
+	do {
+		if (old & FD_EV_ACTIVE_R)
+			return; /* recv already active: nothing to change */
+		new = old | FD_EV_ACTIVE_R;
+		if (!(new & FD_EV_READY_R))
+			new |= FD_EV_POLLED_R; /* active but not ready: mark it polled */
+	} while (unlikely(!HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); /* loop again while the CAS fails */
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (!fd_recv_active(fd)) {
-		fdtab[fd].state |= FD_EV_ACTIVE_R;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	if ((old ^ new) & FD_EV_POLLED_R)
+		updt_fd_polling(fd); /* polled bit changed: create an update entry for the poller */
+
+	fd_update_cache(fd); /* enable/disable caching for the entry according to the new state */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }
 
 /* Prepare FD <fd> to try to send */
 static inline void fd_want_send(int fd)
 {
+	unsigned char old, new;
+
+	old = fdtab[fd].state;
+	do {
+		if (old & FD_EV_ACTIVE_W)
+			return; /* send already active: nothing to change */
+		new = old | FD_EV_ACTIVE_W;
+		if (!(new & FD_EV_READY_W))
+			new |= FD_EV_POLLED_W; /* active but not ready: mark it polled */
+	} while (unlikely(!HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); /* loop again while the CAS fails */
+
 	HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
-	if (!fd_send_active(fd)) {
-		fdtab[fd].state |= FD_EV_ACTIVE_W;
-		fd_update_cache(fd); /* need an update entry to change the state */
-	}
+	if ((old ^ new) & FD_EV_POLLED_W)
+		updt_fd_polling(fd); /* polled bit changed: create an update entry for the poller */
+
+	fd_update_cache(fd); /* enable/disable caching for the entry according to the new state */
 	HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
 }