OPTIM: poll: optimize fd management functions for low register count CPUs

Looking at the assembly code that updt_fd() and alloc/release_spec_entry
produce in the polling loops, it's clear that gcc has to recompute pointers
several times in a row because of limited spare registers. By better
grouping adjacent structure updates, we reduce the code size by around
60 bytes in the fast path on x86.
diff --git a/include/proto/fd.h b/include/proto/fd.h
index 127cbe0..3b1365d 100644
--- a/include/proto/fd.h
+++ b/include/proto/fd.h
@@ -89,8 +89,8 @@
 	if (fdtab[fd].updated)
 		/* already scheduled for update */
 		return;
-	fd_updt[fd_nbupdt++] = fd;
 	fdtab[fd].updated = 1;
+	fd_updt[fd_nbupdt++] = fd;
 }
 
 
@@ -100,8 +100,9 @@
 	if (fdtab[fd].spec_p)
 		/* FD already in speculative I/O list */
 		return;
-	fd_spec[fd_nbspec++] = fd;
+	fd_nbspec++;
 	fdtab[fd].spec_p = fd_nbspec;
+	fd_spec[fd_nbspec-1] = fd;
 }
 
 /* Removes entry used by fd <fd> from the spec list and replaces it with the
@@ -117,7 +118,7 @@
 		return;
 	fdtab[fd].spec_p = 0;
 	fd_nbspec--;
-	if (pos <= fd_nbspec) {
+	if (likely(pos <= fd_nbspec)) {
 		/* was not the last entry */
 		fd = fd_spec[fd_nbspec];
 		fd_spec[pos - 1] = fd;