OPTIM: poll: optimize fd management functions for low register count CPUs
Looking at the assembly code that updt_fd(), alloc_spec_entry() and
release_spec_entry() produce in the polling loops, it's clear that gcc has
to recompute pointers several times in a row because of the limited number
of spare registers. By better grouping adjacent structure updates, we
reduce the code size by around 60 bytes in the fast path on x86.
diff --git a/include/proto/fd.h b/include/proto/fd.h
index 127cbe0..3b1365d 100644
--- a/include/proto/fd.h
+++ b/include/proto/fd.h
@@ -89,8 +89,8 @@
if (fdtab[fd].updated)
/* already scheduled for update */
return;
- fd_updt[fd_nbupdt++] = fd;
fdtab[fd].updated = 1;
+ fd_updt[fd_nbupdt++] = fd;
}
@@ -100,8 +100,9 @@
if (fdtab[fd].spec_p)
/* FD already in speculative I/O list */
return;
- fd_spec[fd_nbspec++] = fd;
+ fd_nbspec++;
fdtab[fd].spec_p = fd_nbspec;
+ fd_spec[fd_nbspec-1] = fd;
}
/* Removes entry used by fd <fd> from the spec list and replaces it with the
@@ -117,7 +118,7 @@
return;
fdtab[fd].spec_p = 0;
fd_nbspec--;
- if (pos <= fd_nbspec) {
+ if (likely(pos <= fd_nbspec)) {
/* was not the last entry */
fd = fd_spec[fd_nbspec];
fd_spec[pos - 1] = fd;