MAJOR: fd/threads: Make the fdcache mostly lockless.
Create a local, per-thread fd cache for file descriptors that belong to
only one thread, and make the global fd cache mostly lockless, as we can
get a lot of contention on the fd cache lock.
diff --git a/src/fd.c b/src/fd.c
index 0995040..2cd79fb 100644
--- a/src/fd.c
+++ b/src/fd.c
@@ -167,15 +167,14 @@
struct poller cur_poller;
int nbpollers = 0;
-unsigned int *fd_cache = NULL; // FD events cache
-int fd_cache_num = 0; // number of events in the cache
+volatile struct fdlist fd_cache ; // FD events cache
+volatile struct fdlist fd_cache_local[MAX_THREADS]; // FD events local for each thread
+
unsigned long fd_cache_mask = 0; // Mask of threads with events in the cache
THREAD_LOCAL int *fd_updt = NULL; // FD updates list
THREAD_LOCAL int fd_nbupdt = 0; // number of updates in the list
-__decl_hathreads(HA_RWLOCK_T fdcache_lock); /* global lock to protect fd_cache array */
-
/* Deletes an FD from the fdsets.
* The file descriptor is also closed.
*/
@@ -221,33 +220,30 @@
fd_dodelete(fd, 0);
}
-/* Scan and process the cached events. This should be called right after
- * the poller. The loop may cause new entries to be created, for example
- * if a listener causes an accept() to initiate a new incoming connection
- * wanting to attempt an recv().
- */
-void fd_process_cached_events()
+static inline void fdlist_process_cached_events(volatile struct fdlist *fdlist)
{
- int fd, entry, e;
+ int fd, old_fd, e;
- HA_RWLOCK_RDLOCK(FDCACHE_LOCK, &fdcache_lock);
- fd_cache_mask &= ~tid_bit;
- for (entry = 0; entry < fd_cache_num; ) {
- fd = fd_cache[entry];
-
- if (!(fdtab[fd].thread_mask & tid_bit)) {
- activity[tid].fd_skip++;
- goto next;
- }
+ for (old_fd = fd = fdlist->first; fd != -1; fd = fdtab[fd].fdcache_entry.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ } else if (fd <= -3)
+ fd = -fd - 4;
+ if (fd == -1)
+ break;
+ old_fd = fd;
+ if (!(fdtab[fd].thread_mask & tid_bit))
+ continue;
+ if (fdtab[fd].fdcache_entry.next < -3)
+ continue;
- fd_cache_mask |= tid_bit;
+ HA_ATOMIC_OR(&fd_cache_mask, tid_bit);
if (HA_SPIN_TRYLOCK(FD_LOCK, &fdtab[fd].lock)) {
activity[tid].fd_lock++;
- goto next;
+ continue;
}
- HA_RWLOCK_RDUNLOCK(FDCACHE_LOCK, &fdcache_lock);
-
e = fdtab[fd].state;
fdtab[fd].ev &= FD_POLL_STICKY;
@@ -265,19 +261,19 @@
fd_release_cache_entry(fd);
HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}
-
- HA_RWLOCK_RDLOCK(FDCACHE_LOCK, &fdcache_lock);
- /* If the fd was removed from the cache, it has been
- * replaced by the next one that we don't want to skip !
- */
- if (entry < fd_cache_num && fd_cache[entry] != fd) {
- activity[tid].fd_del++;
- continue;
- }
- next:
- entry++;
}
- HA_RWLOCK_RDUNLOCK(FDCACHE_LOCK, &fdcache_lock);
+}
+
+/* Scan and process the cached events. This should be called right after
+ * the poller. The loop may cause new entries to be created, for example
+ * if a listener causes an accept() to initiate a new incoming connection
+ * wanting to attempt an recv().
+ */
+void fd_process_cached_events()
+{
+ HA_ATOMIC_AND(&fd_cache_mask, ~tid_bit);
+ fdlist_process_cached_events(&fd_cache_local[tid]);
+ fdlist_process_cached_events(&fd_cache);
}
/* disable the specified poller */
@@ -320,16 +316,19 @@
if ((fdinfo = calloc(global.maxsock, sizeof(struct fdinfo))) == NULL)
goto fail_info;
- if ((fd_cache = calloc(global.maxsock, sizeof(*fd_cache))) == NULL)
- goto fail_cache;
-
+ fd_cache.first = fd_cache.last = -1;
hap_register_per_thread_init(init_pollers_per_thread);
hap_register_per_thread_deinit(deinit_pollers_per_thread);
- for (p = 0; p < global.maxsock; p++)
+ for (p = 0; p < global.maxsock; p++) {
HA_SPIN_INIT(&fdtab[p].lock);
+ /* Mark the fd as out of the fd cache */
+ fdtab[p].fdcache_entry.next = -3;
+ fdtab[p].fdcache_entry.next = -3;
+ }
+ for (p = 0; p < global.nbthread; p++)
+ fd_cache_local[p].first = fd_cache_local[p].last = -1;
- HA_RWLOCK_INIT(&fdcache_lock);
do {
bp = NULL;
for (p = 0; p < nbpollers; p++)
@@ -372,11 +371,8 @@
bp->term(bp);
}
- free(fd_cache); fd_cache = NULL;
free(fdinfo); fdinfo = NULL;
free(fdtab); fdtab = NULL;
-
- HA_RWLOCK_DESTROY(&fdcache_lock);
}
/*