BUG/MEDIUM: init/threads: provide per-thread alloc/free function callbacks

We currently have the ability to register functions to be called early
on thread creation and at thread deinitialization. It turns out this is
not sufficient because certain such functions may use resources that are
being allocated by the other ones, thus creating a race condition depending
only on the linking order. For example the mworker needs to register a
file descriptor while the pollers will reallocate the fd_updt[] array.
Similarly logs and trashes may be used by some init functions while it's
unclear whether they have been deduplicated.

The same issue happens on deinit: if the fd_updt[] or trash is released
before some functions finish using them, we'll get into trouble.

This patch creates a couple of early and late callbacks for per-thread
allocation/freeing of resources. A few init functions were moved there,
and the fd init code was split between the two (since it used to both
allocate and initialize at once). This way the init/deinit sequence is
expected to be safe now.

This patch should be backported to 1.9 as at least the trash/log issue
seems to be present. The run_thread_poll_loop() code is a bit different
there as the mworker is not a callback, but it will have no effect and
it's enough to drop the mworker changes.

This bug was reported by Ilya Shipitsin in github issue #104.
diff --git a/src/fd.c b/src/fd.c
index 3827e83..d924155 100644
--- a/src/fd.c
+++ b/src/fd.c
@@ -576,17 +576,23 @@
 	fd_cant_recv(fd);
 }
 
-/* Initialize the pollers per thread */
+/* Allocate the per-thread fd_updt array; this must be called early after
+ * thread creation.
+ */
+static int alloc_pollers_per_thread()
+{
+	fd_updt = calloc(global.maxsock, sizeof(*fd_updt));
+	return fd_updt != NULL;
+}
+
+/* Initialize the pollers per thread. */
 static int init_pollers_per_thread()
 {
 	int mypipe[2];
-	if ((fd_updt = calloc(global.maxsock, sizeof(*fd_updt))) == NULL)
-		return 0;
-	if (pipe(mypipe) < 0) {
-		free(fd_updt);
-		fd_updt = NULL;
+
+	if (pipe(mypipe) < 0)
 		return 0;
-	}
+
 	poller_rd_pipe = mypipe[0];
 	poller_wr_pipe[tid] = mypipe[1];
 	fcntl(poller_rd_pipe, F_SETFL, O_NONBLOCK);
@@ -599,9 +605,6 @@
 /* Deinitialize the pollers per thread */
 static void deinit_pollers_per_thread()
 {
-	free(fd_updt);
-	fd_updt = NULL;
-
 	/* rd and wr are init at the same place, but only rd is init to -1, so
 	  we rely to rd to close.   */
 	if (poller_rd_pipe > -1) {
@@ -612,6 +615,13 @@
 	}
 }
 
+/* Release the pollers per thread, to be called late */
+static void free_pollers_per_thread()
+{
+	free(fd_updt);
+	fd_updt = NULL;
+}
+
 /*
  * Initialize the pollers till the best one is found.
  * If none works, returns 0, otherwise 1.
@@ -769,8 +779,10 @@
 	return 1;
 }
 
+REGISTER_PER_THREAD_ALLOC(alloc_pollers_per_thread);
 REGISTER_PER_THREAD_INIT(init_pollers_per_thread);
 REGISTER_PER_THREAD_DEINIT(deinit_pollers_per_thread);
+REGISTER_PER_THREAD_FREE(free_pollers_per_thread);
 
 /*
  * Local variables: