BUG/MINOR: fd: Don't clear the update_mask in fd_insert.

Clearing the update_mask bit in fd_insert may lead to duplicate insertion
of fd in fd_updt, that could lead to a write past the end of the array.
Instead, make sure the update_mask bit is cleared by the pollers no matter
what.

This should be backported to 1.8.
[wt: warning: 1.8 doesn't have the lockless fdcache changes and will
 require some careful changes in the pollers]
diff --git a/include/proto/fd.h b/include/proto/fd.h
index 0f59df6..6c9cfe7 100644
--- a/include/proto/fd.h
+++ b/include/proto/fd.h
@@ -445,7 +445,6 @@
 	fdtab[fd].owner = owner;
 	fdtab[fd].iocb = iocb;
 	fdtab[fd].ev = 0;
-	fdtab[fd].update_mask &= ~tid_bit;
 	fdtab[fd].linger_risk = 0;
 	fdtab[fd].cloned = 0;
 	fdtab[fd].thread_mask = thread_mask;
diff --git a/src/ev_epoll.c b/src/ev_epoll.c
index b98ca8c..a8e5797 100644
--- a/src/ev_epoll.c
+++ b/src/ev_epoll.c
@@ -74,13 +74,13 @@
 	for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
 		fd = fd_updt[updt_idx];
 
+		HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
 		if (!fdtab[fd].owner) {
 			activity[tid].poll_drop++;
 			continue;
 		}
 
 		en = fdtab[fd].state;
-		HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
 
 		if (fdtab[fd].polled_mask & tid_bit) {
 			if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) {
diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c
index cc96307..a103ece 100644
--- a/src/ev_kqueue.c
+++ b/src/ev_kqueue.c
@@ -47,13 +47,13 @@
 	for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
 		fd = fd_updt[updt_idx];
 
+		HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
 		if (!fdtab[fd].owner) {
 			activity[tid].poll_drop++;
 			continue;
 		}
 
 		en = fdtab[fd].state;
-		HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
 
 		if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) {
 			if (!(fdtab[fd].polled_mask & tid_bit)) {
diff --git a/src/ev_poll.c b/src/ev_poll.c
index edcfe65..6093b65 100644
--- a/src/ev_poll.c
+++ b/src/ev_poll.c
@@ -65,13 +65,13 @@
 	for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
 		fd = fd_updt[updt_idx];
 
+		HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
 		if (!fdtab[fd].owner) {
 			activity[tid].poll_drop++;
 			continue;
 		}
 
 		en = fdtab[fd].state;
-		HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
 
 		/* we have a single state for all threads, which is why we
 		 * don't check the tid_bit. First thread to see the update
diff --git a/src/ev_select.c b/src/ev_select.c
index 6e83467..163a458 100644
--- a/src/ev_select.c
+++ b/src/ev_select.c
@@ -57,13 +57,13 @@
 	for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
 		fd = fd_updt[updt_idx];
 
+		HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
 		if (!fdtab[fd].owner) {
 			activity[tid].poll_drop++;
 			continue;
 		}
 
 		en = fdtab[fd].state;
-		HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
 
 		/* we have a single state for all threads, which is why we
 		 * don't check the tid_bit. First thread to see the update