BUG/MEDIUM: threads: Fix the exit condition of the thread barrier

In thread_sync_barrier, we exit when all threads have set their own bit in the
barrier mask. It is done by comparing it to all_threads_mask. But we must not
use a simple equality to do so, because all_threads_mask may change. Since commit
ba86c6c25 ("MINOR: threads: Be sure to remove threads from all_threads_mask on
exit"), when a thread exits, its bit is removed from all_threads_mask. Instead,
we must use a bitwise AND to test if all bits of all_threads_mask are set.

This also requires that all_threads_mask is set to volatile if we want to
catch changes.

This patch must be backported to 1.8.
diff --git a/include/common/hathreads.h b/include/common/hathreads.h
index 5c4ceca..274f988 100644
--- a/include/common/hathreads.h
+++ b/include/common/hathreads.h
@@ -260,7 +260,7 @@
 int  thread_no_sync(void);
 int  thread_need_sync(void);
 
-extern unsigned long all_threads_mask;
+extern volatile unsigned long all_threads_mask;
 
 #define ha_sigmask(how, set, oldset)  pthread_sigmask(how, set, oldset)
 
diff --git a/src/hathreads.c b/src/hathreads.c
index 5db3c21..a3bca7d 100644
--- a/src/hathreads.c
+++ b/src/hathreads.c
@@ -31,7 +31,7 @@
 static HA_SPINLOCK_T sync_lock;
 static int           threads_sync_pipe[2];
 static unsigned long threads_want_sync = 0;
-unsigned long all_threads_mask  = 0;
+volatile unsigned long all_threads_mask  = 0;
 
 #if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
 struct lock_stat lock_stats[LOCK_LABELS];
@@ -106,7 +106,7 @@
 
 	HA_ATOMIC_CAS(barrier, &old, 0);
 	HA_ATOMIC_OR(barrier, tid_bit);
-	while (*barrier != all_threads_mask)
+	while ((*barrier & all_threads_mask) != all_threads_mask)
 		pl_cpu_relax();
 }