MINOR: threads: Implement __ha_barrier_atomic*.

Implement the __ha_barrier_atomic* functions, to be used when trying to
protect data modified by atomic operations (except when using
HA_ATOMIC_STORE).
On Intel, atomic operations use either the LOCK prefix or xchg, and both
act as a full barrier, so there's no need to add an extra barrier.
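
As a rough illustration only (not part of this patch), here is how a
writer and a reader might use these barriers; the struct, the variable
names and the reader's plain load of the flag are hypothetical, and only
the HA_ATOMIC_XCHG/HA_ATOMIC_ADD macros and the new barriers come from
hathreads.h:

    #include <common/hathreads.h>

    struct entry;                               /* hypothetical payload type */
    static struct entry *shared_entry;          /* hypothetical shared pointer */
    static volatile unsigned int shared_ready;  /* hypothetical ready flag */

    /* Writer: both updates are locked RMW operations, so on x86 the
     * barrier below compiles down to a pure compiler barrier.
     */
    static void publish(struct entry *e)
    {
            HA_ATOMIC_XCHG(&shared_entry, e);
            __ha_barrier_atomic_store();    /* order the xchg before the flag */
            HA_ATOMIC_ADD(&shared_ready, 1);
    }

    /* Reader: pairs with the store barrier above. */
    static struct entry *consume(void)
    {
            if (!shared_ready)
                    return NULL;
            __ha_barrier_atomic_load();     /* order flag read before data read */
            return shared_entry;
    }
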
diff --git a/include/common/hathreads.h b/include/common/hathreads.h
index 74a4761..77d5c6f 100644
--- a/include/common/hathreads.h
+++ b/include/common/hathreads.h
@@ -128,6 +128,18 @@
 {
 }
 
+static inline void __ha_barrier_atomic_load(void)
+{
+}
+
+static inline void __ha_barrier_atomic_store(void)
+{
+}
+
+static inline void __ha_barrier_atomic_full(void)
+{
+}
+
 static inline void __ha_barrier_load(void)
 {
 }
@@ -866,7 +878,28 @@
         return (ret);
 }
 
+/* Use __ha_barrier_atomic* when you're trying to protect data that are
+ * modified using HA_ATOMIC* (except HA_ATOMIC_STORE)
+ */
+static __inline void
+__ha_barrier_atomic_load(void)
+{
+	__asm __volatile("" ::: "memory");
+}
+
 static __inline void
+__ha_barrier_atomic_store(void)
+{
+	__asm __volatile("" ::: "memory");
+}
+
+static __inline void
+__ha_barrier_atomic_full(void)
+{
+	__asm __volatile("" ::: "memory");
+}
+
+static __inline void
 __ha_barrier_load(void)
 {
 	__asm __volatile("lfence" ::: "memory");
@@ -886,6 +919,27 @@
 
 #elif defined(__arm__) && (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__))
 
+/* Use __ha_barrier_atomic* when you're trying to protect data that are
+ * modified using HA_ATOMIC* (except HA_ATOMIC_STORE)
+ */
+static __inline void
+__ha_barrier_atomic_load(void)
+{
+	__asm __volatile("dmb" ::: "memory");
+}
+
+static __inline void
+__ha_barrier_atomic_store(void)
+{
+	__asm __volatile("dsb" ::: "memory");
+}
+
+static __inline void
+__ha_barrier_atomic_full(void)
+{
+	__asm __volatile("dmb" ::: "memory");
+}
+
 static __inline void
 __ha_barrier_load(void)
 {
@@ -927,6 +981,27 @@
 
 #elif defined (__aarch64__)
 
+/* Use __ha_barrier_atomic* when you're trying to protect data that are
+ * modified using HA_ATOMIC* (except HA_ATOMIC_STORE)
+ */
+static __inline void
+__ha_barrier_atomic_load(void)
+{
+	__asm __volatile("dmb ishld" ::: "memory");
+}
+
+static __inline void
+__ha_barrier_atomic_store(void)
+{
+	__asm __volatile("dmb ishst" ::: "memory");
+}
+
+static __inline void
+__ha_barrier_atomic_full(void)
+{
+	__asm __volatile("dmb ish" ::: "memory");
+}
+
 static __inline void
 __ha_barrier_load(void)
 {
@@ -972,6 +1047,9 @@
 }
 
 #else
+#define __ha_barrier_atomic_load __sync_synchronize
+#define __ha_barrier_atomic_store __sync_synchronize
+#define __ha_barrier_atomic_full __sync_synchronize
 #define __ha_barrier_load __sync_synchronize
 #define __ha_barrier_store __sync_synchronize
 #define __ha_barrier_full __sync_synchronize