MINOR: atomic: remove the memcpy() call and dependency on string.h

The memcpy() call in the aarch64 version of __ha_cas_dw() is sometimes
inlined and sometimes not, depending on the gcc version. It's only used
to copy two void* values back into <compare>, so let's use two direct
assignments instead of memcpy(). Since this was the only reason for
including string.h in this file, the include can be dropped as well.
It would also be possible to change the asm code to write the result
there directly, but it's not worth it.

With this change the code is 8kB smaller with gcc-5.4.
diff --git a/include/haproxy/atomic.h b/include/haproxy/atomic.h
index 4306b75..af0b10a 100644
--- a/include/haproxy/atomic.h
+++ b/include/haproxy/atomic.h
@@ -23,8 +23,6 @@
 #ifndef _HAPROXY_ATOMIC_H
 #define _HAPROXY_ATOMIC_H
 
-#include <string.h>
-
 /* A few notes for the macros and functions here:
  *  - this file is painful to edit, most operations exist in 3 variants,
  *    no-thread, threads with gcc<4.7, threads with gcc>=4.7. Be careful when
@@ -781,7 +779,8 @@
                              : "r" (target), "r" (((void **)(compare))[0]), "r" (((void **)(compare))[1]), "r" (((void **)(set))[0]), "r" (((void **)(set))[1])
                              : "cc", "memory");
 
-	memcpy(compare, &value, sizeof(value));
+	((void **)(compare))[0] = value[0];
+	((void **)(compare))[1] = value[1];
         return (tmp1);
 }
 #endif // ARMv8.1-A atomics