Fix the CAS spinlock implementation

Make the spinlock implementation use ARMv8.1-LSE CAS instruction based
on a platform build option. The CAS-based implementation used to be
unconditionally selected for all ARM8.1+ platforms.

The previous CAS spinlock implementation had a bug wherein the spin_unlock()
implementation had an `sev` after `stlr` which is not sufficient. A dsb is
needed to ensure that the stlr completes prior to the sev. Having a dsb is
heavyweight and a better solution would be to use load exclusive semantics
to monitor the lock and wake up from wfe when a store happens to the lock.
The patch implements the same.

Change-Id: I5283ce4a889376e4cc01d1b9d09afa8229a2e522
Signed-off-by: Soby Mathew <soby.mathew@arm.com>
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/Makefile b/Makefile
index 32918c3..18800cb 100644
--- a/Makefile
+++ b/Makefile
@@ -141,6 +141,15 @@
         $(error Unknown BRANCH_PROTECTION value ${BRANCH_PROTECTION})
 endif
 
+# USE_SPINLOCK_CAS requires AArch64 build
+ifeq (${USE_SPINLOCK_CAS},1)
+ifneq (${ARCH},aarch64)
+        $(error USE_SPINLOCK_CAS requires AArch64)
+else
+        $(info USE_SPINLOCK_CAS is an experimental feature)
+endif
+endif
+
 ################################################################################
 # Toolchain
 ################################################################################
@@ -690,6 +699,7 @@
 $(eval $(call assert_boolean,BL2_AT_EL3))
 $(eval $(call assert_boolean,BL2_IN_XIP_MEM))
 $(eval $(call assert_boolean,BL2_INV_DCACHE))
+$(eval $(call assert_boolean,USE_SPINLOCK_CAS))
 
 $(eval $(call assert_numeric,ARM_ARCH_MAJOR))
 $(eval $(call assert_numeric,ARM_ARCH_MINOR))
@@ -755,6 +765,7 @@
 $(eval $(call add_define,BL2_AT_EL3))
 $(eval $(call add_define,BL2_IN_XIP_MEM))
 $(eval $(call add_define,BL2_INV_DCACHE))
+$(eval $(call add_define,USE_SPINLOCK_CAS))
 
 ifeq (${SANITIZE_UB},trap)
         $(eval $(call add_define,MONITOR_TRAPS))