feat(cpufeat): add ENABLE_FEAT_LS64_ACCDATA

Armv8.6 introduced the FEAT_LS64 extension, which provides a 64 *byte*
store instruction. A related instruction is ST64BV0, which will replace
the lowest 32 bits of the data with a value taken from the ACCDATA_EL1
system register (so that EL0 cannot alter them).
Using that ST64BV0 instruction and accessing the ACCDATA_EL1 system
register is guarded by two SCR_EL3 bits, which we should set to avoid a
trap into EL3, when lower ELs use one of those.

Add the required bits and pieces to make this feature usable:
- Add the ENABLE_FEAT_LS64_ACCDATA build option (defaulting to 0).
- Add the CPUID and SCR_EL3 bit definitions associated with FEAT_LS64.
- Add a feature check to check for the existing four variants of the
  LS64 feature and detect future extensions.
- Add code to save and restore the ACCDATA_EL1 register on
  secure/non-secure context switches.
- Enable the feature with runtime detection for FVP and Arm FPGA.

Please note that the *basic* FEAT_LS64 feature does not feature any trap
bits, it's only the addition of the ACCDATA_EL1 system register that
adds these traps and the SCR_EL3 bits.

Change-Id: Ie3e2ca2d9c4fbbd45c0cc6089accbb825579138a
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h
index 3f0120c..737d07a 100644
--- a/include/arch/aarch64/arch.h
+++ b/include/arch/aarch64/arch.h
@@ -293,6 +293,18 @@
 /* ID_AA64ISAR1_EL1 definitions */
 #define ID_AA64ISAR1_EL1		S3_0_C0_C6_1
 
+#define ID_AA64ISAR1_LS64_SHIFT		U(60)
+#define ID_AA64ISAR1_LS64_MASK		ULL(0xf)
+#define LS64_ACCDATA_IMPLEMENTED	ULL(0x3)
+#define LS64_V_IMPLEMENTED		ULL(0x2)
+#define LS64_IMPLEMENTED		ULL(0x1)
+#define LS64_NOT_IMPLEMENTED		ULL(0x0)
+
+#define ID_AA64ISAR1_SB_SHIFT		U(36)
+#define ID_AA64ISAR1_SB_MASK		ULL(0xf)
+#define SB_IMPLEMENTED			ULL(0x1)
+#define SB_NOT_IMPLEMENTED		ULL(0x0)
+
 #define ID_AA64ISAR1_GPI_SHIFT		U(28)
 #define ID_AA64ISAR1_GPI_MASK		ULL(0xf)
 #define ID_AA64ISAR1_GPA_SHIFT		U(24)
@@ -303,11 +315,6 @@
 #define ID_AA64ISAR1_APA_SHIFT		U(4)
 #define ID_AA64ISAR1_APA_MASK		ULL(0xf)
 
-#define ID_AA64ISAR1_SB_SHIFT		U(36)
-#define ID_AA64ISAR1_SB_MASK		ULL(0xf)
-#define SB_IMPLEMENTED			ULL(0x1)
-#define SB_NOT_IMPLEMENTED		ULL(0x0)
-
 /* ID_AA64ISAR2_EL1 definitions */
 #define ID_AA64ISAR2_EL1		S3_0_C0_C6_2
 
@@ -606,11 +613,13 @@
 #define SCR_SCTLR2En_BIT	(UL(1) << 44)
 #define SCR_TCR2EN_BIT		(UL(1) << 43)
 #define SCR_RCWMASKEn_BIT	(UL(1) << 42)
+#define SCR_ENTP2_SHIFT		U(41)
+#define SCR_ENTP2_BIT		(UL(1) << SCR_ENTP2_SHIFT)
 #define SCR_TRNDR_BIT		(UL(1) << 40)
 #define SCR_GCSEn_BIT		(UL(1) << 39)
 #define SCR_HXEn_BIT		(UL(1) << 38)
-#define SCR_ENTP2_SHIFT		U(41)
-#define SCR_ENTP2_BIT		(UL(1) << SCR_ENTP2_SHIFT)
+#define SCR_ADEn_BIT		(UL(1) << 37)
+#define SCR_EnAS0_BIT		(UL(1) << 36)
 #define SCR_AMVOFFEN_SHIFT	U(35)
 #define SCR_AMVOFFEN_BIT	(UL(1) << SCR_AMVOFFEN_SHIFT)
 #define SCR_TWEDEn_BIT		(UL(1) << 29)
@@ -1504,6 +1513,11 @@
 #define SCTLR2_EL1		S3_0_C1_C0_3
 
 /*******************************************************************************
+ * FEAT_LS64_ACCDATA - LoadStore64B with status data
+ ******************************************************************************/
+#define ACCDATA_EL1		S3_0_C13_C0_5
+
+/*******************************************************************************
  * Definitions for DynamicIQ Shared Unit registers
  ******************************************************************************/
 #define CLUSTERPWRDN_EL1	S3_0_c15_c3_6