feat(cpufeat): add ENABLE_FEAT_LS64_ACCDATA

Armv8.7 introduced the FEAT_LS64 extension, which provides a 64 *byte*
store instruction. A related instruction is ST64BV0, which will replace
the lowest 32 bits of the data with a value taken from the ACCDATA_EL1
system register (so that EL0 cannot alter them).
Using the ST64BV0 instruction and accessing the ACCDATA_EL1 system
register are each guarded by an SCR_EL3 bit (EnAS0 and ADEn,
respectively), which we must set to avoid a trap into EL3 when lower
ELs use either of them.

Add the required bits and pieces to make this feature usable:
- Add the ENABLE_FEAT_LS64_ACCDATA build option (defaulting to 0).
- Add the CPUID and SCR_EL3 bit definitions associated with FEAT_LS64.
- Add a feature check that covers the four existing variants of the
  LS64 feature and detects future extensions.
- Add code to save and restore the ACCDATA_EL1 register on
  secure/non-secure context switches.
- Enable the feature with runtime detection for FVP and Arm FPGA.

Please note that the *basic* FEAT_LS64 feature does not come with any
trap bits; it is only the addition of the ACCDATA_EL1 system register
that adds these traps and the SCR_EL3 bits.

Change-Id: Ie3e2ca2d9c4fbbd45c0cc6089accbb825579138a
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
diff --git a/include/arch/aarch32/arch_features.h b/include/arch/aarch32/arch_features.h
index a29b672..e347240 100644
--- a/include/arch/aarch32/arch_features.h
+++ b/include/arch/aarch32/arch_features.h
@@ -196,5 +196,7 @@
 static inline bool is_feat_sebep_present(void) { return false; }
 __attribute__((always_inline))
 static inline bool is_feat_d128_present(void) { return false; }
+__attribute__((always_inline))
+static inline bool is_feat_ls64_accdata_present(void) { return false; }
 
 #endif /* ARCH_FEATURES_H */
diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h
index 3f0120c..737d07a 100644
--- a/include/arch/aarch64/arch.h
+++ b/include/arch/aarch64/arch.h
@@ -293,6 +293,18 @@
 /* ID_AA64ISAR1_EL1 definitions */
 #define ID_AA64ISAR1_EL1		S3_0_C0_C6_1
 
+#define ID_AA64ISAR1_LS64_SHIFT		U(60)
+#define ID_AA64ISAR1_LS64_MASK		ULL(0xf)
+#define LS64_ACCDATA_IMPLEMENTED	ULL(0x3)
+#define LS64_V_IMPLEMENTED		ULL(0x2)
+#define LS64_IMPLEMENTED		ULL(0x1)
+#define LS64_NOT_IMPLEMENTED		ULL(0x0)
+
+#define ID_AA64ISAR1_SB_SHIFT		U(36)
+#define ID_AA64ISAR1_SB_MASK		ULL(0xf)
+#define SB_IMPLEMENTED			ULL(0x1)
+#define SB_NOT_IMPLEMENTED		ULL(0x0)
+
 #define ID_AA64ISAR1_GPI_SHIFT		U(28)
 #define ID_AA64ISAR1_GPI_MASK		ULL(0xf)
 #define ID_AA64ISAR1_GPA_SHIFT		U(24)
@@ -303,11 +315,6 @@
 #define ID_AA64ISAR1_APA_SHIFT		U(4)
 #define ID_AA64ISAR1_APA_MASK		ULL(0xf)
 
-#define ID_AA64ISAR1_SB_SHIFT		U(36)
-#define ID_AA64ISAR1_SB_MASK		ULL(0xf)
-#define SB_IMPLEMENTED			ULL(0x1)
-#define SB_NOT_IMPLEMENTED		ULL(0x0)
-
 /* ID_AA64ISAR2_EL1 definitions */
 #define ID_AA64ISAR2_EL1		S3_0_C0_C6_2
 
@@ -606,11 +613,13 @@
 #define SCR_SCTLR2En_BIT	(UL(1) << 44)
 #define SCR_TCR2EN_BIT		(UL(1) << 43)
 #define SCR_RCWMASKEn_BIT	(UL(1) << 42)
+#define SCR_ENTP2_SHIFT		U(41)
+#define SCR_ENTP2_BIT		(UL(1) << SCR_ENTP2_SHIFT)
 #define SCR_TRNDR_BIT		(UL(1) << 40)
 #define SCR_GCSEn_BIT		(UL(1) << 39)
 #define SCR_HXEn_BIT		(UL(1) << 38)
-#define SCR_ENTP2_SHIFT		U(41)
-#define SCR_ENTP2_BIT		(UL(1) << SCR_ENTP2_SHIFT)
+#define SCR_ADEn_BIT		(UL(1) << 37)
+#define SCR_EnAS0_BIT		(UL(1) << 36)
 #define SCR_AMVOFFEN_SHIFT	U(35)
 #define SCR_AMVOFFEN_BIT	(UL(1) << SCR_AMVOFFEN_SHIFT)
 #define SCR_TWEDEn_BIT		(UL(1) << 29)
@@ -1504,6 +1513,11 @@
 #define SCTLR2_EL1		S3_0_C1_C0_3
 
 /*******************************************************************************
+ * FEAT_LS64_ACCDATA - LoadStore64B with status data
+ ******************************************************************************/
+#define ACCDATA_EL1		S3_0_C13_C0_5
+
+/*******************************************************************************
  * Definitions for DynamicIQ Shared Unit registers
  ******************************************************************************/
 #define CLUSTERPWRDN_EL1	S3_0_c15_c3_6
diff --git a/include/arch/aarch64/arch_features.h b/include/arch/aarch64/arch_features.h
index ec38d76..59188da 100644
--- a/include/arch/aarch64/arch_features.h
+++ b/include/arch/aarch64/arch_features.h
@@ -140,6 +140,8 @@
  * +----------------------------+
  * |	FEAT_D128		|
  * +----------------------------+
+ * |	FEAT_LS64_ACCDATA	|
+ * +----------------------------+
  */
 
 __attribute__((always_inline))
@@ -421,6 +423,11 @@
 CREATE_FEATURE_FUNCS(feat_sme2, id_aa64pfr1_el1, ID_AA64PFR1_EL1_SME_SHIFT,
 		     ID_AA64PFR1_EL1_SME_MASK, SME2_IMPLEMENTED, ENABLE_SME2_FOR_NS)
 
+/* FEAT_LS64_ACCDATA: */
+CREATE_FEATURE_FUNCS(feat_ls64_accdata, id_aa64isar1_el1, ID_AA64ISAR1_LS64_SHIFT,
+		     ID_AA64ISAR1_LS64_MASK, LS64_ACCDATA_IMPLEMENTED,
+		     ENABLE_FEAT_LS64_ACCDATA)
+
 /*******************************************************************************
  * Function to get hardware granularity support
  ******************************************************************************/
diff --git a/include/arch/aarch64/arch_helpers.h b/include/arch/aarch64/arch_helpers.h
index 9c36e4b..119c428 100644
--- a/include/arch/aarch64/arch_helpers.h
+++ b/include/arch/aarch64/arch_helpers.h
@@ -696,6 +696,9 @@
 DEFINE_RENAME_SYSREG_RW_FUNCS(sctlr2_el1, SCTLR2_EL1)
 DEFINE_RENAME_SYSREG_RW_FUNCS(sctlr2_el2, SCTLR2_EL2)
 
+/* FEAT_LS64_ACCDATA Registers */
+DEFINE_RENAME_SYSREG_RW_FUNCS(accdata_el1, ACCDATA_EL1)
+
 /* DynamIQ Control registers */
 DEFINE_RENAME_SYSREG_RW_FUNCS(clusterpwrdn_el1, CLUSTERPWRDN_EL1)
 DEFINE_RENAME_SYSREG_RW_FUNCS(clusterpmcr_el1, CLUSTERPMCR_EL1)
diff --git a/include/lib/el3_runtime/context_el1.h b/include/lib/el3_runtime/context_el1.h
index 4379bcf..7bc0235 100644
--- a/include/lib/el3_runtime/context_el1.h
+++ b/include/lib/el3_runtime/context_el1.h
@@ -118,6 +118,10 @@
 	uint64_t sctlr2_el1;
 } el1_sctlr2_regs_t;
 
+typedef struct el1_ls64_regs {
+	uint64_t accdata_el1;
+} el1_ls64_regs_t;
+
 typedef struct el1_sysregs {
 
 	el1_common_regs_t common;
@@ -174,6 +178,9 @@
 	el1_sctlr2_regs_t sctlr2;
 #endif
 
+#if ENABLE_FEAT_LS64_ACCDATA
+	el1_ls64_regs_t ls64;
+#endif
 } el1_sysregs_t;
 
 
@@ -304,6 +311,14 @@
 #define write_el1_ctx_sctlr2(ctx, reg, val)
 #endif /* ENABLE_FEAT_SCTLR2 */
 
+#if ENABLE_FEAT_LS64_ACCDATA
+#define read_el1_ctx_ls64(ctx, reg)		(((ctx)->ls64).reg)
+#define write_el1_ctx_ls64(ctx, reg, val)	((((ctx)->ls64).reg)	\
+							= (uint64_t) (val))
+#else
+#define read_el1_ctx_ls64(ctx, reg)		ULL(0)
+#define write_el1_ctx_ls64(ctx, reg, val)
+#endif /* ENABLE_FEAT_LS64_ACCDATA */
 /******************************************************************************/
 #endif /* __ASSEMBLER__ */