feat(cpufeat): add ENABLE_FEAT_LS64_ACCDATA

Armv8.6 introduced the FEAT_LS64 extension, which provides a 64 *byte*
store instruction. A related instruction is ST64BV0, which will replace
the lowest 32 bits of the data with a value taken from the ACCDATA_EL1
system register (so that EL0 cannot alter them).
Both executing the ST64BV0 instruction and accessing the ACCDATA_EL1
system register are guarded by an SCR_EL3 bit each (EnAS0 and ADEn,
respectively), which we should set to avoid a trap into EL3 when lower
ELs use either of them.

Add the required bits and pieces to make this feature usable:
- Add the ENABLE_FEAT_LS64_ACCDATA build option (defaulting to 0).
- Add the CPUID and SCR_EL3 bit definitions associated with FEAT_LS64.
- Add a feature check to check for the existing four variants of the
  LS64 feature and detect future extensions.
- Add code to save and restore the ACCDATA_EL1 register on
  secure/non-secure context switches.
- Enable the feature with runtime detection for FVP and Arm FPGA.

Please note that the *basic* FEAT_LS64 feature does not have any trap
bits; it is only the addition of the ACCDATA_EL1 system register that
introduces these traps and the SCR_EL3 bits.

Change-Id: Ie3e2ca2d9c4fbbd45c0cc6089accbb825579138a
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
diff --git a/Makefile b/Makefile
index f736a3b..6bbca3b 100644
--- a/Makefile
+++ b/Makefile
@@ -1257,6 +1257,7 @@
 	ENABLE_FEAT_FGT \
 	ENABLE_FEAT_FGT2 \
 	ENABLE_FEAT_HCX \
+	ENABLE_FEAT_LS64_ACCDATA \
 	ENABLE_FEAT_MTE2 \
 	ENABLE_FEAT_PAN \
 	ENABLE_FEAT_RNG \
@@ -1421,6 +1422,7 @@
 	ENABLE_FEAT_VHE \
 	ENABLE_FEAT_CSV2_2 \
 	ENABLE_FEAT_CSV2_3 \
+	ENABLE_FEAT_LS64_ACCDATA \
 	ENABLE_FEAT_PAN \
 	ENABLE_FEAT_TCR2 \
 	ENABLE_FEAT_THE \
diff --git a/common/feat_detect.c b/common/feat_detect.c
index 6aa5e2e..8c03ab8 100644
--- a/common/feat_detect.c
+++ b/common/feat_detect.c
@@ -192,6 +192,11 @@
 	return ISOLATE_FIELD(read_id_aa64mmfr1_el1(), ID_AA64MMFR1_EL1_HCX_SHIFT,
 			     ID_AA64MMFR1_EL1_HCX_MASK);
 }
+static unsigned int read_feat_ls64_id_field(void)
+{
+	return ISOLATE_FIELD(read_id_aa64isar1_el1(), ID_AA64ISAR1_LS64_SHIFT,
+			     ID_AA64ISAR1_LS64_MASK);
+}
 static unsigned int read_feat_tcr2_id_field(void)
 {
 	return ISOLATE_FIELD(read_id_aa64mmfr3_el1(), ID_AA64MMFR3_EL1_TCRX_SHIFT,
@@ -367,6 +372,7 @@
 
 	/* v8.7 features */
 	check_feature(ENABLE_FEAT_HCX, read_feat_hcx_id_field(), "HCX", 1, 1);
+	check_feature(ENABLE_FEAT_LS64_ACCDATA, read_feat_ls64_id_field(), "LS64", 1, 3);
 
 	/* v8.9 features */
 	check_feature(ENABLE_FEAT_TCR2, read_feat_tcr2_id_field(),
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index cd10cd6..ab0b94d 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -499,6 +499,11 @@
    The flag is automatically disabled when the target
    architecture is AArch32.
 
+-  ``ENABLE_FEAT_LS64_ACCDATA``: Numeric value to enable access to, and the
+   saving and restoring of, the ACCDATA_EL1 system register at EL2 and below.
+   This flag can take the values 0 to 2, to align with the ``ENABLE_FEAT``
+   mechanism. Default value is ``0``.
+
 -  ``ENABLE_MPMM``: Boolean option to enable support for the Maximum Power
    Mitigation Mechanism supported by certain Arm cores, which allows the SoC
    firmware to detect and limit high activity events to assist in SoC processor
diff --git a/include/arch/aarch32/arch_features.h b/include/arch/aarch32/arch_features.h
index a29b672..e347240 100644
--- a/include/arch/aarch32/arch_features.h
+++ b/include/arch/aarch32/arch_features.h
@@ -196,5 +196,7 @@
 static inline bool is_feat_sebep_present(void) { return false; }
 __attribute__((always_inline))
 static inline bool is_feat_d128_present(void) { return false; }
+__attribute__((always_inline))
+static inline bool is_feat_ls64_accdata_present(void) { return false; }
 
 #endif /* ARCH_FEATURES_H */
diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h
index 3f0120c..737d07a 100644
--- a/include/arch/aarch64/arch.h
+++ b/include/arch/aarch64/arch.h
@@ -293,6 +293,18 @@
 /* ID_AA64ISAR1_EL1 definitions */
 #define ID_AA64ISAR1_EL1		S3_0_C0_C6_1
 
+#define ID_AA64ISAR1_LS64_SHIFT		U(60)
+#define ID_AA64ISAR1_LS64_MASK		ULL(0xf)
+#define LS64_ACCDATA_IMPLEMENTED	ULL(0x3)
+#define LS64_V_IMPLEMENTED		ULL(0x2)
+#define LS64_IMPLEMENTED		ULL(0x1)
+#define LS64_NOT_IMPLEMENTED		ULL(0x0)
+
+#define ID_AA64ISAR1_SB_SHIFT		U(36)
+#define ID_AA64ISAR1_SB_MASK		ULL(0xf)
+#define SB_IMPLEMENTED			ULL(0x1)
+#define SB_NOT_IMPLEMENTED		ULL(0x0)
+
 #define ID_AA64ISAR1_GPI_SHIFT		U(28)
 #define ID_AA64ISAR1_GPI_MASK		ULL(0xf)
 #define ID_AA64ISAR1_GPA_SHIFT		U(24)
@@ -303,11 +315,6 @@
 #define ID_AA64ISAR1_APA_SHIFT		U(4)
 #define ID_AA64ISAR1_APA_MASK		ULL(0xf)
 
-#define ID_AA64ISAR1_SB_SHIFT		U(36)
-#define ID_AA64ISAR1_SB_MASK		ULL(0xf)
-#define SB_IMPLEMENTED			ULL(0x1)
-#define SB_NOT_IMPLEMENTED		ULL(0x0)
-
 /* ID_AA64ISAR2_EL1 definitions */
 #define ID_AA64ISAR2_EL1		S3_0_C0_C6_2
 
@@ -606,11 +613,13 @@
 #define SCR_SCTLR2En_BIT	(UL(1) << 44)
 #define SCR_TCR2EN_BIT		(UL(1) << 43)
 #define SCR_RCWMASKEn_BIT	(UL(1) << 42)
+#define SCR_ENTP2_SHIFT		U(41)
+#define SCR_ENTP2_BIT		(UL(1) << SCR_ENTP2_SHIFT)
 #define SCR_TRNDR_BIT		(UL(1) << 40)
 #define SCR_GCSEn_BIT		(UL(1) << 39)
 #define SCR_HXEn_BIT		(UL(1) << 38)
-#define SCR_ENTP2_SHIFT		U(41)
-#define SCR_ENTP2_BIT		(UL(1) << SCR_ENTP2_SHIFT)
+#define SCR_ADEn_BIT		(UL(1) << 37)
+#define SCR_EnAS0_BIT		(UL(1) << 36)
 #define SCR_AMVOFFEN_SHIFT	U(35)
 #define SCR_AMVOFFEN_BIT	(UL(1) << SCR_AMVOFFEN_SHIFT)
 #define SCR_TWEDEn_BIT		(UL(1) << 29)
@@ -1504,6 +1513,11 @@
 #define SCTLR2_EL1		S3_0_C1_C0_3
 
 /*******************************************************************************
+ * FEAT_LS64_ACCDATA - LoadStore64B with status data
+ ******************************************************************************/
+#define ACCDATA_EL1		S3_0_C13_C0_5
+
+/*******************************************************************************
  * Definitions for DynamicIQ Shared Unit registers
  ******************************************************************************/
 #define CLUSTERPWRDN_EL1	S3_0_c15_c3_6
diff --git a/include/arch/aarch64/arch_features.h b/include/arch/aarch64/arch_features.h
index ec38d76..59188da 100644
--- a/include/arch/aarch64/arch_features.h
+++ b/include/arch/aarch64/arch_features.h
@@ -140,6 +140,8 @@
  * +----------------------------+
  * |	FEAT_D128		|
  * +----------------------------+
+ * |	FEAT_LS64_ACCDATA	|
+ * +----------------------------+
  */
 
 __attribute__((always_inline))
@@ -421,6 +423,11 @@
 CREATE_FEATURE_FUNCS(feat_sme2, id_aa64pfr1_el1, ID_AA64PFR1_EL1_SME_SHIFT,
 		     ID_AA64PFR1_EL1_SME_MASK, SME2_IMPLEMENTED, ENABLE_SME2_FOR_NS)
 
+/* FEAT_LS64_ACCDATA: */
+CREATE_FEATURE_FUNCS(feat_ls64_accdata, id_aa64isar1_el1, ID_AA64ISAR1_LS64_SHIFT,
+		     ID_AA64ISAR1_LS64_MASK, LS64_ACCDATA_IMPLEMENTED,
+		     ENABLE_FEAT_LS64_ACCDATA)
+
 /*******************************************************************************
  * Function to get hardware granularity support
  ******************************************************************************/
diff --git a/include/arch/aarch64/arch_helpers.h b/include/arch/aarch64/arch_helpers.h
index 9c36e4b..119c428 100644
--- a/include/arch/aarch64/arch_helpers.h
+++ b/include/arch/aarch64/arch_helpers.h
@@ -696,6 +696,9 @@
 DEFINE_RENAME_SYSREG_RW_FUNCS(sctlr2_el1, SCTLR2_EL1)
 DEFINE_RENAME_SYSREG_RW_FUNCS(sctlr2_el2, SCTLR2_EL2)
 
+/* FEAT_LS64_ACCDATA Registers */
+DEFINE_RENAME_SYSREG_RW_FUNCS(accdata_el1, ACCDATA_EL1)
+
 /* DynamIQ Control registers */
 DEFINE_RENAME_SYSREG_RW_FUNCS(clusterpwrdn_el1, CLUSTERPWRDN_EL1)
 DEFINE_RENAME_SYSREG_RW_FUNCS(clusterpmcr_el1, CLUSTERPMCR_EL1)
diff --git a/include/lib/el3_runtime/context_el1.h b/include/lib/el3_runtime/context_el1.h
index 4379bcf..7bc0235 100644
--- a/include/lib/el3_runtime/context_el1.h
+++ b/include/lib/el3_runtime/context_el1.h
@@ -118,6 +118,10 @@
 	uint64_t sctlr2_el1;
 } el1_sctlr2_regs_t;
 
+typedef struct el1_ls64_regs {
+	uint64_t accdata_el1;
+} el1_ls64_regs_t;
+
 typedef struct el1_sysregs {
 
 	el1_common_regs_t common;
@@ -174,6 +178,9 @@
 	el1_sctlr2_regs_t sctlr2;
 #endif
 
+#if ENABLE_FEAT_LS64_ACCDATA
+	el1_ls64_regs_t ls64;
+#endif
 } el1_sysregs_t;
 
 
@@ -304,6 +311,14 @@
 #define write_el1_ctx_sctlr2(ctx, reg, val)
 #endif /* ENABLE_FEAT_SCTLR2 */
 
+#if ENABLE_FEAT_LS64_ACCDATA
+#define read_el1_ctx_ls64(ctx, reg)		(((ctx)->ls64).reg)
+#define write_el1_ctx_ls64(ctx, reg, val)	((((ctx)->ls64).reg)	\
+							= (uint64_t) (val))
+#else
+#define read_el1_ctx_ls64(ctx, reg)		ULL(0)
+#define write_el1_ctx_ls64(ctx, reg, val)
+#endif /* ENABLE_FEAT_LS64_ACCDATA */
 /******************************************************************************/
 #endif /* __ASSEMBLER__ */
 
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 8aa2ccc..4ae1306 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -427,6 +427,15 @@
 	}
 
 	/*
+	 * If FEAT_LS64_ACCDATA is enabled, enable access to ACCDATA_EL1 by
+	 * setting SCR_EL3.ADEn and allow the ST64BV0 instruction by setting
+	 * SCR_EL3.EnAS0.
+	 */
+	if (is_feat_ls64_accdata_supported()) {
+		scr_el3 |= SCR_ADEn_BIT | SCR_EnAS0_BIT;
+	}
+
+	/*
 	 * If FEAT_RNG_TRAP is enabled, all reads of the RNDR and RNDRRS
 	 * registers are trapped to EL3.
 	 */
@@ -1759,6 +1768,9 @@
 		write_el1_ctx_sctlr2(ctx, sctlr2_el1, read_sctlr2_el1());
 	}
 
+	if (is_feat_ls64_accdata_supported()) {
+		write_el1_ctx_ls64(ctx, accdata_el1, read_accdata_el1());
+	}
 }
 
 static void el1_sysregs_context_restore(el1_sysregs_t *ctx)
@@ -1864,6 +1876,9 @@
 		write_sctlr2_el1(read_el1_ctx_sctlr2(ctx, sctlr2_el1));
 	}
 
+	if (is_feat_ls64_accdata_supported()) {
+		write_accdata_el1(read_el1_ctx_ls64(ctx, accdata_el1));
+	}
 }
 
 /*******************************************************************************
diff --git a/make_helpers/arch_features.mk b/make_helpers/arch_features.mk
index 39f6223..d378a55 100644
--- a/make_helpers/arch_features.mk
+++ b/make_helpers/arch_features.mk
@@ -336,6 +336,9 @@
 # Flag to enable FEAT_FGT2 (Fine Granular Traps 2)
 ENABLE_FEAT_FGT2			?=	0
 
+# LoadStore64Bytes extension using the ACCDATA_EL1 system register
+ENABLE_FEAT_LS64_ACCDATA		?=	0
+
 #----
 # 8.8
 #----
diff --git a/plat/arm/board/arm_fpga/platform.mk b/plat/arm/board/arm_fpga/platform.mk
index 82401db..c1dc5f5 100644
--- a/plat/arm/board/arm_fpga/platform.mk
+++ b/plat/arm/board/arm_fpga/platform.mk
@@ -47,6 +47,7 @@
 ENABLE_TRF_FOR_NS		:= 2
 ENABLE_SME_FOR_NS		:= 2
 ENABLE_SME2_FOR_NS		:= 2
+ENABLE_FEAT_LS64_ACCDATA	:= 2
 
 # Treating this as a memory-constrained port for now
 USE_COHERENT_MEM	:=	0
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index 6f53a81..0156b31 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -80,6 +80,7 @@
 ENABLE_FEAT_S1POE		:= 2
 ENABLE_FEAT_SCTLR2		:= 2
 ENABLE_FEAT_MTE2		:= 2
+ENABLE_FEAT_LS64_ACCDATA	:= 2
 
 # The FVP platform depends on this macro to build with correct GIC driver.
 $(eval $(call add_define,FVP_USE_GIC_DRIVER))