fix(errata): workaround for Cortex A78 AE erratum 2395408

Cortex A78 AE erratum 2395408 is a Cat B erratum that applies
to revisions <= r0p1. It is still open.

This erratum states, "A translation table walk that matches an
existing L1 prefetch with a read request outstanding on CHI might
fold into the prefetch, which might lead to data corruption for
a future instruction fetch".

This erratum is avoided by setting CPUACTLR2_EL1[40] to 1 to
disable folding of demand requests into older prefetches with
L2 miss requests outstanding.

SDEN is available at https://developer.arm.com/documentation/SDEN-1707912

Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
Change-Id: Ic17968987ca3c67fa7f64211bcde6dfcb35ed5d6
diff --git a/docs/design/cpu-specific-build-macros.rst b/docs/design/cpu-specific-build-macros.rst
index 13b447a..af0e769 100644
--- a/docs/design/cpu-specific-build-macros.rst
+++ b/docs/design/cpu-specific-build-macros.rst
@@ -310,6 +310,10 @@
   Cortex-A78 AE CPU. This needs to be enabled for revisions r0p0 and r0p1. This
   erratum is still open.
 
+- ``ERRATA_A78_AE_2395408`` : This applies errata 2395408 workaround to
+  Cortex-A78 AE CPU. This needs to be enabled for revisions r0p0 and r0p1. This
+  erratum is still open.
+
 For Neoverse N1, the following errata build flags are defined :
 
 -  ``ERRATA_N1_1073348``: This applies errata 1073348 workaround to Neoverse-N1
diff --git a/include/lib/cpus/aarch64/cortex_a78.h b/include/lib/cpus/aarch64/cortex_a78.h
index 06c27ad..31da99e 100644
--- a/include/lib/cpus/aarch64/cortex_a78.h
+++ b/include/lib/cpus/aarch64/cortex_a78.h
@@ -38,6 +38,7 @@
 #define CORTEX_A78_ACTLR2_EL1_BIT_0			(ULL(1) << 0)
 #define CORTEX_A78_ACTLR2_EL1_BIT_1			(ULL(1) << 1)
 #define CORTEX_A78_ACTLR2_EL1_BIT_2			(ULL(1) << 2)
+#define CORTEX_A78_ACTLR2_EL1_BIT_40			(ULL(1) << 40)
 
 /*******************************************************************************
  * CPU Activity Monitor Unit register specific definitions.
diff --git a/include/lib/cpus/aarch64/cortex_a78_ae.h b/include/lib/cpus/aarch64/cortex_a78_ae.h
index 4108af5..b68ec1e 100644
--- a/include/lib/cpus/aarch64/cortex_a78_ae.h
+++ b/include/lib/cpus/aarch64/cortex_a78_ae.h
@@ -26,5 +26,6 @@
  ******************************************************************************/
 #define CORTEX_A78_AE_ACTLR2_EL1			CORTEX_A78_ACTLR2_EL1
 #define CORTEX_A78_AE_ACTLR2_EL1_BIT_0			CORTEX_A78_ACTLR2_EL1_BIT_0
+#define CORTEX_A78_AE_ACTLR2_EL1_BIT_40			CORTEX_A78_ACTLR2_EL1_BIT_40
 
 #endif /* CORTEX_A78_AE_H */
diff --git a/lib/cpus/aarch64/cortex_a78_ae.S b/lib/cpus/aarch64/cortex_a78_ae.S
index 45fb0ad..27adc38 100644
--- a/lib/cpus/aarch64/cortex_a78_ae.S
+++ b/lib/cpus/aarch64/cortex_a78_ae.S
@@ -141,6 +141,41 @@
 	b	cpu_rev_var_range
 endfunc check_errata_2376748
 
+/* --------------------------------------------------
+ * Errata Workaround for A78 AE Erratum 2395408.
+ * This applies to revisions r0p0 and r0p1 of A78 AE.
+ * Inputs:
+ * x0: variant (bits [7:4]) and revision (bits [3:0]) of current cpu.
+ * Shall clobber: x0-x17 (x17 holds the return address, x0 is scratch)
+ * --------------------------------------------------
+ */
+func errata_a78_ae_2395408_wa
+	/* Check x0 against r0p0 - r0p1; save x30 first as bl overwrites it */
+	mov	x17, x30
+	bl	check_errata_2395408
+	cbz	x0, 1f	/* check returned zero: skip the workaround */
+
+	/* --------------------------------------------------------
+	 * Disable folding of demand requests into older prefetches
+	 * with L2 miss requests outstanding by setting
+	 * CPUACTLR2_EL1[40] to 1 (read-modify-write, other bits kept).
+	 * --------------------------------------------------------
+	 */
+	mrs	x0, CORTEX_A78_AE_ACTLR2_EL1
+	orr	x0, x0, #CORTEX_A78_AE_ACTLR2_EL1_BIT_40
+	msr	CORTEX_A78_AE_ACTLR2_EL1, x0
+	isb	/* synchronize the system register write before returning */
+1:
+	ret	x17	/* return through the link address saved on entry */
+endfunc errata_a78_ae_2395408_wa
+
+func check_errata_2395408
+	/* Applies to revisions r0p0 and r0p1; x0 is passed through unmodified. */
+	mov	x1, #CPU_REV(0, 0)	/* presumably the lower bound (r0p0) - confirm in cpu_rev_var_range */
+	mov	x2, #CPU_REV(0, 1)	/* presumably the upper bound (r0p1) - confirm in cpu_rev_var_range */
+	b	cpu_rev_var_range	/* plain branch, x30 untouched: the helper's result goes to our caller */
+endfunc check_errata_2395408
+
 func check_errata_cve_2022_23960
 #if WORKAROUND_CVE_2022_23960
 	mov	x0, #ERRATA_APPLIES
@@ -174,6 +209,11 @@
 	bl	errata_a78_ae_2376748_wa
 #endif
 
+#if ERRATA_A78_AE_2395408
+	mov	x0, x18
+	bl	errata_a78_ae_2395408_wa
+#endif
+
 #if ENABLE_AMU
 	/* Make sure accesses from EL0/EL1 and EL2 are not trapped to EL3 */
 	mrs	x0, actlr_el3
@@ -240,6 +280,7 @@
 	report_errata ERRATA_A78_AE_1941500, cortex_a78_ae, 1941500
 	report_errata ERRATA_A78_AE_1951502, cortex_a78_ae, 1951502
 	report_errata ERRATA_A78_AE_2376748, cortex_a78_ae, 2376748
+	report_errata ERRATA_A78_AE_2395408, cortex_a78_ae, 2395408
 	report_errata WORKAROUND_CVE_2022_23960, cortex_a78_ae, cve_2022_23960
 
 	ldp	x8, x30, [sp], #16
diff --git a/lib/cpus/cpu-ops.mk b/lib/cpus/cpu-ops.mk
index 748724b..8840f8e 100644
--- a/lib/cpus/cpu-ops.mk
+++ b/lib/cpus/cpu-ops.mk
@@ -345,6 +345,10 @@
 # to revisions r0p0 and r0p1 of the A78 AE cpu. It is still open.
 ERRATA_A78_AE_2376748	?=0
 
+# Flag to apply erratum 2395408 workaround during reset. This erratum applies
+# to revisions r0p0 and r0p1 of the A78 AE cpu. It is still open.
+ERRATA_A78_AE_2395408	?=0
+
 # Flag to apply T32 CLREX workaround during reset. This erratum applies
 # only to r0p0 and r1p0 of the Neoverse N1 cpu.
 ERRATA_N1_1043202	?=0
@@ -846,6 +850,10 @@
 $(eval $(call assert_boolean,ERRATA_A78_AE_2376748))
 $(eval $(call add_define,ERRATA_A78_AE_2376748))
 
+# Process ERRATA_A78_AE_2395408 flag
+$(eval $(call assert_boolean,ERRATA_A78_AE_2395408))
+$(eval $(call add_define,ERRATA_A78_AE_2395408))
+
 # Process ERRATA_N1_1043202 flag
 $(eval $(call assert_boolean,ERRATA_N1_1043202))
 $(eval $(call add_define,ERRATA_N1_1043202))