Implement dynamic mitigation for CVE-2018-3639 on Cortex-A76

The Cortex-A76 implements SMCCC_ARCH_WORKAROUND_2 as defined in
"Firmware interfaces for mitigating cache speculation vulnerabilities
System Software on Arm Systems"[0].
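
For reference, a lower EL enables or disables the mitigation roughly as
follows (a sketch only; per the SMCCC spec, `SMCCC_ARCH_WORKAROUND_2` is
function ID 0x80007FFF and a non-zero W1 requests the mitigation):

	mov	w0, #0x7fff
	movk	w0, #0x8000, lsl #16	/* w0 = SMCCC_ARCH_WORKAROUND_2 */
	mov	w1, #1			/* 1: enable, 0: disable */
	smc	#0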

Dynamic mitigation for CVE-2018-3639 is enabled/disabled by
setting/clearing bit 16 (Disable load pass store) of `CPUACTLR2_EL1`.
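
In other words, the toggle sequence boils down to the following sketch,
mirroring the code added below (enable shown; disable uses `bic` instead):

	mrs	x0, CORTEX_A76_CPUACTLR2_EL1
	orr	x0, x0, #(1 << 16)	/* set Disable load pass store */
	msr	CORTEX_A76_CPUACTLR2_EL1, x0
	isb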

NOTE: The generic code that implements dynamic mitigation does not
currently implement the expected semantics when dispatching an SDEI
event to a lower EL.  This will be fixed in a separate patch.

[0] https://developer.arm.com/cache-speculation-vulnerability-firmware-specification

Change-Id: I8fb2862b9ab24d55a0e9693e48e8be4df32afb5a
Signed-off-by: Dimitris Papastamos <dimitris.papastamos@arm.com>
diff --git a/include/lib/cpus/aarch64/cortex_a76.h b/include/lib/cpus/aarch64/cortex_a76.h
index de6288d..1cb7747 100644
--- a/include/lib/cpus/aarch64/cortex_a76.h
+++ b/include/lib/cpus/aarch64/cortex_a76.h
@@ -16,6 +16,13 @@
 #define CORTEX_A76_CPUPWRCTLR_EL1	S3_0_C15_C2_7
 #define CORTEX_A76_CPUECTLR_EL1	S3_0_C15_C1_4
 
+/*******************************************************************************
+ * CPU Auxiliary Control register specific definitions.
+ ******************************************************************************/
+#define CORTEX_A76_CPUACTLR2_EL1	S3_0_C15_C1_1
+
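+/*
+ * Bit 16 is "Disable load pass store"; setting it applies the
+ * CVE-2018-3639 mitigation.
+ */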
+#define CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE	(1 << 16)
+
 /* Definitions of register field mask in CORTEX_A76_CPUPWRCTLR_EL1 */
 #define CORTEX_A76_CORE_PWRDN_EN_MASK	0x1
 
diff --git a/lib/cpus/aarch64/cortex_a76.S b/lib/cpus/aarch64/cortex_a76.S
index 3a41802..14705d7 100644
--- a/lib/cpus/aarch64/cortex_a76.S
+++ b/lib/cpus/aarch64/cortex_a76.S
@@ -5,12 +5,228 @@
  */
 
 #include <arch.h>
+#include <arm_arch_svc.h>
 #include <asm_macros.S>
 #include <bl_common.h>
+#include <context.h>
 #include <cortex_a76.h>
 #include <cpu_macros.S>
 #include <plat_macros.S>
 
+#if !DYNAMIC_WORKAROUND_CVE_2018_3639
+#error Cortex A76 requires DYNAMIC_WORKAROUND_CVE_2018_3639=1
+#endif
+
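+/*
+ * ESR_EL3 values for an SMC #0 trapped from AArch64 (EC = 0x17) and
+ * AArch32 (EC = 0x13) state: (EC << 26) | (1 << 25) for the IL bit.
+ */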
+#define ESR_EL3_A64_SMC0	0x5e000000
+#define ESR_EL3_A32_SMC0	0x4e000000
+
+	/*
+	 * This macro applies the mitigation for CVE-2018-3639.
+	 * It implements a fast path where `SMCCC_ARCH_WORKAROUND_2`
+	 * SMC calls from a lower EL running in AArch32 or AArch64
+	 * will take the fast path and return early.
+	 *
+	 * The macro saves x2-x3 to the context.  In the fast path
+	 * x0-x3 registers do not need to be restored as the calling
+	 * context will have saved them.
+	 */
+	.macro apply_cve_2018_3639_wa _is_sync_exception _esr_el3_val
+	stp	x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
+
+	.if \_is_sync_exception
+		/*
+		 * Ensure SMC is coming from A64/A32 state on #0
+		 * with W0 = SMCCC_ARCH_WORKAROUND_2
+		 *
+		 * This sequence evaluates as:
+		 *    (W0==SMCCC_ARCH_WORKAROUND_2) ? (ESR_EL3==SMC#0) : (NE)
+		 * allowing use of a single branch operation
+		 */
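+		/*
+		 * 0x80007FFF is encodable as a logical immediate, so a
+		 * single ORR with WZR materialises the function ID.
+		 */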
+		orr	w2, wzr, #SMCCC_ARCH_WORKAROUND_2
+		cmp	x0, x2
+		mrs	x3, esr_el3
+		mov_imm	w2, \_esr_el3_val
+		ccmp	w2, w3, #0, eq
+		/*
+		 * The static predictor will predict a fall-through, optimizing
+		 * the `SMCCC_ARCH_WORKAROUND_2` fast path.
+		 */
+		bne	1f
+
+		/*
+		 * The sequence below implements the `SMCCC_ARCH_WORKAROUND_2`
+		 * fast path.
+		 */
+		cmp	x1, xzr /* enable/disable check */
+
+		/*
+		 * When the calling context wants mitigation disabled,
+		 * we program the mitigation disable function in the
+		 * CPU context, which gets invoked on subsequent exits from
+		 * EL3 via the `el3_exit` function.  Otherwise NULL is
+		 * programmed in the CPU context, which results in the caller
+		 * inheriting the EL3 mitigation state (enabled) on subsequent
+		 * `el3_exit`.
+		 */
+		mov	x0, xzr
+		adr	x1, cortex_a76_disable_wa_cve_2018_3639
+		csel	x1, x1, x0, eq
+		str	x1, [sp, #CTX_CVE_2018_3639_OFFSET + CTX_CVE_2018_3639_DISABLE]
+
+		mrs	x2, CORTEX_A76_CPUACTLR2_EL1
+		orr	x1, x2, #CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE
+		bic	x3, x2, #CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE
+		csel	x3, x3, x1, eq
+		msr	CORTEX_A76_CPUACTLR2_EL1, x3
+		eret	/* ERET implies ISB */
+	.endif
+1:
+	/*
+	 * Always enable the CVE-2018-3639 (Spectre variant 4) mitigation
+	 * during EL3 execution.  This is not required for the fast path
+	 * above because it does not perform any memory loads.
+	 */
+	mrs	x2, CORTEX_A76_CPUACTLR2_EL1
+	orr	x2, x2, #CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE
+	msr	CORTEX_A76_CPUACTLR2_EL1, x2
+	isb
+
+	/*
+	 * The caller may have passed arguments to EL3 via x2-x3.
+	 * Restore these registers from the context before jumping to the
+	 * main runtime vector table entry.
+	 */
+	ldp	x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
+	.endm
+
+vector_base cortex_a76_wa_cve_2018_3639_a76_vbar
+
+	/* ---------------------------------------------------------------------
+	 * Current EL with SP_EL0 : 0x0 - 0x200
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry cortex_a76_sync_exception_sp_el0
+	b	sync_exception_sp_el0
+	check_vector_size cortex_a76_sync_exception_sp_el0
+
+vector_entry cortex_a76_irq_sp_el0
+	b	irq_sp_el0
+	check_vector_size cortex_a76_irq_sp_el0
+
+vector_entry cortex_a76_fiq_sp_el0
+	b	fiq_sp_el0
+	check_vector_size cortex_a76_fiq_sp_el0
+
+vector_entry cortex_a76_serror_sp_el0
+	b	serror_sp_el0
+	check_vector_size cortex_a76_serror_sp_el0
+
+	/* ---------------------------------------------------------------------
+	 * Current EL with SP_ELx: 0x200 - 0x400
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry cortex_a76_sync_exception_sp_elx
+	b	sync_exception_sp_elx
+	check_vector_size cortex_a76_sync_exception_sp_elx
+
+vector_entry cortex_a76_irq_sp_elx
+	b	irq_sp_elx
+	check_vector_size cortex_a76_irq_sp_elx
+
+vector_entry cortex_a76_fiq_sp_elx
+	b	fiq_sp_elx
+	check_vector_size cortex_a76_fiq_sp_elx
+
+vector_entry cortex_a76_serror_sp_elx
+	b	serror_sp_elx
+	check_vector_size cortex_a76_serror_sp_elx
+
+	/* ---------------------------------------------------------------------
+	 * Lower EL using AArch64 : 0x400 - 0x600
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry cortex_a76_sync_exception_aarch64
+	apply_cve_2018_3639_wa _is_sync_exception=1 _esr_el3_val=ESR_EL3_A64_SMC0
+	b	sync_exception_aarch64
+	check_vector_size cortex_a76_sync_exception_aarch64
+
+vector_entry cortex_a76_irq_aarch64
+	apply_cve_2018_3639_wa _is_sync_exception=0 _esr_el3_val=ESR_EL3_A64_SMC0
+	b	irq_aarch64
+	check_vector_size cortex_a76_irq_aarch64
+
+vector_entry cortex_a76_fiq_aarch64
+	apply_cve_2018_3639_wa _is_sync_exception=0 _esr_el3_val=ESR_EL3_A64_SMC0
+	b	fiq_aarch64
+	check_vector_size cortex_a76_fiq_aarch64
+
+vector_entry cortex_a76_serror_aarch64
+	apply_cve_2018_3639_wa _is_sync_exception=0 _esr_el3_val=ESR_EL3_A64_SMC0
+	b	serror_aarch64
+	check_vector_size cortex_a76_serror_aarch64
+
+	/* ---------------------------------------------------------------------
+	 * Lower EL using AArch32 : 0x600 - 0x800
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry cortex_a76_sync_exception_aarch32
+	apply_cve_2018_3639_wa _is_sync_exception=1 _esr_el3_val=ESR_EL3_A32_SMC0
+	b	sync_exception_aarch32
+	check_vector_size cortex_a76_sync_exception_aarch32
+
+vector_entry cortex_a76_irq_aarch32
+	apply_cve_2018_3639_wa _is_sync_exception=0 _esr_el3_val=ESR_EL3_A32_SMC0
+	b	irq_aarch32
+	check_vector_size cortex_a76_irq_aarch32
+
+vector_entry cortex_a76_fiq_aarch32
+	apply_cve_2018_3639_wa _is_sync_exception=0 _esr_el3_val=ESR_EL3_A32_SMC0
+	b	fiq_aarch32
+	check_vector_size cortex_a76_fiq_aarch32
+
+vector_entry cortex_a76_serror_aarch32
+	apply_cve_2018_3639_wa _is_sync_exception=0 _esr_el3_val=ESR_EL3_A32_SMC0
+	b	serror_aarch32
+	check_vector_size cortex_a76_serror_aarch32
+
+func check_errata_cve_2018_3639
+#if WORKAROUND_CVE_2018_3639
+	mov	x0, #ERRATA_APPLIES
+#else
+	mov	x0, #ERRATA_MISSING
+#endif
+	ret
+endfunc check_errata_cve_2018_3639
+
+func cortex_a76_disable_wa_cve_2018_3639
+	mrs	x0, CORTEX_A76_CPUACTLR2_EL1
+	bic	x0, x0, #CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE
+	msr	CORTEX_A76_CPUACTLR2_EL1, x0
+	isb
+	ret
+endfunc cortex_a76_disable_wa_cve_2018_3639
+
+func cortex_a76_reset_func
+#if WORKAROUND_CVE_2018_3639
+	mrs	x0, CORTEX_A76_CPUACTLR2_EL1
+	orr	x0, x0, #CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE
+	msr	CORTEX_A76_CPUACTLR2_EL1, x0
+	isb
+#endif
+
+#if IMAGE_BL31 && WORKAROUND_CVE_2018_3639
+	/*
+	 * The Cortex-A76 generic vectors are overridden to use the vectors
+	 * defined above.  This is required in order to apply the mitigation
+	 * for CVE-2018-3639 on exception entry from lower ELs.
+	 */
+	adr	x0, cortex_a76_wa_cve_2018_3639_a76_vbar
+	msr	vbar_el3, x0
+	isb
+#endif
+	ret
+endfunc cortex_a76_reset_func
+
 	/* ---------------------------------------------
 	 * HW will do the cache maintenance while powering down
 	 * ---------------------------------------------
@@ -27,6 +243,27 @@
 	ret
 endfunc cortex_a76_core_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex-A76. Must follow AAPCS.
+ */
+func cortex_a76_errata_report
+	stp	x8, x30, [sp, #-16]!
+
+	bl	cpu_get_rev_var
+	mov	x8, x0
+
+	/*
+	 * Report all errata. The revision-variant information is passed to
+	 * checking functions of each errata.
+	 */
+	report_errata WORKAROUND_CVE_2018_3639, cortex_a76, cve_2018_3639
+
+	ldp	x8, x30, [sp], #16
+	ret
+endfunc cortex_a76_errata_report
+#endif
+
 	/* ---------------------------------------------
 	 * This function provides cortex_a76 specific
 	 * register information for crash reporting.
@@ -46,6 +283,8 @@
 	ret
 endfunc cortex_a76_cpu_reg_dump
 
-declare_cpu_ops cortex_a76, CORTEX_A76_MIDR, \
-	CPU_NO_RESET_FUNC, \
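+/*
+ * declare_cpu_ops_wa additionally registers the per-CPU workaround hooks:
+ * extra1 (a CVE-2017-5715 check function, not needed here) and extra2
+ * (the CVE-2018-3639 mitigation disable function).
+ */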
+declare_cpu_ops_wa cortex_a76, CORTEX_A76_MIDR, \
+	cortex_a76_reset_func, \
+	CPU_NO_EXTRA1_FUNC, \
+	cortex_a76_disable_wa_cve_2018_3639, \
 	cortex_a76_core_pwr_dwn