refactor(el3-runtime): change Cortex-A76 implementation of CVE-2018-3639

Re-factored the prior implementation of workaround for CVE-2018-3639
using branch and link instruction to save vector space to include the
workaround for CVE-2022-23960.

Signed-off-by: Bipin Ravi <bipin.ravi@arm.com>
Change-Id: Ib3fe949583160429b5de8f0a4a8e623eb91d87d4
diff --git a/lib/cpus/aarch64/cortex_a76.S b/lib/cpus/aarch64/cortex_a76.S
index 4f7f4bb..7bcdafd 100644
--- a/lib/cpus/aarch64/cortex_a76.S
+++ b/lib/cpus/aarch64/cortex_a76.S
@@ -35,59 +35,17 @@
 	 *
 	 * The macro saves x2-x3 to the context. In the fast path
 	 * x0-x3 registers do not need to be restored as the calling
-	 * context will have saved them.
+	 * context will have saved them. The macro also saves
+	 * x29-x30 to the context in the sync_exception path.
 	 */
 	.macro apply_cve_2018_3639_wa _is_sync_exception _esr_el3_val
 	stp	x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
-
 	.if \_is_sync_exception
-		/*
-		 * Ensure SMC is coming from A64/A32 state on #0
-		 * with W0 = SMCCC_ARCH_WORKAROUND_2
-		 *
-		 * This sequence evaluates as:
-		 *    (W0==SMCCC_ARCH_WORKAROUND_2) ? (ESR_EL3==SMC#0) : (NE)
-		 * allowing use of a single branch operation
-		 */
-		orr	w2, wzr, #SMCCC_ARCH_WORKAROUND_2
-		cmp	x0, x2
-		mrs	x3, esr_el3
-		mov_imm	w2, \_esr_el3_val
-		ccmp	w2, w3, #0, eq
-		/*
-		 * Static predictor will predict a fall-through, optimizing
-		 * the `SMCCC_ARCH_WORKAROUND_2` fast path.
-		 */
-		bne	1f
-
-		/*
-		 * The sequence below implements the `SMCCC_ARCH_WORKAROUND_2`
-		 * fast path.
-		 */
-		cmp	x1, xzr /* enable/disable check */
-
-		/*
-		 * When the calling context wants mitigation disabled,
-		 * we program the mitigation disable function in the
-		 * CPU context, which gets invoked on subsequent exits from
-		 * EL3 via the `el3_exit` function. Otherwise NULL is
-		 * programmed in the CPU context, which results in caller's
-		 * inheriting the EL3 mitigation state (enabled) on subsequent
-		 * `el3_exit`.
-		 */
-		mov	x0, xzr
-		adr	x1, cortex_a76_disable_wa_cve_2018_3639
-		csel	x1, x1, x0, eq
-		str	x1, [sp, #CTX_CVE_2018_3639_OFFSET + CTX_CVE_2018_3639_DISABLE]
-
-		mrs	x2, CORTEX_A76_CPUACTLR2_EL1
-		orr	x1, x2, #CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE
-		bic	x3, x2, #CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE
-		csel	x3, x3, x1, eq
-		msr	CORTEX_A76_CPUACTLR2_EL1, x3
-		exception_return /* exception_return contains ISB */
+	stp	x29, x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X29]
+	mov_imm	w2, \_esr_el3_val
+	bl	apply_cve_2018_3639_sync_wa
+	ldp	x29, x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X29]
 	.endif
-1:
 	/*
 	 * Always enable v4 mitigation during EL3 execution. This is not
 	 * required for the fast path above because it does not perform any
@@ -195,6 +153,78 @@
 	apply_cve_2018_3639_wa _is_sync_exception=0 _esr_el3_val=ESR_EL3_A32_SMC0
 	b	serror_aarch32
 end_vector_entry cortex_a76_serror_aarch32
+
+	/*
+	 * -----------------------------------------------------------------
+	 * This function applies the mitigation for CVE-2018-3639
+	 * specifically for sync exceptions. It implements a fast path
+	 * where `SMCCC_ARCH_WORKAROUND_2` SMC calls from a lower EL
+	 * running in AArch64 will go through the fast and return early.
+	 *
+	 * In the fast path x0-x3 registers do not need to be restored as the
+	 * calling context will have saved them.
+	 *
+	 * Caller must pass value of esr_el3 to compare via x2.
+	 * Save and restore these registers outside of this function from the
+	 * context before jumping to the main runtime vector table entry.
+	 *
+	 * Shall clobber: x0-x3, x30
+	 * -----------------------------------------------------------------
+	 */
+func apply_cve_2018_3639_sync_wa
+	/*
+	 * Ensure SMC is coming from A64/A32 state on #0
+	 * with W0 = SMCCC_ARCH_WORKAROUND_2
+	 *
+	 * This sequence evaluates as:
+	 *    (W0==SMCCC_ARCH_WORKAROUND_2) ? (ESR_EL3==SMC#0) : (NE)
+	 * allowing use of a single branch operation
+	 * X2 populated outside this function with the SMC FID.
+	 */
+	orr	w3, wzr, #SMCCC_ARCH_WORKAROUND_2
+	cmp	x0, x3
+	mrs	x3, esr_el3
+
+	ccmp	w2, w3, #0, eq
+	/*
+	 * Static predictor will predict a fall-through, optimizing
+	 * the `SMCCC_ARCH_WORKAROUND_2` fast path.
+	 */
+	bne	1f
+
+	/*
+	* The sequence below implements the `SMCCC_ARCH_WORKAROUND_2`
+	* fast path.
+	*/
+	cmp	x1, xzr /* enable/disable check */
+
+	/*
+	 * When the calling context wants mitigation disabled,
+	 * we program the mitigation disable function in the
+	 * CPU context, which gets invoked on subsequent exits from
+	 * EL3 via the `el3_exit` function. Otherwise NULL is
+	 * programmed in the CPU context, which results in caller's
+	 * inheriting the EL3 mitigation state (enabled) on subsequent
+	 * `el3_exit`.
+	 */
+	mov	x0, xzr
+	adr	x1, cortex_a76_disable_wa_cve_2018_3639
+	csel	x1, x1, x0, eq
+	str	x1, [sp, #CTX_CVE_2018_3639_OFFSET + CTX_CVE_2018_3639_DISABLE]
+
+	mrs	x2, CORTEX_A76_CPUACTLR2_EL1
+	orr	x1, x2, #CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE
+	bic	x3, x2, #CORTEX_A76_CPUACTLR2_EL1_DISABLE_LOAD_PASS_STORE
+	csel	x3, x3, x1, eq
+	msr	CORTEX_A76_CPUACTLR2_EL1, x3
+	ldp	x29, x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X29]
+	/*
+	* `SMCCC_ARCH_WORKAROUND_2`fast path return to lower EL.
+	*/
+	exception_return /* exception_return contains ISB */
+1:
+	ret
+endfunc apply_cve_2018_3639_sync_wa
 #endif /* DYNAMIC_WORKAROUND_CVE_2018_3639 */
 
 	/* --------------------------------------------------