Optimize SMCCC_ARCH_WORKAROUND_1 on Cortex A57/A72/A73 and A75

This patch implements a fast path for this SMC call on affected PEs: the
synchronous exception vector detects the call and returns immediately
after the workaround has been executed.
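
As an illustration only (not part of the patch), the check performed by
the new synchronous exception vector is roughly equivalent to the C
below.  The helper name is hypothetical; SMCCC_ARCH_WORKAROUND_1 is the
SMCCC v1.1 function ID and ESR_EL3_A64_SMC0 is the constant added by
this patch.

    #include <stdbool.h>
    #include <stdint.h>

    #define SMCCC_ARCH_WORKAROUND_1  0x80008000U  /* SMCCC v1.1 function ID */
    #define ESR_EL3_A64_SMC0         0x5e000000U  /* "SMC #0" taken from AArch64 */

    /* Hypothetical helper mirroring the CMP/CCMP sequence in the new vector */
    static bool is_workaround_1_fast_path(uint32_t w0, uint32_t esr_el3)
    {
            return (w0 == SMCCC_ARCH_WORKAROUND_1) &&
                   (esr_el3 == ESR_EL3_A64_SMC0);
    }

If the check passes, the vector executes ERET straight away, so a
SMCCC_ARCH_WORKAROUND_1 call only pays for the MMU toggle that applies
the workaround; any other exception falls through to the generic
handler.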

NOTE: The MMU disable/enable workaround now assumes that the MMU was
enabled on entry to EL3.  This is a valid assumption, as the EL3 firmware
turns the MMU on after reset and leaves it on until the core powers off.
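
For reference, a pseudo-C sketch of the sequence this note describes
(the accessors and the function name are illustrative stand-ins for the
MRS/MSR instructions in the assembly):

    #include <stdint.h>

    #define SCTLR_M_BIT  (1U << 0)  /* SCTLR_EL3.M: MMU enable */

    extern uint64_t read_sctlr_el3(void);
    extern void write_sctlr_el3(uint64_t val);

    static void toggle_mmu_for_workaround(void)
    {
            uint64_t sctlr = read_sctlr_el3();

            /* Disable the MMU (the workaround proper); an ISB follows in
             * the assembly */
            write_sctlr_el3(sctlr & ~SCTLR_M_BIT);

            /* Re-enable by setting the M bit rather than restoring the
             * saved value, which is what assumes the MMU was on at entry */
            write_sctlr_el3(sctlr | SCTLR_M_BIT);
    }

The previous sequence restored the saved SCTLR_EL3 value and so worked
whether or not the MMU was on; the new one hard-codes the enable, which
is why the assumption above must hold.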

Change-Id: I13c336d06a52297620a9760fb2461b4d606a30b3
Signed-off-by: Dimitris Papastamos <dimitris.papastamos@arm.com>
diff --git a/lib/cpus/aarch64/workaround_cve_2017_5715_mmu.S b/lib/cpus/aarch64/workaround_cve_2017_5715_mmu.S
index f478148..b24b620 100644
--- a/lib/cpus/aarch64/workaround_cve_2017_5715_mmu.S
+++ b/lib/cpus/aarch64/workaround_cve_2017_5715_mmu.S
@@ -1,26 +1,60 @@
 /*
- * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
 #include <arch.h>
+#include <arm_arch_svc.h>
 #include <asm_macros.S>
 #include <context.h>
 
 	.globl	workaround_mmu_runtime_exceptions
 
+#define ESR_EL3_A64_SMC0	0x5e000000
+
 vector_base workaround_mmu_runtime_exceptions
 
-	.macro	apply_workaround
+	.macro	apply_workaround _is_sync_exception
 	stp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
-	mrs	x0, sctlr_el3
+	mrs	x1, sctlr_el3
 	/* Disable MMU */
-	bic	x1, x0, #SCTLR_M_BIT
+	bic	x1, x1, #SCTLR_M_BIT
 	msr	sctlr_el3, x1
 	isb
-	/* Restore MMU config */
-	msr	sctlr_el3, x0
+	/* Enable MMU */
+	orr	x1, x1, #SCTLR_M_BIT
+	msr	sctlr_el3, x1
+	/*
+	 * Defer ISB to avoid synchronizing twice in case we hit
+	 * the workaround SMC call which will implicitly synchronize
+	 * because of the ERET instruction.
+	 */
+
+	/*
+	 * Ensure the SMC came from AArch64 state with immediate #0
+	 * and W0 == SMCCC_ARCH_WORKAROUND_1
+	 *
+	 * This sequence evaluates as:
+	 *    (W0==SMCCC_ARCH_WORKAROUND_1) ? (ESR_EL3==SMC#0) : (NE)
+	 * allowing use of a single branch operation
+	 */
+	.if \_is_sync_exception
+		orr	w1, wzr, #SMCCC_ARCH_WORKAROUND_1
+		cmp	w0, w1
+		mrs	x0, esr_el3
+		mov_imm	w1, ESR_EL3_A64_SMC0
+		ccmp	w0, w1, #0, eq
+		/* Static predictor will predict a fall through */
+		bne	1f
+		eret
+1:
+	.endif
+
+	/*
+	 * Synchronize now to enable the MMU.  This is required
+	 * to ensure the load pair below reads the data stored earlier.
+	 */
 	isb
 	ldp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
 	.endm
@@ -70,22 +104,22 @@
 	 * ---------------------------------------------------------------------
 	 */
 vector_entry workaround_mmu_sync_exception_aarch64
-	apply_workaround
+	apply_workaround _is_sync_exception=1
 	b	sync_exception_aarch64
 	check_vector_size workaround_mmu_sync_exception_aarch64
 
 vector_entry workaround_mmu_irq_aarch64
-	apply_workaround
+	apply_workaround _is_sync_exception=0
 	b	irq_aarch64
 	check_vector_size workaround_mmu_irq_aarch64
 
 vector_entry workaround_mmu_fiq_aarch64
-	apply_workaround
+	apply_workaround _is_sync_exception=0
 	b	fiq_aarch64
 	check_vector_size workaround_mmu_fiq_aarch64
 
 vector_entry workaround_mmu_serror_aarch64
-	apply_workaround
+	apply_workaround _is_sync_exception=0
 	b	serror_aarch64
 	check_vector_size workaround_mmu_serror_aarch64
 
@@ -94,21 +128,21 @@
 	 * ---------------------------------------------------------------------
 	 */
 vector_entry workaround_mmu_sync_exception_aarch32
-	apply_workaround
+	apply_workaround _is_sync_exception=1
 	b	sync_exception_aarch32
 	check_vector_size workaround_mmu_sync_exception_aarch32
 
 vector_entry workaround_mmu_irq_aarch32
-	apply_workaround
+	apply_workaround _is_sync_exception=0
 	b	irq_aarch32
 	check_vector_size workaround_mmu_irq_aarch32
 
 vector_entry workaround_mmu_fiq_aarch32
-	apply_workaround
+	apply_workaround _is_sync_exception=0
 	b	fiq_aarch32
 	check_vector_size workaround_mmu_fiq_aarch32
 
 vector_entry workaround_mmu_serror_aarch32
-	apply_workaround
+	apply_workaround _is_sync_exception=0
 	b	serror_aarch32
 	check_vector_size workaround_mmu_serror_aarch32