feat(el3-runtime): modify vector entry paths

Vector entries in EL3 from lower ELs first check for any pending
async EAs from lower EL before handling the original exception.
This happens when there is an error (EA) in the system which is not
yet signaled to PE while executing at lower EL. During entry into EL3
the errors (EA) are synchronized causing async EA to pend at EL3.

On detecting the pending EA (via ISR_EL1.A) EL3 either reflects it back
to lower EL (KFH) or handles it in EL3 (FFH) based on EA routing model.

In case of Firmware First handling mode (FFH), EL3 handles the pended
EA first before returning back to handle the original exception.

While in case of Kernel First handling mode (KFH), EL3 will return back
to lower EL without handling the original exception. On returning to
lower EL, EA will be pended. In KFH mode there is a risk of back and
forth between EL3 and lower EL if the EA is masked at lower EL or
priority of EA is lower than that of original exception. This is a
limitation in current architecture but can be solved in future if EL3
gets a capability to inject virtual SError.

Signed-off-by: Manish Pandey <manish.pandey2@arm.com>
Change-Id: I3a2a31de7cf454d9d690b1ef769432a5b24f6c11
diff --git a/bl31/aarch64/ea_delegate.S b/bl31/aarch64/ea_delegate.S
index dd6b4dc..188c724 100644
--- a/bl31/aarch64/ea_delegate.S
+++ b/bl31/aarch64/ea_delegate.S
@@ -15,26 +15,10 @@
 #include <cpu_macros.S>
 #include <context.h>
 
-	.globl	handle_lower_el_ea_esb
 	.globl	handle_lower_el_sync_ea
 	.globl	handle_lower_el_async_ea
-
-
-/*
- * Function to delegate External Aborts synchronized by ESB instruction at EL3
- * vector entry. This function assumes GP registers x0-x29 have been saved, and
- * are available for use. It delegates the handling of the EA to platform
- * handler, and returns only upon successfully handling the EA; otherwise
- * panics. On return from this function, the original exception handler is
- * expected to resume.
- */
-func handle_lower_el_ea_esb
-	mov	x0, #ERROR_EA_ESB
-	mrs	x1, DISR_EL1
-	b	ea_proceed
-endfunc handle_lower_el_ea_esb
-
-
+	.globl	handle_pending_async_ea
+	.globl	reflect_pending_async_ea_to_lower_el
 /*
  * This function forms the tail end of Synchronous Exception entry from lower
  * EL, and expects to handle Synchronous External Aborts from lower EL and CPU
@@ -140,6 +124,165 @@
 	b	el3_exit
 endfunc handle_lower_el_async_ea
 
+/*
+ * NOTE 1 : Synchronized async EA handling
+ *
+ * Comment here applicable to following two functions
+ *   - handle_pending_async_ea
+ *   - reflect_pending_async_ea_to_lower_el
+ *
+ * Must be called from exception vector directly.
+ *
+ * This special handling is required to cater for async EAs from
+ * lower EL that are synchronized at EL3 entry.
+ *
+ * This scenario may arise when there is an error (EA) in the system which is not
+ * yet signaled to PE while executing in lower EL. During entry into EL3, the errors
+ * are synchronized either implicitly or explicitly causing async EA to pend at EL3.
+ *
+ * On detecting the pending EA (via ISR_EL1.A), based on routing model of EA
+ * either handle it in EL3 using "handle_pending_async_ea" (FFH)  or return to
+ * lower EL using "reflect_pending_async_ea_to_lower_el" (KFH) .
+ */
+
+/*
+ * Refer to NOTE 1 : Firmware First Handling (FFH)
+ *  Called when FFH is enabled and outgoing world is Non-Secure (scr_el3.ea = 1).
+ * On entry x30 holds the return address into the calling vector entry; the
+ * function assumes the original (lower EL) x30 has already been saved.
+ */
+#if HANDLE_EA_EL3_FIRST_NS
+func handle_pending_async_ea
+	/*
+	 * Prepare for nested handling of EA. Stash the return address (x30) and the
+	 * sysregs (ESR/SPSR/ELR_EL3) clobbered by the nested exception and handler.
+	 */
+	str	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_GPREG_LR]
+	mrs	x30, esr_el3
+	str	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_ESR_EL3]
+	mrs	x30, spsr_el3
+	str	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_SPSR_EL3]
+	mrs	x30, elr_el3
+	str	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_ELR_EL3]
+
+	mov	x30, #1
+	str	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_NESTED_EA_FLAG]	/* nested EA in progress */
+	/*
+	 * Restore the original x30 saved as part of entering EL3. This is not
+	 * required for the current function but for the EL3 SError vector entry
+	 * taken once the PSTATE.A bit is unmasked. We restore x30 here and the
+	 * same value is then stored again by the EL3 SError vector entry.
+	 */
+	ldr	x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
+
+	/*
+	 * After clearing the PSTATE.A bit the pending SError will trigger at the
+	 * current EL. The macro includes an explicit synchronization event (ISB)
+	 * to ensure the newly unmasked SError is taken immediately.
+	 */
+	unmask_async_ea
+
+	/* Restore the original exception information along with zeroing the storage */
+	ldr	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_ELR_EL3]
+	msr	elr_el3, x30
+	str	xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_ELR_EL3]
+	ldr	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_SPSR_EL3]
+	msr	spsr_el3, x30
+	str	xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_SPSR_EL3]
+	ldr	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_ESR_EL3]
+	msr	esr_el3, x30
+	str	xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_ESR_EL3]
+
+	/*
+	 * If the original exception (EC of the ESR_EL3 value still in x30) is an SError
+	 * from lower EL, eret back to lower EL, otherwise return to the vector entry.
+	 */
+	ubfx	x30, x30, #ESR_EC_SHIFT, #ESR_EC_LENGTH
+	cmp	x30, #EC_SERROR
+	ldr	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_GPREG_LR]	/* ldr/str keep the cmp flags */
+	str	xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_GPREG_LR]
+	b.eq	1f
+	ret
+1:
+	exception_return
+endfunc handle_pending_async_ea
+#endif /* HANDLE_EA_EL3_FIRST_NS */
+
+/*
+ * Refer to NOTE 1 : Kernel First handling (KFH)
+ *   Called in following scenarios
+ *     - Always, if outgoing world is either Secure or Realm
+ *     - KFH mode if outgoing world is Non-secure.
+ * Assumes the original x30 has been saved. On entry x30 is the return address
+ * into the calling vector entry, which is used to classify the exception.
+ */
+
+func reflect_pending_async_ea_to_lower_el
+	/*
+	 * As the original exception was not handled we need to ensure that we return
+	 * back to the instruction which caused the exception. To achieve that, eret
+	 * to "elr-4" (Label "subtract_elr_el3") for SMC or simply eret otherwise
+	 * (Label "skip_smc_check").
+	 *
+	 * LIMITATION: It could be that async EA is masked at the target exception level
+	 * or the priority of async EA with respect to the EL3/secure interrupt is lower,
+	 * which causes back and forth between lower EL and EL3. In that case, we track
+	 * the loop count in "CTX_NESTED_EA_FLAG" and leverage the previous ELR in
+	 * "CTX_SAVED_ELR_EL3" to detect this cycle and further panic to indicate a
+	 * problem here (Label "check_loop_ctr").
+	 * However, setting SCR_EL3.IESB = 1, should give priority to SError handling
+	 * as per AArch64.TakeException pseudo code in Arm ARM.
+	 *
+	 * TODO: In future if EL3 gets a capability to inject a virtual SError to lower
+	 * ELs, we can remove the el3_panic and handle the original exception first and
+	 * inject SError to lower EL before ereting back.
+	 */
+	stp	x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
+	ldr	x29, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_ELR_EL3]
+	mrs	x28, elr_el3
+	cmp	x29, x28
+	b.eq	check_loop_ctr
+	str	x28, [sp, #CTX_EL3STATE_OFFSET + CTX_SAVED_ELR_EL3]
+	/* ELR differs from the saved one: zero the loop counter */
+	str	xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_NESTED_EA_FLAG]
+	b	skip_loop_ctr
+check_loop_ctr:
+	ldr	x29, [sp, #CTX_EL3STATE_OFFSET + CTX_NESTED_EA_FLAG]
+	add	x29, x29, #1
+	str	x29, [sp, #CTX_EL3STATE_OFFSET + CTX_NESTED_EA_FLAG]
+	cmp	x29, #ASYNC_EA_REPLAY_COUNTER
+	b.ge	el3_panic
+skip_loop_ctr:
+	/*
+	 * Logic to distinguish if we came from SMC or any other exception.
+	 * Use offsets in vector entry to get which exception we are handling.
+	 * In each vector entry of size 0x200, address "0x0-0x80" is for sync
+	 * exception and "0x80-0x200" is for async exceptions.
+	 * Use vector base address (vbar_el3) and exception offset (LR) to
+	 * calculate whether the address we came from is any of the following
+	 * "0x0-0x80", "0x200-0x280", "0x400-0x480" or "0x600-0x680"
+	 */
+	mrs	x29, vbar_el3
+	sub	x30, x30, x29
+	and	x30, x30, #0x1ff
+	cmp	x30, #0x80
+	b.ge	skip_smc_check
+	/* It's a synchronous exception; now check whether it is an SMC or not. */
+	mrs	x30, esr_el3
+	ubfx	x30, x30, #ESR_EC_SHIFT, #ESR_EC_LENGTH
+	cmp	x30, #EC_AARCH32_SMC
+	b.eq	subtract_elr_el3
+	cmp	x30, #EC_AARCH64_SMC
+	b.eq	subtract_elr_el3
+	b	skip_smc_check
+subtract_elr_el3:
+	sub	x28, x28, #4
+skip_smc_check:
+	msr	elr_el3, x28
+	ldp	x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
+	ldr	x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
+	exception_return
+endfunc reflect_pending_async_ea_to_lower_el
 
 /*
  * Prelude for Synchronous External Abort handling. This function assumes that
diff --git a/bl31/aarch64/runtime_exceptions.S b/bl31/aarch64/runtime_exceptions.S
index 8298696..4c1fa1a 100644
--- a/bl31/aarch64/runtime_exceptions.S
+++ b/bl31/aarch64/runtime_exceptions.S
@@ -47,72 +47,31 @@
 	str	x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
 	.endm
 
-	/*
-	 * Macro that prepares entry to EL3 upon taking an exception.
-	 *
-	 * With RAS_FFH_SUPPORT, this macro synchronizes pending errors with an
-	 * ESB instruction. When an error is thus synchronized, the handling is
-	 * delegated to platform EA handler.
-	 *
-	 * Without RAS_FFH_SUPPORT, this macro synchronizes pending errors using
-	 * a DSB, unmasks Asynchronous External Aborts and saves X30 before
-	 * setting the flag CTX_IS_IN_EL3.
-	 */
-	.macro check_and_unmask_ea
-#if RAS_FFH_SUPPORT
-	/* Synchronize pending External Aborts */
-	esb
-
-	/* Unmask the SError interrupt */
-	msr	daifclr, #DAIF_ABT_BIT
-
-	/* Check for SErrors synchronized by the ESB instruction */
-	mrs	x30, DISR_EL1
-	tbz	x30, #DISR_A_BIT, 1f
-
-	/*
-	 * Save general purpose and ARMv8.3-PAuth registers (if enabled).
-	 * Also save PMCR_EL0 and  set the PSTATE to a known state.
-	 */
-	bl	prepare_el3_entry
-
-	bl	handle_lower_el_ea_esb
-
-	/* Restore general purpose, PMCR_EL0 and ARMv8.3-PAuth registers */
-	bl	restore_gp_pmcr_pauth_regs
-1:
-#else
-	/*
-	 * Note 1: The explicit DSB at the entry of various exception vectors
-	 * for handling exceptions from lower ELs can inadvertently trigger an
-	 * SError exception in EL3 due to pending asynchronous aborts in lower
-	 * ELs. This will end up being handled by serror_sp_elx which will
-	 * ultimately panic and die.
-	 * The way to workaround is to update a flag to indicate if the exception
-	 * truly came from EL3. This flag is allocated in the cpu_context
-	 * structure and located at offset "CTX_EL3STATE_OFFSET + CTX_IS_IN_EL3"
-	 * This is not a bullet proof solution to the problem at hand because
-	 * we assume the instructions following "isb" that help to update the
-	 * flag execute without causing further exceptions.
-	 */
+	.macro restore_x30
+	ldr	x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]	/* reload x30 from its GP register context slot */
+	.endm
 
 	/*
-	 * For SoCs which do not implement RAS, use DSB as a barrier to
-	 * synchronize pending external aborts.
+	 * Macro that synchronizes errors (EA) and checks for pending SError.
+	 * On detecting a pending SError it either reflects it back to lower
+	 * EL (KFH) or handles it in EL3 (FFH) based on EA routing model.
 	 */
+	.macro	sync_and_handle_pending_serror
 	dsb	sy
-
-	/* Unmask the SError interrupt */
-	msr	daifclr, #DAIF_ABT_BIT
-
-	/* Use ISB for the above unmask operation to take effect immediately */
 	isb
-
-	/* Refer Note 1. */
-	mov 	x30, #1
-	str	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_IS_IN_EL3]
-	dmb	sy
+	mrs	x30, ISR_EL1
+	tbz	x30, #ISR_A_SHIFT, 2f	/* ISR_EL1.A clear: no pending SError */
+#if HANDLE_EA_EL3_FIRST_NS
+	mrs	x30, scr_el3
+	tst	x30, #SCR_EA_BIT
+	b.eq	1f	/* SCR_EL3.EA clear: reflect to lower EL */
+	bl	handle_pending_async_ea
+	b	2f
 #endif
+1:
+	/* Never returns here; BL is used so that LR identifies the calling vector entry */
+	bl	reflect_pending_async_ea_to_lower_el
+2:
 	.endm
 
 	/* ---------------------------------------------------------------------
@@ -217,22 +176,33 @@
 end_vector_entry fiq_sp_elx
 
 vector_entry serror_sp_elx
-#if !RAS_FFH_SUPPORT
+#if HANDLE_EA_EL3_FIRST_NS
 	/*
 	 * This will trigger if the exception was taken due to SError in EL3 or
 	 * because of pending asynchronous external aborts from lower EL that got
-	 * triggered due to explicit synchronization in EL3. Refer Note 1.
+	 * triggered due to implicit/explicit synchronization in EL3 (SCR_EL3.EA=1)
+	 * during EL3 entry. For the former case we continue with "plat_handle_el3_ea".
+	 * The latter case will occur when the PSTATE.A bit is cleared in
+	 * "handle_pending_async_ea". This means we are handling a nested
+	 * exception in EL3. Call the handler for async EA which will eret back to
+	 * the original EL3 handler if it is a nested exception. Also, unmask EA so that
+	 * we catch any further EA that arises while handling this nested exception at EL3.
 	 */
-	/* Assumes SP_EL3 on entry */
 	save_x30
-	ldr	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_IS_IN_EL3]
-	cbnz	x30, 1f
-
-	/* Handle asynchronous external abort from lower EL */
+	ldr	x30, [sp, #CTX_EL3STATE_OFFSET + CTX_NESTED_EA_FLAG]
+	cbz	x30, 1f
+	/*
+	 * This is nested exception handling; clear the flag to avoid taking this
+	 * path for further exceptions caused by EA handling.
+	 */
+	str	xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_NESTED_EA_FLAG]
+	unmask_async_ea
 	b	handle_lower_el_async_ea
 1:
+	restore_x30
 #endif
 	no_ret	plat_handle_el3_ea
+
 end_vector_entry serror_sp_elx
 
 	/* ---------------------------------------------------------------------
@@ -248,34 +218,37 @@
 	 */
 	save_x30
 	apply_at_speculative_wa
-	check_and_unmask_ea
+	sync_and_handle_pending_serror
+	unmask_async_ea
 	handle_sync_exception
 end_vector_entry sync_exception_aarch64
 
 vector_entry irq_aarch64
 	save_x30
 	apply_at_speculative_wa
-	check_and_unmask_ea
+	sync_and_handle_pending_serror
+	unmask_async_ea
 	b	handle_interrupt_exception
 end_vector_entry irq_aarch64
 
 vector_entry fiq_aarch64
 	save_x30
 	apply_at_speculative_wa
-	check_and_unmask_ea
+	sync_and_handle_pending_serror
+	unmask_async_ea
 	b 	handle_interrupt_exception
 end_vector_entry fiq_aarch64
 
+	/*
+	 * Need to synchronize any outstanding SError since we can get a burst of errors.
+	 * So reuse the sync mechanism to catch any further errors which are pending.
+	 */
 vector_entry serror_aarch64
 	save_x30
 	apply_at_speculative_wa
-#if RAS_FFH_SUPPORT
-	msr	daifclr, #DAIF_ABT_BIT
-#else
-	check_and_unmask_ea
-#endif
+	sync_and_handle_pending_serror
+	unmask_async_ea
 	b	handle_lower_el_async_ea
-
 end_vector_entry serror_aarch64
 
 	/* ---------------------------------------------------------------------
@@ -291,34 +264,37 @@
 	 */
 	save_x30
 	apply_at_speculative_wa
-	check_and_unmask_ea
+	sync_and_handle_pending_serror
+	unmask_async_ea
 	handle_sync_exception
 end_vector_entry sync_exception_aarch32
 
 vector_entry irq_aarch32
 	save_x30
 	apply_at_speculative_wa
-	check_and_unmask_ea
+	sync_and_handle_pending_serror
+	unmask_async_ea
 	b	handle_interrupt_exception
 end_vector_entry irq_aarch32
 
 vector_entry fiq_aarch32
 	save_x30
 	apply_at_speculative_wa
-	check_and_unmask_ea
+	sync_and_handle_pending_serror
+	unmask_async_ea
 	b	handle_interrupt_exception
 end_vector_entry fiq_aarch32
 
+	/*
+	 * Need to synchronize any outstanding SError since we can get a burst of errors.
+	 * So reuse the sync mechanism to catch any further errors which are pending.
+	 */
 vector_entry serror_aarch32
 	save_x30
 	apply_at_speculative_wa
-#if RAS_FFH_SUPPORT
-	msr	daifclr, #DAIF_ABT_BIT
-#else
-	check_and_unmask_ea
-#endif
+	sync_and_handle_pending_serror
+	unmask_async_ea
 	b	handle_lower_el_async_ea
-
 end_vector_entry serror_aarch32
 
 #ifdef MONITOR_TRAPS
diff --git a/include/arch/aarch64/asm_macros.S b/include/arch/aarch64/asm_macros.S
index 6091f62..44f892c 100644
--- a/include/arch/aarch64/asm_macros.S
+++ b/include/arch/aarch64/asm_macros.S
@@ -292,4 +292,13 @@
 #endif
 	.endm
 
+	/*
+	 * Macro to unmask External Aborts (SError) by clearing the PSTATE.A bit.
+	 * The ISB is an explicit synchronization event which ensures that a newly
+	 * unmasked pending SError is taken immediately.
+	 */
+	.macro  unmask_async_ea
+	msr     daifclr, #DAIF_ABT_BIT
+	isb
+	.endm
 #endif /* ASM_MACROS_S */
diff --git a/include/bl31/ea_handle.h b/include/bl31/ea_handle.h
index 68f012c..7cd7b6a 100644
--- a/include/bl31/ea_handle.h
+++ b/include/bl31/ea_handle.h
@@ -21,4 +21,6 @@
 /* RAS event signalled as peripheral interrupt */
 #define ERROR_INTERRUPT		3
 
+#define ASYNC_EA_REPLAY_COUNTER	U(100)
+
 #endif /* EA_HANDLE_H */
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index 470d113..fb28505 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -63,7 +63,22 @@
 #define CTX_PMCR_EL0		U(0x28)
 #define CTX_IS_IN_EL3		U(0x30)
 #define CTX_MPAM3_EL3		U(0x38)
-#define CTX_EL3STATE_END	U(0x40) /* Align to the next 16 byte boundary */
+/* Constants required in supporting nested exception in EL3 */
+#define CTX_SAVED_ELR_EL3	U(0x40)
+/*
+ * General purpose flag, to save various EL3 states
+ * FFH mode : Used to identify if handling nested exception
+ * KFH mode : Used as counter value
+ */
+#define CTX_NESTED_EA_FLAG	U(0x48)
+#if HANDLE_EA_EL3_FIRST_NS
+ #define CTX_SAVED_ESR_EL3	U(0x50)
+ #define CTX_SAVED_SPSR_EL3	U(0x58)
+ #define CTX_SAVED_GPREG_LR	U(0x60)
+ #define CTX_EL3STATE_END	U(0x70) /* Align to the next 16 byte boundary */
+#else
+ #define CTX_EL3STATE_END	U(0x50) /* Align to the next 16 byte boundary */
+#endif
 
 /*******************************************************************************
  * Constants that allow assembler code to access members of and the
diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
index 41b25d6..894165a 100644
--- a/lib/el3_runtime/aarch64/context.S
+++ b/lib/el3_runtime/aarch64/context.S
@@ -689,7 +689,8 @@
 	ldr	x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
 
 #ifdef IMAGE_BL31
-	str	xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_IS_IN_EL3]
+	/* Clear the EL3 flag as we are exiting el3 */
+	str	xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_NESTED_EA_FLAG]
 #endif /* IMAGE_BL31 */
 
 	exception_return