runtime_exceptions: Update AT speculative workaround

As per the latest mailing list discussion [1], the AT speculative
workaround implementation is updated to disable the page table walk
for lower ELs (EL1 and EL0) immediately after context switching to
EL3 from lower ELs.

The previous implementation of the AT speculative workaround is
available here: 45aecff00

The AT speculative workaround is updated as below (a rough sketch of
steps 2 to 4 follows the list):
1. Avoid saving and restoring the SCTLR and TCR registers for EL1 in
   the context save and restore routines respectively.
2. On EL3 entry, save the SCTLR and TCR registers for EL1.
3. On EL3 entry, update the EL1 system registers to disable the
   stage 1 page table walk for lower ELs (EL1 and EL0) and enable
   the EL1 MMU.
4. On EL3 exit, restore the SCTLR and TCR registers for EL1 that
   were saved in step 2.
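
A rough sketch of steps 2 to 4 in AArch64 assembly (the scratch
registers x1-x3 are illustrative only; the real code stores the
saved values into the EL1 context and is shown in the diff below):

  /* EL3 entry (steps 2 and 3) */
  mrs	x1, sctlr_el1			/* step 2: save SCTLR_EL1 */
  mrs	x2, tcr_el1			/* step 2: save TCR_EL1 */
  orr	x3, x2, #TCR_EPD0_BIT		/* step 3: disable stage 1 */
  orr	x3, x3, #TCR_EPD1_BIT		/* walk for TTBR0 and TTBR1 */
  msr	tcr_el1, x3
  isb					/* EPDx set before MMU enable */
  orr	x3, x1, #SCTLR_M_BIT		/* step 3: enable the EL1 MMU */
  msr	sctlr_el1, x3
  isb

  /* EL3 exit (step 4): restore the saved values, TCR_EL1 last, so
   * the stage 1 walk stays disabled until the very end */
  isb					/* other EL1 sysregs written first */
  msr	sctlr_el1, x1
  isb
  msr	tcr_el1, x2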

[1]:
https://lists.trustedfirmware.org/pipermail/tf-a/2020-July/000586.html

Change-Id: Iee8de16f81dc970a8f492726f2ddd57e7bd9ffb5
Signed-off-by: Manish V Badarkhe <Manish.Badarkhe@arm.com>
diff --git a/bl31/aarch64/runtime_exceptions.S b/bl31/aarch64/runtime_exceptions.S
index 5b37388..bfe13f3 100644
--- a/bl31/aarch64/runtime_exceptions.S
+++ b/bl31/aarch64/runtime_exceptions.S
@@ -12,6 +12,7 @@
 #include <bl31/interrupt_mgmt.h>
 #include <common/runtime_svc.h>
 #include <context.h>
+#include <el3_common_macros.S>
 #include <lib/el3_runtime/cpu_data.h>
 #include <lib/smccc.h>
 
@@ -285,21 +286,25 @@
 	 * to a valid cpu context where the general purpose and system register
 	 * state can be saved.
 	 */
+	apply_at_speculative_wa
 	check_and_unmask_ea
 	handle_sync_exception
 end_vector_entry sync_exception_aarch64
 
 vector_entry irq_aarch64
+	apply_at_speculative_wa
 	check_and_unmask_ea
 	handle_interrupt_exception irq_aarch64
 end_vector_entry irq_aarch64
 
 vector_entry fiq_aarch64
+	apply_at_speculative_wa
 	check_and_unmask_ea
 	handle_interrupt_exception fiq_aarch64
 end_vector_entry fiq_aarch64
 
 vector_entry serror_aarch64
+	apply_at_speculative_wa
 	msr	daifclr, #DAIF_ABT_BIT
 	b	enter_lower_el_async_ea
 end_vector_entry serror_aarch64
@@ -315,21 +320,25 @@
 	 * to a valid cpu context where the general purpose and system register
 	 * state can be saved.
 	 */
+	apply_at_speculative_wa
 	check_and_unmask_ea
 	handle_sync_exception
 end_vector_entry sync_exception_aarch32
 
 vector_entry irq_aarch32
+	apply_at_speculative_wa
 	check_and_unmask_ea
 	handle_interrupt_exception irq_aarch32
 end_vector_entry irq_aarch32
 
 vector_entry fiq_aarch32
+	apply_at_speculative_wa
 	check_and_unmask_ea
 	handle_interrupt_exception fiq_aarch32
 end_vector_entry fiq_aarch32
 
 vector_entry serror_aarch32
+	apply_at_speculative_wa
 	msr	daifclr, #DAIF_ABT_BIT
 	b	enter_lower_el_async_ea
 end_vector_entry serror_aarch32
@@ -455,6 +464,8 @@
 	b	el3_exit
 
 smc_prohibited:
+	restore_ptw_el1_sys_regs
+	ldp	x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
 	ldr	x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
 	mov	x0, #SMC_UNK
 	exception_return
diff --git a/include/arch/aarch64/el3_common_macros.S b/include/arch/aarch64/el3_common_macros.S
index 0708de6..17a4efa 100644
--- a/include/arch/aarch64/el3_common_macros.S
+++ b/include/arch/aarch64/el3_common_macros.S
@@ -9,6 +9,7 @@
 
 #include <arch.h>
 #include <asm_macros.S>
+#include <context.h>
 #include <lib/xlat_tables/xlat_tables_defs.h>
 
 	/*
@@ -443,4 +444,42 @@
 #endif
 	.endm
 
+	.macro	apply_at_speculative_wa
+#if ERRATA_SPECULATIVE_AT
+	/*
+	 * Explicitly save x30 to free up a register and to allow
+	 * branching, and also save x29, which is used in the called
+	 * function.
+	 */
+	stp	x29, x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X29]
+	bl	save_and_update_ptw_el1_sys_regs
+	ldp	x29, x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X29]
+#endif
+	.endm
+
+	.macro	restore_ptw_el1_sys_regs
+#if ERRATA_SPECULATIVE_AT
+	/* -----------------------------------------------------------
+	 * In case of ERRATA_SPECULATIVE_AT, the order below must be
+	 * followed to ensure that the page table walk is not
+	 * re-enabled until all EL1 system registers have been
+	 * restored. TCR_EL1 must be updated last, as it restores the
+	 * previous stage 1 page table walk setting (the TCR_EL1.EPDx
+	 * bits). The ISBs force the CPU to perform the steps in order.
+	 *
+	 * 1. Ensure all other system registers are written before
+	 *    updating SCTLR_EL1, using an ISB.
+	 * 2. Restore SCTLR_EL1.
+	 * 3. Ensure SCTLR_EL1 is written successfully, using an ISB.
+	 * 4. Restore TCR_EL1.
+	 * -----------------------------------------------------------
+	 */
+	isb
+	ldp	x28, x29, [sp, #CTX_EL1_SYSREGS_OFFSET + CTX_SCTLR_EL1]
+	msr	sctlr_el1, x28
+	isb
+	msr	tcr_el1, x29
+#endif
+	.endm
+
 #endif /* EL3_COMMON_MACROS_S */
diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
index 12e5d49..1cb527d 100644
--- a/lib/el3_runtime/aarch64/context.S
+++ b/lib/el3_runtime/aarch64/context.S
@@ -8,6 +8,7 @@
 #include <asm_macros.S>
 #include <assert_macros.S>
 #include <context.h>
+#include <el3_common_macros.S>
 
 #if CTX_INCLUDE_EL2_REGS
 	.global	el2_sysregs_context_save
@@ -22,6 +23,7 @@
 #endif
 	.global	save_gp_pmcr_pauth_regs
 	.global	restore_gp_pmcr_pauth_regs
+	.global save_and_update_ptw_el1_sys_regs
 	.global	el3_exit
 
 #if CTX_INCLUDE_EL2_REGS
@@ -420,9 +422,11 @@
 	mrs	x10, elr_el1
 	stp	x9, x10, [x0, #CTX_SPSR_EL1]
 
+#if !ERRATA_SPECULATIVE_AT
 	mrs	x15, sctlr_el1
 	mrs	x16, tcr_el1
 	stp	x15, x16, [x0, #CTX_SCTLR_EL1]
+#endif
 
 	mrs	x17, cpacr_el1
 	mrs	x9, csselr_el1
@@ -517,9 +521,11 @@
 	msr	spsr_el1, x9
 	msr	elr_el1, x10
 
+#if !ERRATA_SPECULATIVE_AT
 	ldp	x15, x16, [x0, #CTX_SCTLR_EL1]
 	msr	sctlr_el1, x15
 	msr	tcr_el1, x16
+#endif
 
 	ldp	x17, x9, [x0, #CTX_CPACR_EL1]
 	msr	cpacr_el1, x17
@@ -859,6 +865,48 @@
 	ret
 endfunc restore_gp_pmcr_pauth_regs
 
+/*
+ * In case of ERRATA_SPECULATIVE_AT, save SCTLR_EL1 and TCR_EL1
+ * registers and update EL1 registers to disable stage1 and stage2
+ * page table walk
+ */
+func save_and_update_ptw_el1_sys_regs
+	/* ----------------------------------------------------------
+	 * Save only sctlr_el1 and tcr_el1 registers
+	 * ----------------------------------------------------------
+	 */
+	mrs	x29, sctlr_el1
+	str	x29, [sp, #(CTX_EL1_SYSREGS_OFFSET + CTX_SCTLR_EL1)]
+	mrs	x29, tcr_el1
+	str	x29, [sp, #(CTX_EL1_SYSREGS_OFFSET + CTX_TCR_EL1)]
+
+	/* ------------------------------------------------------------
+	 * The order below must be followed to disable the page table
+	 * walk for lower ELs (EL1 and EL0). The first step disables
+	 * the stage 1 page table walk, and the second step forces the
+	 * page table walker to honour the TCR_EL1.EPDx bits when
+	 * performing address translation. The ISB forces the CPU to
+	 * perform these two steps in order.
+	 *
+	 * 1. Set the TCR_EL1.EPDx bits to disable the stage 1 page
+	 *    table walk.
+	 * 2. Set the MMU enable bit to avoid identity mapping via
+	 *    stage 2 and force the page table walker to use the
+	 *    TCR_EL1.EPDx bits.
+	 * ------------------------------------------------------------
+	 */
+	orr	x29, x29, #(TCR_EPD0_BIT)
+	orr	x29, x29, #(TCR_EPD1_BIT)
+	msr	tcr_el1, x29
+	isb
+	mrs	x29, sctlr_el1
+	orr	x29, x29, #SCTLR_M_BIT
+	msr	sctlr_el1, x29
+	isb
+
+	ret
+endfunc save_and_update_ptw_el1_sys_regs
+
 /* ------------------------------------------------------------------
  * This routine assumes that the SP_EL3 is pointing to a valid
  * context structure from where the gp regs and other special
@@ -903,6 +951,8 @@
 	blr	x17
 1:
 #endif
+	restore_ptw_el1_sys_regs
+
 	/* ----------------------------------------------------------
 	 * Restore general purpose (including x30), PMCR_EL0 and
 	 * ARMv8.3-PAuth registers.