Add support for handling runtime service requests

This patch uses the reworked exception handling support to handle
runtime service requests through SMCs following the SMC calling
convention. This is a giant commit since all the changes are
inter-related. It does the following:

1. Replace the old exception handling mechanism with the new one
2. Enforce that SP_EL0 is used for C runtime stacks
3. Ensure that the cold and warm boot paths use the 'cpu_context'
   structure to program an ERET into the next lower EL
4. Ensure that SP_EL3 always points to the next 'cpu_context'
   structure prior to an ERET into the next lower EL
5. Introduce a PSCI SMC handler which completes the use of PSCI as a
   runtime service

Change-Id: I661797f834c0803d2c674d20f504df1b04c2b852
Co-authored-by: Achin Gupta <achin.gupta@arm.com>
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index cd0c023..1b8488d 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -31,6 +31,7 @@
 #include <bl_common.h>
 #include <platform.h>
 #include <arch.h>
+#include "cm_macros.S"
 
 
 	.globl	bl31_entrypoint
@@ -129,6 +130,12 @@
 	ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
 	bl	zeromem16
 
+	/* ---------------------------------------------
+	 * Use SP_EL0 for the C runtime stack.
+	 * ---------------------------------------------
+	 */
+	msr	spsel, #0
+
 	/* --------------------------------------------
 	 * Give ourselves a small coherent stack to
 	 * ease the pain of initializing the MMU
@@ -155,32 +162,26 @@
 	bl	platform_set_stack
 
 	/* ---------------------------------------------
-	 * Use the more complex exception vectors now
-	 * the stacks are setup.
+	 * Jump to main function.
 	 * ---------------------------------------------
 	 */
-	adr	x1, runtime_exceptions
-	msr	vbar_el3, x1
+	bl	bl31_main
 
 	/* ---------------------------------------------
-	 * Use SP_EL0 to initialize BL31. It allows us
-	 * to jump to the next image without having to
-	 * come back here to ensure all of the stack's
-	 * been popped out. run_image() is not nice
-	 * enough to reset the stack pointer before
-	 * handing control to the next stage.
+	 * Use the more complex exception vectors now
+	 * that context management is setup. SP_EL3 is
+	 * pointing to a 'cpu_context' structure which
+	 * has an exception stack allocated. Since
+	 * we're just about to leave this EL with ERET,
+	 * we don't need an ISB here
 	 * ---------------------------------------------
 	 */
-	mov	x0, sp
-	msr	sp_el0, x0
-	msr	spsel, #0
-	isb
+	adr	x1, runtime_exceptions
+	msr	vbar_el3, x1
 
-	/* ---------------------------------------------
-	 * Jump to main function.
-	 * ---------------------------------------------
-	 */
-	bl	bl31_main
+	zero_callee_saved_regs
+	b	el3_exit
 
 _panic:
+	wfi
 	b	_panic
diff --git a/bl31/aarch64/exception_handlers.c b/bl31/aarch64/exception_handlers.c
deleted file mode 100644
index 3151294..0000000
--- a/bl31/aarch64/exception_handlers.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <arch_helpers.h>
-#include <platform.h>
-#include <bl_common.h>
-#include <bl31.h>
-#include <psci.h>
-#include <assert.h>
-#include <runtime_svc.h>
-
-/*******************************************************************************
- * This function checks whether this is a valid smc e.g.
- * the function id is correct, top word of args are zeroed
- * when aarch64 makes an aarch32 call etc.
- ******************************************************************************/
-int validate_smc(gp_regs *regs)
-{
-	unsigned int rw = GET_RW(regs->spsr);
-	unsigned int cc = GET_SMC_CC(regs->x0);
-
-	/* Check if there is a difference in the caller RW and SMC CC */
-	if (rw == cc) {
-
-		/* Check whether the caller has chosen the right func. id */
-		if (cc == SMC_64) {
-			regs->x0 = SMC_UNK;
-			return SMC_UNK;
-		}
-
-		/*
-		 * Paranoid check to zero the top word of passed args
-		 * irrespective of caller's register width.
-		 *
-		 * TODO: Check if this needed if the caller is aarch32
-		 */
-		regs->x0 &= (unsigned int) 0xFFFFFFFF;
-		regs->x1 &= (unsigned int) 0xFFFFFFFF;
-		regs->x2 &= (unsigned int) 0xFFFFFFFF;
-		regs->x3 &= (unsigned int) 0xFFFFFFFF;
-		regs->x4 &= (unsigned int) 0xFFFFFFFF;
-		regs->x5 &= (unsigned int) 0xFFFFFFFF;
-		regs->x6 &= (unsigned int) 0xFFFFFFFF;
-	}
-
-	return 0;
-}
-
-/* TODO: Break down the SMC handler into fast and standard SMC handlers. */
-void smc_handler(unsigned type, unsigned long esr, gp_regs *regs)
-{
-	/* Check if the SMC has been correctly called */
-	if (validate_smc(regs) != 0)
-		return;
-
-	switch (regs->x0) {
-	case PSCI_VERSION:
-		regs->x0 = psci_version();
-		break;
-
-	case PSCI_CPU_OFF:
-		regs->x0 = __psci_cpu_off();
-		break;
-
-	case PSCI_CPU_SUSPEND_AARCH64:
-	case PSCI_CPU_SUSPEND_AARCH32:
-		regs->x0 = __psci_cpu_suspend(regs->x1, regs->x2, regs->x3);
-		break;
-
-	case PSCI_CPU_ON_AARCH64:
-	case PSCI_CPU_ON_AARCH32:
-		regs->x0 = psci_cpu_on(regs->x1, regs->x2, regs->x3);
-		break;
-
-	case PSCI_AFFINITY_INFO_AARCH32:
-	case PSCI_AFFINITY_INFO_AARCH64:
-		regs->x0 = psci_affinity_info(regs->x1, regs->x2);
-		break;
-
-	default:
-		regs->x0 = SMC_UNK;
-	}
-
-	return;
-}
-
-void irq_handler(unsigned type, unsigned long esr, gp_regs *regs)
-{
-	plat_report_exception(type);
-	assert(0);
-}
-
-void fiq_handler(unsigned type, unsigned long esr, gp_regs *regs)
-{
-	plat_report_exception(type);
-	assert(0);
-}
-
-void serror_handler(unsigned type, unsigned long esr, gp_regs *regs)
-{
-	plat_report_exception(type);
-	assert(0);
-}
-
-void sync_exception_handler(unsigned type, gp_regs *regs)
-{
-	unsigned long esr = read_esr();
-	unsigned int ec = EC_BITS(esr);
-
-	switch (ec) {
-
-	case EC_AARCH32_SMC:
-	case EC_AARCH64_SMC:
-		smc_handler(type, esr, regs);
-		break;
-
-	default:
-		plat_report_exception(type);
-		assert(0);
-	}
-	return;
-}
-
-void async_exception_handler(unsigned type, gp_regs *regs)
-{
-	unsigned long esr = read_esr();
-
-	switch (type) {
-
-	case IRQ_SP_EL0:
-	case IRQ_SP_ELX:
-	case IRQ_AARCH64:
-	case IRQ_AARCH32:
-		irq_handler(type, esr, regs);
-		break;
-
-	case FIQ_SP_EL0:
-	case FIQ_SP_ELX:
-	case FIQ_AARCH64:
-	case FIQ_AARCH32:
-		fiq_handler(type, esr, regs);
-		break;
-
-	case SERROR_SP_EL0:
-	case SERROR_SP_ELX:
-	case SERROR_AARCH64:
-	case SERROR_AARCH32:
-		serror_handler(type, esr, regs);
-		break;
-
-	default:
-		plat_report_exception(type);
-		assert(0);
-	}
-
-	return;
-}
diff --git a/bl31/aarch64/runtime_exceptions.S b/bl31/aarch64/runtime_exceptions.S
index 92835dc..10e65dc 100644
--- a/bl31/aarch64/runtime_exceptions.S
+++ b/bl31/aarch64/runtime_exceptions.S
@@ -30,12 +30,13 @@
 
 #include <arch.h>
 #include <runtime_svc.h>
+#include <platform.h>
+#include <context.h>
+#include "cm_macros.S"
 
 	.globl	runtime_exceptions
-
-
-#include <asm_macros.S>
-
+	.globl	el3_exit
+	.globl	get_exception_stack
 
 	.section	.vectors, "ax"; .align 11
 
@@ -46,39 +47,32 @@
 	 * -----------------------------------------------------
 	 */
 sync_exception_sp_el0:
-	exception_entry save_regs
-	mov	x0, #SYNC_EXCEPTION_SP_EL0
-	mov	x1, sp
-	bl	sync_exception_handler
-	exception_exit restore_regs
-	eret
+	/* -----------------------------------------------------
+	 * We don't expect any synchronous exceptions from EL3
+	 * -----------------------------------------------------
+	 */
+	wfi
+	b	sync_exception_sp_el0
 
 	.align	7
+	/* -----------------------------------------------------
+	 * EL3 code is non-reentrant. Any asynchronous exception
+	 * is a serious error. Loop infinitely.
+	 * -----------------------------------------------------
+	 */
 irq_sp_el0:
-	exception_entry save_regs
-	mov	x0, #IRQ_SP_EL0
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
+	handle_async_exception IRQ_SP_EL0
+	b	irq_sp_el0
 
 	.align	7
 fiq_sp_el0:
-	exception_entry save_regs
-	mov	x0, #FIQ_SP_EL0
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
+	handle_async_exception FIQ_SP_EL0
+	b	fiq_sp_el0
 
 	.align	7
 serror_sp_el0:
-	exception_entry save_regs
-	mov	x0, #SERROR_SP_EL0
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
+	handle_async_exception SERROR_SP_EL0
+	b	serror_sp_el0
 
 	/* -----------------------------------------------------
 	 * Current EL with SPx: 0x200 - 0x380
@@ -86,39 +80,35 @@
 	 */
 	.align	7
 sync_exception_sp_elx:
-	exception_entry save_regs
-	mov	x0, #SYNC_EXCEPTION_SP_ELX
-	mov	x1, sp
-	bl	sync_exception_handler
-	exception_exit restore_regs
-	eret
+	/* -----------------------------------------------------
+	 * This exception will trigger if anything went wrong
+	 * during a previous exception entry or exit or while
+	 * handling an earlier unexpected synchronous exception.
+	 * In any case we cannot rely on SP_EL3. Switching to a
+	 * known safe area of memory will corrupt at least a
+	 * single register. It is best to enter wfi in loop as
+	 * that will preserve the system state for analysis
+	 * through a debugger later.
+	 * -----------------------------------------------------
+	 */
+	wfi
+	b	sync_exception_sp_elx
 
+	/* -----------------------------------------------------
+	 * As mentioned in the previous comment, all bets are
+	 * off if SP_EL3 cannot be relied upon. Asynchronous
+	 * exceptions taken here simply spin in place.
+	 * -----------------------------------------------------
+	 */
 	.align	7
 irq_sp_elx:
-	exception_entry save_regs
-	mov	x0, #IRQ_SP_ELX
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
-
+	b	irq_sp_elx
 	.align	7
 fiq_sp_elx:
-	exception_entry save_regs
-	mov	x0, #FIQ_SP_ELX
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
-
+	b	fiq_sp_elx
 	.align	7
 serror_sp_elx:
-	exception_entry save_regs
-	mov	x0, #SERROR_SP_ELX
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
+	b	serror_sp_elx
 
 	/* -----------------------------------------------------
 	 * Lower EL using AArch64 : 0x400 - 0x580
@@ -126,39 +116,35 @@
 	 */
 	.align	7
 sync_exception_aarch64:
-	exception_entry save_regs
-	mov	x0, #SYNC_EXCEPTION_AARCH64
-	mov	x1, sp
-	bl	sync_exception_handler
-	exception_exit restore_regs
-	eret
+	/* -----------------------------------------------------
+	 * This exception vector will be the entry point for
+	 * SMCs and traps that are unhandled at lower ELs most
+	 * commonly. SP_EL3 should point to a valid cpu context
+	 * where the general purpose and system register state
+	 * can be saved.
+	 * -----------------------------------------------------
+	 */
+	handle_sync_exception
 
 	.align	7
+	/* -----------------------------------------------------
+	 * Asynchronous exceptions from lower ELs are not
+	 * currently supported. Report their occurrence.
+	 * -----------------------------------------------------
+	 */
 irq_aarch64:
-	exception_entry save_regs
-	mov	x0, #IRQ_AARCH64
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
+	handle_async_exception IRQ_AARCH64
+	b	irq_aarch64
 
 	.align	7
 fiq_aarch64:
-	exception_entry save_regs
-	mov	x0, #FIQ_AARCH64
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
+	handle_async_exception FIQ_AARCH64
+	b	fiq_aarch64
 
 	.align	7
 serror_aarch64:
-	exception_entry save_regs
-	mov	x0, #IRQ_AARCH32
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
+	handle_async_exception SERROR_AARCH64
+	b	serror_aarch64
 
 	/* -----------------------------------------------------
 	 * Lower EL using AArch32 : 0x600 - 0x780
@@ -166,39 +152,281 @@
 	 */
 	.align	7
 sync_exception_aarch32:
-	exception_entry save_regs
-	mov	x0, #SYNC_EXCEPTION_AARCH32
-	mov	x1, sp
-	bl	sync_exception_handler
-	exception_exit restore_regs
-	eret
+	/* -----------------------------------------------------
+	 * This exception vector will be the entry point for
+	 * SMCs and traps that are unhandled at lower ELs most
+	 * commonly. SP_EL3 should point to a valid cpu context
+	 * where the general purpose and system register state
+	 * can be saved.
+	 * -----------------------------------------------------
+	 */
+	handle_sync_exception
 
 	.align	7
+	/* -----------------------------------------------------
+	 * Asynchronous exceptions from lower ELs are not
+	 * currently supported. Report their occurrence.
+	 * -----------------------------------------------------
+	 */
 irq_aarch32:
-	exception_entry save_regs
-	mov	x0, #IRQ_AARCH32
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
+	handle_async_exception IRQ_AARCH32
+	b	irq_aarch32
 
 	.align	7
 fiq_aarch32:
-	exception_entry save_regs
-	mov	x0, #FIQ_AARCH32
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
-	eret
+	handle_async_exception FIQ_AARCH32
+	b	fiq_aarch32
 
 	.align	7
 serror_aarch32:
-	exception_entry save_regs
-	mov	x0, #SERROR_AARCH32
-	mov	x1, sp
-	bl	async_exception_handler
-	exception_exit restore_regs
+	handle_async_exception SERROR_AARCH32
+	b	serror_aarch32
+	.align	7
+
+	.section	.text, "ax"
+	/* -----------------------------------------------------
+	 * The following code handles secure monitor calls.
+	 * Depending upon the execution state from where the SMC
+	 * has been invoked, it frees some general purpose
+	 * registers to perform the remaining tasks. They
+	 * involve finding the runtime service handler that is
+	 * the target of the SMC & switching to runtime stacks
+	 * (SP_EL0) before calling the handler.
+	 *
+	 * Note that x30 has been explicitly saved and can be
+	 * used here
+	 * -----------------------------------------------------
+	 */
+smc_handler32:
+	/* Check whether aarch32 issued an SMC64 (FUNCID_CC_SHIFT bit of x0 set) */
+	tbnz	x0, #FUNCID_CC_SHIFT, smc_prohibited
+
+	/* -----------------------------------------------------
+	 * Since we're coming from aarch32, x8-x18 need to
+	 * be saved as per SMC32 calling convention. If a lower
+	 * EL in aarch64 is making an SMC32 call then it must
+	 * have saved x8-x17 already therein.
+	 * -----------------------------------------------------
+	 */
+	stp	x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
+	stp	x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10]
+	stp	x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12]
+	stp	x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14]
+	stp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]
+
+	/* x4-x7, x18, sp_el0 are saved below (execution falls into smc_handler64) */
+
+smc_handler64:	/* Common SMC path; smc_handler32 falls through to here */
+	/* -----------------------------------------------------
+	 * Populate the parameters for the SMC handler. We
+	 * already have x0-x4 in place. x5 will point to a
+	 * cookie (not used now). x6 will point to the context
+	 * structure (SP_EL3) and x7 will contain flags we need
+	 * to pass to the handler. Hence save x4-x7. Note that
+	 * x4 only needs to be preserved for AArch32 callers but
+	 * we do it for AArch64 callers as well for convenience
+	 * -----------------------------------------------------
+	 */
+	stp	x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
+	stp	x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
+
+	mov	x5, xzr	/* cookie argument: currently always zero */
+	mov	x6, sp	/* context pointer argument: current SP value */
+
+	/* Build the lookup index: owning entity number | (call type << FUNCID_OEN_WIDTH) */
+	ubfx	x16, x0, #FUNCID_OEN_SHIFT, #FUNCID_OEN_WIDTH
+	ubfx	x15, x0, #FUNCID_TYPE_SHIFT, #FUNCID_TYPE_WIDTH
+	orr	x16, x16, x15, lsl #FUNCID_OEN_WIDTH
+
+	adr	x11, (__RT_SVC_DESCS_START__ + RT_SVC_DESC_HANDLE)
+
+	/* Load descriptor index (one byte per entry) from array of indices */
+	adr	x14, rt_svc_descs_indices
+	ldrb	w15, [x14, x16]
+
+	/* Save x18 and SP_EL0 (x17 is used as scratch to read SP_EL0) */
+	mrs	x17, sp_el0
+	stp	x18, x17, [x6, #CTX_GPREGS_OFFSET + CTX_GPREG_X18]
+
+	/* -----------------------------------------------------
+	 * Restore the saved C runtime stack value which will
+	 * become the new SP_EL0 i.e. EL3 runtime stack. It was
+	 * saved in the 'cpu_context' structure prior to the last
+	 * ERET from EL3.
+	 * -----------------------------------------------------
+	 */
+	ldr	x12, [x6, #CTX_EL3STATE_OFFSET + CTX_RUNTIME_SP]
+
+	/*
+	 * Any index greater than 127 is invalid. Check bit 7 for
+	 * a valid index
+	 */
+	tbnz	w15, 7, smc_unknown	/* taken before the stack pointer selection changes */
+
+	/* Switch to SP_EL0 */
+	msr	spsel, #0
+
+	/* -----------------------------------------------------
+	 * Get the descriptor using the index
+	 * x11 = (base + off), x15 = index
+	 *
+	 * handler = (base + off) + (index << log2(size))
+	 * -----------------------------------------------------
+	 */
+	lsl	w10, w15, #RT_SVC_SIZE_LOG2
+	ldr	x15, [x11, w10, uxtw]	/* x15 is reused: it now holds the handler address */
+
+	/* -----------------------------------------------------
+	 * Save the SPSR_EL3, ELR_EL3, & SCR_EL3 in case there
+	 * is a world switch during SMC handling.
+	 * TODO: Revisit if all system registers can be saved
+	 * later.
+	 * -----------------------------------------------------
+	 */
+	mrs	x16, spsr_el3
+	mrs	x17, elr_el3
+	mrs	x18, scr_el3
+	stp	x16, x17, [x6, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3]
+	stp	x18, xzr, [x6, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
+
+	/* Copy SCR_EL3.NS bit to the flag to indicate caller's security */
+	bfi	x7, x18, #0, #1	/* only bit 0 of x7 is replaced; the other bits keep their incoming value */
+
+	mov	sp, x12	/* the selected stack pointer now holds the runtime stack loaded above */
+
+	/* -----------------------------------------------------
+	 * Call the Secure Monitor Call handler and then drop
+	 * directly into el3_exit() which will program any
+	 * remaining architectural state prior to issuing the
+	 * ERET to the desired lower EL.
+	 * -----------------------------------------------------
+	 */
+#if DEBUG
+	cbz	x15, rt_svc_fw_critical_error	/* null handler pointer: spin forever */
+#endif
+	blr	x15	/* x15 = handler address loaded from the descriptor */
+
+	/* -----------------------------------------------------
+	 * This routine assumes that the SP_EL3 is pointing to
+	 * a valid context structure from where the gp regs and
+	 * other special registers can be retrieved.
+	 * -----------------------------------------------------
+	 */
+el3_exit: ; .type el3_exit, %function
+	/* -----------------------------------------------------
+	 * Save the current SP_EL0 i.e. the EL3 runtime stack
+	 * which will be used for handling the next SMC. Then
+	 * switch to SP_EL3
+	 * -----------------------------------------------------
+	 */
+	mov	x17, sp	/* capture the stack pointer before changing the selection */
+	msr	spsel, #1
+	str	x17, [sp, #CTX_EL3STATE_OFFSET + CTX_RUNTIME_SP]
+
+	/* -----------------------------------------------------
+	 * Restore SPSR_EL3, ELR_EL3 and SCR_EL3 prior to ERET
+	 * -----------------------------------------------------
+	 */
+	ldp	x18, xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]	/* second word of the pair is discarded */
+	ldp	x16, x17, [sp, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3]
+	msr	scr_el3, x18
+	msr	spsr_el3, x16
+	msr	elr_el3, x17
+
+	/* Restore saved general purpose registers (x0-x18, SP_EL0) and return */
+	bl	restore_scratch_registers	/* the bl overwrites x30 */
+	ldp	x30, xzr, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]	/* so x30 is reloaded last */
 	eret
 
-	.align	7
+smc_unknown:	/* reached from smc_handler64 when the descriptor index has bit 7 set */
+	/*
+	 * Here we restore x4-x18 regardless of where we came from. AArch32
+	 * callers will find the registers contents unchanged, but AArch64
+	 * callers will find the registers modified (with stale earlier NS
+	 * content). Either way, we aren't leaking any secure information
+	 * through them
+	 */
+	bl	restore_scratch_registers_callee	/* reloads x4-x18 and SP_EL0; overwrites x30 */
+
+smc_prohibited:	/* also the direct target when an aarch32 caller issues an SMC64 */
+	ldp	x30, xzr, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
+	mov	w0, #SMC_UNK	/* hand SMC_UNK back to the caller in w0 */
+	eret
+
+rt_svc_fw_critical_error:	/* DEBUG builds branch here on a null handler pointer */
+	b	rt_svc_fw_critical_error	/* spin forever */
+
+	/* -----------------------------------------------------
+	 * The following functions are used to save and restore
+	 * all the caller saved registers as per the aapcs_64.
+	 * These are not macros to ensure their invocation fits
+	 * within the 32 instructions per exception vector.
+	 * -----------------------------------------------------
+	 */
+save_scratch_registers: ; .type save_scratch_registers, %function	/* store x0-x18 and SP_EL0 into the gp-regs area addressed by sp */
+	stp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+	stp	x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
+	stp	x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
+	stp	x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
+	stp	x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
+	stp	x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10]
+	stp	x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12]
+	stp	x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14]
+	stp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]
+	mrs	x17, sp_el0	/* x17 was stored above, so it is free to carry SP_EL0 */
+	stp	x18, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X18]	/* SP_EL0 goes in the slot after x18 */
+	ret
+
+restore_scratch_registers: ; .type restore_scratch_registers, %function	/* reload x0-x18 and SP_EL0 from the gp-regs area addressed by sp */
+	ldp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+	ldp	x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
+
+restore_scratch_registers_callee:	/* second entry point: leaves x0-x3 untouched */
+	ldp	x18, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X18]	/* x17 temporarily holds the saved SP_EL0 */
+
+	ldp	x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
+	ldp	x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
+	ldp	x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
+	ldp	x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10]
+	ldp	x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12]
+	ldp	x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14]
+
+	msr	sp_el0, x17
+	ldp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]	/* x17 gets its own saved value only after SP_EL0 is written */
+	ret
+
+	/* -----------------------------------------------------
+	 * Exception stack for each cpu: 0x100 bytes (0x300 if DEBUG)
+	 * -----------------------------------------------------
+	 */
+#if DEBUG
+#define PCPU_EXCEPTION_STACK_SIZE	0x300
+#else
+#define PCPU_EXCEPTION_STACK_SIZE	0x100
+#endif
+	/* -----------------------------------------------------
+	 * uint64_t get_exception_stack (uint64_t mpidr) : This
+	 * function returns in x0 the upper end of the small
+	 * per-cpu stack used for reporting unhandled exceptions
+	 * -----------------------------------------------------
+	 */
+get_exception_stack: ; .type get_exception_stack, %function
+	mov	x10, x30 // keep lr in x10 across the bl below (assumes the callee leaves x10 alone - TODO confirm)
+	bl	platform_get_core_pos
+	add	x0, x0, #1	/* +1 so the result is the end of this cpu's slot, not its start */
+	mov	x1, #PCPU_EXCEPTION_STACK_SIZE
+	mul	x0, x0, x1
+	ldr	x1, =pcpu_exception_stack
+	add	x0, x1, x0	/* x0 = pcpu_exception_stack + (x0 + 1) * PCPU_EXCEPTION_STACK_SIZE */
+	ret	x10
+
+	/* -----------------------------------------------------
+	 * Per-cpu exception stacks in normal memory.
+	 * -----------------------------------------------------
+	 */
+	.section	data, "aw", %nobits; .align 6
+
+pcpu_exception_stack:
+	/* Zero fill */
+	.space (PLATFORM_CORE_COUNT * PCPU_EXCEPTION_STACK_SIZE), 0
 
diff --git a/bl31/aarch64/runtime_exceptions_next.S b/bl31/aarch64/runtime_exceptions_next.S
deleted file mode 100644
index 10e65dc..0000000
--- a/bl31/aarch64/runtime_exceptions_next.S
+++ /dev/null
@@ -1,432 +0,0 @@
-/*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <arch.h>
-#include <runtime_svc.h>
-#include <platform.h>
-#include <context.h>
-#include "cm_macros.S"
-
-	.globl	runtime_exceptions
-	.globl	el3_exit
-	.globl	get_exception_stack
-
-	.section	.vectors, "ax"; .align 11
-
-	.align	7
-runtime_exceptions:
-	/* -----------------------------------------------------
-	 * Current EL with _sp_el0 : 0x0 - 0x180
-	 * -----------------------------------------------------
-	 */
-sync_exception_sp_el0:
-	/* -----------------------------------------------------
-	 * We don't expect any synchronous exceptions from EL3
-	 * -----------------------------------------------------
-	 */
-	wfi
-	b	sync_exception_sp_el0
-
-	.align	7
-	/* -----------------------------------------------------
-	 * EL3 code is non-reentrant. Any asynchronous exception
-	 * is a serious error. Loop infinitely.
-	 * -----------------------------------------------------
-	 */
-irq_sp_el0:
-	handle_async_exception IRQ_SP_EL0
-	b	irq_sp_el0
-
-	.align	7
-fiq_sp_el0:
-	handle_async_exception FIQ_SP_EL0
-	b	fiq_sp_el0
-
-	.align	7
-serror_sp_el0:
-	handle_async_exception SERROR_SP_EL0
-	b	serror_sp_el0
-
-	/* -----------------------------------------------------
-	 * Current EL with SPx: 0x200 - 0x380
-	 * -----------------------------------------------------
-	 */
-	.align	7
-sync_exception_sp_elx:
-	/* -----------------------------------------------------
-	 * This exception will trigger if anything went wrong
-	 * during a previous exception entry or exit or while
-	 * handling an earlier unexpected synchronous exception.
-	 * In any case we cannot rely on SP_EL3. Switching to a
-	 * known safe area of memory will corrupt at least a
-	 * single register. It is best to enter wfi in loop as
-	 * that will preserve the system state for analysis
-	 * through a debugger later.
-	 * -----------------------------------------------------
-	 */
-	wfi
-	b	sync_exception_sp_elx
-
-	/* -----------------------------------------------------
-	 * As mentioned in the previous comment, all bets are
-	 * off if SP_EL3 cannot be relied upon. Report their
-	 * occurrence.
-	 * -----------------------------------------------------
-	 */
-	.align	7
-irq_sp_elx:
-	b	irq_sp_elx
-	.align	7
-fiq_sp_elx:
-	b	fiq_sp_elx
-	.align	7
-serror_sp_elx:
-	b	serror_sp_elx
-
-	/* -----------------------------------------------------
-	 * Lower EL using AArch64 : 0x400 - 0x580
-	 * -----------------------------------------------------
-	 */
-	.align	7
-sync_exception_aarch64:
-	/* -----------------------------------------------------
-	 * This exception vector will be the entry point for
-	 * SMCs and traps that are unhandled at lower ELs most
-	 * commonly. SP_EL3 should point to a valid cpu context
-	 * where the general purpose and system register state
-	 * can be saved.
-	 * -----------------------------------------------------
-	 */
-	handle_sync_exception
-
-	.align	7
-	/* -----------------------------------------------------
-	 * Asynchronous exceptions from lower ELs are not
-	 * currently supported. Report their occurrence.
-	 * -----------------------------------------------------
-	 */
-irq_aarch64:
-	handle_async_exception IRQ_AARCH64
-	b	irq_aarch64
-
-	.align	7
-fiq_aarch64:
-	handle_async_exception FIQ_AARCH64
-	b	fiq_aarch64
-
-	.align	7
-serror_aarch64:
-	handle_async_exception SERROR_AARCH64
-	b	serror_aarch64
-
-	/* -----------------------------------------------------
-	 * Lower EL using AArch32 : 0x600 - 0x780
-	 * -----------------------------------------------------
-	 */
-	.align	7
-sync_exception_aarch32:
-	/* -----------------------------------------------------
-	 * This exception vector will be the entry point for
-	 * SMCs and traps that are unhandled at lower ELs most
-	 * commonly. SP_EL3 should point to a valid cpu context
-	 * where the general purpose and system register state
-	 * can be saved.
-	 * -----------------------------------------------------
-	 */
-	handle_sync_exception
-
-	.align	7
-	/* -----------------------------------------------------
-	 * Asynchronous exceptions from lower ELs are not
-	 * currently supported. Report their occurrence.
-	 * -----------------------------------------------------
-	 */
-irq_aarch32:
-	handle_async_exception IRQ_AARCH32
-	b	irq_aarch32
-
-	.align	7
-fiq_aarch32:
-	handle_async_exception FIQ_AARCH32
-	b	fiq_aarch32
-
-	.align	7
-serror_aarch32:
-	handle_async_exception SERROR_AARCH32
-	b	serror_aarch32
-	.align	7
-
-	.section	.text, "ax"
-	/* -----------------------------------------------------
-	 * The following code handles secure monitor calls.
-	 * Depending upon the execution state from where the SMC
-	 * has been invoked, it frees some general purpose
-	 * registers to perform the remaining tasks. They
-	 * involve finding the runtime service handler that is
-	 * the target of the SMC & switching to runtime stacks
-	 * (SP_EL0) before calling the handler.
-	 *
-	 * Note that x30 has been explicitly saved and can be
-	 * used here
-	 * -----------------------------------------------------
-	 */
-smc_handler32:
-	/* Check whether aarch32 issued an SMC64 */
-	tbnz	x0, #FUNCID_CC_SHIFT, smc_prohibited
-
-	/* -----------------------------------------------------
-	 * Since we're are coming from aarch32, x8-x18 need to
-	 * be saved as per SMC32 calling convention. If a lower
-	 * EL in aarch64 is making an SMC32 call then it must
-	 * have saved x8-x17 already therein.
-	 * -----------------------------------------------------
-	 */
-	stp	x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
-	stp	x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10]
-	stp	x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12]
-	stp	x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14]
-	stp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]
-
-	/* x4-x7, x18, sp_el0 are saved below */
-
-smc_handler64:
-	/* -----------------------------------------------------
-	 * Set up the arguments for the SMC handler. x0-x4 are
-	 * already in place. x5 is the cookie (not used now, so
-	 * passed as zero), x6 points to the context structure
-	 * (SP_EL3) and x7 carries the flags for the handler.
-	 * Hence x5-x7 are saved first. Note that x4 only needs
-	 * to be preserved for AArch32 callers, but it is saved
-	 * for AArch64 callers as well for convenience.
-	 * -----------------------------------------------------
-	 */
-	stp	x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
-	stp	x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
-
-	mov	x5, xzr		/* cookie = 0 */
-	mov	x6, sp		/* x6 = context structure pointer */
-
-	/* x16 = (type << FUNCID_OEN_WIDTH) | owning entity number */
-	ubfx	x16, x0, #FUNCID_OEN_SHIFT, #FUNCID_OEN_WIDTH
-	ubfx	x15, x0, #FUNCID_TYPE_SHIFT, #FUNCID_TYPE_WIDTH
-	orr	x16, x16, x15, lsl #FUNCID_OEN_WIDTH
-
-	adr	x11, (__RT_SVC_DESCS_START__ + RT_SVC_DESC_HANDLE)
-
-	/* Load the one-byte descriptor index, rt_svc_descs_indices[x16] */
-	adr	x14, rt_svc_descs_indices
-	ldrb	w15, [x14, x16]
-
-	/* Save x18 and SP_EL0 (x17 is free to use as scratch here) */
-	mrs	x17, sp_el0
-	stp	x18, x17, [x6, #CTX_GPREGS_OFFSET + CTX_GPREG_X18]
-
-	/* -----------------------------------------------------
-	 * Fetch into x12 the saved C runtime stack value which
-	 * becomes the new SP_EL0 (the EL3 runtime stack) further
-	 * down. It was saved in the 'cpu_context' structure
-	 * prior to the last ERET from EL3.
-	 * -----------------------------------------------------
-	 */
-	ldr	x12, [x6, #CTX_EL3STATE_OFFSET + CTX_RUNTIME_SP]
-
-	/*
-	 * Any index greater than 127 is invalid, i.e. bit 7 of the
-	 * byte loaded into w15 is set. Such calls go to smc_unknown
-	 */
-	tbnz	w15, 7, smc_unknown
-
-	/* Switch to SP_EL0; 'sp' no longer names the context from here on */
-	msr	spsel, #0
-
-	/* -----------------------------------------------------
-	 * Load the handler pointer using the descriptor index
-	 * x11 = (base + off), x15 = index
-	 *
-	 * x15 = *((base + off) + (index << log2(size)))
-	 * -----------------------------------------------------
-	 */
-	lsl	w10, w15, #RT_SVC_SIZE_LOG2
-	ldr	x15, [x11, w10, uxtw]
-
-	/* -----------------------------------------------------
-	 * Save SPSR_EL3, ELR_EL3 and SCR_EL3 in the context in
-	 * case there is a world switch during SMC handling.
-	 * TODO: Revisit if all system registers can be saved
-	 * later.
-	 * -----------------------------------------------------
-	 */
-	mrs	x16, spsr_el3
-	mrs	x17, elr_el3
-	mrs	x18, scr_el3
-	stp	x16, x17, [x6, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3]
-	stp	x18, xzr, [x6, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
-
-	/* Copy SCR_EL3.NS (bit 0 of x18) into bit 0 of the flags in x7 to indicate caller's security */
-	bfi	x7, x18, #0, #1
-
-	mov	sp, x12		/* SPSel is 0 here, so this sets SP_EL0 */
-
-	/* -----------------------------------------------------
-	 * Call the Secure Monitor Call handler held in x15 and
-	 * then drop directly into el3_exit() which will program
-	 * any remaining architectural state prior to issuing
-	 * the ERET to the desired lower EL.
-	 * -----------------------------------------------------
-	 */
-#if DEBUG
-	cbz	x15, rt_svc_fw_critical_error	/* null handler pointer: spin */
-#endif
-	blr	x15
-
-	/* -----------------------------------------------------
-	 * el3_exit: leaves EL3 through an ERET. It assumes that
-	 * SP_EL3 points to a valid context structure from which
-	 * the gp regs and other special registers are reloaded.
-	 * -----------------------------------------------------
-	 */
-el3_exit: ; .type el3_exit, %function
-	/* -----------------------------------------------------
-	 * Copy the current sp (SP_EL0, i.e. the EL3 runtime
-	 * stack to be used for handling the next SMC) into x17,
-	 * switch to SP_EL3 and store the copy in the context
-	 * that SP_EL3 points to.
-	 * -----------------------------------------------------
-	 */
-	mov	x17, sp
-	msr	spsel, #1
-	str	x17, [sp, #CTX_EL3STATE_OFFSET + CTX_RUNTIME_SP]
-
-	/* -----------------------------------------------------
-	 * Reload SCR_EL3, SPSR_EL3 and ELR_EL3 prior to ERET
-	 * -----------------------------------------------------
-	 */
-	ldp	x18, xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]	/* 2nd slot discarded */
-	ldp	x16, x17, [sp, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3]
-	msr	scr_el3, x18
-	msr	spsr_el3, x16
-	msr	elr_el3, x17
-
-	/* Reload x0-x18 and SP_EL0 (the bl clobbers x30), then x30, and return */
-	bl	restore_scratch_registers
-	ldp	x30, xzr, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
-	eret
-
-smc_unknown:
-	/*
-	 * Unknown SMC: x4-x18 and SP_EL0 are reloaded from the context
-	 * regardless of where we came from. AArch32 callers will find the
-	 * register contents unchanged, but AArch64 callers will find
-	 * registers modified (with stale earlier NS content). Either way,
-	 * we aren't leaking any secure information through them.
-	 */
-	bl	restore_scratch_registers_callee	/* clobbers x30, reloaded below */
-
-smc_prohibited:
-	ldp	x30, xzr, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]	/* reload saved x30 */
-	mov	w0, #SMC_UNK	/* report SMC_UNK to the caller in w0 */
-	eret
-
-rt_svc_fw_critical_error:
-	b	rt_svc_fw_critical_error	/* branch to self: spins forever */
-
-	/* -----------------------------------------------------
-	 * The following functions save and restore x0-x18 (the
-	 * caller saved registers as per the aapcs_64) and SP_EL0.
-	 * They are not macros to ensure their invocation fits
-	 * within the 32 instructions per exception vector.
-	 * -----------------------------------------------------
-	 */
-save_scratch_registers: ; .type save_scratch_registers, %function
-	stp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
-	stp	x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
-	stp	x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
-	stp	x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
-	stp	x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
-	stp	x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10]
-	stp	x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12]
-	stp	x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14]
-	stp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]
-	mrs	x17, sp_el0	/* x17 is already stored, so reuse it as scratch */
-	stp	x18, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X18]	/* sp_el0 goes in the slot after x18 */
-	ret	/* x17 holds sp_el0 on return */
-
-restore_scratch_registers: ; .type restore_scratch_registers, %function
-	ldp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
-	ldp	x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
-
-restore_scratch_registers_callee:	/* entry point that leaves x0-x3 untouched */
-	ldp	x18, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X18]	/* x17 = saved sp_el0 for now */
-
-	ldp	x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
-	ldp	x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
-	ldp	x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
-	ldp	x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10]
-	ldp	x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12]
-	ldp	x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14]
-
-	msr	sp_el0, x17	/* write sp_el0 back before x17 itself is reloaded */
-	ldp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]	/* done last, x17 was in use above */
-	ret
-
-	/* -----------------------------------------------------
-	 * Per-cpu exception stack: 0x300 bytes if DEBUG, else 0x100
-	 * -----------------------------------------------------
-	 */
-#if DEBUG
-#define PCPU_EXCEPTION_STACK_SIZE	0x300
-#else
-#define PCPU_EXCEPTION_STACK_SIZE	0x100
-#endif
-	/* -----------------------------------------------------
-	 * uint64_t get_exception_stack (uint64_t mpidr) : Returns
-	 * in x0 the address of a small per-cpu stack used for
-	 * reporting unhandled exceptions. Clobbers x1 and x10.
-	 * -----------------------------------------------------
-	 */
-get_exception_stack: ; .type get_exception_stack, %function
-	mov	x10, x30 // keep lr in x10; assumes the callee below preserves x10 - TODO confirm
-	bl	platform_get_core_pos
-	add	x0, x0, #1 // +1 so the result is the end of this core's slot
-	mov	x1, #PCPU_EXCEPTION_STACK_SIZE
-	mul	x0, x0, x1
-	ldr	x1, =pcpu_exception_stack
-	add	x0, x1, x0 // x0 = pcpu_exception_stack + (pos + 1) * size
-	ret	x10 // return through the lr kept in x10
-
-	/* -----------------------------------------------------
-	 * Per-cpu exception stacks in normal memory, one per core.
-	 * -----------------------------------------------------
-	 */
-	.section	data, "aw", %nobits; .align 6	/* .align 6: 2^6 = 64-byte alignment */
-
-pcpu_exception_stack:
-	/* PLATFORM_CORE_COUNT zero-filled slots of PCPU_EXCEPTION_STACK_SIZE bytes */
-	.space (PLATFORM_CORE_COUNT * PCPU_EXCEPTION_STACK_SIZE), 0
-