Merge pull request #177 from jcastillo-arm/jc/tf-issues/096

Rework incorrect use of assert() and panic() in codebase
diff --git a/Makefile b/Makefile
index 39496fb..fef89c2 100644
--- a/Makefile
+++ b/Makefile
@@ -60,7 +60,9 @@
 # Determine the version of ARM GIC architecture to use for interrupt management
 # in EL3. The platform port can change this value if needed.
 ARM_GIC_ARCH		:=	2
-
+# Flag used to indicate if ASM_ASSERTION should be enabled for the build.
+# This defaults to being present in DEBUG builds only.
+ASM_ASSERTION		:=	${DEBUG}
 
 # Checkpatch ignores
 CHECK_IGNORE		=	--ignore COMPLEX_MACRO
@@ -90,8 +92,8 @@
 VERSION_STRING		:=	v${VERSION_MAJOR}.${VERSION_MINOR}(${BUILD_TYPE}):${BUILD_STRING}
 
 BL_COMMON_SOURCES	:=	common/bl_common.c			\
-				common/debug.c				\
 				common/tf_printf.c			\
+				common/aarch64/debug.S			\
 				lib/aarch64/cache_helpers.S		\
 				lib/aarch64/misc_helpers.S		\
 				lib/aarch64/xlat_helpers.c		\
@@ -207,6 +209,9 @@
 # Process ARM_GIC_ARCH flag
 $(eval $(call add_define,ARM_GIC_ARCH))
 
+# Process ASM_ASSERTION flag
+$(eval $(call assert_boolean,ASM_ASSERTION))
+$(eval $(call add_define,ASM_ASSERTION))
 
 ASFLAGS			+= 	-nostdinc -ffreestanding -Wa,--fatal-warnings	\
 				-Werror -Wmissing-include-dirs			\
diff --git a/bl1/aarch64/bl1_arch_setup.c b/bl1/aarch64/bl1_arch_setup.c
index cf69ac7..eeaa24a 100644
--- a/bl1/aarch64/bl1_arch_setup.c
+++ b/bl1/aarch64/bl1_arch_setup.c
@@ -37,20 +37,11 @@
  ******************************************************************************/
 void bl1_arch_setup(void)
 {
-	unsigned long tmp_reg = 0;
-
-	/* Enable alignment checks */
-	tmp_reg = read_sctlr_el3();
-	tmp_reg |= (SCTLR_A_BIT | SCTLR_SA_BIT);
-	write_sctlr_el3(tmp_reg);
-	isb();
-
 	/*
 	 * Set the next EL to be AArch64, route external abort and SError
 	 * interrupts to EL3
 	 */
-	tmp_reg = SCR_RES1_BITS | SCR_RW_BIT | SCR_EA_BIT;
-	write_scr(tmp_reg);
+	write_scr_el3(SCR_RES1_BITS | SCR_RW_BIT | SCR_EA_BIT);
 
 	/*
 	 * Enable SError and Debug exceptions
diff --git a/bl1/aarch64/bl1_entrypoint.S b/bl1/aarch64/bl1_entrypoint.S
index 50cfae6..dd7d78f 100644
--- a/bl1/aarch64/bl1_entrypoint.S
+++ b/bl1/aarch64/bl1_entrypoint.S
@@ -44,7 +44,7 @@
 func bl1_entrypoint
 	/* ---------------------------------------------
 	 * Set the CPU endianness before doing anything
-	 * that might involve memory reads or writes
+	 * that might involve memory reads or writes.
 	 * ---------------------------------------------
 	 */
 	mrs	x0, sctlr_el3
@@ -59,12 +59,14 @@
 	 */
 	bl	cpu_reset_handler
 
-	/* -------------------------------
-	 * Enable the instruction cache.
-	 * -------------------------------
+	/* ---------------------------------------------
+	 * Enable the instruction cache, stack pointer
+	 * and data access alignment checks
+	 * ---------------------------------------------
 	 */
+	mov	x1, #(SCTLR_I_BIT | SCTLR_A_BIT | SCTLR_SA_BIT)
 	mrs	x0, sctlr_el3
-	orr	x0, x0, #SCTLR_I_BIT
+	orr	x0, x0, x1
 	msr	sctlr_el3, x0
 	isb
 
@@ -130,14 +132,16 @@
 	ldr	x2, =__DATA_SIZE__
 	bl	memcpy16
 
-	/* ---------------------------------------------
-	 * Give ourselves a small coherent stack to
-	 * ease the pain of initializing the MMU and
-	 * CCI in assembler
-	 * ---------------------------------------------
+	/* --------------------------------------------
+	 * Allocate a stack whose memory will be marked
+	 * as Normal-IS-WBWA when the MMU is enabled.
+	 * There is no risk of reading stale stack
+	 * memory after enabling the MMU as only the
+	 * primary cpu is running at the moment.
+	 * --------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
 
 	/* ---------------------------------------------
 	 * Architectural init. can be generic e.g.
@@ -150,14 +154,6 @@
 	bl	bl1_early_platform_setup
 	bl	bl1_plat_arch_setup
 
-	/* ---------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
-	 * ---------------------------------------------
-	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
-
 	/* --------------------------------------------------
 	 * Initialize platform and jump to our c-entry point
 	 * for this type of reset. Panic if it returns
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index 09eadff..6fcd040 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -65,11 +65,13 @@
 	msr	vbar_el1, x0
 
 	/* ---------------------------------------------
-	 * Enable the instruction cache.
+	 * Enable the instruction cache, stack pointer
+	 * and data access alignment checks
 	 * ---------------------------------------------
 	 */
+	mov	x1, #(SCTLR_I_BIT | SCTLR_A_BIT | SCTLR_SA_BIT)
 	mrs	x0, sctlr_el1
-	orr	x0, x0, #SCTLR_I_BIT
+	orr	x0, x0, x1
 	msr	sctlr_el1, x0
 	isb
 
@@ -96,12 +98,15 @@
 	bl	zeromem16
 
 	/* --------------------------------------------
-	 * Give ourselves a small coherent stack to
-	 * ease the pain of initializing the MMU
+	 * Allocate a stack whose memory will be marked
+	 * as Normal-IS-WBWA when the MMU is enabled.
+	 * There is no risk of reading stale stack
+	 * memory after enabling the MMU as only the
+	 * primary cpu is running at the moment.
 	 * --------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
 
 	/* ---------------------------------------------
 	 * Perform early platform setup & platform
@@ -113,14 +118,6 @@
 	bl	bl2_plat_arch_setup
 
 	/* ---------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
-	 * ---------------------------------------------
-	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
-
-	/* ---------------------------------------------
 	 * Jump to main function.
 	 * ---------------------------------------------
 	 */
diff --git a/bl31/aarch64/bl31_arch_setup.c b/bl31/aarch64/bl31_arch_setup.c
index e0382b3..f67881e 100644
--- a/bl31/aarch64/bl31_arch_setup.c
+++ b/bl31/aarch64/bl31_arch_setup.c
@@ -42,21 +42,12 @@
  ******************************************************************************/
 void bl31_arch_setup(void)
 {
-	unsigned long tmp_reg = 0;
-	uint64_t counter_freq;
-
-	/* Enable alignment checks */
-	tmp_reg = read_sctlr_el3();
-	tmp_reg |= (SCTLR_A_BIT | SCTLR_SA_BIT);
-	write_sctlr_el3(tmp_reg);
-
 	/*
 	 * Route external abort and SError interrupts to EL3
 	 * other SCR bits will be configured before exiting to a lower exception
 	 * level
 	 */
-	tmp_reg = SCR_RES1_BITS | SCR_EA_BIT;
-	write_scr(tmp_reg);
+	write_scr_el3(SCR_RES1_BITS | SCR_EA_BIT);
 
 	/*
 	 * Enable SError and Debug exceptions
@@ -65,6 +56,5 @@
 	enable_debug_exceptions();
 
 	/* Program the counter frequency */
-	counter_freq = plat_get_syscnt_freq();
-	write_cntfrq_el0(counter_freq);
+	write_cntfrq_el0(plat_get_syscnt_freq());
 }
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 6e48e31..fb8fd2c 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -52,6 +52,15 @@
 	mov	x20, x0
 	mov	x21, x1
 #else
+	/* ---------------------------------------------
+	 * Set the CPU endianness before doing anything
+	 * that might involve memory reads or writes.
+	 * ---------------------------------------------
+	 */
+	mrs	x0, sctlr_el3
+	bic	x0, x0, #SCTLR_EE_BIT
+	msr	sctlr_el3, x0
+	isb
 
 	/* -----------------------------------------------------
 	 * Perform any processor specific actions upon reset
@@ -61,24 +70,34 @@
 	 */
 	bl	cpu_reset_handler
 #endif
-
 	/* ---------------------------------------------
-	 * Enable the instruction cache.
+	 * Enable the instruction cache, stack pointer
+	 * and data access alignment checks
 	 * ---------------------------------------------
 	 */
-	mrs	x1, sctlr_el3
-	orr	x1, x1, #SCTLR_I_BIT
-	msr	sctlr_el3, x1
+	mov	x1, #(SCTLR_I_BIT | SCTLR_A_BIT | SCTLR_SA_BIT)
+	mrs	x0, sctlr_el3
+	orr	x0, x0, x1
+	msr	sctlr_el3, x0
 	isb
 
 	/* ---------------------------------------------
+	 * Initialise cpu_data early so that crash
+	 * reporting has access to the crash buf.
+	 * Since crash reporting depends on cpu_data to
+	 * report the unhandled exception, not
+	 * doing so can lead to recursive exceptions due
+	 * to a NULL TPIDR_EL3
+	 * ---------------------------------------------
+	 */
+	bl	init_cpu_data_ptr
+
+	/* ---------------------------------------------
-	 * Set the exception vector and zero tpidr_el3
-	 * until the crash reporting is set up
+	 * Set the exception vector.
 	 * ---------------------------------------------
 	 */
 	adr	x1, runtime_exceptions
 	msr	vbar_el3, x1
-	msr	tpidr_el3, xzr
 
 	/* ---------------------------------------------------------------------
 	 * The initial state of the Architectural feature trap register
@@ -137,27 +156,21 @@
 	bl	zeromem16
 
 	/* ---------------------------------------------
-	 * Initialise cpu_data and crash reporting
-	 * ---------------------------------------------
-	 */
-#if CRASH_REPORTING
-	bl	init_crash_reporting
-#endif
-	bl	init_cpu_data_ptr
-
-	/* ---------------------------------------------
 	 * Use SP_EL0 for the C runtime stack.
 	 * ---------------------------------------------
 	 */
 	msr	spsel, #0
 
 	/* --------------------------------------------
-	 * Give ourselves a small coherent stack to
-	 * ease the pain of initializing the MMU
+	 * Allocate a stack whose memory will be marked
+	 * as Normal-IS-WBWA when the MMU is enabled.
+	 * There is no risk of reading stale stack
+	 * memory after enabling the MMU as only the
+	 * primary cpu is running at the moment.
 	 * --------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
 
 	/* ---------------------------------------------
 	 * Perform platform specific early arch. setup
@@ -175,14 +188,6 @@
 	bl	bl31_plat_arch_setup
 
 	/* ---------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
-	 * ---------------------------------------------
-	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
-
-	/* ---------------------------------------------
 	 * Jump to main function.
 	 * ---------------------------------------------
 	 */
diff --git a/bl31/aarch64/context.S b/bl31/aarch64/context.S
index 2698215..79b5d19 100644
--- a/bl31/aarch64/context.S
+++ b/bl31/aarch64/context.S
@@ -43,23 +43,9 @@
 	.global el3_sysregs_context_save
 func el3_sysregs_context_save
 
-	mrs	x10, sctlr_el3
-	str	x10, [x0, #CTX_SCTLR_EL3]
-
-	mrs	x11, cptr_el3
-	stp	x11, xzr, [x0, #CTX_CPTR_EL3]
-
-	mrs	x13, cntfrq_el0
-	mrs	x14, mair_el3
-	stp	x13, x14, [x0, #CTX_CNTFRQ_EL0]
-
-	mrs	x15, tcr_el3
-	mrs	x16, ttbr0_el3
-	stp	x15, x16, [x0, #CTX_TCR_EL3]
-
-	mrs	x17, daif
-	and	x17, x17, #(DAIF_ABT_BIT | DAIF_DBG_BIT)
-	stp	x17, xzr, [x0, #CTX_DAIF_EL3]
+	mrs	x10, cptr_el3
+	mrs	x11, cntfrq_el0
+	stp	x10, x11, [x0, #CTX_CPTR_EL3]
 
 	ret
 
@@ -78,27 +64,9 @@
 	.global el3_sysregs_context_restore
 func el3_sysregs_context_restore
 
-	ldp	x11, xzr, [x0, #CTX_CPTR_EL3]
-	msr	cptr_el3, x11
-
-	ldp	x13, x14, [x0, #CTX_CNTFRQ_EL0]
-	msr	cntfrq_el0, x13
-	msr	mair_el3, x14
-
-	ldp	x15, x16, [x0, #CTX_TCR_EL3]
-	msr	tcr_el3, x15
-	msr	ttbr0_el3, x16
-
-	ldp	x17, xzr, [x0, #CTX_DAIF_EL3]
-	mrs	x11, daif
-	orr	x17, x17, x11
-	msr	daif, x17
-
-	/* Make sure all the above changes are observed */
-	isb
-
-	ldr	x10, [x0, #CTX_SCTLR_EL3]
-	msr	sctlr_el3, x10
+	ldp	x13, x14, [x0, #CTX_CPTR_EL3]
+	msr	cptr_el3, x13
+	msr	cntfrq_el0, x14
 	isb
 
 	ret
diff --git a/bl31/aarch64/crash_reporting.S b/bl31/aarch64/crash_reporting.S
index 1118e96..e69878b 100644
--- a/bl31/aarch64/crash_reporting.S
+++ b/bl31/aarch64/crash_reporting.S
@@ -34,11 +34,13 @@
 #include <plat_macros.S>
 #include <platform_def.h>
 
-	.globl	dump_state_and_die
-	.globl	dump_intr_state_and_die
-	.globl  init_crash_reporting
+	.globl	report_unhandled_exception
+	.globl	report_unhandled_interrupt
+	.globl	el3_panic
 
 #if CRASH_REPORTING
+#define REG_SIZE	0x8
+
 	/* ------------------------------------------------------
 	 * The below section deals with dumping the system state
 	 * when an unhandled exception is taken in EL3.
@@ -46,267 +48,326 @@
 	 * be dumped during a unhandled exception is given below.
 	 * ------------------------------------------------------
 	 */
-.section .rodata.dump_reg_name, "aS"
-caller_saved_regs:	.asciz	"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",\
-	 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16",\
-	 "x17", "x18", ""
+.section .rodata.crash_prints, "aS"
+print_spacer:
+	.asciz	" =\t\t0x"
 
-callee_saved_regs: .asciz	"x19", "x20", "x21", "x22", "x23", "x24",\
-	 "x25", "x26", "x27", "x28", "x29", "x30", ""
+cpu_ectlr_reg:
+	.asciz	"cpuectlr_el1 =\t\t0x"
 
-el3_sys_regs: .asciz	"scr_el3", "sctlr_el3", "cptr_el3", "tcr_el3",\
-	 "daif", "mair_el3", "spsr_el3", "elr_el3", "ttbr0_el3", "esr_el3",\
-	 "sp_el3", "far_el3", ""
+gp_regs:
+	.asciz	"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",\
+		"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",\
+		"x16", "x17", "x18", "x19", "x20", "x21", "x22",\
+		"x23", "x24", "x25", "x26", "x27", "x28", "x29", ""
+el3_sys_regs:
+	.asciz	"scr_el3", "sctlr_el3", "cptr_el3", "tcr_el3",\
+		"daif", "mair_el3", "spsr_el3", "elr_el3", "ttbr0_el3",\
+		"esr_el3", "far_el3", ""
 
-non_el3_sys_0_regs: .asciz "spsr_el1", "elr_el1", "spsr_abt", "spsr_und",\
-	"spsr_irq", "spsr_fiq", "sctlr_el1", "actlr_el1", "cpacr_el1",\
-	"csselr_el1", "sp_el1", "esr_el1", "ttbr0_el1", "ttbr1_el1",\
-	"mair_el1", "amair_el1", "tcr_el1", "tpidr_el1", ""
+non_el3_sys_regs:
+	.asciz	"spsr_el1", "elr_el1", "spsr_abt", "spsr_und",\
+		"spsr_irq", "spsr_fiq", "sctlr_el1", "actlr_el1", "cpacr_el1",\
+		"csselr_el1", "sp_el1", "esr_el1", "ttbr0_el1", "ttbr1_el1",\
+		"mair_el1", "amair_el1", "tcr_el1", "tpidr_el1", "tpidr_el0",\
+		"tpidrro_el0", "dacr32_el2", "ifsr32_el2", "par_el1",\
+		"mpidr_el1", "afsr0_el1", "afsr1_el1", "contextidr_el1",\
+		"vbar_el1", "cntp_ctl_el0", "cntp_cval_el0", "cntv_ctl_el0",\
+		"cntv_cval_el0", "cntkctl_el1", "fpexc32_el2", "sp_el0", ""
 
-non_el3_sys_1_regs: .asciz "tpidr_el0", "tpidrro_el0", "dacr32_el2",\
-	"ifsr32_el2", "par_el1", "far_el1", "afsr0_el1", "afsr1_el1",\
-	"contextidr_el1", "vbar_el1", "cntp_ctl_el0", "cntp_cval_el0",\
-	"cntv_ctl_el0", "cntv_cval_el0", "cntkctl_el1", "fpexc32_el2",\
-	"sp_el0", ""
+panic_msg:
+	.asciz "PANIC in EL3 at x30 = 0x"
+excpt_msg:
+	.asciz "Unhandled Exception in EL3.\nx30 =\t\t0x"
+intr_excpt_msg:
+	.asciz "Unhandled Interrupt Exception in EL3.\nx30 =\t\t0x"
 
-	/* -----------------------------------------------------
-	 * Currently we are stack limited. Hence make sure that
-	 * we dont try to dump more than 20 registers using the
-	 * stack.
-	 * -----------------------------------------------------
+	/*
+	 * Helper function to print newline to console.
 	 */
+func print_newline
+	mov	x0, '\n'
+	b	plat_crash_console_putc
 
-#define REG_SIZE 0x8
+	/*
+	 * Helper function to print from crash buf.
+	 * The print loop is controlled by the buf size and
+	 * the ascii reg name list which is passed in x6.
+	 * x0 is not preserved; callers reload the crash buf
+	 * address from tpidr_el3. Clobbers : x0 - x7, sp
+	 */
+func size_controlled_print
+	/* Save the lr */
+	mov	sp, x30
+	/* load the crash buf address */
+	mrs	x7, tpidr_el3
+test_size_list:
+	/* Calculate x5 always as it will be clobbered by asm_print_hex */
+	mrs	x5, tpidr_el3
+	add	x5, x5, #CPU_DATA_CRASH_BUF_SIZE
+	/* Test whether we have reached end of crash buf */
+	cmp	x7, x5
+	b.eq	exit_size_print
+	ldrb	w4, [x6]
+	/* Test whether we are at end of list */
+	cbz	w4, exit_size_print
+	mov	x4, x6
+	/* asm_print_str updates x4 to point to next entry in list */
+	bl	asm_print_str
+	/* update x6 with the updated list pointer */
+	mov	x6, x4
+	adr	x4, print_spacer
+	bl	asm_print_str
+	ldr	x4, [x7], #REG_SIZE
+	bl	asm_print_hex
+	bl	print_newline
+	b	test_size_list
+exit_size_print:
+	mov	x30, sp
+	ret
 
-/* The caller saved registers are X0 to X18 */
-#define CALLER_SAVED_REG_SIZE 		(20 * REG_SIZE)
-/* The caller saved registers are X19 to X30 */
-#define CALLEE_SAVED_REG_SIZE 		(12 * REG_SIZE)
-/* The EL3 sys regs*/
-#define EL3_SYS_REG_SIZE 			(12 * REG_SIZE)
-/* The non EL3 sys regs set-0 */
-#define NON_EL3_SYS_0_REG_SIZE 		(18 * REG_SIZE)
-/* The non EL3 sys regs set-1 */
-#define NON_EL3_SYS_1_REG_SIZE 		(18 * REG_SIZE)
+	/*
+	 * Helper function to store x8 - x15 registers to
+	 * the crash buf. The system register values are
+	 * copied to x8 - x15 by the caller and are then
+	 * copied to the crash buf by this function. The
+	 * crash buf address is read from tpidr_el3. It then
+	 * branches to size_controlled_print to print to console.
+	 * Clobbers : x0 - x7, sp
+	 */
+func str_in_crash_buf_print
+	/* restore the crash buf address in x0 */
+	mrs	x0, tpidr_el3
+	stp	x8, x9, [x0]
+	stp	x10, x11, [x0, #REG_SIZE * 2]
+	stp	x12, x13, [x0, #REG_SIZE * 4]
+	stp	x14, x15, [x0, #REG_SIZE * 6]
+	b	size_controlled_print
 
-	.macro print_caller_saved_regs
-	sub	sp, sp, #CALLER_SAVED_REG_SIZE
-	stp	x0, x1, [sp]
-	stp	x2, x3, [sp, #(REG_SIZE * 2)]
-	stp	x4, x5, [sp, #(REG_SIZE * 4)]
-	stp	x6, x7, [sp, #(REG_SIZE * 6)]
-	stp	x8, x9, [sp, #(REG_SIZE * 8)]
-	stp	x10, x11, [sp, #(REG_SIZE * 10)]
-	stp	x12, x13, [sp, #(REG_SIZE * 12)]
-	stp	x14, x15, [sp, #(REG_SIZE * 14)]
-	stp	x16, x17, [sp, #(REG_SIZE * 16)]
-	stp	x18, xzr, [sp, #(REG_SIZE * 18)]
-	adr	x0, caller_saved_regs
+	/* ------------------------------------------------------
+	 * This macro calculates the address of the crash buf
+	 * in cpu_data and stores it in tpidr_el3. It also saves
+	 * x0 and x1 in the crash buf by using sp as a temporary
+	 * register.
+	 * ------------------------------------------------------
+	 */
+	.macro prepare_crash_buf_save_x0_x1
+	/* we can corrupt this reg to free up x0 */
+	mov	sp, x0
+	/* tpidr_el3 contains the address to cpu_data structure */
+	mrs	x0, tpidr_el3
+	/* Calculate the Crash buffer offset in cpu_data */
+	add	x0, x0, #CPU_DATA_CRASH_BUF_OFFSET
+	/* Store crash buffer address in tpidr_el3 */
+	msr	tpidr_el3, x0
+	str	x1, [x0, #REG_SIZE]
 	mov	x1, sp
-	bl	print_string_value
-	add	sp, sp, #CALLER_SAVED_REG_SIZE
+	str	x1, [x0]
 	.endm
 
-	.macro print_callee_saved_regs
-	sub	sp, sp, CALLEE_SAVED_REG_SIZE
-	stp	x19, x20, [sp]
-	stp	x21, x22, [sp, #(REG_SIZE * 2)]
-	stp	x23, x24, [sp, #(REG_SIZE * 4)]
-	stp	x25, x26, [sp, #(REG_SIZE * 6)]
-	stp	x27, x28, [sp, #(REG_SIZE * 8)]
-	stp	x29, x30, [sp, #(REG_SIZE * 10)]
-	adr	x0, callee_saved_regs
-	mov	x1, sp
-	bl	print_string_value
-	add	sp, sp, #CALLEE_SAVED_REG_SIZE
-	.endm
+	/* -----------------------------------------------------
+	 * This function reports a crash (if crash
+	 * reporting is enabled) when an unhandled exception
+	 * occurs. It prints the CPU state via the crash console
+	 * making use of the crash buf. This function will
+	 * not return.
+	 * -----------------------------------------------------
+	 */
+func report_unhandled_exception
+	prepare_crash_buf_save_x0_x1
+	adr	x0, excpt_msg
+	mov	sp, x0
+	/* This call will not return */
+	b	do_crash_reporting
 
-	.macro print_el3_sys_regs
-	sub	sp, sp, #EL3_SYS_REG_SIZE
-	mrs	x9, scr_el3
-	mrs	x10, sctlr_el3
-	mrs	x11, cptr_el3
-	mrs	x12, tcr_el3
-	mrs	x13, daif
-	mrs	x14, mair_el3
-	mrs	x15, spsr_el3 /*save the elr and spsr regs seperately*/
-	mrs	x16, elr_el3
-	mrs	x17, ttbr0_el3
-	mrs	x8, esr_el3
-	mrs	x7, far_el3
 
-	stp	x9, x10, [sp]
-	stp	x11, x12, [sp, #(REG_SIZE * 2)]
-	stp	x13, x14, [sp, #(REG_SIZE * 4)]
-	stp	x15, x16, [sp, #(REG_SIZE * 6)]
-	stp	x17, x8, [sp, #(REG_SIZE * 8)]
-	stp	x0, x7, [sp, #(REG_SIZE * 10)] /* sp_el3 is in x0 */
+	/* -----------------------------------------------------
+	 * This function reports a crash (if crash
+	 * reporting is enabled) when an unhandled interrupt
+	 * occurs. It prints the CPU state via the crash console
+	 * making use of the crash buf. This function will
+	 * not return.
+	 * -----------------------------------------------------
+	 */
+func report_unhandled_interrupt
+	prepare_crash_buf_save_x0_x1
+	adr	x0, intr_excpt_msg
+	mov	sp, x0
+	/* This call will not return */
+	b	do_crash_reporting
 
-	adr	x0, el3_sys_regs
-	mov	x1, sp
-	bl	print_string_value
-	add	sp, sp, #EL3_SYS_REG_SIZE
-	.endm
+	/* -----------------------------------------------------
+	 * This function reports a crash (if crash
+	 * reporting is enabled) when panic() is invoked from
+	 * C Runtime. It prints the CPU state via the crash
+	 * console making use of the crash buf. This function
+	 * will not return.
+	 * -----------------------------------------------------
+	 */
+func el3_panic
+	msr	spsel, #1
+	prepare_crash_buf_save_x0_x1
+	adr	x0, panic_msg
+	mov	sp, x0
+	/* This call will not return */
+	b	do_crash_reporting
 
-	.macro print_non_el3_sys_0_regs
-	sub	sp, sp, #NON_EL3_SYS_0_REG_SIZE
-	mrs	x9, spsr_el1
-	mrs	x10, elr_el1
-	mrs	x11, spsr_abt
-	mrs	x12, spsr_und
-	mrs	x13, spsr_irq
-	mrs	x14, spsr_fiq
-	mrs	x15, sctlr_el1
-	mrs	x16, actlr_el1
-	mrs	x17, cpacr_el1
-	mrs	x8, csselr_el1
+	/* ------------------------------------------------------------
+	 * The common crash reporting functionality. It requires x0
+	 * and x1 has already been stored in crash buf, sp points to
+	 * crash message and tpidr_el3 contains the crash buf address.
+	 * The function does the following:
+	 *   - Retrieve the crash buffer from tpidr_el3
+	 *   - Store x2 to x6 and x30 in the crash buffer
+	 *   - Initialise the crash console.
+	 *   - Print the crash message by using the address in sp.
+	 *   - Print x30 value to the crash console.
+	 *   - Print x0 - x7 from the crash buf to the crash console.
+	 *   - Print x8 - x29 (in groups of 8 registers) using the
+	 *     crash buf to the crash console.
+	 *   - Print el3 sys regs (in groups of 8 registers) using the
+	 *     crash buf to the crash console.
+	 *   - Print non el3 sys regs (in groups of 8 registers) using
+	 *     the crash buf to the crash console.
+	 * ------------------------------------------------------------
+	 */
+func do_crash_reporting
+	/* Retrieve the crash buf from tpidr_el3 */
+	mrs	x0, tpidr_el3
+	/* Store x2 - x6, x30 in the crash buffer */
+	stp	x2, x3, [x0, #REG_SIZE * 2]
+	stp	x4, x5, [x0, #REG_SIZE * 4]
+	stp	x6, x30, [x0, #REG_SIZE * 6]
+	/* Initialize the crash console */
+	bl	plat_crash_console_init
+	/* Verify the console is initialized */
+	cbz	x0, crash_panic
+	/* Print the crash message. sp points to the crash message */
+	mov	x4, sp
+	bl	asm_print_str
+	/* load the crash buf address */
+	mrs	x0, tpidr_el3
+	/* report x30 first from the crash buf */
+	ldr	x4, [x0, #REG_SIZE * 7]
+	bl	asm_print_hex
+	bl	print_newline
+	/* Load the crash buf address */
+	mrs	x0, tpidr_el3
+	/* Now mov x7 into crash buf */
+	str	x7, [x0, #REG_SIZE * 7]
 
-	stp	x9, x10, [sp]
-	stp	x11, x12, [sp, #(REG_SIZE * 2)]
-	stp	x13, x14, [sp, #(REG_SIZE * 4)]
-	stp	x15, x16, [sp, #(REG_SIZE * 6)]
-	stp	x17, x8, [sp, #(REG_SIZE * 8)]
+	/* Report x0 - x29 values stored in crash buf*/
+	/* Store the ascii list pointer in x6 */
+	adr	x6, gp_regs
+	/* Print x0 to x7 from the crash buf */
+	bl	size_controlled_print
+	/* Store x8 - x15 in crash buf and print */
+	bl	str_in_crash_buf_print
+	/* Load the crash buf address */
+	mrs	x0, tpidr_el3
+	/* Store the rest of gp regs and print */
+	stp	x16, x17, [x0]
+	stp	x18, x19, [x0, #REG_SIZE * 2]
+	stp	x20, x21, [x0, #REG_SIZE * 4]
+	stp	x22, x23, [x0, #REG_SIZE * 6]
+	bl	size_controlled_print
+	/* Load the crash buf address */
+	mrs	x0, tpidr_el3
+	stp	x24, x25, [x0]
+	stp	x26, x27, [x0, #REG_SIZE * 2]
+	stp	x28, x29, [x0, #REG_SIZE * 4]
+	bl	size_controlled_print
+
+	/* Print the el3 sys registers */
+	adr	x6, el3_sys_regs
+	mrs	x8, scr_el3
+	mrs	x9, sctlr_el3
+	mrs	x10, cptr_el3
+	mrs	x11, tcr_el3
+	mrs	x12, daif
+	mrs	x13, mair_el3
+	mrs	x14, spsr_el3
+	mrs	x15, elr_el3
+	bl	str_in_crash_buf_print
+	mrs	x8, ttbr0_el3
+	mrs	x9, esr_el3
+	mrs	x10, far_el3
+	bl	str_in_crash_buf_print
 
+	/* Print the non el3 sys registers */
+	adr	x6, non_el3_sys_regs
+	mrs	x8, spsr_el1
+	mrs	x9, elr_el1
+	mrs	x10, spsr_abt
+	mrs	x11, spsr_und
+	mrs	x12, spsr_irq
+	mrs	x13, spsr_fiq
+	mrs	x14, sctlr_el1
+	mrs	x15, actlr_el1
+	bl	str_in_crash_buf_print
+	mrs	x8, cpacr_el1
+	mrs	x9, csselr_el1
 	mrs	x10, sp_el1
 	mrs	x11, esr_el1
 	mrs	x12, ttbr0_el1
 	mrs	x13, ttbr1_el1
 	mrs	x14, mair_el1
 	mrs	x15, amair_el1
-	mrs	x16, tcr_el1
-	mrs	x17, tpidr_el1
+	bl	str_in_crash_buf_print
+	mrs	x8, tcr_el1
+	mrs	x9, tpidr_el1
+	mrs	x10, tpidr_el0
+	mrs	x11, tpidrro_el0
+	mrs	x12, dacr32_el2
+	mrs	x13, ifsr32_el2
+	mrs	x14, par_el1
+	mrs	x15, mpidr_el1
+	bl	str_in_crash_buf_print
+	mrs	x8, afsr0_el1
+	mrs	x9, afsr1_el1
+	mrs	x10, contextidr_el1
+	mrs	x11, vbar_el1
+	mrs	x12, cntp_ctl_el0
+	mrs	x13, cntp_cval_el0
+	mrs	x14, cntv_ctl_el0
+	mrs	x15, cntv_cval_el0
+	bl	str_in_crash_buf_print
+	mrs	x8, cntkctl_el1
+	mrs	x9, fpexc32_el2
+	mrs	x10, sp_el0
+	bl	str_in_crash_buf_print
 
-	stp	x10, x11, [sp, #(REG_SIZE * 10)]
-	stp	x12, x13, [sp, #(REG_SIZE * 12)]
-	stp	x14, x15, [sp, #(REG_SIZE * 14)]
-	stp	x16, x17, [sp, #(REG_SIZE * 16)]
-
-	adr	x0, non_el3_sys_0_regs
-	mov	x1, sp
-	bl	print_string_value
-	add	sp, sp, #NON_EL3_SYS_0_REG_SIZE
-	.endm
+	/* Print the CPUECTLR_EL1 reg */
+	mrs	x0, midr_el1
+	lsr	x0, x0, #MIDR_PN_SHIFT
+	and	x0, x0, #MIDR_PN_MASK
+	cmp	x0, #MIDR_PN_A57
+	b.eq	1f
+	cmp	x0, #MIDR_PN_A53
+	b.ne	2f
+1:
+	adr	x4, cpu_ectlr_reg
+	bl	asm_print_str
+	mrs	x4, CPUECTLR_EL1
+	bl	asm_print_hex
+	bl	print_newline
+2:
 
-	.macro print_non_el3_sys_1_regs
-	sub	sp, sp, #NON_EL3_SYS_1_REG_SIZE
+	/* Print the gic registers */
+	plat_print_gic_regs
 
-	mrs	x9, tpidr_el0
-	mrs	x10, tpidrro_el0
-	mrs	x11, dacr32_el2
-	mrs	x12, ifsr32_el2
-	mrs	x13, par_el1
-	mrs	x14, far_el1
-	mrs	x15, afsr0_el1
-	mrs	x16, afsr1_el1
-	mrs	x17, contextidr_el1
-	mrs	x8, vbar_el1
+	/* Print the interconnect registers */
+	plat_print_interconnect_regs
 
-	stp	x9, x10, [sp]
-	stp	x11, x12, [sp, #(REG_SIZE * 2)]
-	stp	x13, x14, [sp, #(REG_SIZE * 4)]
-	stp	x15, x16, [sp, #(REG_SIZE * 6)]
-	stp	x17, x8, [sp, #(REG_SIZE * 8)]
-
-	mrs	x10, cntp_ctl_el0
-	mrs	x11, cntp_cval_el0
-	mrs	x12, cntv_ctl_el0
-	mrs	x13, cntv_cval_el0
-	mrs	x14, cntkctl_el1
-	mrs	x15, fpexc32_el2
-	mrs	x8, sp_el0
-
-	stp	x10, x11, [sp, #(REG_SIZE *10)]
-	stp	x12, x13, [sp, #(REG_SIZE * 12)]
-	stp	x14, x15, [sp, #(REG_SIZE * 14)]
-	stp	x8, xzr, [sp, #(REG_SIZE * 16)]
-
-	adr	x0, non_el3_sys_1_regs
-	mov	x1, sp
-	bl	print_string_value
-	add	sp, sp, #NON_EL3_SYS_1_REG_SIZE
-	.endm
-
-	.macro init_crash_stack
-	msr	cntfrq_el0, x0 /* we can corrupt this reg to free up x0 */
-	mrs	x0, tpidr_el3
-
-	/* Check if tpidr is initialized */
-	cbz	x0, infinite_loop
-
-	ldr	x0, [x0, #CPU_DATA_CRASH_STACK_OFFSET]
-	/* store the x30 and sp to stack */
-	str	x30, [x0, #-(REG_SIZE)]!
-	mov	x30, sp
-	str	x30, [x0, #-(REG_SIZE)]!
-	mov	sp, x0
-	mrs	x0, cntfrq_el0
-	.endm
+	/* Done reporting */
+	b	crash_panic
 
-	/* ---------------------------------------------------
-	 * The below function initializes the crash dump stack ,
-	 * and prints the system state. This function
-	 * will not return.
-	 * ---------------------------------------------------
-	 */
-func dump_state_and_die
-	init_crash_stack
-	print_caller_saved_regs
-	b	print_state
-
-func dump_intr_state_and_die
-	init_crash_stack
-	print_caller_saved_regs
-	plat_print_gic_regs /* fall through to print_state */
-
-print_state:
-	/* copy the original x30 from stack */
-	ldr	x30, [sp, #REG_SIZE]
-	print_callee_saved_regs
-	/* copy the original SP_EL3 from stack to x0 and rewind stack */
-	ldr x0, [sp], #(REG_SIZE * 2)
-	print_el3_sys_regs
-	print_non_el3_sys_0_regs
-	print_non_el3_sys_1_regs
-
-#else	/* CRASH_REPORING */
-
-func dump_state_and_die
-dump_intr_state_and_die:
-
+#else	/* CRASH_REPORTING */
+func report_unhandled_exception
+report_unhandled_interrupt:
+	b	crash_panic
 #endif	/* CRASH_REPORING */
 
-infinite_loop:
-	b	infinite_loop
-
 
-#define PCPU_CRASH_STACK_SIZE	0x140
-
-	/* -----------------------------------------------------
-	 * Per-cpu crash stacks in normal memory.
-	 * -----------------------------------------------------
-	 */
-declare_stack pcpu_crash_stack, tzfw_normal_stacks, \
-		PCPU_CRASH_STACK_SIZE, PLATFORM_CORE_COUNT
-
-	/* -----------------------------------------------------
-	 * Provides each CPU with a small stacks for reporting
-	 * unhandled exceptions, and stores the stack address
-	 * in cpu_data
-	 *
-	 * This can be called without a runtime stack
-	 * clobbers: x0 - x4
-	 * -----------------------------------------------------
-	 */
-func init_crash_reporting
-	mov	x4, x30
-	mov	x2, #0
-	adr	x3, pcpu_crash_stack
-init_crash_loop:
-	mov	x0, x2
-	bl	_cpu_data_by_index
-	add	x3, x3, #PCPU_CRASH_STACK_SIZE
-	str	x3, [x0, #CPU_DATA_CRASH_STACK_OFFSET]
-	add	x2, x2, #1
-	cmp	x2, #PLATFORM_CORE_COUNT
-	b.lo	init_crash_loop
-	ret	x4
+func crash_panic
+	b	crash_panic
diff --git a/bl31/aarch64/runtime_exceptions.S b/bl31/aarch64/runtime_exceptions.S
index a11cd71..996dedc 100644
--- a/bl31/aarch64/runtime_exceptions.S
+++ b/bl31/aarch64/runtime_exceptions.S
@@ -60,7 +60,7 @@
 	 * -----------------------------------------------------
 	 */
 
-	bl	dump_state_and_die
+	bl	report_unhandled_exception
 	.endm
 
 
@@ -142,7 +142,7 @@
 	 * where the interrupt was generated.
 	 */
 interrupt_error_\label:
-	bl	dump_intr_state_and_die
+	bl	report_unhandled_interrupt
 	.endm
 
 
@@ -158,7 +158,6 @@
 	.endm
 
 	.section	.vectors, "ax"; .align 11
-
 	.align	7
 runtime_exceptions:
 	/* -----------------------------------------------------
@@ -170,7 +169,7 @@
 	 * We don't expect any synchronous exceptions from EL3
 	 * -----------------------------------------------------
 	 */
-	bl	dump_state_and_die
+	bl	report_unhandled_exception
 	check_vector_size sync_exception_sp_el0
 
 	.align	7
@@ -180,17 +179,17 @@
 	 * -----------------------------------------------------
 	 */
 irq_sp_el0:
-	bl	dump_intr_state_and_die
+	bl	report_unhandled_interrupt
 	check_vector_size irq_sp_el0
 
 	.align	7
 fiq_sp_el0:
-	bl	dump_intr_state_and_die
+	bl	report_unhandled_interrupt
 	check_vector_size fiq_sp_el0
 
 	.align	7
 serror_sp_el0:
-	bl	dump_state_and_die
+	bl	report_unhandled_exception
 	check_vector_size serror_sp_el0
 
 	/* -----------------------------------------------------
@@ -206,22 +205,22 @@
 	 * There is a high probability that SP_EL3 is corrupted.
 	 * -----------------------------------------------------
 	 */
-	bl	dump_state_and_die
+	bl	report_unhandled_exception
 	check_vector_size sync_exception_sp_elx
 
 	.align	7
 irq_sp_elx:
-	bl	dump_intr_state_and_die
+	bl	report_unhandled_interrupt
 	check_vector_size irq_sp_elx
 
 	.align	7
 fiq_sp_elx:
-	bl	dump_intr_state_and_die
+	bl	report_unhandled_interrupt
 	check_vector_size fiq_sp_elx
 
 	.align	7
 serror_sp_elx:
-	bl	dump_state_and_die
+	bl	report_unhandled_exception
 	check_vector_size serror_sp_elx
 
 	/* -----------------------------------------------------
@@ -258,7 +257,7 @@
 
 	.align	7
 serror_aarch64:
-	bl	dump_state_and_die
+	bl	report_unhandled_exception
 	check_vector_size serror_aarch64
 
 	/* -----------------------------------------------------
@@ -295,7 +294,7 @@
 
 	.align	7
 serror_aarch32:
-	bl	dump_state_and_die
+	bl	report_unhandled_exception
 	check_vector_size serror_aarch32
 
 	.align	7
@@ -403,7 +402,7 @@
 	mrs	x17, elr_el3
 	mrs	x18, scr_el3
 	stp	x16, x17, [x6, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3]
-	stp	x18, xzr, [x6, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
+	str	x18, [x6, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
 
 	/* Copy SCR_EL3.NS bit to the flag to indicate caller's security */
 	bfi	x7, x18, #0, #1
@@ -446,7 +445,7 @@
 	 * Restore SPSR_EL3, ELR_EL3 and SCR_EL3 prior to ERET
 	 * -----------------------------------------------------
 	 */
-	ldp	x18, xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
+	ldr	x18, [sp, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
 	ldp	x16, x17, [sp, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3]
 	msr	scr_el3, x18
 	msr	spsr_el3, x16
@@ -473,7 +472,7 @@
 
 rt_svc_fw_critical_error:
 	msr	spsel, #1 /* Switch to SP_ELx */
-	bl	dump_state_and_die
+	bl	report_unhandled_exception
 
 	/* -----------------------------------------------------
 	 * The following functions are used to saved and restore
diff --git a/bl31/bl31.mk b/bl31/bl31.mk
index 5555c31..fb17a2e 100644
--- a/bl31/bl31.mk
+++ b/bl31/bl31.mk
@@ -48,6 +48,7 @@
 				services/std_svc/psci/psci_afflvl_suspend.c	\
 				services/std_svc/psci/psci_common.c		\
 				services/std_svc/psci/psci_entry.S		\
+				services/std_svc/psci/psci_helpers.S		\
 				services/std_svc/psci/psci_main.c		\
 				services/std_svc/psci/psci_setup.c
 
diff --git a/bl31/cpu_data_array.c b/bl31/cpu_data_array.c
index b0042a1..4cba118 100644
--- a/bl31/cpu_data_array.c
+++ b/bl31/cpu_data_array.c
@@ -32,13 +32,5 @@
 #include <cpu_data.h>
 #include <platform_def.h>
 
-/* verify assembler offsets match data structures */
-CASSERT(CPU_DATA_CRASH_STACK_OFFSET == __builtin_offsetof
-	(cpu_data_t, crash_stack),
-	assert_cpu_data_crash_stack_offset_mismatch);
-
-CASSERT((1 << CPU_DATA_LOG2SIZE) == sizeof(cpu_data_t),
-	assert_cpu_data_log2size_mismatch);
-
 /* The per_cpu_ptr_cache_t space allocation */
 cpu_data_t percpu_data[PLATFORM_CORE_COUNT];
diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S
index 479ca59..91b6128 100644
--- a/bl32/tsp/aarch64/tsp_entrypoint.S
+++ b/bl32/tsp/aarch64/tsp_entrypoint.S
@@ -31,6 +31,7 @@
 #include <arch.h>
 #include <asm_macros.S>
 #include <tsp.h>
+#include <xlat_tables.h>
 
 
 	.globl	tsp_entrypoint
@@ -88,11 +89,13 @@
 	msr	vbar_el1, x0
 
 	/* ---------------------------------------------
-	 * Enable the instruction cache.
+	 * Enable the instruction cache, stack pointer
+	 * and data access alignment checks
 	 * ---------------------------------------------
 	 */
+	mov	x1, #(SCTLR_I_BIT | SCTLR_A_BIT | SCTLR_SA_BIT)
 	mrs	x0, sctlr_el1
-	orr	x0, x0, #SCTLR_I_BIT
+	orr	x0, x0, x1
 	msr	sctlr_el1, x0
 	isb
 
@@ -111,12 +114,15 @@
 	bl	zeromem16
 
 	/* --------------------------------------------
-	 * Give ourselves a small coherent stack to
-	 * ease the pain of initializing the MMU
+	 * Allocate a stack whose memory will be marked
+	 * as Normal-IS-WBWA when the MMU is enabled.
+	 * There is no risk of reading stale stack
+	 * memory after enabling the MMU as only the
+	 * primary cpu is running at the moment.
 	 * --------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
 
 	/* ---------------------------------------------
 	 * Perform early platform setup & platform
@@ -127,14 +133,6 @@
 	bl	bl32_plat_arch_setup
 
 	/* ---------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
-	 * ---------------------------------------------
-	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
-
-	/* ---------------------------------------------
 	 * Jump to main function.
 	 * ---------------------------------------------
 	 */
@@ -200,35 +198,59 @@
 	msr	vbar_el1, x0
 
 	/* ---------------------------------------------
-	 * Enable the instruction cache.
+	 * Enable the instruction cache, stack pointer
+	 * and data access alignment checks
 	 * ---------------------------------------------
 	 */
+	mov	x1, #(SCTLR_I_BIT | SCTLR_A_BIT | SCTLR_SA_BIT)
 	mrs	x0, sctlr_el1
-	orr	x0, x0, #SCTLR_I_BIT
+	orr	x0, x0, x1
 	msr	sctlr_el1, x0
 	isb
 
 	/* --------------------------------------------
-	 * Give ourselves a small coherent stack to
-	 * ease the pain of initializing the MMU
+	 * Give ourselves a stack whose memory will be
+	 * marked as Normal-IS-WBWA when the MMU is
+	 * enabled.
 	 * --------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
 
-	/* ---------------------------------------------
-	 * Initialise the MMU
-	 * ---------------------------------------------
+	/* --------------------------------------------
+	 * Enable the MMU with the DCache disabled. It
+	 * is safe to use stacks allocated in normal
+	 * memory as a result. All memory accesses are
+	 * marked nGnRnE when the MMU is disabled. So
+	 * all the stack writes will make it to memory.
+	 * All memory accesses are marked Non-cacheable
+	 * when the MMU is enabled but D$ is disabled.
+	 * So used stack memory is guaranteed to be
+	 * visible immediately after the MMU is enabled.
+	 * Enabling the DCache at the same time as the
+	 * MMU can lead to speculatively fetched and
+	 * possibly stale stack memory being read from
+	 * other caches. This can lead to coherency
+	 * issues.
+	 * --------------------------------------------
 	 */
+	mov	x0, #DISABLE_DCACHE
 	bl	bl32_plat_enable_mmu
 
 	/* ---------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
+	 * Enable the Data cache now that the MMU has
+	 * been enabled. The stack has been unwound. It
+	 * will be written first before being read. This
+	 * will invalidate any stale cache lines
+	 * resident in other caches. We assume that
+	 * interconnect coherency has been enabled for
+	 * this cluster by EL3 firmware.
 	 * ---------------------------------------------
 	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
+	mrs	x0, sctlr_el1
+	orr	x0, x0, #SCTLR_C_BIT
+	msr	sctlr_el1, x0
+	isb
 
 	/* ---------------------------------------------
 	 * Enter C runtime to perform any remaining
diff --git a/common/aarch64/debug.S b/common/aarch64/debug.S
new file mode 100644
index 0000000..b7d7ac2
--- /dev/null
+++ b/common/aarch64/debug.S
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+
+	.globl	asm_print_str
+	.globl	asm_print_hex
+	.globl	asm_assert
+	.globl	do_panic
+
+/* 5 digits: asm_assert only prints line numbers up to 65535 (0xffff) */
+#define MAX_DEC_DIVISOR		10000
+/* The offset to add to get ascii for numerals '0 - 9' */
+#define ASCII_OFFSET_NUM	0x30
+
+#if ASM_ASSERTION
+.section .rodata.assert_str, "aS"
+assert_msg1:
+	.asciz "ASSERT: File "
+assert_msg2:
+	.asciz " Line "
+
+	/*
+	 * Print the line number held in x4 as a zero-padded,
+	 * five digit decimal. Expanded inline by asm_assert.
+	 * The max number expected is 65535 (0xffff).
+	 * In: x4 = the decimal to print (consumed, ends up as 0).
+	 * Clobber: x30, x0, x1, x2, x4, x5, x6
+	 */
+	.macro asm_print_line_dec
+	mov	x6, #10		/* Divide by 10 after every loop iteration */
+	mov	x5, #MAX_DEC_DIVISOR
+1:
+	udiv	x0, x4, x5		/* Get the quotient */
+	msub	x4, x0, x5, x4		/* Find the remainder */
+	add	x0, x0, #ASCII_OFFSET_NUM		/* Convert to ascii */
+	bl	plat_crash_console_putc
+	udiv	x5, x5, x6		/* Reduce divisor */
+	cbnz	x5, 1b		/* Done once divisor 1 was used (1 / 10 == 0) */
+	.endm
+
+
+/* ---------------------------------------------------------------------------
+ * Assertion support in assembly, usable where there is no C runtime stack.
+ * Prints "ASSERT: File <name> Line <number>" on the crash console and then
+ * spins forever; this function never returns to its caller.
+ * x0 - File name (pointer to a NUL-terminated string)
+ * x1 - Line no (printed only when it fits in 16 bits)
+ * Clobber list : x30, x0, x1, x2, x3, x4, x5, x6.
+ * ---------------------------------------------------------------------------
+ */
+func asm_assert
+	mov	x5, x0		/* Keep the file name: x0 is reused by the calls below */
+	mov	x6, x1		/* Keep the line number for the same reason */
+	/* Ensure the console is initialized */
+	bl	plat_crash_console_init
+	/* A zero return means no console: print nothing, just hang */
+	cbz	x0, _assert_loop
+	/* The console is initialized */
+	adr	x4, assert_msg1
+	bl	asm_print_str
+	mov	x4, x5
+	bl	asm_print_str
+	adr	x4, assert_msg2
+	bl	asm_print_str
+	/* Check if line number higher than max permitted */
+	tst	x6, #~0xffff		/* Any bit above bit 15 set? */
+	b.ne	_assert_loop		/* Too large for the 5 digit printer: skip it */
+	mov	x4, x6
+	asm_print_line_dec
+_assert_loop:
+	b	_assert_loop		/* Park the CPU here forever */
+#endif
+
+/*
+ * Print the NUL-terminated string at x4 on the crash console.
+ * In: x4 = pointer to string; left pointing just past the terminator.
+ * Clobber: x30, x0, x1, x2, x3 (x4 is advanced as noted above)
+ */
+func asm_print_str
+	mov	x3, x30		/* Save the return address: bl below overwrites x30 */
+1:
+	ldrb	w0, [x4], #0x1		/* Fetch one byte and post-increment x4 */
+	cbz	x0, 2f		/* Terminating NUL reached */
+	bl	plat_crash_console_putc
+	b	1b
+2:
+	ret	x3		/* Return through the saved link register copy */
+
+/*
+ * Print the 64-bit value in x4 as 16 lowercase hex digits, most
+ * significant nibble first, with no prefix. x4 itself is only read.
+ * Clobber: x30, x0, x5, x1, x2, x3
+ */
+func asm_print_hex
+	mov	x3, x30		/* Save the return address: bl below overwrites x30 */
+	mov	x5, #64  /* Bit position just above the next nibble to print */
+1:
+	sub	x5, x5, #4		/* Step down to the next nibble (60, 56, ... 0) */
+	lsrv	x0, x4, x5		/* Bring that nibble down to bits [3:0] */
+	and	x0, x0, #0xf
+	cmp	x0, #0xA
+	b.lo	2f		/* 0-9 only need ASCII_OFFSET_NUM */
+	/* Add by 0x27 in addition to ASCII_OFFSET_NUM
+	 * to get ascii for characters 'a - f'.
+	 */
+	add	x0, x0, #0x27
+2:
+	add	x0, x0, #ASCII_OFFSET_NUM
+	bl	plat_crash_console_putc
+	cbnz	x5, 1b		/* Shift count 0 means the last nibble was printed */
+	ret	x3
+
+	/***********************************************************
+	 * The common implementation of do_panic for all BL stages
+	 ***********************************************************/
+
+.section .rodata.panic_str, "aS"
+	panic_msg: .asciz "PANIC at PC : 0x"
+
+/* ---------------------------------------------------------------------------
+ * do_panic assumes that it is invoked from a C Runtime Environment ie a
+ * valid stack exists. It reports (x30 - 4) and spins; it will not return.
+ * Clobber list : if CRASH_REPORTING is not enabled then x30, x0 - x6
+ * ---------------------------------------------------------------------------
+ */
+
+/* This is for the non el3 BL stages to compile through */
+	.weak el3_panic
+
+func do_panic
+#if CRASH_REPORTING
+	str	x0, [sp, #-0x10]!	/* Preserve x0 while it is used as scratch */
+	mrs	x0, currentel
+	ubfx	x0, x0, #2, #2		/* Extract bits [3:2] of CurrentEL */
+	cmp	x0, #0x3
+	ldr	x0, [sp], #0x10		/* Restore x0; the flags from cmp are untouched */
+	b.eq	el3_panic		/* Value 3: take the el3_panic path */
+#endif
+
+panic_common:
+/*
+ * el3_panic will be redefined by the BL31
+ * crash reporting mechanism (if enabled)
+ */
+el3_panic:
+	mov	x6, x30		/* Keep the caller's return address across the calls */
+	bl	plat_crash_console_init
+	/* A zero return means no console: print nothing, just hang */
+	cbz	x0, _panic_loop
+	/* The console is initialized */
+	adr	x4, panic_msg
+	bl	asm_print_str
+	mov	x4, x6
+	/* The panic location is lr -4 */
+	sub	x4, x4, #4
+	bl	asm_print_hex
+_panic_loop:
+	b	_panic_loop		/* Park the CPU here forever */
+
diff --git a/common/debug.c b/common/debug.c
deleted file mode 100644
index be54f5d..0000000
--- a/common/debug.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-#include <console.h>
-#include <debug.h>
-#include <stdarg.h>
-#include <stdio.h>
-
-/******************************************************************
-* This function is invoked from assembler error handling routines and
-* prints out the string and the value in 64 bit hex format. These
-* are passed to the function as input parameters.
-********************************************************************/
-void print_string_value(char *s, unsigned long *mem)
-{
-	unsigned char i, temp;
-	unsigned long val;
-
-	while (*s) {
-		i = 16;
-		while (*s)
-			console_putc(*s++);
-
-		s++;
-
-		console_putc('\t');
-		console_putc(':');
-		console_putc('0');
-		console_putc('x');
-
-		val = *mem++;
-
-		while (i--) {
-			temp = (val >> (i << 2)) & 0xf;
-			if (temp <  0xa)
-				console_putc('0' + temp);
-			else
-				console_putc('A' + (temp - 0xa));
-		}
-		console_putc('\n');
-	}
-}
-
-/***********************************************************
- * The common implementation of do_panic for all BL stages
- ***********************************************************/
-
-#if DEBUG
-void __dead2 do_panic(const char *file, int line)
-{
-		tf_printf("PANIC in file: %s line: %d\n", file, line);
-		while (1)
-			;
-}
-#else
-void __dead2 do_panic(void)
-{
-	unsigned long pc_reg;
-	__asm__ volatile("mov %0, x30\n"
-					: "=r" (pc_reg) : );
-
-	/* x30 reports the next eligible instruction whereas we want the
-	 * place where panic() is invoked. Hence decrement by 4.
-	 */
-	tf_printf("PANIC in PC location 0x%016X\n", pc_reg - 0x4);
-	while (1)
-		;
-
-}
-#endif
diff --git a/docs/porting-guide.md b/docs/porting-guide.md
index 813d0be..c711590 100644
--- a/docs/porting-guide.md
+++ b/docs/porting-guide.md
@@ -15,6 +15,7 @@
     *   Boot Loader stage 3-1 (BL3-1)
     *   PSCI implementation (in BL3-1)
     *   Interrupt Management framework (in BL3-1)
+    *   Crash Reporting mechanism (in BL3-1)
 4.  C Library
 5.  Storage abstraction layer
 
@@ -104,12 +105,6 @@
     by [plat/common/aarch64/platform_mp_stack.S] and
     [plat/common/aarch64/platform_up_stack.S].
 
-*   **#define : PCPU_DV_MEM_STACK_SIZE**
-
-    Defines the coherent stack memory available to each CPU. This constant is used
-    by [plat/common/aarch64/platform_mp_stack.S] and
-    [plat/common/aarch64/platform_up_stack.S].
-
 *   **#define : FIRMWARE_WELCOME_STR**
 
     Defines the character string printed by BL1 upon entry into the `bl1_main()`
@@ -274,10 +269,18 @@
 *   **Macro : plat_print_gic_regs**
 
     This macro allows the crash reporting routine to print GIC registers
-    in case of an unhandled IRQ or FIQ in BL3-1. This aids in debugging and
+    in case of an unhandled exception in BL3-1. This aids in debugging and
     this macro can be defined to be empty in case GIC register reporting is
     not desired.
 
+*   **Macro : plat_print_interconnect_regs**
+
+    This macro allows the crash reporting routine to print interconnect registers
+    in case of an unhandled exception in BL3-1. This aids in debugging and
+    this macro can be defined to be empty in case interconnect register reporting
+    is not desired. In the ARM FVP port, the CCI snoop control registers are
+    reported.
+
 ### Other mandatory modifications
 
 The following mandatory modifications may be implemented in any file
@@ -395,31 +398,6 @@
     cluster_id = 8-bit value in MPIDR at affinity level 1
 
 
-### Function : platform_set_coherent_stack()
-
-    Argument : unsigned long
-    Return   : void
-
-A platform may need stack memory that is coherent with main memory to perform
-certain operations like:
-
-*   Turning the MMU on, or
-*   Flushing caches prior to powering down a CPU or cluster.
-
-Each BL stage allocates this coherent stack memory for each CPU in the
-`tzfw_coherent_mem` section.
-
-This function sets the current stack pointer to the coherent stack that
-has been allocated for the CPU specified by MPIDR. For BL images that only
-require a stack for the primary CPU the parameter is ignored. The size of
-the stack allocated to each CPU is specified by the platform defined constant
-`PCPU_DV_MEM_STACK_SIZE`.
-
-Common implementations of this function for the UP and MP BL images are
-provided in [plat/common/aarch64/platform_up_stack.S] and
-[plat/common/aarch64/platform_mp_stack.S]
-
-
 ### Function : platform_is_primary_cpu()
 
     Argument : unsigned long
@@ -1116,11 +1094,6 @@
 affinity level 0 (CPU), the platform port should power down affinity level 1
 (the cluster) as well.
 
-This function is called with coherent stacks. This allows the PSCI
-implementation to flush caches at a given affinity level without running into
-stale stack state after turning off the caches. On ARMv8-A cache hits do not
-occur after the cache has been turned off.
-
 #### plat_pm_ops.affinst_suspend()
 
 Perform the platform specific setup to power off an affinity instance in the
@@ -1143,11 +1116,6 @@
 resume execution by restoring this state when its powered on (see
 `affinst_suspend_finish()`).
 
-This function is called with coherent stacks. This allows the PSCI
-implementation to flush caches at a given affinity level without running into
-stale stack state after turning off the caches. On ARMv8-A cache hits do not
-occur after the cache has been turned off.
-
 #### plat_pm_ops.affinst_on_finish()
 
 This function is called by the PSCI implementation after the calling CPU is
@@ -1159,11 +1127,6 @@
 The `MPIDR` (first argument), `affinity level` (second argument) and `state`
 (third argument) have a similar meaning as described in the previous operations.
 
-This function is called with coherent stacks. This allows the PSCI
-implementation to flush caches at a given affinity level without running into
-stale stack state after turning off the caches. On ARMv8-A cache hits do not
-occur after the cache has been turned off.
-
 #### plat_pm_ops.affinst_on_suspend()
 
 This function is called by the PSCI implementation after the calling CPU is
@@ -1176,11 +1139,6 @@
 The `MPIDR` (first argument), `affinity level` (second argument) and `state`
 (third argument) have a similar meaning as described in the previous operations.
 
-This function is called with coherent stacks. This allows the PSCI
-implementation to flush caches at a given affinity level without running into
-stale stack state after turning off the caches. On ARMv8-A cache hits do not
-occur after the cache has been turned off.
-
 BL3-1 platform initialization code must also detect the system topology and
 the state of each affinity instance in the topology. This information is
 critical for the PSCI runtime service to function correctly. More details are
@@ -1316,6 +1274,41 @@
 interrupt id from the relevant _Interrupt Group Register_ (`GICD_IGROUPRn`). It
 uses the group value to determine the type of interrupt.
 
+3.5  Crash Reporting mechanism (in BL3-1)
+----------------------------------------------
+BL3-1 implements a crash reporting mechanism which prints the various registers
+of the CPU to enable quick crash analysis and debugging. It requires that a console
+is designated as the crash console by the platform which will be used to print the
+register dump.
+
+The following functions must be implemented by the platform if it wants the crash
+reporting mechanism in BL3-1. The functions are implemented in assembly so that
+they can be invoked without a C Runtime stack.
+
+### Function : plat_crash_console_init
+
+    Argument : void
+    Return   : int
+
+This API is used by the crash reporting mechanism to initialize the crash console.
+It should only use the general purpose registers x0 to x2 to do the initialization
+and returns 1 on success.
+
+The FVP port designates the PL011_UART0 as the crash console and calls the
+console_core_init() to initialize the console.
+
+### Function : plat_crash_console_putc
+
+    Argument : int
+    Return   : int
+
+This API is used by the crash reporting mechanism to print a character on the
+designated crash console. It should only use general purpose registers x1 and
+x2 to do its work. The parameter and the return value are in general purpose
+register x0.
+
+The FVP port designates the PL011_UART0 as the crash console and calls the
+console_core_putc() to print the character on the console.
 
 4.  C Library
 -------------
diff --git a/docs/user-guide.md b/docs/user-guide.md
index a4d7f46..41e7606 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -181,6 +181,11 @@
     BL3-1. This option defaults to the value of `DEBUG` - i.e. by default
     this is only enabled for a debug build of the firmware.
 
+*   `ASM_ASSERTION`: This flag determines whether the assertion checks within
+    assembly source files are enabled or not. This option defaults to the
+    value of `DEBUG` - i.e. by default this is only enabled for a debug
+    build of the firmware.
+
 ### Creating a Firmware Image Package
 
 FIPs are automatically created as part of the build instructions described in
diff --git a/drivers/arm/pl011/pl011_console.S b/drivers/arm/pl011/pl011_console.S
new file mode 100644
index 0000000..5ff1582
--- /dev/null
+++ b/drivers/arm/pl011/pl011_console.S
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <arch.h>
+#include <asm_macros.S>
+#include <pl011.h>
+
+	.globl	console_init
+	.globl	console_putc
+	.globl	console_core_init
+	.globl	console_core_putc
+	.globl	console_getc
+
+	/*
+	 *  The console base is in the data section and not in .bss
+	 *  even though it is zero-init. In particular, this allows
+	 *  the console functions to start using this variable before
+	 *  the runtime memory is initialized for images which do not
+	 *  need to copy the .data section from ROM to RAM.
+	 */
+.section .data.console_base ; .align 3
+	console_base: .quad 0x0	/* While 0, console_putc/console_getc return -1 */
+
+	/* -----------------------------------------------
+	 * int console_init(unsigned long base_addr,
+	 * unsigned int uart_clk, unsigned int baud_rate)
+	 * Function to initialize the console without a
+	 * C Runtime to print debug information. It saves
+	 * the console base to the data section.
+	 * In: x0 - console base address
+	 *     w1 - Uart clock in Hz
+	 *     w2 - Baud rate
+	 * Out: result of console_core_init (1 on success)
+	 * Clobber list : x1 - x3
+	 * -----------------------------------------------
+	 */
+func console_init
+	adrp	x3, console_base
+	str	x0, [x3, :lo12:console_base]	/* Recorded before any validation */
+	b	console_core_init	/* Tail call: its ret goes back to our caller */
+
+	/* -----------------------------------------------
+	 * int console_core_init(unsigned long base_addr,
+	 * unsigned int uart_clk, unsigned int baud_rate)
+	 * Function to initialize the console without a
+	 * C Runtime to print debug information. This
+	 * function will be accessed by console_init and
+	 * crash reporting.
+	 * In: x0 - console base address
+	 *     w1 - Uart clock in Hz
+	 *     w2 - Baud rate
+	 * Out: return 1 on success, 0 on invalid input
+	 * Clobber list : x1, x2
+	 * -----------------------------------------------
+	 */
+func console_core_init
+	/* Reject a zero base address, uart clock or baud rate */
+	cbz	x0, init_fail
+	cbz	w1, init_fail
+	cbz	w2, init_fail
+	/* Program the baudrate */
+	/* Divisor =  (Uart clock * 4) / baudrate */
+	lsl	w1, w1, #2
+	udiv	w2, w1, w2
+	/* IBRD = Divisor >> 6 */
+	lsr	w1, w2, #6
+	str	w1, [x0, #UARTIBRD]
+	/* FBRD = Divisor & 0x3F */
+	and	w1, w2, #0x3f
+	str	w1, [x0, #UARTFBRD]
+	mov	w1, #PL011_LINE_CONTROL
+	str	w1, [x0, #UARTLCR_H]
+	/* Clear any pending errors */
+	str	wzr, [x0, #UARTECR]
+	/* Enable tx, rx, and uart overall */
+	mov	w1, #(PL011_UARTCR_RXE | PL011_UARTCR_TXE | PL011_UARTCR_UARTEN)
+	str	w1, [x0, #UARTCR]
+	mov	w0, #1
+	ret
+init_fail:
+	/* x0 may still hold the non-zero base address: report 0 */
+	mov	w0, wzr
+	ret
+
+	/* ---------------------------------------------
+	 * int console_putc(int c)
+	 * Function to output a character over the
+	 * console whose base was saved by console_init.
+	 * The work is done by console_core_putc.
+	 * In : x0 - character to be printed
+	 * Out : return -1 on error else return character.
+	 * Clobber list : x1, x2
+	 * ---------------------------------------------
+	 */
+func console_putc
+	adrp	x2, console_base
+	ldr	x1, [x2, :lo12:console_base]	/* x1 = saved base, 0 if none */
+	b	console_core_putc	/* Tail call: its ret goes back to our caller */
+
+	/* --------------------------------------------------------
+	 * int console_core_putc(int c, unsigned long base_addr)
+	 * Function to output a character over the console. It
+	 * returns the character printed on success or -1 on error.
+	 * In : w0 - character to be printed
+	 *      x1 - console base address (0 is treated as an error)
+	 * Out : return -1 on error else return character.
+	 * Clobber list : x2
+	 * --------------------------------------------------------
+	 */
+func console_core_putc
+	/* Check the input parameter */
+	cbz	x1, putc_error
+	/* Prepend '\r' to '\n' */
+	cmp	w0, #0xA
+	b.ne	2f		/* Not '\n': go straight to the character itself */
+1:
+	/* Check if the transmit FIFO is full */
+	ldr	w2, [x1, #UARTFR]
+	tbnz	w2, #PL011_UARTFR_TXFF_BIT, 1b
+	mov	w2, #0xD
+	str	w2, [x1, #UARTDR]
+2:
+	/* Check if the transmit FIFO is full */
+	ldr	w2, [x1, #UARTFR]
+	tbnz	w2, #PL011_UARTFR_TXFF_BIT, 2b
+	str	w0, [x1, #UARTDR]
+	ret		/* w0 still holds the character just written */
+putc_error:
+	mov	w0, #-1
+	ret
+
+	/* ---------------------------------------------
+	 * int console_getc(void)
+	 * Function to get a character from the console.
+	 * It spins until the receive FIFO has data and
+	 * returns -1 only if no console base was saved.
+	 * Clobber list : x0, x1
+	 * ---------------------------------------------
+	 */
+func console_getc
+	adrp	x0, console_base
+	ldr	x1, [x0, :lo12:console_base]
+	cbz	x1, getc_error		/* Base is 0: nothing to read from */
+1:
+	/* Check if the receive FIFO is empty */
+	ldr	w0, [x1, #UARTFR]
+	tbnz	w0, #PL011_UARTFR_RXFE_BIT, 1b
+	ldr	w0, [x1, #UARTDR]	/* Return the 32-bit data register read as is */
+	ret
+getc_error:
+	mov	w0, #-1
+	ret
diff --git a/drivers/arm/pl011/pl011_console.c b/drivers/arm/pl011/pl011_console.c
deleted file mode 100644
index 81897ca..0000000
--- a/drivers/arm/pl011/pl011_console.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <assert.h>
-#include <console.h>
-#include <pl011.h>
-
-static unsigned long uart_base;
-
-void console_init(unsigned long base_addr)
-{
-	/* TODO: assert() internally calls printf() and will result in
-	 * an infinite loop. This needs to be fixed with some kind of
-	 * exception  mechanism or early panic support. This also applies
-	 * to the other assert() calls below.
-	 */
-	assert(base_addr);
-
-	/* Initialise internal base address variable */
-	uart_base = base_addr;
-
-	/* Baud Rate */
-#if defined(PL011_INTEGER) && defined(PL011_FRACTIONAL)
-	pl011_write_ibrd(uart_base, PL011_INTEGER);
-	pl011_write_fbrd(uart_base, PL011_FRACTIONAL);
-#else
-	pl011_setbaudrate(uart_base, PL011_BAUDRATE);
-#endif
-
-	pl011_write_lcr_h(uart_base, PL011_LINE_CONTROL);
-
-	/* Clear any pending errors */
-	pl011_write_ecr(uart_base, 0);
-
-	/* Enable tx, rx, and uart overall */
-	pl011_write_cr(uart_base, PL011_UARTCR_RXE | PL011_UARTCR_TXE |
-			PL011_UARTCR_UARTEN);
-
-}
-
-#define WAIT_UNTIL_UART_FREE(base)				\
-	while ((pl011_read_fr(base) & PL011_UARTFR_TXFF))	\
-		continue
-
-int console_putc(int c)
-{
-	/* If the console has not been initialized then return an error
-	 * code. Asserting here would result in recursion and stack
-	 * exhaustion
-	 */
-	if (!uart_base)
-		return -1;
-
-	if (c == '\n') {
-		WAIT_UNTIL_UART_FREE(uart_base);
-		pl011_write_dr(uart_base, '\r');
-	}
-
-	WAIT_UNTIL_UART_FREE(uart_base);
-	pl011_write_dr(uart_base, c);
-	return c;
-}
-
-int console_getc(void)
-{
-	assert(uart_base);
-
-	while ((pl011_read_fr(uart_base) & PL011_UARTFR_RXFE) != 0)
-		;
-	return pl011_read_dr(uart_base);
-}
diff --git a/include/bl31/context.h b/include/bl31/context.h
index 82d0c9c..3bf4980 100644
--- a/include/bl31/context.h
+++ b/include/bl31/context.h
@@ -76,21 +76,13 @@
  * 32-bits wide but are stored as 64-bit values for convenience
  ******************************************************************************/
 #define CTX_EL3STATE_OFFSET	(CTX_GPREGS_OFFSET + CTX_GPREGS_END)
-#define CTX_VBAR_EL3	0x0		/* Currently unused */
+#define CTX_SCR_EL3		0x0
 #define CTX_RUNTIME_SP		0x8
 #define CTX_SPSR_EL3		0x10
 #define CTX_ELR_EL3		0x18
-#define CTX_SCR_EL3		0x20
-#define CTX_SCTLR_EL3		0x28
-#define CTX_CPTR_EL3		0x30
-/* Unused space to allow registers to be stored as pairs */
-#define CTX_CNTFRQ_EL0		0x40
-#define CTX_MAIR_EL3		0x48
-#define CTX_TCR_EL3		0x50
-#define CTX_TTBR0_EL3		0x58
-#define CTX_DAIF_EL3		0x60
-/* Unused space to honour alignment requirements */
-#define CTX_EL3STATE_END	0x70
+#define CTX_CPTR_EL3		0x20
+#define CTX_CNTFRQ_EL0		0x28
+#define CTX_EL3STATE_END	0x30
 
 /*******************************************************************************
  * Constants that allow assembler code to access members of and the
diff --git a/include/bl31/cpu_data.h b/include/bl31/cpu_data.h
index 5f45f14..ef0b68c 100644
--- a/include/bl31/cpu_data.h
+++ b/include/bl31/cpu_data.h
@@ -32,9 +32,14 @@
 #define __CPU_DATA_H__
 
 /* Offsets for the cpu_data structure */
-#define CPU_DATA_CRASH_STACK_OFFSET	0x10
+#define CPU_DATA_CRASH_BUF_OFFSET	0x10
+#if CRASH_REPORTING
+#define CPU_DATA_LOG2SIZE		7
+#else
 #define CPU_DATA_LOG2SIZE		6
-
+#endif
+/* need enough space in crash buffer to save 8 registers */
+#define CPU_DATA_CRASH_BUF_SIZE	64
 #ifndef __ASSEMBLY__
 
 #include <arch_helpers.h>
@@ -61,9 +66,21 @@
 
 typedef struct cpu_data {
 	void *cpu_context[2];
-	uint64_t crash_stack;
+#if CRASH_REPORTING
+	uint64_t crash_buf[CPU_DATA_CRASH_BUF_SIZE >> 3];
+#endif
 } __aligned(CACHE_WRITEBACK_GRANULE) cpu_data_t;
 
+#if CRASH_REPORTING
+/* verify assembler offsets match data structures */
+CASSERT(CPU_DATA_CRASH_BUF_OFFSET == __builtin_offsetof
+	(cpu_data_t, crash_buf),
+	assert_cpu_data_crash_stack_offset_mismatch);
+#endif
+
+CASSERT((1 << CPU_DATA_LOG2SIZE) == sizeof(cpu_data_t),
+	assert_cpu_data_log2size_mismatch);
+
 struct cpu_data *_cpu_data_by_index(uint32_t cpu_index);
 struct cpu_data *_cpu_data_by_mpidr(uint64_t mpidr);
 
diff --git a/include/common/asm_macros.S b/include/common/asm_macros.S
index 2bccf58..238fa82 100644
--- a/include/common/asm_macros.S
+++ b/include/common/asm_macros.S
@@ -162,3 +162,36 @@
 	.macro get_up_stack _name, _size
 	ldr x0, =(\_name + \_size)
 	.endm
+
+	/*
+	 * Helper macro to generate the best mov/movk combinations according
+	 * to the value to be moved. The 16 bits from '_shift' are tested and
+	 * if not zero, they are moved into '_reg': with movk (other bits kept)
+	 * when any bit of '_val' below '_shift' is set, with mov otherwise.
+	 */
+	.macro _mov_imm16 _reg, _val, _shift
+		.if (\_val >> \_shift) & 0xffff
+			.if (\_val & ((1 << \_shift) - 1))
+				movk	\_reg, (\_val >> \_shift) & 0xffff, LSL \_shift
+			.else
+				mov	\_reg, \_val & (0xffff << \_shift)
+			.endif
+		.endif
+	.endm
+
+	/*
+	 * Helper macro to load arbitrary values into 32 or 64-bit registers
+	 * which generates the best mov/movk combinations. Many base addresses
+	 * are 64KB aligned; in that case the macro skips updating bits 15:0
+	 * because the corresponding _mov_imm16 step emits no instruction.
+	 */
+	.macro mov_imm _reg, _val
+		.if (\_val) == 0
+			mov	\_reg, #0
+		.else
+			_mov_imm16	\_reg, (\_val), 0
+			_mov_imm16	\_reg, (\_val), 16
+			_mov_imm16	\_reg, (\_val), 32
+			_mov_imm16	\_reg, (\_val), 48
+		.endif
+	.endm
diff --git a/drivers/arm/pl011/pl011.c b/include/common/assert_macros.S
similarity index 76%
rename from drivers/arm/pl011/pl011.c
rename to include/common/assert_macros.S
index e296c23..45d699b 100644
--- a/drivers/arm/pl011/pl011.c
+++ b/include/common/assert_macros.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -28,14 +28,19 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <assert.h>
-#include <pl011.h>
-
-void pl011_setbaudrate(unsigned long base_addr, unsigned int baudrate)
-{
-	unsigned int divisor;
-	assert(baudrate);
-	divisor = (PL011_CLK_IN_HZ * 4) / baudrate;
-	pl011_write_ibrd(base_addr, divisor >> 6);
-	pl011_write_fbrd(base_addr, divisor & 0x3F);
-}
+	/*
+	 * Assembly-level assert: when condition code '_cc' does not hold, branch
+	 * to asm_assert with x0 = address of the __FILE__ string, x1 = __LINE__.
+	 */
+#define ASM_ASSERT(_cc) \
+.ifndef .L_assert_filename ;\
+	.pushsection .rodata.str1.1, "aS" ;\
+	.L_assert_filename: ;\
+			.string	__FILE__ ;\
+	.popsection ;\
+.endif ;\
+	b._cc	1f ;\
+	adr	x0, .L_assert_filename ;\
+	mov	x1, __LINE__ ;\
+	b	asm_assert ;\
+1:
diff --git a/include/common/debug.h b/include/common/debug.h
index c70109f..3f5655b 100644
--- a/include/common/debug.h
+++ b/include/common/debug.h
@@ -52,22 +52,9 @@
 
 #define ERROR(...)	tf_printf("ERROR: " __VA_ARGS__)
 
-
-/* For the moment this Panic function is very basic, Report an error and
- * spin. This can be expanded in the future to provide more information.
- */
-#if DEBUG
-void __dead2 do_panic(const char *file, int line);
-#define panic()	do_panic(__FILE__, __LINE__)
-
-#else
 void __dead2 do_panic(void);
 #define panic()	do_panic()
 
-#endif
-
-void print_string_value(char *s, unsigned long *mem);
 void tf_printf(const char *fmt, ...);
 
-
 #endif /* __DEBUG_H__ */
diff --git a/include/drivers/arm/cci400.h b/include/drivers/arm/cci400.h
index 7222391..6246e48 100644
--- a/include/drivers/arm/cci400.h
+++ b/include/drivers/arm/cci400.h
@@ -65,8 +65,11 @@
 /* Status register bit definitions */
 #define CHANGE_PENDING_BIT		(1 << 0)
 
+#ifndef __ASSEMBLY__
+
 /* Function declarations */
 void cci_enable_coherency(unsigned long mpidr);
 void cci_disable_coherency(unsigned long mpidr);
 
+#endif /* __ASSEMBLY__ */
 #endif /* __CCI_400_H__ */
diff --git a/include/drivers/arm/pl011.h b/include/drivers/arm/pl011.h
index 281330e..7c4df62 100644
--- a/include/drivers/arm/pl011.h
+++ b/include/drivers/arm/pl011.h
@@ -31,9 +31,6 @@
 #ifndef __PL011_H__
 #define __PL011_H__
 
-#include <mmio.h>
-
-
 /* PL011 Registers */
 #define UARTDR                    0x000
 #define UARTRSR                   0x004
@@ -68,6 +65,9 @@
 #define PL011_UARTFR_DSR          (1 << 1)	/* Data set ready */
 #define PL011_UARTFR_CTS          (1 << 0)	/* Clear to send */
 
+#define PL011_UARTFR_TXFF_BIT	5	/* Transmit FIFO full bit in UARTFR register */
+#define PL011_UARTFR_RXFE_BIT	4	/* Receive FIFO empty bit in UARTFR register */
+
 /* Control reg bits */
 #define PL011_UARTCR_CTSEN        (1 << 15)	/* CTS hardware flow control enable */
 #define PL011_UARTCR_RTSEN        (1 << 14)	/* RTS hardware flow control enable */
@@ -78,14 +78,6 @@
 #define PL011_UARTCR_LBE          (1 << 7)	/* Loopback enable */
 #define PL011_UARTCR_UARTEN       (1 << 0)	/* UART Enable */
 
-#if !defined(PL011_BAUDRATE)
-#define PL011_BAUDRATE  115200
-#endif
-
-#if !defined(PL011_CLK_IN_HZ)
-#define PL011_CLK_IN_HZ 24000000
-#endif
-
 #if !defined(PL011_LINE_CONTROL)
 /* FIFO Enabled / No Parity / 8 Data bit / One Stop Bit */
 #define PL011_LINE_CONTROL  (PL011_UARTLCR_H_FEN | PL011_UARTLCR_H_WLEN_8)
@@ -103,58 +95,4 @@
 #define PL011_UARTLCR_H_PEN       (1 << 1)	/* Parity Enable */
 #define PL011_UARTLCR_H_BRK       (1 << 0)	/* Send break */
 
-/*******************************************************************************
- * Pl011 CPU interface accessors for writing registers
- ******************************************************************************/
-
-static inline void pl011_write_ibrd(unsigned long base, unsigned int val)
-{
-	mmio_write_32(base + UARTIBRD, val);
-}
-
-static inline void pl011_write_fbrd(unsigned long base, unsigned int val)
-{
-	mmio_write_32(base + UARTFBRD, val);
-}
-
-static inline void pl011_write_lcr_h(unsigned long base, unsigned int val)
-{
-	mmio_write_32(base + UARTLCR_H, val);
-}
-
-static inline void pl011_write_ecr(unsigned long base, unsigned int val)
-{
-	mmio_write_32(base + UARTECR, val);
-}
-
-static inline void pl011_write_cr(unsigned long base, unsigned int val)
-{
-	mmio_write_32(base + UARTCR, val);
-}
-
-static inline void pl011_write_dr(unsigned long base, unsigned int val)
-{
-	mmio_write_32(base + UARTDR, val);
-}
-
-/*******************************************************************************
- * Pl011 CPU interface accessors for reading registers
- ******************************************************************************/
-
-static inline unsigned int pl011_read_fr(unsigned long base)
-{
-	return mmio_read_32(base + UARTFR);
-}
-
-static inline unsigned int pl011_read_dr(unsigned long base)
-{
-	return mmio_read_32(base + UARTDR);
-}
-
-/*******************************************************************************
- * Function prototypes
- ******************************************************************************/
-
-void pl011_setbaudrate(unsigned long base_addr, unsigned int baudrate);
-
 #endif	/* __PL011_H__ */
diff --git a/include/drivers/console.h b/include/drivers/console.h
index e285909..f144ab9 100644
--- a/include/drivers/console.h
+++ b/include/drivers/console.h
@@ -31,7 +31,8 @@
 #ifndef __CONSOLE_H__
 #define __CONSOLE_H__
 
-void console_init(unsigned long base_addr);
+int console_init(unsigned long base_addr,
+		unsigned int uart_clk, unsigned int baud_rate);
 int console_putc(int c);
 int console_getc(void);
 
diff --git a/include/lib/aarch64/arch.h b/include/lib/aarch64/arch.h
index ff91efc..0427208 100644
--- a/include/lib/aarch64/arch.h
+++ b/include/lib/aarch64/arch.h
@@ -129,11 +129,8 @@
 #define SCTLR_A_BIT		(1 << 1)
 #define SCTLR_C_BIT		(1 << 2)
 #define SCTLR_SA_BIT		(1 << 3)
-#define SCTLR_B_BIT		(1 << 7)
-#define SCTLR_Z_BIT		(1 << 11)
 #define SCTLR_I_BIT		(1 << 12)
 #define SCTLR_WXN_BIT		(1 << 19)
-#define SCTLR_EXCEPTION_BITS	(0x3 << 6)
 #define SCTLR_EE_BIT		(1 << 25)
 
 /* CPUECTLR definitions */
diff --git a/include/lib/aarch64/xlat_tables.h b/include/lib/aarch64/xlat_tables.h
index 8e0adc7..2d4a211 100644
--- a/include/lib/aarch64/xlat_tables.h
+++ b/include/lib/aarch64/xlat_tables.h
@@ -31,6 +31,14 @@
 #ifndef __XLAT_TABLES_H__
 #define __XLAT_TABLES_H__
 
+
+/*
+ * Flags to override default values used to program system registers while
+ * enabling the MMU.
+ */
+#define DISABLE_DCACHE		(1 << 0)
+
+#ifndef __ASSEMBLY__
 #include <stdint.h>
 
 /*
@@ -67,7 +75,8 @@
 
 void init_xlat_tables(void);
 
-void enable_mmu_el1(void);
-void enable_mmu_el3(void);
+void enable_mmu_el1(uint32_t flags);
+void enable_mmu_el3(uint32_t flags);
 
+#endif /*__ASSEMBLY__*/
 #endif /* __XLAT_TABLES_H__ */
diff --git a/include/plat/common/platform.h b/include/plat/common/platform.h
index 1eeaac2..ab93123 100644
--- a/include/plat/common/platform.h
+++ b/include/plat/common/platform.h
@@ -72,6 +72,8 @@
 unsigned int platform_get_core_pos(unsigned long mpidr);
 unsigned long platform_get_stack(unsigned long mpidr);
 void plat_report_exception(unsigned long);
+int plat_crash_console_init(void);
+int plat_crash_console_putc(int c);
 
 /*******************************************************************************
  * Mandatory BL1 functions
@@ -180,7 +182,7 @@
 /*******************************************************************************
  * Optional BL3-1 functions (may be overridden)
  ******************************************************************************/
-void bl31_plat_enable_mmu(void);
+void bl31_plat_enable_mmu(uint32_t flags);
 
 /*******************************************************************************
  * Mandatory BL3-2 functions (only if platform contains a BL3-2)
@@ -190,6 +192,6 @@
 /*******************************************************************************
  * Optional BL3-2 functions (may be overridden)
  ******************************************************************************/
-void bl32_plat_enable_mmu(void);
+void bl32_plat_enable_mmu(uint32_t flags);
 
 #endif /* __PLATFORM_H__ */
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index 439ca28..f605bf4 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -30,6 +30,7 @@
 
 #include <arch.h>
 #include <asm_macros.S>
+#include <assert_macros.S>
 
 	.globl	get_afflvl_shift
 	.globl	mpidr_mask_lower_afflvls
@@ -46,7 +47,6 @@
 	.globl	enable_vfp
 #endif
 
-
 func get_afflvl_shift
 	cmp	x0, #3
 	cinc	x0, x0, eq
@@ -79,6 +79,10 @@
  * -----------------------------------------------------------------------
  */
 func zeromem16
+#if ASM_ASSERTION
+	tst	x0, #0xf
+	ASM_ASSERT(eq)
+#endif
 	add	x2, x0, x1
 /* zero 16 bytes at a time */
 z_loop16:
@@ -105,6 +109,11 @@
  * --------------------------------------------------------------------------
  */
 func memcpy16
+#if ASM_ASSERTION
+	orr	x3, x0, x1
+	tst	x3, #0xf
+	ASM_ASSERT(eq)
+#endif
 /* copy 16 bytes at a time */
 m_loop16:
 	cmp	x2, #16
@@ -145,7 +154,6 @@
 	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
 	b	do_disable_mmu
 
-
 /* ---------------------------------------------------------------------------
  * Enable the use of VFP at EL3
  * ---------------------------------------------------------------------------
diff --git a/lib/aarch64/xlat_tables.c b/lib/aarch64/xlat_tables.c
index f1d658d..ddc9ba8 100644
--- a/lib/aarch64/xlat_tables.c
+++ b/lib/aarch64/xlat_tables.c
@@ -292,7 +292,7 @@
  *			exception level
  ******************************************************************************/
 #define DEFINE_ENABLE_MMU_EL(_el, _tcr_extra, _tlbi_fct)		\
-	void enable_mmu_el##_el(void)					\
+	void enable_mmu_el##_el(uint32_t flags)				\
 	{								\
 		uint64_t mair, tcr, ttbr;				\
 		uint32_t sctlr;						\
@@ -329,8 +329,13 @@
 		isb();							\
 									\
 		sctlr = read_sctlr_el##_el();				\
-		sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT | SCTLR_I_BIT;	\
-		sctlr |= SCTLR_A_BIT | SCTLR_C_BIT;			\
+		sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT;			\
+									\
+		if (flags & DISABLE_DCACHE)				\
+			sctlr &= ~SCTLR_C_BIT;				\
+		else							\
+			sctlr |= SCTLR_C_BIT;				\
+									\
 		write_sctlr_el##_el(sctlr);				\
 									\
 		/* Ensure the MMU enable takes effect immediately */	\
diff --git a/plat/common/aarch64/plat_common.c b/plat/common/aarch64/plat_common.c
index 94b9dfd..90574fd 100644
--- a/plat/common/aarch64/plat_common.c
+++ b/plat/common/aarch64/plat_common.c
@@ -38,12 +38,12 @@
 #pragma weak bl31_plat_enable_mmu
 #pragma weak bl32_plat_enable_mmu
 
-void bl31_plat_enable_mmu(void)
+void bl31_plat_enable_mmu(uint32_t flags)
 {
-	enable_mmu_el3();
+	enable_mmu_el3(flags);
 }
 
-void bl32_plat_enable_mmu(void)
+void bl32_plat_enable_mmu(uint32_t flags)
 {
-	enable_mmu_el1();
+	enable_mmu_el1(flags);
 }
diff --git a/plat/common/aarch64/platform_helpers.S b/plat/common/aarch64/platform_helpers.S
index f6ac13e..5e2d1b1 100644
--- a/plat/common/aarch64/platform_helpers.S
+++ b/plat/common/aarch64/platform_helpers.S
@@ -37,6 +37,8 @@
 	.weak	platform_is_primary_cpu
 	.weak	platform_check_mpidr
 	.weak	plat_report_exception
+	.weak	plat_crash_console_init
+	.weak	plat_crash_console_putc
 
 	/* -----------------------------------------------------
 	 *  int platform_get_core_pos(int mpidr);
@@ -79,3 +81,20 @@
 	 */
 func plat_report_exception
 	ret
+
+	/* -----------------------------------------------------
+	 * Weak placeholder, to be redefined by each platform.
+	 * Sets up no console and returns 0 in x0.
+	 * -----------------------------------------------------
+	 */
+func plat_crash_console_init
+	mov	x0, #0
+	ret
+
+	/* -----------------------------------------------------
+	 * Weak placeholder, to be redefined by each platform.
+	 * Prints nothing; x0 is left untouched.
+	 * -----------------------------------------------------
+	 */
+func plat_crash_console_putc
+	ret
diff --git a/plat/common/aarch64/platform_mp_stack.S b/plat/common/aarch64/platform_mp_stack.S
index 801ec7f..8eb1aa6 100644
--- a/plat/common/aarch64/platform_mp_stack.S
+++ b/plat/common/aarch64/platform_mp_stack.S
@@ -33,29 +33,12 @@
 #include <platform_def.h>
 
 
-	.local	pcpu_dv_mem_stack
 	.local	platform_normal_stacks
 	.weak	platform_set_stack
 	.weak	platform_get_stack
-	.weak	platform_set_coherent_stack
 
 
 	/* -----------------------------------------------------
-	 * void platform_set_coherent_stack (unsigned long mpidr)
-	 *
-	 * For a given CPU, this function sets the stack pointer
-	 * to a stack allocated in device memory. This stack can
-	 * be used by C code which enables/disables the SCTLR.M
-	 * SCTLR.C bit e.g. while powering down a cpu
-	 * -----------------------------------------------------
-	 */
-func platform_set_coherent_stack
-	mov x5, x30 // lr
-	get_mp_stack pcpu_dv_mem_stack, PCPU_DV_MEM_STACK_SIZE
-	mov sp, x0
-	ret x5
-
-	/* -----------------------------------------------------
 	 * unsigned long platform_get_stack (unsigned long mpidr)
 	 *
 	 * For a given CPU, this function returns the stack
@@ -81,22 +64,9 @@
 	ret x9
 
 	/* -----------------------------------------------------
-	 * Per-cpu stacks in normal memory.
-	 * Used for C code during runtime execution (when coherent
-	 * stacks are not required).
-	 * Each cpu gets a stack of PLATFORM_STACK_SIZE bytes.
+	 * Per-cpu stacks in normal memory. Each cpu gets a
+	 * stack of PLATFORM_STACK_SIZE bytes.
 	 * -----------------------------------------------------
 	 */
 declare_stack platform_normal_stacks, tzfw_normal_stacks, \
 		PLATFORM_STACK_SIZE, PLATFORM_CORE_COUNT
-
-	/* -----------------------------------------------------
-	 * Per-cpu stacks in device memory.
-	 * Used for C code just before power down or right after
-	 * power up when the MMU or caches need to be turned on
-	 * or off.
-	 * Each cpu gets a stack of PCPU_DV_MEM_STACK_SIZE bytes.
-	 * -----------------------------------------------------
-	 */
-declare_stack pcpu_dv_mem_stack, tzfw_coherent_mem, \
-		PCPU_DV_MEM_STACK_SIZE, PLATFORM_CORE_COUNT
diff --git a/plat/common/aarch64/platform_up_stack.S b/plat/common/aarch64/platform_up_stack.S
index 45a96a6..73b74b2 100644
--- a/plat/common/aarch64/platform_up_stack.S
+++ b/plat/common/aarch64/platform_up_stack.S
@@ -33,28 +33,11 @@
 #include <platform_def.h>
 
 
-	.local	pcpu_dv_mem_stack
 	.local	platform_normal_stacks
 	.globl	platform_set_stack
 	.globl	platform_get_stack
-	.globl	platform_set_coherent_stack
-
 
 	/* -----------------------------------------------------
-	 * void platform_set_coherent_stack (unsigned long)
-	 *
-	 * For cold-boot BL images, only the primary CPU needs a
-	 * stack. This function sets the stack pointer to a stack
-	 * allocated in device memory.
-	 * -----------------------------------------------------
-	 */
-func platform_set_coherent_stack
-	get_up_stack pcpu_dv_mem_stack, PCPU_DV_MEM_STACK_SIZE
-	mov sp, x0
-	ret
-
-
-	/* -----------------------------------------------------
 	 * unsigned long platform_get_stack (unsigned long)
 	 *
 	 * For cold-boot BL images, only the primary CPU needs a
@@ -87,11 +70,3 @@
 	 */
 declare_stack platform_normal_stacks, tzfw_normal_stacks, \
 		PLATFORM_STACK_SIZE, 1
-
-	/* -----------------------------------------------------
-	 * Single cpu stack in device/coherent memory.
-	 * PCPU_DV_MEM_STACK_SIZE bytes are allocated.
-	 * -----------------------------------------------------
-	 */
-declare_stack pcpu_dv_mem_stack, tzfw_coherent_mem, \
-		PCPU_DV_MEM_STACK_SIZE, 1
diff --git a/plat/fvp/aarch64/fvp_common.c b/plat/fvp/aarch64/fvp_common.c
index a10f4e8..d22fd55 100644
--- a/plat/fvp/aarch64/fvp_common.c
+++ b/plat/fvp/aarch64/fvp_common.c
@@ -119,7 +119,7 @@
 		mmap_add(fvp_mmap);					\
 		init_xlat_tables();					\
 									\
-		enable_mmu_el##_el();					\
+		enable_mmu_el##_el(0);					\
 	}
 
 /* Define EL1 and EL3 variants of the function initialising the MMU */
diff --git a/plat/fvp/aarch64/fvp_helpers.S b/plat/fvp/aarch64/fvp_helpers.S
index 3cd0b46..823588e 100644
--- a/plat/fvp/aarch64/fvp_helpers.S
+++ b/plat/fvp/aarch64/fvp_helpers.S
@@ -32,6 +32,7 @@
 #include <asm_macros.S>
 #include <bl_common.h>
 #include <gic_v2.h>
+#include <pl011.h>
 #include "../drivers/pwrc/fvp_pwrc.h"
 #include "../fvp_def.h"
 
@@ -39,6 +40,8 @@
 	.globl	plat_secondary_cold_boot_setup
 	.globl	platform_mem_init
 	.globl	plat_report_exception
+	.globl	plat_crash_console_init
+	.globl	plat_crash_console_putc
 
 	.macro	fvp_choose_gicmmap  param1, param2, x_tmp, w_tmp, res
 	ldr	\x_tmp, =VE_SYSREGS_BASE + V2M_SYS_ID
@@ -187,3 +190,30 @@
 	add	x1, x1, #V2M_SYS_LED
 	str	w0, [x1]
 	ret
+
+	/* Define a crash console for the platform */
+#define FVP_CRASH_CONSOLE_BASE		PL011_UART0_BASE
+
+	/* ---------------------------------------------
+	 * int plat_crash_console_init(void)
+	 * Function to initialize the crash console
+	 * without a C Runtime to print crash report.
+	 * Clobber list : x0, x1, x2
+	 * ---------------------------------------------
+	 */
+func plat_crash_console_init
+	mov_imm	x0, FVP_CRASH_CONSOLE_BASE	/* x0 = UART0 base address */
+	mov_imm	x1, PL011_UART0_CLK_IN_HZ	/* x1 = UART0 input clock */
+	mov_imm	x2, PL011_BAUDRATE		/* x2 = baud rate */
+	b	console_core_init		/* tail call: it returns to our caller */
+
+	/* ---------------------------------------------
+	 * int plat_crash_console_putc(int c)
+	 * Function to print a character on the crash
+	 * console without a C Runtime.
+	 * Clobber list : x1, x2
+	 * ---------------------------------------------
+	 */
+func plat_crash_console_putc
+	mov_imm	x1, FVP_CRASH_CONSOLE_BASE	/* x1 = crash console base address */
+	b	console_core_putc		/* tail call; x0 (the character) is untouched here */
diff --git a/plat/fvp/bl1_fvp_setup.c b/plat/fvp/bl1_fvp_setup.c
index bfd0f55..b146fdb 100644
--- a/plat/fvp/bl1_fvp_setup.c
+++ b/plat/fvp/bl1_fvp_setup.c
@@ -73,7 +73,7 @@
 	const size_t bl1_size = BL1_RAM_LIMIT - BL1_RAM_BASE;
 
 	/* Initialize the console to provide early debug support */
-	console_init(PL011_UART0_BASE);
+	console_init(PL011_UART0_BASE, PL011_UART0_CLK_IN_HZ, PL011_BAUDRATE);
 
 	/* Allow BL1 to see the whole Trusted RAM */
 	bl1_tzram_layout.total_base = TZRAM_BASE;
diff --git a/plat/fvp/bl2_fvp_setup.c b/plat/fvp/bl2_fvp_setup.c
index beba804..c0ad340 100644
--- a/plat/fvp/bl2_fvp_setup.c
+++ b/plat/fvp/bl2_fvp_setup.c
@@ -168,7 +168,7 @@
 void bl2_early_platform_setup(meminfo_t *mem_layout)
 {
 	/* Initialize the console to provide early debug support */
-	console_init(PL011_UART0_BASE);
+	console_init(PL011_UART0_BASE, PL011_UART0_CLK_IN_HZ, PL011_BAUDRATE);
 
 	/* Setup the BL2 memory layout */
 	bl2_tzram_layout = *mem_layout;
diff --git a/plat/fvp/bl31_fvp_setup.c b/plat/fvp/bl31_fvp_setup.c
index 683097a..21fca70 100644
--- a/plat/fvp/bl31_fvp_setup.c
+++ b/plat/fvp/bl31_fvp_setup.c
@@ -145,7 +145,7 @@
 				void *plat_params_from_bl2)
 {
 	/* Initialize the console to provide early debug support */
-	console_init(PL011_UART0_BASE);
+	console_init(PL011_UART0_BASE, PL011_UART0_CLK_IN_HZ, PL011_BAUDRATE);
 
 	/* Initialize the platform config for future decision making */
 	fvp_config_setup();
diff --git a/plat/fvp/bl32_fvp_setup.c b/plat/fvp/bl32_fvp_setup.c
index 901c585..aa49ff3 100644
--- a/plat/fvp/bl32_fvp_setup.c
+++ b/plat/fvp/bl32_fvp_setup.c
@@ -72,7 +72,7 @@
 	 * Initialize a different console than already in use to display
 	 * messages from TSP
 	 */
-	console_init(PL011_UART1_BASE);
+	console_init(PL011_UART1_BASE, PL011_UART1_CLK_IN_HZ, PL011_BAUDRATE);
 
 	/* Initialize the platform config for future decision making */
 	fvp_config_setup();
diff --git a/plat/fvp/fvp_def.h b/plat/fvp/fvp_def.h
index 89c8b02..21edb3b 100644
--- a/plat/fvp/fvp_def.h
+++ b/plat/fvp/fvp_def.h
@@ -198,6 +198,13 @@
 #define PL011_UART2_BASE		0x1c0b0000
 #define PL011_UART3_BASE		0x1c0c0000
 
+#define PL011_BAUDRATE  115200
+
+#define PL011_UART0_CLK_IN_HZ 24000000
+#define PL011_UART1_CLK_IN_HZ 24000000
+#define PL011_UART2_CLK_IN_HZ 24000000
+#define PL011_UART3_CLK_IN_HZ 24000000
+
 /*******************************************************************************
  * TrustZone address space controller related constants
  ******************************************************************************/
diff --git a/plat/fvp/fvp_pm.c b/plat/fvp/fvp_pm.c
index 55f465b..22e53e1 100644
--- a/plat/fvp/fvp_pm.c
+++ b/plat/fvp/fvp_pm.c
@@ -120,11 +120,10 @@
  * platform to decide whether the cluster is being turned off and take apt
  * actions.
  *
- * CAUTION: This function is called with coherent stacks so that caches can be
- * turned off, flushed and coherency disabled. There is no guarantee that caches
- * will remain turned on across calls to this function as each affinity level is
- * dealt with. So do not write & read global variables across calls. It will be
- * wise to do flush a write to the global to prevent unpredictable results.
+ * CAUTION: There is no guarantee that caches will remain turned on across calls
+ * to this function as each affinity level is dealt with. So do not write & read
+ * global variables across calls. It will be wise to do flush a write to the
+ * global to prevent unpredictable results.
  ******************************************************************************/
 int fvp_affinst_off(unsigned long mpidr,
 		    unsigned int afflvl,
@@ -192,11 +191,10 @@
  * platform to decide whether the cluster is being turned off and take apt
  * actions.
  *
- * CAUTION: This function is called with coherent stacks so that caches can be
- * turned off, flushed and coherency disabled. There is no guarantee that caches
- * will remain turned on across calls to this function as each affinity level is
- * dealt with. So do not write & read global variables across calls. It will be
- * wise to do flush a write to the global to prevent unpredictable results.
+ * CAUTION: There is no guarantee that caches will remain turned on across calls
+ * to this function as each affinity level is dealt with. So do not write & read
+ * global variables across calls. It will be wise to do flush a write to the
+ * global to prevent unpredictable results.
  ******************************************************************************/
 int fvp_affinst_suspend(unsigned long mpidr,
 			unsigned long sec_entrypoint,
diff --git a/plat/fvp/include/plat_macros.S b/plat/fvp/include/plat_macros.S
index 602eaf1..727b958 100644
--- a/plat/fvp/include/plat_macros.S
+++ b/plat/fvp/include/plat_macros.S
@@ -27,31 +27,79 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
-
+#include <cci400.h>
 #include <gic_v2.h>
 #include <plat_config.h>
+#include "platform_def.h"
 
 .section .rodata.gic_reg_name, "aS"
-gic_regs: .asciz "gic_iar", "gic_ctlr", ""
+gic_regs:
+	.asciz "gic_hppir", "gic_ahppir", "gic_ctlr", ""
+gicd_pend_reg:
+	.asciz "gicd_ispendr regs (Offsets 0x200 - 0x278)\n Offset:\t\t\tvalue\n"
+newline:
+	.asciz "\n"
+spacer:
+	.asciz ":\t\t0x"
 
-/* Currently we have only 2 GIC registers to report */
-#define GIC_REG_SIZE 				(2 * 8)
 	/* ---------------------------------------------
 	 * The below macro prints out relevant GIC
 	 * registers whenever an unhandled exception is
 	 * taken in BL31.
+	 * Clobbers: x0 - x10, x16, sp
 	 * ---------------------------------------------
 	 */
 	.macro plat_print_gic_regs
-	adr	x0, plat_config;
-	ldr	w0, [x0, #CONFIG_GICC_BASE_OFFSET]
-	/* gic base address is now in x0 */
-	ldr	w1, [x0, #GICC_IAR]
-	ldr	w2, [x0, #GICC_CTLR]
-	sub	sp, sp, #GIC_REG_SIZE
-	stp	x1, x2, [sp] /* we store the gic registers as 64 bit */
-	adr	x0, gic_regs
-	mov	x1, sp
-	bl	print_string_value
-	add	sp, sp, #GIC_REG_SIZE
+	adr	x0, plat_config
+	ldr	w16, [x0, #CONFIG_GICC_BASE_OFFSET]
+	cbz	x16, 1f		/* nothing to print if the base address is zero */
+	/* GICC base address is now in x16 */
+	adr	x6, gic_regs	/* Load the gic reg list to x6 */
+	/* Load the gic regs to gp regs used by str_in_crash_buf_print */
+	ldr	w8, [x16, #GICC_HPPIR]
+	ldr	w9, [x16, #GICC_AHPPIR]
+	ldr	w10, [x16, #GICC_CTLR]
+	/* Store to the crash buf and print to console */
+	bl	str_in_crash_buf_print
+
+	/* Print the GICD_ISPENDR regs. NOTE(review): x16 is the GICC base loaded above; confirm the GICD base is not needed here */
+	add	x7, x16, #GICD_ISPENDR
+	adr	x4, gicd_pend_reg
+	bl	asm_print_str
+2:
+	sub	x4, x7, x16	/* x4 = offset of the next register from the base */
+	cmp	x4, #0x280	/* stop once offset 0x278 has been printed */
+	b.eq	1f
+	bl	asm_print_hex
+	adr	x4, spacer
+	bl	asm_print_str
+	ldr	x4, [x7], #8	/* 64-bit load, then advance x7 by 8 bytes */
+	bl	asm_print_hex
+	adr	x4, newline
+	bl	asm_print_str
+	b	2b
+1:
+	.endm
+
+.section .rodata.cci_reg_name, "aS"
+cci_iface_regs:
+	.asciz "cci_snoop_ctrl_cluster0", "cci_snoop_ctrl_cluster1" , ""
+
+	/* ------------------------------------------------
+	 * The below macro prints out relevant interconnect
+	 * registers whenever an unhandled exception is
+	 * taken in BL31.
+	 * Clobbers: x0 - x9, sp
+	 * ------------------------------------------------
+	 */
+	.macro plat_print_interconnect_regs
+	adr	x6, cci_iface_regs	/* x6 = list of register names */
+	/* Store in x7 the base address of the first interface */
+	mov_imm	x7, (CCI400_BASE + SLAVE_IFACE3_OFFSET)
+	ldr	w8, [x7, #SNOOP_CTRL_REG]	/* w8 = snoop control of slave interface 3 */
+	/* Store in x7 the base address of the second interface */
+	mov_imm	x7, (CCI400_BASE + SLAVE_IFACE4_OFFSET)
+	ldr	w9, [x7, #SNOOP_CTRL_REG]	/* w9 = snoop control of slave interface 4 */
+	/* Store to the crash buf and print to console */
+	bl	str_in_crash_buf_print
 	.endm
diff --git a/plat/fvp/include/platform_def.h b/plat/fvp/include/platform_def.h
index ec4cf52..9983266 100644
--- a/plat/fvp/include/platform_def.h
+++ b/plat/fvp/include/platform_def.h
@@ -47,13 +47,6 @@
 /* Size of cacheable stacks */
 #define PLATFORM_STACK_SIZE	0x800
 
-/* Size of coherent stacks for debug and release builds */
-#if DEBUG
-#define PCPU_DV_MEM_STACK_SIZE	0x400
-#else
-#define PCPU_DV_MEM_STACK_SIZE	0x300
-#endif
-
 #define FIRMWARE_WELCOME_STR		"Booting trusted firmware boot loader stage 1\n\r"
 
 /* Trusted Boot Firmware BL2 */
diff --git a/plat/fvp/platform.mk b/plat/fvp/platform.mk
index b22a339..f6275b7 100644
--- a/plat/fvp/platform.mk
+++ b/plat/fvp/platform.mk
@@ -45,8 +45,7 @@
 
 PLAT_INCLUDES		:=	-Iplat/fvp/include/
 
-PLAT_BL_COMMON_SOURCES	:=	drivers/arm/pl011/pl011.c			\
-				drivers/arm/pl011/pl011_console.c		\
+PLAT_BL_COMMON_SOURCES	:=	drivers/arm/pl011/pl011_console.S		\
 				drivers/io/io_fip.c				\
 				drivers/io/io_memmap.c				\
 				drivers/io/io_semihosting.c			\
diff --git a/services/std_svc/psci/psci_afflvl_off.c b/services/std_svc/psci/psci_afflvl_off.c
index a8904e98..83d19d3 100644
--- a/services/std_svc/psci/psci_afflvl_off.c
+++ b/services/std_svc/psci/psci_afflvl_off.c
@@ -44,7 +44,6 @@
 {
 	unsigned int plat_state;
 	int rc;
-	unsigned long sctlr;
 
 	assert(cpu_node->level == MPIDR_AFFLVL0);
 
@@ -70,26 +69,10 @@
 	/*
 	 * Arch. management. Perform the necessary steps to flush all
 	 * cpu caches.
-	 *
-	 * TODO: This power down sequence varies across cpus so it needs to be
-	 * abstracted out on the basis of the MIDR like in cpu_reset_handler().
-	 * Do the bare minimal for the time being. Fix this before porting to
-	 * Cortex models.
 	 */
-	sctlr = read_sctlr_el3();
-	sctlr &= ~SCTLR_C_BIT;
-	write_sctlr_el3(sctlr);
-	isb();	/* ensure MMU disable takes immediate effect */
+	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL0);
 
 	/*
-	 * CAUTION: This flush to the level of unification makes an assumption
-	 * about the cache hierarchy at affinity level 0 (cpu) in the platform.
-	 * Ideally the platform should tell psci which levels to flush to exit
-	 * coherency.
-	 */
-	dcsw_op_louis(DCCISW);
-
-	/*
 	 * Plat. management: Perform platform specific actions to turn this
 	 * cpu off e.g. exit cpu coherency, program the power controller etc.
 	 */
@@ -227,9 +210,6 @@
  * the lowest to the highest affinity level implemented by the platform because
  * to turn off affinity level X it is neccesary to turn off affinity level X - 1
  * first.
- *
- * CAUTION: This function is called with coherent stacks so that coherency can
- * be turned off and caches can be flushed safely.
  ******************************************************************************/
 int psci_afflvl_off(int start_afflvl,
 		    int end_afflvl)
diff --git a/services/std_svc/psci/psci_afflvl_on.c b/services/std_svc/psci/psci_afflvl_on.c
index d620172..3b7d805 100644
--- a/services/std_svc/psci/psci_afflvl_on.c
+++ b/services/std_svc/psci/psci_afflvl_on.c
@@ -359,9 +359,9 @@
 	}
 
 	/*
-	 * Arch. management: Turn on mmu & restore architectural state
+	 * Arch. management: Enable data cache and manage stack memory
 	 */
-	bl31_plat_enable_mmu();
+	psci_do_pwrup_cache_maintenance();
 
 	/*
 	 * All the platform specific actions for turning this cpu
diff --git a/services/std_svc/psci/psci_afflvl_suspend.c b/services/std_svc/psci/psci_afflvl_suspend.c
index 0977198..1e60276 100644
--- a/services/std_svc/psci/psci_afflvl_suspend.c
+++ b/services/std_svc/psci/psci_afflvl_suspend.c
@@ -126,8 +126,7 @@
 				unsigned int power_state)
 {
 	unsigned int plat_state;
-	unsigned long psci_entrypoint, sctlr;
-	el3_state_t *saved_el3_state;
+	unsigned long psci_entrypoint;
 	uint32_t ns_scr_el3 = read_scr_el3();
 	uint32_t ns_sctlr_el1 = read_sctlr_el1();
 	int rc;
@@ -170,39 +169,16 @@
 	 */
 	cm_el3_sysregs_context_save(NON_SECURE);
 
-	/*
-	 * The EL3 state to PoC since it will be accessed after a
-	 * reset with the caches turned off
-	 */
-	saved_el3_state = get_el3state_ctx(cm_get_context(NON_SECURE));
-	flush_dcache_range((uint64_t) saved_el3_state, sizeof(*saved_el3_state));
-
 	/* Set the secure world (EL3) re-entry point after BL1 */
 	psci_entrypoint = (unsigned long) psci_aff_suspend_finish_entry;
 
 	/*
 	 * Arch. management. Perform the necessary steps to flush all
 	 * cpu caches.
-	 *
-	 * TODO: This power down sequence varies across cpus so it needs to be
-	 * abstracted out on the basis of the MIDR like in cpu_reset_handler().
-	 * Do the bare minimal for the time being. Fix this before porting to
-	 * Cortex models.
 	 */
-	sctlr = read_sctlr_el3();
-	sctlr &= ~SCTLR_C_BIT;
-	write_sctlr_el3(sctlr);
-	isb();	/* ensure MMU disable takes immediate effect */
+	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL0);
 
 	/*
-	 * CAUTION: This flush to the level of unification makes an assumption
-	 * about the cache hierarchy at affinity level 0 (cpu) in the platform.
-	 * Ideally the platform should tell psci which levels to flush to exit
-	 * coherency.
-	 */
-	dcsw_op_louis(DCCISW);
-
-	/*
 	 * Plat. management: Allow the platform to perform the
 	 * necessary actions to turn off this cpu e.g. set the
 	 * platform defined mailbox with the psci entrypoint,
@@ -379,9 +355,6 @@
  * the lowest to the highest affinity level implemented by the platform because
  * to turn off affinity level X it is neccesary to turn off affinity level X - 1
  * first.
- *
- * CAUTION: This function is called with coherent stacks so that coherency can
- * be turned off and caches can be flushed safely.
  ******************************************************************************/
 int psci_afflvl_suspend(unsigned long entrypoint,
 			unsigned long context_id,
@@ -467,9 +440,11 @@
 
 	/* Get the index for restoring the re-entry information */
 	/*
-	 * Arch. management: Restore the stashed EL3 architectural
-	 * context from the 'cpu_context' structure for this cpu.
+	 * Arch. management: Enable the data cache, manage stack memory and
+	 * restore the stashed EL3 architectural context from the 'cpu_context'
+	 * structure for this cpu.
 	 */
+	psci_do_pwrup_cache_maintenance();
 	cm_el3_sysregs_context_restore(NON_SECURE);
 
 	/*
@@ -575,4 +550,3 @@
 	psci_afflvl1_suspend_finish,
 	psci_afflvl2_suspend_finish,
 };
-
diff --git a/services/std_svc/psci/psci_common.c b/services/std_svc/psci/psci_common.c
index 3c79a5e..56f3daf 100644
--- a/services/std_svc/psci/psci_common.c
+++ b/services/std_svc/psci/psci_common.c
@@ -390,9 +390,6 @@
  * the highest to the lowest affinity level implemented by the platform because
  * to turn on affinity level X it is neccesary to turn on affinity level X + 1
  * first.
- *
- * CAUTION: This function is called with coherent stacks so that coherency and
- * the mmu can be turned on safely.
  ******************************************************************************/
 void psci_afflvl_power_on_finish(int start_afflvl,
 				 int end_afflvl,
diff --git a/services/std_svc/psci/psci_entry.S b/services/std_svc/psci/psci_entry.S
index 1ffde06..e9ad130 100644
--- a/services/std_svc/psci/psci_entry.S
+++ b/services/std_svc/psci/psci_entry.S
@@ -31,6 +31,7 @@
 #include <arch.h>
 #include <asm_macros.S>
 #include <psci.h>
+#include <xlat_tables.h>
 
 	.globl	psci_aff_on_finish_entry
 	.globl	psci_aff_suspend_finish_entry
@@ -43,11 +44,6 @@
 	 * upon whether it was resumed from suspend or simply
 	 * turned on, call the common power on finisher with
 	 * the handlers (chosen depending upon original state).
-	 * For ease, the finisher is called with coherent
-	 * stacks. This allows the cluster/cpu finishers to
-	 * enter coherency and enable the mmu without running
-	 * into issues. We switch back to normal stacks once
-	 * all this is done.
 	 * -----------------------------------------------------
 	 */
 func psci_aff_on_finish_entry
@@ -58,6 +54,25 @@
 	adr	x23, psci_afflvl_suspend_finishers
 
 psci_aff_common_finish_entry:
+#if !RESET_TO_BL31
+	/* ---------------------------------------------
+	 * Enable the instruction cache, stack pointer
+	 * and data access alignment checks. Also, set
+	 * the EL3 exception endianness to little-endian.
+	 * It can be assumed that BL3-1 entrypoint code
+	 * will do this when RESET_TO_BL31 is set. The
+	 * same assumption cannot be made when another
+	 * boot loader executes before BL3-1 in the warm
+	 * boot path e.g. BL1.
+	 * ---------------------------------------------
+	 */
+	mov	x1, #(SCTLR_I_BIT | SCTLR_A_BIT | SCTLR_SA_BIT)
+	mrs	x0, sctlr_el3
+	orr	x0, x0, x1
+	msr	sctlr_el3, x0
+	isb
+#endif
+
 	/* ---------------------------------------------
 	 * Initialise the pcpu cache pointer for the CPU
 	 * ---------------------------------------------
@@ -78,8 +93,34 @@
 	 */
 	msr	spsel, #0
 
+	/* --------------------------------------------
+	 * Give ourselves a stack whose memory will be
+	 * marked as Normal-IS-WBWA when the MMU is
+	 * enabled.
+	 * --------------------------------------------
+	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
+
+	/* --------------------------------------------
+	 * Enable the MMU with the DCache disabled. It
+	 * is safe to use stacks allocated in normal
+	 * memory as a result. All memory accesses are
+	 * marked nGnRnE when the MMU is disabled. So
+	 * all the stack writes will make it to memory.
+	 * All memory accesses are marked Non-cacheable
+	 * when the MMU is enabled but D$ is disabled.
+	 * So used stack memory is guaranteed to be
+	 * visible immediately after the MMU is enabled.
+	 * Enabling the DCache at the same time as the
+	 * MMU can lead to speculatively fetched and
+	 * possibly stale stack memory being read from
+	 * other caches. This can lead to coherency
+	 * issues.
+	 * --------------------------------------------
+	 */
+	mov	x0, #DISABLE_DCACHE
+	bl	bl31_plat_enable_mmu
 
 	/* ---------------------------------------------
 	 * Call the finishers starting from affinity
@@ -95,60 +136,10 @@
 	mov	x0, #MPIDR_AFFLVL0
 	bl	psci_afflvl_power_on_finish
 
-	/* --------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
-	 * --------------------------------------------
-	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
-
 	b	el3_exit
 _panic:
 	b	_panic
 
-	/* -----------------------------------------------------
-	 * The following two stubs give the calling cpu a
-	 * coherent stack to allow flushing of caches without
-	 * suffering from stack coherency issues
-	 * -----------------------------------------------------
-	 */
-func __psci_cpu_off
-	func_prologue
-	sub	sp, sp, #0x10
-	stp	x19, x20, [sp, #0]
-	mov	x19, sp
-	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
-	bl	psci_cpu_off
-	mov	sp, x19
-	ldp	x19, x20, [sp,#0]
-	add	sp, sp, #0x10
-	func_epilogue
-	ret
-
-func __psci_cpu_suspend
-	func_prologue
-	sub	sp, sp, #0x20
-	stp	x19, x20, [sp, #0]
-	stp	x21, x22, [sp, #0x10]
-	mov	x19, sp
-	mov	x20, x0
-	mov	x21, x1
-	mov	x22, x2
-	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
-	mov	x0, x20
-	mov	x1, x21
-	mov	x2, x22
-	bl	psci_cpu_suspend
-	mov	sp, x19
-	ldp	x21, x22, [sp,#0x10]
-	ldp	x19, x20, [sp,#0]
-	add	sp, sp, #0x20
-	func_epilogue
-	ret
-
 	/* --------------------------------------------
 	 * This function is called to indicate to the
 	 * power controller that it is safe to power
diff --git a/services/std_svc/psci/psci_helpers.S b/services/std_svc/psci/psci_helpers.S
new file mode 100644
index 0000000..21b5688
--- /dev/null
+++ b/services/std_svc/psci/psci_helpers.S
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <platform_def.h>
+
+	.globl	psci_do_pwrdown_cache_maintenance
+	.globl	psci_do_pwrup_cache_maintenance
+
+/* -----------------------------------------------------------------------
+ * void psci_do_pwrdown_cache_maintenance(uint32_t affinity_level);
+ *
+ * This function performs cache maintenance before this cpu is powered
+ * off. The levels of cache affected are determined by the affinity level
+ * which is passed as the argument. Additionally, this function also
+ * ensures that stack memory is correctly flushed out to avoid coherency
+ * issues due to a change in its memory attributes after the data cache
+ * is disabled.
+ * -----------------------------------------------------------------------
+ */
+func psci_do_pwrdown_cache_maintenance
+	stp     x29, x30, [sp,#-16]!
+	stp     x19, x20, [sp,#-16]!
+
+	/* ---------------------------------------------
+	 * Disable the Data Cache.
+	 * ---------------------------------------------
+	 */
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT
+	msr	sctlr_el3, x1
+	isb
+
+	/* ---------------------------------------------
+	 * Determine how many levels of cache need
+	 * maintenance. Affinity level 0 means only the
+	 * cpu is powered down, so L1 data is flushed to
+	 * the PoU. For higher levels a flush of L1 data
+	 * and L2 unified cache is assumed to be enough;
+	 * ideally the platform should provide this.
+	 * The argument is a uint32_t, so only w0 is
+	 * compared: AAPCS64 leaves bits [63:32] of x0
+	 * unspecified for 32-bit arguments.
+	 * ---------------------------------------------
+	 */
+	cmp	w0, #MPIDR_AFFLVL0
+	mov	x0, #DCCISW
+	b.ne	flush_caches_to_poc
+
+	/* ---------------------------------------------
+	 * Flush L1 cache to PoU.
+	 * ---------------------------------------------
+	 */
+	bl	dcsw_op_louis
+	b	do_stack_maintenance
+
+	/* ---------------------------------------------
+	 * Flush L1 and L2 caches to PoC.
+	 * ---------------------------------------------
+	 */
+flush_caches_to_poc:
+	bl	dcsw_op_all
+
+	/* ---------------------------------------------
+	 * TODO: Intra-cluster coherency should be
+	 * turned off here once cpu-specific
+	 * abstractions are in place.
+	 * ---------------------------------------------
+	 */
+
+	/* ---------------------------------------------
+	 * Do stack maintenance by flushing the used
+	 * stack to the main memory and invalidating the
+	 * remainder.
+	 * ---------------------------------------------
+	 */
+do_stack_maintenance:
+	mrs	x0, mpidr_el1
+	bl	platform_get_stack
+
+	/* ---------------------------------------------
+	 * Calculate and store the size of the used
+	 * stack memory in x1.
+	 * ---------------------------------------------
+	 */
+	mov	x19, x0
+	mov	x1, sp
+	sub	x1, x0, x1
+	mov	x0, sp
+	bl	flush_dcache_range
+
+	/* ---------------------------------------------
+	 * Calculate and store the size of the unused
+	 * stack memory in x1. Calculate and store the
+	 * stack base address in x0.
+	 * ---------------------------------------------
+	 */
+	sub	x0, x19, #PLATFORM_STACK_SIZE
+	sub	x1, sp, x0
+	bl	inv_dcache_range
+
+	ldp	x19, x20, [sp], #16
+	ldp	x29, x30, [sp], #16
+	ret
+
+
+/* -----------------------------------------------------------------------
+ * void psci_do_pwrup_cache_maintenance(void);
+ *
+ * This function performs cache maintenance after this cpu is powered up.
+ * It passes the used stack memory (from sp up to the address returned by
+ * platform_get_stack) to inv_dcache_range, then sets SCTLR_EL3.C.
+ * -----------------------------------------------------------------------
+ */
+func psci_do_pwrup_cache_maintenance
+	stp	x29, x30, [sp,#-16]!
+
+	/* ---------------------------------------------
+	 * Ensure any inflight stack writes have made it
+	 * to main memory.
+	 * ---------------------------------------------
+	 */
+	dmb	st
+
+	/* ---------------------------------------------
+	 * x1 = size of the used stack memory (address
+	 * returned by platform_get_stack minus sp).
+	 * x0 = sp, the start of the range to invalidate.
+	 * ---------------------------------------------
+	 */
+	mrs	x0, mpidr_el1
+	bl	platform_get_stack
+	mov	x1, sp
+	sub	x1, x0, x1
+	mov	x0, sp
+	bl	inv_dcache_range
+
+	/* ---------------------------------------------
+	 * Enable the data cache.
+	 * ---------------------------------------------
+	 */
+	mrs	x0, sctlr_el3
+	orr	x0, x0, #SCTLR_C_BIT
+	msr	sctlr_el3, x0
+	isb
+
+	ldp	x29, x30, [sp], #16
+	ret
diff --git a/services/std_svc/psci/psci_main.c b/services/std_svc/psci/psci_main.c
index d68f3d0..21968d9 100644
--- a/services/std_svc/psci/psci_main.c
+++ b/services/std_svc/psci/psci_main.c
@@ -230,10 +230,10 @@
 			SMC_RET1(handle, psci_version());
 
 		case PSCI_CPU_OFF:
-			SMC_RET1(handle, __psci_cpu_off());
+			SMC_RET1(handle, psci_cpu_off());
 
 		case PSCI_CPU_SUSPEND_AARCH32:
-			SMC_RET1(handle, __psci_cpu_suspend(x1, x2, x3));
+			SMC_RET1(handle, psci_cpu_suspend(x1, x2, x3));
 
 		case PSCI_CPU_ON_AARCH32:
 			SMC_RET1(handle, psci_cpu_on(x1, x2, x3));
@@ -258,7 +258,7 @@
 
 		switch (smc_fid) {
 		case PSCI_CPU_SUSPEND_AARCH64:
-			SMC_RET1(handle, __psci_cpu_suspend(x1, x2, x3));
+			SMC_RET1(handle, psci_cpu_suspend(x1, x2, x3));
 
 		case PSCI_CPU_ON_AARCH64:
 			SMC_RET1(handle, psci_cpu_on(x1, x2, x3));
diff --git a/services/std_svc/psci/psci_private.h b/services/std_svc/psci/psci_private.h
index 06db63f..b47bf85 100644
--- a/services/std_svc/psci/psci_private.h
+++ b/services/std_svc/psci/psci_private.h
@@ -128,5 +128,8 @@
 			int);
 unsigned int psci_afflvl_suspend_finish(int, int);
 
+/* Private exported functions from psci_helpers.S */
+void psci_do_pwrdown_cache_maintenance(uint32_t affinity_level);
+void psci_do_pwrup_cache_maintenance(void);
 
 #endif /* __PSCI_PRIVATE_H__ */