Allocate single stacks for BL1 and BL2

The BL images share common stack management code which provides
one coherent and one cacheable stack for every CPU. BL1 and BL2
just execute on the primary CPU during boot and do not require
the additional CPU stacks. This patch provides separate stack
support code for UP and MP images, substantially reducing the
RAM usage for BL1 and BL2 for the FVP platform.

This patch also provides macros for declaring stacks and
calculating stack base addresses to improve consistency where
this has to be done in the firmware.

The stack allocation source files are now included via
platform.mk rather than the common BLx makefiles. This allows
each platform to select the appropriate MP/UP stack support
for each BL image.

Each platform makefile must be updated when including this
commit.

Fixes ARM-software/tf-issues#76

Change-Id: Ia251f61b8148ffa73eae3f3711f57b1ffebfa632
diff --git a/bl31/aarch64/runtime_exceptions.S b/bl31/aarch64/runtime_exceptions.S
index e16d8d9..d00c1d7 100644
--- a/bl31/aarch64/runtime_exceptions.S
+++ b/bl31/aarch64/runtime_exceptions.S
@@ -435,21 +435,12 @@
 	 */
 func get_exception_stack
 	mov	x10, x30 // lr
-	bl	platform_get_core_pos
-	add	x0, x0, #1
-	mov	x1, #PCPU_EXCEPTION_STACK_SIZE
-	mul	x0, x0, x1
-	ldr	x1, =pcpu_exception_stack
-	add	x0, x1, x0
+	get_mp_stack pcpu_exception_stack, PCPU_EXCEPTION_STACK_SIZE
 	ret	x10
 
 	/* -----------------------------------------------------
 	 * Per-cpu exception stacks in normal memory.
 	 * -----------------------------------------------------
 	 */
-	.section	tzfw_normal_stacks, "aw", %nobits; .align 6
-
-pcpu_exception_stack:
-	/* Zero fill */
-	.space (PLATFORM_CORE_COUNT * PCPU_EXCEPTION_STACK_SIZE), 0
-
+declare_stack pcpu_exception_stack, tzfw_normal_stacks, \
+		PCPU_EXCEPTION_STACK_SIZE, PLATFORM_CORE_COUNT
diff --git a/bl32/tsp/tsp-fvp.mk b/bl32/tsp/tsp-fvp.mk
index ead30ef..02fae09 100644
--- a/bl32/tsp/tsp-fvp.mk
+++ b/bl32/tsp/tsp-fvp.mk
@@ -34,4 +34,5 @@
 
 # TSP source files specific to FVP platform
 BL32_SOURCES		+=	bl32_plat_setup.c			\
+				platform_mp_stack.S			\
 				plat_common.c
diff --git a/docs/porting-guide.md b/docs/porting-guide.md
index 5dca6fd..868eb33 100644
--- a/docs/porting-guide.md
+++ b/docs/porting-guide.md
@@ -81,7 +81,7 @@
 
 Each platform must export a header file of this name with the following
 constants defined. In the ARM FVP port, this file is found in
-[../plat/fvp/platform.h].
+[plat/fvp/platform.h].
 
 *   **#define : PLATFORM_LINKER_FORMAT**
 
@@ -96,7 +96,14 @@
 *   **#define : PLATFORM_STACK_SIZE**
 
     Defines the normal stack memory available to each CPU. This constant is used
-    by `platform_set_stack()`.
+    by [plat/common/aarch64/platform_mp_stack.S] and
+    [plat/common/aarch64/platform_up_stack.S].
+
+*   **#define : PCPU_DV_MEM_STACK_SIZE**
+
+    Defines the coherent stack memory available to each CPU. This constant is used
+    by [plat/common/aarch64/platform_mp_stack.S] and
+    [plat/common/aarch64/platform_up_stack.S].
 
 *   **#define : FIRMWARE_WELCOME_STR**
 
@@ -192,21 +199,7 @@
 
 The following mandatory modifications may be implemented in any file
 the implementer chooses. In the ARM FVP port, they are implemented in
-[../plat/fvp/aarch64/plat_common.c].
-
-*   **Variable : unsigned char platform_normal_stacks[X][Y]**
-
-        where  X = PLATFORM_STACK_SIZE
-          and  Y = PLATFORM_CORE_COUNT
-
-    Each platform must allocate a block of memory with Normal Cacheable, Write
-    back, Write allocate and Inner Shareable attributes aligned to the size (in
-    bytes) of the largest cache line amongst all caches implemented in the
-    system. A pointer to this memory should be exported with the name
-    `platform_normal_stacks`. This pointer is used by the common platform helper
-    functions `platform_set_stack()` (to allocate a stack for each CPU in the
-    platform)  & `platform_get_stack()` (to return the base address of that
-    stack) (see [../plat/common/aarch64/platform_helpers.S]).
+[plat/fvp/aarch64/plat_common.c].
 
 *   **Function : uint64_t plat_get_syscnt_freq(void)**
 
@@ -216,6 +209,7 @@
     In the ARM FVP port, it returns the base frequency of the system counter,
     which is retrieved from the first entry in the frequency modes table.
 
+
 2.2 Common optional modifications
 ---------------------------------
 
@@ -253,13 +247,18 @@
 *   Flushing caches prior to powering down a CPU or cluster.
 
 Each BL stage allocates this coherent stack memory for each CPU in the
-`tzfw_coherent_mem` section. A pointer to this memory (`pcpu_dv_mem_stack`) is
-used by this function to allocate a coherent stack for each CPU. A CPU is
-identified by its `MPIDR`, which is passed as an argument to this function.
+`tzfw_coherent_mem` section.
 
-The size of the stack allocated to each CPU is specified by the constant
+This function sets the current stack pointer to the coherent stack that
+has been allocated for the CPU specified by MPIDR. For BL images that only
+require a stack for the primary CPU the parameter is ignored. The size of
+the stack allocated to each CPU is specified by the platform defined constant
 `PCPU_DV_MEM_STACK_SIZE`.
 
+Common implementations of this function for the UP and MP BL images are
+provided in [plat/common/aarch64/platform_up_stack.S] and
+[plat/common/aarch64/platform_mp_stack.S].
+
 
 ### Function : platform_is_primary_cpu()
 
@@ -277,13 +276,15 @@
     Argument : unsigned long
     Return   : void
 
-This function uses the `platform_normal_stacks` pointer variable to allocate
-stacks to each CPU. Further details are given in the description of the
-`platform_normal_stacks` variable below. A CPU is identified by its `MPIDR`,
-which is passed as the argument.
+This function sets the current stack pointer to the normal memory stack that
+has been allocated for the CPU specified by MPIDR. For BL images that only
+require a stack for the primary CPU the parameter is ignored. The size of
+the stack allocated to each CPU is specified by the platform defined constant
+`PLATFORM_STACK_SIZE`.
 
-The size of the stack allocated to each CPU is specified by the platform defined
-constant `PLATFORM_STACK_SIZE`.
+Common implementations of this function for the UP and MP BL images are
+provided in [plat/common/aarch64/platform_up_stack.S] and
+[plat/common/aarch64/platform_mp_stack.S].
 
 
 ### Function : platform_get_stack()
@@ -291,13 +292,15 @@
     Argument : unsigned long
     Return   : unsigned long
 
-This function uses the `platform_normal_stacks` pointer variable to return the
-base address of the stack memory reserved for a CPU. Further details are given
-in the description of the `platform_normal_stacks` variable below. A CPU is
-identified by its `MPIDR`, which is passed as the argument.
+This function returns the base address of the normal memory stack that
+has been allocated for the CPU specified by MPIDR. For BL images that only
+require a stack for the primary CPU the parameter is ignored. The size of
+the stack allocated to each CPU is specified by the platform defined constant
+`PLATFORM_STACK_SIZE`.
 
-The size of the stack allocated to each CPU is specified by the platform defined
-constant `PLATFORM_STACK_SIZE`.
+Common implementations of this function for the UP and MP BL images are
+provided in [plat/common/aarch64/platform_up_stack.S] and
+[plat/common/aarch64/platform_mp_stack.S].
 
 
 ### Function : plat_report_exception()
@@ -319,7 +322,7 @@
 about the way the platform displays its status information.
 
 This function receives the exception type as its argument. Possible values for
-exceptions types are listed in the [../include/runtime_svc.h] header file. Note
+exceptions types are listed in the [include/runtime_svc.h] header file. Note
 that these constants are not related to any architectural exception code; they
 are just an ARM Trusted Firmware convention.
 
@@ -933,7 +936,7 @@
 the passed pointer with a pointer to BL3-1's private `plat_pm_ops` structure.
 
 A description of each member of this structure is given below. Please refer to
-the ARM FVP specific implementation of these handlers in [../plat/fvp/plat_pm.c]
+the ARM FVP specific implementation of these handlers in [plat/fvp/plat_pm.c]
 as an example. A platform port may choose not implement some of the power
 management operations. For example, the ARM FVP port does not implement the
 `affinst_standby()` function.
@@ -1135,8 +1138,9 @@
 [User Guide]: user-guide.md
 [FreeBSD]:    http://www.freebsd.org
 
-[../plat/common/aarch64/platform_helpers.S]: ../plat/common/aarch64/platform_helpers.S
-[../plat/fvp/platform.h]:                    ../plat/fvp/platform.h
-[../plat/fvp/aarch64/plat_common.c]:          ../plat/fvp/aarch64/plat_common.c
-[../plat/fvp/plat_pm.c]:                      ../plat/fvp/plat_pm.c
-[../include/runtime_svc.h]:                  ../include/runtime_svc.h
+[plat/common/aarch64/platform_mp_stack.S]: ../plat/common/aarch64/platform_mp_stack.S
+[plat/common/aarch64/platform_up_stack.S]: ../plat/common/aarch64/platform_up_stack.S
+[plat/fvp/platform.h]:                     ../plat/fvp/platform.h
+[plat/fvp/aarch64/plat_common.c]:          ../plat/fvp/aarch64/plat_common.c
+[plat/fvp/plat_pm.c]:                      ../plat/fvp/plat_pm.c
+[include/runtime_svc.h]:                   ../include/runtime_svc.h
diff --git a/include/asm_macros.S b/include/asm_macros.S
index 135c11a..8bcb7d2 100644
--- a/include/asm_macros.S
+++ b/include/asm_macros.S
@@ -91,3 +91,43 @@
 	.type \_name, %function
 	\_name:
 	.endm
+
+	/*
+	 * This macro declares an array of 1 or more stacks, properly
+	 * aligned and in the requested section
+	 */
+#define STACK_ALIGN	6
+
+	.macro declare_stack _name, _section, _size, _count
+	.if ((\_size & ((1 << STACK_ALIGN) - 1)) <> 0)
+	  .error "Stack size not correctly aligned"
+	.endif
+	.section    \_section, "aw", %nobits
+	.align STACK_ALIGN
+	\_name:
+	.space ((\_count) * (\_size)), 0
+	.endm
+
+	/*
+	 * This macro calculates the base address of an MP stack using the
+	 * platform_get_core_pos() index, the name of the stack storage and
+	 * the size of each stack
+	 * In: X0 = MPIDR of CPU whose stack is wanted
+	 * Out: X0 = physical address of stack base
+	 * Clobber: X30, X1, X2
+	 */
+	.macro get_mp_stack _name, _size
+	bl  platform_get_core_pos
+	ldr x2, =(\_name + \_size)
+	mov x1, #\_size
+	madd x0, x0, x1, x2
+	.endm
+
+	/*
+	 * This macro calculates the base address of a UP stack using the
+	 * name of the stack storage and the size of the stack
+	 * Out: X0 = physical address of stack base
+	 */
+	.macro get_up_stack _name, _size
+	ldr x0, =(\_name + \_size)
+	.endm
diff --git a/plat/common/aarch64/platform_helpers.S b/plat/common/aarch64/platform_helpers.S
index 50abb70..29268ba 100644
--- a/plat/common/aarch64/platform_helpers.S
+++ b/plat/common/aarch64/platform_helpers.S
@@ -33,46 +33,12 @@
 #include <asm_macros.S>
 
 
-	.globl	pcpu_dv_mem_stack
 	.weak	platform_get_core_pos
-	.weak	platform_set_stack
-	.weak	platform_get_stack
 	.weak	platform_is_primary_cpu
-	.weak	platform_set_coherent_stack
 	.weak	platform_check_mpidr
 	.weak	plat_report_exception
 
 	/* -----------------------------------------------------
-	 * Coherent stack sizes for debug and release builds
-	 * -----------------------------------------------------
-	 */
-#if DEBUG
-#define PCPU_DV_MEM_STACK_SIZE	0x400
-#else
-#define PCPU_DV_MEM_STACK_SIZE	0x300
-#endif
-
-	/* -----------------------------------------------------
-	 * unsigned long long platform_set_coherent_stack
-	 *                                    (unsigned mpidr);
-	 * For a given mpidr, this function returns the stack
-	 * pointer allocated in device memory. This stack can
-	 * be used by C code which enables/disables the SCTLR.M
-	 * SCTLR.C bit e.g. while powering down a cpu
-	 * -----------------------------------------------------
-	 */
-func platform_set_coherent_stack
-	mov	x5, x30 // lr
-	bl	platform_get_core_pos
-	add	x0, x0, #1
-	mov	x1, #PCPU_DV_MEM_STACK_SIZE
-	mul	x0, x0, x1
-	ldr	x1, =pcpu_dv_mem_stack
-	add	sp, x1, x0
-	ret	x5
-
-
-	/* -----------------------------------------------------
 	 *  int platform_get_core_pos(int mpidr);
 	 *  With this function: CorePos = (ClusterId * 4) +
 	 *  				  CoreId
@@ -84,7 +50,6 @@
 	add	x0, x1, x0, LSR #6
 	ret
 
-
 	/* -----------------------------------------------------
 	 * void platform_is_primary_cpu (unsigned int mpid);
 	 *
@@ -99,30 +64,6 @@
 	ret
 
 	/* -----------------------------------------------------
-	 * void platform_get_stack (unsigned long mpidr)
-	 * -----------------------------------------------------
-	 */
-func platform_get_stack
-	mov	x10, x30 // lr
-	bl	platform_get_core_pos
-	add	x0, x0, #1
-	mov	x1, #PLATFORM_STACK_SIZE
-	mul	x0, x0, x1
-	ldr	x1, =platform_normal_stacks
-	add	x0, x1, x0
-	ret	x10
-
-	/* -----------------------------------------------------
-	 * void platform_set_stack (unsigned long mpidr)
-	 * -----------------------------------------------------
-	 */
-func platform_set_stack
-	mov	x9, x30 // lr
-	bl	platform_get_stack
-	mov	sp, x0
-	ret	x9
-
-	/* -----------------------------------------------------
 	 * Placeholder function which should be redefined by
 	 * each platform.
 	 * -----------------------------------------------------
@@ -138,16 +79,3 @@
 	 */
 func plat_report_exception
 	ret
-
-	/* -----------------------------------------------------
-	 * Per-cpu stacks in device memory.
-	 * Used for C code just before power down or right after
-	 * power up when the MMU or caches need to be turned on
-	 * or off. Each cpu gets a stack of 512 bytes.
-	 * -----------------------------------------------------
-	 */
-	.section	tzfw_coherent_mem, "aw", %nobits; .align 6
-
-pcpu_dv_mem_stack:
-	/* Zero fill */
-	.space (PLATFORM_CORE_COUNT * PCPU_DV_MEM_STACK_SIZE), 0
diff --git a/plat/common/aarch64/platform_mp_stack.S b/plat/common/aarch64/platform_mp_stack.S
new file mode 100644
index 0000000..1438814
--- /dev/null
+++ b/plat/common/aarch64/platform_mp_stack.S
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <platform.h>
+#include <asm_macros.S>
+
+
+	.local	pcpu_dv_mem_stack
+	.local	platform_normal_stacks
+	.weak	platform_set_stack
+	.weak	platform_get_stack
+	.weak	platform_set_coherent_stack
+
+
+	/* -----------------------------------------------------
+	 * void platform_set_coherent_stack (unsigned long mpidr)
+	 *
+	 * For a given CPU, this function sets the stack pointer
+	 * to a stack allocated in device memory. This stack can
+	 * be used by C code which enables/disables the SCTLR.M
+	 * and SCTLR.C bits e.g. while powering down a cpu
+	 * -----------------------------------------------------
+	 */
+func platform_set_coherent_stack
+	mov x5, x30 // lr
+	get_mp_stack pcpu_dv_mem_stack, PCPU_DV_MEM_STACK_SIZE
+	mov sp, x0
+	ret x5
+
+	/* -----------------------------------------------------
+	 * unsigned long platform_get_stack (unsigned long mpidr)
+	 *
+	 * For a given CPU, this function returns the stack
+	 * pointer for a stack allocated in normal memory.
+	 * -----------------------------------------------------
+	 */
+func platform_get_stack
+	mov x10, x30 // lr
+	get_mp_stack platform_normal_stacks, PLATFORM_STACK_SIZE
+	ret x10
+
+	/* -----------------------------------------------------
+	 * void platform_set_stack (unsigned long mpidr)
+	 *
+	 * For a given CPU, this function sets the stack pointer
+	 * to a stack allocated in normal memory.
+	 * -----------------------------------------------------
+	 */
+func platform_set_stack
+	mov x9, x30 // lr
+	bl  platform_get_stack
+	mov sp, x0
+	ret x9
+
+	/* -----------------------------------------------------
+	 * Per-cpu stacks in normal memory.
+	 * Used for C code during runtime execution (when coherent
+	 * stacks are not required).
+	 * Each cpu gets a stack of PLATFORM_STACK_SIZE bytes.
+	 * -----------------------------------------------------
+	 */
+declare_stack platform_normal_stacks, tzfw_normal_stacks, \
+		PLATFORM_STACK_SIZE, PLATFORM_CORE_COUNT
+
+	/* -----------------------------------------------------
+	 * Per-cpu stacks in device memory.
+	 * Used for C code just before power down or right after
+	 * power up when the MMU or caches need to be turned on
+	 * or off.
+	 * Each cpu gets a stack of PCPU_DV_MEM_STACK_SIZE bytes.
+	 * -----------------------------------------------------
+	 */
+declare_stack pcpu_dv_mem_stack, tzfw_coherent_mem, \
+		PCPU_DV_MEM_STACK_SIZE, PLATFORM_CORE_COUNT
diff --git a/plat/common/aarch64/platform_up_stack.S b/plat/common/aarch64/platform_up_stack.S
new file mode 100644
index 0000000..b321a4e
--- /dev/null
+++ b/plat/common/aarch64/platform_up_stack.S
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <platform.h>
+#include <asm_macros.S>
+
+
+	.local	pcpu_dv_mem_stack
+	.local	platform_normal_stacks
+	.globl	platform_set_stack
+	.globl	platform_get_stack
+	.globl	platform_set_coherent_stack
+
+
+	/* -----------------------------------------------------
+	 * void platform_set_coherent_stack (unsigned long)
+	 *
+	 * For cold-boot BL images, only the primary CPU needs a
+	 * stack. This function sets the stack pointer to a stack
+	 * allocated in device memory.
+	 * -----------------------------------------------------
+	 */
+func platform_set_coherent_stack
+	get_up_stack pcpu_dv_mem_stack, PCPU_DV_MEM_STACK_SIZE
+	mov sp, x0
+	ret
+
+
+	/* -----------------------------------------------------
+	 * unsigned long platform_get_stack (unsigned long)
+	 *
+	 * For cold-boot BL images, only the primary CPU needs a
+	 * stack. This function returns the stack pointer for a
+	 * stack allocated in normal memory.
+	 * -----------------------------------------------------
+	 */
+func platform_get_stack
+	get_up_stack platform_normal_stacks, PLATFORM_STACK_SIZE
+	ret
+
+	/* -----------------------------------------------------
+	 * void platform_set_stack (unsigned long)
+	 *
+	 * For cold-boot BL images, only the primary CPU needs a
+	 * stack. This function sets the stack pointer to a stack
+	 * allocated in normal memory.
+	 * -----------------------------------------------------
+	 */
+func platform_set_stack
+	get_up_stack platform_normal_stacks, PLATFORM_STACK_SIZE
+	mov sp, x0
+	ret
+
+	/* -----------------------------------------------------
+	 * Single cpu stack in normal memory.
+	 * Used for C code during boot, PLATFORM_STACK_SIZE bytes
+	 * are allocated
+	 * -----------------------------------------------------
+	 */
+declare_stack platform_normal_stacks, tzfw_normal_stacks, \
+		PLATFORM_STACK_SIZE, 1
+
+	/* -----------------------------------------------------
+	 * Single cpu stack in device/coherent memory.
+	 * PCPU_DV_MEM_STACK_SIZE bytes are allocated.
+	 * -----------------------------------------------------
+	 */
+declare_stack pcpu_dv_mem_stack, tzfw_coherent_mem, \
+		PCPU_DV_MEM_STACK_SIZE, 1
diff --git a/plat/fvp/aarch64/plat_common.c b/plat/fvp/aarch64/plat_common.c
index a5d9f1d..6a5e5a7 100644
--- a/plat/fvp/aarch64/plat_common.c
+++ b/plat/fvp/aarch64/plat_common.c
@@ -34,10 +34,6 @@
 #include <platform.h>
 #include <xlat_tables.h>
 
-unsigned char platform_normal_stacks[PLATFORM_STACK_SIZE][PLATFORM_CORE_COUNT]
-__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE),
-		section("tzfw_normal_stacks")));
-
 /*******************************************************************************
  * This array holds the characteristics of the differences between the three
  * FVP platforms (Base, A53_A57 & Foundation). It will be populated during cold
diff --git a/plat/fvp/platform.h b/plat/fvp/platform.h
index 5f4adc3..c594357 100644
--- a/plat/fvp/platform.h
+++ b/plat/fvp/platform.h
@@ -47,7 +47,16 @@
 /*******************************************************************************
  * Generic platform constants
  ******************************************************************************/
-#define PLATFORM_STACK_SIZE		0x800
+
+/* Size of cacheable stacks */
+#define PLATFORM_STACK_SIZE	0x800
+
+/* Size of coherent stacks for debug and release builds */
+#if DEBUG
+#define PCPU_DV_MEM_STACK_SIZE	0x400
+#else
+#define PCPU_DV_MEM_STACK_SIZE	0x300
+#endif
 
 #define FIRMWARE_WELCOME_STR		"Booting trusted firmware boot loader stage 1\n\r"
 
diff --git a/plat/fvp/platform.mk b/plat/fvp/platform.mk
index e8de098..3a918e9 100644
--- a/plat/fvp/platform.mk
+++ b/plat/fvp/platform.mk
@@ -79,14 +79,17 @@
 BL1_SOURCES		+=	bl1_plat_setup.c			\
 				bl1_plat_helpers.S			\
 				plat_helpers.S				\
+				platform_up_stack.S			\
 				plat_common.c				\
 				cci400.c
 
 BL2_SOURCES		+=	bl2_plat_setup.c			\
+				platform_up_stack.S			\
 				plat_common.c
 
 BL31_SOURCES		+=	bl31_plat_setup.c			\
 				plat_helpers.S				\
+				platform_mp_stack.S			\
 				plat_common.c				\
 				plat_pm.c				\
 				plat_topology.c				\