Remove coherent stack usage from the cold boot path

This patch reworks the cold boot path across the BL1, BL2, BL3-1 and BL3-2 boot
loader stages to not use stacks allocated in coherent memory for early platform
setup and enabling the MMU. Stacks allocated in normal memory are used instead.

Attributes for stack memory change from nGnRnE when the MMU is disabled to
Normal WBWA Inner-shareable when the MMU and data cache are enabled. It is
possible for the CPU to read stale stack memory after the MMU is enabled from
another CPUs cache. Hence, it is unsafe to turn on the MMU and data cache while
using normal stacks when multiple CPUs are a part of the same coherency
domain. It is safe to do so in the cold boot path as only the primary cpu
executes it. The secondary cpus are in a quiescent state.

This patch does not remove the allocation of coherent stack memory. That is done
in a subsequent patch.

Change-Id: I12c80b7c7ab23506d425c5b3a8a7de693498f830
diff --git a/bl1/aarch64/bl1_entrypoint.S b/bl1/aarch64/bl1_entrypoint.S
index 50cfae6..ac6d913 100644
--- a/bl1/aarch64/bl1_entrypoint.S
+++ b/bl1/aarch64/bl1_entrypoint.S
@@ -130,14 +130,16 @@
 	ldr	x2, =__DATA_SIZE__
 	bl	memcpy16
 
-	/* ---------------------------------------------
-	 * Give ourselves a small coherent stack to
-	 * ease the pain of initializing the MMU and
-	 * CCI in assembler
-	 * ---------------------------------------------
+	/* --------------------------------------------
+	 * Allocate a stack whose memory will be marked
+	 * as Normal-IS-WBWA when the MMU is enabled.
+	 * There is no risk of reading stale stack
+	 * memory after enabling the MMU as only the
+	 * primary cpu is running at the moment.
+	 * --------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
 
 	/* ---------------------------------------------
 	 * Architectural init. can be generic e.g.
@@ -150,14 +152,6 @@
 	bl	bl1_early_platform_setup
 	bl	bl1_plat_arch_setup
 
-	/* ---------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
-	 * ---------------------------------------------
-	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
-
 	/* --------------------------------------------------
 	 * Initialize platform and jump to our c-entry point
 	 * for this type of reset. Panic if it returns
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index 09eadff..c615baf 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -96,12 +96,15 @@
 	bl	zeromem16
 
 	/* --------------------------------------------
-	 * Give ourselves a small coherent stack to
-	 * ease the pain of initializing the MMU
+	 * Allocate a stack whose memory will be marked
+	 * as Normal-IS-WBWA when the MMU is enabled.
+	 * There is no risk of reading stale stack
+	 * memory after enabling the MMU as only the
+	 * primary cpu is running at the moment.
 	 * --------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
 
 	/* ---------------------------------------------
 	 * Perform early platform setup & platform
@@ -113,14 +116,6 @@
 	bl	bl2_plat_arch_setup
 
 	/* ---------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
-	 * ---------------------------------------------
-	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
-
-	/* ---------------------------------------------
 	 * Jump to main function.
 	 * ---------------------------------------------
 	 */
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 6e48e31..1023983 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -152,12 +152,15 @@
 	msr	spsel, #0
 
 	/* --------------------------------------------
-	 * Give ourselves a small coherent stack to
-	 * ease the pain of initializing the MMU
+	 * Allocate a stack whose memory will be marked
+	 * as Normal-IS-WBWA when the MMU is enabled.
+	 * There is no risk of reading stale stack
+	 * memory after enabling the MMU as only the
+	 * primary cpu is running at the moment.
 	 * --------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
 
 	/* ---------------------------------------------
 	 * Perform platform specific early arch. setup
@@ -175,14 +178,6 @@
 	bl	bl31_plat_arch_setup
 
 	/* ---------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
-	 * ---------------------------------------------
-	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
-
-	/* ---------------------------------------------
 	 * Jump to main function.
 	 * ---------------------------------------------
 	 */
diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S
index 479ca59..c5ec9e3 100644
--- a/bl32/tsp/aarch64/tsp_entrypoint.S
+++ b/bl32/tsp/aarch64/tsp_entrypoint.S
@@ -111,12 +111,15 @@
 	bl	zeromem16
 
 	/* --------------------------------------------
-	 * Give ourselves a small coherent stack to
-	 * ease the pain of initializing the MMU
+	 * Allocate a stack whose memory will be marked
+	 * as Normal-IS-WBWA when the MMU is enabled.
+	 * There is no risk of reading stale stack
+	 * memory after enabling the MMU as only the
+	 * primary cpu is running at the moment.
 	 * --------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	bl	platform_set_coherent_stack
+	bl	platform_set_stack
 
 	/* ---------------------------------------------
 	 * Perform early platform setup & platform
@@ -127,14 +130,6 @@
 	bl	bl32_plat_arch_setup
 
 	/* ---------------------------------------------
-	 * Give ourselves a stack allocated in Normal
-	 * -IS-WBWA memory
-	 * ---------------------------------------------
-	 */
-	mrs	x0, mpidr_el1
-	bl	platform_set_stack
-
-	/* ---------------------------------------------
 	 * Jump to main function.
 	 * ---------------------------------------------
 	 */