arm: spl: Allow board_init_r() to run with a larger stack

At present SPL uses a single stack, either CONFIG_SPL_STACK or
CONFIG_SYS_INIT_SP_ADDR. Since some SPL features (such as MMC and
environment) require a lot of stack, some boards set CONFIG_SPL_STACK to
point into SDRAM. They then set up SDRAM very early, before board_init_f(),
so that the larger stack can be used.

This is an abuse of lowlevel_init(). That function should only be used for
essential start-up code which cannot be delayed. An example of a valid use is
when only part of the SPL code is visible/executable, and the SoC must be set
up so that board_init_f() can be reached. It should not be used for SDRAM
init, console init, etc.

Add a CONFIG_SPL_STACK_R option, which allows the stack to be moved to a new
address before board_init_r() is called in SPL.

The expected SPL flow (for CONFIG_SPL_FRAMEWORK) is documented in the README.

Signed-off-by: Simon Glass <sjg@chromium.org>
For version 1:
Acked-by: Albert ARIBAUD <albert.u.boot@aribaud.net>
Reviewed-by: Stefan Roese <sr@denx.de>
Tested-by: Bo Shen <voice.shen@atmel.com>
Acked-by: Bo Shen <voice.shen@atmel.com>
Acked-by: Heiko Schocher <hs@denx.de>
Tested-by: Heiko Schocher <hs@denx.de>

Signed-off-by: Tom Rini <trini@konsulko.com>
diff --git a/Kconfig b/Kconfig
index 91a0618..b879461 100644
--- a/Kconfig
+++ b/Kconfig
@@ -96,6 +96,24 @@
 	help
 	  If you want to build SPL as well as the normal image, say Y.
 
+config SPL_STACK_R
+	depends on SPL
+	bool "Enable SDRAM location for SPL stack"
+	help
+	  SPL starts off execution in SRAM and thus typically has only a small
+	  stack available. Since SPL sets up DRAM while in its board_init_f()
+	  function, it is possible for the stack to move there before
+	  board_init_r() is reached. This option enables a special SDRAM
+	  location for the SPL stack. U-Boot SPL switches to this after
+	  board_init_f() completes, and before board_init_r() starts.
+
+config SPL_STACK_R_ADDR
+	depends on SPL_STACK_R
+	hex "SDRAM location for SPL stack"
+	help
+	  Specify the address in SDRAM for the SPL stack. This will be set up
+	  before board_init_r() is called.
+
 config TPL
 	bool
 	depends on SPL && SUPPORT_TPL
diff --git a/README b/README
index febefb5..3547ead 100644
--- a/README
+++ b/README
@@ -273,6 +273,75 @@
 See board/sandbox/README.sandbox for more details.
 
 
+Board Initialisation Flow:
+--------------------------
+
+This is the intended start-up flow for boards. This should apply for both
+SPL and U-Boot proper (i.e. they both follow the same rules). At present SPL
+mostly uses a separate code path, but the function names and roles of each
+function are the same. Some boards or architectures may not conform to this.
+At least most ARM boards which use CONFIG_SPL_FRAMEWORK conform to this.
+
+Execution starts with start.S with three functions called during init after
+that. The purpose and limitations of each is described below.
+
+lowlevel_init():
+	- purpose: essential init to permit execution to reach board_init_f()
+	- no global_data or BSS
+	- there is no stack (ARMv7 may have one but it will soon be removed)
+	- must not set up SDRAM or use console
+	- must only do the bare minimum to allow execution to continue to
+		board_init_f()
+	- this is almost never needed
+	- return normally from this function
+
+board_init_f():
+	- purpose: set up the machine ready for running board_init_r():
+		i.e. SDRAM and serial UART
+	- global_data is available
+	- stack is in SRAM
+	- BSS is not available, so you cannot use global/static variables,
+		only stack variables and global_data
+
+	Non-SPL-specific notes:
+	- dram_init() is called to set up DRAM. If already done in SPL this
+		can do nothing
+
+	SPL-specific notes:
+	- you can override the entire board_init_f() function with your own
+		version as needed.
+	- preloader_console_init() can be called here in extremis
+	- should set up SDRAM, and anything needed to make the UART work
+	- there is no need to clear BSS, it will be done by crt0.S
+	- must return normally from this function (don't call board_init_r()
+		directly)
+
+Here the BSS is cleared. For SPL, if CONFIG_SPL_STACK_R is defined, then at
+this point the stack and global_data are relocated to below
+CONFIG_SPL_STACK_R_ADDR. For non-SPL, U-Boot is relocated to run at the top of
+memory.
+
+board_init_r():
+	- purpose: main execution, common code
+	- global_data is available
+	- SDRAM is available
+	- BSS is available, all static/global variables can be used
+	- execution eventually continues to main_loop()
+
+	Non-SPL-specific notes:
+	- U-Boot is relocated to the top of memory and is now running from
+		there.
+
+	SPL-specific notes:
+	- stack is optionally in SDRAM, if CONFIG_SPL_STACK_R is defined and
+		CONFIG_SPL_STACK_R_ADDR points into SDRAM
+	- preloader_console_init() can be called here - typically this is
+		done by defining CONFIG_SPL_BOARD_INIT and then supplying a
+		spl_board_init() function containing this call
+	- loads U-Boot or (in falcon mode) Linux
+
+
+
 Configuration Options:
 ----------------------
 
diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S
index 22df3e5..7939ced 100644
--- a/arch/arm/lib/crt0.S
+++ b/arch/arm/lib/crt0.S
@@ -113,7 +113,14 @@
 /* Set up final (full) environment */
 
 	bl	c_runtime_cpu_setup	/* we still call old routine here */
-
+#endif
+#if !defined(CONFIG_SPL_BUILD) || defined(CONFIG_SPL_FRAMEWORK)
+# ifdef CONFIG_SPL_BUILD
+	/* Use a DRAM stack for the rest of SPL, if requested */
+	bl	spl_relocate_stack_gd
+	cmp	r0, #0
+	movne	sp, r0
+# endif
 	ldr	r0, =__bss_start	/* this is auto-relocated! */
 	ldr	r1, =__bss_end		/* this is auto-relocated! */
 
@@ -124,9 +131,10 @@
 	addlo	r0, r0, #4		/* move to next */
 	blo	clbss_l
 
+#if ! defined(CONFIG_SPL_BUILD)
 	bl coloured_LED_init
 	bl red_led_on
-
+#endif
 	/* call board_init_r(gd_t *id, ulong dest_addr) */
 	mov     r0, r9                  /* gd_t */
 	ldr	r1, [r9, #GD_RELOCADDR]	/* dest_addr */
@@ -134,7 +142,6 @@
 	ldr	pc, =board_init_r	/* this is auto-relocated! */
 
 	/* we should not return here. */
-
 #endif
 
 ENDPROC(_main)
diff --git a/common/spl/spl.c b/common/spl/spl.c
index ded0f30..cd75bbc 100644
--- a/common/spl/spl.c
+++ b/common/spl/spl.c
@@ -281,3 +281,38 @@
 	spl_display_print();
 #endif
 }
+
+/**
+ * spl_relocate_stack_gd() - Relocate stack ready for board_init_r() execution
+ *
+ * Sometimes board_init_f() runs with a stack in SRAM but we want to use SDRAM
+ * for the main board_init_r() execution. This is typically because we need
+ * more stack space for things like the MMC sub-system.
+ *
+ * When CONFIG_SPL_STACK_R is enabled, global_data is copied to just below
+ * CONFIG_SPL_STACK_R_ADDR (rounded down to 8 bytes), gd is pointed at the
+ * copy and the BSS is cleared. The caller must load sp with the result.
+ *
+ * @return new stack location, or 0 to use the same stack
+ */
+ulong spl_relocate_stack_gd(void)
+{
+#ifdef CONFIG_SPL_STACK_R
+	gd_t *new_gd;
+	ulong ptr;
+
+	/* Use the _ADDR option: CONFIG_SPL_STACK_R is just the enable */
+	ptr = CONFIG_SPL_STACK_R_ADDR - sizeof(gd_t);
+	ptr &= ~7;
+	new_gd = (gd_t *)ptr;
+	memcpy(new_gd, (void *)gd, sizeof(gd_t));
+	gd = new_gd;
+
+	/* Clear the BSS. */
+	memset(__bss_start, 0, __bss_end - __bss_start);
+
+	return ptr;
+#else
+	return 0;
+#endif
+}