Merge remote-tracking branch 'u-boot/master'
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
index d846236..ab8c089 100644
--- a/arch/arm/cpu/armv8/cache.S
+++ b/arch/arm/cpu/armv8/cache.S
@@ -112,7 +112,7 @@
 
 ENTRY(__asm_invalidate_dcache_all)
 	mov	x16, lr
-	mov	x0, #0xffff
+	mov	x0, #0x1
 	bl	__asm_dcache_all
 	mov	lr, x16
 	ret
diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S
index afd4f10..4c3a94a 100644
--- a/arch/arm/lib/crt0.S
+++ b/arch/arm/lib/crt0.S
@@ -25,7 +25,8 @@
  *    the GD ('global data') structure, both located in some readily
  *    available RAM (SRAM, locked cache...). In this context, VARIABLE
  *    global data, initialized or not (BSS), are UNAVAILABLE; only
- *    CONSTANT initialized data are available.
+ *    CONSTANT initialized data are available. GD should be zeroed
+ *    before board_init_f() is called.
  *
  * 2. Call board_init_f(). This function prepares the hardware for
  *    execution from system RAM (DRAM, DDR...) As system RAM may not
@@ -34,24 +35,29 @@
  *    data include the relocation destination, the future stack, and
  *    the future GD location.
  *
- * (the following applies only to non-SPL builds)
- *
  * 3. Set up intermediate environment where the stack and GD are the
  *    ones allocated by board_init_f() in system RAM, but BSS and
  *    initialized non-const data are still not available.
  *
+ * 4a.For U-Boot proper (not SPL), call relocate_code(). This function
+ *    relocates U-Boot from its current location into the relocation
+ *    destination computed by board_init_f().
+ *
- * 4. Call relocate_code(). This function relocates U-Boot from its
- *    current location into the relocation destination computed by
- *    board_init_f().
+ * 4b.For SPL, board_init_f() just returns (to crt0). There is no
+ *    code relocation in SPL.
  *
  * 5. Set up final environment for calling board_init_r(). This
  *    environment has BSS (initialized to 0), initialized non-const
  *    data (initialized to their intended value), and stack in system
- *    RAM. GD has retained values set by board_init_f(). Some CPUs
- *    have some work left to do at this point regarding memory, so
- *    call c_runtime_cpu_setup.
+ *    RAM (for SPL moving the stack and GD into RAM is optional - see
+ *    CONFIG_SPL_STACK_R). GD has retained values set by board_init_f().
+ *
+ * 6. For U-Boot proper (not SPL), some CPUs have some work left to do
+ *    at this point regarding memory, so call c_runtime_cpu_setup.
+ *
+ * 7. Branch to board_init_r().
  *
- * 6. Branch to board_init_r().
+ * For more information see 'Board Initialisation Flow' in README.
  */
 
 /*
diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S
index 98a906e..8b34e04 100644
--- a/arch/arm/lib/crt0_64.S
+++ b/arch/arm/lib/crt0_64.S
@@ -27,7 +27,8 @@
  *    the GD ('global data') structure, both located in some readily
  *    available RAM (SRAM, locked cache...). In this context, VARIABLE
  *    global data, initialized or not (BSS), are UNAVAILABLE; only
- *    CONSTANT initialized data are available.
+ *    CONSTANT initialized data are available. GD should be zeroed
+ *    before board_init_f() is called.
  *
  * 2. Call board_init_f(). This function prepares the hardware for
  *    execution from system RAM (DRAM, DDR...) As system RAM may not
@@ -36,24 +37,31 @@
  *    data include the relocation destination, the future stack, and
  *    the future GD location.
  *
- * (the following applies only to non-SPL builds)
- *
  * 3. Set up intermediate environment where the stack and GD are the
  *    ones allocated by board_init_f() in system RAM, but BSS and
  *    initialized non-const data are still not available.
  *
- * 4. Call relocate_code(). This function relocates U-Boot from its
- *    current location into the relocation destination computed by
- *    board_init_f().
+ * 4a.For U-Boot proper (not SPL), call relocate_code(). This function
+ *    relocates U-Boot from its current location into the relocation
+ *    destination computed by board_init_f().
+ *
+ * 4b.For SPL, board_init_f() just returns (to crt0). There is no
+ *    code relocation in SPL.
  *
  * 5. Set up final environment for calling board_init_r(). This
  *    environment has BSS (initialized to 0), initialized non-const
  *    data (initialized to their intended value), and stack in system
- *    RAM. GD has retained values set by board_init_f(). Some CPUs
- *    have some work left to do at this point regarding memory, so
- *    call c_runtime_cpu_setup.
+ *    RAM (for SPL moving the stack and GD into RAM is optional - see
+ *    CONFIG_SPL_STACK_R). GD has retained values set by board_init_f().
+ *
+ * TODO: For SPL, implement stack relocation on AArch64.
  *
- * 6. Branch to board_init_r().
+ * 6. For U-Boot proper (not SPL), some CPUs have some work left to do
+ *    at this point regarding memory, so call c_runtime_cpu_setup.
+ *
+ * 7. Branch to board_init_r().
+ *
+ * For more information see 'Board Initialisation Flow' in README.
  */
 
 ENTRY(_main)
@@ -106,6 +114,8 @@
  */
 	bl	c_runtime_cpu_setup		/* still call old routine */
 
+/* TODO: For SPL, call spl_relocate_stack_gd() to relocate the stack and GD */
+
 /*
  * Clear BSS section
  */