sunxi: Add arm64 FEL support

So far we did not support the BootROM based FEL USB debug mode on the
64-bit builds for Allwinner SoCs: The BootROM is using AArch32, but the
SPL runs in AArch64.
Returning back to AArch32 was not working as expected, since the RMR
reset into 32-bit mode always starts execution in the BootROM, but not
in the FEL routine.

After some debug and research and with help via IRC, the CPU hotplug
mechanism emerged as a solution: If a certain R_CPUCFG register contains
some magic, the BootROM will immediately branch to an address stored in
some other register. This works well for our purposes.

Enable the FEL feature by providing early AArch32 code to first save the
FEL state, *before* initially entering AArch64.
If we eventually determine that we should return to FEL, we reset back
into AArch32, and use the CPU hotplug mechanism to run some small
AArch32 code snippet that restores the initially saved FEL state.

That allows the normal AArch64 SPL build to be loaded via the sunxi-fel
tool, with it returning into FEL mode, so that other payloads can be
transferred via FEL as well.

Tested on A64, H5 and H6.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Tested-by: Icenowy Zheng <icenowy@aosc.io>
Acked-by: Jagan Teki <jagan@amarulasolutions.com>
Tested-by: Priit Laes <plaes@plaes.org> (on Olimex A64-Olinuxino)
diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
index 93d26f9..f7b4a5e 100644
--- a/arch/arm/cpu/armv8/Makefile
+++ b/arch/arm/cpu/armv8/Makefile
@@ -27,6 +27,8 @@
 
 ifndef CONFIG_SPL_BUILD
 obj-$(CONFIG_ARMV8_SPIN_TABLE) += spin_table.o spin_table_v8.o
+else
+obj-$(CONFIG_ARCH_SUNXI) += fel_utils.o
 endif
 obj-$(CONFIG_$(SPL_)ARMV8_SEC_FIRMWARE_SUPPORT) += sec_firmware.o sec_firmware_asm.o
 
diff --git a/arch/arm/cpu/armv8/fel_utils.S b/arch/arm/cpu/armv8/fel_utils.S
new file mode 100644
index 0000000..9510dcd
--- /dev/null
+++ b/arch/arm/cpu/armv8/fel_utils.S
@@ -0,0 +1,78 @@
+/*
+ * Utility functions for FEL mode, when running SPL in AArch64.
+ *
+ * Copyright (c) 2017 Arm Ltd.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <asm/system.h>
+#include <linux/linkage.h>
+
+/*
+ * We don't overwrite save_boot_params() here, to save the FEL state upon
+ * entry, since this would run *after* the RMR reset, which clobbers that
+ * state.
+ * Instead we store the state _very_ early in the boot0 hook, *before*
+ * resetting to AArch64.
+ */
+
+/*
+ * The FEL routines in BROM run in AArch32.
+ * Reset back into 32-bit mode here and restore the saved FEL state
+ * afterwards.
+ * Resetting back into AArch32/EL3 using the RMR always enters the BROM,
+ * but we can use the CPU hotplug mechanism to branch back to our code
+ * immediately.
+ */
+ENTRY(return_to_fel)
+	/*
+	 * the RMR reset will clear all registers, so save the arguments
+	 * (LR and SP) in the fel_stash structure, which we read anyways later
+	 */
+	adr	x2, fel_stash
+	str	w0, [x2]
+	str	w1, [x2, #4]
+
+	adr	x1, fel_stash_addr	// to find the fel_stash address in AA32
+	str	w2, [x1]
+
+	ldr	x0, =0xfa50392f		// CPU hotplug magic
+#ifdef CONFIG_MACH_SUN50I_H6
+	ldr	x2, =(SUNXI_RTC_BASE + 0x1b8)	// BOOT_CPU_HP_FLAG_REG
+	str	w0, [x2], #0x4
+#else
+	ldr	x2, =(SUNXI_CPUCFG_BASE + 0x1a4) // offset for CPU hotplug base
+	str	w0, [x2, #0x8]
+#endif
+	adr	x0, back_in_32
+	str	w0, [x2]
+
+	dsb	sy
+	isb	sy
+	mov	x0, #2			// RMR reset into AArch32
+	dsb	sy
+	msr	RMR_EL3, x0
+	isb	sy
+1:	wfi
+	b	1b
+
+/* AArch32 code to restore the state from fel_stash and return back to FEL. */
+back_in_32:
+	.word	0xe59f0028 	// ldr	r0, [pc, #40]	; load fel_stash address
+	.word	0xe5901008 	// ldr	r1, [r0, #8]
+	.word	0xe129f001 	// msr	CPSR_fc, r1
+	.word	0xf57ff06f	// isb
+	.word	0xe590d000 	// ldr	sp, [r0]
+	.word	0xe590e004 	// ldr	lr, [r0, #4]
+	.word	0xe5901010 	// ldr	r1, [r0, #16]
+	.word	0xee0c1f10 	// mcr	15, 0, r1, cr12, cr0, {0} ; VBAR
+	.word	0xe590100c 	// ldr	r1, [r0, #12]
+	.word	0xee011f10 	// mcr	15, 0, r1, cr1, cr0, {0}  ; SCTLR
+	.word	0xf57ff06f	// isb
+	.word	0xe12fff1e 	// bx	lr		; return to FEL
+fel_stash_addr:
+	.word   0x00000000	// receives fel_stash addr, by AA64 code above
+ENDPROC(return_to_fel)