x86: Improve the trampoline in 64-bit mode

At present this leaves the stack at the pre-relocation value. This is not
ideal since we want to have U-Boot running entirely from the top of
memory.

In addition, the new global_data pointer is not actually used, since
the global_data pointer itself is relocated, then the pre-relocation value
is changed, so the effective value (after relocation) does not update.

Adjust the implementation to follow the 32-bit code more closely, with a
trampoline function which is passed the new stack and global_data pointer.
This ensures that the correct values come through even when relocating.

Signed-off-by: Simon Glass <sjg@chromium.org>
Reviewed-by: Bin Meng <bmeng.cn@gmail.com>
diff --git a/common/board_f.c b/common/board_f.c
index 334d04a..e5969ec 100644
--- a/common/board_f.c
+++ b/common/board_f.c
@@ -729,8 +729,7 @@
 #endif
 
 /* ARM calls relocate_code from its crt0.S */
-#if !defined(CONFIG_ARM) && !defined(CONFIG_SANDBOX) && \
-		!CONFIG_IS_ENABLED(X86_64)
+#if !defined(CONFIG_ARM) && !defined(CONFIG_SANDBOX)
 
 static int jump_to_copy(void)
 {
@@ -752,7 +751,11 @@
 	 * (CPU cache)
 	 */
 	arch_setup_gd(gd->new_gd);
-	board_init_f_r_trampoline(gd->start_addr_sp);
+# if CONFIG_IS_ENABLED(X86_64)
+		board_init_f_r_trampoline64(gd->new_gd, gd->start_addr_sp);
+# else
+		board_init_f_r_trampoline(gd->start_addr_sp);
+# endif
 #else
 	relocate_code(gd->start_addr_sp, gd->new_gd, gd->relocaddr);
 #endif
@@ -967,8 +970,7 @@
 	 * watchdog device is not serviced is as small as possible.
 	 */
 	cyclic_unregister_all,
-#if !defined(CONFIG_ARM) && !defined(CONFIG_SANDBOX) && \
-		!CONFIG_IS_ENABLED(X86_64)
+#if !defined(CONFIG_ARM) && !defined(CONFIG_SANDBOX)
 	jump_to_copy,
 #endif
 	NULL,