x86: Implement fully relocatable image

u-boot.bin can be loaded at any 4-byte aligned memory location and
'jumped' to directly with the 'go' command, using the load address as the
start address. Doing so performs a 'warm boot', which skips memory
initialisation and other low-level initialisation, relocates U-Boot to
upper memory and then starts U-Boot in RAM as for a normal 'cold boot'.
diff --git a/arch/i386/cpu/start.S b/arch/i386/cpu/start.S
index 8fdcd81..829468f 100644
--- a/arch/i386/cpu/start.S
+++ b/arch/i386/cpu/start.S
@@ -118,6 +118,11 @@
 
 	wbinvd
 
+	/* Load offset = run-time address of label 1 minus its link-time address */
+	call	1f
+1:	popl	%ecx
+	subl	$1b, %ecx
+
 	/* Set the upper memory limit parameter */
 	subl	$CONFIG_SYS_STACK_SIZE, %eax
 
@@ -127,6 +132,7 @@
 	/* %eax points to the global data structure */
 	movl	%esp, (GD_RAM_SIZE * 4)(%eax)
 	movl	%ebx, (GD_FLAGS * 4)(%eax)
+	movl	%ecx, (GD_LOAD_OFF * 4)(%eax)
 
 	call	board_init_f	/* Enter, U-boot! */
 
diff --git a/arch/i386/include/asm/global_data.h b/arch/i386/include/asm/global_data.h
index a15c598..5971123 100644
--- a/arch/i386/include/asm/global_data.h
+++ b/arch/i386/include/asm/global_data.h
@@ -41,6 +41,7 @@
 	unsigned long	baudrate;
 	unsigned long	have_console;	/* serial_init() was called */
 	unsigned long	reloc_off;	/* Relocation Offset */
+	unsigned long	load_off;	/* Load Offset */
 	unsigned long	env_addr;	/* Address  of Environment struct */
 	unsigned long	env_valid;	/* Checksum of Environment valid? */
 	unsigned long	cpu_clk;	/* CPU clock in Hz!		*/
@@ -56,20 +57,21 @@
 #endif
 
 /* Word Offsets into Global Data - MUST match struct gd_t */
-#define GD_BD    0
-#define GD_FLAGS 1
-#define GD_BAUDRATE  2
-#define GD_HAVE_CONSOLE  3
-#define GD_RELOC_OFF 4
-#define GD_ENV_ADDR  5
-#define GD_ENV_VALID 6
-#define GD_CPU_CLK 7
-#define GD_BUS_CLK 8
-#define GD_RAM_SIZE  9
-#define GD_RESET_STATUS  10
-#define GD_JT    11
+#define GD_BD		0
+#define GD_FLAGS	1
+#define GD_BAUDRATE	2
+#define GD_HAVE_CONSOLE	3
+#define GD_RELOC_OFF	4
+#define GD_LOAD_OFF	5
+#define GD_ENV_ADDR	6
+#define GD_ENV_VALID	7
+#define GD_CPU_CLK	8
+#define GD_BUS_CLK	9
+#define GD_RAM_SIZE	10
+#define GD_RESET_STATUS	11
+#define GD_JT		12
 
-#define GD_SIZE    12
+#define GD_SIZE		13
 
 /*
  * Global Data Flags
diff --git a/arch/i386/lib/board.c b/arch/i386/lib/board.c
index 9c2f77f..1129918 100644
--- a/arch/i386/lib/board.c
+++ b/arch/i386/lib/board.c
@@ -190,18 +190,21 @@
 	dest_addr  = (void *)gdp - (bss_end - text_start);
 	rel_offset = text_start - dest_addr;
 
-	/* First stage CPU initialization */
-	if (cpu_init_f() != 0)
-		hang();
+	/* Perform low-level initialization only when cold booted */
+	if (((gd_t *)gdp)->flags & GD_FLG_COLD_BOOT) {
+		/* First stage CPU initialization */
+		if (cpu_init_f() != 0)
+			hang();
 
-	/* First stage Board initialization */
-	if (board_early_init_f() != 0)
-		hang();
+		/* First stage Board initialization */
+		if (board_early_init_f() != 0)
+			hang();
+	}
 
 	/* Copy U-Boot into RAM */
 	dst_addr = (ulong *)dest_addr;
-	src_addr = (ulong *)text_start;
-	end_addr = (ulong *)data_end;
+	src_addr = (ulong *)(text_start + ((gd_t *)gdp)->load_off);
+	end_addr = (ulong *)(data_end  + ((gd_t *)gdp)->load_off);
 
 	while (src_addr < end_addr)
 		*dst_addr++ = *src_addr++;
@@ -214,8 +217,8 @@
 		*dst_addr++ = 0x00000000;
 
 	/* Perform relocation adjustments */
-	re_src = (Elf32_Rel *)rel_dyn_start;
-	re_end = (Elf32_Rel *)rel_dyn_end;
+	re_src = (Elf32_Rel *)(rel_dyn_start + ((gd_t *)gdp)->load_off);
+	re_end = (Elf32_Rel *)(rel_dyn_end + ((gd_t *)gdp)->load_off);
 
 	do {
 		if (re_src->r_offset >= TEXT_BASE)