armv7: Add Position Independent Execution support

A U-Boot image could be loaded and executed at a different
location than it was linked at.

For example, Aspeed takes a stable release version of the U-Boot image
as the golden one for recovery purposes. When the primary storage,
such as flash, is corrupted, the golden image is loaded to an arbitrary
SRAM/DRAM address on demand through ethernet/UART/etc. and run for
rescue.

To deal with this condition, PIE is needed, as there is only one
signed golden image, which may nevertheless be executed at different
places.

This patch adds the PIE support for ARMv7 platform.

Signed-off-by: Chia-Wei Wang <chiawei_wang@aspeedtech.com>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 31ae295..50efb5e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -11,7 +11,7 @@
 
 config POSITION_INDEPENDENT
 	bool "Generate position-independent pre-relocation code"
-	depends on ARM64
+	depends on ARM64 || CPU_V7A
 	help
 	  U-Boot expects to be linked to a specific hard-coded address, and to
 	  be loaded to and run from that address. This option lifts that
diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
index 87329d2..698e15b 100644
--- a/arch/arm/cpu/armv7/start.S
+++ b/arch/arm/cpu/armv7/start.S
@@ -39,6 +39,42 @@
 	/* Allow the board to save important registers */
 	b	save_boot_params
 save_boot_params_ret:
+#ifdef CONFIG_POSITION_INDEPENDENT
+	/*
+	 * Fix .rel.dyn relocations. This allows U-Boot to be loaded to and
+	 * executed at a different address than it was linked at.
+	 */
+pie_fixup:
+	adr	r0, reset	/* r0 <- Runtime value of reset label */
+	ldr	r1, =reset	/* r1 <- Linked value of reset label */
+	subs	r4, r0, r1	/* r4 <- Runtime-vs-link offset */
+	beq	pie_fixup_done	/* running at link address: nothing to fix */
+
+	adr	r0, pie_fixup
+	ldr	r1, _rel_dyn_start_ofs
+	add	r2, r0, r1	/* r2 <- Runtime &__rel_dyn_start */
+	ldr	r1, _rel_dyn_end_ofs
+	add	r3, r0, r1	/* r3 <- Runtime &__rel_dyn_end */
+
+pie_fix_loop:
+	ldr	r0, [r2]	/* r0 <- Link location */
+	ldr	r1, [r2, #4]	/* r1 <- fixup */
+	cmp	r1, #23		/* relative fixup (R_ARM_RELATIVE)? */
+	bne	pie_skip_reloc
+
+	/* relative fix: increase location by offset */
+	add	r0, r4		/* r0 <- Runtime location */
+	ldr	r1, [r0]
+	add	r1, r4
+	str	r1, [r0]	/* patch the word stored at the location */
+	str	r0, [r2]	/* entry now records the runtime location */
+pie_skip_reloc:
+	add	r2, #8		/* next entry, for skipped entries too */
+	cmp	r2, r3
+	blo	pie_fix_loop
+pie_fixup_done:
+#endif
+
 #ifdef CONFIG_ARMV7_LPAE
 /*
  * check for Hypervisor support
@@ -340,3 +376,10 @@
 	b	lowlevel_init		@ go setup pll,mux,memory
 ENDPROC(cpu_init_crit)
 #endif
+
+#ifdef CONFIG_POSITION_INDEPENDENT
+_rel_dyn_start_ofs:
+	.word	__rel_dyn_start - pie_fixup	/* added to runtime &pie_fixup */
+_rel_dyn_end_ofs:
+	.word	__rel_dyn_end - pie_fixup	/* added to runtime &pie_fixup */
+#endif
diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S
index 46b6be2..956d258 100644
--- a/arch/arm/lib/crt0.S
+++ b/arch/arm/lib/crt0.S
@@ -130,6 +130,14 @@
 	ldr	r9, [r9, #GD_NEW_GD]		/* r9 <- gd->new_gd */
 
 	adr	lr, here
+#ifdef CONFIG_POSITION_INDEPENDENT
+	adr	r0, _main			/* r0 <- Run &_main */
+	ldr	r1, _start_ofs			/* r1 <- _start - _main */
+	add	r0, r0, r1			/* r0 <- Run &_start */
+	ldr	r1, =CONFIG_SYS_TEXT_BASE	/* r1 <- CONFIG_SYS_TEXT_BASE */
+	sub	r1, r1, r0			/* r1 <- TEXT_BASE - Run &_start */
+	add	lr, lr, r1			/* lr <- lr + that difference */
+#endif
 	ldr	r0, [r9, #GD_RELOC_OFF]		/* r0 = gd->reloc_off */
 	add	lr, lr, r0
 #if defined(CONFIG_CPU_V7M)
@@ -180,3 +188,6 @@
 #endif
 
 ENDPROC(_main)
+
+_start_ofs:
+	.word	_start - _main		/* offset from _main to _start */
diff --git a/arch/arm/lib/relocate.S b/arch/arm/lib/relocate.S
index e5f7267..14b7f61 100644
--- a/arch/arm/lib/relocate.S
+++ b/arch/arm/lib/relocate.S
@@ -78,22 +78,28 @@
  */
 
 ENTRY(relocate_code)
-	ldr	r1, =__image_copy_start	/* r1 <- SRC &__image_copy_start */
-	subs	r4, r0, r1		/* r4 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	ldr	r2, =__image_copy_end	/* r2 <- SRC &__image_copy_end */
-
+relocate_base:				/* plain label at &relocate_code, used as ADR target */
+	adr	r3, relocate_base	/* r3 <- Run &relocate_code       */
+	ldr	r1, _image_copy_start_ofs
+	add	r1, r3			/* r1 <- Run &__image_copy_start */
+	subs	r4, r0, r1		/* r4 <- Run to copy offset      */
+	beq	relocate_done		/* skip relocation               */
+	/* r1 still holds Run &__image_copy_start, so it is not reloaded */
+	ldr	r2, _image_copy_end_ofs
+	add	r2, r3			/* r2 <- Run &__image_copy_end   */
 copy_loop:
-	ldmia	r1!, {r10-r11}		/* copy from source address [r1]    */
-	stmia	r0!, {r10-r11}		/* copy to   target address [r0]    */
-	cmp	r1, r2			/* until source end address [r2]    */
+	ldmia	r1!, {r10-r11}		/* load two words from source [r1], advance r1 */
+	stmia	r0!, {r10-r11}		/* store them to target [r0], advance r0 */
+	cmp	r1, r2			/* until source end address [r2] */
 	blo	copy_loop
 
 	/*
 	 * fix .rel.dyn relocations
 	 */
-	ldr	r2, =__rel_dyn_start	/* r2 <- SRC &__rel_dyn_start */
-	ldr	r3, =__rel_dyn_end	/* r3 <- SRC &__rel_dyn_end */
+	ldr	r1, _rel_dyn_start_ofs	/* r1 <- __rel_dyn_start - relocate_code */
+	add	r2, r3, r1		/* r2 <- Run &__rel_dyn_start */
+	ldr	r1, _rel_dyn_end_ofs	/* r1 <- __rel_dyn_end - relocate_code */
+	add	r3, r3, r1		/* r3 <- Run &__rel_dyn_end (replaces base) */
 fixloop:
 	ldmia	r2!, {r0-r1}		/* (r0,r1) <- (SRC location,fixup) */
 	and	r1, r1, #0xff
@@ -129,3 +135,12 @@
 #endif
 
 ENDPROC(relocate_code)
+
+_image_copy_start_ofs:
+	.word	__image_copy_start - relocate_code	/* offset from relocate_code */
+_image_copy_end_ofs:
+	.word	__image_copy_end - relocate_code	/* offset from relocate_code */
+_rel_dyn_start_ofs:
+	.word	__rel_dyn_start - relocate_code		/* offset from relocate_code */
+_rel_dyn_end_ofs:
+	.word	__rel_dyn_end - relocate_code		/* offset from relocate_code */