arm64: support running at addr other than linked to

This is required in the case where U-Boot is typically loaded and run at
a particular address, but for some reason the RAM at that location is not
available, e.g. due to memory fragmentation loading other boot binaries or
firmware, splitting an SMP complex between various different OSs without
using e.g. the EL2 second-stage page tables to hide the memory assignments,
or due to known ECC failures.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
diff --git a/arch/arm/lib/relocate_64.S b/arch/arm/lib/relocate_64.S
index fdba004..0480452 100644
--- a/arch/arm/lib/relocate_64.S
+++ b/arch/arm/lib/relocate_64.S
@@ -27,11 +27,24 @@
 	/*
 	 * Copy u-boot from flash to RAM
 	 */
-	ldr	x1, =__image_copy_start	/* x1 <- SRC &__image_copy_start */
-	subs	x9, x0, x1		/* x9 <- relocation offset */
+	adr	x1, __image_copy_start	/* x1 <- Run &__image_copy_start */
+	subs	x9, x0, x1		/* x9 <- Run to copy offset */
 	b.eq	relocate_done		/* skip relocation */
-	ldr	x2, =__image_copy_end	/* x2 <- SRC &__image_copy_end */
+	/*
+	 * Don't ldr x1, =__image_copy_start here, since if the code is already
+	 * running at an address other than it was linked to, that instruction
+	 * will load the relocated value of __image_copy_start. To
+	 * correctly apply relocations, we need to know the linked value.
+	 *
+	 * The linked &__image_copy_start is known to be CONFIG_SYS_TEXT_BASE,
+	 * which is stored in _TEXT_BASE as a non-relocated value, since it
+	 * isn't a symbol reference.
+	 */
+	ldr	x1, _TEXT_BASE		/* x1 <- Linked &__image_copy_start */
+	subs	x9, x0, x1		/* x9 <- Link to copy offset */
 
+	adr	x1, __image_copy_start	/* x1 <- Run &__image_copy_start */
+	adr	x2, __image_copy_end	/* x2 <- Run &__image_copy_end */
 copy_loop:
 	ldp	x10, x11, [x1], #16	/* copy from source address [x1] */
 	stp	x10, x11, [x0], #16	/* copy to   target address [x0] */
@@ -42,8 +55,8 @@
 	/*
 	 * Fix .rela.dyn relocations
 	 */
-	ldr	x2, =__rel_dyn_start	/* x2 <- SRC &__rel_dyn_start */
-	ldr	x3, =__rel_dyn_end	/* x3 <- SRC &__rel_dyn_end */
+	adr	x2, __rel_dyn_start	/* x2 <- Run &__rel_dyn_start */
+	adr	x3, __rel_dyn_end	/* x3 <- Run &__rel_dyn_end */
 fixloop:
 	ldp	x0, x1, [x2], #16	/* (x0,x1) <- (SRC location, fixup) */
 	ldr	x4, [x2], #8		/* x4 <- addend */