arm: factor out relocate_code routine

Replace all relocate_code routines from ARM start.S files
with a single instance in file arch/arm/lib/relocate.S.
For PXA, this requires moving the dcache unlocking code
from within relocate_code into c_runtime_cpu_setup.

Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>
Reviewed-by: Benoît Thébaudeau <benoit.thebaudeau@advansee.com>
Tested-by: Simon Glass <sjg@chromium.org>
diff --git a/arch/arm/cpu/pxa/start.S b/arch/arm/cpu/pxa/start.S
index 595778a..2e3f65e 100644
--- a/arch/arm/cpu/pxa/start.S
+++ b/arch/arm/cpu/pxa/start.S
@@ -167,94 +167,24 @@
 	bl	_main
 
 /*------------------------------------------------------------------------------*/
-#ifndef CONFIG_SPL_BUILD
-/*
- * void relocate_code(addr_moni)
- *
- * This function relocates the monitor code.
- */
-	.globl	relocate_code
-relocate_code:
-	mov	r6, r0	/* save addr of destination */
 
-/* Disable the Dcache RAM lock for stack now */
-#ifdef	CONFIG_CPU_PXA25X
-	mov	r12, lr
-	bl	cpu_init_crit
-	mov	lr, r12
-#endif
-
-	adr	r0, _start
-	subs	r9, r6, r0		/* r9 <- relocation offset */
-	beq	relocate_done		/* skip relocation */
-	mov	r1, r6			/* r1 <- scratch for copy_loop */
-	ldr	r3, _image_copy_end_ofs
-	add	r2, r0, r3		/* r2 <- source end address	    */
-
-copy_loop:
-	ldmia	r0!, {r10-r11}		/* copy from source address [r0]    */
-	stmia	r1!, {r10-r11}		/* copy to   target address [r1]    */
-	cmp	r0, r2			/* until source end address [r2]    */
-	blo	copy_loop
+	.globl	c_runtime_cpu_setup
+c_runtime_cpu_setup:
 
+#ifdef CONFIG_CPU_PXA25X
 	/*
-	 * fix .rel.dyn relocations
+	 * Unlock (actually, disable) the dcache now that board_init_f
+	 * is done. We could do this earlier, but that would require
+	 * adding a new C runtime hook, whereas c_runtime_cpu_setup
+	 * already exists.
+	 * As this routine is just a call to cpu_init_crit, make it a
+	 * tail call: branch there directly instead of calling it.
 	 */
-	ldr	r0, _TEXT_BASE		/* r0 <- Text base */
-	ldr	r10, _dynsym_start_ofs	/* r10 <- sym table ofs */
-	add	r10, r10, r0		/* r10 <- sym table in FLASH */
-	ldr	r2, _rel_dyn_start_ofs	/* r2 <- rel dyn start ofs */
-	add	r2, r2, r0		/* r2 <- rel dyn start in FLASH */
-	ldr	r3, _rel_dyn_end_ofs	/* r3 <- rel dyn end ofs */
-	add	r3, r3, r0		/* r3 <- rel dyn end in FLASH */
-fixloop:
-	ldr	r0, [r2]		/* r0 <- location to fix up, IN FLASH! */
-	add	r0, r0, r9		/* r0 <- location to fix up in RAM */
-	ldr	r1, [r2, #4]
-	and	r7, r1, #0xff
-	cmp	r7, #23			/* relative fixup? */
-	beq	fixrel
-	cmp	r7, #2			/* absolute fixup? */
-	beq	fixabs
-	/* ignore unknown type of fixup */
-	b	fixnext
-fixabs:
-	/* absolute fix: set location to (offset) symbol value */
-	mov	r1, r1, LSR #4		/* r1 <- symbol index in .dynsym */
-	add	r1, r10, r1		/* r1 <- address of symbol in table */
-	ldr	r1, [r1, #4]		/* r1 <- symbol value */
-	add	r1, r1, r9		/* r1 <- relocated sym addr */
-	b	fixnext
-fixrel:
-	/* relative fix: increase location by offset */
-	ldr	r1, [r0]
-	add	r1, r1, r9
-fixnext:
-	str	r1, [r0]
-	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
-	cmp	r2, r3
-	blo	fixloop
-
-relocate_done:
-
+	b	cpu_init_crit
+#else
 	bx	lr
-
-_image_copy_end_ofs:
-	.word __image_copy_end - _start
-_rel_dyn_start_ofs:
-	.word __rel_dyn_start - _start
-_rel_dyn_end_ofs:
-	.word __rel_dyn_end - _start
-_dynsym_start_ofs:
-	.word __dynsym_start - _start
-
 #endif
 
-	.globl	c_runtime_cpu_setup
-c_runtime_cpu_setup:
-
-	bx	lr
-
 /*
  *************************************************************************
  *