arm: move C runtime setup code into crt0.S

Move all the C runtime setup code from every start.S
in arch/arm into arch/arm/lib/crt0.S. This covers
the code sequence from setting up the initial stack
to calling into board_init_r().

Also, rewrite the C runtime setup and make the functions
board_init_*() and relocate_code() behave according to
normal C semantics (no more jumping across the C stack,
etc.).

Some SPL targets had to be touched because they use
start.S, either explicitly or for some other reason; the
relevant maintainers and custodians have been Cc:ed.

Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>
diff --git a/arch/arm/cpu/arm1136/start.S b/arch/arm/cpu/arm1136/start.S
index 5d3b4c2..a067b8a 100644
--- a/arch/arm/cpu/arm1136/start.S
+++ b/arch/arm/cpu/arm1136/start.S
@@ -165,13 +165,7 @@
 	bl  cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-	ldr	sp, =(CONFIG_SYS_INIT_SP_ADDR)
-	bic	sp, sp, #7 /* 8-byte alignment for ABI compliance */
-	ldr	r0,=0x00000000
-
-	bl	board_init_f
+	bl	_main
 
 /*------------------------------------------------------------------------------*/
 
@@ -188,14 +182,10 @@
 	mov	r5, r1	/* save addr of gd */
 	mov	r6, r2	/* save addr of destination */
 
-	/* Set up the stack						    */
-stack_setup:
-	mov	sp, r4
-
 	adr	r0, _start
 	cmp	r0, r6
 	moveq	r9, #0		/* no relocation. relocation offset(r9) = 0 */
-	beq	clear_bss		/* skip relocation */
+	beq	relocate_done		/* skip relocation */
 	mov	r1, r6			/* r1 <- scratch for copy_loop */
 	ldr	r3, _image_copy_end_ofs
 	add	r2, r0, r3		/* r2 <- source end address	    */
@@ -245,7 +235,15 @@
 	add	r2, r2, #8		/* each rel.dyn entry is 8 bytes */
 	cmp	r2, r3
 	blo	fixloop
-	b	clear_bss
+	bx	lr
+
+#endif
+
+relocate_done:
+
+	bx	lr
+
+#ifndef CONFIG_SPL_BUILD
 
 _rel_dyn_start_ofs:
 	.word __rel_dyn_start - _start
@@ -253,54 +251,13 @@
 	.word __rel_dyn_end - _start
 _dynsym_start_ofs:
 	.word __dynsym_start - _start
-#endif
 
-clear_bss:
-#ifdef CONFIG_SPL_BUILD
-	/* No relocation for SPL */
-	ldr	r0, =__bss_start
-	ldr	r1, =__bss_end__
-#else
-	ldr	r0, _bss_start_ofs
-	ldr	r1, _bss_end_ofs
-	mov	r4, r6			/* reloc addr */
-	add	r0, r0, r4
-	add	r1, r1, r4
 #endif
-	mov	r2, #0x00000000		/* clear			    */
 
-clbss_l:cmp	r0, r1			/* clear loop... */
-	bhs	clbss_e			/* if reached end of bss, exit */
-	str	r2, [r0]
-	add	r0, r0, #4
-	b	clbss_l
-clbss_e:
+	.globl	c_runtime_cpu_setup
+c_runtime_cpu_setup:
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-#ifdef CONFIG_NAND_SPL
-	ldr     r0, _nand_boot_ofs
-	mov	pc, r0
-
-_nand_boot_ofs:
-	.word nand_boot
-#else
-jump_2_ram:
-	ldr	r0, _board_init_r_ofs
-	adr	r1, _start
-	add	lr, r0, r1
-	add	lr, lr, r9
-	/* setup parameters for board_init_r */
-	mov	r0, r5		/* gd_t */
-	mov	r1, r6		/* dest_addr */
-	/* jump to it ... */
-	mov	pc, lr
-
-_board_init_r_ofs:
-	.word board_init_r - _start
-#endif
+	bx	lr
 
 /*
  *************************************************************************