MIPS: start.S: optimize BSS initialization

Get the start and end address for clearing BSS from the newly
introduced symbols __bss_start and __bss_end. After GOT is
relocated, those symbols are already pointing to the correct
addresses.

Also optimize the loop by moving the address increment
into the branch delay slot to avoid the initial sub instruction.

Signed-off-by: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
diff --git a/arch/mips/cpu/mips64/start.S b/arch/mips/cpu/mips64/start.S
index c0ae41a..ba4ca4d 100644
--- a/arch/mips/cpu/mips64/start.S
+++ b/arch/mips/cpu/mips64/start.S
@@ -220,17 +220,19 @@
 	blt	t2, t3, 1b
 	 daddi	t8, 8
 
-	/* Clear BSS */
-	ld	t1, -24(t0)		# t1 <-- uboot_end_data
-	ld	t2, -16(t0)		# t2 <-- uboot_end
-	dadd	t1, s1			# adjust pointers
-	dadd	t2, s1
+	/*
+	 * Clear BSS. GOT is now relocated, thus __bss_start and __bss_end
+	 * can be accessed directly via $gp. The loop below stores before it
+	 * tests, so t2 must be the last doubleword (assumes non-empty BSS).
+	 */
+	dla	t1, __bss_start		# t1 <-- __bss_start
+	dla	t2, __bss_end		# t2 <-- __bss_end
+	daddi	t2, -8			# t2 <-- last doubleword of BSS
 
-	dsub	t1, 8
 1:
-	daddi	t1, 8
-	bltl	t1, t2, 1b
-	 sd	zero, 0(t1)
+	sd	zero, 0(t1)		# zero the doubleword at t1
+	blt	t1, t2, 1b		# compares t1 as it was for the store above
+	 daddi	t1, 8			# delay slot: runs whether or not we branch
 
 	move	a0, s0			# a0 <-- gd
 	dla	t9, board_init_r