riscv: Fix clear bss loop in the start-up code

For RV64, the loop uses the sd instruction to store zero
at the address held in t0, and the increment is 8 bytes.
So if the difference between __bss_start and __bss_end
is not 8-byte aligned, the clear bss loop will run past
__bss_end and never terminate, which looks like a system hang.

Signed-off-by: Rick Chen <rick@andestech.com>
Cc: KC Lin <kclin@andestech.com>
Cc: Alan Kao <alankao@andestech.com>
diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
index 0a2ce6d..ee6d471 100644
--- a/arch/riscv/cpu/start.S
+++ b/arch/riscv/cpu/start.S
@@ -174,7 +174,7 @@
 spl_clear_bss_loop:
 	SREG	zero, 0(t0)
 	addi	t0, t0, REGBYTES
-	bne	t0, t1, spl_clear_bss_loop
+	bltu	t0, t1, spl_clear_bss_loop	/* addresses: compare unsigned */
 
 spl_stack_gd_setup:
 	jal	spl_relocate_stack_gd
@@ -324,7 +324,7 @@
 clbss_l:
 	SREG	zero, 0(t0)		/* clear loop... */
 	addi	t0, t0, REGBYTES
-	bne	t0, t1, clbss_l
+	bltu	t0, t1, clbss_l		/* addresses: compare unsigned */
 
 relocate_secondary_harts:
 #ifdef CONFIG_SMP
diff --git a/arch/riscv/cpu/u-boot-spl.lds b/arch/riscv/cpu/u-boot-spl.lds
index 32255d5..955dd31 100644
--- a/arch/riscv/cpu/u-boot-spl.lds
+++ b/arch/riscv/cpu/u-boot-spl.lds
@@ -76,7 +76,7 @@
 	.bss : {
 		__bss_start = .;
 		*(.bss*)
-		. = ALIGN(4);
+		. = ALIGN(8);	/* RV64 start-up code clears bss 8 bytes per store */
 		__bss_end = .;
 	} > .bss_mem
 }
diff --git a/arch/riscv/cpu/u-boot.lds b/arch/riscv/cpu/u-boot.lds
index 11bc4a7..838a844 100644
--- a/arch/riscv/cpu/u-boot.lds
+++ b/arch/riscv/cpu/u-boot.lds
@@ -82,7 +82,7 @@
 	.bss : {
 		__bss_start = .;
 		*(.bss*)
-		. = ALIGN(4);
+		. = ALIGN(8);	/* RV64 start-up code clears bss 8 bytes per store */
 		__bss_end = .;
 	}
 }