riscv: Add SPL_ZERO_MEM_BEFORE_USE implementation

Add the actual support code for SPL_ZERO_MEM_BEFORE_USE and remove the
existing StarFive JH7110 L2 LIM clearing code, since the existing code
has the following issues:
 1. Each hart (in the middle of a function call) overwrites its own
    stack and other harts' stacks.
    (data race and data corruption)
 2. The lottery-winner hart can be running "board_init_f_init_reserve"
    while other harts are still in the middle of zeroing L2 LIM.
    (data race)

Signed-off-by: Bo Gan <ganboing@gmail.com>
Signed-off-by: Shengyu Qu <wiagn233@outlook.com>
Reviewed-by: Leo Yu-Chi Liang <ycliang@andestech.com>
diff --git a/arch/riscv/cpu/jh7110/spl.c b/arch/riscv/cpu/jh7110/spl.c
index 72adcef..4047b10 100644
--- a/arch/riscv/cpu/jh7110/spl.c
+++ b/arch/riscv/cpu/jh7110/spl.c
@@ -13,7 +13,6 @@
 #include <init.h>
 
 #define CSR_U74_FEATURE_DISABLE	0x7c1
-#define L2_LIM_MEM_END	0x81FFFFFUL
 
 DECLARE_GLOBAL_DATA_PTR;
 
@@ -59,9 +58,6 @@
 
 void harts_early_init(void)
 {
-	ulong *ptr;
-	u8 *tmp;
-	ulong len, remain;
 	/*
 	 * Feature Disable CSR
 	 *
@@ -70,25 +66,4 @@
 	 */
 	if (CONFIG_IS_ENABLED(RISCV_MMODE))
 		csr_write(CSR_U74_FEATURE_DISABLE, 0);
-
-	/* clear L2 LIM  memory
-	 * set __bss_end to 0x81FFFFF region to zero
-	 * The L2 Cache Controller supports ECC. ECC is applied to SRAM.
-	 * If it is not cleared, the ECC part is invalid, and an ECC error
-	 * will be reported when reading data.
-	 */
-	ptr = (ulong *)&__bss_end;
-	len = L2_LIM_MEM_END - (ulong)&__bss_end;
-	remain = len % sizeof(ulong);
-	len /= sizeof(ulong);
-
-	while (len--)
-		*ptr++ = 0;
-
-	/* clear the remain bytes */
-	if (remain) {
-		tmp = (u8 *)ptr;
-		while (remain--)
-			*tmp++ = 0;
-	}
 }
diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
index 59d58a5..30cf674 100644
--- a/arch/riscv/cpu/start.S
+++ b/arch/riscv/cpu/start.S
@@ -111,6 +111,18 @@
  * It's essential before any function call, otherwise, we get data-race.
  */
 
+/* clear stack if necessary */
+#if CONFIG_IS_ENABLED(ZERO_MEM_BEFORE_USE)
+clear_stack:
+	li	t1, 1
+	slli	t1, t1, CONFIG_STACK_SIZE_SHIFT
+	sub	t1, sp, t1
+clear_stack_loop:
+	SREG	zero, 0(t1)		/* t1 is always 16 byte aligned */
+	addi	t1, t1, REGBYTES
+	blt	t1, sp, clear_stack_loop
+#endif
+
 call_board_init_f_0:
 	/* find top of reserve space */
 #if CONFIG_IS_ENABLED(SMP)
diff --git a/common/init/board_init.c b/common/init/board_init.c
index 96ffb79..ab8c508 100644
--- a/common/init/board_init.c
+++ b/common/init/board_init.c
@@ -162,6 +162,9 @@
 #if CONFIG_VAL(SYS_MALLOC_F_LEN)
 	/* go down one 'early malloc arena' */
 	gd->malloc_base = base;
+#if CONFIG_IS_ENABLED(ZERO_MEM_BEFORE_USE)
+	memset((void *)base, '\0', CONFIG_VAL(SYS_MALLOC_F_LEN));
+#endif
 #endif
 
 	if (CONFIG_IS_ENABLED(SYS_REPORT_STACK_F_USAGE))