mips: mtmips: add support for MediaTek MT7621 SoC

This patch adds support for the MediaTek MT7621 SoC.
All files are written specifically for U-Boot.

The default build target is u-boot-mt7621.bin.

The specification of this chip:
https://www.mediatek.com/products/homenetworking/mt7621

Reviewed-by: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
diff --git a/arch/mips/mach-mtmips/mt7621/spl/Makefile b/arch/mips/mach-mtmips/mt7621/spl/Makefile
new file mode 100644
index 0000000..ebe54e7
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/Makefile
@@ -0,0 +1,9 @@
+
+extra-y += start.o
+
+obj-y += spl.o
+obj-y += cps.o
+obj-y += dram.o
+obj-y += serial.o
+obj-y += launch.o
+obj-y += launch_ll.o
diff --git a/arch/mips/mach-mtmips/mt7621/spl/cps.c b/arch/mips/mach-mtmips/mt7621/spl/cps.c
new file mode 100644
index 0000000..779e646
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/cps.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <asm/io.h>
+#include <asm/addrspace.h>
+#include <asm/mipsregs.h>
+#include <asm/cm.h>
+#include <linux/bitfield.h>
+#include "../mt7621.h"
+
+/* GIC Shared Register Bases */
+#define GIC_SH_POL_BASE		0x100
+#define GIC_SH_TRIG_BASE	0x180
+#define GIC_SH_RMASK_BASE	0x300
+#define GIC_SH_SMASK_BASE	0x380
+#define GIC_SH_MASK_BASE	0x400
+#define GIC_SH_PEND_BASE	0x480
+#define GIC_SH_MAP_PIN_BASE	0x500
+#define GIC_SH_MAP_VPE_BASE	0x2000
+
+/* GIC Registers */
+#define GIC_SH_POL31_0		(GIC_SH_POL_BASE + 0x00)
+#define GIC_SH_POL63_32		(GIC_SH_POL_BASE + 0x04)
+
+#define GIC_SH_TRIG31_0		(GIC_SH_TRIG_BASE + 0x00)
+#define GIC_SH_TRIG63_32	(GIC_SH_TRIG_BASE + 0x04)
+
+#define GIC_SH_RMASK31_0	(GIC_SH_RMASK_BASE + 0x00)
+#define GIC_SH_RMASK63_32	(GIC_SH_RMASK_BASE + 0x04)
+
+#define GIC_SH_SMASK31_0	(GIC_SH_SMASK_BASE + 0x00)
+#define GIC_SH_SMASK63_32	(GIC_SH_SMASK_BASE + 0x04)
+
+#define GIC_SH_MAP_PIN(n)	(GIC_SH_MAP_PIN_BASE + (n) * 4)
+
+#define GIC_SH_MAP_VPE(n, v)	(GIC_SH_MAP_VPE_BASE + (n) * 0x20 + ((v) / 32) * 4)
+#define GIC_SH_MAP_VPE31_0(n)	GIC_SH_MAP_VPE(n, 0)
+
+/* GIC_SH_MAP_PIN fields */
+#define GIC_MAP_TO_PIN		BIT(31)
+#define GIC_MAP_TO_NMI		BIT(30)
+#define GIC_MAP			GENMASK(5, 0)
+#define GIC_MAP_SHIFT		0
+
+static void cm_init(void __iomem *cm_base)
+{
+	u32 gcrcfg, num_cores;
+
+	gcrcfg = readl(cm_base + GCR_CONFIG);
+	num_cores = FIELD_GET(GCR_CONFIG_PCORES, gcrcfg) + 1;
+	/* Set one GCR_ACCESS bit per core: bits 0..num_cores-1 */
+	writel((1 << num_cores) - 1, cm_base + GCR_ACCESS);
+	/* Program the four base registers GCR_REG0..3_BASE */
+	writel(GCR_REG0_BASE_VALUE, cm_base + GCR_REG0_BASE);
+	writel(GCR_REG1_BASE_VALUE, cm_base + GCR_REG1_BASE);
+	writel(GCR_REG2_BASE_VALUE, cm_base + GCR_REG2_BASE);
+	writel(GCR_REG3_BASE_VALUE, cm_base + GCR_REG3_BASE);
+	/* For each of them: set the address mask and select IOCU0 as target */
+	clrsetbits_32(cm_base + GCR_REG0_MASK,
+		      GCR_REGn_MASK_ADDRMASK | GCR_REGn_MASK_CMTGT,
+		      FIELD_PREP(GCR_REGn_MASK_ADDRMASK, GCR_REG0_MASK_VALUE) |
+		      GCR_REGn_MASK_CMTGT_IOCU0);
+
+	clrsetbits_32(cm_base + GCR_REG1_MASK,
+		      GCR_REGn_MASK_ADDRMASK | GCR_REGn_MASK_CMTGT,
+		      FIELD_PREP(GCR_REGn_MASK_ADDRMASK, GCR_REG1_MASK_VALUE) |
+		      GCR_REGn_MASK_CMTGT_IOCU0);
+
+	clrsetbits_32(cm_base + GCR_REG2_MASK,
+		      GCR_REGn_MASK_ADDRMASK | GCR_REGn_MASK_CMTGT,
+		      FIELD_PREP(GCR_REGn_MASK_ADDRMASK, GCR_REG2_MASK_VALUE) |
+		      GCR_REGn_MASK_CMTGT_IOCU0);
+
+	clrsetbits_32(cm_base + GCR_REG3_MASK,
+		      GCR_REGn_MASK_ADDRMASK | GCR_REGn_MASK_CMTGT,
+		      FIELD_PREP(GCR_REGn_MASK_ADDRMASK, GCR_REG3_MASK_VALUE) |
+		      GCR_REGn_MASK_CMTGT_IOCU0);
+	/* Clear the default target field, then set SYNCCTL in GCR_CONTROL */
+	clrbits_32(cm_base + GCR_BASE, CM_DEFAULT_TARGET_MASK);
+	setbits_32(cm_base + GCR_CONTROL, GCR_CONTROL_SYNCCTL);
+}
+
+static void gic_init(void)
+{
+	void __iomem *gic_base = (void *)KSEG1ADDR(MIPS_GIC_BASE);
+	int i;
+
+	/* Interrupt 0..5: Level Trigger, Active High */
+	writel(0, gic_base + GIC_SH_TRIG31_0);
+	writel(0x3f, gic_base + GIC_SH_RMASK31_0);
+	writel(0x3f, gic_base + GIC_SH_POL31_0);
+	writel(0x3f, gic_base + GIC_SH_SMASK31_0);
+
+	/* Interrupt 56..63: Edge Trigger, Rising Edge */
+	/* Hardcoded to set up the last 8 external interrupts for IPI. */
+	writel(0xff000000, gic_base + GIC_SH_TRIG63_32);
+	writel(0xff000000, gic_base + GIC_SH_RMASK63_32);
+	writel(0xff000000, gic_base + GIC_SH_POL63_32);
+	writel(0xff000000, gic_base + GIC_SH_SMASK63_32);
+
+	/* Map interrupt source to particular hardware interrupt pin */
+	/* source {0,1,2,3,4,5} -> pin {0,0,4,3,0,5} */
+	writel(GIC_MAP_TO_PIN | 0, gic_base + GIC_SH_MAP_PIN(0));
+	writel(GIC_MAP_TO_PIN | 0, gic_base + GIC_SH_MAP_PIN(1));
+	writel(GIC_MAP_TO_PIN | 4, gic_base + GIC_SH_MAP_PIN(2));
+	writel(GIC_MAP_TO_PIN | 3, gic_base + GIC_SH_MAP_PIN(3));
+	writel(GIC_MAP_TO_PIN | 0, gic_base + GIC_SH_MAP_PIN(4));
+	writel(GIC_MAP_TO_PIN | 5, gic_base + GIC_SH_MAP_PIN(5));
+
+	/* source 56~59 -> pin 1, 60~63 -> pin 2 */
+	writel(GIC_MAP_TO_PIN | 1, gic_base + GIC_SH_MAP_PIN(56));
+	writel(GIC_MAP_TO_PIN | 1, gic_base + GIC_SH_MAP_PIN(57));
+	writel(GIC_MAP_TO_PIN | 1, gic_base + GIC_SH_MAP_PIN(58));
+	writel(GIC_MAP_TO_PIN | 1, gic_base + GIC_SH_MAP_PIN(59));
+	writel(GIC_MAP_TO_PIN | 2, gic_base + GIC_SH_MAP_PIN(60));
+	writel(GIC_MAP_TO_PIN | 2, gic_base + GIC_SH_MAP_PIN(61));
+	writel(GIC_MAP_TO_PIN | 2, gic_base + GIC_SH_MAP_PIN(62));
+	writel(GIC_MAP_TO_PIN | 2, gic_base + GIC_SH_MAP_PIN(63));
+
+	/* Route interrupts 0..31 to VPE0 (the map is a per-VPE bit mask) */
+	for (i = 0; i < 32; i++)
+		writel(BIT(0), gic_base + GIC_SH_MAP_VPE31_0(i));
+
+	/*
+	 * Direct GIC interrupts 56..59 and 60..63 to VPE 0..3 respectively.
+	 * These last 8 interrupts are set aside for IPI signaling, following
+	 * the MIPS Linux convention of reserving the last interrupts for IPIs.
+	 * The actual interrupts are tied low and software sends interrupts
+	 * via GIC_SH_WEDGE writes.
+	 */
+	for (i = 0; i < 4; i++) {
+		writel(BIT(i), gic_base + GIC_SH_MAP_VPE31_0(i + 56));
+		writel(BIT(i), gic_base + GIC_SH_MAP_VPE31_0(i + 60));
+	}
+}
+
+void mt7621_cps_init(void)
+{
+	void __iomem *cm_base = (void *)KSEG1ADDR(CONFIG_MIPS_CM_BASE);
+
+	/* Set the GIC base address in the CM and enable the GIC */
+	writel(MIPS_GIC_BASE | GCR_GIC_EN, cm_base + GCR_GIC_BASE);
+
+	/* Set the CPC base address in the CM and enable the CPC */
+	writel(MIPS_CPC_BASE | GCR_CPC_EN, cm_base + GCR_CPC_BASE);
+	/* Set up the GIC interrupts first, then the remaining CM registers */
+	gic_init();
+	cm_init(cm_base);
+}
diff --git a/arch/mips/mach-mtmips/mt7621/spl/dram.c b/arch/mips/mach-mtmips/mt7621/spl/dram.c
new file mode 100644
index 0000000..100adfb
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/dram.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <vsprintf.h>
+#include <asm/io.h>
+#include <asm/sections.h>
+#include <asm/byteorder.h>
+#include <asm/addrspace.h>
+#include <linux/string.h>
+#include "../mt7621.h"
+#include "dram.h"
+
+static const u32 ddr2_act[DDR_PARAM_SIZE] = {
+#if defined(CONFIG_MT7621_DRAM_DDR2_512M)
+	0xAA00AA00, 0xAA00AA00, 0x00000007, 0x22174441,
+	0x00000000, 0xF0748661, 0x40001273, 0x9F0A0481,
+	0x0304692F, 0x15602842, 0x00008888, 0x88888888,
+	0x00000000, 0x00000000, 0x00000000, 0x07100000,
+	0x00001B63, 0x00002000, 0x00004000, 0x00006000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+#elif defined(CONFIG_MT7621_DRAM_DDR2_512M_W9751G6KB_A02_1066MHZ)
+	0xAA00AA00, 0xAA00AA00, 0x00000007, 0x33484584,
+	0x00000000, 0xF07486A1, 0x50001273, 0x9F010481,
+	0x0304693F, 0x15602842, 0x00008888, 0x88888888,
+	0x00000000, 0x00000000, 0x00000010, 0x07100000,
+	0x00001F73, 0x00002000, 0x00004000, 0x00006000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+#elif defined(CONFIG_MT7621_DRAM_DDR2_1024M_W971GG6KB25_800MHZ)
+	0xAA00AA00, 0xAA00AA00, 0x00000007, 0x22174430,
+	0x01000000, 0xF0748661, 0x40001273, 0x9F0F0481,
+	0x0304692F, 0x15602842, 0x00008888, 0x88888888,
+	0x00000000, 0x00000000, 0x00000000, 0x07100000,
+	0x00001B63, 0x00002000, 0x00004000, 0x00006000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+#elif defined(CONFIG_MT7621_DRAM_DDR2_1024M_W971GG6KB18_1066MHZ)
+	0xAA00AA00, 0xAA00AA00, 0x00000007, 0x33484584,
+	0x01000000, 0xF07486A1, 0x50001273, 0x9F070481,
+	0x0304693F, 0x15602842, 0x00008888, 0x88888888,
+	0x00000000, 0x00000000, 0x00000010, 0x07100000,
+	0x00001F73, 0x00002000, 0x00004000, 0x00006000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+#else /* CONFIG_MT7621_DRAM_DDR2_1024M */
+	0xAA00AA00, 0xAA00AA00, 0x00000007, 0x22174441,
+	0x01000000, 0xF0748661, 0x40001273, 0x9F0F0481,
+	0x0304692F, 0x15602842, 0x00008888, 0x88888888,
+	0x00000000, 0x00000000, 0x00000000, 0x07100000,
+	0x00001B63, 0x00002000, 0x00004000, 0x00006000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+#endif
+};
+
+static const u32 ddr3_act[DDR_PARAM_SIZE] = {
+#if defined(CONFIG_MT7621_DRAM_DDR3_1024M)
+	0xAA00AA00, 0xAA00AA00, 0x00000007, 0x44694683,
+	0x01000000, 0xF07486A1, 0xC287221D, 0x9F060481,
+	0x03046948, 0x15602842, 0x00008888, 0x88888888,
+	0x00000000, 0x00000000, 0x00000210, 0x07100000,
+	0x00001B61, 0x00002040, 0x00004010, 0x00006000,
+	0x0C000000, 0x07070000, 0x00000000, 0x00000000,
+#elif defined(CONFIG_MT7621_DRAM_DDR3_4096M)
+	0xAA00AA00, 0xAA00AA00, 0x00000007, 0x44694683,
+	0x01000000, 0xF07486A1, 0xC287221D, 0x9F0F0481,
+	0x03046948, 0x15602842, 0x00008888, 0x88888888,
+	0x00000000, 0x00000000, 0x00000240, 0x07100000,
+	0x00001B61, 0x00002040, 0x00004010, 0x00006000,
+	0x0C000000, 0x07070000, 0x00000000, 0x00000000,
+#elif defined(CONFIG_MT7621_DRAM_DDR3_1024M_KGD)
+	0xFF00FF00, 0xFF00FF00, 0x00000007, 0x44694683,
+	0x01000000, 0xF07406A1, 0xC287221D, 0x9F060481,
+	0x03046923, 0x152f2842, 0x00008888, 0x88888888,
+	0x00000000, 0x00000000, 0x00000210, 0x07100000,
+	0x00001B61, 0x00002040, 0x00004010, 0x00006000,
+	0x0C000000, 0x07070000, 0x000C0000, 0x00000000,
+#else /* CONFIG_MT7621_DRAM_DDR3_2048M */
+	0xAA00AA00, 0xAA00AA00, 0x00000007, 0x44694673,
+	0x01000000, 0xF07486A1, 0xC287221D, 0x9F050481,
+	0x03046948, 0x15602842, 0x00008888, 0x88888888,
+	0x00000000, 0x00000000, 0x00000220, 0x07100000,
+	0x00001B61, 0x00002040, 0x00004010, 0x00006000,
+	0x0C000000, 0x07070000, 0x00000000, 0x00000000,
+#endif
+};
+
+#if defined(CONFIG_MT7621_DRAM_FREQ_400)
+#define DDR_FREQ_PARAM		0x41000000
+#elif defined(CONFIG_MT7621_DRAM_FREQ_1066)
+#define DDR_FREQ_PARAM		0x21000000
+#elif defined(CONFIG_MT7621_DRAM_FREQ_1200)
+#define DDR_FREQ_PARAM		0x11000000
+#else /* CONFIG_MT7621_DRAM_FREQ_800 */
+#define DDR_FREQ_PARAM		0x31000000
+#endif
+
+#define RG_MEPL_FBDIV_S		4
+#define RG_MEPL_FBDIV_M		0x7f
+
+static inline void word_copy(u32 *dest, const u32 *src, u32 count)
+{
+	const u32 *end = src + count;
+
+	while (src != end)
+		*dest++ = *src++;	/* one 32-bit word per iteration */
+}
+
+static u32 calc_cpu_pll_val(void)
+{
+	u32 xtal = get_xtal_mhz();
+	u32 pll_base = 0xc0004802;
+	u32 fbdiv;
+
+	/* A 40MHz crystal selects another base value and a halved divisor */
+	if (xtal == 40) {
+		pll_base = 0xc0005802;
+		xtal /= 2;
+	}
+
+	/* Feedback divider value, clamped to RG_MEPL_FBDIV_M */
+	fbdiv = CONFIG_MT7621_CPU_FREQ / xtal - 1;
+	if (fbdiv > RG_MEPL_FBDIV_M)
+		fbdiv = RG_MEPL_FBDIV_M;
+
+	return pll_base | (fbdiv << RG_MEPL_FBDIV_S);
+}
+
+void prepare_stage_bin(void)
+{
+	u32 stage_size;
+	/* Stock blob: read from __image_copy_end */
+	const struct stage_header *stock_stage_bin =
+		(const struct stage_header *)__image_copy_end;
+	/* Working copy: placed at STAGE_LOAD_ADDR, where the blob is called */
+	struct stage_header *new_stage_bin =
+		(struct stage_header *)STAGE_LOAD_ADDR;
+
+	if (be32_to_cpu(stock_stage_bin->ep) != STAGE_LOAD_ADDR)
+		panic("Invalid DDR stage binary blob\n");
+	/* stage_size is stored big-endian, in bytes; rounded up to words below */
+	stage_size = be32_to_cpu(stock_stage_bin->stage_size);
+
+	word_copy((u32 *)new_stage_bin, (const u32 *)stock_stage_bin,
+		  (stage_size + sizeof(u32) - 1) / sizeof(u32));
+	/* Overwrite the DDR2/DDR3 parameter tables with the build-time ones */
+	word_copy(new_stage_bin->ddr2_act, ddr2_act, DDR_PARAM_SIZE);
+	word_copy(new_stage_bin->ddr3_act, ddr3_act, DDR_PARAM_SIZE);
+	/* Fill in the CPU/DDR PLL settings and the baudrate in the copy */
+	new_stage_bin->cpu_pll_cfg = calc_cpu_pll_val();
+	new_stage_bin->ddr_pll_cfg = DDR_FREQ_PARAM;
+	new_stage_bin->baudrate = CONFIG_BAUDRATE;
+}
diff --git a/arch/mips/mach-mtmips/mt7621/spl/dram.h b/arch/mips/mach-mtmips/mt7621/spl/dram.h
new file mode 100644
index 0000000..7322c58
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/dram.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#ifndef _MT7621_DRAM_H_
+#define _MT7621_DRAM_H_
+
+#define STAGE_LOAD_ADDR			0xBE108800
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+#define DDR_PARAM_SIZE			24
+
+struct stage_header {
+	u32 jump_insn[2];	/* first two instruction words of the stage */
+	u32 ep;			/* entry point, stored big-endian */
+	u32 stage_size;		/* stage size in bytes, stored big-endian */
+	u32 has_stage2;
+	u32 next_ep;
+	u32 next_size;
+	u32 next_offset;
+	u32 cpu_pll_cfg;	/* filled in by prepare_stage_bin() */
+	u32 ddr_pll_cfg;	/* filled in by prepare_stage_bin() */
+	u32 reserved2[6];
+	char build_tag[32];
+	u32 ddr3_act[DDR_PARAM_SIZE];	/* DDR3 parameter table */
+	u32 padding1[2];
+	u32 ddr2_act[DDR_PARAM_SIZE];	/* DDR2 parameter table */
+	u32 padding2[2];
+	u32 baudrate;		/* set to CONFIG_BAUDRATE by prepare_stage_bin() */
+	u32 padding3;
+};
+#endif
+
+#endif /* _MT7621_DRAM_H_ */
diff --git a/arch/mips/mach-mtmips/mt7621/spl/launch.c b/arch/mips/mach-mtmips/mt7621/spl/launch.c
new file mode 100644
index 0000000..37c20a5
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/launch.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <asm/io.h>
+#include <asm/cm.h>
+#include <asm/sections.h>
+#include <asm/addrspace.h>
+#include <asm/mipsmtregs.h>
+#include <linux/sizes.h>
+#include <time.h>
+#include <cpu_func.h>
+#include "launch.h"
+#include "../mt7621.h"
+
+/* Cluster Power Controller (CPC) offsets */
+#define CPC_CL_OTHER			0x2010
+#define CPC_CO_CMD			0x4000
+
+/* CPC_CL_OTHER fields */
+#define CPC_CL_OTHER_CORENUM_SHIFT	16
+#define CPC_CL_OTHER_CORENUM		GENMASK(23, 16)
+
+/* CPC_CO_CMD */
+#define PWR_UP				3
+
+#define NUM_CORES			2
+#define NUM_CPUS			4
+#define WAIT_CPUS_TIMEOUT		4000
+
+static void copy_launch_wait_code(void)
+{
+	memset((void *)KSEG1, 0, SZ_4K);
+	/* Install the wait loop at LAUNCH_WAITCODE through uncached KSEG1 */
+	memcpy((void *)KSEG1ADDR(LAUNCH_WAITCODE),
+	       &launch_wait_code_start,
+	       &launch_wait_code_end - &launch_wait_code_start);
+	/* Drop stale cached lines; the second argument is an end address */
+	invalidate_dcache_range(KSEG0, KSEG0 + SZ_4K);
+}
+
+static void bootup_secondary_core(void)
+{
+	void __iomem *cpcbase = (void __iomem *)KSEG1ADDR(MIPS_CPC_BASE);
+	int i;
+	/* Select each core >= 1 in CPC_CL_OTHER, then write PWR_UP for it */
+	for (i = 1; i < NUM_CORES; i++) {
+		writel(i << CPC_CL_OTHER_CORENUM_SHIFT, cpcbase + CPC_CL_OTHER);
+		writel(PWR_UP, cpcbase + CPC_CO_CMD);
+	}
+}
+
+void secondary_cpu_init(void)
+{
+	void __iomem *sysc = (void __iomem *)KSEG1ADDR(SYSCTL_BASE);
+	u32 i, dual_core = 0, cpuready = 1, cpumask = 0x03;
+	ulong wait_tick;
+	struct cpulaunch_t *c;
+
+	/* Copy LAUNCH wait code used by other VPEs */
+	copy_launch_wait_code();
+	/* CPU_ID bit set: dual-core part, so four CPUs are expected below */
+	dual_core = readl(sysc + SYSCTL_CHIP_REV_ID_REG) & CPU_ID;
+
+	if (dual_core) {
+		/* Bootup secondary core for MT7621A */
+		cpumask = 0x0f;
+
+		/* Make BootROM/TPL redirect Core1's bootup flow to our entry point */
+		writel((uintptr_t)&_start, sysc + BOOT_SRAM_BASE_REG);
+
+		bootup_secondary_core();
+	}
+
+	/* Join the coherent domain */
+	join_coherent_domain(dual_core ? 2 : 1);
+
+	/* Bootup Core0/VPE1 */
+	boot_vpe1();
+
+	/* Wait until every CPU in cpumask is ready, or until the timeout */
+	wait_tick = get_timer(0) + WAIT_CPUS_TIMEOUT;
+
+	while (time_before(get_timer(0), wait_tick)) {
+		/* CPU0 is obviously ready */
+		for (i = 1; i < NUM_CPUS; i++) {
+			c = (struct cpulaunch_t *)(KSEG0ADDR(CPULAUNCH) +
+						   (i << LOG2CPULAUNCH));
+
+			if (c->flags & LAUNCH_FREADY)
+				cpuready |= BIT(i);
+		}
+
+		if ((cpuready & cpumask) == cpumask)
+			break;
+	}
+}
diff --git a/arch/mips/mach-mtmips/mt7621/spl/launch.h b/arch/mips/mach-mtmips/mt7621/spl/launch.h
new file mode 100644
index 0000000..f34250d
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/launch.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#ifndef _LAUNCH_H_
+#define _LAUNCH_H_
+
+#ifndef __ASSEMBLY__
+
+struct cpulaunch_t {
+	unsigned long pc;	/* LAUNCH_PC: address the wait code jumps to */
+	unsigned long gp;	/* LAUNCH_GP: loaded into gp before the jump */
+	unsigned long sp;	/* LAUNCH_SP: loaded into sp before the jump */
+	unsigned long a0;	/* LAUNCH_A0: loaded into a0 before the jump */
+	unsigned long _pad[3]; /* pad to cache line size to avoid thrashing */
+	unsigned long flags;	/* LAUNCH_FLAGS: LAUNCH_F* bits */
+};
+
+extern char launch_wait_code_start;
+extern char launch_wait_code_end;
+
+void join_coherent_domain(int ncores);
+void boot_vpe1(void);
+
+#else
+
+#define	LAUNCH_PC		0
+#define	LAUNCH_GP		4
+#define	LAUNCH_SP		8
+#define	LAUNCH_A0		12
+#define	LAUNCH_FLAGS		28
+
+#endif
+
+#define LOG2CPULAUNCH		5
+
+#define LAUNCH_FREADY		1
+#define LAUNCH_FGO		2
+#define LAUNCH_FGONE		4
+
+#define LAUNCH_WAITCODE		0x00000d00
+#define SCRLAUNCH		0x00000e00
+#define CPULAUNCH		0x00000f00
+#define NCPULAUNCH		8
+
+/* Polling period in count cycles for secondary CPU's */
+#define LAUNCHPERIOD		10000
+
+#endif /* _LAUNCH_H_ */
diff --git a/arch/mips/mach-mtmips/mt7621/spl/launch_ll.S b/arch/mips/mach-mtmips/mt7621/spl/launch_ll.S
new file mode 100644
index 0000000..32d28c7
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/launch_ll.S
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <asm/cm.h>
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/cacheops.h>
+#include <asm/mipsregs.h>
+#include <asm/addrspace.h>
+#include <asm/mipsmtregs.h>
+#include "launch.h"
+
+	.macro cache_loop	curr, end, line_sz, op
+10:	cache		\op, 0(\curr)
+	PTR_ADDU	\curr, \curr, \line_sz
+	bne		\curr, \end, 10b
+	.endm
+
+	.set	mt
+
+/*
+ * Join the coherent domain
+ * a0 = number of cores
+ */
+LEAF(join_coherent_domain)
+	/*
+	 * Enable coherence and allow interventions from all other cores.
+	 * (Write access enabled via GCR_ACCESS by core 0.)
+	 */
+	li	t1, 1
+	sll	t1, a0
+	addiu	t1, -1
+
+	li	t0, KSEG1ADDR(CONFIG_MIPS_CM_BASE)
+	sw	t1, GCR_Cx_COHERENCE(t0)
+	ehb
+	/* Check each of the a0 cores in turn: t2 = 0 .. a0 - 1 */
+	move	t2, zero
+
+_next_coherent_core:
+	sll	t1, t2, GCR_Cx_OTHER_CORENUM_SHIFT
+	sw	t1, GCR_Cx_OTHER(t0)
+	/* Spin while the selected core's GCR_CO_COHERENCE still reads zero */
+_busy_wait_coherent_core:
+	lw	t1, GCR_CO_COHERENCE(t0)
+	beqz	t1, _busy_wait_coherent_core
+
+	addiu	t2, 1
+	bne	t2, a0, _next_coherent_core
+
+	jr	ra
+	END(join_coherent_domain)
+
+/*
+ * Every VPE except Core0/VPE0 ends up here (Core1/VPE0 via launch_core_entry).
+ */
+LEAF(launch_vpe_entry)
+	mfc0	t0, CP0_EBASE
+	and	t0, t0, MIPS_EBASE_CPUNUM
+
+	/* per-VPE cpulaunch_t */
+	li	a0, KSEG0ADDR(CPULAUNCH)
+	sll	t1, t0, LOG2CPULAUNCH
+	addu	a0, t1
+
+	/* Set CPU online flag */
+	li	t0, LAUNCH_FREADY
+	sw	t0, LAUNCH_FLAGS(a0)
+
+	/* Enable count interrupt in mask, but do not enable interrupts */
+	mfc0	t0, CP0_STATUS
+	ori	t0, STATUSF_IP7
+	mtc0	t0, CP0_STATUS
+	/* a0 still holds the cpulaunch_t address; launch_wait_code reads it */
+	/* VPEs executing in wait code do not need a stack */
+	li	t9, KSEG0ADDR(LAUNCH_WAITCODE)
+	jr	t9
+	END(launch_vpe_entry)
+
+/*
+ * This function will not be executed in place.
+ * It will be copied into memory, and VPEs other than VPE0 will be
+ * started to run into this in-memory function.
+ */
+LEAF(launch_wait_code)
+	.globl	launch_wait_code_start
+launch_wait_code_start:
+	/* t0 = cpulaunch_t address handed over in a0 by launch_vpe_entry */
+	move	t0, a0
+
+start_poll:
+	/* Poll CPU go flag */
+	mtc0	zero, CP0_COUNT
+	li	t1, LAUNCHPERIOD
+	mtc0	t1, CP0_COMPARE
+
+time_wait:
+	/* Software wait */
+	mfc0	t2, CP0_COUNT
+	subu	t2, t1
+	bltz	t2, time_wait
+
+	/* Check the launch flag */
+	lw	t3, LAUNCH_FLAGS(t0)
+	and	t3, LAUNCH_FGO
+	beqz	t3, start_poll
+
+	/* Reset the counter and interrupts to give naive clients a chance */
+	mfc0	t1, CP0_STATUS
+	ins	t1, zero, STATUSB_IP7, 1
+	mtc0	t1, CP0_STATUS
+
+	mfc0	t1, CP0_COUNT
+	subu	t1, 1
+	mtc0	t1, CP0_COMPARE
+
+	/* Jump to kernel */
+	lw	t9, LAUNCH_PC(t0)
+	lw	gp, LAUNCH_GP(t0)
+	lw	sp, LAUNCH_SP(t0)
+	lw	a0, LAUNCH_A0(t0)
+	move	a1, zero
+	move	a2, zero
+	move	a3, zero
+	ori	t3, LAUNCH_FGONE
+	sw	t3, LAUNCH_FLAGS(t0)
+	/* flags now hold LAUNCH_FGO | LAUNCH_FGONE only (t3 was masked above) */
+	jr	t9
+
+	.globl	launch_wait_code_end
+launch_wait_code_end:
+	END(launch_wait_code)
+
+/*
+ * Core1 will go here.
+ */
+LEAF(launch_core_entry)
+	/* Disable caches */
+	bal	mips_cache_disable
+
+	/* Initialize L1 cache only */
+	li	a0, CONFIG_SYS_ICACHE_SIZE
+	li	a1, CONFIG_SYS_ICACHE_LINE_SIZE
+	li	a2, CONFIG_SYS_DCACHE_SIZE
+	li	a3, CONFIG_SYS_DCACHE_LINE_SIZE
+
+	mtc0	zero, CP0_TAGLO
+	mtc0	zero, CP0_TAGLO, 2
+	ehb
+
+	/*
+	 * Initialize the I-cache first,
+	 */
+	li		t0, KSEG0
+	addu		t1, t0, a0
+	/* clear tag to invalidate */
+	cache_loop	t0, t1, a1, INDEX_STORE_TAG_I
+#ifdef CONFIG_SYS_MIPS_CACHE_INIT_RAM_LOAD
+	/* fill once, so data field parity is correct */
+	PTR_LI		t0, KSEG0
+	cache_loop	t0, t1, a1, FILL
+	/* invalidate again - prudent but not strictly necessary */
+	PTR_LI		t0, KSEG0
+	cache_loop	t0, t1, a1, INDEX_STORE_TAG_I
+#endif
+
+	/*
+	 * then initialize D-cache.
+	 */
+	PTR_LI		t0, KSEG0
+	PTR_ADDU	t1, t0, a2
+	/* clear all tags */
+	cache_loop	t0, t1, a3, INDEX_STORE_TAG_D
+#ifdef CONFIG_SYS_MIPS_CACHE_INIT_RAM_LOAD
+	/* load from each line (in cached space) */
+	PTR_LI		t0, KSEG0
+2:	LONG_L		zero, 0(t0)
+	PTR_ADDU	t0, a3
+	 bne		t0, t1, 2b
+	/* clear all tags */
+	PTR_LI		t0, KSEG0
+	cache_loop	t0, t1, a3, INDEX_STORE_TAG_D
+#endif
+
+	/* Set Cache Mode */
+	mfc0	t0, CP0_CONFIG
+	li	t1, CONF_CM_CACHABLE_COW
+	ins	t0, t1, 0, 3
+	mtc0	t0, CP0_CONFIG
+
+	/* Join the coherent domain */
+	li	a0, 2
+	bal	join_coherent_domain
+
+	/* Bootup Core1/VPE1 (this code runs on Core1) */
+	bal	boot_vpe1
+
+	b	launch_vpe_entry
+	END(launch_core_entry)
+
+/*
+ * Bootup VPE1.
+ * This subroutine must be executed from VPE0 with VPECONF0[MVP] already set.
+ */
+LEAF(boot_vpe1)
+	mfc0	t0, CP0_MVPCONF0
+
+	/* a0 = number of TCs - 1 */
+	ext	a0, t0, MVPCONF0_PTC_SHIFT, 8
+	beqz	a0, _vpe1_init_done
+
+	/* a1 = number of VPEs - 1 */
+	ext	a1, t0, MVPCONF0_PVPE_SHIFT, 4
+	beqz	a1, _vpe1_init_done
+
+	/* a2 = current TC No. */
+	move	a2, zero
+
+	/* Enter VPE Configuration State */
+	mfc0	t0, CP0_MVPCONTROL
+	or	t0, MVPCONTROL_VPC
+	mtc0	t0, CP0_MVPCONTROL
+	ehb
+
+_next_tc:
+	/* Set the TC number to be used on MTTR and MFTR instructions */
+	mfc0	t0, CP0_VPECONTROL
+	ins	t0, a2, 0, 8
+	mtc0	t0, CP0_VPECONTROL
+	ehb
+
+	/* TC0 is already bound */
+	beqz	a2, _next_vpe
+
+	/* Halt current TC */
+	li	t0, TCHALT_H
+	mttc0	t0, CP0_TCHALT
+	ehb
+
+	/* Spare TC: bind it to the last VPE (t1 = a1, set before branching) */
+	slt	t0, a1, a2
+	move	t1, a1
+	bnez	t0, _vpe_bind_tc
+
+	/* Set Exclusive TC for active TC */
+	mftc0	t0, CP0_VPECONF0
+	ins	t0, a2, VPECONF0_XTC_SHIFT, 8
+	mttc0	t0, CP0_VPECONF0
+
+	move	t1, a2
+_vpe_bind_tc:
+	/* Bind TC to a VPE */
+	mftc0	t0, CP0_TCBIND
+	ins	t0, t1, TCBIND_CURVPE_SHIFT, 4
+	mttc0	t0, CP0_TCBIND
+
+	/*
+	 * Set up CP0_TCSTATUS register:
+	 * Disable Coprocessor Usable bits
+	 * Disable MDMX/DSP ASE
+	 * Clear Dirty TC
+	 * not dynamically allocatable
+	 * not allocated
+	 * Kernel mode
+	 * interrupt exempt
+	 * ASID 0
+	 */
+	li	t0, TCSTATUS_IXMT
+	mttc0	t0, CP0_TCSTATUS
+
+_next_vpe:
+	slt	t1, a1, a2
+	bnez	t1, _done_vpe	# No more VPEs
+
+	/* Disable TC multi-threading */
+	mftc0	t0, CP0_VPECONTROL
+	ins	t0, zero, VPECONTROL_TE_SHIFT, 1
+	mttc0	t0, CP0_VPECONTROL
+
+	/* Skip following configuration for TC0 */
+	beqz	a2, _done_vpe
+
+	/* Deactivate VPE, set Master VPE */
+	mftc0	t0, CP0_VPECONF0
+	ins	t0, zero, VPECONF0_VPA_SHIFT, 1
+	or	t0, VPECONF0_MVP
+	mttc0	t0, CP0_VPECONF0
+
+	mfc0	t0, CP0_STATUS
+	mttc0	t0, CP0_STATUS
+
+	mttc0	zero, CP0_EPC
+	mttc0	zero, CP0_CAUSE
+
+	mfc0	t0, CP0_CONFIG
+	mttc0	t0, CP0_CONFIG
+
+	/*
+	 * VPE1 of each core can execute cached as its L1 I$ has already
+	 * been initialized.
+	 * and the L2$ has been initialized or "disabled" via CCA override.
+	 */
+	PTR_LA	t0, _start
+	mttc0	t0, CP0_TCRESTART
+
+	/* Unset Interrupt Exempt, set Activate Thread */
+	mftc0	t0, CP0_TCSTATUS
+	ins	t0, zero, TCSTATUS_IXMT_SHIFT, 1
+	ori	t0, TCSTATUS_A
+	mttc0	t0, CP0_TCSTATUS
+
+	/* Resume TC */
+	mttc0	zero, CP0_TCHALT
+
+	/* Activate VPE */
+	mftc0	t0, CP0_VPECONF0
+	ori	t0, VPECONF0_VPA
+	mttc0	t0, CP0_VPECONF0
+
+_done_vpe:
+	addu	a2, 1
+	sltu	t0, a0, a2
+	beqz	t0, _next_tc
+
+	mfc0	t0, CP0_MVPCONTROL
+	/* Enable all activated VPE to execute */
+	ori	t0, MVPCONTROL_EVP
+	/* Exit VPE Configuration State */
+	ins	t0, zero, MVPCONTROL_VPC_SHIFT, 1
+	mtc0	t0, CP0_MVPCONTROL
+	ehb
+
+_vpe1_init_done:
+	jr	ra
+	END(boot_vpe1)
diff --git a/arch/mips/mach-mtmips/mt7621/spl/serial.c b/arch/mips/mach-mtmips/mt7621/spl/serial.c
new file mode 100644
index 0000000..5cf093a
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/serial.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <asm/io.h>
+#include "../mt7621.h"
+
+void mtmips_spl_serial_init(void)
+{
+#ifdef CONFIG_SPL_SERIAL
+	void __iomem *base = ioremap_nocache(SYSCTL_BASE, SYSCTL_SIZE);
+	/* Clear the GPIOMODE bits of the UART chosen by CONFIG_CONS_INDEX */
+#if CONFIG_CONS_INDEX == 1
+	clrbits_32(base + SYSCTL_GPIOMODE_REG, UART1_MODE);
+#elif CONFIG_CONS_INDEX == 2
+	clrbits_32(base + SYSCTL_GPIOMODE_REG, UART2_MODE_M);
+#elif CONFIG_CONS_INDEX == 3
+	clrbits_32(base + SYSCTL_GPIOMODE_REG, UART3_MODE_M);
+#endif /* CONFIG_CONS_INDEX */
+#endif /* CONFIG_SPL_SERIAL */
+}
diff --git a/arch/mips/mach-mtmips/mt7621/spl/spl.c b/arch/mips/mach-mtmips/mt7621/spl/spl.c
new file mode 100644
index 0000000..91eebc6
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/spl.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <init.h>
+#include <image.h>
+#include <vsprintf.h>
+#include <malloc.h>
+#include <asm/io.h>
+#include <asm/sections.h>
+#include <asm/addrspace.h>
+#include <asm/byteorder.h>
+#include <asm/global_data.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+#include <mach/serial.h>
+#include "../mt7621.h"
+#include "dram.h"
+#include <spl.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+struct tpl_info {
+	u32 magic;	/* compared against TPL_INFO_MAGIC */
+	u32 size;	/* flash offset at which an image header is looked up */
+};
+
+void set_timer_freq_simple(void)
+{
+	u32 xtal_mhz = get_xtal_mhz();
+	u32 cpu_mhz = rounddown(CONFIG_MT7621_CPU_FREQ, xtal_mhz);
+
+	gd->arch.timer_freq = cpu_mhz * 500000;	/* rounded-down freq x 500000 */
+}
+
+void __noreturn board_init_f(ulong dummy)
+{
+	spl_init();
+
+#ifdef CONFIG_SPL_SERIAL
+	/*
+	 * mtmips_spl_serial_init() is useful if debug uart is enabled,
+	 * or DM based serial is not enabled.
+	 */
+	mtmips_spl_serial_init();
+	preloader_console_init();
+#endif /* CONFIG_SPL_SERIAL */
+	/* Last call of this __noreturn function, so it must not return */
+	board_init_r(NULL, 0);
+}
+
+void board_boot_order(u32 *spl_boot_list)
+{
+#ifdef CONFIG_MT7621_BOOT_FROM_NAND
+	spl_boot_list[0] = BOOT_DEVICE_NAND;
+#else /* !CONFIG_MT7621_BOOT_FROM_NAND */
+	spl_boot_list[0] = BOOT_DEVICE_NOR;
+#endif /* CONFIG_MT7621_BOOT_FROM_NAND */
+}
+
+unsigned long spl_nor_get_uboot_base(void)
+{
+	const struct tpl_info *tpli;
+	const image_header_t *hdr;
+	u32 addr;
+
+	addr = FLASH_MMAP_BASE + TPL_INFO_OFFSET;
+	tpli = (const struct tpl_info *)KSEG1ADDR(addr);
+	/* tpli->size is taken as the flash offset of an image header */
+	if (tpli->magic == TPL_INFO_MAGIC) {
+		addr = FLASH_MMAP_BASE + tpli->size;
+		hdr = (const image_header_t *)KSEG1ADDR(addr);
+		/* The payload follows that image: skip its header and data */
+		if (image_get_magic(hdr) == IH_MAGIC) {
+			addr += sizeof(*hdr) + image_get_size(hdr);
+			return KSEG1ADDR(addr);
+		}
+	}
+
+	panic("Unable to locate SPL payload\n");
+	return 0;
+}
+
+uint32_t spl_nand_get_uboot_raw_page(void)
+{
+	const struct stage_header *hdr = (const struct stage_header *)&_start;
+	u32 stage_len = be32_to_cpu(hdr->stage_size);
+	u32 offset = image_get_header_size() + stage_len;
+
+	/* Image header size plus this stage's size, rounded up to 4KiB */
+	offset = ALIGN(offset, SZ_4K);
+	return offset;
+}
diff --git a/arch/mips/mach-mtmips/mt7621/spl/start.S b/arch/mips/mach-mtmips/mt7621/spl/start.S
new file mode 100644
index 0000000..3cad356
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/spl/start.S
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/cacheops.h>
+#include <asm/addrspace.h>
+#include <asm/mipsmtregs.h>
+#include <asm/cm.h>
+#include "../mt7621.h"
+#include "dram.h"
+
+#ifndef CONFIG_SYS_INIT_SP_ADDR
+#define CONFIG_SYS_INIT_SP_ADDR	(CONFIG_SYS_SDRAM_BASE + \
+				CONFIG_SYS_INIT_SP_OFFSET)
+#endif
+
+#define SP_ADDR_TEMP		0xbe10dff0
+
+	.macro init_wr sel
+	MTC0	zero, CP0_WATCHLO,\sel
+	mtc0	t1, CP0_WATCHHI,\sel
+	.endm
+
+	.macro setup_stack_gd
+	li	t0, -16
+	PTR_LI	t1, CONFIG_SYS_INIT_SP_ADDR
+	and	sp, t1, t0		# force 16 byte alignment
+	PTR_SUBU \
+		sp, sp, GD_SIZE		# reserve space for gd
+	and	sp, sp, t0		# force 16 byte alignment
+	move	k0, sp			# save gd pointer
+#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \
+    !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
+	li	t2, CONFIG_VAL(SYS_MALLOC_F_LEN)
+	PTR_SUBU \
+		sp, sp, t2		# reserve space for early malloc
+	and	sp, sp, t0		# force 16 byte alignment
+#endif
+	move	fp, sp
+
+	/* Clear gd: zero from k0 up to CONFIG_SYS_INIT_SP_ADDR (t1) */
+	move	t0, k0
+1:
+	PTR_S	zero, 0(t0)
+	PTR_ADDIU t0, PTRSIZE
+	blt	t0, t1, 1b
+	 nop
+
+#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \
+    !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
+	PTR_S	sp, GD_MALLOC_BASE(k0)	# gd->malloc_base offset
+#endif
+	.endm
+
+	.set	noreorder
+
+ENTRY(_start)
+	b	1f
+	 mtc0	zero, CP0_COUNT
+
+	/* Stage header required by BootROM */
+	.org	0x8
+	.word	0		# ep, filled by mkimage
+	.word	0		# stage_size, filled by mkimage
+	.word	0		# has_stage2
+	.word	0		# next_ep
+	.word	0		# next_size
+	.word	0		# next_offset
+
+1:
+	/* Init CP0 Status */
+	mfc0	t0, CP0_STATUS
+	and	t0, ST0_IMPL
+	or	t0, ST0_BEV | ST0_ERL
+	mtc0	t0, CP0_STATUS
+	ehb
+
+	/* Clear Watch Status bits and disable watch exceptions */
+	li	t1, 0x7		# Clear I, R and W conditions
+	init_wr	0
+	init_wr	1
+	init_wr	2
+	init_wr	3
+
+	/* Clear WP, IV and SW interrupts */
+	mtc0	zero, CP0_CAUSE
+
+	/* Clear timer interrupt (CP0_COUNT cleared in the delay slot at _start) */
+	mtc0	zero, CP0_COMPARE
+
+	/* VPE1 goes to wait code directly */
+	mfc0	t0, CP0_TCBIND
+	andi	t0, TCBIND_CURVPE
+	bnez	t0, launch_vpe_entry
+	 nop
+
+	/* Core1 goes to specific launch entry */
+	PTR_LI	t0, KSEG1ADDR(CONFIG_MIPS_CM_BASE)
+	lw	t1, GCR_Cx_ID(t0)
+	bnez	t1, launch_core_entry
+	 nop
+
+	/* MT7530 reset */
+	li	t0, KSEG1ADDR(SYSCTL_BASE)
+	lw	t1, SYSCTL_RSTCTL_REG(t0)
+	ori	t1, MCM_RST
+	sw	t1, SYSCTL_RSTCTL_REG(t0)
+
+	/* Disable DMA route for PSE SRAM set by BootROM */
+	PTR_LI	t0, KSEG1ADDR(DMA_CFG_ARB_BASE)
+	sw	zero, DMA_ROUTE_REG(t0)
+
+	/* Set CPU clock to 500MHz (Required if boot from NAND) */
+	li	t0, KSEG1ADDR(SYSCTL_BASE)
+	lw	t1, SYSCTL_CLKCFG0_REG(t0)
+	ins	t1, zero, 30, 2		# CPU_CLK_SEL
+	sw	t1, SYSCTL_CLKCFG0_REG(t0)
+
+	/* Set CPU clock divider to 1/1 */
+	li	t0, KSEG1ADDR(RBUS_BASE)
+	li	t1, 0x101
+	sw	t1, RBUS_DYN_CFG0_REG(t0)
+
+	/* (Re-)initialize the SRAM */
+	bal	mips_sram_init
+	 nop
+
+	/* Set up temporary stack */
+	li	sp, SP_ADDR_TEMP
+
+	/* Setup full CPS */
+	bal	mips_cm_map
+	 nop
+
+	bal	mt7621_cps_init
+	 nop
+
+	/* Prepare for CPU/DDR initialization binary blob */
+	bal	prepare_stage_bin
+	 nop
+
+	/* Call CPU/DDR initialization binary blob */
+	li	t9, STAGE_LOAD_ADDR
+	jalr	t9
+	 nop
+
+	/* Switch CPU PLL source */
+	li	t0, KSEG1ADDR(SYSCTL_BASE)
+	lw	t1, SYSCTL_CLKCFG0_REG(t0)
+	li	t2, 1
+	ins	t1, t2, CPU_CLK_SEL_S, 2
+	sw	t1, SYSCTL_CLKCFG0_REG(t0)
+
+	/*
+	 * Currently SPL is running on locked L2 cache (on KSEG0).
+	 * To reset the entire cache, we have to writeback SPL to DRAM first.
+	 * Cache flush won't work here. Use memcpy instead.
+	 */
+
+	la	a0, __text_start
+	move	a1, a0
+	la	a2, __image_copy_end
+	sub	a2, a2, a1
+	li	a3, 5
+	ins	a0, a3, 29, 3	# convert to KSEG1
+
+	bal	memcpy
+	 nop
+
+	/* Disable caches */
+	bal	mips_cache_disable
+	 nop
+
+	/* Reset caches */
+	bal	mips_cache_reset
+	 nop
+
+	/* Disable SRAM */
+	li	t0, KSEG1ADDR(FE_BASE)
+	li	t1, FE_PSE_RESET
+	sw	t1, FE_RST_GLO_REG(t0)
+
+	/* Clear the .bss section: words in [__bss_start, __bss_end) */
+	la	a0, __bss_start
+	la	a1, __bss_end
+1:	sw	zero, 0(a0)
+	addiu	a0, 4
+	blt	a0, a1, 1b
+	 nop
+
+	/* Set up initial stack and global data */
+	setup_stack_gd
+
+#if CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
+	/* Set malloc base */
+	li	t0, (CONFIG_SYS_INIT_SP_ADDR + 15) & (~15)
+	PTR_S	t0, GD_MALLOC_BASE(k0)	# gd->malloc_base offset
+#endif
+
+#if defined(CONFIG_DEBUG_UART) && defined(CONFIG_SPL_SERIAL)
+	/* Earliest point to set up debug uart */
+	bal	debug_uart_init
+	 nop
+#endif
+
+	/* Setup timer */
+	bal	set_timer_freq_simple
+	 nop
+
+	/* Bootup secondary CPUs */
+	bal	secondary_cpu_init
+	 nop
+
+	move	a0, zero		# a0 <-- boot_flags = 0
+	bal board_init_f
+	 move	ra, zero
+
+	END(_start)