mips: mtmips: rewrite low-level code of MT7628

This patch rewrites the mtmips architecture code with the following changes:

1. Move the MT7628 SoC-specific parts into a subfolder.
2. Lock part of the D-Cache for use as a temporary stack.
3. Reimplement DDR initialization in C.
4. Reimplement DDR calibration with clearer logic.
5. Add full support for automatic size detection for DDR1 and DDR2.
6. Use the accurate CPU clock, derived from the input xtal frequency, for
   the timer and delay functions.

Note:

Parts of print_cpuinfo() are incompatible with MT7620, so it is moved into
the mt7628 subfolder.

Reviewed-by: Stefan Roese <sr@denx.de>
Reviewed-by: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
diff --git a/arch/mips/mach-mtmips/Kconfig b/arch/mips/mach-mtmips/Kconfig
index 8cb76c4..3f25de8 100644
--- a/arch/mips/mach-mtmips/Kconfig
+++ b/arch/mips/mach-mtmips/Kconfig
@@ -19,12 +19,18 @@
 config SYS_ICACHE_LINE_SIZE
 	default 32
 
+config SYS_TEXT_BASE
+	default 0x9c000000
+
 choice
 	prompt "MediaTek MIPS SoC select"
 
 config SOC_MT7628
 	bool "MT7628"
 	select MIPS_L1_CACHE_SHIFT_5
+	select MIPS_INIT_STACK_IN_SRAM
+	select MIPS_SRAM_INIT
+	select SYS_MIPS_CACHE_INIT_RAM_LOAD
 	select PINCTRL_MT7628
 	select MTK_SERIAL
 	select SYSRESET_RESETCTL
@@ -79,69 +85,6 @@
 
 endchoice
 
-choice
-	prompt "DDR2 size"
-
-config ONBOARD_DDR2_SIZE_256MBIT
-	bool "256MBit (32MByte) total size"
-	depends on BOOT_ROM
-	help
-	  Use 256MBit (32MByte) of DDR total size
-
-config ONBOARD_DDR2_SIZE_512MBIT
-	bool "512MBit (64MByte) total size"
-	depends on BOOT_ROM
-	help
-	  Use 512MBit (64MByte) of DDR total size
-
-config ONBOARD_DDR2_SIZE_1024MBIT
-	bool "1024MBit (128MByte) total size"
-	depends on BOOT_ROM
-	help
-	  Use 1024MBit (128MByte) of DDR total size
-
-config ONBOARD_DDR2_SIZE_2048MBIT
-	bool "2048MBit (256MByte) total size"
-	depends on BOOT_ROM
-	help
-	  Use 2048MBit (256MByte) of DDR total size
-
-endchoice
-
-choice
-	prompt "DDR2 chip width"
-
-config ONBOARD_DDR2_CHIP_WIDTH_8BIT
-	bool "8bit DDR chip width"
-	depends on BOOT_ROM
-	help
-	  Use DDR chips with 8bit width
-
-config ONBOARD_DDR2_CHIP_WIDTH_16BIT
-	bool "16bit DDR chip width"
-	depends on BOOT_ROM
-	help
-	  Use DDR chips with 16bit width
-
-endchoice
-
-choice
-	prompt "DDR2 bus width"
-
-config ONBOARD_DDR2_BUS_WIDTH_16BIT
-	bool "16bit DDR bus width"
-	depends on BOOT_ROM
-	help
-	  Use 16bit DDR bus width
-
-config ONBOARD_DDR2_BUS_WIDTH_32BIT
-	bool "32bit DDR bus width"
-	depends on BOOT_ROM
-	help
-	  Use 32bit DDR bus width
-
-endchoice
-
 config SUPPORTS_BOOT_RAM
 	bool
 
diff --git a/arch/mips/mach-mtmips/Makefile b/arch/mips/mach-mtmips/Makefile
index 1f3e65e..72f0369 100644
--- a/arch/mips/mach-mtmips/Makefile
+++ b/arch/mips/mach-mtmips/Makefile
@@ -1,8 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0+
 
 obj-y += cpu.o
+obj-y += ddr_init.o
+obj-y += ddr_cal.o
 
-ifndef CONFIG_SKIP_LOWLEVEL_INIT
-obj-y += ddr_calibrate.o
-obj-y += lowlevel_init.o
-endif
+obj-$(CONFIG_SOC_MT7628) += mt7628/
diff --git a/arch/mips/mach-mtmips/cpu.c b/arch/mips/mach-mtmips/cpu.c
index 8976ef5..459a967 100644
--- a/arch/mips/mach-mtmips/cpu.c
+++ b/arch/mips/mach-mtmips/cpu.c
@@ -4,69 +4,17 @@
  */
 
 #include <common.h>
-#include <dm.h>
-#include <init.h>
 #include <malloc.h>
-#include <ram.h>
-#include <wdt.h>
-#include <asm/io.h>
 #include <linux/io.h>
 #include <linux/sizes.h>
-#include "mt76xx.h"
 
-#define STR_LEN			6
-
-#ifdef CONFIG_BOOT_ROM
-int mach_cpu_init(void)
-{
-	ddr_calibrate();
-
-	return 0;
-}
-#endif
+DECLARE_GLOBAL_DATA_PTR;
 
 int dram_init(void)
 {
+#ifdef CONFIG_SKIP_LOWLEVEL_INIT
 	gd->ram_size = get_ram_size((void *)CONFIG_SYS_SDRAM_BASE, SZ_256M);
-
-	return 0;
-}
-
-int print_cpuinfo(void)
-{
-	static const char * const boot_str[] = { "PLL (3-Byte SPI Addr)",
-						 "PLL (4-Byte SPI Addr)",
-						 "XTAL (3-Byte SPI Addr)",
-						 "XTAL (4-Byte SPI Addr)" };
-	const void *blob = gd->fdt_blob;
-	void __iomem *sysc_base;
-	char buf[STR_LEN + 1];
-	fdt_addr_t base;
-	fdt_size_t size;
-	char *str;
-	int node;
-	u32 val;
-
-	/* Get system controller base address */
-	node = fdt_node_offset_by_compatible(blob, -1, "ralink,mt7620a-sysc");
-	if (node < 0)
-		return -FDT_ERR_NOTFOUND;
-
-	base = fdtdec_get_addr_size_auto_noparent(blob, node, "reg",
-						  0, &size, true);
-	if (base == FDT_ADDR_T_NONE)
-		return -EINVAL;
-
-	sysc_base = ioremap_nocache(base, size);
-
-	str = (char *)sysc_base + MT76XX_CHIPID_OFFS;
-	snprintf(buf, STR_LEN + 1, "%s", str);
-	val = readl(sysc_base + MT76XX_CHIP_REV_ID_OFFS);
-	printf("CPU:   %-*s Rev %ld.%ld - ", STR_LEN, buf,
-	       (val & GENMASK(11, 8)) >> 8, val & GENMASK(3, 0));
-
-	val = (readl(sysc_base + MT76XX_SYSCFG0_OFFS) & GENMASK(3, 1)) >> 1;
-	printf("Boot from %s\n", boot_str[val]);
+#endif
 
 	return 0;
 }
diff --git a/arch/mips/mach-mtmips/ddr_cal.c b/arch/mips/mach-mtmips/ddr_cal.c
new file mode 100644
index 0000000..0ea7c7d
--- /dev/null
+++ b/arch/mips/mach-mtmips/ddr_cal.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author:  Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <common.h>
+#include <asm/addrspace.h>
+#include <asm/cacheops.h>
+#include <linux/bitops.h>
+#include <linux/io.h>
+#include <mach/mc.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#define COARSE_MIN_START	6
+#define FINE_MIN_START		15
+#define COARSE_MAX_START	7
+#define FINE_MAX_START		0
+
+#define NUM_OF_CACHELINE	128
+#define TEST_PAT_SIZE		(NUM_OF_CACHELINE * CONFIG_SYS_CACHELINE_SIZE)
+
+#define INIT_DQS_VAL		((7 << DQS1_DELAY_COARSE_TUNING_S) | \
+				(4 << DQS1_DELAY_FINE_TUNING_S) | \
+				(7 << DQS0_DELAY_COARSE_TUNING_S) | \
+				(4 << DQS0_DELAY_FINE_TUNING_S))
+/* Issue a MIPS "pref" instruction with immediate hint 'op' at 0(addr) */
+static inline void pref_op(int op, const volatile void *addr)
+{
+	__asm__ __volatile__("pref %0, 0(%1)" : : "i" (op), "r" (addr));
+}
+/* Returns 0 if DRAM reads back correctly with DQS delay 'dqsval', else -1 */
+static inline int dqs_test_valid(void __iomem *memc, u32 memsize, u32 dqsval,
+				 u32 bias)
+{
+	u32 *nca, *ca;
+	u32 off;
+	int i;
+
+	for (off = 0; off < memsize - TEST_PAT_SIZE; off += (memsize >> 6)) {
+		nca = (u32 *)KSEG1ADDR(off);	/* uncached alias */
+		ca = (u32 *)KSEG0ADDR(off);	/* cached alias */
+
+		writel(INIT_DQS_VAL, memc + MEMCTL_DDR_DQS_DLY_REG);
+		wmb();
+
+		for (i = 0; i < TEST_PAT_SIZE / sizeof(u32); i++)
+			ca[i] = 0x1f1f1f1f;	/* filler via cached alias */
+
+		for (i = 0; i < TEST_PAT_SIZE / sizeof(u32); i++)
+			nca[i] = (u32)nca + i + bias;	/* expected data */
+
+		writel(dqsval, memc + MEMCTL_DDR_DQS_DLY_REG);
+		wmb();
+		/* Invalidate these D-cache lines before reading them back */
+		for (i = 0; i < TEST_PAT_SIZE; i += CONFIG_SYS_CACHELINE_SIZE)
+			mips_cache(HIT_INVALIDATE_D, (u8 *)ca + i);
+		wmb();
+		/* Prefetch each line, then verify through the cached alias */
+		for (i = 0; i < TEST_PAT_SIZE; i += CONFIG_SYS_CACHELINE_SIZE)
+			pref_op(0, (u8 *)ca + i);
+
+		for (i = 0; i < TEST_PAT_SIZE / sizeof(u32); i++) {
+			if (ca[i] != (u32)nca + i + bias)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+/* Scan upward from initval; return first failing value (maxval + 1 if none) */
+static inline u32 dqs_find_max(void __iomem *memc, u32 memsize, u32 initval,
+			       u32 maxval, u32 shift, u32 regval)
+{
+	u32 fieldval = initval, dqsval;
+
+	do {
+		dqsval = regval | (fieldval << shift);
+
+		if (dqs_test_valid(memc, memsize, dqsval, 3))	/* failed */
+			break;
+
+		fieldval++;
+	} while (fieldval <= maxval);
+
+	return fieldval;
+}
+/* Scan down from initval; return first failing value + 1, or minval if none */
+static inline u32 dqs_find_min(void __iomem *memc, u32 memsize, u32 initval,
+			       u32 minval, u32 shift, u32 regval)
+{
+	u32 fieldval = initval, dqsval;
+
+	while (fieldval > minval) {	/* minval itself is not tested */
+		dqsval = regval | (fieldval << shift);
+
+		if (dqs_test_valid(memc, memsize, dqsval, 1)) {
+			fieldval++;
+			break;
+		}
+
+		fieldval--;
+	}
+
+	return fieldval;
+}
+/* Find the passing delay window of each DQS and program its centre value */
+void ddr_calibrate(void __iomem *memc, u32 memsize, u32 bw)
+{
+	u32 dqs_coarse_min, dqs_coarse_max, dqs_coarse_val;
+	u32 dqs_fine_min, dqs_fine_max, dqs_fine_val;
+	u32 dqs_coarse_min_limit, dqs_fine_min_limit;
+	u32 dlls, dqs_dll, ddr_cfg2_reg;
+	u32 dqs_dly_tmp, dqs_dly, test_dqs, shift;
+	u32 rem, mask;
+	int i;
+
+	/* Disable automatic self-refresh during calibration */
+	clrbits_32(memc + MEMCTL_DDR_SELF_REFRESH_REG, SR_AUTO_EN);
+
+	/* Save DDR_CFG2 and modify its DQS gating window */
+	ddr_cfg2_reg = readl(memc + MEMCTL_DDR_CFG2_REG);
+	mask = DQS0_GATING_WINDOW_M;
+	if (bw == IND_SDRAM_WIDTH_16BIT)
+		mask |= DQS1_GATING_WINDOW_M;
+	clrbits_32(memc + MEMCTL_DDR_CFG2_REG, mask);
+
+	/* Derive lower search limits from the DLL master delay (MST_DLY_SEL) */
+	dlls = readl(memc + MEMCTL_DLL_DBG_REG);
+	dlls = (dlls & MST_DLY_SEL_M) >> MST_DLY_SEL_S;
+
+	dqs_dll = dlls >> 4;
+	if (dqs_dll <= 8)
+		dqs_coarse_min_limit = 8 - dqs_dll;
+	else
+		dqs_coarse_min_limit = 0;
+
+	dqs_dll = dlls & 0xf;
+	if (dqs_dll <= 8)
+		dqs_fine_min_limit = 8 - dqs_dll;
+	else
+		dqs_fine_min_limit = 0;
+
+	/* Initial DQS register value */
+	dqs_dly = INIT_DQS_VAL;
+
+	/* Calibrate DQS0 and/or DQS1; bw is IND_SDRAM_WIDTH_8BIT/16BIT (1/2) */
+	for (i = 0; i < bw; i++) {
+		shift = i * 8;	/* each DQS owns one byte of the register */
+		dqs_dly &= ~(0xff << shift);
+
+		/* Find maximum DQS coarse-grain */
+		dqs_dly_tmp = dqs_dly | (0xf << shift);
+		dqs_coarse_max = dqs_find_max(memc, memsize, COARSE_MAX_START,
+					      0xf, 4 + shift, dqs_dly_tmp);
+
+		/* Find maximum DQS fine-grain */
+		dqs_dly_tmp = dqs_dly | (dqs_coarse_max << (4 + shift));
+		test_dqs = dqs_find_max(memc, memsize, FINE_MAX_START, 0xf,
+					shift, dqs_dly_tmp);
+
+		if (test_dqs == FINE_MAX_START) {	/* first try failed */
+			dqs_coarse_max--;
+			dqs_fine_max = 0xf;
+		} else {
+			dqs_fine_max = test_dqs - 1;
+		}
+
+		/* Find minimum DQS coarse-grain */
+		dqs_dly_tmp = dqs_dly;
+		dqs_coarse_min = dqs_find_min(memc, memsize, COARSE_MIN_START,
+					      dqs_coarse_min_limit, 4 + shift,
+					      dqs_dly_tmp);
+
+		/* Find minimum DQS fine-grain */
+		dqs_dly_tmp = dqs_dly | (dqs_coarse_min << (4 + shift));
+		test_dqs = dqs_find_min(memc, memsize, FINE_MIN_START,
+					dqs_fine_min_limit, shift, dqs_dly_tmp);
+
+		if (test_dqs == FINE_MIN_START + 1) {	/* first try failed */
+			dqs_coarse_min++;
+			dqs_fine_min = 0;
+		} else {
+			dqs_fine_min = test_dqs;
+		}
+
+		/* Calculate central DQS coarse/fine value */
+		dqs_coarse_val = (dqs_coarse_max + dqs_coarse_min) >> 1;
+		rem = (dqs_coarse_max + dqs_coarse_min) % 2;
+
+		dqs_fine_val = (rem * 4) + ((dqs_fine_max + dqs_fine_min) >> 1);
+		if (dqs_fine_val >= 0x10) {
+			dqs_coarse_val++;
+			dqs_fine_val -= 8;
+		}
+
+		/* Save current DQS value */
+		dqs_dly |= ((dqs_coarse_val << 4) | dqs_fine_val) << shift;
+	}
+
+	/* Set final DQS value */
+	writel(dqs_dly, memc + MEMCTL_DDR_DQS_DLY_REG);
+
+	/* Restore DDR_CFG2 */
+	writel(ddr_cfg2_reg, memc + MEMCTL_DDR_CFG2_REG);
+
+	/* Re-enable automatic self-refresh */
+	setbits_32(memc + MEMCTL_DDR_SELF_REFRESH_REG, SR_AUTO_EN);
+}
diff --git a/arch/mips/mach-mtmips/ddr_calibrate.c b/arch/mips/mach-mtmips/ddr_calibrate.c
deleted file mode 100644
index 3cd4408..0000000
--- a/arch/mips/mach-mtmips/ddr_calibrate.c
+++ /dev/null
@@ -1,309 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2018 Stefan Roese <sr@denx.de>
- *
- * This code is mostly based on the code extracted from this MediaTek
- * github repository:
- *
- * https://github.com/MediaTek-Labs/linkit-smart-uboot.git
- *
- * I was not able to find a specific license or other developers
- * copyrights here, so I can't add them here.
- *
- * Most functions in this file are copied from the MediaTek U-Boot
- * repository. Without any documentation, it was impossible to really
- * implement this differently. So its mostly a cleaned-up version of
- * the original code, with only support for the MT7628 / MT7688 SoC.
- */
-
-#include <common.h>
-#include <cpu_func.h>
-#include <linux/io.h>
-#include <asm/cacheops.h>
-#include <asm/io.h>
-#include "mt76xx.h"
-
-#define NUM_OF_CACHELINE	128
-#define MIN_START		6
-#define MIN_FINE_START		0xf
-#define MAX_START		7
-#define MAX_FINE_START		0x0
-
-#define CPU_FRAC_DIV		1
-
-#if defined(CONFIG_ONBOARD_DDR2_SIZE_256MBIT)
-#define DRAM_BUTTOM 0x02000000
-#endif
-#if defined(CONFIG_ONBOARD_DDR2_SIZE_512MBIT)
-#define DRAM_BUTTOM 0x04000000
-#endif
-#if defined(CONFIG_ONBOARD_DDR2_SIZE_1024MBIT)
-#define DRAM_BUTTOM 0x08000000
-#endif
-#if defined(CONFIG_ONBOARD_DDR2_SIZE_2048MBIT)
-#define DRAM_BUTTOM 0x10000000
-#endif
-
-static inline void cal_memcpy(void *src, void *dst, u32 size)
-{
-	u8 *psrc = (u8 *)src;
-	u8 *pdst = (u8 *)dst;
-	int i;
-
-	for (i = 0; i < size; i++, psrc++, pdst++)
-		*pdst = *psrc;
-}
-
-static inline void cal_memset(void *src, u8 pat, u32 size)
-{
-	u8 *psrc = (u8 *)src;
-	int i;
-
-	for (i = 0; i < size; i++, psrc++)
-		*psrc = pat;
-}
-
-#define pref_op(hint, addr)						\
-	__asm__ __volatile__(						\
-		".set	push\n"						\
-		".set	noreorder\n"					\
-		"pref	%0, %1\n"					\
-		".set	pop\n"						\
-		:							\
-		: "i" (hint), "R" (*(u8 *)(addr)))
-
-static inline void cal_patgen(u32 start_addr, u32 size, u32 bias)
-{
-	u32 *addr = (u32 *)start_addr;
-	int i;
-
-	for (i = 0; i < size; i++)
-		addr[i] = start_addr + i + bias;
-}
-
-static inline int test_loop(int k, int dqs, u32 test_dqs, u32 *coarse_dqs,
-			    u32 offs, u32 pat, u32 val)
-{
-	u32 nc_addr;
-	u32 *c_addr;
-	int i;
-
-	for (nc_addr = 0xa0000000;
-	     nc_addr < (0xa0000000 + DRAM_BUTTOM - NUM_OF_CACHELINE * 32);
-	     nc_addr += (DRAM_BUTTOM >> 6) + offs) {
-		writel(0x00007474, (void *)MT76XX_MEMCTRL_BASE + 0x64);
-		wmb();		/* Make sure store if finished */
-
-		c_addr = (u32 *)(nc_addr & 0xdfffffff);
-		cal_memset(((u8 *)c_addr), 0x1F, NUM_OF_CACHELINE * 32);
-		cal_patgen(nc_addr, NUM_OF_CACHELINE * 8, pat);
-
-		if (dqs > 0)
-			writel(0x00000074 |
-			       (((k == 1) ? coarse_dqs[dqs] : test_dqs) << 12) |
-			       (((k == 0) ? val : test_dqs) << 8),
-			       (void *)MT76XX_MEMCTRL_BASE + 0x64);
-		else
-			writel(0x00007400 |
-			       (((k == 1) ? coarse_dqs[dqs] : test_dqs) << 4) |
-			       (((k == 0) ? val : test_dqs) << 0),
-			       (void *)MT76XX_MEMCTRL_BASE + 0x64);
-		wmb();		/* Make sure store if finished */
-
-		invalidate_dcache_range((u32)c_addr,
-					(u32)c_addr +
-					NUM_OF_CACHELINE * 32);
-		wmb();		/* Make sure store if finished */
-
-		for (i = 0; i < NUM_OF_CACHELINE * 8; i++) {
-			if (i % 8 == 0)
-				pref_op(0, &c_addr[i]);
-		}
-
-		for (i = 0; i < NUM_OF_CACHELINE * 8; i++) {
-			if (c_addr[i] != nc_addr + i + pat)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-void ddr_calibrate(void)
-{
-	u32 min_coarse_dqs[2];
-	u32 max_coarse_dqs[2];
-	u32 min_fine_dqs[2];
-	u32 max_fine_dqs[2];
-	u32 coarse_dqs[2];
-	u32 fine_dqs[2];
-	int reg = 0, ddr_cfg2_reg;
-	int flag;
-	int i, k;
-	int dqs = 0;
-	u32 min_coarse_dqs_bnd, min_fine_dqs_bnd, coarse_dqs_dll, fine_dqs_dll;
-	u32 val;
-	u32 fdiv = 0, frac = 0;
-
-	/* Setup clock to run at full speed */
-	val = readl((void *)MT76XX_DYN_CFG0_REG);
-	fdiv = (u32)((val >> 8) & 0x0F);
-	if (CPU_FRAC_DIV < 1 || CPU_FRAC_DIV > 10)
-		frac = val & 0x0f;
-	else
-		frac = CPU_FRAC_DIV;
-
-	while (frac < fdiv) {
-		val = readl((void *)MT76XX_DYN_CFG0_REG);
-		fdiv = (val >> 8) & 0x0f;
-		fdiv--;
-		val &= ~(0x0f << 8);
-		val |= (fdiv << 8);
-		writel(val, (void *)MT76XX_DYN_CFG0_REG);
-		udelay(500);
-		val = readl((void *)MT76XX_DYN_CFG0_REG);
-		fdiv = (val >> 8) & 0x0f;
-	}
-
-	clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4));
-	ddr_cfg2_reg = readl((void *)MT76XX_MEMCTRL_BASE + 0x48);
-	clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x48,
-		     (0x3 << 28) | (0x3 << 26));
-
-	min_coarse_dqs[0] = MIN_START;
-	min_coarse_dqs[1] = MIN_START;
-	min_fine_dqs[0] = MIN_FINE_START;
-	min_fine_dqs[1] = MIN_FINE_START;
-	max_coarse_dqs[0] = MAX_START;
-	max_coarse_dqs[1] = MAX_START;
-	max_fine_dqs[0] = MAX_FINE_START;
-	max_fine_dqs[1] = MAX_FINE_START;
-	dqs = 0;
-
-	/* Add by KP, DQS MIN boundary */
-	reg = readl((void *)MT76XX_MEMCTRL_BASE + 0x20);
-	coarse_dqs_dll = (reg & 0xf00) >> 8;
-	fine_dqs_dll = (reg & 0xf0) >> 4;
-	if (coarse_dqs_dll <= 8)
-		min_coarse_dqs_bnd = 8 - coarse_dqs_dll;
-	else
-		min_coarse_dqs_bnd = 0;
-
-	if (fine_dqs_dll <= 8)
-		min_fine_dqs_bnd = 8 - fine_dqs_dll;
-	else
-		min_fine_dqs_bnd = 0;
-	/* DQS MIN boundary */
-
-DQS_CAL:
-
-	for (k = 0; k < 2; k++) {
-		u32 test_dqs;
-
-		if (k == 0)
-			test_dqs = MAX_START;
-		else
-			test_dqs = MAX_FINE_START;
-
-		do {
-			flag = test_loop(k, dqs, test_dqs, max_coarse_dqs,
-					 0x400, 0x3, 0xf);
-			if (flag == -1)
-				break;
-
-			test_dqs++;
-		} while (test_dqs <= 0xf);
-
-		if (k == 0) {
-			max_coarse_dqs[dqs] = test_dqs;
-		} else {
-			test_dqs--;
-
-			if (test_dqs == MAX_FINE_START - 1) {
-				max_coarse_dqs[dqs]--;
-				max_fine_dqs[dqs] = 0xf;
-			} else {
-				max_fine_dqs[dqs] = test_dqs;
-			}
-		}
-	}
-
-	for (k = 0; k < 2; k++) {
-		u32 test_dqs;
-
-		if (k == 0)
-			test_dqs = MIN_START;
-		else
-			test_dqs = MIN_FINE_START;
-
-		do {
-			flag = test_loop(k, dqs, test_dqs, min_coarse_dqs,
-					 0x480, 0x1, 0x0);
-			if (k == 0) {
-				if (flag == -1 ||
-				    test_dqs == min_coarse_dqs_bnd)
-					break;
-
-				test_dqs--;
-
-				if (test_dqs < min_coarse_dqs_bnd)
-					break;
-			} else {
-				if (flag == -1) {
-					test_dqs++;
-					break;
-				} else if (test_dqs == min_fine_dqs_bnd) {
-					break;
-				}
-
-				test_dqs--;
-
-				if (test_dqs < min_fine_dqs_bnd)
-					break;
-			}
-		} while (test_dqs >= 0);
-
-		if (k == 0) {
-			min_coarse_dqs[dqs] = test_dqs;
-		} else {
-			if (test_dqs == MIN_FINE_START + 1) {
-				min_coarse_dqs[dqs]++;
-				min_fine_dqs[dqs] = 0x0;
-			} else {
-				min_fine_dqs[dqs] = test_dqs;
-			}
-		}
-	}
-
-	if (dqs == 0) {
-		dqs = 1;
-		goto DQS_CAL;
-	}
-
-	for (i = 0; i < 2; i++) {
-		u32 temp;
-
-		coarse_dqs[i] = (max_coarse_dqs[i] + min_coarse_dqs[i]) >> 1;
-		temp =
-		    (((max_coarse_dqs[i] + min_coarse_dqs[i]) % 2) * 4) +
-		    ((max_fine_dqs[i] + min_fine_dqs[i]) >> 1);
-		if (temp >= 0x10) {
-			coarse_dqs[i]++;
-			fine_dqs[i] = (temp - 0x10) + 0x8;
-		} else {
-			fine_dqs[i] = temp;
-		}
-	}
-	reg = (coarse_dqs[1] << 12) | (fine_dqs[1] << 8) |
-		(coarse_dqs[0] << 4) | fine_dqs[0];
-
-	clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4));
-	writel(reg, (void *)MT76XX_MEMCTRL_BASE + 0x64);
-	writel(ddr_cfg2_reg, (void *)MT76XX_MEMCTRL_BASE + 0x48);
-	setbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4));
-
-	for (i = 0; i < 2; i++)
-		debug("[%02X%02X%02X%02X]", min_coarse_dqs[i],
-		      min_fine_dqs[i], max_coarse_dqs[i], max_fine_dqs[i]);
-	debug("\nDDR Calibration DQS reg = %08X\n", reg);
-}
diff --git a/arch/mips/mach-mtmips/ddr_init.c b/arch/mips/mach-mtmips/ddr_init.c
new file mode 100644
index 0000000..cd355cc
--- /dev/null
+++ b/arch/mips/mach-mtmips/ddr_init.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author:  Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <common.h>
+#include <linux/bitops.h>
+#include <linux/io.h>
+#include <linux/sizes.h>
+#include <mach/ddr.h>
+#include <mach/mc.h>
+
+#define DDR_BW_TEST_PAT			0xaa5555aa
+
+static const u32 dram_size[] = {
+	[DRAM_8MB] = SZ_8M,
+	[DRAM_16MB] = SZ_16M,
+	[DRAM_32MB] = SZ_32M,
+	[DRAM_64MB] = SZ_64M,
+	[DRAM_128MB] = SZ_128M,
+	[DRAM_256MB] = SZ_256M,
+};
+/* Write val at DRAM offset addr through the KSEG1 window, fenced by sync() */
+static void dram_test_write(u32 addr, u32 val)
+{
+	volatile ulong *target = (volatile ulong *)(KSEG1 + addr);
+
+	sync();
+	*target = val;
+	sync();
+}
+/* Read the word at DRAM offset addr through the KSEG1 window */
+static u32 dram_test_read(u32 addr)
+{
+	volatile ulong *target = (volatile ulong *)(KSEG1 + addr);
+	u32 val;
+
+	sync();
+	val = *target;
+	sync();
+
+	return val;
+}
+/* Return 1 if a write to offset BIT(bit) shows up (aliases) at offset 0 */
+static int dram_addr_test_bit(u32 bit)
+{
+	u32 val;
+
+	dram_test_write(0, 0);
+	dram_test_write(BIT(bit), DDR_BW_TEST_PAT);
+	val = dram_test_read(0);
+
+	if (val == DDR_BW_TEST_PAT)
+		return 1;
+
+	return 0;
+}
+/* Reset the memory controller, then program cfg, DQ/DQS delays and width */
+static void mc_ddr_init(void __iomem *memc, const struct mc_ddr_cfg *cfg,
+			u32 dq_dly, u32 dqs_dly, mc_reset_t mc_reset, u32 bw)
+{
+	u32 val;
+
+	mc_reset(1);		/* assert reset */
+	__udelay(200);
+	mc_reset(0);		/* deassert reset */
+
+	clrbits_32(memc + MEMCTL_SDRAM_CFG1_REG, RBC_MAPPING);
+
+	writel(cfg->cfg2, memc + MEMCTL_DDR_CFG2_REG);
+	writel(cfg->cfg3, memc + MEMCTL_DDR_CFG3_REG);
+	writel(cfg->cfg4, memc + MEMCTL_DDR_CFG4_REG);
+	writel(dq_dly, memc + MEMCTL_DDR_DQ_DLY_REG);
+	writel(dqs_dly, memc + MEMCTL_DDR_DQS_DLY_REG);
+
+	writel(cfg->cfg0, memc + MEMCTL_DDR_CFG0_REG);
+
+	val = cfg->cfg1;
+	if (bw) {		/* bw == 0 keeps the width from cfg->cfg1 */
+		val &= ~IND_SDRAM_WIDTH_M;
+		val |= (bw << IND_SDRAM_WIDTH_S) & IND_SDRAM_WIDTH_M;
+	}
+
+	writel(val, memc + MEMCTL_DDR_CFG1_REG);
+
+	clrsetbits_32(memc + MEMCTL_PWR_SAVE_CNT_REG, SR_TAR_CNT_M,
+		      1 << SR_TAR_CNT_S);
+
+	setbits_32(memc + MEMCTL_DDR_SELF_REFRESH_REG, SR_AUTO_EN);
+}
+/* Detect DDR1 bus width and size, then init the controller to match */
+void ddr1_init(struct mc_ddr_init_param *param)
+{
+	enum mc_dram_size sz;
+	u32 bw = 0;
+
+	/* First initialization, determine bus width */
+	mc_ddr_init(param->memc, &param->cfgs[DRAM_8MB], param->dq_dly,
+		    param->dqs_dly, param->mc_reset, IND_SDRAM_WIDTH_16BIT);
+
+	/* Test bus width: fall back to 8-bit if the 16-bit readback fails */
+	dram_test_write(0, DDR_BW_TEST_PAT);
+	if (dram_test_read(0) == DDR_BW_TEST_PAT)
+		bw = IND_SDRAM_WIDTH_16BIT;
+	else
+		bw = IND_SDRAM_WIDTH_8BIT;
+
+	/* Second initialization, determine DDR capacity */
+	mc_ddr_init(param->memc, &param->cfgs[DRAM_128MB], param->dq_dly,
+		    param->dqs_dly, param->mc_reset, bw);
+
+	if (dram_addr_test_bit(9)) {
+		sz = DRAM_8MB;
+	} else {
+		if (dram_addr_test_bit(10)) {
+			if (dram_addr_test_bit(23))
+				sz = DRAM_16MB;
+			else
+				sz = DRAM_32MB;
+		} else {
+			if (dram_addr_test_bit(24))
+				sz = DRAM_64MB;
+			else
+				sz = DRAM_128MB;
+		}
+	}
+
+	/* Final initialization using the detected size and bus width */
+	mc_ddr_init(param->memc, &param->cfgs[sz], param->dq_dly,
+		    param->dqs_dly, param->mc_reset, bw);
+
+	/* Return actual DDR configuration */
+	param->memsize = dram_size[sz];
+	param->bus_width = bw;
+}
+/* Detect DDR2 bus width and size, then init the controller to match */
+void ddr2_init(struct mc_ddr_init_param *param)
+{
+	enum mc_dram_size sz;
+	u32 bw = 0;
+
+	/* First initialization, determine bus width */
+	mc_ddr_init(param->memc, &param->cfgs[DRAM_32MB], param->dq_dly,
+		    param->dqs_dly, param->mc_reset, IND_SDRAM_WIDTH_16BIT);
+
+	/* Test bus width: fall back to 8-bit if the 16-bit readback fails */
+	dram_test_write(0, DDR_BW_TEST_PAT);
+	if (dram_test_read(0) == DDR_BW_TEST_PAT)
+		bw = IND_SDRAM_WIDTH_16BIT;
+	else
+		bw = IND_SDRAM_WIDTH_8BIT;
+
+	/* Second initialization, determine DDR capacity */
+	mc_ddr_init(param->memc, &param->cfgs[DRAM_256MB], param->dq_dly,
+		    param->dqs_dly, param->mc_reset, bw);
+
+	if (bw == IND_SDRAM_WIDTH_16BIT) {
+		if (dram_addr_test_bit(10)) {
+			sz = DRAM_32MB;
+		} else {
+			if (dram_addr_test_bit(24)) {
+				if (dram_addr_test_bit(27))
+					sz = DRAM_64MB;
+				else
+					sz = DRAM_128MB;
+			} else {
+				sz = DRAM_256MB;
+			}
+		}
+	} else {
+		if (dram_addr_test_bit(23)) {
+			sz = DRAM_32MB;
+		} else {
+			if (dram_addr_test_bit(24)) {
+				if (dram_addr_test_bit(27))
+					sz = DRAM_64MB;
+				else
+					sz = DRAM_128MB;
+			} else {
+				sz = DRAM_256MB;
+			}
+		}
+	}
+
+	/* Final initialization using the detected size and bus width */
+	mc_ddr_init(param->memc, &param->cfgs[sz], param->dq_dly,
+		    param->dqs_dly, param->mc_reset, bw);
+
+	/* Return actual DDR configuration */
+	param->memsize = dram_size[sz];
+	param->bus_width = bw;
+}
diff --git a/arch/mips/mach-mtmips/include/mach/ddr.h b/arch/mips/mach-mtmips/include/mach/ddr.h
new file mode 100644
index 0000000..f921981
--- /dev/null
+++ b/arch/mips/mach-mtmips/include/mach/ddr.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author:  Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#ifndef _MTMIPS_DDR_H_
+#define _MTMIPS_DDR_H_
+
+#include <linux/io.h>
+#include <linux/types.h>
+/* DRAM size index, used to select an entry of mc_ddr_init_param.cfgs[] */
+enum mc_dram_size {
+	DRAM_8MB,
+	DRAM_16MB,
+	DRAM_32MB,
+	DRAM_64MB,
+	DRAM_128MB,
+	DRAM_256MB,
+
+	__DRAM_SZ_MAX	/* number of sizes above */
+};
+/* One set of values for the MEMCTL_DDR_CFG0..MEMCTL_DDR_CFG4 registers */
+struct mc_ddr_cfg {
+	u32 cfg0;
+	u32 cfg1;
+	u32 cfg2;
+	u32 cfg3;
+	u32 cfg4;
+};
+
+typedef void (*mc_reset_t)(int assert);
+
+struct mc_ddr_init_param {
+	void __iomem *memc;		/* memory controller register base */
+
+	u32 dq_dly;			/* value for MEMCTL_DDR_DQ_DLY_REG */
+	u32 dqs_dly;			/* value for MEMCTL_DDR_DQS_DLY_REG */
+
+	const struct mc_ddr_cfg *cfgs;	/* indexed by enum mc_dram_size */
+	mc_reset_t mc_reset;		/* called with 1, then 0, per init */
+
+	u32 memsize;			/* out: detected size in bytes */
+	u32 bus_width;			/* out: IND_SDRAM_WIDTH_* value */
+};
+
+void ddr1_init(struct mc_ddr_init_param *param);
+void ddr2_init(struct mc_ddr_init_param *param);
+void ddr_calibrate(void __iomem *memc, u32 memsize, u32 bw);
+
+#endif /* _MTMIPS_DDR_H_ */
diff --git a/arch/mips/mach-mtmips/include/mach/mc.h b/arch/mips/mach-mtmips/include/mach/mc.h
new file mode 100644
index 0000000..d7d623a
--- /dev/null
+++ b/arch/mips/mach-mtmips/include/mach/mc.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author:  Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#ifndef _MTMIPS_MC_H_
+#define _MTMIPS_MC_H_
+/* Register offsets, each followed by its field shifts (_S) and masks (_M) */
+#define MEMCTL_SDRAM_CFG0_REG		0x00
+#define DIS_CLK_GT			0x80000000
+#define CLK_SLEW_S			29
+#define CLK_SLEW_M			0x60000000
+#define TWR				0x10000000
+#define TMRD_S				24
+#define TMRD_M				0xf000000
+#define TRFC_S				20
+#define TRFC_M				0xf00000
+#define TCAS_S				16
+#define TCAS_M				0x30000
+#define TRAS_S				12
+#define TRAS_M				0xf000
+#define TRCD_S				8
+#define TRCD_M				0x300
+#define TRC_S				4
+#define TRC_M				0xf0
+#define TRP_S				0
+#define TRP_M				0x03
+
+#define MEMCTL_SDRAM_CFG1_REG		0x04
+#define SDRAM_INIT_START		0x80000000
+#define SDRAM_INIT_DONE			0x40000000
+#define RBC_MAPPING			0x20000000
+#define PWR_DOWN_EN			0x10000000
+#define PWR_DOWN_MODE			0x8000000
+#define SDRAM_WIDTH			0x1000000
+#define NUMCOLS_S			20
+#define NUMCOLS_M			0x300000
+#define NUMROWS_S			16
+#define NUMROWS_M			0x30000
+#define TREFR_S				0
+#define TREFR_M				0xffff
+
+#define MEMCTL_DDR_SELF_REFRESH_REG	0x10
+#define ODT_SRC_SEL_S			24
+#define ODT_SRC_SEL_M			0xf000000
+#define ODT_OFF_DLY_S			20
+#define ODT_OFF_DLY_M			0xf00000
+#define ODT_ON_DLY_S			16
+#define ODT_ON_DLY_M			0xf0000
+#define SR_AUTO_EN			0x10
+#define SRACK_B				0x02
+#define SRREQ_B				0x01
+
+#define MEMCTL_PWR_SAVE_CNT_REG		0x14
+#define PD_CNT_S			24
+#define PD_CNT_M			0xff000000
+#define SR_TAR_CNT_S			0
+#define SR_TAR_CNT_M			0xffffff
+
+#define MEMCTL_DLL_DBG_REG		0x20
+#define TDC_STABLE_S			12
+#define TDC_STABLE_M			0x3f000
+#define MST_DLY_SEL_S			4
+#define MST_DLY_SEL_M			0xff0
+#define CURR_STATE_S			1
+#define CURR_STATE_M			0x06
+#define ADLL_LOCK_DONE			0x01
+
+#define MEMCTL_DDR_CFG0_REG		0x40
+#define T_RRD_S				28
+#define T_RRD_M				0xf0000000
+#define T_RAS_S				23
+#define T_RAS_M				0xf800000
+#define T_RP_S				19
+#define T_RP_M				0x780000
+#define T_RFC_S				13
+#define T_RFC_M				0x7e000
+#define T_REFI_S			0
+#define T_REFI_M			0x1fff
+
+#define MEMCTL_DDR_CFG1_REG		0x44
+#define T_WTR_S				28
+#define T_WTR_M				0xf0000000
+#define T_RTP_S				24
+#define T_RTP_M				0xf000000
+#define USER_DATA_WIDTH			0x200000
+#define IND_SDRAM_SIZE_S		18
+#define IND_SDRAM_SIZE_M		0x1c0000
+#define IND_SDRAM_SIZE_8MB		1
+#define IND_SDRAM_SIZE_16MB		2
+#define IND_SDRAM_SIZE_32MB		3
+#define IND_SDRAM_SIZE_64MB		4
+#define IND_SDRAM_SIZE_128MB		5
+#define IND_SDRAM_SIZE_256MB		6
+#define IND_SDRAM_WIDTH_S		16
+#define IND_SDRAM_WIDTH_M		0x30000
+#define IND_SDRAM_WIDTH_8BIT		1
+#define IND_SDRAM_WIDTH_16BIT		2
+#define EXT_BANK_S			14
+#define EXT_BANK_M			0xc000
+#define TOTAL_SDRAM_WIDTH_S		12
+#define TOTAL_SDRAM_WIDTH_M		0x3000
+#define T_WR_S				8
+#define T_WR_M				0xf00
+#define T_MRD_S				4
+#define T_MRD_M				0xf0
+#define T_RCD_S				0
+#define T_RCD_M				0x0f
+
+#define MEMCTL_DDR_CFG2_REG		0x48
+#define REGE				0x80000000
+#define DDR2_MODE			0x40000000
+#define DQS0_GATING_WINDOW_S		28
+#define DQS0_GATING_WINDOW_M		0x30000000
+#define DQS1_GATING_WINDOW_S		26
+#define DQS1_GATING_WINDOW_M		0xc000000
+#define PD				0x1000
+#define WR_S				9
+#define WR_M				0xe00
+#define DLLRESET			0x100
+#define TESTMODE			0x80
+#define CAS_LATENCY_S			4
+#define CAS_LATENCY_M			0x70
+#define BURST_TYPE			0x08
+#define BURST_LENGTH_S			0
+#define BURST_LENGTH_M			0x07
+
+#define MEMCTL_DDR_CFG3_REG		0x4c
+#define Q_OFF				0x1000
+#define RDOS				0x800
+#define DIS_DIFF_DQS			0x400
+#define OCD_S				7
+#define OCD_M				0x380
+#define RTT1				0x40
+#define ADDITIVE_LATENCY_S		3
+#define ADDITIVE_LATENCY_M		0x38
+#define RTT0				0x04
+#define DS				0x02
+#define DLL				0x01
+
+#define MEMCTL_DDR_CFG4_REG		0x50
+#define FAW_S				0
+#define FAW_M				0x0f
+
+#define MEMCTL_DDR_DQ_DLY_REG		0x60
+#define DQ1_DELAY_SEL_S			24
+#define DQ1_DELAY_SEL_M			0xff000000
+#define DQ0_DELAY_SEL_S			16
+#define DQ0_DELAY_SEL_M			0xff0000
+#define DQ1_DELAY_COARSE_TUNING_S	12
+#define DQ1_DELAY_COARSE_TUNING_M	0xf000
+#define DQ1_DELAY_FINE_TUNING_S		8
+#define DQ1_DELAY_FINE_TUNING_M		0xf00
+#define DQ0_DELAY_COARSE_TUNING_S	4
+#define DQ0_DELAY_COARSE_TUNING_M	0xf0
+#define DQ0_DELAY_FINE_TUNING_S		0
+#define DQ0_DELAY_FINE_TUNING_M		0x0f
+
+#define MEMCTL_DDR_DQS_DLY_REG		0x64
+#define DQS1_DELAY_SEL_S		24
+#define DQS1_DELAY_SEL_M		0xff000000
+#define DQS0_DELAY_SEL_S		16
+#define DQS0_DELAY_SEL_M		0xff0000
+#define DQS1_DELAY_COARSE_TUNING_S	12
+#define DQS1_DELAY_COARSE_TUNING_M	0xf000
+#define DQS1_DELAY_FINE_TUNING_S	8
+#define DQS1_DELAY_FINE_TUNING_M	0xf00
+#define DQS0_DELAY_COARSE_TUNING_S	4
+#define DQS0_DELAY_COARSE_TUNING_M	0xf0
+#define DQS0_DELAY_FINE_TUNING_S	0
+#define DQS0_DELAY_FINE_TUNING_M	0x0f
+
+#define MEMCTL_DDR_DLL_SLV_REG		0x68
+#define DLL_SLV_UPDATE_MODE		0x100
+#define DQS_DLY_SEL_EN			0x80
+#define DQ_DLY_SEL_EN			0x01
+
+#endif /* _MTMIPS_MC_H_ */
diff --git a/arch/mips/mach-mtmips/lowlevel_init.S b/arch/mips/mach-mtmips/lowlevel_init.S
deleted file mode 100644
index aa707e0..0000000
--- a/arch/mips/mach-mtmips/lowlevel_init.S
+++ /dev/null
@@ -1,328 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * (c) 2018 Stefan Roese <sr@denx.de>
- *
- * This code is mostly based on the code extracted from this MediaTek
- * github repository:
- *
- * https://github.com/MediaTek-Labs/linkit-smart-uboot.git
- *
- * I was not able to find a specific license or other developers
- * copyrights here, so I can't add them here.
- */
-
-#include <config.h>
-#include <asm/regdef.h>
-#include <asm/mipsregs.h>
-#include <asm/addrspace.h>
-#include <asm/asm.h>
-#include "mt76xx.h"
-
-#ifndef BIT
-#define BIT(nr)			(1 << (nr))
-#endif
-
-#define DELAY_USEC(us)		((us) / 100)
-
-#define DDR_CFG1_CHIP_WIDTH_MASK (0x3 << 16)
-#define DDR_CFG1_BUS_WIDTH_MASK	(0x3 << 12)
-
-#if defined(CONFIG_ONBOARD_DDR2_SIZE_256MBIT)
-#define DDR_CFG1_SIZE_VAL	0x222e2323
-#define DDR_CFG4_SIZE_VAL	7
-#endif
-#if defined(CONFIG_ONBOARD_DDR2_SIZE_512MBIT)
-#define DDR_CFG1_SIZE_VAL	0x22322323
-#define DDR_CFG4_SIZE_VAL	9
-#endif
-#if defined(CONFIG_ONBOARD_DDR2_SIZE_1024MBIT)
-#define DDR_CFG1_SIZE_VAL	0x22362323
-#define DDR_CFG4_SIZE_VAL	9
-#endif
-#if defined(CONFIG_ONBOARD_DDR2_SIZE_2048MBIT)
-#define DDR_CFG1_SIZE_VAL	0x223a2323
-#define DDR_CFG4_SIZE_VAL	9
-#endif
-
-#if defined(CONFIG_ONBOARD_DDR2_CHIP_WIDTH_8BIT)
-#define DDR_CFG1_CHIP_WIDTH_VAL	(0x1 << 16)
-#endif
-#if defined(CONFIG_ONBOARD_DDR2_CHIP_WIDTH_16BIT)
-#define DDR_CFG1_CHIP_WIDTH_VAL	(0x2 << 16)
-#endif
-
-#if defined(CONFIG_ONBOARD_DDR2_BUS_WIDTH_16BIT)
-#define DDR_CFG1_BUS_WIDTH_VAL	(0x2 << 12)
-#endif
-#if defined(CONFIG_ONBOARD_DDR2_BUS_WIDTH_32BIT)
-#define DDR_CFG1_BUS_WIDTH_VAL	(0x3 << 12)
-#endif
-
-	.set noreorder
-
-LEAF(lowlevel_init)
-
-	/* Load base addresses as physical addresses for later usage */
-	li	s0, CKSEG1ADDR(MT76XX_SYSCTL_BASE)
-	li	s1, CKSEG1ADDR(MT76XX_MEMCTRL_BASE)
-	li	s2, CKSEG1ADDR(MT76XX_RGCTRL_BASE)
-
-	/* polling CPLL is ready */
-	li	t1, DELAY_USEC(1000000)
-	la	t5, MT76XX_ROM_STATUS_REG
-1:
-	lw	t2, 0(t5)
-	andi	t2, t2, 0x1
-	bnez	t2, CPLL_READY
-	subu	t1, t1, 1
-	bgtz	t1, 1b
-	nop
-	la      t0, MT76XX_CLKCFG0_REG
-	lw      t3, 0(t0)
-	ori	t3, t3, 0x1
-	sw	t3, 0(t0)
-	b	CPLL_DONE
-	nop
-CPLL_READY:
-	la	t0, MT76XX_CLKCFG0_REG
-	lw	t1, 0(t0)
-	li	t2, ~0x0c
-	and	t1, t1, t2
-	ori	t1, t1, 0xc
-	sw	t1, 0(t0)
-	la	t0, MT76XX_DYN_CFG0_REG
-	lw	t3, 0(t0)
-	li	t5, ~((0x0f << 8) | (0x0f << 0))
-	and	t3, t3, t5
-	li	t5, (10 << 8) | (1 << 0)
-	or	t3, t3, t5
-	sw	t3, 0(t0)
-	la	t0, MT76XX_CLKCFG0_REG
-	lw	t3, 0(t0)
-	li	t4, ~0x0F
-	and     t3, t3, t4
-	ori	t3, t3, 0xc
-	sw	t3, 0(t0)
-	lw	t3, 0(t0)
-	ori	t3, t3, 0x08
-	sw	t3, 0(t0)
-
-CPLL_DONE:
-	/* Reset MC */
-	lw	t2, 0x34(s0)
-	ori	t2, BIT(10)
-	sw	t2, 0x34(s0)
-	nop
-
-	/*
-	 * SDR and DDR initialization: delay 200us
-	 */
-	li	t0, DELAY_USEC(200 + 40)
-	li	t1, 0x1
-1:
-	sub	t0, t0, t1
-	bnez	t0, 1b
-	nop
-
-	/* set DRAM IO PAD for MT7628IC */
-	/* DDR LDO Enable  */
-	lw	t4, 0x100(s2)
-	li	t2, BIT(31)
-	or	t4, t4, t2
-	sw	t4, 0x100(s2)
-	lw	t4, 0x10c(s2)
-	j	LDO_1P8V
-	nop
-LDO_1P8V:
-	li	t2, ~BIT(6)
-	and	t4, t4, t2
-	sw	t4, 0x10c(s2)
-	j	DDRLDO_SOFT_START
-LDO_2P5V:
-	/* suppose external DDR1 LDO 2.5V */
-	li	t2, BIT(6)
-	or	t4, t4, t2
-	sw	t4, 0x10c(s2)
-
-DDRLDO_SOFT_START:
-	lw	t2, 0x10c(s2)
-	li	t3, BIT(16)
-	or	t2, t2, t3
-	sw	t2, 0x10c(s2)
-	li	t3, DELAY_USEC(250*50)
-LDO_DELAY:
-	subu	t3, t3, 1
-	bnez	t3, LDO_DELAY
-	nop
-
-	lw	t2, 0x10c(s2)
-	li	t3, BIT(18)
-	or	t2, t2, t3
-	sw	t2, 0x10c(s2)
-
-SET_RG_BUCK_FPWM:
-	lw	t2, 0x104(s2)
-	ori	t2, t2, BIT(10)
-	sw	t2, 0x104(s2)
-
-DDR_PAD_CFG:
-	/* clean CLK PAD */
-	lw	t2, 0x704(s2)
-	li	t8, 0xfffff0f0
-	and	t2, t2, t8
-	/* clean CMD PAD */
-	lw	t3, 0x70c(s2)
-	li	t8, 0xfffff0f0
-	and	t3, t3, t8
-	/* clean DQ IPAD */
-	lw	t4, 0x710(s2)
-	li	t8, 0xfffff8ff
-	and	t4, t4, t8
-	/* clean DQ OPAD */
-	lw	t5, 0x714(s2)
-	li	t8, 0xfffff0f0
-	and	t5, t5, t8
-	/* clean DQS IPAD */
-	lw	t6, 0x718(s2)
-	li	t8, 0xfffff8ff
-	and	t6, t6, t8
-	/* clean DQS OPAD */
-	lw	t7, 0x71c(s2)
-	li	t8, 0xfffff0f0
-	and	t7, t7, t8
-
-	lw	t9, 0xc(s0)
-	srl	t9, t9, 16
-	andi	t9, t9, 0x1
-	bnez	t9, MT7628_AN_DDR1_PAD
-MT7628_KN_PAD:
-	li	t8, 0x00000303
-	or	t2, t2, t8
-	or	t3, t3, t8
-	or	t5, t5, t8
-	or	t7, t7, t8
-	li	t8, 0x00000000
-	or	t4, t4, t8
-	or	t6, t6, t8
-	j	SET_PAD_CFG
-MT7628_AN_DDR1_PAD:
-	lw	t1, 0x10(s0)
-	andi	t1, t1, 0x1
-	beqz	t1, MT7628_AN_DDR2_PAD
-	li	t8, 0x00000c0c
-	or	t2, t2, t8
-	li	t8, 0x00000202
-	or	t3, t3, t8
-	li	t8, 0x00000707
-	or	t5, t5, t8
-	li	t8, 0x00000c0c
-	or	t7, t7, t8
-	li	t8, 0x00000000
-	or	t4, t4, t8
-	or	t6, t6, t8
-	j	SET_PAD_CFG
-MT7628_AN_DDR2_PAD:
-	li	t8, 0x00000c0c
-	or	t2, t2, t8
-	li	t8, 0x00000202
-	or	t3, t3, t8
-	li	t8, 0x00000404
-	or	t5, t5, t8
-	li	t8, 0x00000c0c
-	or	t7, t7, t8
-	li	t8, 0x00000000		/* ODT off */
-	or	t4, t4, t8
-	or	t6, t6, t8
-
-SET_PAD_CFG:
-	sw	t2, 0x704(s2)
-	sw	t3, 0x70c(s2)
-	sw	t4, 0x710(s2)
-	sw	t5, 0x714(s2)
-	sw	t6, 0x718(s2)
-	sw	t7, 0x71c(s2)
-
-	/*
-	 * DDR initialization: reset pin to 0
-	 */
-	lw	t2, 0x34(s0)
-	and	t2, ~BIT(10)
-	sw	t2, 0x34(s0)
-	nop
-
-	/*
-	 * DDR initialization: wait til reg DDR_CFG1 bit 21 equal to 1 (ready)
-	 */
-DDR_READY:
-	li	t1, DDR_CFG1_REG
-	lw	t0, 0(t1)
-	nop
-	and	t2, t0, BIT(21)
-	beqz	t2, DDR_READY
-	nop
-
-	/*
-	 * DDR initialization
-	 *
-	 * Only DDR2 supported right now. DDR2 support can be added, once
-	 * boards using it will get added to mainline U-Boot.
-	 */
-	li	t1, DDR_CFG2_REG
-	lw	t0, 0(t1)
-	nop
-	and	t0, ~BIT(30)
-	and	t0, ~(7 << 4)
-	or	t0, (4 << 4)
-	or	t0, BIT(30)
-	or	t0, BIT(11)
-	sw	t0, 0(t1)
-	nop
-
-	li	t1, DDR_CFG3_REG
-	lw	t2, 0(t1)
-	/* Disable ODT; reference board ok, ev board fail */
-	and	t2, ~BIT(6)
-	or	t2, BIT(2)
-	li	t0, DDR_CFG4_REG
-	lw	t1, 0(t0)
-	li	t2, ~(0x01f | 0x0f0)
-	and	t1, t1, t2
-	ori	t1, t1, DDR_CFG4_SIZE_VAL
-	sw	t1, 0(t0)
-	nop
-
-	/*
-	 * DDR initialization: config size and width on reg DDR_CFG1
-	 */
-	li	t6, DDR_CFG1_SIZE_VAL
-
-	and	t6, ~DDR_CFG1_CHIP_WIDTH_MASK
-	or	t6, DDR_CFG1_CHIP_WIDTH_VAL
-
-	/* CONFIG DDR_CFG1[13:12] about TOTAL WIDTH */
-	and	t6, ~DDR_CFG1_BUS_WIDTH_MASK
-	or	t6, DDR_CFG1_BUS_WIDTH_VAL
-
-	li	t5, DDR_CFG1_REG
-	sw	t6, 0(t5)
-	nop
-
-	/*
-	 * DDR: enable self auto refresh for power saving
-	 * enable it by default for both RAM and ROM version (for CoC)
-	 */
-	lw	t1, 0x14(s1)
-	nop
-	and	t1, 0xff000000
-	or	t1, 0x01
-	sw	t1, 0x14(s1)
-	nop
-	lw	t1, 0x10(s1)
-	nop
-	or	t1, 0x10
-	sw	t1, 0x10(s1)
-	nop
-
-	jr	ra
-	nop
-	END(lowlevel_init)
diff --git a/arch/mips/mach-mtmips/mt7628/Makefile b/arch/mips/mach-mtmips/mt7628/Makefile
new file mode 100644
index 0000000..db62e90
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7628/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += lowlevel_init.o
+obj-y += init.o
+obj-y += ddr.o
diff --git a/arch/mips/mach-mtmips/mt7628/ddr.c b/arch/mips/mach-mtmips/mt7628/ddr.c
new file mode 100644
index 0000000..06c0ca6
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7628/ddr.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author:  Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <common.h>
+#include <asm/addrspace.h>
+#include <linux/bitops.h>
+#include <linux/sizes.h>
+#include <linux/io.h>
+#include <mach/ddr.h>
+#include <mach/mc.h>
+#include "mt7628.h"
+
+DECLARE_GLOBAL_DATA_PTR;
+
+/* DQ delay word: coarse 8, fine 2 on DQ0 and DQ1; used for DDR1 and DDR2 */
+#define DDR2_DQ_DLY \
+				((0x8 << DQ1_DELAY_COARSE_TUNING_S) | \
+				(0x2 << DQ1_DELAY_FINE_TUNING_S) | \
+				(0x8 << DQ0_DELAY_COARSE_TUNING_S) | \
+				(0x2 << DQ0_DELAY_FINE_TUNING_S))
+
+/* DQS delay word: coarse 8, fine 3 on DQS0 and DQS1; used for DDR1 and DDR2 */
+#define DDR2_DQS_DLY \
+				((0x8 << DQS1_DELAY_COARSE_TUNING_S) | \
+				(0x3 << DQS1_DELAY_FINE_TUNING_S) | \
+				(0x8 << DQS0_DELAY_COARSE_TUNING_S) | \
+				(0x3 << DQS0_DELAY_FINE_TUNING_S))
+
+const struct mc_ddr_cfg ddr1_cfgs_200mhz[] = {	/* DDR1; chosen when lspd == 0 */
+	[DRAM_8MB]   = { 0x34A1EB94, 0x20262324, 0x28000033, 0x00000002, 0x00000000 },
+	[DRAM_16MB]  = { 0x34A1EB94, 0x202A2324, 0x28000033, 0x00000002, 0x00000000 },
+	[DRAM_32MB]  = { 0x34A1E5CA, 0x202E2324, 0x28000033, 0x00000002, 0x00000000 },
+	[DRAM_64MB]  = { 0x3421E5CA, 0x20322324, 0x28000033, 0x00000002, 0x00000000 },
+	[DRAM_128MB] = { 0x241B05CA, 0x20362334, 0x28000033, 0x00000002, 0x00000000 },
+};
+
+const struct mc_ddr_cfg ddr1_cfgs_160mhz[] = {	/* DDR1; chosen when lspd != 0 */
+	[DRAM_8MB]   = { 0x239964A1, 0x20262323, 0x00000033, 0x00000002, 0x00000000 },
+	[DRAM_16MB]  = { 0x239964A1, 0x202A2323, 0x00000033, 0x00000002, 0x00000000 },
+	[DRAM_32MB]  = { 0x239964A1, 0x202E2323, 0x00000033, 0x00000002, 0x00000000 },
+	[DRAM_64MB]  = { 0x239984A1, 0x20322323, 0x00000033, 0x00000002, 0x00000000 },
+	[DRAM_128MB] = { 0x239AB4A1, 0x20362333, 0x00000033, 0x00000002, 0x00000000 },
+};
+
+const struct mc_ddr_cfg ddr2_cfgs_200mhz[] = {	/* DDR2; chosen when lspd == 0 */
+	[DRAM_32MB]  = { 0x2519E2E5, 0x222E2323, 0x68000C43, 0x00000452, 0x0000000A },
+	[DRAM_64MB]  = { 0x249AA2E5, 0x22322323, 0x68000C43, 0x00000452, 0x0000000A },
+	[DRAM_128MB] = { 0x249B42E5, 0x22362323, 0x68000C43, 0x00000452, 0x0000000A },
+	[DRAM_256MB] = { 0x249CE2E5, 0x223A2323, 0x68000C43, 0x00000452, 0x0000000A },
+};
+
+const struct mc_ddr_cfg ddr2_cfgs_160mhz[] = {	/* DDR2; chosen when lspd != 0 */
+	[DRAM_32MB]  = { 0x23918250, 0x222E2322, 0x40000A43, 0x00000452, 0x00000006 },
+	[DRAM_64MB]  = { 0x239A2250, 0x22322322, 0x40000A43, 0x00000452, 0x00000008 },
+	[DRAM_128MB] = { 0x2392A250, 0x22362322, 0x40000A43, 0x00000452, 0x00000008 },
+	[DRAM_256MB] = { 0x24140250, 0x223A2322, 0x40000A43, 0x00000452, 0x00000008 },
+};
+
+static void mt7628_memc_reset(int assert)
+{	/* Set or clear MC_RST in SYSCTL_RSTCTL_REG; also used as param.mc_reset */
+	void __iomem *sysc = ioremap_nocache(SYSCTL_BASE, SYSCTL_SIZE);
+
+	if (assert)	/* non-zero: set the reset bit */
+		setbits_32(sysc + SYSCTL_RSTCTL_REG, MC_RST);
+	else		/* zero: clear the reset bit */
+		clrbits_32(sysc + SYSCTL_RSTCTL_REG, MC_RST);
+}
+
+static void mt7628_ddr_pad_ldo_config(int ddr_type, int pkg_type)
+{	/* Program the PMU DDR LDO bits, then the DDR pad drive fields */
+	void __iomem *rgc = ioremap_nocache(RGCTL_BASE, RGCTL_SIZE);
+	u32 ck_pad1, cmd_pad1, dq_pad0, dq_pad1, dqs_pad0, dqs_pad1;
+
+	setbits_32(rgc + RGCTL_PMU_G0_REG, PMU_CFG_EN);
+
+	if (ddr_type == DRAM_DDR1)	/* VOSEL set: 2.5V in the removed asm */
+		setbits_32(rgc + RGCTL_PMU_G3_REG, RG_DDRLDO_VOSEL);
+	else				/* VOSEL clear: 1.8V in the removed asm */
+		clrbits_32(rgc + RGCTL_PMU_G3_REG, RG_DDRLDO_VOSEL);
+
+	setbits_32(rgc + RGCTL_PMU_G3_REG, NI_DDRLDO_EN);
+
+	__udelay(250 * 50);	/* 12500 us between NI_DDRLDO_EN and NI_DDRLDO_STB */
+
+	setbits_32(rgc + RGCTL_PMU_G3_REG, NI_DDRLDO_STB);
+	setbits_32(rgc + RGCTL_PMU_G1_REG, RG_BUCK_FPWM);
+
+	ck_pad1 = readl(rgc + RGCTL_DDR_PAD_CK_G1_REG);
+	cmd_pad1 = readl(rgc + RGCTL_DDR_PAD_CMD_G1_REG);
+	dq_pad0 = readl(rgc + RGCTL_DDR_PAD_DQ_G0_REG);
+	dq_pad1 = readl(rgc + RGCTL_DDR_PAD_DQ_G1_REG);
+	dqs_pad0 = readl(rgc + RGCTL_DDR_PAD_DQS_G0_REG);
+	dqs_pad1 = readl(rgc + RGCTL_DDR_PAD_DQS_G1_REG);
+
+	ck_pad1 &= ~(DRVP_M | DRVN_M);	/* clear the fields set below */
+	cmd_pad1 &= ~(DRVP_M | DRVN_M);
+	dq_pad0 &= ~RTT_M;		/* RTT stays 0 on every path below */
+	dq_pad1 &= ~(DRVP_M | DRVN_M);
+	dqs_pad0 &= ~RTT_M;		/* RTT stays 0 on every path below */
+	dqs_pad1 &= ~(DRVP_M | DRVN_M);
+
+	if (pkg_type == PKG_ID_KN) {	/* KN: same drive value for both DDR types */
+		ck_pad1 |= (3 << DRVP_S) | (3 << DRVN_S);
+		cmd_pad1 |= (3 << DRVP_S) | (3 << DRVN_S);
+		dq_pad1 |= (3 << DRVP_S) | (3 << DRVN_S);
+		dqs_pad1 |= (3 << DRVP_S) | (3 << DRVN_S);
+	} else {			/* AN: only the DQ drive depends on ddr_type */
+		ck_pad1 |= (12 << DRVP_S) | (12 << DRVN_S);
+		cmd_pad1 |= (2 << DRVP_S) | (2 << DRVN_S);
+		dqs_pad1 |= (12 << DRVP_S) | (12 << DRVN_S);
+		if (ddr_type == DRAM_DDR1)
+			dq_pad1 |= (7 << DRVP_S) | (7 << DRVN_S);
+		else
+			dq_pad1 |= (4 << DRVP_S) | (4 << DRVN_S);
+	}
+
+	writel(ck_pad1, rgc + RGCTL_DDR_PAD_CK_G1_REG);
+	writel(cmd_pad1, rgc + RGCTL_DDR_PAD_CMD_G1_REG);
+	writel(dq_pad0, rgc + RGCTL_DDR_PAD_DQ_G0_REG);
+	writel(dq_pad1, rgc + RGCTL_DDR_PAD_DQ_G1_REG);
+	writel(dqs_pad0, rgc + RGCTL_DDR_PAD_DQS_G0_REG);
+	writel(dqs_pad1, rgc + RGCTL_DDR_PAD_DQS_G1_REG);
+}
+
+void mt7628_ddr_init(void)
+{	/* Set MC reset, set up LDO/pads, run DDR1 or DDR2 init, calibrate, set ram_size */
+	void __iomem *sysc;
+	int ddr_type, pkg_type, lspd;
+	struct mc_ddr_init_param param;
+
+	sysc = ioremap_nocache(SYSCTL_BASE, SYSCTL_SIZE);
+	ddr_type = readl(sysc + SYSCTL_SYSCFG0_REG) & DRAM_TYPE;	/* DRAM_DDR1/DDR2 */
+	pkg_type = !!(readl(sysc + SYSCTL_CHIP_REV_ID_REG) & PKG_ID);	/* PKG_ID_AN/KN */
+	lspd = readl(sysc + SYSCTL_CLKCFG0_REG) &
+	       (CPU_PLL_FROM_BBP | CPU_PLL_FROM_XTAL);	/* != 0: selects 160mhz tables */
+
+	mt7628_memc_reset(1);	/* set MC_RST, then wait 200 us */
+	__udelay(200);
+
+	mt7628_ddr_pad_ldo_config(ddr_type, pkg_type);
+
+	param.memc = ioremap_nocache(MEMCTL_BASE, MEMCTL_SIZE);
+	param.dq_dly = DDR2_DQ_DLY;	/* same delay words for DDR1 and DDR2 */
+	param.dqs_dly = DDR2_DQS_DLY;
+	param.mc_reset = mt7628_memc_reset;
+	param.memsize = 0;	/* presumably filled in by ddrN_init() - TODO confirm */
+	param.bus_width = 0;	/* presumably filled in by ddrN_init() - TODO confirm */
+
+	if (pkg_type == PKG_ID_KN)	/* KN package is always initialized as DDR1 */
+		ddr_type = DRAM_DDR1;	/* NOTE(review): LDO above used the SYSCFG0 value */
+
+	if (ddr_type == DRAM_DDR1) {
+		if (lspd)
+			param.cfgs = ddr1_cfgs_160mhz;
+		else
+			param.cfgs = ddr1_cfgs_200mhz;
+		ddr1_init(&param);
+	} else {
+		if (lspd)
+			param.cfgs = ddr2_cfgs_160mhz;
+		else
+			param.cfgs = ddr2_cfgs_200mhz;
+		ddr2_init(&param);
+	}
+
+	ddr_calibrate(param.memc, param.memsize, param.bus_width);
+
+	gd->ram_size = param.memsize;	/* whatever the init call left in param */
+}
diff --git a/arch/mips/mach-mtmips/mt7628/init.c b/arch/mips/mach-mtmips/mt7628/init.c
new file mode 100644
index 0000000..77d1f2e
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7628/init.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author:  Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <common.h>
+#include <clk.h>
+#include <dm.h>
+#include <dm/uclass.h>
+#include <dt-bindings/clock/mt7628-clk.h>
+#include <linux/io.h>
+#include "mt7628.h"
+
+DECLARE_GLOBAL_DATA_PTR;
+
+static void set_init_timer_freq(void)
+{	/* Provisional gd->arch.timer_freq, derived from SYSCFG0 and CLKCFG0 only */
+	void __iomem *sysc;
+	u32 bs, val, timer_freq_post;
+
+	sysc = ioremap_nocache(SYSCTL_BASE, SYSCTL_SIZE);
+
+	/* We can't use the clk driver as the DM has not been initialized yet */
+	bs = readl(sysc + SYSCTL_SYSCFG0_REG);
+	if ((bs & XTAL_FREQ_SEL) == XTAL_25MHZ) {	/* XTAL_FREQ_SEL bit clear */
+		gd->arch.timer_freq = 25000000;
+		timer_freq_post = 575000000;
+	} else {					/* XTAL_FREQ_SEL bit set */
+		gd->arch.timer_freq = 40000000;
+		timer_freq_post = 580000000;
+	}
+
+	val = readl(sysc + SYSCTL_CLKCFG0_REG);
+	if (!(val & (CPU_PLL_FROM_BBP | CPU_PLL_FROM_XTAL)))	/* neither bit set */
+		gd->arch.timer_freq = timer_freq_post;
+}
+
+void mt7628_init(void)
+{	/* C entry point called from lowlevel_init in lowlevel_init.S */
+	set_init_timer_freq();	/* first; presumably needed by __udelay() below */
+
+	mt7628_ddr_init();
+}
+
+int print_cpuinfo(void)
+{	/* Print SoC, boot and clock lines; also sets the final gd->arch.timer_freq */
+	void __iomem *sysc;
+	struct udevice *clkdev;
+	u32 val, ver, eco, pkg, ddr, chipmode, ee;
+	ulong cpu_clk, bus_clk, xtal_clk, timer_freq;
+	struct clk clk;
+	int ret;
+
+	sysc = ioremap_nocache(SYSCTL_BASE, SYSCTL_SIZE);
+
+	val = readl(sysc + SYSCTL_CHIP_REV_ID_REG);
+	ver = (val & VER_M) >> VER_S;
+	eco = (val & ECO_M) >> ECO_S;
+	pkg = !!(val & PKG_ID);		/* 1 prints 'A', 0 prints 'K' */
+
+	val = readl(sysc + SYSCTL_SYSCFG0_REG);
+	ddr = val & DRAM_TYPE;		/* non-zero prints "DDR", zero prints "DDR2" */
+	chipmode = (val & CHIP_MODE_M) >> CHIP_MODE_S;
+
+	val = readl(sysc + SYSCTL_EFUSE_CFG_REG);
+	ee = val & EFUSE_MT7688;	/* non-zero prints MT7688, zero prints MT7628 */
+
+	printf("CPU:   MediaTek MT%u%c ver:%u eco:%u\n",
+	       ee ? 7688 : 7628, pkg ? 'A' : 'K', ver, eco);
+
+	printf("Boot:  DDR%s, SPI-NOR %u-Byte Addr, CPU clock from %s\n",
+	       ddr ? "" : "2", chipmode & 0x01 ? 4 : 3,
+	       chipmode & 0x02 ? "XTAL" : "CPLL");
+
+	ret = uclass_get_device_by_driver(UCLASS_CLK, DM_GET_DRIVER(mt7628_clk),
+					  &clkdev);
+	if (ret)	/* no clock line is printed and timer_freq is left untouched */
+		return ret;
+
+	clk.dev = clkdev;	/* only .dev and .id of clk are filled in here */
+
+	clk.id = CLK_CPU;
+	cpu_clk = clk_get_rate(&clk);
+
+	clk.id = CLK_SYS;
+	bus_clk = clk_get_rate(&clk);
+
+	clk.id = CLK_XTAL;
+	xtal_clk = clk_get_rate(&clk);
+
+	clk.id = CLK_MIPS_CNT;
+	timer_freq = clk_get_rate(&clk);
+
+	/* Set final timer frequency (kept as is when the CLK_MIPS_CNT rate is 0) */
+	if (timer_freq)
+		gd->arch.timer_freq = timer_freq;
+
+	printf("Clock: CPU: %luMHz, Bus: %luMHz, XTAL: %luMHz\n",
+	       cpu_clk / 1000000, bus_clk / 1000000, xtal_clk / 1000000);
+
+	return 0;
+}
+
+ulong notrace get_tbclk(void)
+{	/* Returns the value set by set_init_timer_freq() or print_cpuinfo() */
+	return gd->arch.timer_freq;
+}
diff --git a/arch/mips/mach-mtmips/mt7628/lowlevel_init.S b/arch/mips/mach-mtmips/mt7628/lowlevel_init.S
new file mode 100644
index 0000000..e4a6c03
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7628/lowlevel_init.S
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author:  Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <config.h>
+#include <asm-offsets.h>
+#include <asm/cacheops.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/addrspace.h>
+#include <asm/asm.h>
+#include "mt7628.h"
+
+/* Set temporary stack address range */
+#ifndef CONFIG_SYS_INIT_SP_ADDR
+#define CONFIG_SYS_INIT_SP_ADDR	(CONFIG_SYS_SDRAM_BASE + \
+				CONFIG_SYS_INIT_SP_OFFSET)
+#endif
+
+#define CACHE_STACK_SIZE	0x4000	/* bytes of D-Cache locked by mips_sram_init */
+#define CACHE_STACK_BASE	(CONFIG_SYS_INIT_SP_ADDR - CACHE_STACK_SIZE)
+
+#define DELAY_USEC(us)		((58 * (us)) / 3)	/* us -> poll-loop count; NOTE(review): basis of 58/3 not stated */
+
+	.set noreorder
+
+LEAF(mips_sram_init)	/* CPU PLL setup, cache init, lock D-Cache lines as stack */
+#ifndef CONFIG_SKIP_LOWLEVEL_INIT
+	/* Setup CPU PLL */
+	li	t0, DELAY_USEC(1000000)		/* poll budget for ROM status */
+	li	t1, KSEG1ADDR(SYSCTL_BASE + SYSCTL_ROM_STATUS_REG)
+	li	t2, KSEG1ADDR(SYSCTL_BASE + SYSCTL_CLKCFG0_REG)
+
+_check_rom_status:
+	lw	t3, 0(t1)
+	andi	t3, t3, 1
+	bnez	t3, _rom_normal
+	subu	t0, t0, 1			/* delay slot: runs on both paths */
+	bnez	t0, _check_rom_status
+	 nop
+
+	lw	t3, 0(t2)			/* timed out: FROM_XTAL = 1, FROM_BBP = 0 */
+	ori	t3, (CPU_PLL_FROM_BBP | CPU_PLL_FROM_XTAL)
+	xori	t3, CPU_PLL_FROM_BBP
+	b	_cpu_pll_done
+	 nop
+
+_rom_normal:
+	lw	t3, 0(t2)			/* set BBP bits, clear both FROM_* bits */
+	ori	t3, (CPU_PLL_FROM_BBP | CPU_PLL_FROM_XTAL | \
+		    DIS_BBP_SLEEP | EN_BBP_CLK)
+	xori	t3, (CPU_PLL_FROM_BBP | CPU_PLL_FROM_XTAL)
+
+_cpu_pll_done:
+	sw	t3, 0(t2)
+
+	li	t2, KSEG1ADDR(RBUSCTL_BASE + RBUSCTL_DYN_CFG0_REG)
+	lw	t3, 0(t2)
+	ori	t3, t3, (CPU_FDIV_M | CPU_FFRAC_M)	/* ori + xori: clear both fields */
+	xori	t3, t3, (CPU_FDIV_M | CPU_FFRAC_M)
+	ori	t3, t3, ((1 << CPU_FDIV_S) | (1 << CPU_FFRAC_S))	/* FDIV = FFRAC = 1 */
+	sw	t3, 0(t2)
+
+	/* Clear WST & SPR bits in ErrCtl */
+	mfc0	t0, CP0_ECC
+	ins	t0, zero, 28, 2			/* WST = bit 29, SPR = bit 28 */
+	mtc0	t0, CP0_ECC
+	ehb
+
+	/* Simply initialize I-Cache */
+	li	a0, 0
+	li	a1, CONFIG_SYS_ICACHE_SIZE
+
+	mtc0	zero, CP0_TAGLO		/* Zero to ITagLo */
+
+1:	cache	INDEX_STORE_TAG_I, 0(a0)
+	addiu	a0, CONFIG_SYS_ICACHE_LINE_SIZE
+	bne	a0, a1, 1b
+	 nop
+
+	/* Simply initialize D-Cache */
+	li	a0, 0
+	li	a1, CONFIG_SYS_DCACHE_SIZE
+
+	mtc0	zero, CP0_TAGLO, 2	/* Zero to DTagLo */
+
+2:	cache	INDEX_STORE_TAG_D, 0(a0)
+	addiu	a0, CONFIG_SYS_DCACHE_LINE_SIZE
+	bne	a0, a1, 2b
+	 nop
+
+	/* Set KSEG0 Cachable */
+	mfc0	t0, CP0_CONFIG
+	and	t0, t0, MIPS_CONF_IMPL
+	or	t0, t0, CONF_CM_CACHABLE_NONCOHERENT
+	mtc0	t0, CP0_CONFIG
+	ehb
+
+	/* Lock D-Cache */
+	PTR_LI	a0, CACHE_STACK_BASE		/* D-Cache lock base */
+	li	a1, CACHE_STACK_SIZE		/* D-Cache lock size */
+	li	a2, 0x1ffff800			/* Mask of DTagLo[PTagLo] */
+
+3:
+	/* Lock one cacheline */
+	and	t0, a0, a2
+	ori	t0, 0xe0			/* Valid & Dirty & Lock bits */
+	mtc0	t0, CP0_TAGLO, 2		/* Write to DTagLo */
+	ehb
+	cache	INDEX_STORE_TAG_D, 0(a0)
+
+	addiu	a0, CONFIG_SYS_DCACHE_LINE_SIZE
+	sub	a1, CONFIG_SYS_DCACHE_LINE_SIZE
+	bnez	a1, 3b
+	 nop
+#endif /* CONFIG_SKIP_LOWLEVEL_INIT */
+
+	jr	ra
+	 nop
+	END(mips_sram_init)
+
+NESTED(lowlevel_init, 0, ra)	/* run mt7628_init(), then write the cache stack back */
+	/* Save ra and do real lowlevel initialization */
+	move	s0, ra
+
+	PTR_LA	t9, mt7628_init
+	jalr	t9
+	 nop
+
+	move	ra, s0
+
+#if CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
+	/* Set malloc base */
+	li	t0, (CONFIG_SYS_INIT_SP_ADDR + 15) & (~15)	/* rounded up to 16 */
+	PTR_S	t0, GD_MALLOC_BASE(k0)	# gd->malloc_base offset
+#endif
+
+	/* Write back data in locked cache to DRAM */
+	PTR_LI	a0, CACHE_STACK_BASE		/* D-Cache unlock base */
+	li	a1, CACHE_STACK_SIZE		/* D-Cache unlock size */
+
+1:
+	cache	HIT_WRITEBACK_INV_D, 0(a0)	/* one cache line per iteration */
+	addiu	a0, CONFIG_SYS_DCACHE_LINE_SIZE
+	sub	a1, CONFIG_SYS_DCACHE_LINE_SIZE
+	bnez	a1, 1b
+	 nop
+
+	/* Set KSEG0 Uncached */
+	mfc0	t0, CP0_CONFIG
+	and	t0, t0, MIPS_CONF_IMPL
+	or	t0, t0, CONF_CM_UNCACHED
+	mtc0	t0, CP0_CONFIG
+	ehb
+
+	jr	ra
+	 nop
+	END(lowlevel_init)
diff --git a/arch/mips/mach-mtmips/mt7628/mt7628.h b/arch/mips/mach-mtmips/mt7628/mt7628.h
new file mode 100644
index 0000000..391880b
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7628/mt7628.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author:  Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#ifndef _MT7628_H_
+#define _MT7628_H_
+
+#define SYSCTL_BASE			0x10000000
+#define SYSCTL_SIZE			0x100
+#define MEMCTL_BASE			0x10000300
+#define MEMCTL_SIZE			0x100
+#define RBUSCTL_BASE			0x10000400
+#define RBUSCTL_SIZE			0x100
+#define RGCTL_BASE			0x10001000
+#define RGCTL_SIZE			0x800
+
+#define SYSCTL_EFUSE_CFG_REG		0x08
+#define EFUSE_MT7688			0x100000
+
+#define SYSCTL_CHIP_REV_ID_REG		0x0c
+#define PKG_ID				0x10000
+#define PKG_ID_AN			1	/* compared against !!(reg & PKG_ID) */
+#define PKG_ID_KN			0
+#define VER_S				8
+#define VER_M				0xf00
+#define ECO_S				0
+#define ECO_M				0x0f
+
+#define SYSCTL_SYSCFG0_REG		0x10
+#define XTAL_FREQ_SEL			0x40	/* clear: 25 MHz, set: 40 MHz */
+#define XTAL_40MHZ			1
+#define XTAL_25MHZ			0
+#define CHIP_MODE_S			1
+#define CHIP_MODE_M			0x0e
+#define DRAM_TYPE			0x01	/* set: DDR1, clear: DDR2 */
+#define DRAM_DDR1			1
+#define DRAM_DDR2			0
+
+#define SYSCTL_ROM_STATUS_REG		0x28
+
+#define SYSCTL_CLKCFG0_REG		0x2c
+#define DIS_BBP_SLEEP			0x08
+#define EN_BBP_CLK			0x04
+#define CPU_PLL_FROM_BBP		0x02
+#define CPU_PLL_FROM_XTAL		0x01
+
+#define SYSCTL_RSTCTL_REG		0x34
+#define MC_RST				0x400	/* memory controller reset bit */
+
+#define SYSCTL_AGPIO_CFG_REG		0x3c
+#define EPHY_GPIO_AIO_EN_S		17
+#define EPHY_GPIO_AIO_EN_M		0x1e0000
+
+#define SYSCTL_GPIO_MODE1_REG		0x60
+#define UART2_MODE_S			26
+#define UART2_MODE_M			0xc000000
+#define UART1_MODE_S			24
+#define UART1_MODE_M			0x3000000
+#define UART0_MODE_S			8
+#define UART0_MODE_M			0x300
+#define SPIS_MODE_S			2
+#define SPIS_MODE_M			0x0c
+
+#define RBUSCTL_DYN_CFG0_REG		0x40
+#define CPU_FDIV_S			8
+#define CPU_FDIV_M			0xf00
+#define CPU_FFRAC_S			0
+#define CPU_FFRAC_M			0x0f
+
+#define RGCTL_PMU_G0_REG		0x100
+#define PMU_CFG_EN			0x80000000
+
+#define RGCTL_PMU_G1_REG		0x104
+#define RG_BUCK_FPWM			0x02	/* NOTE(review): removed asm set BIT(10) here - confirm */
+
+#define RGCTL_PMU_G3_REG		0x10c
+#define NI_DDRLDO_STB			0x40000
+#define NI_DDRLDO_EN			0x10000
+#define RG_DDRLDO_VOSEL			0x40	/* set for DDR1, cleared for DDR2 */
+
+#define RGCTL_DDR_PAD_CK_G0_REG		0x700
+#define RGCTL_DDR_PAD_CMD_G0_REG	0x708
+#define RGCTL_DDR_PAD_DQ_G0_REG		0x710
+#define RGCTL_DDR_PAD_DQS_G0_REG	0x718
+#define RTT_S				8	/* RTT_*: field of the DQ/DQS *_G0 registers */
+#define RTT_M				0x700
+
+#define RGCTL_DDR_PAD_CK_G1_REG		0x704
+#define RGCTL_DDR_PAD_CMD_G1_REG	0x70c
+#define RGCTL_DDR_PAD_DQ_G1_REG		0x714
+#define RGCTL_DDR_PAD_DQS_G1_REG	0x71c
+#define DRVP_S				0	/* DRVP_*/DRVN_*: fields of the *_G1 registers */
+#define DRVP_M				0x0f
+#define DRVN_S				8
+#define DRVN_M				0xf00
+
+#ifndef __ASSEMBLY__
+void mt7628_ddr_init(void);	/* defined in ddr.c */
+#endif
+
+#endif /* _MT7628_H_ */
diff --git a/arch/mips/mach-mtmips/mt76xx.h b/arch/mips/mach-mtmips/mt76xx.h
deleted file mode 100644
index 17473ea..0000000
--- a/arch/mips/mach-mtmips/mt76xx.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * Copyright (C) 2018 Stefan Roese <sr@denx.de>
- */
-
-#ifndef __MT76XX_H
-#define __MT76XX_H
-
-#define MT76XX_SYSCTL_BASE	0x10000000
-
-#define MT76XX_CHIPID_OFFS	0x00
-#define MT76XX_CHIP_REV_ID_OFFS	0x0c
-#define MT76XX_SYSCFG0_OFFS	0x10
-
-#define MT76XX_MEMCTRL_BASE	(MT76XX_SYSCTL_BASE + 0x0300)
-#define MT76XX_RGCTRL_BASE	(MT76XX_SYSCTL_BASE + 0x1000)
-
-#define MT76XX_ROM_STATUS_REG	(MT76XX_SYSCTL_BASE + 0x0028)
-#define MT76XX_CLKCFG0_REG	(MT76XX_SYSCTL_BASE + 0x002c)
-#define MT76XX_DYN_CFG0_REG	(MT76XX_SYSCTL_BASE + 0x0440)
-
-#define DDR_CFG1_REG		(MT76XX_MEMCTRL_BASE + 0x44)
-#define DDR_CFG2_REG		(MT76XX_MEMCTRL_BASE + 0x48)
-#define DDR_CFG3_REG		(MT76XX_MEMCTRL_BASE + 0x4c)
-#define DDR_CFG4_REG		(MT76XX_MEMCTRL_BASE + 0x50)
-
-#ifndef __ASSEMBLY__
-/* Prototypes */
-void ddr_calibrate(void);
-#endif
-
-#endif