mips: mtmips: add support for MediaTek MT7621 SoC

This patch adds support for MediaTek MT7621 SoC.
All files are dedicated for u-boot.

The default build target is u-boot-mt7621.bin.

The specification of this chip:
https://www.mediatek.com/products/homenetworking/mt7621

Reviewed-by: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
diff --git a/arch/mips/mach-mtmips/mt7621/tpl/Makefile b/arch/mips/mach-mtmips/mt7621/tpl/Makefile
new file mode 100644
index 0000000..471ad74
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/tpl/Makefile
@@ -0,0 +1,4 @@
+
+extra-y += start.o
+
+obj-y += tpl.o
diff --git a/arch/mips/mach-mtmips/mt7621/tpl/start.S b/arch/mips/mach-mtmips/mt7621/tpl/start.S
new file mode 100644
index 0000000..19b09f7
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/tpl/start.S
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/addrspace.h>
+#include <asm/mipsregs.h>
+#include <asm/cm.h>
+#include "../mt7621.h"
+
+#define SP_ADDR_TEMP		0xbe10dff0
+
+	.set noreorder
+
+	.macro init_wr sel
+	MTC0	zero, CP0_WATCHLO,\sel
+	mtc0	t1, CP0_WATCHHI,\sel
+	.endm
+
+	.macro uhi_mips_exception
+	move	k0, t9		# preserve t9 in k0
+	move	k1, a0		# preserve a0 in k1
+	li	t9, 15		# UHI exception operation
+	li	a0, 0		# Use hard register context
+	sdbbp	1		# Invoke UHI operation
+	.endm
+
+ENTRY(_start)
+	b	reset
+	 mtc0	zero, CP0_COUNT
+
+	/*
+	 * Store TPL size here.
+	 * This will be used by SPL to locate u-boot payload.
+	 */
+	.org	TPL_INFO_OFFSET
+	.word	TPL_INFO_MAGIC
+	.word	__image_copy_len
+
+	/* Exception vector */
+	.org 0x200
+	/* TLB refill, 32 bit task */
+	uhi_mips_exception
+
+	.org 0x280
+	/* XTLB refill, 64 bit task */
+	uhi_mips_exception
+
+	.org 0x300
+	/* Cache error exception */
+	uhi_mips_exception
+
+	.org 0x380
+	/* General exception */
+	uhi_mips_exception
+
+	.org 0x400
+	/* Catch interrupt exceptions */
+	uhi_mips_exception
+
+	.org 0x480
+	/* EJTAG debug exception */
+1:	b	1b
+	 nop
+
+	.org 0x500
+
+reset:
+	/* Set KSEG0 to Uncached */
+	mfc0	t0, CP0_CONFIG
+	ins	t0, zero, 0, 3
+	ori	t0, t0, CONF_CM_UNCACHED
+	mtc0	t0, CP0_CONFIG
+	ehb
+
+	/* Check for CPU number */
+	mfc0	t0, CP0_EBASE
+	and	t0, t0, MIPS_EBASE_CPUNUM
+	beqz	t0, 1f
+	 nop
+
+	/* Secondary core goes to specified SPL entry address */
+	li	t0, KSEG1ADDR(SYSCTL_BASE)
+	lw	t0, BOOT_SRAM_BASE_REG(t0)
+	jr	t0
+	 nop
+
+	/* Init CP0 Status */
+1:	mfc0	t0, CP0_STATUS
+	and	t0, ST0_IMPL
+	or	t0, ST0_BEV | ST0_ERL
+	mtc0	t0, CP0_STATUS
+	 nop
+
+	/* Clear Watch Status bits and disable watch exceptions */
+	li	t1, 0x7		# Clear I, R and W conditions
+	init_wr	0
+	init_wr	1
+	init_wr	2
+	init_wr	3
+
+	/* Clear WP, IV and SW interrupts */
+	mtc0	zero, CP0_CAUSE
+
+	/* Clear timer interrupt (CP0_COUNT cleared on branch to 'reset') */
+	mtc0	zero, CP0_COMPARE
+
+	/* Setup basic CPS */
+	bal	mips_cm_map
+	 nop
+
+	li	t0, KSEG1ADDR(CONFIG_MIPS_CM_BASE)
+	li	t1, GCR_REG0_BASE_VALUE
+	sw	t1, GCR_REG0_BASE(t0)
+
+	li	t1, ((GCR_REG0_MASK_VALUE << GCR_REGn_MASK_ADDRMASK_SHIFT) | \
+		    GCR_REGn_MASK_CMTGT_IOCU0)
+	sw	t1, GCR_REG0_MASK(t0)
+
+	lw	t1, GCR_BASE(t0)
+	ins	t1, zero, 0, 2		# CM_DEFAULT_TARGET
+	sw	t1, GCR_BASE(t0)
+
+	lw	t1, GCR_CONTROL(t0)
+	li	t2, GCR_CONTROL_SYNCCTL
+	or	t1, t1, t2
+	sw	t1, GCR_CONTROL(t0)
+
+	/* Increase SPI frequency */
+	li	t0, KSEG1ADDR(SPI_BASE)
+	li	t1, 5
+	sw	t1, SPI_SPACE_REG(t0)
+
+	/* Set CPU clock to 500MHz */
+	li	t0, KSEG1ADDR(SYSCTL_BASE)
+	lw	t1, SYSCTL_CLKCFG0_REG(t0)
+	ins	t1, zero, 30, 2		# CPU_CLK_SEL
+	sw	t1, SYSCTL_CLKCFG0_REG(t0)
+
+	/* Set CPU clock divider to 1/1 */
+	li	t0, KSEG1ADDR(RBUS_BASE)
+	li	t1, 0x101
+	sw	t1, RBUS_DYN_CFG0_REG(t0)
+
+	/* Initialize the SRAM */
+	bal	mips_sram_init
+	 nop
+
+	/* Set up initial stack */
+	li	sp, SP_ADDR_TEMP
+
+	bal	tpl_main
+	 nop
+
+	END(_start)
diff --git a/arch/mips/mach-mtmips/mt7621/tpl/tpl.c b/arch/mips/mach-mtmips/mt7621/tpl/tpl.c
new file mode 100644
index 0000000..2a82890
--- /dev/null
+++ b/arch/mips/mach-mtmips/mt7621/tpl/tpl.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 MediaTek Inc. All rights reserved.
+ *
+ * Author: Weijie Gao <weijie.gao@mediatek.com>
+ */
+
+#include <image.h>
+#include <asm/system.h>
+#include <asm/sections.h>
+#include <asm/cacheops.h>
+#include <asm/mipsregs.h>
+#include <asm/cm.h>
+
+#define INDEX_STORE_DATA_SD	0x0f
+
+typedef void __noreturn (*image_entry_noargs_t)(void);
+
+/*
+ * Lock L2 cache and fill data
+ * Assume that data is 4-byte aligned and start_addr/size is 32-byte aligned
+ */
+static void fill_lock_l2cache(uintptr_t dataptr, ulong start_addr, ulong size)
+{
+	ulong slsize = CONFIG_SYS_DCACHE_LINE_SIZE;
+	ulong end_addr = start_addr + size;
+	const u32 *data = (u32 *)dataptr;
+	ulong i, addr;
+	u32 val;
+
+	/* Clear WSC & SPR bit in ErrCtl */
+	val = read_c0_ecc();
+	val &= 0xcfffffff;
+	write_c0_ecc(val);
+	execution_hazard_barrier();
+
+	for (addr = start_addr; addr < end_addr; addr += slsize) {
+		/* Set STagLo to lock cache line */
+		write_c0_staglo((addr & 0x1ffff800) | 0xa0);
+		mips_cache(INDEX_STORE_TAG_SD, (void *)addr);
+
+		/* Fill data */
+		for (i = 0; i < slsize; i += 8) {
+			val = *data++;
+			__write_32bit_c0_register($28, 5, val); /* sdtaglo */
+			val = *data++;
+			__write_32bit_c0_register($29, 5, val); /* sdtaghi */
+			mips_cache(INDEX_STORE_DATA_SD, (void *)(addr + i));
+		}
+	}
+
+	sync();
+}
+
+/* A simple function to initialize MT7621's cache */
+static void mt7621_cache_init(void)
+{
+	void __iomem *cm_base = (void *)KSEG1ADDR(CONFIG_MIPS_CM_BASE);
+	ulong lsize = CONFIG_SYS_DCACHE_LINE_SIZE;
+	ulong addr;
+	u32 val;
+
+	/* Enable CCA override. Set to uncached */
+	val = readl(cm_base + GCR_BASE);
+	val &= ~CCA_DEFAULT_OVR_MASK;
+	val |= CCA_DEFAULT_OVREN | (2 << CCA_DEFAULT_OVR_SHIFT);
+	writel(val, cm_base + GCR_BASE);
+
+	/* Initialize L1 I-Cache */
+	write_c0_taglo(0);
+	write_c0_taghi(0);
+
+	for (addr = 0; addr < CONFIG_SYS_ICACHE_SIZE; addr += lsize)
+		mips_cache(INDEX_STORE_TAG_I, (void *)addr);
+
+	/* Initialize L1 D-Cache */
+	write_c0_dtaglo(0);
+	__write_32bit_c0_register($29, 2, 0); /* dtaghi */
+
+	for (addr = 0; addr < CONFIG_SYS_DCACHE_SIZE; addr += lsize)
+		mips_cache(INDEX_STORE_TAG_D, (void *)addr);
+
+	/* Initialize L2 Cache */
+	write_c0_staglo(0);
+	__write_32bit_c0_register($29, 4, 0); /* staghi */
+
+	for (addr = 0; addr < (256 << 10); addr += lsize)
+		mips_cache(INDEX_STORE_TAG_SD, (void *)addr);
+
+	/* Dsiable CCA override */
+	val = readl(cm_base + GCR_BASE);
+	val &= ~(CCA_DEFAULT_OVR_MASK | CCA_DEFAULT_OVREN);
+	writel(val, cm_base + GCR_BASE);
+
+	/* Set KSEG0 to non-coherent cached (important!) */
+	val = read_c0_config();
+	val &= ~CONF_CM_CMASK;
+	val |= CONF_CM_CACHABLE_NONCOHERENT;
+	write_c0_config(val);
+	execution_hazard_barrier();
+
+	/* Again, invalidate L1 D-Cache */
+	for (addr = 0; addr < CONFIG_SYS_DCACHE_SIZE; addr += lsize)
+		mips_cache(INDEX_WRITEBACK_INV_D, (void *)addr);
+
+	/* Invalidate L1 I-Cache */
+	for (addr = 0; addr < CONFIG_SYS_ICACHE_SIZE; addr += lsize)
+		mips_cache(INDEX_INVALIDATE_I, (void *)addr);
+
+	/* Disable L2 cache bypass */
+	val = read_c0_config2();
+	val &= ~MIPS_CONF_IMPL;
+	write_c0_config2(val);
+	execution_hazard_barrier();
+}
+
+void __noreturn tpl_main(void)
+{
+	const image_header_t *hdr = (const image_header_t *)__image_copy_end;
+	image_entry_noargs_t image_entry;
+	u32 loadaddr, size;
+	uintptr_t data;
+
+	/* Initialize the cache first */
+	mt7621_cache_init();
+
+	if (image_get_magic(hdr) != IH_MAGIC)
+		goto failed;
+
+	loadaddr = image_get_load(hdr);
+	size = image_get_size(hdr);
+	image_entry = (image_entry_noargs_t)image_get_ep(hdr);
+
+	/* Load TPL image to L2 cache */
+	data = (uintptr_t)__image_copy_end + sizeof(struct image_header);
+	fill_lock_l2cache(data, loadaddr, size);
+
+	/* Jump to SPL */
+	image_entry();
+
+failed:
+	for (;;)
+		;
+}