arm: apple: Add M1 Pro/Max support

Choose the memory map based on the compatible property from the
device tree passed to us by m1n1. Since DRAM on the M1 Pro/Max
starts at a different address, avoid hardcoding the top of usable
memory. Also make sure that the addresses entered into the memory
map are page-aligned so that we don't crash in dcache_enable().

Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
Tested on: MacBook M1 Max
Tested-by: Janne Grunau <j@jannau.net>
diff --git a/arch/arm/mach-apple/board.c b/arch/arm/mach-apple/board.c
index b7e8d21..f9f8a2f 100644
--- a/arch/arm/mach-apple/board.c
+++ b/arch/arm/mach-apple/board.c
@@ -14,12 +14,22 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
-static struct mm_region apple_mem_map[] = {
+/* Apple M1 */
+
+static struct mm_region t8103_mem_map[] = {
 	{
 		/* I/O */
 		.virt = 0x200000000,
 		.phys = 0x200000000,
-		.size = 8UL * SZ_1G,
+		.size = 2UL * SZ_1G,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
+			 PTE_BLOCK_NON_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* I/O */
+		.virt = 0x380000000,
+		.phys = 0x380000000,
+		.size = SZ_1G,
 		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
 			 PTE_BLOCK_NON_SHARE |
 			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
@@ -27,7 +37,7 @@
 		/* I/O */
 		.virt = 0x500000000,
 		.phys = 0x500000000,
-		.size = 2UL * SZ_1G,
+		.size = SZ_1G,
 		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
 			 PTE_BLOCK_NON_SHARE |
 			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
@@ -63,15 +73,110 @@
 		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |
 			 PTE_BLOCK_INNER_SHARE
 	}, {
+		/* Framebuffer */
+		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL_NC) |
+			 PTE_BLOCK_INNER_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
-		/* Empty entry for framebuffer */
+		/* List terminator */
 		0,
+	}
+};
+
+/* Apple M1 Pro/Max */
+
+static struct mm_region t6000_mem_map[] = {
+	{
+		/* I/O */
+		.virt = 0x280000000,
+		.phys = 0x280000000,
+		.size = SZ_1G,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
+			 PTE_BLOCK_NON_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* I/O */
+		.virt = 0x380000000,
+		.phys = 0x380000000,
+		.size = SZ_1G,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
+			 PTE_BLOCK_NON_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* I/O */
+		.virt = 0x580000000,
+		.phys = 0x580000000,
+		.size = SZ_512M,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
+			 PTE_BLOCK_NON_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* PCIE */
+		.virt = 0x5a0000000,
+		.phys = 0x5a0000000,
+		.size = SZ_512M,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRE) |
+			 PTE_BLOCK_INNER_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* PCIE */
+		.virt = 0x5c0000000,
+		.phys = 0x5c0000000,
+		.size = SZ_1G,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRE) |
+			 PTE_BLOCK_INNER_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* I/O */
+		.virt = 0x700000000,
+		.phys = 0x700000000,
+		.size = SZ_1G,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
+			 PTE_BLOCK_NON_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* I/O */
+		.virt = 0xb00000000,
+		.phys = 0xb00000000,
+		.size = SZ_1G,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
+			 PTE_BLOCK_NON_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* I/O */
+		.virt = 0xf00000000,
+		.phys = 0xf00000000,
+		.size = SZ_1G,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
+			 PTE_BLOCK_NON_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* I/O */
+		.virt = 0x1300000000,
+		.phys = 0x1300000000,
+		.size = SZ_1G,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
+			 PTE_BLOCK_NON_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
+	}, {
+		/* RAM */
+		.virt = 0x10000000000,
+		.phys = 0x10000000000,
+		.size = 16UL * SZ_1G,
+		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |
+			 PTE_BLOCK_INNER_SHARE
+	}, {
+		/* Framebuffer */
+		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL_NC) |
+			 PTE_BLOCK_INNER_SHARE |
+			 PTE_BLOCK_PXN | PTE_BLOCK_UXN
 	}, {
 		/* List terminator */
 		0,
 	}
 };
 
-struct mm_region *mem_map = apple_mem_map;
+struct mm_region *mem_map;
 
 int board_init(void)
 {
@@ -80,59 +185,83 @@
 
 int dram_init(void)
 {
+	return fdtdec_setup_mem_size_base();
+}
+
+int dram_init_banksize(void)
+{
+	return fdtdec_setup_memory_banksize();
+}
+
+extern long fw_dtb_pointer;
+
+void *board_fdt_blob_setup(int *err)
+{
+	/* Return DTB pointer passed by m1n1 */
+	*err = 0;
+	return (void *)fw_dtb_pointer;
+}
+
+void build_mem_map(void)
+{
 	ofnode node;
-	int index, ret;
 	fdt_addr_t base;
 	fdt_size_t size;
+	int i;
+
+	if (of_machine_is_compatible("apple,t8103"))
+		mem_map = t8103_mem_map;
+	else if (of_machine_is_compatible("apple,t6000"))
+		mem_map = t6000_mem_map;
+	else if (of_machine_is_compatible("apple,t6001"))
+		mem_map = t6000_mem_map;
+	else
+		panic("Unsupported SoC\n");
 
-	ret = fdtdec_setup_mem_size_base();
-	if (ret)
-		return ret;
+	/* Find list terminator. */
+	for (i = 0; mem_map[i].size || mem_map[i].attrs; i++)
+		;
+
+	/* Align RAM mapping to page boundaries */
+	base = gd->bd->bi_dram[0].start;
+	size = gd->bd->bi_dram[0].size;
+	size += (base - ALIGN_DOWN(base, SZ_4K));
+	base = ALIGN_DOWN(base, SZ_4K);
+	size = ALIGN(size, SZ_4K);
 
 	/* Update RAM mapping */
-	index = ARRAY_SIZE(apple_mem_map) - 3;
-	apple_mem_map[index].virt = gd->ram_base;
-	apple_mem_map[index].phys = gd->ram_base;
-	apple_mem_map[index].size = gd->ram_size;
+	mem_map[i - 2].virt = base;
+	mem_map[i - 2].phys = base;
+	mem_map[i - 2].size = size;
 
 	node = ofnode_path("/chosen/framebuffer");
 	if (!ofnode_valid(node))
-		return 0;
+		return;
 
 	base = ofnode_get_addr_size(node, "reg", &size);
 	if (base == FDT_ADDR_T_NONE)
-		return 0;
+		return;
 
-	/* Add framebuffer mapping */
-	index = ARRAY_SIZE(apple_mem_map) - 2;
-	apple_mem_map[index].virt = base;
-	apple_mem_map[index].phys = base;
-	apple_mem_map[index].size = size;
-	apple_mem_map[index].attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL_NC) |
-		PTE_BLOCK_INNER_SHARE | PTE_BLOCK_PXN | PTE_BLOCK_UXN;
+	/* Align framebuffer mapping to page boundaries */
+	size += (base - ALIGN_DOWN(base, SZ_4K));
+	base = ALIGN_DOWN(base, SZ_4K);
+	size = ALIGN(size, SZ_4K);
 
-	return 0;
+	/* Add framebuffer mapping */
+	mem_map[i - 1].virt = base;
+	mem_map[i - 1].phys = base;
+	mem_map[i - 1].size = size;
 }
 
-int dram_init_banksize(void)
+void enable_caches(void)
 {
-	return fdtdec_setup_memory_banksize();
-}
+	build_mem_map();
 
-extern long fw_dtb_pointer;
-
-void *board_fdt_blob_setup(int *err)
-{
-	/* Return DTB pointer passed by m1n1 */
-	*err = 0;
-	return (void *)fw_dtb_pointer;
+	icache_enable();
+	dcache_enable();
 }
 
-ulong board_get_usable_ram_top(ulong total_size)
+u64 get_page_table_size(void)
 {
-	/*
-	 * Top part of RAM is used by firmware for things like the
-	 * framebuffer.  This gives us plenty of room to play with.
-	 */
-	return 0x980000000;
+	return SZ_256K;
 }