avr32: Add simple paging support

Use the MMU hardware to set up 1:1 mappings between physical and virtual
addresses. This allows us to bypass the cache when accessing the flash
without having to do any physical-to-virtual address mapping in the CFI
driver.

The virtual memory mappings are defined at compile time through a sorted
array of virtual memory range objects. When a TLB miss exception
happens, the exception handler does a binary search through the array
until it finds a matching entry and loads it into the TLB. The u-boot
image itself is covered by a fixed TLB entry which is never replaced.

This makes the 'saveenv' command work again on ATNGW100 and other boards
using the CFI driver, hopefully without breaking any rules.

Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
diff --git a/arch/avr32/cpu/at32ap700x/Makefile b/arch/avr32/cpu/at32ap700x/Makefile
index 46e6ef6..30ea925 100644
--- a/arch/avr32/cpu/at32ap700x/Makefile
+++ b/arch/avr32/cpu/at32ap700x/Makefile
@@ -24,7 +24,7 @@
 
 LIB	:= $(obj)lib$(SOC).a
 
-COBJS	:= portmux.o clk.o
+COBJS	:= portmux.o clk.o mmu.o
 SRCS	:= $(SOBJS:.o=.S) $(COBJS:.o=.c)
 OBJS	:= $(addprefix $(obj),$(SOBJS) $(COBJS))
 
diff --git a/arch/avr32/cpu/at32ap700x/mmu.c b/arch/avr32/cpu/at32ap700x/mmu.c
new file mode 100644
index 0000000..c3a1b93
--- /dev/null
+++ b/arch/avr32/cpu/at32ap700x/mmu.c
@@ -0,0 +1,78 @@
+#include <common.h>
+#include <asm/arch/mmu.h>
+#include <asm/sysreg.h>
+
+void mmu_init_r(unsigned long dest_addr)
+{
+	uintptr_t	vmr_table_addr;
+
+	/* Round monitor address down to the nearest page boundary */
+	dest_addr &= PAGE_ADDR_MASK;
+
+	/* Initialize TLB entry 0 to cover the monitor, and lock it */
+	sysreg_write(TLBEHI, dest_addr | SYSREG_BIT(TLBEHI_V));
+	sysreg_write(TLBELO, dest_addr | MMU_VMR_CACHE_WRBACK);
+	sysreg_write(MMUCR, SYSREG_BF(DRP, 0) | SYSREG_BF(DLA, 1)
+			| SYSREG_BIT(MMUCR_S) | SYSREG_BIT(M));
+	__builtin_tlbw();
+
+	/*
+	 * Calculate the address of the VM range table in a PC-relative
+	 * manner to make sure we hit the SDRAM and not the flash.
+	 */
+	vmr_table_addr = (uintptr_t)&mmu_vmr_table;
+	sysreg_write(PTBR, vmr_table_addr);
+	printf("VMR table @ 0x%08x\n", vmr_table_addr);
+
+	/* Enable paging */
+	sysreg_write(MMUCR, SYSREG_BF(DRP, 1) | SYSREG_BF(DLA, 1)
+			| SYSREG_BIT(MMUCR_S) | SYSREG_BIT(M) | SYSREG_BIT(E));
+}
+
+int mmu_handle_tlb_miss(void)
+{
+	const struct mmu_vm_range *vmr_table;
+	const struct mmu_vm_range *vmr;
+	unsigned int fault_pgno;
+	int first, last;
+
+	fault_pgno = sysreg_read(TLBEAR) >> PAGE_SHIFT;
+	vmr_table = (const struct mmu_vm_range *)sysreg_read(PTBR);
+
+	/* Do a binary search through the VM ranges */
+	first = 0;
+	last = CONFIG_SYS_NR_VM_REGIONS;
+	while (first < last) {
+		unsigned int start;
+		int middle;
+
+		/* Pick the entry in the middle of the remaining range */
+		middle = (first + last) >> 1;
+		vmr = &vmr_table[middle];
+		start = vmr->virt_pgno;
+
+		/* Do the bisection thing */
+		if (fault_pgno < start) {
+			last = middle;
+		} else if (fault_pgno >= (start + vmr->nr_pages)) {
+			first = middle + 1;
+		} else {
+			/* Got it; let's slam it into the TLB */
+			uint32_t tlbelo;
+
+			tlbelo = vmr->phys & ~PAGE_ADDR_MASK;
+			tlbelo |= fault_pgno << PAGE_SHIFT;
+			sysreg_write(TLBELO, tlbelo);
+			__builtin_tlbw();
+
+			/* Zero means success */
+			return 0;
+		}
+	}
+
+	/*
+	 * Didn't find any matching entries. Return a nonzero value to
+	 * indicate that this should be treated as a fatal exception.
+	 */
+	return -1;
+}
diff --git a/arch/avr32/cpu/start.S b/arch/avr32/cpu/start.S
index 99c9e06..06bf4c6 100644
--- a/arch/avr32/cpu/start.S
+++ b/arch/avr32/cpu/start.S
@@ -82,12 +82,19 @@
 	.org	0x44
 	rjmp	unknown_exception	/* DTLB Modified */
 
-	.org	0x50
-	rjmp	unknown_exception	/* ITLB Miss */
-	.org	0x60
-	rjmp	unknown_exception	/* DTLB Miss (read) */
-	.org	0x70
-	rjmp	unknown_exception	/* DTLB Miss (write) */
+	.org	0x50			/* ITLB Miss */
+	pushm   r8-r12,lr
+	rjmp	1f
+	.org	0x60			/* DTLB Miss (read) */
+	pushm   r8-r12,lr
+	rjmp	1f
+	.org	0x70			/* DTLB Miss (write) */
+	pushm   r8-r12,lr
+1:	mov	r12, sp
+	rcall	mmu_handle_tlb_miss
+	popm	r8-r12,lr
+	brne	unknown_exception
+	rete
 
 	.size	_evba, . - _evba
 
diff --git a/arch/avr32/include/asm/arch-at32ap700x/addrspace.h b/arch/avr32/include/asm/arch-at32ap700x/addrspace.h
index 409eee3..4edc1bd 100644
--- a/arch/avr32/include/asm/arch-at32ap700x/addrspace.h
+++ b/arch/avr32/include/asm/arch-at32ap700x/addrspace.h
@@ -75,10 +75,7 @@
 static inline void *
 map_physmem(phys_addr_t paddr, unsigned long len, unsigned long flags)
 {
-	if (flags == MAP_WRBACK)
-		return (void *)P1SEGADDR(paddr);
-	else
-		return (void *)P2SEGADDR(paddr);
+	return (void *)paddr;
 }
 
 #endif /* __ASM_AVR32_ADDRSPACE_H */
diff --git a/arch/avr32/include/asm/arch-at32ap700x/mmu.h b/arch/avr32/include/asm/arch-at32ap700x/mmu.h
new file mode 100644
index 0000000..fcd9a05
--- /dev/null
+++ b/arch/avr32/include/asm/arch-at32ap700x/mmu.h
@@ -0,0 +1,66 @@
+/*
+ * In order to deal with the hardcoded u-boot requirement that virtual
+ * addresses are always mapped 1:1 with physical addresses, we implement
+ * a small virtual memory manager so that we can use the MMU hardware in
+ * order to get the caching properties right.
+ *
+ * A few pages (or possibly just one) are locked in the TLB permanently
+ * in order to avoid recursive TLB misses, but most pages are faulted in
+ * on demand.
+ */
+#ifndef __ASM_ARCH_MMU_H
+#define __ASM_ARCH_MMU_H
+
+#include <asm/sysreg.h>
+
+#define PAGE_SHIFT	20
+#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#define PAGE_ADDR_MASK	(~(PAGE_SIZE - 1))
+
+#define MMU_VMR_CACHE_NONE						\
+	(SYSREG_BF(AP, 3) | SYSREG_BF(SZ, 3) | SYSREG_BIT(TLBELO_D))
+#define MMU_VMR_CACHE_WBUF						\
+	(MMU_VMR_CACHE_NONE | SYSREG_BIT(B))
+#define MMU_VMR_CACHE_WRTHRU						\
+	(MMU_VMR_CACHE_NONE | SYSREG_BIT(TLBELO_C) | SYSREG_BIT(W))
+#define MMU_VMR_CACHE_WRBACK						\
+	(MMU_VMR_CACHE_WBUF | SYSREG_BIT(TLBELO_C))
+
+/*
+ * This structure is used in our "page table". Instead of the usual
+ * x86-inspired radix tree, we let each entry cover an arbitrary-sized
+ * virtual address range and store them in a binary search tree. This is
+ * somewhat slower, but should use significantly less RAM, and we
+ * shouldn't get many TLB misses when using 1 MB pages anyway.
+ *
+ * With 1 MB pages, we need 12 bits to store the page number. In
+ * addition, we stick an Invalid bit in the high bit of virt_pgno (if
+ * set, it cannot possibly match any faulting page), and all the bits
+ * that need to be written to TLBELO in phys_pgno.
+ */
+struct mmu_vm_range {
+	uint16_t	virt_pgno;
+	uint16_t	nr_pages;
+	uint32_t	phys;
+};
+
+/*
+ * An array of mmu_vm_range objects describing all pageable addresses.
+ * The array is sorted by virt_pgno so that the TLB miss exception
+ * handler can do a binary search to find the correct entry.
+ */
+extern struct mmu_vm_range mmu_vmr_table[];
+
+/*
+ * Initialize the MMU. This will set up a fixed TLB entry for the static
+ * u-boot image at dest_addr and enable paging.
+ */
+void mmu_init_r(unsigned long dest_addr);
+
+/*
+ * Handle a TLB miss exception. This function is called directly from
+ * the exception vector table written in assembly.
+ */
+int mmu_handle_tlb_miss(void);
+
+#endif /* __ASM_ARCH_MMU_H */
diff --git a/arch/avr32/lib/board.c b/arch/avr32/lib/board.c
index 9e741d2..aa589bb 100644
--- a/arch/avr32/lib/board.c
+++ b/arch/avr32/lib/board.c
@@ -33,6 +33,7 @@
 
 #include <asm/initcalls.h>
 #include <asm/sections.h>
+#include <asm/arch/mmu.h>
 
 #ifndef CONFIG_IDENT_STRING
 #define CONFIG_IDENT_STRING ""
@@ -265,6 +266,9 @@
 	gd->flags |= GD_FLG_RELOC;
 	gd->reloc_off = dest_addr - CONFIG_SYS_MONITOR_BASE;
 
+	/* Enable the MMU so that we can keep u-boot simple */
+	mmu_init_r(dest_addr);
+
 	board_early_init_r();
 
 	monitor_flash_len = _edata - _text;
diff --git a/board/atmel/atngw100/atngw100.c b/board/atmel/atngw100/atngw100.c
index 4580f55..49bc03e 100644
--- a/board/atmel/atngw100/atngw100.c
+++ b/board/atmel/atngw100/atngw100.c
@@ -26,11 +26,26 @@
 #include <asm/arch/clk.h>
 #include <asm/arch/gpio.h>
 #include <asm/arch/hmatrix.h>
+#include <asm/arch/mmu.h>
 #include <asm/arch/portmux.h>
 #include <netdev.h>
 
 DECLARE_GLOBAL_DATA_PTR;
 
+struct mmu_vm_range mmu_vmr_table[CONFIG_SYS_NR_VM_REGIONS] = {
+	{
+		.virt_pgno	= CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT,
+		.nr_pages	= CONFIG_SYS_FLASH_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_NONE,
+	}, {
+		.virt_pgno	= CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT,
+		.nr_pages	= EBI_SDRAM_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_WRBACK,
+	},
+};
+
 static const struct sdram_config sdram_config = {
 	.data_bits	= SDRAM_DATA_16BIT,
 	.row_bits	= 13,
diff --git a/board/atmel/atstk1000/atstk1000.c b/board/atmel/atstk1000/atstk1000.c
index d91d594..8b1e1b5 100644
--- a/board/atmel/atstk1000/atstk1000.c
+++ b/board/atmel/atstk1000/atstk1000.c
@@ -25,11 +25,26 @@
 #include <asm/sdram.h>
 #include <asm/arch/clk.h>
 #include <asm/arch/hmatrix.h>
+#include <asm/arch/mmu.h>
 #include <asm/arch/portmux.h>
 #include <netdev.h>
 
 DECLARE_GLOBAL_DATA_PTR;
 
+struct mmu_vm_range mmu_vmr_table[CONFIG_SYS_NR_VM_REGIONS] = {
+	{
+		.virt_pgno	= CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT,
+		.nr_pages	= CONFIG_SYS_FLASH_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_NONE,
+	}, {
+		.virt_pgno	= CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT,
+		.nr_pages	= EBI_SDRAM_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_WRBACK,
+	},
+};
+
 static const struct sdram_config sdram_config = {
 #if defined(CONFIG_ATSTK1006)
 	/* Dual MT48LC16M16A2-7E (64 MB) on daughterboard */
diff --git a/board/earthlcd/favr-32-ezkit/favr-32-ezkit.c b/board/earthlcd/favr-32-ezkit/favr-32-ezkit.c
index d2843c9..b0eca93 100644
--- a/board/earthlcd/favr-32-ezkit/favr-32-ezkit.c
+++ b/board/earthlcd/favr-32-ezkit/favr-32-ezkit.c
@@ -24,10 +24,25 @@
 #include <asm/sdram.h>
 #include <asm/arch/clk.h>
 #include <asm/arch/hmatrix.h>
+#include <asm/arch/mmu.h>
 #include <asm/arch/portmux.h>
 
 DECLARE_GLOBAL_DATA_PTR;
 
+struct mmu_vm_range mmu_vmr_table[CONFIG_SYS_NR_VM_REGIONS] = {
+	{
+		.virt_pgno	= CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT,
+		.nr_pages	= CONFIG_SYS_FLASH_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_NONE,
+	}, {
+		.virt_pgno	= CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT,
+		.nr_pages	= EBI_SDRAM_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_WRBACK,
+	},
+};
+
 static const struct sdram_config sdram_config = {
 	/* MT48LC4M32B2P-6 (16 MB) */
 	.data_bits	= SDRAM_DATA_32BIT,
diff --git a/board/mimc/mimc200/mimc200.c b/board/mimc/mimc200/mimc200.c
index 9940669..470adba 100644
--- a/board/mimc/mimc200/mimc200.c
+++ b/board/mimc/mimc200/mimc200.c
@@ -27,12 +27,32 @@
 #include <asm/arch/clk.h>
 #include <asm/arch/gpio.h>
 #include <asm/arch/hmatrix.h>
+#include <asm/arch/mmu.h>
 #include <asm/arch/portmux.h>
 #include <atmel_lcdc.h>
 #include <lcd.h>
 
 #include "../../../arch/avr32/cpu/hsmc3.h"
 
+struct mmu_vm_range mmu_vmr_table[CONFIG_SYS_NR_VM_REGIONS] = {
+	{
+		.virt_pgno	= CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT,
+		.nr_pages	= CONFIG_SYS_FLASH_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_NONE,
+	}, {
+		.virt_pgno	= EBI_SRAM_CS2_BASE >> PAGE_SHIFT,
+		.nr_pages	= EBI_SRAM_CS2_SIZE >> PAGE_SHIFT,
+		.phys		= (EBI_SRAM_CS2_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_NONE,
+	}, {
+		.virt_pgno	= CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT,
+		.nr_pages	= EBI_SDRAM_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_WRBACK,
+	},
+};
+
 #if defined(CONFIG_LCD)
 /* 480x272x16 @ 72 Hz */
 vidinfo_t panel_info = {
diff --git a/board/miromico/hammerhead/hammerhead.c b/board/miromico/hammerhead/hammerhead.c
index 5ab999e..78f4fd4 100644
--- a/board/miromico/hammerhead/hammerhead.c
+++ b/board/miromico/hammerhead/hammerhead.c
@@ -30,10 +30,25 @@
 #include <asm/arch/clk.h>
 #include <asm/arch/hmatrix.h>
 #include <asm/arch/memory-map.h>
+#include <asm/arch/mmu.h>
 #include <asm/arch/portmux.h>
 
 DECLARE_GLOBAL_DATA_PTR;
 
+struct mmu_vm_range mmu_vmr_table[CONFIG_SYS_NR_VM_REGIONS] = {
+	{
+		.virt_pgno	= CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT,
+		.nr_pages	= CONFIG_SYS_FLASH_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_FLASH_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_NONE,
+	}, {
+		.virt_pgno	= CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT,
+		.nr_pages	= EBI_SDRAM_SIZE >> PAGE_SHIFT,
+		.phys		= (CONFIG_SYS_SDRAM_BASE >> PAGE_SHIFT)
+					| MMU_VMR_CACHE_WRBACK,
+	},
+};
+
 static const struct sdram_config sdram_config = {
 	.data_bits	= SDRAM_DATA_32BIT,
 	.row_bits	= 13,
diff --git a/include/configs/atngw100.h b/include/configs/atngw100.h
index 4ed5514..83056b6 100644
--- a/include/configs/atngw100.h
+++ b/include/configs/atngw100.h
@@ -49,6 +49,9 @@
 #define CONFIG_SYS_CLKDIV_PBA			2
 #define CONFIG_SYS_CLKDIV_PBB			1
 
+/* Reserve VM regions for SDRAM and NOR flash */
+#define CONFIG_SYS_NR_VM_REGIONS		2
+
 /*
  * The PLLOPT register controls the PLL like this:
  *   icp = PLLOPT<2>
diff --git a/include/configs/atstk1002.h b/include/configs/atstk1002.h
index b258f2d..6416d17 100644
--- a/include/configs/atstk1002.h
+++ b/include/configs/atstk1002.h
@@ -73,6 +73,9 @@
  */
 #define CONFIG_SYS_CLKDIV_PBB			1
 
+/* Reserve VM regions for SDRAM and NOR flash */
+#define CONFIG_SYS_NR_VM_REGIONS		2
+
 /*
  * The PLLOPT register controls the PLL like this:
  *   icp = PLLOPT<2>
diff --git a/include/configs/atstk1003.h b/include/configs/atstk1003.h
index 2ef2552..a4d9b0b 100644
--- a/include/configs/atstk1003.h
+++ b/include/configs/atstk1003.h
@@ -73,6 +73,9 @@
  */
 #define CONFIG_SYS_CLKDIV_PBB			1
 
+/* Reserve VM regions for SDRAM and NOR flash */
+#define CONFIG_SYS_NR_VM_REGIONS		2
+
 /*
  * The PLLOPT register controls the PLL like this:
  *   icp = PLLOPT<2>
diff --git a/include/configs/atstk1004.h b/include/configs/atstk1004.h
index 195be82..06bb5da 100644
--- a/include/configs/atstk1004.h
+++ b/include/configs/atstk1004.h
@@ -73,6 +73,9 @@
  */
 #define CONFIG_SYS_CLKDIV_PBB			1
 
+/* Reserve VM regions for SDRAM and NOR flash */
+#define CONFIG_SYS_NR_VM_REGIONS		2
+
 /*
  * The PLLOPT register controls the PLL like this:
  *   icp = PLLOPT<2>
diff --git a/include/configs/atstk1006.h b/include/configs/atstk1006.h
index f93118e..d3cbee6 100644
--- a/include/configs/atstk1006.h
+++ b/include/configs/atstk1006.h
@@ -73,6 +73,9 @@
  */
 #define CONFIG_SYS_CLKDIV_PBB			1
 
+/* Reserve VM regions for SDRAM and NOR flash */
+#define CONFIG_SYS_NR_VM_REGIONS		2
+
 /*
  * The PLLOPT register controls the PLL like this:
  *   icp = PLLOPT<2>
diff --git a/include/configs/favr-32-ezkit.h b/include/configs/favr-32-ezkit.h
index 739ff0d..1c381c7 100644
--- a/include/configs/favr-32-ezkit.h
+++ b/include/configs/favr-32-ezkit.h
@@ -70,6 +70,9 @@
  */
 #define CONFIG_SYS_CLKDIV_PBB			1
 
+/* Reserve VM regions for SDRAM and NOR flash */
+#define CONFIG_SYS_NR_VM_REGIONS		2
+
 /*
  * The PLLOPT register controls the PLL like this:
  *   icp = PLLOPT<2>
diff --git a/include/configs/hammerhead.h b/include/configs/hammerhead.h
index 0c70af5..8ca04ea 100644
--- a/include/configs/hammerhead.h
+++ b/include/configs/hammerhead.h
@@ -47,6 +47,9 @@
 #define CONFIG_SYS_CLKDIV_PBA			2
 #define CONFIG_SYS_CLKDIV_PBB			1
 
+/* Reserve VM regions for SDRAM and NOR flash */
+#define CONFIG_SYS_NR_VM_REGIONS		2
+
 /*
  * The PLLOPT register controls the PLL like this:
  *   icp = PLLOPT<2>
diff --git a/include/configs/mimc200.h b/include/configs/mimc200.h
index 36488b3..6ed9e75 100644
--- a/include/configs/mimc200.h
+++ b/include/configs/mimc200.h
@@ -51,6 +51,9 @@
 #define CONFIG_SYS_CLKDIV_PBA			2
 #define CONFIG_SYS_CLKDIV_PBB			1
 
+/* Reserve VM regions for SDRAM, NOR flash and FRAM */
+#define CONFIG_SYS_NR_VM_REGIONS		3
+
 /*
  * The PLLOPT register controls the PLL like this:
  *   icp = PLLOPT<2>