armv7: integrate cache maintenance support

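- Enable I-cache on bootup (unless CONFIG_SYS_ICACHE_OFF is defined)
- Enable MMU and D-cache immediately after relocation
	- Add a weak arm_init_before_mmu() hook so that any necessary
	  initialization can be done before enabling D-cache and MMU
- Changes to cleanup_before_linux()
	- Use the new cache maintenance framework (invalidate_icache_all(),
	  dcache_disable(), invalidate_dcache_all()) instead of the
	  open-coded CP15 and L2 cache handling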

Signed-off-by: Aneesh V <aneesh@ti.com>
diff --git a/arch/arm/cpu/armv7/cpu.c b/arch/arm/cpu/armv7/cpu.c
index bc4238f..def9ced 100644
--- a/arch/arm/cpu/armv7/cpu.c
+++ b/arch/arm/cpu/armv7/cpu.c
@@ -35,13 +35,10 @@
 #include <command.h>
 #include <asm/system.h>
 #include <asm/cache.h>
-
-static void cache_flush(void);
+#include <asm/armv7.h>
 
 int cleanup_before_linux(void)
 {
-	unsigned int i;
-
 	/*
 	 * this function is called just before we call linux
 	 * it prepares the processor for linux
@@ -50,31 +47,29 @@
 	 */
 	disable_interrupts();
 
-	/* turn off I/D-cache */
+	/*
+	 * Turn off I-cache and invalidate it
+	 */
 	icache_disable();
-	dcache_disable();
-
-	/* invalidate I-cache */
-	cache_flush();
+	invalidate_icache_all();
 
-#ifndef CONFIG_L2_OFF
-	/* turn off L2 cache */
-	l2_cache_disable();
-	/* invalidate L2 cache also */
-	invalidate_dcache(get_device_type());
-#endif
-	i = 0;
-	/* mem barrier to sync up things */
-	asm("mcr p15, 0, %0, c7, c10, 4": :"r"(i));
+	/*
+	 * turn off D-cache
+	 * dcache_disable() in turn flushes the d-cache and disables MMU
+	 */
+	dcache_disable();
 
-#ifndef CONFIG_L2_OFF
-	l2_cache_enable();
-#endif
+	/*
+	 * After D-cache is flushed and before it is disabled there may
+	 * be some new valid entries brought into the cache. We are sure
+	 * that these lines are not dirty and will not affect our execution
+	 * (because unwinding the call-stack and setting a bit in CP15 SCTLR
+	 * is all we did during this; we have not pushed anything on to the
+	 * stack, nor have we affected any static data).
+	 * So just invalidate the entire d-cache again to avoid coherency
+	 * problems for the kernel.
+	 */
+	invalidate_dcache_all();
 
 	return 0;
 }
-
-static void cache_flush(void)
-{
-	asm ("mcr p15, 0, %0, c7, c5, 0": :"r" (0));
-}
diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
index d91ae12..0e698b6 100644
--- a/arch/arm/cpu/armv7/start.S
+++ b/arch/arm/cpu/armv7/start.S
@@ -255,6 +255,14 @@
  * initialization, now running from RAM.
  */
 jump_2_ram:
+/*
+ * If I-cache is enabled invalidate it
+ */
+#ifndef CONFIG_SYS_ICACHE_OFF
+	mcr	p15, 0, r0, c7, c5, 0	@ invalidate icache
+	mcr     p15, 0, r0, c7, c10, 4	@ DSB
+	mcr     p15, 0, r0, c7, c5, 4	@ ISB
+#endif
 	ldr	r0, _board_init_r_ofs
 	adr	r1, _start
 	add	lr, r0, r1
@@ -290,6 +298,9 @@
 	mov	r0, #0			@ set up for MCR
 	mcr	p15, 0, r0, c8, c7, 0	@ invalidate TLBs
 	mcr	p15, 0, r0, c7, c5, 0	@ invalidate icache
+	mcr	p15, 0, r0, c7, c5, 6	@ invalidate BP array
+	mcr     p15, 0, r0, c7, c10, 4	@ DSB
+	mcr     p15, 0, r0, c7, c5, 4	@ ISB
 
 	/*
 	 * disable MMU stuff and caches
@@ -298,7 +309,12 @@
 	bic	r0, r0, #0x00002000	@ clear bits 13 (--V-)
 	bic	r0, r0, #0x00000007	@ clear bits 2:0 (-CAM)
 	orr	r0, r0, #0x00000002	@ set bit 1 (--A-) Align
-	orr	r0, r0, #0x00000800	@ set bit 12 (Z---) BTB
+	orr	r0, r0, #0x00000800	@ set bit 11 (Z---) BTB
+#ifdef CONFIG_SYS_ICACHE_OFF
+	bic	r0, r0, #0x00001000	@ clear bit 12 (I) I-cache
+#else
+	orr	r0, r0, #0x00001000	@ set bit 12 (I) I-cache
+#endif
 	mcr	p15, 0, r0, c1, c0, 0
 
 	/*
diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
index 4f88f58..fc52a26 100644
--- a/arch/arm/lib/board.c
+++ b/arch/arm/lib/board.c
@@ -450,6 +450,12 @@
 	gd->flags |= GD_FLG_RELOC;	/* tell others: relocation done */
 
 	monitor_flash_len = _end_ofs;
+	/*
+	 * Enable D-cache:
+	 * I-cache, if needed, must already be enabled in start.S
+	 */
+	dcache_enable();
+
 	debug ("monitor flash len: %08lX\n", monitor_flash_len);
 	board_init();	/* Setup chipselects */
 
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c
index ba73fb9..51831a9 100644
--- a/arch/arm/lib/cache-cp15.c
+++ b/arch/arm/lib/cache-cp15.c
@@ -34,6 +34,12 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
+void __arm_init_before_mmu(void)
+{
+}
+void arm_init_before_mmu(void)
+	__attribute__((weak, alias("__arm_init_before_mmu")));
+
 static void cp_delay (void)
 {
 	volatile int i;
@@ -65,6 +71,7 @@
 	int i;
 	u32 reg;
 
+	arm_init_before_mmu();
 	/* Set up an identity-mapping for all 4GB, rw for everyone */
 	for (i = 0; i < 4096; i++)
 		page_table[i] = i << 20 | (3 << 10) | 0x12;
diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c
index 27123cd..dc3242c 100644
--- a/arch/arm/lib/cache.c
+++ b/arch/arm/lib/cache.c
@@ -38,11 +38,6 @@
 	/* disable write buffer as well (page 2-22) */
 	asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0));
 #endif
-#ifdef CONFIG_OMAP34XX
-	void v7_flush_cache_all(void);
-
-	v7_flush_cache_all();
-#endif
 	return;
 }
 void  flush_cache(unsigned long start, unsigned long size)