arc: significant cache rework

[1] Align cache management functions with those in the Linux kernel. I.e.:
    a) Use the same functions for all cache ops (D$ Inv/Flush)
    b) Split cache ops into 3 sub-functions: "before", "lineloop" and
"after". That way we may re-use the "before" and "after" functions for
both region and full cache ops.

 [2] Implement fully functional L2 (SLC) management. Previously SLC was
simply disabled early on boot. It's also possible to enable or disable
the L2 cache from the config utility.

 [3] Disable/enable corresponding caches early on boot. So if U-Boot is
configured to use caches they will be used at all times (this is useful
in particular for speeding up relocation).

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 0e11dcc..667f218 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -47,9 +47,12 @@
 #endif
 #define ARC_BCR_DC_BUILD	0x72
 #define ARC_BCR_SLC		0xce
-#define ARC_AUX_SLC_CONTROL	0x903
+#define ARC_AUX_SLC_CONFIG	0x901
+#define ARC_AUX_SLC_CTRL	0x903
 #define ARC_AUX_SLC_FLUSH	0x904
 #define ARC_AUX_SLC_INVALIDATE	0x905
+#define ARC_AUX_SLC_IVDL	0x910
+#define ARC_AUX_SLC_FLDL	0x912
 
 #ifndef __ASSEMBLY__
 /* Accessors for auxiliary registers */
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 0b3ebd9..432606a 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -29,12 +29,7 @@
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_ISA_ARCV2
-void slc_enable(void);
-void slc_disable(void);
-void slc_flush(void);
-void slc_invalidate(void);
-#endif
+void cache_init(void);
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c
index e369e5a..ed8e8e7 100644
--- a/arch/arc/lib/cache.c
+++ b/arch/arc/lib/cache.c
@@ -5,9 +5,13 @@
  */
 
 #include <config.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
 #include <asm/arcregs.h>
 #include <asm/cache.h>
 
+#define CACHE_LINE_MASK		(~(CONFIG_SYS_CACHELINE_SIZE - 1))
+
 /* Bit values in IC_CTRL */
 #define IC_CTRL_CACHE_DISABLE	(1 << 0)
 
@@ -18,60 +22,186 @@
 #define CACHE_VER_NUM_MASK	0xF
 #define SLC_CTRL_SB		(1 << 2)
 
+#define OP_INV		0x1
+#define OP_FLUSH	0x2
+#define OP_INV_IC	0x3
+
+#ifdef CONFIG_ISA_ARCV2
+/*
+ * By default these variables would be placed in the .bss section.
+ * But .bss is not relocated, so values initialized before relocation
+ * would be zeroed before they are used after relocation.
+ */
+int slc_line_sz __section(".data");
+int slc_exists __section(".data");
+
+/*
+ * For OP_INV: clear IM in SLC_CTRL and return the value read (else return 0)
+ */
+static unsigned int __before_slc_op(const int op)
+{
+	unsigned int reg = 0;
+
+	if (op == OP_INV) {
+		/* IM is set by default and implies Flush-n-inv: clear it */
+		reg = read_aux_reg(ARC_AUX_SLC_CTRL);
+		write_aux_reg(ARC_AUX_SLC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH);
+	}
+
+	return reg;
+}
+
+/* Wait for a pending SLC flush, then restore the IM bit after an OP_INV */
+static void __after_slc_op(const int op, unsigned int reg)
+{
+	/* Both flush and flush-n-inv must wait for completion */
+	while ((op & OP_FLUSH) &&
+	       (read_aux_reg(ARC_AUX_SLC_CTRL) & DC_CTRL_FLUSH_STATUS))
+		;
+
+	if (op == OP_INV)	/* back to the default invalidate mode */
+		write_aux_reg(ARC_AUX_SLC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH);
+}
+
+static inline void __slc_line_loop(unsigned long paddr, unsigned long sz,
+				   const int op)
+{
+	unsigned int aux_cmd;
+	int num_lines;
+
+#define SLC_LINE_MASK	(~(slc_line_sz - 1))
+
+	aux_cmd = op & OP_INV ? ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL;
+
+	sz += paddr & ~SLC_LINE_MASK;
+	paddr &= SLC_LINE_MASK;
+
+	num_lines = DIV_ROUND_UP(sz, slc_line_sz);
+
+	while (num_lines-- > 0) {
+		write_aux_reg(aux_cmd, paddr);
+		paddr += slc_line_sz;
+	}
+}
+
+/* Run @cacheop (invalidate and/or flush) on the entire SLC */
+static inline void __slc_entire_op(const int cacheop)
+{
+	const unsigned int saved_ctrl = __before_slc_op(cacheop);
+	int cmd_reg;
+
+	/* Inv and flush-n-inv are both issued via the invalidate register */
+	cmd_reg = (cacheop & OP_INV) ? ARC_AUX_SLC_INVALIDATE :
+				       ARC_AUX_SLC_FLUSH;
+
+	write_aux_reg(cmd_reg, 0x1);
+
+	__after_slc_op(cacheop, saved_ctrl);
+}
+
+static inline void __slc_line_op(unsigned long paddr, unsigned long sz,
+				 const int cacheop)
+{
+	unsigned int ctrl_reg = __before_slc_op(cacheop);
+	__slc_line_loop(paddr, sz, cacheop);
+	__after_slc_op(cacheop, ctrl_reg);
+}
+#else
+#define __slc_entire_op(cacheop)
+#define __slc_line_op(paddr, sz, cacheop)
+#endif
+
+/* Return 1 if the build register reports an instruction cache, else 0 */
+static inline int icache_exists(void)
+{
+	unsigned int ver;
+
+	ver = read_aux_reg(ARC_BCR_IC_BUILD) & CACHE_VER_NUM_MASK;
+	return ver != 0;
+}
+
+/* Return 1 if the build register reports a data cache, else 0 */
+static inline int dcache_exists(void)
+{
+	unsigned int ver;
+
+	ver = read_aux_reg(ARC_BCR_DC_BUILD) & CACHE_VER_NUM_MASK;
+	return ver != 0;
+}
+
+/* Probe for the System-Level Cache and record its presence and line size */
+void cache_init(void)
+{
+#ifdef CONFIG_ISA_ARCV2
+#define LSIZE_OFFSET	4
+#define LSIZE_MASK	3
+	/* A zero version field in the SLC build register means no SLC */
+	if (!(read_aux_reg(ARC_BCR_SLC) & CACHE_VER_NUM_MASK)) {
+		slc_exists = 0;
+		return;
+	}
+
+	/* Non-zero LSIZE field selects 64-byte lines, zero means 128 */
+	slc_line_sz = (read_aux_reg(ARC_AUX_SLC_CONFIG) &
+		       (LSIZE_MASK << LSIZE_OFFSET)) ? 64 : 128;
+	slc_exists = 1;
+#endif
+}
+
 int icache_status(void)
 {
-	/* If no cache in CPU exit immediately */
-	if (!(read_aux_reg(ARC_BCR_IC_BUILD) & CACHE_VER_NUM_MASK))
+	if (!icache_exists())
 		return 0;
 
-	return (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE) !=
-	       IC_CTRL_CACHE_DISABLE;
+	if (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE)
+		return 0;
+	else
+		return 1;
 }
 
 void icache_enable(void)
 {
-	/* If no cache in CPU exit immediately */
-	if (!(read_aux_reg(ARC_BCR_IC_BUILD) & CACHE_VER_NUM_MASK))
-		return;
-
-	write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) &
-		      ~IC_CTRL_CACHE_DISABLE);
+	if (icache_exists())
+		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) &
+			      ~IC_CTRL_CACHE_DISABLE);
 }
 
 void icache_disable(void)
 {
-	/* If no cache in CPU exit immediately */
-	if (!(read_aux_reg(ARC_BCR_IC_BUILD) & CACHE_VER_NUM_MASK))
-		return;
-
-	write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
-		      IC_CTRL_CACHE_DISABLE);
+	if (icache_exists())
+		write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
+			      IC_CTRL_CACHE_DISABLE);
 }
 
+#ifndef CONFIG_SYS_ICACHE_OFF
 void invalidate_icache_all(void)
 {
-	/* If no cache in CPU exit immediately */
-	if (!(read_aux_reg(ARC_BCR_IC_BUILD) & CACHE_VER_NUM_MASK))
-		return;
-
 	/* Any write to IC_IVIC register triggers invalidation of entire I$ */
-	write_aux_reg(ARC_AUX_IC_IVIC, 1);
+	if (icache_status()) {
+		write_aux_reg(ARC_AUX_IC_IVIC, 1);
+		read_aux_reg(ARC_AUX_IC_CTRL);	/* blocks */
+	}
 }
+#else /* CONFIG_SYS_ICACHE_OFF: I$ is not used, nothing to invalidate */
+void invalidate_icache_all(void)
+{
+}
+#endif /* !CONFIG_SYS_ICACHE_OFF */
 
 int dcache_status(void)
 {
-	/* If no cache in CPU exit immediately */
-	if (!(read_aux_reg(ARC_BCR_DC_BUILD) & CACHE_VER_NUM_MASK))
+	if (!dcache_exists())
 		return 0;
 
-	return (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE) !=
-		DC_CTRL_CACHE_DISABLE;
+	if (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE)
+		return 0;
+	else
+		return 1;
 }
 
 void dcache_enable(void)
 {
-	/* If no cache in CPU exit immediately */
-	if (!(read_aux_reg(ARC_BCR_DC_BUILD) & CACHE_VER_NUM_MASK))
+	if (!dcache_exists())
 		return;
 
 	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) &
@@ -80,139 +210,144 @@
 
 void dcache_disable(void)
 {
-	/* If no cache in CPU exit immediately */
-	if (!(read_aux_reg(ARC_BCR_DC_BUILD) & CACHE_VER_NUM_MASK))
+	if (!dcache_exists())
 		return;
 
 	write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) |
 		      DC_CTRL_CACHE_DISABLE);
 }
 
-void flush_dcache_all(void)
-{
-	/* If no cache in CPU exit immediately */
-	if (!(read_aux_reg(ARC_BCR_DC_BUILD) & CACHE_VER_NUM_MASK))
-		return;
-
-	/* Do flush of entire cache */
-	write_aux_reg(ARC_AUX_DC_FLSH, 1);
-
-	/* Wait flush end */
-	while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS)
-		;
-}
-
 #ifndef CONFIG_SYS_DCACHE_OFF
-static void dcache_flush_line(unsigned addr)
+/*
+ * Common Helper for Line Operations on {I,D}-Cache
+ */
+static inline void __cache_line_loop(unsigned long paddr, unsigned long sz,
+				     const int cacheop)
 {
+	unsigned int aux_cmd;
+#if (CONFIG_ARC_MMU_VER == 3)
+	unsigned int aux_tag;
+#endif
+	int num_lines;
+
+	if (cacheop == OP_INV_IC) {
+		aux_cmd = ARC_AUX_IC_IVIL;
 #if (CONFIG_ARC_MMU_VER == 3)
-	write_aux_reg(ARC_AUX_DC_PTAG, addr);
+		aux_tag = ARC_AUX_IC_PTAG;
 #endif
-	write_aux_reg(ARC_AUX_DC_FLDL, addr);
+	} else {
+		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+		aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;
+#if (CONFIG_ARC_MMU_VER == 3)
+		aux_tag = ARC_AUX_DC_PTAG;
+#endif
+	}
 
-	/* Wait flush end */
-	while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS)
-		;
+	sz += paddr & ~CACHE_LINE_MASK;
+	paddr &= CACHE_LINE_MASK;
 
-#ifndef CONFIG_SYS_ICACHE_OFF
-	/*
-	 * Invalidate I$ for addresses range just flushed from D$.
-	 * If we try to execute data flushed above it will be valid/correct
-	 */
+	num_lines = DIV_ROUND_UP(sz, CONFIG_SYS_CACHELINE_SIZE);
+
+	while (num_lines-- > 0) {
 #if (CONFIG_ARC_MMU_VER == 3)
-	write_aux_reg(ARC_AUX_IC_PTAG, addr);
+		write_aux_reg(aux_tag, paddr);
 #endif
-	write_aux_reg(ARC_AUX_IC_IVIL, addr);
-#endif /* CONFIG_SYS_ICACHE_OFF */
+		write_aux_reg(aux_cmd, paddr);
+		paddr += CONFIG_SYS_CACHELINE_SIZE;
+	}
 }
-#endif /* CONFIG_SYS_DCACHE_OFF */
 
-void flush_dcache_range(unsigned long start, unsigned long end)
+/*
+ * For OP_INV: clear IM in DC_CTRL and return the value read (else return 0)
+ */
+static unsigned int __before_dc_op(const int op)
 {
-#ifndef CONFIG_SYS_DCACHE_OFF
-	unsigned int addr;
+	unsigned int reg = 0;
 
-	start = start & (~(CONFIG_SYS_CACHELINE_SIZE - 1));
-	end = end & (~(CONFIG_SYS_CACHELINE_SIZE - 1));
+	if (op == OP_INV) {
+		/* IM is set by default and implies Flush-n-inv: clear it */
+		reg = read_aux_reg(ARC_AUX_DC_CTRL);
+		write_aux_reg(ARC_AUX_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH);
+	}
 
-	for (addr = start; addr <= end; addr += CONFIG_SYS_CACHELINE_SIZE)
-		dcache_flush_line(addr);
-#endif /* CONFIG_SYS_DCACHE_OFF */
+	return reg;
 }
 
-void invalidate_dcache_range(unsigned long start, unsigned long end)
+static void __after_dc_op(const int op, unsigned int reg)
 {
-#ifndef CONFIG_SYS_DCACHE_OFF
-	unsigned int addr;
+	if (op & OP_FLUSH)	/* flush / flush-n-inv both wait */
+		while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS)
+			;
 
-	start = start & (~(CONFIG_SYS_CACHELINE_SIZE - 1));
-	end = end & (~(CONFIG_SYS_CACHELINE_SIZE - 1));
-
-	for (addr = start; addr <= end; addr += CONFIG_SYS_CACHELINE_SIZE) {
-#if (CONFIG_ARC_MMU_VER == 3)
-		write_aux_reg(ARC_AUX_DC_PTAG, addr);
-#endif
-		write_aux_reg(ARC_AUX_DC_IVDL, addr);
-	}
-#endif /* CONFIG_SYS_DCACHE_OFF */
+	/* Switch back to default Invalidate mode */
+	if (op == OP_INV)
+		write_aux_reg(ARC_AUX_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH);
 }
 
-void invalidate_dcache_all(void)
+static inline void __dc_entire_op(const int cacheop)
 {
-	/* If no cache in CPU exit immediately */
-	if (!(read_aux_reg(ARC_BCR_DC_BUILD) & CACHE_VER_NUM_MASK))
-		return;
+	int aux;
+	unsigned int ctrl_reg = __before_dc_op(cacheop);
+
+	if (cacheop & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
+		aux = ARC_AUX_DC_IVDC;
+	else
+		aux = ARC_AUX_DC_FLSH;
 
-	/* Write 1 to DC_IVDC register triggers invalidation of entire D$ */
-	write_aux_reg(ARC_AUX_DC_IVDC, 1);
+	write_aux_reg(aux, 0x1);
+
+	__after_dc_op(cacheop, ctrl_reg);
 }
 
-void flush_cache(unsigned long start, unsigned long size)
+static inline void __dc_line_op(unsigned long paddr, unsigned long sz,
+				const int cacheop)
 {
-	flush_dcache_range(start, start + size);
+	unsigned int ctrl_reg = __before_dc_op(cacheop);
+	__cache_line_loop(paddr, sz, cacheop);
+	__after_dc_op(cacheop, ctrl_reg);
 }
+#else
+#define __dc_entire_op(cacheop)
+#define __dc_line_op(paddr, sz, cacheop)
+#endif /* !CONFIG_SYS_DCACHE_OFF */
 
-#ifdef CONFIG_ISA_ARCV2
-void slc_enable(void)
+void invalidate_dcache_range(unsigned long start, unsigned long end)
 {
-	/* If SLC ver = 0, no SLC present in CPU */
-	if (!(read_aux_reg(ARC_BCR_SLC) & 0xff))
-		return;
-
-	write_aux_reg(ARC_AUX_SLC_CONTROL,
-		      read_aux_reg(ARC_AUX_SLC_CONTROL) & ~1);
+	__dc_line_op(start, end - start, OP_INV);
+#ifdef CONFIG_ISA_ARCV2
+	if (slc_exists)
+		__slc_line_op(start, end - start, OP_INV);
+#endif
 }
 
-void slc_disable(void)
+void flush_dcache_range(unsigned long start, unsigned long end)
 {
-	/* If SLC ver = 0, no SLC present in CPU */
-	if (!(read_aux_reg(ARC_BCR_SLC) & 0xff))
-		return;
-
-	write_aux_reg(ARC_AUX_SLC_CONTROL,
-		      read_aux_reg(ARC_AUX_SLC_CONTROL) | 1);
+	__dc_line_op(start, end - start, OP_FLUSH);
+#ifdef CONFIG_ISA_ARCV2
+	if (slc_exists)
+		__slc_line_op(start, end - start, OP_FLUSH);
+#endif
 }
 
-void slc_flush(void)
+void flush_cache(unsigned long start, unsigned long size)
 {
-	/* If SLC ver = 0, no SLC present in CPU */
-	if (!(read_aux_reg(ARC_BCR_SLC) & 0xff))
-		return;
-
-	write_aux_reg(ARC_AUX_SLC_FLUSH, 1);
-
-	/* Wait flush end */
-	while (read_aux_reg(ARC_AUX_SLC_CONTROL) & SLC_CTRL_SB)
-		;
+	flush_dcache_range(start, start + size);
 }
 
-void slc_invalidate(void)
+void invalidate_dcache_all(void)
 {
-	/* If SLC ver = 0, no SLC present in CPU */
-	if (!(read_aux_reg(ARC_BCR_SLC) & 0xff))
-		return;
-
-	write_aux_reg(ARC_AUX_SLC_INVALIDATE, 1);
+	__dc_entire_op(OP_INV);
+#ifdef CONFIG_ISA_ARCV2
+	if (slc_exists)
+		__slc_entire_op(OP_INV);
+#endif
 }
 
-#endif /* CONFIG_ISA_ARCV2 */
+void flush_dcache_all(void)
+{
+	__dc_entire_op(OP_FLUSH);
+#ifdef CONFIG_ISA_ARCV2
+	if (slc_exists)
+		__slc_entire_op(OP_FLUSH);
+#endif
+}
diff --git a/arch/arc/lib/cpu.c b/arch/arc/lib/cpu.c
index 3c930bc..4e4dd74 100644
--- a/arch/arc/lib/cpu.c
+++ b/arch/arc/lib/cpu.c
@@ -23,6 +23,8 @@
 	gd->cpu_clk = CONFIG_SYS_CLK_FREQ;
 	gd->ram_size = CONFIG_SYS_SDRAM_SIZE;
 
+	cache_init();
+
 	return 0;
 }
 
diff --git a/arch/arc/lib/init_helpers.c b/arch/arc/lib/init_helpers.c
index 25690ee..dbc8d68 100644
--- a/arch/arc/lib/init_helpers.c
+++ b/arch/arc/lib/init_helpers.c
@@ -10,16 +10,8 @@
 
 int init_cache_f_r(void)
 {
-#ifndef CONFIG_SYS_ICACHE_OFF
-	icache_enable();
-	/* Make sure no stale entries persist from before we disabled cache */
-	invalidate_icache_all();
-#endif
-
 #ifndef CONFIG_SYS_DCACHE_OFF
-	dcache_enable();
-	/* Make sure no stale entries persist from before we disabled cache */
-	invalidate_dcache_all();
+	flush_dcache_all();
 #endif
 	return 0;
 }
diff --git a/arch/arc/lib/start.S b/arch/arc/lib/start.S
index e1ef19c..26a5934 100644
--- a/arch/arc/lib/start.S
+++ b/arch/arc/lib/start.S
@@ -13,18 +13,46 @@
 	/* Setup interrupt vector base that matches "__text_start" */
 	sr	__ivt_start, [ARC_AUX_INTR_VEC_BASE]
 
-	/* Setup stack- and frame-pointers */
-	mov	%sp, CONFIG_SYS_INIT_SP_ADDR
-	mov	%fp, %sp
+	; Disable/enable I-cache according to configuration
+	lr	r5, [ARC_BCR_IC_BUILD]
+	breq	r5, 0, 1f		; I$ doesn't exist
+	lr	r5, [ARC_AUX_IC_CTRL]
+#ifndef CONFIG_SYS_ICACHE_OFF
+	bclr	r5, r5, 0		; 0 - Enable, 1 is Disable
+#else
+	bset	r5, r5, 0		; I$ exists, but is not used
+#endif
+	sr	r5, [ARC_AUX_IC_CTRL]
+
+1:
+	; Disable/enable D-cache according to configuration
+	lr	r5, [ARC_BCR_DC_BUILD]
+	breq	r5, 0, 1f		; D$ doesn't exist
+	lr	r5, [ARC_AUX_DC_CTRL]
+	bclr	r5, r5, 6		; Invalidate (discard w/o wback)
+#ifndef CONFIG_SYS_DCACHE_OFF
+	bclr	r5, r5, 0		; Enable (+Inv)
+#else
+	bset	r5, r5, 0		; Disable (+Inv)
+#endif
+	sr	r5, [ARC_AUX_DC_CTRL]
 
-	/* Unconditionally disable caches */
+1:
 #ifdef CONFIG_ISA_ARCV2
-	bl	slc_flush
-	bl	slc_disable
+	; Enable System-Level Cache (SLC)
+	lr	r5, [ARC_BCR_SLC]
+	breq	r5, 0, 1f		; SLC doesn't exist
+	lr	r5, [ARC_AUX_SLC_CTRL]
+	bclr	r5, r5, 6		; Invalidate (discard w/o wback)
+	bclr	r5, r5, 0		; Enable (+Inv)
+	sr	r5, [ARC_AUX_SLC_CTRL]
+
+1:
 #endif
-	bl	flush_dcache_all
-	bl	dcache_disable
-	bl	icache_disable
+
+	/* Setup stack- and frame-pointers */
+	mov	%sp, CONFIG_SYS_INIT_SP_ADDR
+	mov	%fp, %sp
 
 	/* Allocate and zero GD, update SP */
 	mov	%r0, %sp