Add support for level specific cache maintenance operations

This patch adds level-specific cache maintenance functions
to cache_helpers.S. The new functions 'dcsw_op_levelx',
where '1 <= x <= 3', perform cache maintenance by set/way
for that particular level of cache. With this patch,
functions supporting cache maintenance up to level 3 have
been implemented, since that is the highest cache level on
most ARM SoCs.

These functions are now used in the CPU-specific power down
sequences so that the sequences are implemented as mandated
by the processor-specific technical reference manuals.

Change-Id: Icd90ce6b51cff5a12863bcda01b93601417fd45c
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index 1c80550..dc60102 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -35,6 +35,9 @@
 	.globl	inv_dcache_range
 	.globl	dcsw_op_louis
 	.globl	dcsw_op_all
+	.globl	dcsw_op_level1
+	.globl	dcsw_op_level2
+	.globl	dcsw_op_level3
 
 	/* ------------------------------------------
 	 * Clean+Invalidate from base address till
@@ -81,6 +84,7 @@
 	 * x0: The operation type (0-2), as defined in arch.h
 	 * x3: The last cache level to operate on
 	 * x9: clidr_el1
+	 * x10: The cache level to begin operation from
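-	 * and will carry out the operation on each data cache from level 0
-	 * to the level in x3 in sequence
+	 * and will carry out the operation on each data cache from the level
+	 * in x10 to the level in x3 in sequence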
 	 *
@@ -93,12 +97,12 @@
 	mrs	x9, clidr_el1
 	ubfx	x3, x9, \shift, \fw
 	lsl	x3, x3, \ls
+	mov	x10, xzr
 	b	do_dcsw_op
 	.endm
 
 func do_dcsw_op
 	cbz	x3, exit
-	mov	x10, xzr
 	adr	x14, dcsw_loop_table	// compute inner loop address
 	add	x14, x14, x0, lsl #5	// inner loop is 8x32-bit instructions
 	mov	x0, x9
@@ -163,3 +167,45 @@
 
 func dcsw_op_all
 	dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT
+
+	/* ---------------------------------------------------------------
+	 *  Helper macro for data cache operations by set/way for the
+	 *  level specified. \level is the level shifted by LEVEL_SHIFT.
+	 * ---------------------------------------------------------------
+	 */
+	.macro dcsw_op_level level
+	mrs	x9, clidr_el1			// x9: clidr_el1 for do_dcsw_op
+	mov	x3, \level			// x3: last level to operate on
+	sub	x10, x3, #(1 << LEVEL_SHIFT)	// x10: (level - 1) << LEVEL_SHIFT
+	b	do_dcsw_op			// x0 (op type) comes from the caller
+	.endm
+
+	/* ---------------------------------------------------------------
+	 * Data cache operations by set/way for level 1 cache
+	 * (the dcsw_op_level macro sets up x3, x9 and x10 for do_dcsw_op)
+	 * The caller provides:
+	 * x0: The operation type (0-2), as defined in arch.h
+	 * ---------------------------------------------------------------
+	 */
+func dcsw_op_level1
+	dcsw_op_level #(1 << LEVEL_SHIFT)
+
+	/* ---------------------------------------------------------------
+	 * Data cache operations by set/way for level 2 cache
+	 * (the dcsw_op_level macro sets up x3, x9 and x10 for do_dcsw_op)
+	 * The caller provides:
+	 * x0: The operation type (0-2), as defined in arch.h
+	 * ---------------------------------------------------------------
+	 */
+func dcsw_op_level2
+	dcsw_op_level #(2 << LEVEL_SHIFT)
+
+	/* ---------------------------------------------------------------
+	 * Data cache operations by set/way for level 3 cache
+	 * (the dcsw_op_level macro sets up x3, x9 and x10 for do_dcsw_op)
+	 * The caller provides:
+	 * x0: The operation type (0-2), as defined in arch.h
+	 * ---------------------------------------------------------------
+	 */
+func dcsw_op_level3
+	dcsw_op_level #(3 << LEVEL_SHIFT)
diff --git a/lib/cpus/aarch64/cortex_a53.S b/lib/cpus/aarch64/cortex_a53.S
index 722ce7a..ec18464 100644
--- a/lib/cpus/aarch64/cortex_a53.S
+++ b/lib/cpus/aarch64/cortex_a53.S
@@ -77,11 +77,11 @@
 	bl	cortex_a53_disable_dcache
 
 	/* ---------------------------------------------
-	 * Flush L1 cache to PoU.
+	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
 	mov	x0, #DCCISW
-	bl	dcsw_op_louis
+	bl	dcsw_op_level1
 
 	/* ---------------------------------------------
 	 * Come out of intra cluster coherency
@@ -100,17 +100,24 @@
 	bl	cortex_a53_disable_dcache
 
 	/* ---------------------------------------------
+	 * Flush L1 caches.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW
+	bl	dcsw_op_level1
+
+	/* ---------------------------------------------
 	 * Disable the optional ACP.
 	 * ---------------------------------------------
 	 */
 	bl	plat_disable_acp
 
 	/* ---------------------------------------------
-	 * Flush L1 and L2 caches to PoC.
+	 * Flush L2 caches.
 	 * ---------------------------------------------
 	 */
 	mov	x0, #DCCISW
-	bl	dcsw_op_all
+	bl	dcsw_op_level2
 
 	/* ---------------------------------------------
 	 * Come out of intra cluster coherency
diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S
index eed1bbb..3e55297 100644
--- a/lib/cpus/aarch64/cortex_a57.S
+++ b/lib/cpus/aarch64/cortex_a57.S
@@ -134,11 +134,11 @@
 	bl	cortex_a57_disable_l2_prefetch
 
 	/* ---------------------------------------------
-	 * Flush L1 cache to PoU.
+	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
 	mov	x0, #DCCISW
-	bl	dcsw_op_louis
+	bl	dcsw_op_level1
 
 	/* ---------------------------------------------
 	 * Come out of intra cluster coherency
@@ -168,18 +168,25 @@
 	 */
 	bl	cortex_a57_disable_l2_prefetch
 
+	/* -------------------------------------------------
+	 * Flush the L1 caches.
+	 * -------------------------------------------------
+	 */
+	mov	x0, #DCCISW
+	bl	dcsw_op_level1
+
 	/* ---------------------------------------------
 	 * Disable the optional ACP.
 	 * ---------------------------------------------
 	 */
 	bl	plat_disable_acp
 
-	/* ---------------------------------------------
-	 * Flush L1 and L2 caches to PoC.
-	 * ---------------------------------------------
+	/* -------------------------------------------------
+	 * Flush the L2 caches.
+	 * -------------------------------------------------
 	 */
 	mov	x0, #DCCISW
-	bl	dcsw_op_all
+	bl	dcsw_op_level2
 
 	/* ---------------------------------------------
 	 * Come out of intra cluster coherency