Add CPU specific power management operations

This patch adds CPU core and cluster power down sequences to the CPU specific
operations framework introduced in a earlier patch. Cortex-A53, Cortex-A57 and
generic AEM sequences have been added. The latter is suitable for the
Foundation and Base AEM FVPs. A pointer to each CPU's operations structure is
saved in the per-cpu data so that it can be easily accessed during power down
seqeunces.

An optional platform API has been introduced to allow a platform to disable the
Accelerator Coherency Port (ACP) during a cluster power down sequence. The weak
definition of this function (plat_disable_acp()) does not take any action. It
should be overriden with a strong definition if the ACP is present on a
platform.

Change-Id: I8d09bd40d2f528a28d2d3f19b77101178778685d
diff --git a/lib/cpus/aarch64/aem_generic.S b/lib/cpus/aarch64/aem_generic.S
index a8dbf1a..19c9433 100644
--- a/lib/cpus/aarch64/aem_generic.S
+++ b/lib/cpus/aarch64/aem_generic.S
@@ -27,15 +27,50 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
+#include <aem_generic.h>
 #include <arch.h>
 #include <asm_macros.S>
 #include <cpu_macros.S>
 
-#define BASE_AEM_MIDR 0x410FD0F0
+func aem_generic_core_pwr_dwn
+	/* ---------------------------------------------
+	 * Disable the Data Cache.
+	 * ---------------------------------------------
+	 */
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT
+	msr	sctlr_el3, x1
+	isb
 
-#define FOUNDATION_AEM_MIDR 0x410FD000
+	mov	x0, #DCCISW
 
+	/* ---------------------------------------------
+	 * Flush L1 cache to PoU.
+	 * ---------------------------------------------
+	 */
+	b	dcsw_op_louis
 
+
+func aem_generic_cluster_pwr_dwn
+	/* ---------------------------------------------
+	 * Disable the Data Cache.
+	 * ---------------------------------------------
+	 */
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT
+	msr	sctlr_el3, x1
+	isb
+
+	/* ---------------------------------------------
+	 * Flush L1 and L2 caches to PoC.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW
+	b	dcsw_op_all
+
+
+/* cpu_ops for Base AEM FVP */
 declare_cpu_ops aem_generic, BASE_AEM_MIDR, 1
 
+/* cpu_ops for Foundation FVP */
 declare_cpu_ops aem_generic, FOUNDATION_AEM_MIDR, 1
diff --git a/lib/cpus/aarch64/cortex_a53.S b/lib/cpus/aarch64/cortex_a53.S
index 2d28dd9..08cc938 100644
--- a/lib/cpus/aarch64/cortex_a53.S
+++ b/lib/cpus/aarch64/cortex_a53.S
@@ -29,10 +29,32 @@
  */
 #include <arch.h>
 #include <asm_macros.S>
+#include <cortex_a53.h>
 #include <cpu_macros.S>
 #include <plat_macros.S>
 
-#define CORTEX_A53_MIDR 0x410FD030
+	/* ---------------------------------------------
+	 * Disable L1 data cache and unified L2 cache
+	 * ---------------------------------------------
+	 */
+func cortex_a53_disable_dcache
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT
+	msr	sctlr_el3, x1
+	isb
+	ret
+
+	/* ---------------------------------------------
+	 * Disable intra-cluster coherency
+	 * ---------------------------------------------
+	 */
+func cortex_a53_disable_smp
+	mrs	x0, CPUECTLR_EL1
+	bic	x0, x0, #CPUECTLR_SMP_BIT
+	msr	CPUECTLR_EL1, x0
+	isb
+	dsb	sy
+	ret
 
 func cortex_a53_reset_func
 	/* ---------------------------------------------
@@ -45,4 +67,56 @@
 	isb
 	ret
 
+func cortex_a53_core_pwr_dwn
+	mov	x18, x30
+
+	/* ---------------------------------------------
+	 * Turn off caches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a53_disable_dcache
+
+	/* ---------------------------------------------
+	 * Flush L1 cache to PoU.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW
+	bl	dcsw_op_louis
+
+	/* ---------------------------------------------
+	 * Come out of intra cluster coherency
+	 * ---------------------------------------------
+	 */
+	mov	x30, x18
+	b	cortex_a53_disable_smp
+
+func cortex_a53_cluster_pwr_dwn
+	mov	x18, x30
+
+	/* ---------------------------------------------
+	 * Turn off caches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a53_disable_dcache
+
+	/* ---------------------------------------------
+	 * Disable the optional ACP.
+	 * ---------------------------------------------
+	 */
+	bl	plat_disable_acp
+
+	/* ---------------------------------------------
+	 * Flush L1 and L2 caches to PoC.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW
+	bl	dcsw_op_all
+
+	/* ---------------------------------------------
+	 * Come out of intra cluster coherency
+	 * ---------------------------------------------
+	 */
+	mov	x30, x18
+	b	cortex_a53_disable_smp
+
 declare_cpu_ops cortex_a53, CORTEX_A53_MIDR
diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S
index df3a898..8de7fe9 100644
--- a/lib/cpus/aarch64/cortex_a57.S
+++ b/lib/cpus/aarch64/cortex_a57.S
@@ -29,10 +29,56 @@
  */
 #include <arch.h>
 #include <asm_macros.S>
+#include <cortex_a57.h>
 #include <cpu_macros.S>
 #include <plat_macros.S>
 
-#define CORTEX_A57_MIDR 0x410FD070
+	/* ---------------------------------------------
+	 * Disable L1 data cache and unified L2 cache
+	 * ---------------------------------------------
+	 */
+func cortex_a57_disable_dcache
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT
+	msr	sctlr_el3, x1
+	isb
+	ret
+
+	/* ---------------------------------------------
+	 * Disable all types of L2 prefetches.
+	 * ---------------------------------------------
+	 */
+func cortex_a57_disable_l2_prefetch
+	mrs	x0, CPUECTLR_EL1
+	orr	x0, x0, #CPUECTLR_DIS_TWD_ACC_PFTCH_BIT
+	mov	x1, #CPUECTLR_L2_IPFTCH_DIST_MASK
+	orr	x1, x1, #CPUECTLR_L2_DPFTCH_DIST_MASK
+	bic	x0, x0, x1
+	msr	CPUECTLR_EL1, x0
+	isb
+	dsb	sy
+	ret
+
+	/* ---------------------------------------------
+	 * Disable intra-cluster coherency
+	 * ---------------------------------------------
+	 */
+func cortex_a57_disable_smp
+	mrs	x0, CPUECTLR_EL1
+	bic	x0, x0, #CPUECTLR_SMP_BIT
+	msr	CPUECTLR_EL1, x0
+	ret
+
+	/* ---------------------------------------------
+	 * Disable debug interfaces
+	 * ---------------------------------------------
+	 */
+func cortex_a57_disable_ext_debug
+	mov	x0, #1
+	msr	osdlr_el1, x0
+	isb
+	dsb	sy
+	ret
 
 func cortex_a57_reset_func
 	/* ---------------------------------------------
@@ -45,4 +91,80 @@
 	isb
 	ret
 
+func cortex_a57_core_pwr_dwn
+	mov	x18, x30
+
+	/* ---------------------------------------------
+	 * Turn off caches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_dcache
+
+	/* ---------------------------------------------
+	 * Disable the L2 prefetches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_l2_prefetch
+
+	/* ---------------------------------------------
+	 * Flush L1 cache to PoU.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW
+	bl	dcsw_op_louis
+
+	/* ---------------------------------------------
+	 * Come out of intra cluster coherency
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_smp
+
+	/* ---------------------------------------------
+	 * Force the debug interfaces to be quiescent
+	 * ---------------------------------------------
+	 */
+	mov	x30, x18
+	b	cortex_a57_disable_ext_debug
+
+func cortex_a57_cluster_pwr_dwn
+	mov	x18, x30
+
+	/* ---------------------------------------------
+	 * Turn off caches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_dcache
+
+	/* ---------------------------------------------
+	 * Disable the L2 prefetches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_l2_prefetch
+
+	/* ---------------------------------------------
+	 * Disable the optional ACP.
+	 * ---------------------------------------------
+	 */
+	bl	plat_disable_acp
+
+	/* ---------------------------------------------
+	 * Flush L1 and L2 caches to PoC.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW
+	bl	dcsw_op_all
+
+	/* ---------------------------------------------
+	 * Come out of intra cluster coherency
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_smp
+
+	/* ---------------------------------------------
+	 * Force the debug interfaces to be quiescent
+	 * ---------------------------------------------
+	 */
+	mov	x30, x18
+	b	cortex_a57_disable_ext_debug
+
 declare_cpu_ops cortex_a57, CORTEX_A57_MIDR
diff --git a/lib/cpus/aarch64/cpu_helpers.S b/lib/cpus/aarch64/cpu_helpers.S
index 6db04ed..624a459 100644
--- a/lib/cpus/aarch64/cpu_helpers.S
+++ b/lib/cpus/aarch64/cpu_helpers.S
@@ -65,6 +65,66 @@
 
 #endif /* IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31) */
 
+#if IMAGE_BL31 /* The power down core and cluster is needed only in  BL31 */
+	/*
+	 * The prepare core power down function for all platforms.  After
+	 * the cpu_ops pointer is retrieved from cpu_data, the corresponding
+	 * pwr_dwn_core in the cpu_ops is invoked.
+	 */
+	.globl	prepare_core_pwr_dwn
+func prepare_core_pwr_dwn
+	mrs	x1, tpidr_el3
+	ldr	x0, [x1, #CPU_DATA_CPU_OPS_PTR]
+#if ASM_ASSERTION
+	cmp	x0, #0
+	ASM_ASSERT(ne)
+#endif
+
+	/* Get the cpu_ops core_pwr_dwn handler */
+	ldr	x1, [x0, #CPU_PWR_DWN_CORE]
+	br	x1
+
+	/*
+	 * The prepare cluster power down function for all platforms.  After
+	 * the cpu_ops pointer is retrieved from cpu_data, the corresponding
+	 * pwr_dwn_cluster in the cpu_ops is invoked.
+	 */
+	.globl	prepare_cluster_pwr_dwn
+func prepare_cluster_pwr_dwn
+	mrs	x1, tpidr_el3
+	ldr	x0, [x1, #CPU_DATA_CPU_OPS_PTR]
+#if ASM_ASSERTION
+	cmp	x0, #0
+	ASM_ASSERT(ne)
+#endif
+
+	/* Get the cpu_ops cluster_pwr_dwn handler */
+	ldr	x1, [x0, #CPU_PWR_DWN_CLUSTER]
+	br	x1
+
+
+	/*
+	 * Initializes the cpu_ops_ptr if not already initialized
+	 * in cpu_data. This can be called without a runtime stack.
+	 * clobbers: x0 - x6, x10
+	 */
+	.globl	init_cpu_ops
+func init_cpu_ops
+	mrs	x6, tpidr_el3
+	ldr	x0, [x6, #CPU_DATA_CPU_OPS_PTR]
+	cbnz	x0, 1f
+	mov	x10, x30
+	bl	get_cpu_ops_ptr
+#if ASM_ASSERTION
+	cmp	x0, #0
+	ASM_ASSERT(ne)
+#endif
+	str	x0, [x6, #CPU_DATA_CPU_OPS_PTR]
+	mov x30, x10
+1:
+	ret
+#endif /* IMAGE_BL31 */
+
 	/*
 	 * The below function returns the cpu_ops structure matching the
 	 * midr of the core. It reads the MIDR_EL1 and finds the matching