Merge pull request #194 from danh-arm/sm/tf-issues#98

Implement the CPU Specific operations framework v3
diff --git a/Makefile b/Makefile
index 520a0d7..8c97b49 100644
--- a/Makefile
+++ b/Makefile
@@ -138,6 +138,10 @@
 
 include plat/${PLAT}/platform.mk
 
+# By default all CPU errata workarounds are disabled. This can be
+# overridden by the platform.
+include lib/cpus/cpu-errata.mk
+
 ifdef BL1_SOURCES
 NEED_BL1 := yes
 include bl1/bl1.mk
@@ -180,6 +184,7 @@
 				-Iinclude/drivers/io		\
 				-Iinclude/lib			\
 				-Iinclude/lib/aarch64		\
+				-Iinclude/lib/cpus/aarch64	\
 				-Iinclude/plat/common		\
 				-Iinclude/stdlib		\
 				-Iinclude/stdlib/sys		\
diff --git a/bl1/aarch64/bl1_entrypoint.S b/bl1/aarch64/bl1_entrypoint.S
index e7f92c7..82330c1 100644
--- a/bl1/aarch64/bl1_entrypoint.S
+++ b/bl1/aarch64/bl1_entrypoint.S
@@ -57,7 +57,7 @@
 	 * reset e.g. cache, tlb invalidations etc.
 	 * ---------------------------------------------
 	 */
-	bl	cpu_reset_handler
+	bl	reset_handler
 
 	/* ---------------------------------------------
 	 * Enable the instruction cache, stack pointer
diff --git a/bl1/bl1.ld.S b/bl1/bl1.ld.S
index 0ca4a63..8092396 100644
--- a/bl1/bl1.ld.S
+++ b/bl1/bl1.ld.S
@@ -50,10 +50,23 @@
         *bl1_entrypoint.o(.text*)
         *(.text*)
         *(.rodata*)
+
+        /*
+         * Ensure 8-byte alignment for cpu_ops so that its fields are also
+         * aligned. Also ensure cpu_ops inclusion.
+         */
+        . = ALIGN(8);
+        __CPU_OPS_START__ = .;
+        KEEP(*(cpu_ops))
+        __CPU_OPS_END__ = .;
+
         *(.vectors)
         __RO_END__ = .;
     } >ROM
 
+    ASSERT(__CPU_OPS_END__ > __CPU_OPS_START__,
+           "cpu_ops not defined for this platform.")
+
     /*
      * The .data section gets copied from ROM to RAM at runtime.
      * Its LMA must be 16-byte aligned.
diff --git a/bl1/bl1.mk b/bl1/bl1.mk
index 032dc5e..8e73bef 100644
--- a/bl1/bl1.mk
+++ b/bl1/bl1.mk
@@ -32,6 +32,6 @@
 				bl1/aarch64/bl1_arch_setup.c		\
 				bl1/aarch64/bl1_entrypoint.S		\
 				bl1/aarch64/bl1_exceptions.S		\
-				lib/aarch64/cpu_helpers.S
+				lib/cpus/aarch64/cpu_helpers.S
 
 BL1_LINKERFILE		:=	bl1/bl1.ld.S
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index a088c2e..c74858f 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -68,7 +68,7 @@
 	 * Boot ROM(BL0) programming sequence
 	 * -----------------------------------------------------
 	 */
-	bl	cpu_reset_handler
+	bl	reset_handler
 #endif
 	/* ---------------------------------------------
 	 * Enable the instruction cache, stack pointer
@@ -154,6 +154,12 @@
 	bl	zeromem16
 
 	/* ---------------------------------------------
+	 * Initialize the cpu_ops pointer.
+	 * ---------------------------------------------
+	 */
+	bl	init_cpu_ops
+
+	/* ---------------------------------------------
 	 * Use SP_EL0 for the C runtime stack.
 	 * ---------------------------------------------
 	 */
diff --git a/bl31/aarch64/crash_reporting.S b/bl31/aarch64/crash_reporting.S
index e69878b..68fe256 100644
--- a/bl31/aarch64/crash_reporting.S
+++ b/bl31/aarch64/crash_reporting.S
@@ -52,9 +52,6 @@
 print_spacer:
 	.asciz	" =\t\t0x"
 
-cpu_ectlr_reg:
-	.asciz	"cpuectlr_el1 =\t\t0x"
-
 gp_regs:
 	.asciz	"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",\
 		"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",\
@@ -337,21 +334,9 @@
 	mrs	x10, sp_el0
 	bl	str_in_crash_buf_print
 
-	/* Print the CPUECTLR_EL1 reg */
-	mrs	x0, midr_el1
-	lsr	x0, x0, #MIDR_PN_SHIFT
-	and	x0, x0, #MIDR_PN_MASK
-	cmp	x0, #MIDR_PN_A57
-	b.eq	1f
-	cmp	x0, #MIDR_PN_A53
-	b.ne	2f
-1:
-	adr	x4, cpu_ectlr_reg
-	bl	asm_print_str
-	mrs	x4, CPUECTLR_EL1
-	bl	asm_print_hex
-	bl	print_newline
-2:
+	/* Get the cpu specific registers to report */
+	bl	do_cpu_reg_dump
+	bl	str_in_crash_buf_print
 
 	/* Print the gic registers */
 	plat_print_gic_regs
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 83ef7e7..add65b8 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -58,6 +58,15 @@
         KEEP(*(rt_svc_descs))
         __RT_SVC_DESCS_END__ = .;
 
+        /*
+         * Ensure 8-byte alignment for cpu_ops so that its fields are also
+         * aligned. Also ensure cpu_ops inclusion.
+         */
+        . = ALIGN(8);
+        __CPU_OPS_START__ = .;
+        KEEP(*(cpu_ops))
+        __CPU_OPS_END__ = .;
+
         *(.vectors)
         __RO_END_UNALIGNED__ = .;
         /*
@@ -69,6 +78,9 @@
         __RO_END__ = .;
     } >RAM
 
+    ASSERT(__CPU_OPS_END__ > __CPU_OPS_START__,
+           "cpu_ops not defined for this platform.")
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
diff --git a/bl31/bl31.mk b/bl31/bl31.mk
index 53b58b3..f53a41f 100644
--- a/bl31/bl31.mk
+++ b/bl31/bl31.mk
@@ -39,7 +39,7 @@
 				bl31/aarch64/cpu_data.S				\
 				bl31/aarch64/runtime_exceptions.S		\
 				bl31/aarch64/crash_reporting.S			\
-				lib/aarch64/cpu_helpers.S			\
+				lib/cpus/aarch64/cpu_helpers.S			\
 				lib/locks/bakery/bakery_lock.c			\
 				lib/locks/exclusive/spinlock.S			\
 				services/std_svc/std_svc_setup.c		\
diff --git a/common/aarch64/debug.S b/common/aarch64/debug.S
index b7d7ac2..fcf5f26 100644
--- a/common/aarch64/debug.S
+++ b/common/aarch64/debug.S
@@ -58,13 +58,13 @@
 	.macro asm_print_line_dec
 	mov	x6, #10		/* Divide by 10 after every loop iteration */
 	mov	x5, #MAX_DEC_DIVISOR
-1:
+dec_print_loop:
 	udiv	x0, x4, x5		/* Get the quotient */
 	msub	x4, x0, x5, x4		/* Find the remainder */
 	add	x0, x0, #ASCII_OFFSET_NUM		/* Convert to ascii */
 	bl	plat_crash_console_putc
 	udiv	x5, x5, x6		/* Reduce divisor */
-	cbnz	x5, 1b
+	cbnz	x5, dec_print_loop
 	.endm
 
 
diff --git a/docs/cpu-errata-workarounds.md b/docs/cpu-errata-workarounds.md
new file mode 100644
index 0000000..73e8e18
--- /dev/null
+++ b/docs/cpu-errata-workarounds.md
@@ -0,0 +1,35 @@
+ARM CPU Errata Workarounds
+==========================
+
+ARM Trusted Firmware exports a series of build flags which controls the
+errata workarounds that are applied to each CPU by the reset handler. The
+errata details can be found in the CPU specific errata documents published
+by ARM. The errata workarounds are implemented for a particular revision
+or a set of processor revisions. This check is done in the debug build.
+Each errata workaround is identified by its `ID` as specified in the processor's
+errata notice document. The format of the define used to enable/disable the
+errata is `ERRATA_<Processor name>_<ID>` where the `Processor name`
+is either `A57` for the `Cortex_A57` CPU or `A53` for `Cortex_A53` CPU.
+
+All workarounds are disabled by default. The platform is responsible for
+enabling these workarounds according to its requirement by defining the
+errata workaround build flags in the platform specific makefile.
+
+In the current implementation, a platform which has more than 1 variant
+with different revisions of a processor has no runtime mechanism available
+for it to specify which errata workarounds should be enabled or not.
+
+The value of each build flag is 0 by default, that is, disabled. Any other
+value will enable it.
+
+For Cortex A57, following errata build flags are defined :
+
+*   `ERRATA_A57_806969`: This applies errata 806969 workaround to cortex a57
+     CPU. This needs to be enabled only for revision r0p0 of the CPU.
+
+*   `ERRATA_A57_813420`: This applies errata 813420 workaround to cortex a57
+     CPU. This needs to be enabled only for revision r0p0 of the CPU.
+
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+_Copyright (c) 2014, ARM Limited and Contributors. All rights reserved._
diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index e98c4fa..84684c3 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -9,10 +9,11 @@
 4.  Power State Coordination Interface
 5.  Secure-EL1 Payloads and Dispatchers
 6.  Crash Reporting in BL3-1
-7.  Memory layout on FVP platforms
-8.  Firmware Image Package (FIP)
-9.  Code Structure
-10.  References
+7.  CPU specific operations framework
+8.  Memory layout on FVP platforms
+9.  Firmware Image Package (FIP)
+10.  Code Structure
+11.  References
 
 
 1.  Introduction
@@ -302,8 +303,8 @@
 architectural initialization in BL3-1 allows override of any previous
 initialization done by BL1. BL3-1 creates page tables to address the first
 4GB of physical address space and initializes the MMU accordingly. It initializes
-a buffer of frequently used pointers, called per-cpu pointer cache, in memory for
-faster access. Currently the per-cpu pointer cache contains only the pointer
+a buffer of frequently used pointers, called per-CPU pointer cache, in memory for
+faster access. Currently the per-CPU pointer cache contains only the pointer
 to crash stack. It then replaces the exception vectors populated by BL1 with its
 own. BL3-1 exception vectors implement more elaborate support for
 handling SMCs since this is the only mechanism to access the runtime services
@@ -845,8 +846,8 @@
 register contents and report it via the default serial output. The general purpose
 registers, EL3, Secure EL1 and some EL2 state registers are reported.
 
-A dedicated per-cpu crash stack is maintained by BL3-1 and this is retrieved via
-the per-cpu pointer cache. The implementation attempts to minimise the memory
+A dedicated per-CPU crash stack is maintained by BL3-1 and this is retrieved via
+the per-CPU pointer cache. The implementation attempts to minimise the memory
 required for this feature. The file `crash_reporting.S` contains the
 implementation for crash reporting.
 
@@ -931,8 +932,100 @@
     fpexc32_el2	:0x0000000004000700
     sp_el0	:0x0000000004010780
 
+7.  CPU specific operations framework
+-----------------------------
 
-7.  Memory layout on FVP platforms
+Certain aspects of the ARMv8 architecture are implementation defined,
+that is, certain behaviours are not architecturally defined, but must be defined
+and documented by individual processor implementations. The ARM Trusted
+Firmware implements a framework which categorises the common implementation
+defined behaviours and allows a processor to export its implementation of that
+behaviour. The categories are:
+
+1.  Processor specific reset sequence.
+
+2.  Processor specific power down sequences.
+
+3.  Processor specific register dumping as a part of crash reporting.
+
+Each of the above categories fulfils a different requirement.
+
+1.  allows any processor specific initialization before the caches and MMU
+    are turned on, like implementation of errata workarounds, entry into
+    the intra-cluster coherency domain etc.
+
+2.  allows each processor to implement the power down sequence mandated in
+    its Technical Reference Manual (TRM).
+
+3.  allows a processor to provide additional information to the developer
+    in the event of a crash, for example Cortex-A53 has registers which
+    can expose the data cache contents.
+
+Please note that only 2. is mandated by the TRM.
+
+The CPU specific operations framework scales to accommodate a large number of
+different CPUs during power down and reset handling. The platform can specify
+the CPU errata workarounds to be applied for each CPU type during reset
+handling by defining CPU errata compile time macros. Details on these macros
+can be found in the [cpu-errata-workarounds.md][ERRW] file.
+
+The CPU specific operations framework depends on the `cpu_ops` structure which
+needs to be exported for each type of CPU in the platform. It is defined in
+`include/lib/cpus/aarch64/cpu_macros.S` and has the following fields : `midr`,
+`reset_func()`, `core_pwr_dwn()`, `cluster_pwr_dwn()` and `cpu_reg_dump()`.
+
+The CPU specific files in `lib/cpus` export a `cpu_ops` data structure with
+suitable handlers for that CPU.  For example, `lib/cpus/cortex_a53.S` exports
+the `cpu_ops` for Cortex-A53 CPU. According to the platform configuration,
+these CPU specific files must be included in the build by the platform
+makefile. The generic CPU specific operations framework code exists in
+`lib/cpus/aarch64/cpu_helpers.S`.
+
+### CPU specific Reset Handling
+
+After a reset, the state of the CPU when it calls generic reset handler is:
+MMU turned off, both instruction and data caches turned off and not part
+of any coherency domain.
+
+The BL entrypoint code first invokes the `plat_reset_handler()` to allow
+the platform to perform any system initialization required and any system
+errata workarounds that need to be applied. The `get_cpu_ops_ptr()` reads
+the current CPU midr, finds the matching `cpu_ops` entry in the `cpu_ops`
+array and returns it. Note that only the part number and implementer fields
+in midr are used to find the matching `cpu_ops` entry. The `reset_func()` in
+the returned `cpu_ops` is then invoked which executes the required reset
+handling for that CPU and also any errata workarounds enabled by the platform.
+
+### CPU specific power down sequence
+
+During the BL3-1 initialization sequence, the pointer to the matching `cpu_ops`
+entry is stored in per-CPU data by `init_cpu_ops()` so that it can be quickly
+retrieved during power down sequences.
+
+The PSCI service, upon receiving a power down request, determines the highest
+affinity level at which to execute power down sequence for a particular CPU and
+invokes the corresponding 'prepare' power down handler in the CPU specific
+operations framework. For example, when a CPU executes a power down for affinity
+level 0, the `prepare_core_pwr_dwn()` retrieves the `cpu_ops` pointer from the
+per-CPU data and the corresponding `core_pwr_dwn()` is invoked. Similarly when
+a CPU executes power down at affinity level 1, the `prepare_cluster_pwr_dwn()`
+retrieves the `cpu_ops` pointer and the corresponding `cluster_pwr_dwn()` is
+invoked.
+
+At runtime the platform hooks for power down are invoked by the PSCI service to
+perform platform specific operations during a power down sequence, for example
+turning off CCI coherency during a cluster power down.
+
+### CPU specific register reporting during crash
+
+If the crash reporting is enabled in BL3-1, when a crash occurs, the crash
+reporting framework calls `do_cpu_reg_dump` which retrieves the matching
+`cpu_ops` using `get_cpu_ops_ptr()` function. The `cpu_reg_dump()` in
+`cpu_ops` is invoked, which then returns the CPU specific register values to
+be reported and a pointer to the ASCII list of register names in a format
+expected by the crash reporting framework.
+
+8.  Memory layout on FVP platforms
 ----------------------------------
 
 Each bootloader image can be divided in 2 parts:
@@ -1189,7 +1282,7 @@
 on FVP, BL3-1 and TSP need to know the limit address that their PROGBITS
 sections must not overstep. The platform code must provide those.
 
-8.  Firmware Image Package (FIP)
+9.  Firmware Image Package (FIP)
 --------------------------------
 
 Using a Firmware Image Package (FIP) allows for packing bootloader images (and
@@ -1267,7 +1360,7 @@
 platform policy can be modified to allow additional images.
 
 
-9.  Code Structure
+10.  Code Structure
 ------------------
 
 Trusted Firmware code is logically divided between the three boot loader
@@ -1312,7 +1405,7 @@
 kernel at boot time. These can be found in the `fdts` directory.
 
 
-10.  References
+11.  References
 --------------
 
 1.  Trusted Board Boot Requirements CLIENT PDD (ARM DEN 0006B-5). Available
@@ -1324,7 +1417,6 @@
 
 4.  [ARM Trusted Firmware Interrupt Management Design guide][INTRG].
 
-
 - - - - - - - - - - - - - - - - - - - - - - - - - -
 
 _Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved._
@@ -1335,3 +1427,4 @@
 [UUID]:             https://tools.ietf.org/rfc/rfc4122.txt "A Universally Unique IDentifier (UUID) URN Namespace"
 [User Guide]:       ./user-guide.md
 [INTRG]:            ./interrupt-framework-design.md
+[ERRW]:             ./cpu-errata-workarounds.md
diff --git a/docs/porting-guide.md b/docs/porting-guide.md
index 62ea6a0..0a07bff 100644
--- a/docs/porting-guide.md
+++ b/docs/porting-guide.md
@@ -460,6 +460,32 @@
 are just an ARM Trusted Firmware convention.
 
 
+### Function : plat_reset_handler()
+
+    Argument : void
+    Return   : void
+
+A platform may need to do additional initialization after reset. This function
+allows the platform to do the platform specific initializations. Platform
+specific errata workarounds could also be implemented here. The api should
+preserve the value in x10 register as it is used by the caller to store the
+return address.
+
+The default implementation doesn't do anything.
+
+### Function : plat_disable_acp()
+
+    Argument : void
+    Return   : void
+
+This api allows a platform to disable the Accelerator Coherency Port (if
+present) during a cluster power down sequence. The default weak implementation
+doesn't do anything. Since this api is called during the power down sequence,
+it has restrictions for stack usage and it can use the registers x0 - x17 as
+scratch registers. It should preserve the value in x18 register as it is used
+by the caller to store the return address.
+
+
 3.  Modifications specific to a Boot Loader stage
 -------------------------------------------------
 
diff --git a/include/bl31/cpu_data.h b/include/bl31/cpu_data.h
index ef0b68c..ba7ae06 100644
--- a/include/bl31/cpu_data.h
+++ b/include/bl31/cpu_data.h
@@ -32,18 +32,21 @@
 #define __CPU_DATA_H__
 
 /* Offsets for the cpu_data structure */
-#define CPU_DATA_CRASH_BUF_OFFSET	0x10
+#define CPU_DATA_CRASH_BUF_OFFSET	0x20
 #if CRASH_REPORTING
 #define CPU_DATA_LOG2SIZE		7
 #else
 #define CPU_DATA_LOG2SIZE		6
 #endif
 /* need enough space in crash buffer to save 8 registers */
-#define CPU_DATA_CRASH_BUF_SIZE	64
+#define CPU_DATA_CRASH_BUF_SIZE		64
+#define CPU_DATA_CPU_OPS_PTR		0x10
+
 #ifndef __ASSEMBLY__
 
 #include <arch_helpers.h>
 #include <platform_def.h>
+#include <psci.h>
 #include <stdint.h>
 
 /*******************************************************************************
@@ -63,9 +66,10 @@
  * by components that have per-cpu members. The member access macros should be
  * used for this.
  ******************************************************************************/
-
 typedef struct cpu_data {
 	void *cpu_context[2];
+	uint64_t cpu_ops_ptr;
+	struct psci_cpu_data psci_svc_cpu_data;
 #if CRASH_REPORTING
 	uint64_t crash_buf[CPU_DATA_CRASH_BUF_SIZE >> 3];
 #endif
@@ -81,6 +85,10 @@
 CASSERT((1 << CPU_DATA_LOG2SIZE) == sizeof(cpu_data_t),
 	assert_cpu_data_log2size_mismatch);
 
+CASSERT(CPU_DATA_CPU_OPS_PTR == __builtin_offsetof
+		(cpu_data_t, cpu_ops_ptr),
+		assert_cpu_data_cpu_ops_ptr_offset_mismatch);
+
 struct cpu_data *_cpu_data_by_index(uint32_t cpu_index);
 struct cpu_data *_cpu_data_by_mpidr(uint64_t mpidr);
 
@@ -104,6 +112,10 @@
 #define get_cpu_data_by_mpidr(_id, _m)	   _cpu_data_by_mpidr(_id)->_m
 #define set_cpu_data_by_mpidr(_id, _m, _v) _cpu_data_by_mpidr(_id)->_m = _v
 
+#define flush_cpu_data(_m)	   flush_dcache_range((uint64_t) 	  \
+						      &(_cpu_data()->_m), \
+						      sizeof(_cpu_data()->_m))
+
 
 #endif /* __ASSEMBLY__ */
 #endif /* __CPU_DATA_H__ */
diff --git a/include/bl31/services/psci.h b/include/bl31/services/psci.h
index 6512dfb..6c23f1b 100644
--- a/include/bl31/services/psci.h
+++ b/include/bl31/services/psci.h
@@ -131,6 +131,16 @@
 
 #include <stdint.h>
 
+/*******************************************************************************
+ * Structure used to store per-cpu information relevant to the PSCI service.
+ * It is populated in the per-cpu data array. In return we get a guarantee that
+ * this information will not reside on a cache line shared with another cpu.
+ ******************************************************************************/
+typedef struct psci_cpu_data {
+	uint32_t power_state;
+	uint32_t max_phys_off_afflvl;	/* Highest affinity level in physically
+					   powered off state */
+} psci_cpu_data_t;
 
 /*******************************************************************************
  * Structure populated by platform specific code to export routines which
@@ -179,8 +189,6 @@
  * Function & Data prototypes
  ******************************************************************************/
 unsigned int psci_version(void);
-int __psci_cpu_suspend(unsigned int, unsigned long, unsigned long);
-int __psci_cpu_off(void);
 int psci_affinity_info(unsigned long, unsigned int);
 int psci_migrate(unsigned int);
 unsigned int psci_migrate_info_type(void);
@@ -192,8 +200,10 @@
 void psci_aff_on_finish_entry(void);
 void psci_aff_suspend_finish_entry(void);
 void psci_register_spd_pm_hook(const spd_pm_ops_t *);
-int psci_get_suspend_stateid(unsigned long mpidr);
-int psci_get_suspend_afflvl(unsigned long mpidr);
+int psci_get_suspend_stateid_by_mpidr(unsigned long);
+int psci_get_suspend_stateid(void);
+int psci_get_suspend_afflvl(void);
+uint32_t psci_get_max_phys_off_afflvl(void);
 
 uint64_t psci_smc_handler(uint32_t smc_fid,
 			  uint64_t x1,
diff --git a/include/common/assert_macros.S b/include/common/assert_macros.S
index 45d699b..807972f 100644
--- a/include/common/assert_macros.S
+++ b/include/common/assert_macros.S
@@ -30,7 +30,10 @@
 
 	/*
 	 * Assembler macro to enable asm_assert. Use this macro wherever
-	 * assert is required in assembly.
+	 * assert is required in assembly. Please note that the macro makes
+	 * use of label '300' to provide the logic and the caller
+	 * should make sure that this label is not used to branch prior
+	 * to calling this macro.
 	 */
 #define ASM_ASSERT(_cc) \
 .ifndef .L_assert_filename ;\
@@ -39,8 +42,8 @@
 			.string	__FILE__ ;\
 	.popsection ;\
 .endif ;\
-	b._cc	1f ;\
+	b._cc	300f ;\
 	adr	x0, .L_assert_filename ;\
 	mov	x1, __LINE__ ;\
 	b	asm_assert ;\
-1:
+300:
diff --git a/include/lib/aarch64/arch.h b/include/lib/aarch64/arch.h
index 0427208..b2aac2f 100644
--- a/include/lib/aarch64/arch.h
+++ b/include/lib/aarch64/arch.h
@@ -35,11 +35,12 @@
 /*******************************************************************************
  * MIDR bit definitions
  ******************************************************************************/
+#define MIDR_IMPL_MASK		0xff
+#define MIDR_IMPL_SHIFT		0x18
+#define MIDR_VAR_SHIFT		20
+#define MIDR_REV_SHIFT		0
 #define MIDR_PN_MASK		0xfff
 #define MIDR_PN_SHIFT		0x4
-#define MIDR_PN_AEM		0xd0f
-#define MIDR_PN_A57		0xd07
-#define MIDR_PN_A53		0xd03
 
 /*******************************************************************************
  * MPIDR macros
@@ -75,11 +76,6 @@
 #define ICC_PMR_EL1     S3_0_C4_C6_0
 
 /*******************************************************************************
- * Implementation defined sysreg encodings
- ******************************************************************************/
-#define CPUECTLR_EL1	S3_1_C15_C2_1
-
-/*******************************************************************************
  * Generic timer memory mapped registers & offsets
  ******************************************************************************/
 #define CNTCR_OFF			0x000
@@ -133,9 +129,6 @@
 #define SCTLR_WXN_BIT		(1 << 19)
 #define SCTLR_EE_BIT		(1 << 25)
 
-/* CPUECTLR definitions */
-#define CPUECTLR_SMP_BIT	(1 << 6)
-
 /* CPACR_El1 definitions */
 #define CPACR_EL1_FPEN(x)	(x << 20)
 #define CPACR_EL1_FP_TRAP_EL0	0x1
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index 6ba37c2..09365fb 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -265,10 +265,6 @@
 DEFINE_SYSREG_RW_FUNCS(vpidr_el2)
 DEFINE_SYSREG_RW_FUNCS(vmpidr_el2)
 
-/* Implementation specific registers */
-
-DEFINE_RENAME_SYSREG_RW_FUNCS(cpuectlr_el1, CPUECTLR_EL1)
-
 /* GICv3 System Registers */
 
 DEFINE_RENAME_SYSREG_RW_FUNCS(icc_sre_el1, ICC_SRE_EL1)
@@ -299,9 +295,6 @@
 #define read_hcr()		read_hcr_el2()
 #define write_hcr(_v)		write_hcr_el2(_v)
 
-#define read_cpuectlr()		read_cpuectlr_el1()
-#define write_cpuectlr(_v)	write_cpuectlr_el1(_v)
-
 #define read_cpacr()		read_cpacr_el1()
 #define write_cpacr(_v)		write_cpacr_el1(_v)
 
diff --git a/lib/aarch64/cpu_helpers.S b/include/lib/cpus/aarch64/aem_generic.h
similarity index 70%
copy from lib/aarch64/cpu_helpers.S
copy to include/lib/cpus/aarch64/aem_generic.h
index abb996d..2f701d1 100644
--- a/lib/aarch64/cpu_helpers.S
+++ b/include/lib/cpus/aarch64/aem_generic.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -28,28 +28,14 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <arch.h>
-#include <asm_macros.S>
+#ifndef __AEM_GENERIC_H__
+#define __AEM_GENERIC_H__
 
-	.weak	cpu_reset_handler
+/* BASE AEM midr for revision 0 */
+#define BASE_AEM_MIDR 0x410FD0F0
 
+/* Foundation AEM midr for revision 0 */
+#define FOUNDATION_AEM_MIDR  0x410FD000
+
 
-func cpu_reset_handler
-	/* ---------------------------------------------
-	 * As a bare minimal enable the SMP bit.
-	 * ---------------------------------------------
-	 */
-	mrs	x0, midr_el1
-	lsr	x0, x0, #MIDR_PN_SHIFT
-	and	x0, x0, #MIDR_PN_MASK
-	cmp	x0, #MIDR_PN_A57
-	b.eq	smp_setup_begin
-	cmp	x0, #MIDR_PN_A53
-	b.ne	smp_setup_end
-smp_setup_begin:
-	mrs	x0, CPUECTLR_EL1
-	orr	x0, x0, #CPUECTLR_SMP_BIT
-	msr	CPUECTLR_EL1, x0
-	isb
-smp_setup_end:
-	ret
+#endif /* __AEM_GENERIC_H__ */
diff --git a/lib/aarch64/cpu_helpers.S b/include/lib/cpus/aarch64/cortex_a53.h
similarity index 70%
rename from lib/aarch64/cpu_helpers.S
rename to include/lib/cpus/aarch64/cortex_a53.h
index abb996d..14821ab 100644
--- a/lib/aarch64/cpu_helpers.S
+++ b/include/lib/cpus/aarch64/cortex_a53.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -28,28 +28,17 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <arch.h>
-#include <asm_macros.S>
+#ifndef __CORTEX_A53_H__
+#define __CORTEX_A53_H__
 
-	.weak	cpu_reset_handler
+/* Cortex-A53 midr for revision 0 */
+#define CORTEX_A53_MIDR 0x410FD030
 
+/*******************************************************************************
+ * CPU Extended Control register specific definitions.
+ ******************************************************************************/
+#define CPUECTLR_EL1			S3_1_C15_C2_1	/* Instruction def. */
+
+#define CPUECTLR_SMP_BIT		(1 << 6)
 
-func cpu_reset_handler
-	/* ---------------------------------------------
-	 * As a bare minimal enable the SMP bit.
-	 * ---------------------------------------------
-	 */
-	mrs	x0, midr_el1
-	lsr	x0, x0, #MIDR_PN_SHIFT
-	and	x0, x0, #MIDR_PN_MASK
-	cmp	x0, #MIDR_PN_A57
-	b.eq	smp_setup_begin
-	cmp	x0, #MIDR_PN_A53
-	b.ne	smp_setup_end
-smp_setup_begin:
-	mrs	x0, CPUECTLR_EL1
-	orr	x0, x0, #CPUECTLR_SMP_BIT
-	msr	CPUECTLR_EL1, x0
-	isb
-smp_setup_end:
-	ret
+#endif /* __CORTEX_A53_H__ */
diff --git a/include/lib/cpus/aarch64/cortex_a57.h b/include/lib/cpus/aarch64/cortex_a57.h
new file mode 100644
index 0000000..9cf8780
--- /dev/null
+++ b/include/lib/cpus/aarch64/cortex_a57.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CORTEX_A57_H__
+#define __CORTEX_A57_H__
+
+/* Cortex-A57 midr for revision 0 */
+#define CORTEX_A57_MIDR 0x410FD070
+
+/*******************************************************************************
+ * CPU Extended Control register specific definitions.
+ ******************************************************************************/
+#define CPUECTLR_EL1			S3_1_C15_C2_1	/* Instruction def. */
+
+#define CPUECTLR_SMP_BIT		(1 << 6)
+#define CPUECTLR_DIS_TWD_ACC_PFTCH_BIT	(1 << 38)
+#define CPUECTLR_L2_IPFTCH_DIST_MASK	(0x3 << 35)
+#define CPUECTLR_L2_DPFTCH_DIST_MASK	(0x3 << 32)
+
+/*******************************************************************************
+ * CPU Auxiliary Control register specific definitions.
+ ******************************************************************************/
+#define CPUACTLR_EL1			S3_1_C15_C2_0	/* Instruction def. */
+
+#define CPUACTLR_NO_ALLOC_WBWA         (1 << 49)
+#define CPUACTLR_DCC_AS_DCCI           (1 << 44)
+
+#endif /* __CORTEX_A57_H__ */
diff --git a/include/lib/cpus/aarch64/cpu_macros.S b/include/lib/cpus/aarch64/cpu_macros.S
new file mode 100644
index 0000000..65fb82d
--- /dev/null
+++ b/include/lib/cpus/aarch64/cpu_macros.S
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+
+#define CPU_IMPL_PN_MASK	((MIDR_IMPL_MASK << MIDR_IMPL_SHIFT) | \
+				 (MIDR_PN_MASK << MIDR_PN_SHIFT))	/* MIDR implementer + part number fields */
+
+	/*
+	 * Byte offsets of the cpu_ops fields. Must match declare_cpu_ops below.
+	 */
+	.struct 0	/* offsets start at 0; no storage is emitted here */
+CPU_MIDR: /* cpu_ops midr */
+	.space  8
+/* Reset fn is needed in BL at reset vector */
+#if IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31)
+CPU_RESET_FUNC: /* cpu_ops reset_func */
+	.space  8
+#endif
+#if IMAGE_BL31 /* The power down core and cluster is needed only in BL3-1 */
+CPU_PWR_DWN_CORE: /* cpu_ops core_pwr_dwn */
+	.space  8
+CPU_PWR_DWN_CLUSTER: /* cpu_ops cluster_pwr_dwn */
+	.space  8
+#endif
+#if (IMAGE_BL31 && CRASH_REPORTING)
+CPU_REG_DUMP: /* cpu specific register dump for crash reporting */
+	.space  8
+#endif
+CPU_OPS_SIZE = .	/* size of one cpu_ops entry for the image being built */
+
+	/*
+	 * Emit one cpu_ops entry for cpu _name with midr value _midr into the
+	 * "cpu_ops" section. Fields are emitted in the order of the offsets
+	 * defined above. A non-zero _noresetfunc stores 0 as the reset handler.
+	 */
+	.macro declare_cpu_ops _name:req, _midr:req, _noresetfunc = 0
+	.section cpu_ops, "a"; .align 3	/* 8-byte align each entry */
+	.type cpu_ops_\_name, %object
+	.quad \_midr	/* CPU_MIDR */
+#if IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31)
+	.if \_noresetfunc
+	.quad 0	/* CPU_RESET_FUNC: none */
+	.else
+	.quad \_name\()_reset_func	/* CPU_RESET_FUNC */
+	.endif
+#endif
+#if IMAGE_BL31
+	.quad \_name\()_core_pwr_dwn	/* CPU_PWR_DWN_CORE */
+	.quad \_name\()_cluster_pwr_dwn	/* CPU_PWR_DWN_CLUSTER */
+#endif
+#if (IMAGE_BL31 && CRASH_REPORTING)
+	.quad \_name\()_cpu_reg_dump	/* CPU_REG_DUMP */
+#endif
+	.endm
diff --git a/include/plat/common/plat_config.h b/include/plat/common/plat_config.h
index 826d01b..20d3c03 100644
--- a/include/plat/common/plat_config.h
+++ b/include/plat/common/plat_config.h
@@ -39,14 +39,12 @@
 
 
 enum plat_config_flags {
-	/* Whether CPUECTLR SMP bit should be enabled */
-	CONFIG_CPUECTLR_SMP_BIT		= 0x1,
 	/* Whether Base FVP memory map is in use */
-	CONFIG_BASE_MMAP		= 0x2,
+	CONFIG_BASE_MMAP		= 0x1,
 	/* Whether CCI should be enabled */
-	CONFIG_HAS_CCI			= 0x4,
+	CONFIG_HAS_CCI			= 0x2,
 	/* Whether TZC should be configured */
-	CONFIG_HAS_TZC			= 0x8
+	CONFIG_HAS_TZC			= 0x4
 };
 
 typedef struct plat_config {
diff --git a/lib/cpus/aarch64/aem_generic.S b/lib/cpus/aarch64/aem_generic.S
new file mode 100644
index 0000000..58a64a6
--- /dev/null
+++ b/lib/cpus/aarch64/aem_generic.S
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <aem_generic.h>
+#include <arch.h>
+#include <asm_macros.S>
+#include <cpu_macros.S>
+
+func aem_generic_core_pwr_dwn	/* core_pwr_dwn entry of the aem_generic cpu_ops */
+	/* ---------------------------------------------
+	 * Disable the Data Cache.
+	 * ---------------------------------------------
+	 */
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT	/* clear the C bit in SCTLR_EL3 */
+	msr	sctlr_el3, x1
+	isb
+
+	mov	x0, #DCCISW	/* x0 = operation passed to dcsw_op_louis */
+
+	/* ---------------------------------------------
+	 * Flush L1 cache to PoU.
+	 * ---------------------------------------------
+	 */
+	b	dcsw_op_louis	/* tail call: x30 is untouched, so it returns to our caller */
+
+
+func aem_generic_cluster_pwr_dwn	/* cluster_pwr_dwn entry of the aem_generic cpu_ops */
+	/* ---------------------------------------------
+	 * Disable the Data Cache.
+	 * ---------------------------------------------
+	 */
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT	/* clear the C bit in SCTLR_EL3 */
+	msr	sctlr_el3, x1
+	isb
+
+	/* ---------------------------------------------
+	 * Flush L1 and L2 caches to PoC.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW	/* x0 = operation passed to dcsw_op_all */
+	b	dcsw_op_all	/* tail call: x30 is untouched, so it returns to our caller */
+
+	/* ---------------------------------------------
+	 * This function provides cpu specific
+	 * register information for crash reporting.
+	 * The contract is to return with x6 pointing
+	 * to a list of register names in ascii and
+	 * x8 - x15 holding the register values. This
+	 * cpu has nothing to report, so x6 is set to 0.
+	 * ---------------------------------------------
+	 */
+func aem_generic_cpu_reg_dump
+	mov	x6, #0 /* no registers to report */
+	ret
+
+
+/* cpu_ops for Base AEM FVP; last argument 1 = no reset handler (stores 0) */
+declare_cpu_ops aem_generic, BASE_AEM_MIDR, 1
+
+/* cpu_ops for Foundation FVP; shares the aem_generic handlers, no reset handler */
+declare_cpu_ops aem_generic, FOUNDATION_AEM_MIDR, 1
diff --git a/lib/cpus/aarch64/cortex_a53.S b/lib/cpus/aarch64/cortex_a53.S
new file mode 100644
index 0000000..722ce7a
--- /dev/null
+++ b/lib/cpus/aarch64/cortex_a53.S
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <arch.h>
+#include <asm_macros.S>
+#include <cortex_a53.h>
+#include <cpu_macros.S>
+#include <plat_macros.S>
+
+	/* ---------------------------------------------
+	 * Disable L1 data cache and unified L2 cache
+	 * ---------------------------------------------
+	 */
+func cortex_a53_disable_dcache	/* only x1 is modified */
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT	/* clear the C bit in SCTLR_EL3 */
+	msr	sctlr_el3, x1
+	isb
+	ret
+
+	/* ---------------------------------------------
+	 * Disable intra-cluster coherency
+	 * ---------------------------------------------
+	 */
+func cortex_a53_disable_smp	/* only x0 is modified */
+	mrs	x0, CPUECTLR_EL1
+	bic	x0, x0, #CPUECTLR_SMP_BIT	/* clear the SMP bit */
+	msr	CPUECTLR_EL1, x0
+	isb
+	dsb	sy
+	ret
+
+func cortex_a53_reset_func	/* reset_func entry of the cortex_a53 cpu_ops; modifies x0 */
+	/* ---------------------------------------------
+	 * As a bare minimum enable the SMP bit.
+	 * ---------------------------------------------
+	 */
+	mrs	x0, CPUECTLR_EL1
+	orr	x0, x0, #CPUECTLR_SMP_BIT	/* set the SMP bit, other bits unchanged */
+	msr	CPUECTLR_EL1, x0
+	isb
+	ret
+
+func cortex_a53_core_pwr_dwn	/* core_pwr_dwn entry of the cortex_a53 cpu_ops */
+	mov	x18, x30	/* keep return address; NOTE(review): assumes callees leave x18 intact */
+
+	/* ---------------------------------------------
+	 * Turn off caches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a53_disable_dcache
+
+	/* ---------------------------------------------
+	 * Flush L1 cache to PoU.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW	/* x0 = operation passed to dcsw_op_louis */
+	bl	dcsw_op_louis
+
+	/* ---------------------------------------------
+	 * Come out of intra cluster coherency
+	 * ---------------------------------------------
+	 */
+	mov	x30, x18	/* restore return address before the tail call */
+	b	cortex_a53_disable_smp	/* its ret returns to our caller */
+
+func cortex_a53_cluster_pwr_dwn	/* cluster_pwr_dwn entry of the cortex_a53 cpu_ops */
+	mov	x18, x30	/* keep return address; NOTE(review): assumes callees leave x18 intact */
+
+	/* ---------------------------------------------
+	 * Turn off caches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a53_disable_dcache
+
+	/* ---------------------------------------------
+	 * Disable the optional ACP.
+	 * ---------------------------------------------
+	 */
+	bl	plat_disable_acp	/* platform hook */
+
+	/* ---------------------------------------------
+	 * Flush L1 and L2 caches to PoC.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW	/* x0 = operation passed to dcsw_op_all */
+	bl	dcsw_op_all
+
+	/* ---------------------------------------------
+	 * Come out of intra cluster coherency
+	 * ---------------------------------------------
+	 */
+	mov	x30, x18	/* restore return address before the tail call */
+	b	cortex_a53_disable_smp	/* its ret returns to our caller */
+
+	/* ---------------------------------------------
+	 * This function provides cortex_a53 specific
+	 * register information for crash reporting.
+	 * It returns with x6 pointing to the ascii
+	 * list of register names below and x8 holding
+	 * the CPUECTLR_EL1 value (the contract allows
+	 * x8 - x15 to carry values).
+	 * ---------------------------------------------
+	 */
+.section .rodata.cortex_a53_regs, "aS"
+cortex_a53_regs:  /* The ascii list of register names to be reported */
+	.asciz	"cpuectlr_el1", ""	/* presumably the empty string ends the list - TODO confirm */
+
+func cortex_a53_cpu_reg_dump
+	adr	x6, cortex_a53_regs	/* x6 = address of the name list */
+	mrs	x8, CPUECTLR_EL1	/* x8 = value for the first (only) name */
+	ret
+
+declare_cpu_ops cortex_a53, CORTEX_A53_MIDR	/* cpu_ops entry referencing the cortex_a53_* handlers above */
diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S
new file mode 100644
index 0000000..eed1bbb
--- /dev/null
+++ b/lib/cpus/aarch64/cortex_a57.S
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <arch.h>
+#include <asm_macros.S>
+#include <assert_macros.S>
+#include <cortex_a57.h>
+#include <cpu_macros.S>
+#include <plat_macros.S>
+
+	/* ---------------------------------------------
+	 * Disable L1 data cache and unified L2 cache
+	 * ---------------------------------------------
+	 */
+func cortex_a57_disable_dcache	/* only x1 is modified */
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT	/* clear the C bit in SCTLR_EL3 */
+	msr	sctlr_el3, x1
+	isb
+	ret
+
+	/* ---------------------------------------------
+	 * Disable all types of L2 prefetches.
+	 * ---------------------------------------------
+	 */
+func cortex_a57_disable_l2_prefetch	/* modifies x0 and x1 */
+	mrs	x0, CPUECTLR_EL1
+	orr	x0, x0, #CPUECTLR_DIS_TWD_ACC_PFTCH_BIT	/* set DIS_TWD_ACC_PFTCH */
+	mov	x1, #CPUECTLR_L2_IPFTCH_DIST_MASK
+	orr	x1, x1, #CPUECTLR_L2_DPFTCH_DIST_MASK	/* x1 = both prefetch distance masks */
+	bic	x0, x0, x1	/* clear both distance fields */
+	msr	CPUECTLR_EL1, x0
+	isb
+	dsb	sy
+	ret
+
+	/* ---------------------------------------------
+	 * Disable intra-cluster coherency
+	 * ---------------------------------------------
+	 */
+func cortex_a57_disable_smp	/* only x0 is modified */
+	mrs	x0, CPUECTLR_EL1
+	bic	x0, x0, #CPUECTLR_SMP_BIT	/* clear the SMP bit */
+	msr	CPUECTLR_EL1, x0	/* no barrier here; both callers go on to cortex_a57_disable_ext_debug, which has isb/dsb */
+	ret
+
+	/* ---------------------------------------------
+	 * Disable debug interfaces
+	 * ---------------------------------------------
+	 */
+func cortex_a57_disable_ext_debug	/* only x0 is modified */
+	mov	x0, #1
+	msr	osdlr_el1, x0	/* write 1 to OSDLR_EL1 */
+	isb
+	dsb	sy
+	ret
+
+func cortex_a57_reset_func	/* reset_func entry of the cortex_a57 cpu_ops; modifies x0 - x2 */
+#if ERRATA_A57_806969 || ERRATA_A57_813420
+	/* ---------------------------------------------
+	 * With ASM_ASSERTION, check the workarounds below
+	 * are only applied on r0p0 parts (variant = rev = 0).
+	 * ---------------------------------------------
+	 */
+#if ASM_ASSERTION
+	mrs	x0, midr_el1
+	ubfx	x1, x0, #MIDR_VAR_SHIFT, #4	/* x1 = 4-bit variant field */
+	ubfx	x2, x0, #MIDR_REV_SHIFT, #4	/* x2 = 4-bit revision field */
+	orr	x0, x1, x2	/* zero only if both fields are zero */
+	cmp	x0, #0
+	ASM_ASSERT(eq)
+#endif
+	mov	x1, xzr	/* x1 accumulates the CPUACTLR_EL1 bits to set */
+#if ERRATA_A57_806969
+	orr	x1, x1, #CPUACTLR_NO_ALLOC_WBWA
+#endif
+#if ERRATA_A57_813420
+	orr	x1, x1, #CPUACTLR_DCC_AS_DCCI
+#endif
+	mrs	x0, CPUACTLR_EL1
+	orr	x0, x0, x1	/* read-modify-write: other bits are preserved */
+	msr	CPUACTLR_EL1, x0	/* the isb at the end of this function follows this write */
+#endif
+
+	/* ---------------------------------------------
+	 * As a bare minimum enable the SMP bit.
+	 * ---------------------------------------------
+	 */
+	mrs	x0, CPUECTLR_EL1
+	orr	x0, x0, #CPUECTLR_SMP_BIT	/* set the SMP bit, other bits unchanged */
+	msr	CPUECTLR_EL1, x0
+	isb
+	ret
+
+func cortex_a57_core_pwr_dwn	/* core_pwr_dwn entry of the cortex_a57 cpu_ops */
+	mov	x18, x30	/* keep return address; NOTE(review): assumes callees leave x18 intact */
+
+	/* ---------------------------------------------
+	 * Turn off caches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_dcache
+
+	/* ---------------------------------------------
+	 * Disable the L2 prefetches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_l2_prefetch
+
+	/* ---------------------------------------------
+	 * Flush L1 cache to PoU.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW	/* x0 = operation passed to dcsw_op_louis */
+	bl	dcsw_op_louis
+
+	/* ---------------------------------------------
+	 * Come out of intra cluster coherency
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_smp
+
+	/* ---------------------------------------------
+	 * Force the debug interfaces to be quiescent
+	 * ---------------------------------------------
+	 */
+	mov	x30, x18	/* restore return address before the tail call */
+	b	cortex_a57_disable_ext_debug	/* its ret returns to our caller */
+
+func cortex_a57_cluster_pwr_dwn	/* cluster_pwr_dwn entry of the cortex_a57 cpu_ops */
+	mov	x18, x30	/* keep return address; NOTE(review): assumes callees leave x18 intact */
+
+	/* ---------------------------------------------
+	 * Turn off caches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_dcache
+
+	/* ---------------------------------------------
+	 * Disable the L2 prefetches.
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_l2_prefetch
+
+	/* ---------------------------------------------
+	 * Disable the optional ACP.
+	 * ---------------------------------------------
+	 */
+	bl	plat_disable_acp	/* platform hook */
+
+	/* ---------------------------------------------
+	 * Flush L1 and L2 caches to PoC.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW	/* x0 = operation passed to dcsw_op_all */
+	bl	dcsw_op_all
+
+	/* ---------------------------------------------
+	 * Come out of intra cluster coherency
+	 * ---------------------------------------------
+	 */
+	bl	cortex_a57_disable_smp
+
+	/* ---------------------------------------------
+	 * Force the debug interfaces to be quiescent
+	 * ---------------------------------------------
+	 */
+	mov	x30, x18	/* restore return address before the tail call */
+	b	cortex_a57_disable_ext_debug	/* its ret returns to our caller */
+
+	/* ---------------------------------------------
+	 * This function provides cortex_a57 specific
+	 * register information for crash reporting.
+	 * It returns with x6 pointing to the ascii
+	 * list of register names below and x8 holding
+	 * the CPUECTLR_EL1 value (the contract allows
+	 * x8 - x15 to carry values).
+	 * ---------------------------------------------
+	 */
+.section .rodata.cortex_a57_regs, "aS"
+cortex_a57_regs:  /* The ascii list of register names to be reported */
+	.asciz	"cpuectlr_el1", ""	/* presumably the empty string ends the list - TODO confirm */
+
+func cortex_a57_cpu_reg_dump
+	adr	x6, cortex_a57_regs	/* x6 = address of the name list */
+	mrs	x8, CPUECTLR_EL1	/* x8 = value for the first (only) name */
+	ret
+
+
+declare_cpu_ops cortex_a57, CORTEX_A57_MIDR	/* cpu_ops entry referencing the cortex_a57_* handlers above */
diff --git a/lib/cpus/aarch64/cpu_helpers.S b/lib/cpus/aarch64/cpu_helpers.S
new file mode 100644
index 0000000..46584b3
--- /dev/null
+++ b/lib/cpus/aarch64/cpu_helpers.S
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <assert_macros.S>
+#include <cpu_macros.S>
+#if IMAGE_BL31
+#include <cpu_data.h>
+#endif
+
+ /* Reset fn is needed in BL at reset vector */
+#if IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31)
+	/*
+	 * The reset handler common to all platforms. It calls the platform
+	 * hook, then finds the matching cpu_ops entry and invokes the
+	 * corresponding reset_func, if one is set. Returns via x10.
+	 */
+	.globl	reset_handler
+func reset_handler
+	mov	x10, x30	/* keep return address; callees must preserve x10 */
+
+	bl	plat_reset_handler
+
+	/* Get the matching cpu_ops pointer */
+	bl	get_cpu_ops_ptr
+#if ASM_ASSERTION
+	cmp	x0, #0
+	ASM_ASSERT(ne)
+#endif
+
+	/* Get the cpu_ops reset handler; NOTE(review): x0 == 0 is only caught when ASM_ASSERTION is set */
+	ldr	x2, [x0, #CPU_RESET_FUNC]
+	cbz	x2, 1f	/* entry declared without a reset function */
+	blr	x2
+1:
+	ret	x10
+
+#endif /* IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31) */
+
+#if IMAGE_BL31 /* The power down core and cluster is needed only in  BL31 */
+	/*
+	 * The prepare core power down function for all platforms.  After
+	 * the cpu_ops pointer is retrieved from cpu_data, the corresponding
+	 * core_pwr_dwn in the cpu_ops is invoked as a tail call.
+	 */
+	.globl	prepare_core_pwr_dwn
+func prepare_core_pwr_dwn
+	mrs	x1, tpidr_el3	/* x1 = base used to address cpu_data fields */
+	ldr	x0, [x1, #CPU_DATA_CPU_OPS_PTR]	/* x0 = cached cpu_ops pointer */
+#if ASM_ASSERTION
+	cmp	x0, #0
+	ASM_ASSERT(ne)
+#endif
+
+	/* Get the cpu_ops core_pwr_dwn handler */
+	ldr	x1, [x0, #CPU_PWR_DWN_CORE]
+	br	x1	/* x30 untouched: the handler returns to our caller */
+
+	/*
+	 * The prepare cluster power down function for all platforms.  After
+	 * the cpu_ops pointer is retrieved from cpu_data, the corresponding
+	 * cluster_pwr_dwn in the cpu_ops is invoked as a tail call.
+	 */
+	.globl	prepare_cluster_pwr_dwn
+func prepare_cluster_pwr_dwn
+	mrs	x1, tpidr_el3	/* x1 = base used to address cpu_data fields */
+	ldr	x0, [x1, #CPU_DATA_CPU_OPS_PTR]	/* x0 = cached cpu_ops pointer */
+#if ASM_ASSERTION
+	cmp	x0, #0
+	ASM_ASSERT(ne)
+#endif
+
+	/* Get the cpu_ops cluster_pwr_dwn handler */
+	ldr	x1, [x0, #CPU_PWR_DWN_CLUSTER]
+	br	x1	/* x30 untouched: the handler returns to our caller */
+
+
+	/*
+	 * Initializes the cpu_ops_ptr if not already initialized
+	 * in cpu_data. This can be called without a runtime stack.
+	 * clobbers: x0 - x6, x10
+	 */
+	.globl	init_cpu_ops
+func init_cpu_ops
+	mrs	x6, tpidr_el3	/* x6 = base used to address cpu_data fields */
+	ldr	x0, [x6, #CPU_DATA_CPU_OPS_PTR]
+	cbnz	x0, 1f	/* already initialized: nothing to do */
+	mov	x10, x30	/* keep return address across the call */
+	bl	get_cpu_ops_ptr
+#if ASM_ASSERTION
+	cmp	x0, #0
+	ASM_ASSERT(ne)
+#endif
+	str	x0, [x6, #CPU_DATA_CPU_OPS_PTR]	/* cache the pointer in cpu_data */
+	mov x30, x10
+1:
+	ret
+#endif /* IMAGE_BL31 */
+
+#if IMAGE_BL31 && CRASH_REPORTING
+	/*
+	 * The cpu specific registers which need to be reported in a crash
+	 * are reported via cpu_ops cpu_reg_dump function. After a matching
+	 * cpu_ops structure entry is found, the corresponding cpu_reg_dump
+	 * in the cpu_ops is invoked. Nothing is called if no entry matches.
+	 */
+	.globl	do_cpu_reg_dump
+func do_cpu_reg_dump
+	mov	x16, x30	/* keep return address across the calls */
+
+	/* Get the matching cpu_ops pointer */
+	bl	get_cpu_ops_ptr
+	cbz	x0, 1f	/* no matching cpu_ops entry */
+
+	/* Get the cpu_ops cpu_reg_dump */
+	ldr	x2, [x0, #CPU_REG_DUMP]
+	cbz	x2, 1f	/* no dump function set */
+	blr	x2
+1:
+	mov	x30, x16
+	ret
+#endif
+
+	/*
+	 * The below function returns the cpu_ops structure matching the
+	 * midr of the core. It reads the MIDR_EL1 and finds the matching
+	 * entry in cpu_ops entries. Only the implementation and part number
+	 * are used to match the entries.
+	 * Return :
+	 *     x0 - The matching cpu_ops pointer on Success
+	 *     x0 - 0 on failure.
+	 * Clobbers : x0 - x5
+	 */
+	.globl	get_cpu_ops_ptr
+func get_cpu_ops_ptr
+	/* Get the cpu_ops start and end locations */
+	adr	x4, (__CPU_OPS_START__ + CPU_MIDR)	/* x4 = midr field of the first entry */
+	adr	x5, (__CPU_OPS_END__ + CPU_MIDR)	/* x5 = same offset past the last entry */
+
+	/* Initialize the return parameter */
+	mov	x0, #0
+
+	/* Read the MIDR_EL1 */
+	mrs	x2, midr_el1
+	mov_imm	x3, CPU_IMPL_PN_MASK
+
+	/* Retain only the implementation and part number using mask */
+	and	w2, w2, w3
+1:
+	/* Check if we have reached end of list */
+	cmp	x4, x5
+	b.eq	error_exit	/* no match: return with x0 == 0 */
+
+	/* load the midr from the cpu_ops; post-index moves x4 to the next entry */
+	ldr	x1, [x4], #CPU_OPS_SIZE
+	and	w1, w1, w3
+
+	/* Check if midr matches to midr of this core */
+	cmp	w1, w2
+	b.ne	1b
+
+	/* Subtract the increment and offset to get the cpu-ops pointer */
+	sub	x0, x4, #(CPU_OPS_SIZE + CPU_MIDR)
+error_exit:
+	ret
diff --git a/lib/cpus/cpu-errata.mk b/lib/cpus/cpu-errata.mk
new file mode 100644
index 0000000..79f0156
--- /dev/null
+++ b/lib/cpus/cpu-errata.mk
@@ -0,0 +1,48 @@
+#
+# Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# Neither the name of ARM nor the names of its contributors may be used
+# to endorse or promote products derived from this software without specific
+# prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+# CPU Errata Build flags. All default to 0 (disabled); a platform sets a
+# flag to 1 if the erratum workaround needs to be applied.
+
+# Flag to apply erratum 806969 during reset. This erratum applies only to
+# revision r0p0 of the Cortex A57 cpu.
+ERRATA_A57_806969	?=0
+
+# Flag to apply erratum 813420 during reset. This erratum applies only to
+# revision r0p0 of the Cortex A57 cpu.
+ERRATA_A57_813420	?=0
+
+# Process ERRATA_A57_806969 flag: check it is boolean, then add it as a define
+$(eval $(call assert_boolean,ERRATA_A57_806969))
+$(eval $(call add_define,ERRATA_A57_806969))
+
+# Process ERRATA_A57_813420 flag: check it is boolean, then add it as a define
+$(eval $(call assert_boolean,ERRATA_A57_813420))
+$(eval $(call add_define,ERRATA_A57_813420))
diff --git a/plat/common/aarch64/platform_helpers.S b/plat/common/aarch64/platform_helpers.S
index 6dc4ec6..c236fd7 100644
--- a/plat/common/aarch64/platform_helpers.S
+++ b/plat/common/aarch64/platform_helpers.S
@@ -38,6 +38,8 @@
 	.weak	plat_report_exception
 	.weak	plat_crash_console_init
 	.weak	plat_crash_console_putc
+	.weak	plat_reset_handler
+	.weak	plat_disable_acp
 
 	/* -----------------------------------------------------
 	 *  int platform_get_core_pos(int mpidr);
@@ -84,3 +86,20 @@
 	 */
 func plat_crash_console_putc
 	ret
+
+	/* -----------------------------------------------------
+	 * Weak placeholder that just returns; should be redefined
+	 * by each platform. This function should preserve x10.
+	 * -----------------------------------------------------
+	 */
+func plat_reset_handler
+	ret
+
+	/* -----------------------------------------------------
+	 * Weak placeholder that just returns; should be redefined
+	 * by each platform. This function is allowed to use
+	 * registers x0 - x17.
+	 * -----------------------------------------------------
+	 */
+func plat_disable_acp
+	ret
diff --git a/plat/fvp/aarch64/fvp_common.c b/plat/fvp/aarch64/fvp_common.c
index 89fd8b3..a25c4f0 100644
--- a/plat/fvp/aarch64/fvp_common.c
+++ b/plat/fvp/aarch64/fvp_common.c
@@ -134,7 +134,7 @@
  ******************************************************************************/
 int fvp_config_setup(void)
 {
-	unsigned int rev, hbi, bld, arch, sys_id, midr_pn;
+	unsigned int rev, hbi, bld, arch, sys_id;
 
 	sys_id = mmio_read_32(VE_SYSREGS_BASE + V2M_SYS_ID);
 	rev = (sys_id >> SYS_ID_REV_SHIFT) & SYS_ID_REV_MASK;
@@ -193,11 +193,6 @@
 		}
 		break;
 	case HBI_FVP_BASE:
-		midr_pn = (read_midr() >> MIDR_PN_SHIFT) & MIDR_PN_MASK;
-		plat_config.flags =
-			((midr_pn == MIDR_PN_A57) || (midr_pn == MIDR_PN_A53))
-			? CONFIG_CPUECTLR_SMP_BIT : 0;
-
 		plat_config.max_aff0 = 4;
 		plat_config.max_aff1 = 2;
 		plat_config.flags |= CONFIG_BASE_MMAP | CONFIG_HAS_CCI |
diff --git a/plat/fvp/fvp_pm.c b/plat/fvp/fvp_pm.c
index 87ef54c..2038e87 100644
--- a/plat/fvp/fvp_pm.c
+++ b/plat/fvp/fvp_pm.c
@@ -39,11 +39,84 @@
 #include <plat_config.h>
 #include <platform_def.h>
 #include <psci.h>
+#include <errno.h>
 #include "drivers/pwrc/fvp_pwrc.h"
 #include "fvp_def.h"
 #include "fvp_private.h"
 
 /*******************************************************************************
+ * Private FVP function to program the mailbox for a cpu before it is released
+ * from reset.
+ ******************************************************************************/
+static void fvp_program_mailbox(uint64_t mpidr, uint64_t address)
+{
+	/* Each cpu's mailbox is the array slot at its linear core position */
+	uint64_t core_pos = platform_get_core_pos(mpidr);
+	mailbox_t *mbox = (mailbox_t *)MBOX_BASE + core_pos;
+
+	mbox->value = address;
+
+	/* Flush the updated mailbox entry from the data cache */
+	flush_dcache_range((unsigned long) mbox, sizeof(unsigned long));
+}
+
+/*******************************************************************************
+ * Function which implements the common FVP specific operations to power down a
+ * cpu in response to a CPU_OFF or CPU_SUSPEND request. Acts on the calling cpu.
+ ******************************************************************************/
+static void fvp_cpu_pwrdwn_common(void)
+{
+	/* Prevent interrupts from spuriously waking up this cpu */
+	arm_gic_cpuif_deactivate();
+
+	/* Program the power controller to power off this cpu. */
+	fvp_pwrc_write_ppoffr(read_mpidr_el1());
+}
+
+/*******************************************************************************
+ * Function which implements the common FVP specific operations to power down a
+ * cluster in response to a CPU_OFF or CPU_SUSPEND request. Uses this cpu's mpidr.
+ ******************************************************************************/
+static void fvp_cluster_pwrdwn_common(void)
+{
+	uint64_t mpidr = read_mpidr_el1();
+
+	/* Disable cluster coherency, only on FVPs configured with a CCI */
+	if (get_plat_config()->flags & CONFIG_HAS_CCI)
+		cci_disable_cluster_coherency(mpidr);
+
+	/* Program the power controller to turn the cluster off */
+	fvp_pwrc_write_pcoffr(mpidr);
+}
+
+/*******************************************************************************
+ * Private FVP helper deciding whether platform actions are needed for an
+ * affinity instance at level 'afflvl' entering 'state'. Returns 0 when they
+ * are, i.e. 'state' is off and 'afflvl' is the highest level being powered
+ * off; returns -EAGAIN otherwise.
+ ******************************************************************************/
+static int32_t fvp_do_plat_actions(unsigned int afflvl, unsigned int state)
+{
+	unsigned int top_off_lvl;
+
+	assert(afflvl <= MPIDR_AFFLVL1);
+
+	/* Nothing to do unless the instance is being turned off */
+	if (state != PSCI_STATE_OFF)
+		return -EAGAIN;
+
+	/*
+	 * All platform specific actions are postponed until the highest
+	 * affinity level which will be suspended is reached.
+	 */
+	top_off_lvl = psci_get_max_phys_off_afflvl();
+	assert(top_off_lvl != PSCI_INVALID_DATA);
+
+	/* Lower levels are told to try again; only the top one proceeds */
+	return (afflvl == top_off_lvl) ? 0 : -EAGAIN;
+}
+
+/*******************************************************************************
  * FVP handler called when an affinity instance is about to enter standby.
  ******************************************************************************/
 int fvp_affinst_standby(unsigned int power_state)
@@ -81,8 +154,6 @@
 		   unsigned int state)
 {
 	int rc = PSCI_E_SUCCESS;
-	unsigned long linear_id;
-	mailbox_t *fvp_mboxes;
 	unsigned int psysr;
 
 	/*
@@ -90,7 +161,7 @@
 	 * on the FVP. Ignore any other affinity level.
 	 */
 	if (afflvl != MPIDR_AFFLVL0)
-		goto exit;
+		return rc;
 
 	/*
 	 * Ensure that we do not cancel an inflight power off request
@@ -103,15 +174,9 @@
 		psysr = fvp_pwrc_read_psysr(mpidr);
 	} while (psysr & PSYSR_AFF_L0);
 
-	linear_id = platform_get_core_pos(mpidr);
-	fvp_mboxes = (mailbox_t *)MBOX_BASE;
-	fvp_mboxes[linear_id].value = sec_entrypoint;
-	flush_dcache_range((unsigned long) &fvp_mboxes[linear_id],
-			   sizeof(unsigned long));
-
+	fvp_program_mailbox(mpidr, sec_entrypoint);
 	fvp_pwrc_write_pponr(mpidr);
 
-exit:
 	return rc;
 }
 
@@ -130,60 +195,21 @@
 		    unsigned int afflvl,
 		    unsigned int state)
 {
-	int rc = PSCI_E_SUCCESS;
-	unsigned int ectlr;
-
-	switch (afflvl) {
-	case MPIDR_AFFLVL1:
-		if (state == PSCI_STATE_OFF) {
-			/*
-			 * Disable coherency if this cluster is to be
-			 * turned off
-			 */
-			if (get_plat_config()->flags & CONFIG_HAS_CCI)
-				cci_disable_cluster_coherency(mpidr);
-
-			/*
-			 * Program the power controller to turn the
-			 * cluster off
-			 */
-			fvp_pwrc_write_pcoffr(mpidr);
-
-		}
-		break;
+	/* Determine if any platform actions need to be executed */
+	if (fvp_do_plat_actions(afflvl, state) == -EAGAIN)
+		return PSCI_E_SUCCESS;
 
-	case MPIDR_AFFLVL0:
-		if (state == PSCI_STATE_OFF) {
-
-			/*
-			 * Take this cpu out of intra-cluster coherency if
-			 * the FVP flavour supports the SMP bit.
-			 */
-			if (get_plat_config()->flags & CONFIG_CPUECTLR_SMP_BIT) {
-				ectlr = read_cpuectlr();
-				ectlr &= ~CPUECTLR_SMP_BIT;
-				write_cpuectlr(ectlr);
-			}
-
-			/*
-			 * Prevent interrupts from spuriously waking up
-			 * this cpu
-			 */
-			arm_gic_cpuif_deactivate();
-
-			/*
-			 * Program the power controller to power this
-			 * cpu off
-			 */
-			fvp_pwrc_write_ppoffr(mpidr);
-		}
-		break;
+	/*
+	 * If execution reaches this stage then this affinity level will be
+	 * suspended. Perform at least the cpu specific actions followed by the
+	 * cluster specific operations if applicable.
+	 */
+	fvp_cpu_pwrdwn_common();
 
-	default:
-		assert(0);
-	}
+	if (afflvl != MPIDR_AFFLVL0)
+		fvp_cluster_pwrdwn_common();
 
-	return rc;
+	return PSCI_E_SUCCESS;
 }
 
 /*******************************************************************************
@@ -203,69 +229,24 @@
 			unsigned int afflvl,
 			unsigned int state)
 {
-	int rc = PSCI_E_SUCCESS;
-	unsigned int ectlr;
-	unsigned long linear_id;
-	mailbox_t *fvp_mboxes;
-
-	switch (afflvl) {
-	case MPIDR_AFFLVL1:
-		if (state == PSCI_STATE_OFF) {
-			/*
-			 * Disable coherency if this cluster is to be
-			 * turned off
-			 */
-			if (get_plat_config()->flags & CONFIG_HAS_CCI)
-				cci_disable_cluster_coherency(mpidr);
-
-			/*
-			 * Program the power controller to turn the
-			 * cluster off
-			 */
-			fvp_pwrc_write_pcoffr(mpidr);
-
-		}
-		break;
-
-	case MPIDR_AFFLVL0:
-		if (state == PSCI_STATE_OFF) {
-			/*
-			 * Take this cpu out of intra-cluster coherency if
-			 * the FVP flavour supports the SMP bit.
-			 */
-			if (get_plat_config()->flags & CONFIG_CPUECTLR_SMP_BIT) {
-				ectlr = read_cpuectlr();
-				ectlr &= ~CPUECTLR_SMP_BIT;
-				write_cpuectlr(ectlr);
-			}
+	/* Determine if any platform actions need to be executed. */
+	if (fvp_do_plat_actions(afflvl, state) == -EAGAIN)
+		return PSCI_E_SUCCESS;
 
-			/* Program the jump address for the target cpu */
-			linear_id = platform_get_core_pos(mpidr);
-			fvp_mboxes = (mailbox_t *)MBOX_BASE;
-			fvp_mboxes[linear_id].value = sec_entrypoint;
-			flush_dcache_range((unsigned long) &fvp_mboxes[linear_id],
-					   sizeof(unsigned long));
+	/* Program the jump address for the target cpu */
+	fvp_program_mailbox(read_mpidr_el1(), sec_entrypoint);
 
-			/*
-			 * Prevent interrupts from spuriously waking up
-			 * this cpu
-			 */
-			arm_gic_cpuif_deactivate();
+	/* Program the power controller to enable wakeup interrupts. */
+	fvp_pwrc_set_wen(mpidr);
 
-			/*
-			 * Program the power controller to power this
-			 * cpu off and enable wakeup interrupts.
-			 */
-			fvp_pwrc_set_wen(mpidr);
-			fvp_pwrc_write_ppoffr(mpidr);
-		}
-		break;
+	/* Perform the common cpu specific operations */
+	fvp_cpu_pwrdwn_common();
 
-	default:
-		assert(0);
-	}
+	/* Perform the common cluster specific operations */
+	if (afflvl != MPIDR_AFFLVL0)
+		fvp_cluster_pwrdwn_common();
 
-	return rc;
+	return PSCI_E_SUCCESS;
 }
 
 /*******************************************************************************
@@ -280,73 +261,42 @@
 			  unsigned int state)
 {
 	int rc = PSCI_E_SUCCESS;
-	unsigned long linear_id;
-	mailbox_t *fvp_mboxes;
-	unsigned int ectlr;
-
-	switch (afflvl) {
-
-	case MPIDR_AFFLVL1:
-		/* Enable coherency if this cluster was off */
-		if (state == PSCI_STATE_OFF) {
-
-			/*
-			 * This CPU might have woken up whilst the
-			 * cluster was attempting to power down. In
-			 * this case the FVP power controller will
-			 * have a pending cluster power off request
-			 * which needs to be cleared by writing to the
-			 * PPONR register. This prevents the power
-			 * controller from interpreting a subsequent
-			 * entry of this cpu into a simple wfi as a
-			 * power down request.
-			 */
-			fvp_pwrc_write_pponr(mpidr);
-
-			fvp_cci_enable();
-		}
-		break;
 
-	case MPIDR_AFFLVL0:
-		/*
-		 * Ignore the state passed for a cpu. It could only have
-		 * been off if we are here.
-		 */
-
-		/*
-		 * Turn on intra-cluster coherency if the FVP flavour supports
-		 * it.
-		 */
-		if (get_plat_config()->flags & CONFIG_CPUECTLR_SMP_BIT) {
-			ectlr = read_cpuectlr();
-			ectlr |= CPUECTLR_SMP_BIT;
-			write_cpuectlr(ectlr);
-		}
+	/* Determine if any platform actions need to be executed. */
+	if (fvp_do_plat_actions(afflvl, state) == -EAGAIN)
+		return PSCI_E_SUCCESS;
 
+	/* Perform the common cluster specific operations */
+	if (afflvl != MPIDR_AFFLVL0) {
 		/*
-		 * Clear PWKUPR.WEN bit to ensure interrupts do not interfere
-		 * with a cpu power down unless the bit is set again
+		 * This CPU might have woken up whilst the cluster was
+		 * attempting to power down. In this case the FVP power
+		 * controller will have a pending cluster power off request
+		 * which needs to be cleared by writing to the PPONR register.
+		 * This prevents the power controller from interpreting a
+		 * subsequent entry of this cpu into a simple wfi as a power
+		 * down request.
 		 */
-		fvp_pwrc_clr_wen(mpidr);
+		fvp_pwrc_write_pponr(mpidr);
 
-		/* Zero the jump address in the mailbox for this cpu */
-		fvp_mboxes = (mailbox_t *)MBOX_BASE;
-		linear_id = platform_get_core_pos(mpidr);
-		fvp_mboxes[linear_id].value = 0;
-		flush_dcache_range((unsigned long) &fvp_mboxes[linear_id],
-				   sizeof(unsigned long));
+		/* Enable coherency if this cluster was off */
+		fvp_cci_enable();
+	}
 
-		/* Enable the gic cpu interface */
-		arm_gic_cpuif_setup();
+	/*
+	 * Clear PWKUPR.WEN bit to ensure interrupts do not interfere
+	 * with a cpu power down unless the bit is set again
+	 */
+	fvp_pwrc_clr_wen(mpidr);
 
-		/* TODO: This setup is needed only after a cold boot */
-		arm_gic_pcpu_distif_setup();
+	/* Zero the jump address in the mailbox for this cpu */
+	fvp_program_mailbox(read_mpidr_el1(), 0);
 
-		break;
+	/* Enable the gic cpu interface */
+	arm_gic_cpuif_setup();
 
-	default:
-		assert(0);
-	}
+	/* TODO: This setup is needed only after a cold boot */
+	arm_gic_pcpu_distif_setup();
 
 	return rc;
 }
diff --git a/plat/fvp/include/plat_macros.S b/plat/fvp/include/plat_macros.S
index 5d11d36..892dbfb 100644
--- a/plat/fvp/include/plat_macros.S
+++ b/plat/fvp/include/plat_macros.S
@@ -52,7 +52,7 @@
 	.macro plat_print_gic_regs
 	adr	x0, plat_config
 	ldr	w16, [x0, #CONFIG_GICC_BASE_OFFSET]
-	cbz	x16, 1f
+	cbz	x16, exit_print_gic_regs
 	/* gic base address is now in x16 */
 	adr	x6, gic_regs	/* Load the gic reg list to x6 */
 	/* Load the gic regs to gp regs used by str_in_crash_buf_print */
@@ -66,10 +66,10 @@
 	add	x7, x16, #GICD_ISPENDR
 	adr	x4, gicd_pend_reg
 	bl	asm_print_str
-2:
+gicd_ispendr_loop:
 	sub	x4, x7, x16
 	cmp	x4, #0x280
-	b.eq	1f
+	b.eq	exit_print_gic_regs
 	bl	asm_print_hex
 	adr	x4, spacer
 	bl	asm_print_str
@@ -77,8 +77,8 @@
 	bl	asm_print_hex
 	adr	x4, newline
 	bl	asm_print_str
-	b	2b
-1:
+	b	gicd_ispendr_loop
+exit_print_gic_regs:
 	.endm
 
 .section .rodata.cci_reg_name, "aS"
diff --git a/plat/fvp/platform.mk b/plat/fvp/platform.mk
index ffed7e4..e7c06a8 100644
--- a/plat/fvp/platform.mk
+++ b/plat/fvp/platform.mk
@@ -74,6 +74,9 @@
 				plat/fvp/fvp_io_storage.c
 
 BL1_SOURCES		+=	drivers/arm/cci400/cci400.c			\
+				lib/cpus/aarch64/aem_generic.S			\
+				lib/cpus/aarch64/cortex_a53.S			\
+				lib/cpus/aarch64/cortex_a57.S			\
 				plat/common/aarch64/platform_up_stack.S		\
 				plat/fvp/bl1_fvp_setup.c			\
 				plat/fvp/aarch64/fvp_common.c			\
@@ -90,6 +93,9 @@
 				drivers/arm/gic/gic_v2.c			\
 				drivers/arm/gic/gic_v3.c			\
 				drivers/arm/tzc400/tzc400.c			\
+				lib/cpus/aarch64/aem_generic.S			\
+				lib/cpus/aarch64/cortex_a53.S			\
+				lib/cpus/aarch64/cortex_a57.S			\
 				plat/common/plat_gic.c				\
 				plat/common/aarch64/platform_mp_stack.S		\
 				plat/fvp/bl31_fvp_setup.c			\
diff --git a/services/std_svc/psci/psci_afflvl_off.c b/services/std_svc/psci/psci_afflvl_off.c
index 83d19d3..7e05789 100644
--- a/services/std_svc/psci/psci_afflvl_off.c
+++ b/services/std_svc/psci/psci_afflvl_off.c
@@ -42,14 +42,10 @@
  ******************************************************************************/
 static int psci_afflvl0_off(aff_map_node_t *cpu_node)
 {
-	unsigned int plat_state;
 	int rc;
 
 	assert(cpu_node->level == MPIDR_AFFLVL0);
 
-	/* State management: mark this cpu as turned off */
-	psci_set_state(cpu_node, PSCI_STATE_OFF);
-
 	/*
 	 * Generic management: Get the index for clearing any lingering re-entry
 	 * information and allow the secure world to switch itself off
@@ -72,88 +68,68 @@
 	 */
 	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL0);
 
+	if (!psci_plat_pm_ops->affinst_off)
+		return PSCI_E_SUCCESS;
+
 	/*
 	 * Plat. management: Perform platform specific actions to turn this
 	 * cpu off e.g. exit cpu coherency, program the power controller etc.
 	 */
-	rc = PSCI_E_SUCCESS;
-	if (psci_plat_pm_ops->affinst_off) {
-
-		/* Get the current physical state of this cpu */
-		plat_state = psci_get_phys_state(cpu_node);
-		rc = psci_plat_pm_ops->affinst_off(read_mpidr_el1(),
-						   cpu_node->level,
-						   plat_state);
-	}
-
-	return rc;
+	return psci_plat_pm_ops->affinst_off(read_mpidr_el1(),
+					     cpu_node->level,
+					     psci_get_phys_state(cpu_node));
 }
 
 static int psci_afflvl1_off(aff_map_node_t *cluster_node)
 {
-	int rc = PSCI_E_SUCCESS;
-	unsigned int plat_state;
-
 	/* Sanity check the cluster level */
 	assert(cluster_node->level == MPIDR_AFFLVL1);
 
-	/* State management: Decrement the cluster reference count */
-	psci_set_state(cluster_node, PSCI_STATE_OFF);
-
-	/*
-	 * Keep the physical state of this cluster handy to decide
-	 * what action needs to be taken
-	 */
-	plat_state = psci_get_phys_state(cluster_node);
-
 	/*
 	 * Arch. Management. Flush all levels of caches to PoC if
-	 * the cluster is to be shutdown
+	 * the cluster is to be shutdown.
 	 */
-	if (plat_state == PSCI_STATE_OFF)
-		dcsw_op_all(DCCISW);
+	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL1);
+
+	if (!psci_plat_pm_ops->affinst_off)
+		return PSCI_E_SUCCESS;
 
 	/*
 	 * Plat. Management. Allow the platform to do its cluster
 	 * specific bookeeping e.g. turn off interconnect coherency,
 	 * program the power controller etc.
 	 */
-	if (psci_plat_pm_ops->affinst_off)
-		rc = psci_plat_pm_ops->affinst_off(read_mpidr_el1(),
-						   cluster_node->level,
-						   plat_state);
-
-	return rc;
+	return psci_plat_pm_ops->affinst_off(read_mpidr_el1(),
+					     cluster_node->level,
+					     psci_get_phys_state(cluster_node));
 }
 
 static int psci_afflvl2_off(aff_map_node_t *system_node)
 {
-	int rc = PSCI_E_SUCCESS;
-	unsigned int plat_state;
-
 	/* Cannot go beyond this level */
 	assert(system_node->level == MPIDR_AFFLVL2);
 
-	/* State management: Decrement the system reference count */
-	psci_set_state(system_node, PSCI_STATE_OFF);
-
 	/*
 	 * Keep the physical state of the system handy to decide what
 	 * action needs to be taken
 	 */
+
+	/*
+	 * Arch. Management. Flush all levels of caches to PoC if
+	 * the system is to be shutdown.
+	 */
-	plat_state = psci_get_phys_state(system_node);
+	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL2);
 
-	/* No arch. and generic bookeeping to do here currently */
+	if (!psci_plat_pm_ops->affinst_off)
+		return PSCI_E_SUCCESS;
 
 	/*
 	 * Plat. Management : Allow the platform to do its bookeeping
 	 * at this affinity level
 	 */
-	if (psci_plat_pm_ops->affinst_off)
-		rc = psci_plat_pm_ops->affinst_off(read_mpidr_el1(),
-						   system_node->level,
-						   plat_state);
-	return rc;
+	return psci_plat_pm_ops->affinst_off(read_mpidr_el1(),
+					     system_node->level,
+					     psci_get_phys_state(system_node));
 }
 
 static const afflvl_off_handler_t psci_afflvl_off_handlers[] = {
@@ -167,7 +143,7 @@
  * topology tree and calls the off handler for the corresponding affinity
  * levels
  ******************************************************************************/
-static int psci_call_off_handlers(mpidr_aff_map_nodes_t mpidr_nodes,
+static int psci_call_off_handlers(aff_map_node_t *mpidr_nodes[],
 				  int start_afflvl,
 				  int end_afflvl)
 {
@@ -216,7 +192,7 @@
 {
 	int rc = PSCI_E_SUCCESS;
 	mpidr_aff_map_nodes_t mpidr_nodes;
-
+	unsigned int max_phys_off_afflvl;
 
 	/*
 	 * Collect the pointers to the nodes in the topology tree for
@@ -240,12 +216,38 @@
 				  end_afflvl,
 				  mpidr_nodes);
 
+	/*
+	 * This function updates the state of each affinity instance
+	 * corresponding to the mpidr in the range of affinity levels
+	 * specified.
+	 */
+	psci_do_afflvl_state_mgmt(start_afflvl,
+				  end_afflvl,
+				  mpidr_nodes,
+				  PSCI_STATE_OFF);
+
+	max_phys_off_afflvl = psci_find_max_phys_off_afflvl(start_afflvl,
+							   end_afflvl,
+							   mpidr_nodes);
+	assert(max_phys_off_afflvl != PSCI_INVALID_DATA);
+
+	/* Stash the highest affinity level that will enter the OFF state. */
+	psci_set_max_phys_off_afflvl(max_phys_off_afflvl);
+
 	/* Perform generic, architecture and platform specific handling */
 	rc = psci_call_off_handlers(mpidr_nodes,
 				    start_afflvl,
 				    end_afflvl);
 
 	/*
+	 * Invalidate the entry for the highest affinity level stashed earlier.
+	 * This ensures that any reads of this variable outside the power
+	 * up/down sequences return PSCI_INVALID_DATA.
+	 *
+	 */
+	psci_set_max_phys_off_afflvl(PSCI_INVALID_DATA);
+
+	/*
 	 * Release the locks corresponding to each affinity level in the
 	 * reverse order to which they were acquired.
 	 */
diff --git a/services/std_svc/psci/psci_afflvl_on.c b/services/std_svc/psci/psci_afflvl_on.c
index 3b7d805..f1d30c9 100644
--- a/services/std_svc/psci/psci_afflvl_on.c
+++ b/services/std_svc/psci/psci_afflvl_on.c
@@ -75,7 +75,6 @@
 			   unsigned long ns_entrypoint,
 			   unsigned long context_id)
 {
-	unsigned int plat_state;
 	unsigned long psci_entrypoint;
 	uint32_t ns_scr_el3 = read_scr_el3();
 	uint32_t ns_sctlr_el1 = read_sctlr_el1();
@@ -113,26 +112,19 @@
 	/* Set the secure world (EL3) re-entry point after BL1 */
 	psci_entrypoint = (unsigned long) psci_aff_on_finish_entry;
 
-	/* State management: Set this cpu's state as ON PENDING */
-	psci_set_state(cpu_node, PSCI_STATE_ON_PENDING);
+	if (!psci_plat_pm_ops->affinst_on)
+		return PSCI_E_SUCCESS;
 
 	/*
 	 * Plat. management: Give the platform the current state
 	 * of the target cpu to allow it to perform the necessary
 	 * steps to power on.
 	 */
-	if (psci_plat_pm_ops->affinst_on) {
-
-		/* Get the current physical state of this cpu */
-		plat_state = psci_get_phys_state(cpu_node);
-		rc = psci_plat_pm_ops->affinst_on(target_cpu,
-						  psci_entrypoint,
-						  ns_entrypoint,
-						  cpu_node->level,
-						  plat_state);
-	}
-
-	return rc;
+	return psci_plat_pm_ops->affinst_on(target_cpu,
+					    psci_entrypoint,
+					    ns_entrypoint,
+					    cpu_node->level,
+					    psci_get_phys_state(cpu_node));
 }
 
 /*******************************************************************************
@@ -145,8 +137,6 @@
 			   unsigned long ns_entrypoint,
 			   unsigned long context_id)
 {
-	int rc = PSCI_E_SUCCESS;
-	unsigned int plat_state;
 	unsigned long psci_entrypoint;
 
 	assert(cluster_node->level == MPIDR_AFFLVL1);
@@ -158,22 +148,20 @@
 
 	/* State management: Is not required while turning a cluster on */
 
+	if (!psci_plat_pm_ops->affinst_on)
+		return PSCI_E_SUCCESS;
+
 	/*
 	 * Plat. management: Give the platform the current state
 	 * of the target cpu to allow it to perform the necessary
 	 * steps to power on.
 	 */
-	if (psci_plat_pm_ops->affinst_on) {
-		plat_state = psci_get_phys_state(cluster_node);
-		psci_entrypoint = (unsigned long) psci_aff_on_finish_entry;
-		rc = psci_plat_pm_ops->affinst_on(target_cpu,
-						  psci_entrypoint,
-						  ns_entrypoint,
-						  cluster_node->level,
-						  plat_state);
-	}
-
-	return rc;
+	psci_entrypoint = (unsigned long) psci_aff_on_finish_entry;
+	return psci_plat_pm_ops->affinst_on(target_cpu,
+					    psci_entrypoint,
+					    ns_entrypoint,
+					    cluster_node->level,
+					    psci_get_phys_state(cluster_node));
 }
 
 /*******************************************************************************
@@ -186,8 +174,6 @@
 			   unsigned long ns_entrypoint,
 			   unsigned long context_id)
 {
-	int rc = PSCI_E_SUCCESS;
-	unsigned int plat_state;
 	unsigned long psci_entrypoint;
 
 	/* Cannot go beyond affinity level 2 in this psci imp. */
@@ -200,22 +186,20 @@
 
 	/* State management: Is not required while turning a system on */
 
+	if (!psci_plat_pm_ops->affinst_on)
+		return PSCI_E_SUCCESS;
+
 	/*
 	 * Plat. management: Give the platform the current state
 	 * of the target cpu to allow it to perform the necessary
 	 * steps to power on.
 	 */
-	if (psci_plat_pm_ops->affinst_on) {
-		plat_state = psci_get_phys_state(system_node);
-		psci_entrypoint = (unsigned long) psci_aff_on_finish_entry;
-		rc = psci_plat_pm_ops->affinst_on(target_cpu,
-						  psci_entrypoint,
-						  ns_entrypoint,
-						  system_node->level,
-						  plat_state);
-	}
-
-	return rc;
+	psci_entrypoint = (unsigned long) psci_aff_on_finish_entry;
+	return psci_plat_pm_ops->affinst_on(target_cpu,
+					    psci_entrypoint,
+					    ns_entrypoint,
+					    system_node->level,
+					    psci_get_phys_state(system_node));
 }
 
 /* Private data structure to make this handlers accessible through indexing */
@@ -230,7 +214,7 @@
  * topology tree and calls the on handler for the corresponding affinity
  * levels
  ******************************************************************************/
-static int psci_call_on_handlers(mpidr_aff_map_nodes_t target_cpu_nodes,
+static int psci_call_on_handlers(aff_map_node_t *target_cpu_nodes[],
 				 int start_afflvl,
 				 int end_afflvl,
 				 unsigned long target_cpu,
@@ -318,6 +302,17 @@
 				   context_id);
 
 	/*
+	 * This function updates the state of each affinity instance
+	 * corresponding to the mpidr in the range of affinity levels
+	 * specified.
+	 */
+	if (rc == PSCI_E_SUCCESS)
+		psci_do_afflvl_state_mgmt(start_afflvl,
+					  end_afflvl,
+					  target_cpu_nodes,
+					  PSCI_STATE_ON_PENDING);
+
+	/*
 	 * This loop releases the lock corresponding to each affinity level
 	 * in the reverse order to which they were acquired.
 	 */
@@ -385,9 +380,6 @@
 	 */
 	cm_prepare_el3_exit(NON_SECURE);
 
-	/* State management: mark this cpu as on */
-	psci_set_state(cpu_node, PSCI_STATE_ON);
-
 	/* Clean caches before re-entering normal world */
 	dcsw_op_louis(DCCSW);
 
@@ -397,10 +389,13 @@
 
 static unsigned int psci_afflvl1_on_finish(aff_map_node_t *cluster_node)
 {
-	unsigned int plat_state, rc = PSCI_E_SUCCESS;
+	unsigned int plat_state;
 
 	assert(cluster_node->level == MPIDR_AFFLVL1);
 
+	if (!psci_plat_pm_ops->affinst_on_finish)
+		return PSCI_E_SUCCESS;
+
 	/*
 	 * Plat. management: Perform the platform specific actions
 	 * as per the old state of the cluster e.g. enabling
@@ -409,30 +404,23 @@
 	 * then assert as there is no way to recover from this
 	 * situation.
 	 */
-	if (psci_plat_pm_ops->affinst_on_finish) {
-
-		/* Get the physical state of this cluster */
-		plat_state = psci_get_phys_state(cluster_node);
-		rc = psci_plat_pm_ops->affinst_on_finish(read_mpidr_el1(),
-							 cluster_node->level,
-							 plat_state);
-		assert(rc == PSCI_E_SUCCESS);
-	}
-
-	/* State management: Increment the cluster reference count */
-	psci_set_state(cluster_node, PSCI_STATE_ON);
-
-	return rc;
+	plat_state = psci_get_phys_state(cluster_node);
+	return psci_plat_pm_ops->affinst_on_finish(read_mpidr_el1(),
+						 cluster_node->level,
+						 plat_state);
 }
 
 
 static unsigned int psci_afflvl2_on_finish(aff_map_node_t *system_node)
 {
-	unsigned int plat_state, rc = PSCI_E_SUCCESS;
+	unsigned int plat_state;
 
 	/* Cannot go beyond this affinity level */
 	assert(system_node->level == MPIDR_AFFLVL2);
 
+	if (!psci_plat_pm_ops->affinst_on_finish)
+		return PSCI_E_SUCCESS;
+
 	/*
 	 * Currently, there are no architectural actions to perform
 	 * at the system level.
@@ -446,20 +434,10 @@
 	 * then assert as there is no way to recover from this
 	 * situation.
 	 */
-	if (psci_plat_pm_ops->affinst_on_finish) {
-
-		/* Get the physical state of the system */
-		plat_state = psci_get_phys_state(system_node);
-		rc = psci_plat_pm_ops->affinst_on_finish(read_mpidr_el1(),
-							 system_node->level,
-							 plat_state);
-		assert(rc == PSCI_E_SUCCESS);
-	}
-
-	/* State management: Increment the system reference count */
-	psci_set_state(system_node, PSCI_STATE_ON);
-
-	return rc;
+	plat_state = psci_get_phys_state(system_node);
+	return psci_plat_pm_ops->affinst_on_finish(read_mpidr_el1(),
+						   system_node->level,
+						   plat_state);
 }
 
 const afflvl_power_on_finisher_t psci_afflvl_on_finishers[] = {
@@ -467,4 +445,3 @@
 	psci_afflvl1_on_finish,
 	psci_afflvl2_on_finish,
 };
-
diff --git a/services/std_svc/psci/psci_afflvl_suspend.c b/services/std_svc/psci/psci_afflvl_suspend.c
index 70f90a1..4fcabfc 100644
--- a/services/std_svc/psci/psci_afflvl_suspend.c
+++ b/services/std_svc/psci/psci_afflvl_suspend.c
@@ -34,6 +34,7 @@
 #include <arch_helpers.h>
 #include <context.h>
 #include <context_mgmt.h>
+#include <cpu_data.h>
 #include <platform.h>
 #include <runtime_svc.h>
 #include <stddef.h>
@@ -45,76 +46,59 @@
 				      unsigned int);
 
 /*******************************************************************************
- * This function sets the power state of the current cpu while
- * powering down during a cpu_suspend call
+ * This function saves the power state parameter passed in the current PSCI
+ * cpu_suspend call in the per-cpu data array.
  ******************************************************************************/
-void psci_set_suspend_power_state(aff_map_node_t *node, unsigned int power_state)
+void psci_set_suspend_power_state(unsigned int power_state)
 {
-	/*
-	 * Check that nobody else is calling this function on our behalf &
-	 * this information is being set only in the cpu node
-	 */
-	assert(node->mpidr == (read_mpidr() & MPIDR_AFFINITY_MASK));
-	assert(node->level == MPIDR_AFFLVL0);
-
-	/*
-	 * Save PSCI power state parameter for the core in suspend context.
-	 * The node is in always-coherent RAM so it does not need to be flushed
-	 */
-	node->power_state = power_state;
+	set_cpu_data(psci_svc_cpu_data.power_state, power_state);
+	flush_cpu_data(psci_svc_cpu_data.power_state);
 }
 
 /*******************************************************************************
- * This function gets the affinity level till which a cpu is powered down
- * during a cpu_suspend call. Returns PSCI_INVALID_DATA if the
- * power state saved for the node is invalid
+ * This function gets the affinity level till which the current cpu could be
+ * powered down during a cpu_suspend call. Returns PSCI_INVALID_DATA if the
+ * power state is invalid.
  ******************************************************************************/
-int psci_get_suspend_afflvl(unsigned long mpidr)
+int psci_get_suspend_afflvl()
 {
-	aff_map_node_t *node;
+	unsigned int power_state;
 
-	node = psci_get_aff_map_node(mpidr & MPIDR_AFFINITY_MASK,
-			MPIDR_AFFLVL0);
-	assert(node);
+	power_state = get_cpu_data(psci_svc_cpu_data.power_state);
 
-	return psci_get_aff_map_node_suspend_afflvl(node);
+	return ((power_state == PSCI_INVALID_DATA) ?
+		power_state : psci_get_pstate_afflvl(power_state));
 }
 
-
 /*******************************************************************************
- * This function gets the affinity level till which the current cpu was powered
- * down during a cpu_suspend call. Returns PSCI_INVALID_DATA if the
- * power state saved for the node is invalid
+ * This function gets the state id of the current cpu from the power state
+ * parameter saved in the per-cpu data array. Returns PSCI_INVALID_DATA if the
+ * power state saved is invalid.
  ******************************************************************************/
-int psci_get_aff_map_node_suspend_afflvl(aff_map_node_t *node)
+int psci_get_suspend_stateid()
 {
 	unsigned int power_state;
 
-	assert(node->level == MPIDR_AFFLVL0);
+	power_state = get_cpu_data(psci_svc_cpu_data.power_state);
 
-	power_state = node->power_state;
 	return ((power_state == PSCI_INVALID_DATA) ?
-				power_state : psci_get_pstate_afflvl(power_state));
+		power_state : psci_get_pstate_id(power_state));
 }
 
 /*******************************************************************************
- * This function gets the state id of a cpu stored in suspend context
- * while powering down during a cpu_suspend call. Returns 0xFFFFFFFF
- * if the power state saved for the node is invalid
+ * This function gets the state id of the cpu specified by the 'mpidr' parameter
+ * from the power state parameter saved in the per-cpu data array. Returns
+ * PSCI_INVALID_DATA if the power state saved is invalid.
  ******************************************************************************/
-int psci_get_suspend_stateid(unsigned long mpidr)
+int psci_get_suspend_stateid_by_mpidr(unsigned long mpidr)
 {
-	aff_map_node_t *node;
 	unsigned int power_state;
 
-	node = psci_get_aff_map_node(mpidr & MPIDR_AFFINITY_MASK,
-			MPIDR_AFFLVL0);
-	assert(node);
-	assert(node->level == MPIDR_AFFLVL0);
+	power_state = get_cpu_data_by_mpidr(mpidr,
+					    psci_svc_cpu_data.power_state);
 
-	power_state = node->power_state;
 	return ((power_state == PSCI_INVALID_DATA) ?
-					power_state : psci_get_pstate_id(power_state));
+		power_state : psci_get_pstate_id(power_state));
 }
 
 /*******************************************************************************
@@ -126,7 +110,6 @@
 				unsigned long context_id,
 				unsigned int power_state)
 {
-	unsigned int plat_state;
 	unsigned long psci_entrypoint;
 	uint32_t ns_scr_el3 = read_scr_el3();
 	uint32_t ns_sctlr_el1 = read_sctlr_el1();
@@ -136,7 +119,7 @@
 	assert(cpu_node->level == MPIDR_AFFLVL0);
 
 	/* Save PSCI power state parameter for the core in suspend context */
-	psci_set_suspend_power_state(cpu_node, power_state);
+	psci_set_suspend_power_state(power_state);
 
 	/*
 	 * Generic management: Store the re-entry information for the non-secure
@@ -151,9 +134,6 @@
 	if (psci_spd_pm && psci_spd_pm->svc_suspend)
 		psci_spd_pm->svc_suspend(power_state);
 
-	/* State management: mark this cpu as suspended */
-	psci_set_state(cpu_node, PSCI_STATE_SUSPEND);
-
 	/*
 	 * Generic management: Store the re-entry information for the
 	 * non-secure world
@@ -172,24 +152,20 @@
 	 */
 	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL0);
 
+	if (!psci_plat_pm_ops->affinst_suspend)
+		return PSCI_E_SUCCESS;
+
 	/*
 	 * Plat. management: Allow the platform to perform the
 	 * necessary actions to turn off this cpu e.g. set the
 	 * platform defined mailbox with the psci entrypoint,
 	 * program the power controller etc.
 	 */
-	rc = PSCI_E_SUCCESS;
-
-	if (psci_plat_pm_ops->affinst_suspend) {
-		plat_state = psci_get_phys_state(cpu_node);
-		rc = psci_plat_pm_ops->affinst_suspend(read_mpidr_el1(),
-						       psci_entrypoint,
-						       ns_entrypoint,
-						       cpu_node->level,
-						       plat_state);
-	}
-
-	return rc;
+	return psci_plat_pm_ops->affinst_suspend(read_mpidr_el1(),
+						 psci_entrypoint,
+						 ns_entrypoint,
+						 cpu_node->level,
+						 psci_get_phys_state(cpu_node));
 }
 
 static int psci_afflvl1_suspend(aff_map_node_t *cluster_node,
@@ -197,51 +173,36 @@
 				unsigned long context_id,
 				unsigned int power_state)
 {
-	int rc = PSCI_E_SUCCESS;
 	unsigned int plat_state;
 	unsigned long psci_entrypoint;
 
 	/* Sanity check the cluster level */
 	assert(cluster_node->level == MPIDR_AFFLVL1);
 
-	/* State management: Decrement the cluster reference count */
-	psci_set_state(cluster_node, PSCI_STATE_SUSPEND);
-
 	/*
-	 * Keep the physical state of this cluster handy to decide
-	 * what action needs to be taken
-	 */
-	plat_state = psci_get_phys_state(cluster_node);
-
-	/*
 	 * Arch. management: Flush all levels of caches to PoC if the
-	 * cluster is to be shutdown
+	 * cluster is to be shutdown.
 	 */
-	if (plat_state == PSCI_STATE_OFF)
-		dcsw_op_all(DCCISW);
+	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL1);
+
+	if (!psci_plat_pm_ops->affinst_suspend)
+		return PSCI_E_SUCCESS;
 
 	/*
-	 * Plat. Management. Allow the platform to do its cluster
-	 * specific bookeeping e.g. turn off interconnect coherency,
-	 * program the power controller etc.
+	 * Plat. Management. Allow the platform to do its cluster specific
+	 * bookkeeping e.g. turn off interconnect coherency, program the power
+	 * controller etc. Sending the psci entrypoint is currently redundant
+	 * beyond affinity level 0 but one never knows what a platform might
+	 * do. Also it allows us to keep the platform handler prototype the
+	 * same.
 	 */
-	if (psci_plat_pm_ops->affinst_suspend) {
-
-		/*
-		 * Sending the psci entrypoint is currently redundant
-		 * beyond affinity level 0 but one never knows what a
-		 * platform might do. Also it allows us to keep the
-		 * platform handler prototype the same.
-		 */
-		psci_entrypoint = (unsigned long) psci_aff_suspend_finish_entry;
-		rc = psci_plat_pm_ops->affinst_suspend(read_mpidr_el1(),
-						       psci_entrypoint,
-						       ns_entrypoint,
-						       cluster_node->level,
-						       plat_state);
-	}
-
-	return rc;
+	plat_state = psci_get_phys_state(cluster_node);
+	psci_entrypoint = (unsigned long) psci_aff_suspend_finish_entry;
+	return psci_plat_pm_ops->affinst_suspend(read_mpidr_el1(),
+						 psci_entrypoint,
+						 ns_entrypoint,
+						 cluster_node->level,
+						 plat_state);
 }
 
 
@@ -250,16 +211,12 @@
 				unsigned long context_id,
 				unsigned int power_state)
 {
-	int rc = PSCI_E_SUCCESS;
 	unsigned int plat_state;
 	unsigned long psci_entrypoint;
 
 	/* Cannot go beyond this */
 	assert(system_node->level == MPIDR_AFFLVL2);
 
-	/* State management: Decrement the system reference count */
-	psci_set_state(system_node, PSCI_STATE_SUSPEND);
-
 	/*
 	 * Keep the physical state of the system handy to decide what
 	 * action needs to be taken
@@ -267,26 +224,31 @@
 	plat_state = psci_get_phys_state(system_node);
 
 	/*
+	 * Arch. management: Flush all levels of caches to PoC if the
+	 * system is to be shut down.
+	 */
+	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL2);
+
+	/*
 	 * Plat. Management : Allow the platform to do its bookeeping
 	 * at this affinity level
 	 */
-	if (psci_plat_pm_ops->affinst_suspend) {
+	if (!psci_plat_pm_ops->affinst_suspend)
+		return PSCI_E_SUCCESS;
 
-		/*
-		 * Sending the psci entrypoint is currently redundant
-		 * beyond affinity level 0 but one never knows what a
-		 * platform might do. Also it allows us to keep the
-		 * platform handler prototype the same.
-		 */
-		psci_entrypoint = (unsigned long) psci_aff_suspend_finish_entry;
-		rc = psci_plat_pm_ops->affinst_suspend(read_mpidr_el1(),
-						       psci_entrypoint,
-						       ns_entrypoint,
-						       system_node->level,
-						       plat_state);
-	}
-
-	return rc;
+	/*
+	 * Sending the psci entrypoint is currently redundant
+	 * beyond affinity level 0 but one never knows what a
+	 * platform might do. Also it allows us to keep the
+	 * platform handler prototype the same.
+	 */
+	plat_state = psci_get_phys_state(system_node);
+	psci_entrypoint = (unsigned long) psci_aff_suspend_finish_entry;
+	return psci_plat_pm_ops->affinst_suspend(read_mpidr_el1(),
+						 psci_entrypoint,
+						 ns_entrypoint,
+						 system_node->level,
+						 plat_state);
 }
 
 static const afflvl_suspend_handler_t psci_afflvl_suspend_handlers[] = {
@@ -300,7 +262,7 @@
  * topology tree and calls the suspend handler for the corresponding affinity
  * levels
  ******************************************************************************/
-static int psci_call_suspend_handlers(mpidr_aff_map_nodes_t mpidr_nodes,
+static int psci_call_suspend_handlers(aff_map_node_t *mpidr_nodes[],
 				      int start_afflvl,
 				      int end_afflvl,
 				      unsigned long entrypoint,
@@ -358,6 +320,7 @@
 {
 	int rc = PSCI_E_SUCCESS;
 	mpidr_aff_map_nodes_t mpidr_nodes;
+	unsigned int max_phys_off_afflvl;
 
 	/*
 	 * Collect the pointers to the nodes in the topology tree for
@@ -381,6 +344,24 @@
 				  end_afflvl,
 				  mpidr_nodes);
 
+	/*
+	 * This function updates the state of each affinity instance
+	 * corresponding to the mpidr in the range of affinity levels
+	 * specified.
+	 */
+	psci_do_afflvl_state_mgmt(start_afflvl,
+				  end_afflvl,
+				  mpidr_nodes,
+				  PSCI_STATE_SUSPEND);
+
+	max_phys_off_afflvl = psci_find_max_phys_off_afflvl(start_afflvl,
+							    end_afflvl,
+							    mpidr_nodes);
+	assert(max_phys_off_afflvl != PSCI_INVALID_DATA);
+
+	/* Stash the highest affinity level that will be turned off */
+	psci_set_max_phys_off_afflvl(max_phys_off_afflvl);
+
 	/* Perform generic, architecture and platform specific handling */
 	rc = psci_call_suspend_handlers(mpidr_nodes,
 					start_afflvl,
@@ -390,6 +371,13 @@
 					power_state);
 
 	/*
+	 * Invalidate the entry for the highest affinity level stashed earlier.
+	 * This ensures that any reads of this variable outside the power
+	 * up/down sequences return PSCI_INVALID_DATA.
+	 */
+	psci_set_max_phys_off_afflvl(PSCI_INVALID_DATA);
+
+	/*
 	 * Release the locks corresponding to each affinity level in the
 	 * reverse order to which they were acquired.
 	 */
@@ -451,13 +439,13 @@
 	 * error, it's expected to assert within
 	 */
 	if (psci_spd_pm && psci_spd_pm->svc_suspend) {
-		suspend_level = psci_get_aff_map_node_suspend_afflvl(cpu_node);
+		suspend_level = psci_get_suspend_afflvl();
 		assert (suspend_level != PSCI_INVALID_DATA);
 		psci_spd_pm->svc_suspend_finish(suspend_level);
 	}
 
 	/* Invalidate the suspend context for the node */
-	psci_set_suspend_power_state(cpu_node, PSCI_INVALID_DATA);
+	psci_set_suspend_power_state(PSCI_INVALID_DATA);
 
 	/*
 	 * Generic management: Now we just need to retrieve the
@@ -466,9 +454,6 @@
 	 */
 	cm_prepare_el3_exit(NON_SECURE);
 
-	/* State management: mark this cpu as on */
-	psci_set_state(cpu_node, PSCI_STATE_ON);
-
 	/* Clean caches before re-entering normal world */
 	dcsw_op_louis(DCCSW);
 
@@ -500,9 +485,6 @@
 		assert(rc == PSCI_E_SUCCESS);
 	}
 
-	/* State management: Increment the cluster reference count */
-	psci_set_state(cluster_node, PSCI_STATE_ON);
-
 	return rc;
 }
 
@@ -537,9 +519,6 @@
 		assert(rc == PSCI_E_SUCCESS);
 	}
 
-	/* State management: Increment the system reference count */
-	psci_set_state(system_node, PSCI_STATE_ON);
-
 	return rc;
 }
 
diff --git a/services/std_svc/psci/psci_common.c b/services/std_svc/psci/psci_common.c
index 2fd1764..2267ad0 100644
--- a/services/std_svc/psci/psci_common.c
+++ b/services/std_svc/psci/psci_common.c
@@ -59,39 +59,101 @@
 const plat_pm_ops_t *psci_plat_pm_ops;
 
 /*******************************************************************************
+ * Given the array of affinity level node pointers for an mpidr, scan the levels
+ * from 'start_afflvl' to 'end_afflvl' and return the highest one whose physical
+ * state is PSCI_STATE_OFF, or PSCI_INVALID_DATA if no level in range is off.
+ ******************************************************************************/
+uint32_t psci_find_max_phys_off_afflvl(uint32_t start_afflvl,
+				       uint32_t end_afflvl,
+				       aff_map_node_t *mpidr_nodes[])
+{
+	uint32_t lvl, highest_off = PSCI_INVALID_DATA;
+
+	/* Walk bottom-up; the last OFF level seen is the highest one. */
+	for (lvl = start_afflvl; lvl <= end_afflvl; lvl++) {
+		aff_map_node_t *node = mpidr_nodes[lvl];
+
+		/* Levels with no node in the map are skipped. */
+		if (node && psci_get_phys_state(node) == PSCI_STATE_OFF)
+			highest_off = lvl;
+	}
+
+	return highest_off;
+}
+
+/*******************************************************************************
+ * This function saves the highest affinity level which is in OFF state in the
+ * 'max_phys_off_afflvl' field of the psci cpu data. The affinity instance with
+ * which the level is associated is determined by the caller.
+ ******************************************************************************/
+void psci_set_max_phys_off_afflvl(uint32_t afflvl)
+{
+	set_cpu_data(psci_svc_cpu_data.max_phys_off_afflvl, afflvl);
+
+	/*
+	 * Flush the value just written so that it reaches main memory and any
+	 * speculatively pre-fetched stale copies are invalidated from the
+	 * caches of other cpus in the same coherency domain. This ensures that
+	 * the value can be safely read irrespective of the state of the data
+	 * cache.
+	 */
+	flush_cpu_data(psci_svc_cpu_data.max_phys_off_afflvl);
+}
+
+/*******************************************************************************
+ * This function reads back the highest affinity level in OFF state from the
+ * 'max_phys_off_afflvl' field of the psci cpu data. The affinity instance with
+ * which the level is associated is determined by the caller.
+ ******************************************************************************/
+uint32_t psci_get_max_phys_off_afflvl(void)
+{
+	/*
+	 * Flush before reading: ensure that the last update of this value in
+	 * this cpu's cache is flushed to main memory and any speculatively
+	 * pre-fetched stale copies are invalidated from the caches of other
+	 * cpus in the same coherency domain. This ensures that the value is
+	 * always read from the main memory when it was written before the data
+	 * cache was enabled.
+	 */
+	flush_cpu_data(psci_svc_cpu_data.max_phys_off_afflvl);
+	return get_cpu_data(psci_svc_cpu_data.max_phys_off_afflvl);
+}
+
+/*******************************************************************************
  * Routine to return the maximum affinity level to traverse to after a cpu has
  * been physically powered up. It is expected to be called immediately after
- * reset from assembler code. It has to find its 'aff_map_node' instead of
- * getting it as an argument.
- * TODO: Calling psci_get_aff_map_node() with the MMU disabled is slow. Add
- * support to allow faster access to the target affinity level.
+ * reset from assembler code.
  ******************************************************************************/
-int get_power_on_target_afflvl(unsigned long mpidr)
+int get_power_on_target_afflvl(void)
 {
-	aff_map_node_t *node;
-	unsigned int state;
 	int afflvl;
 
+#if DEBUG
+	unsigned int state;
+	aff_map_node_t *node;
+
 	/* Retrieve our node from the topology tree */
-	node = psci_get_aff_map_node(mpidr & MPIDR_AFFINITY_MASK,
-			MPIDR_AFFLVL0);
+	node = psci_get_aff_map_node(read_mpidr_el1() & MPIDR_AFFINITY_MASK,
+				     MPIDR_AFFLVL0);
 	assert(node);
 
 	/*
-	 * Return the maximum supported affinity level if this cpu was off.
-	 * Call the handler in the suspend code if this cpu had been suspended.
-	 * Any other state is invalid.
+	 * Sanity check the state of the cpu. It should be either suspend or "on
+	 * pending"
 	 */
 	state = psci_get_state(node);
-	if (state == PSCI_STATE_ON_PENDING)
-		return get_max_afflvl();
+	assert(state == PSCI_STATE_SUSPEND || state == PSCI_STATE_ON_PENDING);
+#endif
 
-	if (state == PSCI_STATE_SUSPEND) {
-		afflvl = psci_get_aff_map_node_suspend_afflvl(node);
-		assert(afflvl != PSCI_INVALID_DATA);
-		return afflvl;
-	}
-	return PSCI_E_INVALID_PARAMS;
+	/*
+	 * Assume that this cpu was suspended and retrieve its target affinity
+	 * level. If it is invalid then it could only have been turned off
+	 * earlier. get_max_afflvl() will return the highest affinity level a
+	 * cpu can be turned off to.
+	 */
+	afflvl = psci_get_suspend_afflvl();
+	if (afflvl == PSCI_INVALID_DATA)
+		afflvl = get_max_afflvl();
+	return afflvl;
 }
 
 /*******************************************************************************
@@ -153,12 +215,31 @@
 
 /*******************************************************************************
  * This function is passed an array of pointers to affinity level nodes in the
+ * topology tree for an mpidr and the state which each node should transition
+ * to. It updates the state of each node between the specified affinity levels.
+ ******************************************************************************/
+void psci_do_afflvl_state_mgmt(uint32_t start_afflvl,
+			       uint32_t end_afflvl,
+			       aff_map_node_t *mpidr_nodes[],
+			       uint32_t state)
+{
+	uint32_t lvl;
+
+	/* Apply 'state' to every level in the range that has a node. */
+	for (lvl = start_afflvl; lvl <= end_afflvl; lvl++) {
+		if (mpidr_nodes[lvl] != NULL)
+			psci_set_state(mpidr_nodes[lvl], state);
+	}
+}
+
+/*******************************************************************************
+ * This function is passed an array of pointers to affinity level nodes in the
  * topology tree for an mpidr. It picks up locks for each affinity level bottom
  * up in the range specified.
  ******************************************************************************/
 void psci_acquire_afflvl_locks(int start_afflvl,
 			       int end_afflvl,
-			       mpidr_aff_map_nodes_t mpidr_nodes)
+			       aff_map_node_t *mpidr_nodes[])
 {
 	int level;
 
@@ -176,7 +257,7 @@
  ******************************************************************************/
 void psci_release_afflvl_locks(int start_afflvl,
 			       int end_afflvl,
-			       mpidr_aff_map_nodes_t mpidr_nodes)
+			       aff_map_node_t *mpidr_nodes[])
 {
 	int level;
 
@@ -348,7 +429,7 @@
  * topology tree and calls the physical power on handler for the corresponding
  * affinity levels
  ******************************************************************************/
-static int psci_call_power_on_handlers(mpidr_aff_map_nodes_t mpidr_nodes,
+static int psci_call_power_on_handlers(aff_map_node_t *mpidr_nodes[],
 				       int start_afflvl,
 				       int end_afflvl,
 				       afflvl_power_on_finisher_t *pon_handlers)
@@ -397,6 +478,8 @@
 {
 	mpidr_aff_map_nodes_t mpidr_nodes;
 	int rc;
+	unsigned int max_phys_off_afflvl;
+
 
 	/*
 	 * Collect the pointers to the nodes in the topology tree for
@@ -420,6 +503,17 @@
 				  end_afflvl,
 				  mpidr_nodes);
 
+	max_phys_off_afflvl = psci_find_max_phys_off_afflvl(start_afflvl,
+							    end_afflvl,
+							    mpidr_nodes);
+	assert(max_phys_off_afflvl != PSCI_INVALID_DATA);
+
+	/*
+	 * Stash the highest affinity level that will come out of the OFF or
+	 * SUSPEND states.
+	 */
+	psci_set_max_phys_off_afflvl(max_phys_off_afflvl);
+
 	/* Perform generic, architecture and platform specific handling */
 	rc = psci_call_power_on_handlers(mpidr_nodes,
 					 start_afflvl,
@@ -429,6 +523,23 @@
 		panic();
 
 	/*
+	 * This function updates the state of each affinity instance
+	 * corresponding to the mpidr in the range of affinity levels
+	 * specified.
+	 */
+	psci_do_afflvl_state_mgmt(start_afflvl,
+				  end_afflvl,
+				  mpidr_nodes,
+				  PSCI_STATE_ON);
+
+	/*
+	 * Invalidate the entry for the highest affinity level stashed earlier.
+	 * This ensures that any reads of this variable outside the power
+	 * up/down sequences return PSCI_INVALID_DATA
+	 */
+	psci_set_max_phys_off_afflvl(PSCI_INVALID_DATA);
+
+	/*
 	 * This loop releases the lock corresponding to each affinity level
 	 * in the reverse order to which they were acquired.
 	 */
diff --git a/services/std_svc/psci/psci_entry.S b/services/std_svc/psci/psci_entry.S
index 68b917e..8145012 100644
--- a/services/std_svc/psci/psci_entry.S
+++ b/services/std_svc/psci/psci_entry.S
@@ -35,8 +35,6 @@
 
 	.globl	psci_aff_on_finish_entry
 	.globl	psci_aff_suspend_finish_entry
-	.globl	__psci_cpu_off
-	.globl	__psci_cpu_suspend
 	.globl	psci_power_down_wfi
 
 	/* -----------------------------------------------------
@@ -80,6 +78,12 @@
 	bl	init_cpu_data_ptr
 
 	/* ---------------------------------------------
+	 * Initialize the cpu_ops pointer.
+	 * ---------------------------------------------
+	 */
+	bl	init_cpu_ops
+
+	/* ---------------------------------------------
 	 * Set the exception vectors
 	 * ---------------------------------------------
 	 */
@@ -134,18 +138,13 @@
 	 * level 0.
 	 * ---------------------------------------------
 	 */
-	mrs	x0, mpidr_el1
 	bl	get_power_on_target_afflvl
-	cmp	x0, xzr
-	b.lt	_panic
 	mov	x2, x23
 	mov	x1, x0
 	mov	x0, #MPIDR_AFFLVL0
 	bl	psci_afflvl_power_on_finish
 
 	b	el3_exit
-_panic:
-	b	_panic
 
 	/* --------------------------------------------
 	 * This function is called to indicate to the
diff --git a/services/std_svc/psci/psci_helpers.S b/services/std_svc/psci/psci_helpers.S
index 21b5688..9a51d5c 100644
--- a/services/std_svc/psci/psci_helpers.S
+++ b/services/std_svc/psci/psci_helpers.S
@@ -30,7 +30,9 @@
 
 #include <arch.h>
 #include <asm_macros.S>
+#include <assert_macros.S>
 #include <platform_def.h>
+#include <psci.h>
 
 	.globl	psci_do_pwrdown_cache_maintenance
 	.globl	psci_do_pwrup_cache_maintenance
@@ -38,26 +40,30 @@
 /* -----------------------------------------------------------------------
  * void psci_do_pwrdown_cache_maintenance(uint32_t affinity level);
  *
- * This function performs cache maintenance before this cpu is powered
- * off. The levels of cache affected are determined by the affinity level
- * which is passed as the argument. Additionally, this function also
- * ensures that stack memory is correctly flushed out to avoid coherency
- * issues due to a change in its memory attributes after the data cache
- * is disabled.
+ * This function performs cache maintenance if the specified affinity
+ * level is equal to the level of the highest affinity instance which
+ * will be/is physically powered off. The levels of cache affected are
+ * determined by the affinity level which is passed as the argument i.e.
+ * level 0 results in a flush of the L1 cache. Both the L1 and L2 caches
+ * are flushed for a higher affinity level.
+ *
+ * Additionally, this function also ensures that stack memory is correctly
+ * flushed out to avoid coherency issues due to a change in its memory
+ * attributes after the data cache is disabled.
  * -----------------------------------------------------------------------
  */
 func psci_do_pwrdown_cache_maintenance
 	stp     x29, x30, [sp,#-16]!
 	stp     x19, x20, [sp,#-16]!
 
-	/* ---------------------------------------------
-	 * Disable the Data Cache.
-	 * ---------------------------------------------
-	 */
-	mrs	x1, sctlr_el3
-	bic	x1, x1, #SCTLR_C_BIT
-	msr	sctlr_el3, x1
-	isb
+	mov	x19, x0
+	bl	psci_get_max_phys_off_afflvl
+#if ASM_ASSERTION
+	cmp	x0, #PSCI_INVALID_DATA
+	ASM_ASSERT(ne)
+#endif
+	cmp	x0, x19
+	b.ne	1f
 
 	/* ---------------------------------------------
 	 * Determine to how many levels of cache will be
@@ -72,29 +78,12 @@
 	 * ---------------------------------------------
 	 */
 	cmp	x0, #MPIDR_AFFLVL0
-	mov	x0, #DCCISW
-	b.ne	flush_caches_to_poc
-
-	/* ---------------------------------------------
-	 * Flush L1 cache to PoU.
-	 * ---------------------------------------------
-	 */
-	bl	dcsw_op_louis
+	b.eq	do_core_pwr_dwn
+	bl	prepare_cluster_pwr_dwn
 	b	do_stack_maintenance
 
-	/* ---------------------------------------------
-	 * Flush L1 and L2 caches to PoC.
-	 * ---------------------------------------------
-	 */
-flush_caches_to_poc:
-	bl	dcsw_op_all
-
-	/* ---------------------------------------------
-	 * TODO: Intra-cluster coherency should be
-	 * turned off here once cpu-specific
-	 * abstractions are in place.
-	 * ---------------------------------------------
-	 */
+do_core_pwr_dwn:
+	bl	prepare_core_pwr_dwn
 
 	/* ---------------------------------------------
 	 * Do stack maintenance by flushing the used
@@ -127,6 +116,7 @@
 	sub	x1, sp, x0
 	bl	inv_dcache_range
 
+1:
 	ldp	x19, x20, [sp], #16
 	ldp	x29, x30, [sp], #16
 	ret
diff --git a/services/std_svc/psci/psci_private.h b/services/std_svc/psci/psci_private.h
index 4bf9107..924a24f 100644
--- a/services/std_svc/psci/psci_private.h
+++ b/services/std_svc/psci/psci_private.h
@@ -52,7 +52,6 @@
 	unsigned short ref_count;
 	unsigned char state;
 	unsigned char level;
-	unsigned int power_state;
 	bakery_lock_t lock;
 } aff_map_node_t;
 
@@ -85,7 +84,7 @@
 void psci_set_state(aff_map_node_t *node, unsigned short state);
 unsigned long mpidr_set_aff_inst(unsigned long, unsigned char, int);
 int psci_validate_mpidr(unsigned long, int);
-int get_power_on_target_afflvl(unsigned long mpidr);
+int get_power_on_target_afflvl(void);
 void psci_afflvl_power_on_finish(int,
 				int,
 				afflvl_power_on_finisher_t *);
@@ -93,19 +92,27 @@
 		       uint64_t entrypoint, uint64_t context_id,
 		       uint32_t caller_scr_el3, uint32_t caller_sctlr_el1);
 int psci_check_afflvl_range(int start_afflvl, int end_afflvl);
+void psci_do_afflvl_state_mgmt(uint32_t start_afflvl,
+			       uint32_t end_afflvl,
+			       aff_map_node_t *mpidr_nodes[],
+			       uint32_t state);
 void psci_acquire_afflvl_locks(int start_afflvl,
-				int end_afflvl,
-				mpidr_aff_map_nodes_t mpidr_nodes);
+			       int end_afflvl,
+			       aff_map_node_t *mpidr_nodes[]);
 void psci_release_afflvl_locks(int start_afflvl,
 				int end_afflvl,
 				mpidr_aff_map_nodes_t mpidr_nodes);
 void psci_print_affinity_map(void);
+void psci_set_max_phys_off_afflvl(uint32_t afflvl);
+uint32_t psci_find_max_phys_off_afflvl(uint32_t start_afflvl,
+				       uint32_t end_afflvl,
+				       aff_map_node_t *mpidr_nodes[]);
 
 /* Private exported functions from psci_setup.c */
 int psci_get_aff_map_nodes(unsigned long mpidr,
 				int start_afflvl,
 				int end_afflvl,
-				mpidr_aff_map_nodes_t mpidr_nodes);
+				aff_map_node_t *mpidr_nodes[]);
 aff_map_node_t *psci_get_aff_map_node(unsigned long, int);
 
 /* Private exported functions from psci_affinity_on.c */
@@ -119,15 +126,13 @@
 int psci_afflvl_off(int, int);
 
 /* Private exported functions from psci_affinity_suspend.c */
-void psci_set_suspend_power_state(aff_map_node_t *node,
-				unsigned int power_state);
-int psci_get_aff_map_node_suspend_afflvl(aff_map_node_t *node);
 int psci_afflvl_suspend(unsigned long,
 			unsigned long,
 			unsigned int,
 			int,
 			int);
 unsigned int psci_afflvl_suspend_finish(int, int);
+void psci_set_suspend_power_state(unsigned int power_state);
 
 /* Private exported functions from psci_helpers.S */
 void psci_do_pwrdown_cache_maintenance(uint32_t affinity_level);
diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c
index 68f19a0..a5ae4ef 100644
--- a/services/std_svc/psci/psci_setup.c
+++ b/services/std_svc/psci/psci_setup.c
@@ -116,7 +116,7 @@
 int psci_get_aff_map_nodes(unsigned long mpidr,
 			   int start_afflvl,
 			   int end_afflvl,
-			   mpidr_aff_map_nodes_t mpidr_nodes)
+			   aff_map_node_t *mpidr_nodes[])
 {
 	int rc = PSCI_E_INVALID_PARAMS, level;
 	aff_map_node_t *node;
@@ -189,9 +189,6 @@
 		if (state & PSCI_AFF_PRESENT)
 			psci_set_state(&psci_aff_map[idx], PSCI_STATE_OFF);
 
-		/* Invalidate the suspend context for the node */
-		psci_aff_map[idx].power_state = PSCI_INVALID_DATA;
-
 		/*
 		 * Associate a non-secure context with this affinity
 		 * instance through the context management library.
@@ -199,6 +196,20 @@
 		linear_id = platform_get_core_pos(mpidr);
 		assert(linear_id < PLATFORM_CORE_COUNT);
 
+		/* Invalidate the suspend context for the node */
+		set_cpu_data_by_index(linear_id,
+				      psci_svc_cpu_data.power_state,
+				      PSCI_INVALID_DATA);
+
+		/*
+		 * There is no state associated with the current execution
+		 * context so ensure that any reads of the highest affinity
+		 * level in a powered down state return PSCI_INVALID_DATA.
+		 */
+		set_cpu_data_by_index(linear_id,
+				      psci_svc_cpu_data.max_phys_off_afflvl,
+				      PSCI_INVALID_DATA);
+
 		cm_set_context_by_mpidr(mpidr,
 					(void *) &psci_ns_context[linear_id],
 					NON_SECURE);