Merge pull request #661 from dp-arm/master

Replace fip_create with fiptool
diff --git a/Makefile b/Makefile
index bbda656..a68335b 100644
--- a/Makefile
+++ b/Makefile
@@ -108,6 +108,10 @@
 ENABLE_PMF			:= 0
 # Flag to enable PSCI STATs functionality
 ENABLE_PSCI_STAT	:= 0
+# Whether code and read-only data should be put on separate memory pages.
+# The platform Makefile is free to override this value.
+SEPARATE_CODE_AND_RODATA	:= 0
+
 
 ################################################################################
 # Checkpatch script options
@@ -222,20 +226,24 @@
 				plat/common/aarch64/platform_helpers.S	\
 				${STDLIB_SRCS}
 
-INCLUDES		+=	-Iinclude/bl1			\
-				-Iinclude/bl31			\
-				-Iinclude/bl31/services		\
-				-Iinclude/common		\
-				-Iinclude/drivers		\
-				-Iinclude/drivers/arm		\
-				-Iinclude/drivers/auth		\
-				-Iinclude/drivers/io		\
-				-Iinclude/drivers/ti/uart	\
-				-Iinclude/lib			\
-				-Iinclude/lib/aarch64		\
-				-Iinclude/lib/cpus/aarch64	\
-				-Iinclude/plat/common		\
-				${PLAT_INCLUDES}		\
+INCLUDES		+=	-Iinclude/bl1				\
+				-Iinclude/bl31				\
+				-Iinclude/common			\
+				-Iinclude/common/aarch64		\
+				-Iinclude/drivers			\
+				-Iinclude/drivers/arm			\
+				-Iinclude/drivers/auth			\
+				-Iinclude/drivers/io			\
+				-Iinclude/drivers/ti/uart		\
+				-Iinclude/lib				\
+				-Iinclude/lib/aarch64			\
+				-Iinclude/lib/cpus/aarch64		\
+				-Iinclude/lib/el3_runtime		\
+				-Iinclude/lib/el3_runtime/aarch64	\
+				-Iinclude/lib/psci			\
+				-Iinclude/plat/common			\
+				-Iinclude/services			\
+				${PLAT_INCLUDES}			\
 				${SPD_INCLUDES}
 
 
@@ -419,6 +427,7 @@
 $(eval $(call assert_boolean,PL011_GENERIC_UART))
 $(eval $(call assert_boolean,ENABLE_PMF))
 $(eval $(call assert_boolean,ENABLE_PSCI_STAT))
+$(eval $(call assert_boolean,SEPARATE_CODE_AND_RODATA))
 
 
 ################################################################################
@@ -448,6 +457,7 @@
 $(eval $(call add_define,PL011_GENERIC_UART))
 $(eval $(call add_define,ENABLE_PMF))
 $(eval $(call add_define,ENABLE_PSCI_STAT))
+$(eval $(call add_define,SEPARATE_CODE_AND_RODATA))
 # Define the EL3_PAYLOAD_BASE flag only if it is provided.
 ifdef EL3_PAYLOAD_BASE
         $(eval $(call add_define,EL3_PAYLOAD_BASE))
diff --git a/bl1/bl1.ld.S b/bl1/bl1.ld.S
index df9a799..b9554d1 100644
--- a/bl1/bl1.ld.S
+++ b/bl1/bl1.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,6 +45,43 @@
     ASSERT(. == ALIGN(4096),
            "BL1_RO_BASE address is not aligned on a page boundary.")
 
+#if SEPARATE_CODE_AND_RODATA
+    .text . : {
+        __TEXT_START__ = .;
+        *bl1_entrypoint.o(.text*)
+        *(.text*)
+        *(.vectors)
+        . = NEXT(4096);
+        __TEXT_END__ = .;
+     } >ROM
+
+    .rodata . : {
+        __RODATA_START__ = .;
+        *(.rodata*)
+
+        /* Ensure 8-byte alignment for descriptors and ensure inclusion */
+        . = ALIGN(8);
+        __PARSER_LIB_DESCS_START__ = .;
+        KEEP(*(.img_parser_lib_descs))
+        __PARSER_LIB_DESCS_END__ = .;
+
+        /*
+         * Ensure 8-byte alignment for cpu_ops so that its fields are also
+         * aligned. Also ensure cpu_ops inclusion.
+         */
+        . = ALIGN(8);
+        __CPU_OPS_START__ = .;
+        KEEP(*(cpu_ops))
+        __CPU_OPS_END__ = .;
+
+        /*
+         * No need to pad out the .rodata section to a page boundary. Next is
+         * the .data section, which can mapped in ROM with the same memory
+         * attributes as the .rodata section.
+         */
+        __RODATA_END__ = .;
+    } >ROM
+#else
     ro . : {
         __RO_START__ = .;
         *bl1_entrypoint.o(.text*)
@@ -69,6 +106,7 @@
         *(.vectors)
         __RO_END__ = .;
     } >ROM
+#endif
 
     ASSERT(__CPU_OPS_END__ > __CPU_OPS_START__,
            "cpu_ops not defined for this platform.")
@@ -139,12 +177,14 @@
 
     __DATA_ROM_START__ = LOADADDR(.data);
     __DATA_SIZE__ = SIZEOF(.data);
+
     /*
      * The .data section is the last PROGBITS section so its end marks the end
-     * of the read-only part of BL1's binary.
+     * of BL1's actual content in Trusted ROM.
      */
-    ASSERT(__DATA_ROM_START__ + __DATA_SIZE__ <= BL1_RO_LIMIT,
-           "BL1's RO section has exceeded its limit.")
+    __BL1_ROM_END__ =  __DATA_ROM_START__ + __DATA_SIZE__;
+    ASSERT(__BL1_ROM_END__ <= BL1_RO_LIMIT,
+           "BL1's ROM content has exceeded its limit.")
 
     __BSS_SIZE__ = SIZEOF(.bss);
 
diff --git a/bl1/bl1.mk b/bl1/bl1.mk
index 21e87c7..591e047 100644
--- a/bl1/bl1.mk
+++ b/bl1/bl1.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -33,9 +33,9 @@
 				bl1/aarch64/bl1_entrypoint.S		\
 				bl1/aarch64/bl1_exceptions.S		\
 				bl1/bl1_context_mgmt.c			\
-				common/aarch64/context.S		\
-				common/context_mgmt.c			\
 				lib/cpus/aarch64/cpu_helpers.S		\
+				lib/el3_runtime/aarch64/context.S	\
+				lib/el3_runtime/aarch64/context_mgmt.c	\
 				plat/common/plat_bl1_common.c
 
 ifeq (${TRUSTED_BOARD_BOOT},1)
diff --git a/bl1/bl1_private.h b/bl1/bl1_private.h
index 283bbb9..79dde73 100644
--- a/bl1/bl1_private.h
+++ b/bl1/bl1_private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,8 +35,11 @@
 
 /*******************************************************************************
  * Declarations of linker defined symbols which will tell us where BL1 lives
- * in Trusted RAM
+ * in Trusted ROM and RAM
  ******************************************************************************/
+extern uint64_t __BL1_ROM_END__;
+#define BL1_ROM_END (uint64_t)(&__BL1_ROM_END__)
+
 extern uint64_t __BL1_RAM_START__;
 extern uint64_t __BL1_RAM_END__;
 #define BL1_RAM_BASE (uint64_t)(&__BL1_RAM_START__)
diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S
index a660bda..fa694de 100644
--- a/bl2/bl2.ld.S
+++ b/bl2/bl2.ld.S
@@ -45,6 +45,30 @@
     ASSERT(. == ALIGN(4096),
            "BL2_BASE address is not aligned on a page boundary.")
 
+#if SEPARATE_CODE_AND_RODATA
+    .text . : {
+        __TEXT_START__ = .;
+        *bl2_entrypoint.o(.text*)
+        *(.text*)
+        *(.vectors)
+        . = NEXT(4096);
+        __TEXT_END__ = .;
+     } >RAM
+
+    .rodata . : {
+        __RODATA_START__ = .;
+        *(.rodata*)
+
+        /* Ensure 8-byte alignment for descriptors and ensure inclusion */
+        . = ALIGN(8);
+        __PARSER_LIB_DESCS_START__ = .;
+        KEEP(*(.img_parser_lib_descs))
+        __PARSER_LIB_DESCS_END__ = .;
+
+        . = NEXT(4096);
+        __RODATA_END__ = .;
+    } >RAM
+#else
     ro . : {
         __RO_START__ = .;
         *bl2_entrypoint.o(.text*)
@@ -67,6 +91,7 @@
         . = NEXT(4096);
         __RO_END__ = .;
     } >RAM
+#endif
 
     /*
      * Define a linker symbol to mark start of the RW memory area for this
diff --git a/bl2u/bl2u.ld.S b/bl2u/bl2u.ld.S
index ec12077..d72589f 100644
--- a/bl2u/bl2u.ld.S
+++ b/bl2u/bl2u.ld.S
@@ -45,6 +45,23 @@
     ASSERT(. == ALIGN(4096),
            "BL2U_BASE address is not aligned on a page boundary.")
 
+#if SEPARATE_CODE_AND_RODATA
+    .text . : {
+        __TEXT_START__ = .;
+        *bl2u_entrypoint.o(.text*)
+        *(.text*)
+        *(.vectors)
+        . = NEXT(4096);
+        __TEXT_END__ = .;
+     } >RAM
+
+    .rodata . : {
+        __RODATA_START__ = .;
+        *(.rodata*)
+        . = NEXT(4096);
+        __RODATA_END__ = .;
+    } >RAM
+#else
     ro . : {
         __RO_START__ = .;
         *bl2u_entrypoint.o(.text*)
@@ -61,6 +78,7 @@
         . = NEXT(4096);
         __RO_END__ = .;
     } >RAM
+#endif
 
     /*
      * Define a linker symbol to mark start of the RW memory area for this
diff --git a/bl31/aarch64/bl31_arch_setup.c b/bl31/aarch64/bl31_arch_setup.c
deleted file mode 100644
index 3deacba..0000000
--- a/bl31/aarch64/bl31_arch_setup.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <arch.h>
-#include <arch_helpers.h>
-#include <assert.h>
-#include <bl_common.h>
-#include <bl31.h>
-#include <cpu_data.h>
-#include <platform.h>
-
-/*******************************************************************************
- * This duplicates what the primary cpu did after a cold boot in BL1. The same
- * needs to be done when a cpu is hotplugged in. This function could also over-
- * ride any EL3 setup done by BL1 as this code resides in rw memory.
- ******************************************************************************/
-void bl31_arch_setup(void)
-{
-	/* Program the counter frequency */
-	write_cntfrq_el0(plat_get_syscnt_freq2());
-
-	/* Initialize the cpu_ops pointer. */
-	init_cpu_ops();
-}
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 1c8eed9..4c3a515 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,9 +31,10 @@
 #include <arch.h>
 #include <bl_common.h>
 #include <el3_common_macros.S>
+#include <xlat_tables.h>
 
 	.globl	bl31_entrypoint
-
+	.globl	bl31_warm_entrypoint
 
 	/* -----------------------------------------------------
 	 * bl31_entrypoint() is the cold boot entrypoint,
@@ -132,3 +133,60 @@
 
 	b	el3_exit
 endfunc bl31_entrypoint
+
+	/* --------------------------------------------------------------------
+	 * This CPU has been physically powered up. It is either resuming from
+	 * suspend or has simply been turned on. In both cases, call the BL31
+	 * warmboot entrypoint
+	 * --------------------------------------------------------------------
+	 */
+func bl31_warm_entrypoint
+	/*
+	 * On the warm boot path, most of the EL3 initialisations performed by
+	 * 'el3_entrypoint_common' must be skipped:
+	 *
+	 *  - Only when the platform bypasses the BL1/BL31 entrypoint by
+	 *    programming the reset address do we need to set the CPU endianness.
+	 *    In other cases, we assume this has been taken care by the
+	 *    entrypoint code.
+	 *
+	 *  - No need to determine the type of boot, we know it is a warm boot.
+	 *
+	 *  - Do not try to distinguish between primary and secondary CPUs, this
+	 *    notion only exists for a cold boot.
+	 *
+	 *  - No need to initialise the memory or the C runtime environment,
+	 *    it has been done once and for all on the cold boot path.
+	 */
+	el3_entrypoint_common					\
+		_set_endian=PROGRAMMABLE_RESET_ADDRESS		\
+		_warm_boot_mailbox=0				\
+		_secondary_cold_boot=0				\
+		_init_memory=0					\
+		_init_c_runtime=0				\
+		_exception_vectors=runtime_exceptions
+
+	/* --------------------------------------------
+	 * Enable the MMU with the DCache disabled. It
+	 * is safe to use stacks allocated in normal
+	 * memory as a result. All memory accesses are
+	 * marked nGnRnE when the MMU is disabled. So
+	 * all the stack writes will make it to memory.
+	 * All memory accesses are marked Non-cacheable
+	 * when the MMU is enabled but D$ is disabled.
+	 * So used stack memory is guaranteed to be
+	 * visible immediately after the MMU is enabled
+	 * Enabling the DCache at the same time as the
+	 * MMU can lead to speculatively fetched and
+	 * possibly stale stack memory being read from
+	 * other caches. This can lead to coherency
+	 * issues.
+	 * --------------------------------------------
+	 */
+	mov	x0, #DISABLE_DCACHE
+	bl	bl31_plat_enable_mmu
+
+	bl	psci_warmboot_entrypoint
+
+	b	el3_exit
+endfunc bl31_warm_entrypoint
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 33cbe4b..743e65c 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -46,6 +46,47 @@
     ASSERT(. == ALIGN(4096),
            "BL31_BASE address is not aligned on a page boundary.")
 
+#if SEPARATE_CODE_AND_RODATA
+    .text . : {
+        __TEXT_START__ = .;
+        *bl31_entrypoint.o(.text*)
+        *(.text*)
+        *(.vectors)
+        . = NEXT(4096);
+        __TEXT_END__ = .;
+    } >RAM
+
+    .rodata . : {
+        __RODATA_START__ = .;
+        *(.rodata*)
+
+        /* Ensure 8-byte alignment for descriptors and ensure inclusion */
+        . = ALIGN(8);
+        __RT_SVC_DESCS_START__ = .;
+        KEEP(*(rt_svc_descs))
+        __RT_SVC_DESCS_END__ = .;
+
+#if ENABLE_PMF
+        /* Ensure 8-byte alignment for descriptors and ensure inclusion */
+        . = ALIGN(8);
+        __PMF_SVC_DESCS_START__ = .;
+        KEEP(*(pmf_svc_descs))
+        __PMF_SVC_DESCS_END__ = .;
+#endif /* ENABLE_PMF */
+
+        /*
+         * Ensure 8-byte alignment for cpu_ops so that its fields are also
+         * aligned. Also ensure cpu_ops inclusion.
+         */
+        . = ALIGN(8);
+        __CPU_OPS_START__ = .;
+        KEEP(*(cpu_ops))
+        __CPU_OPS_END__ = .;
+
+        . = NEXT(4096);
+        __RODATA_END__ = .;
+    } >RAM
+#else
     ro . : {
         __RO_START__ = .;
         *bl31_entrypoint.o(.text*)
@@ -85,6 +126,7 @@
         . = NEXT(4096);
         __RO_END__ = .;
     } >RAM
+#endif
 
     ASSERT(__CPU_OPS_END__ > __CPU_OPS_START__,
            "cpu_ops not defined for this platform.")
diff --git a/bl31/bl31.mk b/bl31/bl31.mk
index 8a7fccb..4de511b 100644
--- a/bl31/bl31.mk
+++ b/bl31/bl31.mk
@@ -28,45 +28,22 @@
 # POSSIBILITY OF SUCH DAMAGE.
 #
 
+include lib/psci/psci_lib.mk
+
 BL31_SOURCES		+=	bl31/bl31_main.c				\
-				bl31/cpu_data_array.c				\
-				bl31/runtime_svc.c				\
 				bl31/interrupt_mgmt.c				\
-				bl31/aarch64/bl31_arch_setup.c			\
 				bl31/aarch64/bl31_entrypoint.S			\
-				bl31/aarch64/cpu_data.S				\
 				bl31/aarch64/runtime_exceptions.S		\
 				bl31/aarch64/crash_reporting.S			\
 				bl31/bl31_context_mgmt.c			\
-				common/aarch64/context.S			\
-				common/context_mgmt.c				\
-				lib/cpus/aarch64/cpu_helpers.S			\
-				lib/locks/exclusive/spinlock.S			\
+				common/runtime_svc.c				\
 				services/std_svc/std_svc_setup.c		\
-				services/std_svc/psci/psci_off.c		\
-				services/std_svc/psci/psci_on.c			\
-				services/std_svc/psci/psci_suspend.c		\
-				services/std_svc/psci/psci_common.c		\
-				services/std_svc/psci/psci_entry.S		\
-				services/std_svc/psci/psci_helpers.S		\
-				services/std_svc/psci/psci_main.c		\
-				services/std_svc/psci/psci_setup.c		\
-				services/std_svc/psci/psci_system_off.c
-
-ifeq (${USE_COHERENT_MEM}, 1)
-BL31_SOURCES		+=	lib/locks/bakery/bakery_lock_coherent.c
-else
-BL31_SOURCES		+=	lib/locks/bakery/bakery_lock_normal.c
-endif
+				${PSCI_LIB_SOURCES}
 
 ifeq (${ENABLE_PMF}, 1)
 BL31_SOURCES		+=	lib/pmf/pmf_main.c
 endif
 
-ifeq (${ENABLE_PSCI_STAT}, 1)
-BL31_SOURCES		+=	services/std_svc/psci/psci_stat.c
-endif
-
 BL31_LINKERFILE		:=	bl31/bl31.ld.S
 
 # Flag used to indicate if Crash reporting via console should be included
diff --git a/bl31/bl31_context_mgmt.c b/bl31/bl31_context_mgmt.c
index ae24424..f8751c2 100644
--- a/bl31/bl31_context_mgmt.c
+++ b/bl31/bl31_context_mgmt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -30,6 +30,7 @@
 
 #include <assert.h>
 #include <bl31.h>
+#include <bl_common.h>
 #include <context.h>
 #include <context_mgmt.h>
 #include <cpu_data.h>
@@ -130,4 +131,4 @@
 	else
 		cm_init_context_by_index(platform_get_core_pos(mpidr), ep);
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/bl31/bl31_main.c b/bl31/bl31_main.c
index 7f04d21..f95ef41 100644
--- a/bl31/bl31_main.c
+++ b/bl31/bl31_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -59,6 +59,12 @@
 void bl31_lib_init(void)
 {
 	cm_init();
+
+	/*
+	 * Initialize the PSCI library here. This also does EL3 architectural
+	 * setup.
+	 */
+	psci_setup((uintptr_t)bl31_warm_entrypoint);
 }
 
 /*******************************************************************************
@@ -74,9 +80,6 @@
 	NOTICE("BL31: %s\n", version_string);
 	NOTICE("BL31: %s\n", build_message);
 
-	/* Perform remaining generic architectural setup from EL3 */
-	bl31_arch_setup();
-
 	/* Perform platform setup in BL31 */
 	bl31_platform_setup();
 
diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S
index 9c7ed38..7e24f66 100644
--- a/bl32/tsp/tsp.ld.S
+++ b/bl32/tsp/tsp.ld.S
@@ -46,6 +46,23 @@
     ASSERT(. == ALIGN(4096),
            "BL32_BASE address is not aligned on a page boundary.")
 
+#if SEPARATE_CODE_AND_RODATA
+    .text . : {
+        __TEXT_START__ = .;
+        *tsp_entrypoint.o(.text*)
+        *(.text*)
+        *(.vectors)
+        . = NEXT(4096);
+        __TEXT_END__ = .;
+    } >RAM
+
+    .rodata . : {
+        __RODATA_START__ = .;
+        *(.rodata*)
+        . = NEXT(4096);
+        __RODATA_END__ = .;
+    } >RAM
+#else
     ro . : {
         __RO_START__ = .;
         *tsp_entrypoint.o(.text*)
@@ -61,6 +78,7 @@
         . = NEXT(4096);
         __RO_END__ = .;
     } >RAM
+#endif
 
     /*
      * Define a linker symbol to mark start of the RW memory area for this
diff --git a/bl32/tsp/tsp_main.c b/bl32/tsp/tsp_main.c
index 359b9e1..d03f7e2 100644
--- a/bl32/tsp/tsp_main.c
+++ b/bl32/tsp/tsp_main.c
@@ -56,12 +56,12 @@
 work_statistics_t tsp_stats[PLATFORM_CORE_COUNT];
 
 /*******************************************************************************
- * The BL32 memory footprint starts with an RO sections and ends
- * with the linker symbol __BL32_END__. Use it to find the memory size
+ * The TSP memory footprint starts at address BL32_BASE and ends with the
+ * linker symbol __BL32_END__. Use these addresses to compute the TSP image
+ * size.
  ******************************************************************************/
-#define BL32_TOTAL_BASE (unsigned long)(&__RO_START__)
-
 #define BL32_TOTAL_LIMIT (unsigned long)(&__BL32_END__)
+#define BL32_TOTAL_SIZE (BL32_TOTAL_LIMIT - (unsigned long) BL32_BASE)
 
 static tsp_args_t *set_smc_args(uint64_t arg0,
 			     uint64_t arg1,
@@ -102,9 +102,8 @@
 {
 	NOTICE("TSP: %s\n", version_string);
 	NOTICE("TSP: %s\n", build_message);
-	INFO("TSP: Total memory base : 0x%lx\n", BL32_TOTAL_BASE);
-	INFO("TSP: Total memory size : 0x%lx bytes\n",
-			 BL32_TOTAL_LIMIT - BL32_TOTAL_BASE);
+	INFO("TSP: Total memory base : 0x%lx\n", (unsigned long) BL32_BASE);
+	INFO("TSP: Total memory size : 0x%lx bytes\n", BL32_TOTAL_SIZE);
 
 	uint32_t linear_id = plat_my_core_pos();
 
diff --git a/common/bl_common.c b/common/bl_common.c
index 7cafe63..be56256 100644
--- a/common/bl_common.c
+++ b/common/bl_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -38,37 +38,66 @@
 #include <io_storage.h>
 #include <platform.h>
 #include <string.h>
+#include <utils.h>
 #include <xlat_tables.h>
 
-unsigned long page_align(unsigned long value, unsigned dir)
+uintptr_t page_align(uintptr_t value, unsigned dir)
 {
-	unsigned long page_size = 1 << FOUR_KB_SHIFT;
-
 	/* Round up the limit to the next page boundary */
-	if (value & (page_size - 1)) {
-		value &= ~(page_size - 1);
+	if (value & (PAGE_SIZE - 1)) {
+		value &= ~(PAGE_SIZE - 1);
 		if (dir == UP)
-			value += page_size;
+			value += PAGE_SIZE;
 	}
 
 	return value;
 }
 
-static inline unsigned int is_page_aligned (unsigned long addr) {
-	const unsigned long page_size = 1 << FOUR_KB_SHIFT;
-
-	return (addr & (page_size - 1)) == 0;
+static inline unsigned int is_page_aligned (uintptr_t addr) {
+	return (addr & (PAGE_SIZE - 1)) == 0;
 }
 
 /******************************************************************************
  * Determine whether the memory region delimited by 'addr' and 'size' is free,
  * given the extents of free memory.
- * Return 1 if it is free, 0 otherwise.
+ * Return 1 if it is free, 0 if it is not free or if the input values are
+ * invalid.
  *****************************************************************************/
-static int is_mem_free(uint64_t free_base, size_t free_size,
-		       uint64_t addr, size_t size)
+static int is_mem_free(uintptr_t free_base, size_t free_size,
+		uintptr_t addr, size_t size)
 {
-	return (addr >= free_base) && (addr + size <= free_base + free_size);
+	uintptr_t free_end, requested_end;
+
+	/*
+	 * Handle corner cases first.
+	 *
+	 * The order of the 2 tests is important, because if there's no space
+	 * left (i.e. free_size == 0) but we don't ask for any memory
+	 * (i.e. size == 0) then we should report that the memory is free.
+	 */
+	if (size == 0)
+		return 1;	/* A zero-byte region is always free */
+	if (free_size == 0)
+		return 0;
+
+	/*
+	 * Check that the end addresses don't overflow.
+	 * If they do, consider that this memory region is not free, as this
+	 * is an invalid scenario.
+	 */
+	if (check_uptr_overflow(free_base, free_size - 1))
+		return 0;
+	free_end = free_base + (free_size - 1);
+
+	if (check_uptr_overflow(addr, size - 1))
+		return 0;
+	requested_end = addr + (size - 1);
+
+	/*
+	 * Finally, check that the requested memory region lies within the free
+	 * region.
+	 */
+	return (addr >= free_base) && (requested_end <= free_end);
 }
 
 /******************************************************************************
@@ -77,9 +106,9 @@
  * size of the smallest chunk of free memory surrounding the sub-region in
  * 'small_chunk_size'.
  *****************************************************************************/
-static unsigned int choose_mem_pos(uint64_t mem_start, uint64_t mem_end,
-				   uint64_t submem_start, uint64_t submem_end,
-				   size_t *small_chunk_size)
+static unsigned int choose_mem_pos(uintptr_t mem_start, uintptr_t mem_end,
+				  uintptr_t submem_start, uintptr_t submem_end,
+				  size_t *small_chunk_size)
 {
 	size_t top_chunk_size, bottom_chunk_size;
 
@@ -104,10 +133,11 @@
  * Reserve the memory region delimited by 'addr' and 'size'. The extents of free
  * memory are passed in 'free_base' and 'free_size' and they will be updated to
  * reflect the memory usage.
- * The caller must ensure the memory to reserve is free.
+ * The caller must ensure the memory to reserve is free and that the addresses
+ * and sizes passed in arguments are sane.
  *****************************************************************************/
-void reserve_mem(uint64_t *free_base, size_t *free_size,
-		 uint64_t addr, size_t size)
+void reserve_mem(uintptr_t *free_base, size_t *free_size,
+		 uintptr_t addr, size_t size)
 {
 	size_t discard_size;
 	size_t reserved_size;
@@ -117,8 +147,13 @@
 	assert(free_size != NULL);
 	assert(is_mem_free(*free_base, *free_size, addr, size));
 
-	pos = choose_mem_pos(*free_base, *free_base + *free_size,
-			     addr, addr + size,
+	if (size == 0) {
+		WARN("Nothing to allocate, requested size is zero\n");
+		return;
+	}
+
+	pos = choose_mem_pos(*free_base, *free_base + (*free_size - 1),
+			     addr, addr + (size - 1),
 			     &discard_size);
 
 	reserved_size = size + discard_size;
@@ -127,26 +162,26 @@
 	if (pos == BOTTOM)
 		*free_base = addr + size;
 
-	VERBOSE("Reserved 0x%lx bytes (discarded 0x%lx bytes %s)\n",
+	VERBOSE("Reserved 0x%zx bytes (discarded 0x%zx bytes %s)\n",
 	     reserved_size, discard_size,
 	     pos == TOP ? "above" : "below");
 }
 
-static void dump_load_info(unsigned long image_load_addr,
-			   unsigned long image_size,
+static void dump_load_info(uintptr_t image_load_addr,
+			   size_t image_size,
 			   const meminfo_t *mem_layout)
 {
-	INFO("Trying to load image at address 0x%lx, size = 0x%lx\n",
-		image_load_addr, image_size);
+	INFO("Trying to load image at address %p, size = 0x%zx\n",
+		(void *)image_load_addr, image_size);
 	INFO("Current memory layout:\n");
-	INFO("  total region = [0x%lx, 0x%lx]\n", mem_layout->total_base,
-			mem_layout->total_base + mem_layout->total_size);
-	INFO("  free region = [0x%lx, 0x%lx]\n", mem_layout->free_base,
-			mem_layout->free_base + mem_layout->free_size);
+	INFO("  total region = [base = %p, size = 0x%zx]\n",
+		(void *) mem_layout->total_base, mem_layout->total_size);
+	INFO("  free region = [base = %p, size = 0x%zx]\n",
+		(void *) mem_layout->free_base, mem_layout->free_size);
 }
 
 /* Generic function to return the size of an image */
-unsigned long image_size(unsigned int image_id)
+size_t image_size(unsigned int image_id)
 {
 	uintptr_t dev_handle;
 	uintptr_t image_handle;
@@ -252,8 +287,8 @@
 	/* Check that the memory where the image will be loaded is free */
 	if (!is_mem_free(mem_layout->free_base, mem_layout->free_size,
 			 image_base, image_size)) {
-		WARN("Failed to reserve memory: %p - %p\n", (void *) image_base,
-		     (void *) (image_base + image_size));
+		WARN("Failed to reserve region [base = %p, size = 0x%zx]\n",
+		     (void *) image_base, image_size);
 		dump_load_info(image_base, image_size, mem_layout);
 		io_result = -ENOMEM;
 		goto exit;
@@ -282,8 +317,8 @@
 				image_base, image_size);
 		entry_point_info->pc = image_base;
 	} else {
-		INFO("Skip reserving memory: %p - %p\n", (void *) image_base,
-		     (void *) (image_base + image_size));
+		INFO("Skip reserving region [base = %p, size = 0x%zx]\n",
+		     (void *) image_base, image_size);
 	}
 
 	/*
@@ -293,8 +328,8 @@
 	 */
 	flush_dcache_range(image_base, image_size);
 
-	INFO("Image id=%u loaded: %p - %p\n", image_id, (void *) image_base,
-	     (void *) (image_base + image_size));
+	INFO("Image id=%u loaded at address %p, size = 0x%zx\n", image_id,
+		(void *) image_base, image_size);
 
 exit:
 	io_close(image_handle);
@@ -367,9 +402,8 @@
  ******************************************************************************/
 void print_entry_point_info(const entry_point_info_t *ep_info)
 {
-	INFO("Entry point address = 0x%llx\n",
-		(unsigned long long) ep_info->pc);
-	INFO("SPSR = 0x%lx\n", (unsigned long) ep_info->spsr);
+	INFO("Entry point address = %p\n", (void *)ep_info->pc);
+	INFO("SPSR = 0x%x\n", ep_info->spsr);
 
 #define PRINT_IMAGE_ARG(n)					\
 	VERBOSE("Argument #" #n " = 0x%llx\n",			\
diff --git a/bl31/runtime_svc.c b/common/runtime_svc.c
similarity index 82%
rename from bl31/runtime_svc.c
rename to common/runtime_svc.c
index f011f11..b8af6cd 100644
--- a/bl31/runtime_svc.c
+++ b/common/runtime_svc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -28,6 +28,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <assert.h>
 #include <debug.h>
 #include <errno.h>
 #include <runtime_svc.h>
@@ -42,15 +43,18 @@
  * 'rt_svc_descs_indices' array. This gives the index of the descriptor in the
  * 'rt_svc_descs' array which contains the SMC handler.
  ******************************************************************************/
-#define RT_SVC_DESCS_START	((uint64_t) (&__RT_SVC_DESCS_START__))
-#define RT_SVC_DESCS_END	((uint64_t) (&__RT_SVC_DESCS_END__))
+#define RT_SVC_DESCS_START	((uintptr_t) (&__RT_SVC_DESCS_START__))
+#define RT_SVC_DESCS_END	((uintptr_t) (&__RT_SVC_DESCS_END__))
 uint8_t rt_svc_descs_indices[MAX_RT_SVCS];
 static rt_svc_desc_t *rt_svc_descs;
 
+#define RT_SVC_DECS_NUM		((RT_SVC_DESCS_END - RT_SVC_DESCS_START)\
+					/ sizeof(rt_svc_desc_t))
+
 /*******************************************************************************
  * Simple routine to sanity check a runtime service descriptor before using it
  ******************************************************************************/
-static int32_t validate_rt_svc_desc(rt_svc_desc_t *desc)
+static int32_t validate_rt_svc_desc(const rt_svc_desc_t *desc)
 {
 	if (desc == NULL)
 		return -EINVAL;
@@ -80,33 +84,33 @@
  ******************************************************************************/
 void runtime_svc_init(void)
 {
-	int32_t rc = 0;
-	uint32_t index, start_idx, end_idx;
-	uint64_t rt_svc_descs_num;
+	int rc = 0, index, start_idx, end_idx;
+
+	/* Assert the number of descriptors detected are less than maximum indices */
+	assert((RT_SVC_DESCS_END >= RT_SVC_DESCS_START) &&
+			(RT_SVC_DECS_NUM < MAX_RT_SVCS));
 
 	/* If no runtime services are implemented then simply bail out */
-	rt_svc_descs_num = RT_SVC_DESCS_END - RT_SVC_DESCS_START;
-	rt_svc_descs_num /= sizeof(rt_svc_desc_t);
-	if (rt_svc_descs_num == 0)
+	if (RT_SVC_DECS_NUM == 0)
 		return;
 
 	/* Initialise internal variables to invalid state */
 	memset(rt_svc_descs_indices, -1, sizeof(rt_svc_descs_indices));
 
 	rt_svc_descs = (rt_svc_desc_t *) RT_SVC_DESCS_START;
-	for (index = 0; index < rt_svc_descs_num; index++) {
+	for (index = 0; index < RT_SVC_DECS_NUM; index++) {
+		rt_svc_desc_t *service = &rt_svc_descs[index];
 
 		/*
 		 * An invalid descriptor is an error condition since it is
 		 * difficult to predict the system behaviour in the absence
 		 * of this service.
 		 */
-		rc = validate_rt_svc_desc(&rt_svc_descs[index]);
+		rc = validate_rt_svc_desc(service);
 		if (rc) {
-			ERROR("Invalid runtime service descriptor %p (%s)\n",
-					(void *) &rt_svc_descs[index],
-					rt_svc_descs[index].name);
-			goto error;
+			ERROR("Invalid runtime service descriptor %p\n",
+				(void *) service);
+			panic();
 		}
 
 		/*
@@ -116,11 +120,11 @@
 		 * an initialisation routine defined. Call the initialisation
 		 * routine for this runtime service, if it is defined.
 		 */
-		if (rt_svc_descs[index].init) {
-			rc = rt_svc_descs[index].init();
+		if (service->init) {
+			rc = service->init();
 			if (rc) {
 				ERROR("Error initializing runtime service %s\n",
-						rt_svc_descs[index].name);
+						service->name);
 				continue;
 			}
 		}
@@ -132,15 +136,12 @@
 		 * entity range.
 		 */
 		start_idx = get_unique_oen(rt_svc_descs[index].start_oen,
-				rt_svc_descs[index].call_type);
+				service->call_type);
+		assert(start_idx < MAX_RT_SVCS);
 		end_idx = get_unique_oen(rt_svc_descs[index].end_oen,
-				rt_svc_descs[index].call_type);
-
+				service->call_type);
+		assert(end_idx < MAX_RT_SVCS);
 		for (; start_idx <= end_idx; start_idx++)
 			rt_svc_descs_indices[start_idx] = index;
 	}
-
-	return;
-error:
-	panic();
 }
diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index 74ea5eb..68cad2e 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -14,8 +14,9 @@
 9.  [Memory layout of BL images](#9-memory-layout-of-bl-images)
 10. [Firmware Image Package (FIP)](#10--firmware-image-package-fip)
 11. [Use of coherent memory in Trusted Firmware](#11--use-of-coherent-memory-in-trusted-firmware)
-12. [Code Structure](#12--code-structure)
-13. [References](#13--references)
+12. [Isolating code and read-only data on separate memory pages](#12--isolating-code-and-read-only-data-on-separate-memory-pages)
+13. [Code Structure](#13--code-structure)
+14. [References](#14--references)
 
 
 1.  Introduction
@@ -1052,10 +1053,10 @@
 All PROGBITS sections are grouped together at the beginning of the image,
 followed by all NOBITS sections. This is true for all Trusted Firmware images
 and it is governed by the linker scripts. This ensures that the raw binary
-images are as small as possible. If a NOBITS section would sneak in between
-PROGBITS sections then the resulting binary file would contain a bunch of zero
-bytes at the location of this NOBITS section, making the image unnecessarily
-bigger. Smaller images allow faster loading from the FIP to the main memory.
+images are as small as possible. If a NOBITS section was inserted in between
+PROGBITS sections then the resulting binary file would contain zero bytes in
+place of this NOBITS section, making the image unnecessarily bigger. Smaller
+images allow faster loading from the FIP to the main memory.
 
 ### Linker scripts and symbols
 
@@ -1110,47 +1111,48 @@
 
 #### Common linker symbols
 
-Early setup code needs to know the extents of the BSS section to zero-initialise
-it before executing any C code. The following linker symbols are defined for
-this purpose:
+All BL images share the following requirements:
 
-* `__BSS_START__` This address must be aligned on a 16-byte boundary.
-* `__BSS_SIZE__`
+*   The BSS section must be zero-initialised before executing any C code.
+*   The coherent memory section (if enabled) must be zero-initialised as well.
+*   The MMU setup code needs to know the extents of the coherent and read-only
+    memory regions to set the right memory attributes. When
+    `SEPARATE_CODE_AND_RODATA=1`, it needs to know more specifically how the
+    read-only memory region is divided between code and data.
 
-Similarly, the coherent memory section (if enabled) must be zero-initialised.
-Also, the MMU setup code needs to know the extents of this section to set the
-right memory attributes for it. The following linker symbols are defined for
-this purpose:
+The following linker symbols are defined for this purpose:
 
-* `__COHERENT_RAM_START__` This address must be aligned on a page-size boundary.
-* `__COHERENT_RAM_END__` This address must be aligned on a page-size boundary.
-* `__COHERENT_RAM_UNALIGNED_SIZE__`
+*   `__BSS_START__`          Must be aligned on a 16-byte boundary.
+*   `__BSS_SIZE__`
+*   `__COHERENT_RAM_START__` Must be aligned on a page-size boundary.
+*   `__COHERENT_RAM_END__`   Must be aligned on a page-size boundary.
+*   `__COHERENT_RAM_UNALIGNED_SIZE__`
+*   `__RO_START__`
+*   `__RO_END__`
+*   `__TEXT_START__`
+*   `__TEXT_END__`
+*   `__RODATA_START__`
+*   `__RODATA_END__`
 
 #### BL1's linker symbols
 
-BL1's early setup code needs to know the extents of the .data section to
-relocate it from ROM to RAM before executing any C code. The following linker
-symbols are defined for this purpose:
+BL1 being the ROM image, it has additional requirements. BL1 resides in ROM and
+it is entirely executed in place but it needs some read-write memory for its
+mutable data. Its `.data` section (i.e. its allocated read-write data) must be
+relocated from ROM to RAM before executing any C code.
 
-* `__DATA_ROM_START__` This address must be aligned on a 16-byte boundary.
-* `__DATA_RAM_START__` This address must be aligned on a 16-byte boundary.
-* `__DATA_SIZE__`
+The following additional linker symbols are defined for BL1:
 
-BL1's platform setup code needs to know the extents of its read-write data
-region to figure out its memory layout. The following linker symbols are defined
-for this purpose:
+*   `__BL1_ROM_END__`    End address of BL1's ROM contents, covering its code
+                         and `.data` section in ROM.
+*   `__DATA_ROM_START__` Start address of the `.data` section in ROM. Must be
+                         aligned on a 16-byte boundary.
+*   `__DATA_RAM_START__` Address in RAM where the `.data` section should be
+                         copied over. Must be aligned on a 16-byte boundary.
+*   `__DATA_SIZE__`      Size of the `.data` section (in ROM or RAM).
+*   `__BL1_RAM_START__`  Start address of BL1 read-write data.
+*   `__BL1_RAM_END__`    End address of BL1 read-write data.
 
-* `__BL1_RAM_START__` This is the start address of BL1 RW data.
-* `__BL1_RAM_END__` This is the end address of BL1 RW data.
-
-#### BL2's, BL31's and TSP's linker symbols
-
-BL2, BL31 and TSP need to know the extents of their read-only section to set
-the right memory attributes for this memory region in their MMU setup code. The
-following linker symbols are defined for this purpose:
-
-* `__RO_START__`
-* `__RO_END__`
 
 ### How to choose the right base addresses for each bootloader stage image
 
@@ -1772,7 +1774,86 @@
 optionally define macro `PLAT_PERCPU_BAKERY_LOCK_SIZE`  (see the [Porting
 Guide]). Refer to the reference platform code for examples.
 
+
+12.  Isolating code and read-only data on separate memory pages
+---------------------------------------------------------------
+
+In the ARMv8 VMSA, translation table entries include fields that define the
+properties of the target memory region, such as its access permissions. The
+smallest unit of memory that can be addressed by a translation table entry is
+a memory page. Therefore, if software needs to set different permissions on two
+memory regions then it needs to map them using different memory pages.
+
+The default memory layout for each BL image is as follows:
+
+       |        ...        |
+       +-------------------+
+       |  Read-write data  |
+       +-------------------+ Page boundary
+       |     <Padding>     |
+       +-------------------+
+       | Exception vectors |
+       +-------------------+ 2 KB boundary
+       |     <Padding>     |
+       +-------------------+
+       |  Read-only data   |
+       +-------------------+
+       |       Code        |
+       +-------------------+ BLx_BASE
+
+Note: The 2KB alignment for the exception vectors is an architectural
+requirement.
+
-12.  Code Structure
+The read-write data start on a new memory page so that they can be mapped with
+read-write permissions, whereas the code and read-only data below are configured
+as read-only.
+
+However, the read-only data are not aligned on a page boundary. They are
+contiguous to the code. Therefore, the end of the code section and the beginning
+of the read-only data one might share a memory page. This forces both to be
+mapped with the same memory attributes. As the code needs to be executable, this
+means that the read-only data stored on the same memory page as the code are
+executable as well. This could potentially be exploited as part of a security
+attack.
+
+TF provides the build flag `SEPARATE_CODE_AND_RODATA` to isolate the code and
+read-only data on separate memory pages. This in turn allows independent control
+of the access permissions for the code and read-only data. In this case,
+platform code gets a finer-grained view of the image layout and can
+appropriately map the code region as executable and the read-only data as
+execute-never.
+
+This has an impact on memory footprint, as padding bytes need to be introduced
+between the code and read-only data to ensure the segragation of the two. To
+limit the memory cost, this flag also changes the memory layout such that the
+code and exception vectors are now contiguous, like so:
+
+       |        ...        |
+       +-------------------+
+       |  Read-write data  |
+       +-------------------+ Page boundary
+       |     <Padding>     |
+       +-------------------+
+       |  Read-only data   |
+       +-------------------+ Page boundary
+       |     <Padding>     |
+       +-------------------+
+       | Exception vectors |
+       +-------------------+ 2 KB boundary
+       |     <Padding>     |
+       +-------------------+
+       |       Code        |
+       +-------------------+ BLx_BASE
+
+With this more condensed memory layout, the separation of read-only data will
+add zero or one page to the memory footprint of each BL image. Each platform
+should consider the trade-off between memory footprint and security.
+
+This build flag is disabled by default, minimising memory footprint. On ARM
+platforms, it is enabled.
+
+
+13.  Code Structure
 -------------------
 
 Trusted Firmware code is logically divided between the three boot loader
@@ -1783,10 +1864,11 @@
     the platform.
 *   **Common code.** This is platform and architecture agnostic code.
 *   **Library code.** This code comprises of functionality commonly used by all
-    other code.
+    other code. The PSCI implementation and other EL3 runtime frameworks reside
+    as Library components.
 *   **Stage specific.** Code specific to a boot stage.
 *   **Drivers.**
-*   **Services.** EL3 runtime services, e.g. PSCI or SPD. Specific SPD services
+*   **Services.** EL3 runtime services (eg: SPD). Specific SPD services
     reside in the `services/spd` directory (e.g. `services/spd/tspd`).
 
 Each boot loader stage uses code from one or more of the above mentioned
@@ -1815,7 +1897,7 @@
 kernel at boot time. These can be found in the `fdts` directory.
 
 
-13.  References
+14.  References
 ---------------
 
 1.  Trusted Board Boot Requirements CLIENT PDD (ARM DEN 0006B-5). Available
diff --git a/docs/porting-guide.md b/docs/porting-guide.md
index 23033d5..8dad4a0 100644
--- a/docs/porting-guide.md
+++ b/docs/porting-guide.md
@@ -545,7 +545,7 @@
 ### Function : plat_get_my_entrypoint() [mandatory when PROGRAMMABLE_RESET_ADDRESS == 0]
 
     Argument : void
-    Return   : unsigned long
+    Return   : uintptr_t
 
 This function is called with the called with the MMU and caches disabled
 (`SCTLR_EL3.M` = 0 and `SCTLR_EL3.C` = 0). The function is responsible for
@@ -748,7 +748,7 @@
 ### Function : plat_get_my_stack()
 
     Argument : void
-    Return   : unsigned long
+    Return   : uintptr_t
 
 This function returns the base address of the normal memory stack that
 has been allocated for the current CPU. For BL images that only require a
@@ -966,7 +966,7 @@
 
 ### Function : bl1_init_bl2_mem_layout() [optional]
 
-    Argument : meminfo *, meminfo *, unsigned int, unsigned long
+    Argument : meminfo *, meminfo *
     Return   : void
 
 BL1 needs to tell the next stage the amount of secure RAM available
diff --git a/docs/psci-pd-tree.md b/docs/psci-pd-tree.md
index 6ae686d..c253905 100644
--- a/docs/psci-pd-tree.md
+++ b/docs/psci-pd-tree.md
@@ -203,7 +203,7 @@
 } non_cpu_pd_node_t;
 
 typedef struct cpu_pwr_domain_node {
-    unsigned long mpidr;
+    u_register_t mpidr;
 
     /* Index of the parent power domain node */
     unsigned int parent_node;
diff --git a/docs/rt-svc-writers-guide.md b/docs/rt-svc-writers-guide.md
index 40cee14..4b811fe 100644
--- a/docs/rt-svc-writers-guide.md
+++ b/docs/rt-svc-writers-guide.md
@@ -95,8 +95,7 @@
 
 ARM Trusted Firmware has a [`services`] directory in the source tree under which
 each owning entity can place the implementation of its runtime service.  The
-[PSCI] implementation is located here in the [`services/std_svc/psci`]
-directory.
+[PSCI] implementation is located here in the [`lib/psci`] directory.
 
 Runtime service sources will need to include the [`runtime_svc.h`] header file.
 
@@ -114,7 +113,7 @@
     is also used for diagnostic purposes
 
 *   `_start` and `_end` values must be based on the `OEN_*` values defined in
-    [`smcc_helpers.h`]
+    [`smcc.h`]
 
 *   `_type` must be one of `SMC_TYPE_FAST` or `SMC_TYPE_STD`
 
@@ -124,12 +123,12 @@
 
 *   `_smch` is the SMC handler function with the `rt_svc_handle` signature:
 
-        typedef uint64_t (*rt_svc_handle)(uint32_t smc_fid,
-                                          uint64_t x1, uint64_t x2,
-                                          uint64_t x3, uint64_t x4,
+        typedef uintptr_t (*rt_svc_handle_t)(uint32_t smc_fid,
+                                          u_register_t x1, u_register_t x2,
+                                          u_register_t x3, u_register_t x4,
                                           void *cookie,
                                           void *handle,
-                                          uint64_t flags);
+                                          u_register_t flags);
 
 Details of the requirements and behavior of the two callbacks is provided in
 the following sections.
@@ -189,12 +188,12 @@
 handler function (`_smch` in the service declaration). This function must have
 the following signature:
 
-    typedef uint64_t (*rt_svc_handle)(uint32_t smc_fid,
-                                      uint64_t x1, uint64_t x2,
-                                      uint64_t x3, uint64_t x4,
-                                      void *reserved,
-                                      void *handle,
-                                      uint64_t flags);
+    typedef uintptr_t (*rt_svc_handle_t)(uint32_t smc_fid,
+                                       u_register_t x1, u_register_t x2,
+                                       u_register_t x3, u_register_t x4,
+                                       void *cookie,
+                                       void *handle,
+                                       u_register_t flags);
 
 The handler is responsible for:
 
@@ -253,10 +252,9 @@
         SMC_RET3(handle, x0, x1, x2);
         SMC_RET4(handle, x0, x1, x2, x3);
 
-The `reserved` parameter to the handler is reserved for future use and can be
-ignored. The value returned by a SMC handler is also reserved for future use -
-completion of the handler function must always be via one of the `SMC_RETn()`
-macros.
+The `cookie` parameter to the handler is reserved for future use and can be
+ignored. The `handle` is returned by the SMC handler - completion of the
+handler function must always be via one of the `SMC_RETn()` macros.
 
 NOTE: The PSCI and Test Secure-EL1 Payload Dispatcher services do not follow
 all of the above requirements yet.
@@ -299,12 +297,11 @@
 _Copyright (c) 2014-2015, ARM Limited and Contributors. All rights reserved._
 
 
-[Firmware Design]:  ./firmware-design.md
-
+[Firmware Design]:          ./firmware-design.md
 [`services`]:               ../services
-[`services/std_svc/psci`]:  ../services/std_svc/psci
+[`lib/psci`]:               ../lib/psci
 [`std_svc_setup.c`]:        ../services/std_svc/std_svc_setup.c
-[`runtime_svc.h`]:          ../include/bl31/runtime_svc.h
-[`smcc_helpers.h`]:          ../include/common/smcc_helpers.h
+[`runtime_svc.h`]:          ../include/common/runtime_svc.h
+[`smcc.h`]:                 ../include/lib/smcc.h
 [PSCI]:                     http://infocenter.arm.com/help/topic/com.arm.doc.den0022c/DEN0022C_Power_State_Coordination_Interface.pdf "Power State Coordination Interface PDD (ARM DEN 0022C)"
 [SMCCC]:                    http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html "SMC Calling Convention PDD (ARM DEN 0028A)"
diff --git a/docs/user-guide.md b/docs/user-guide.md
index 0f3c999..0acab57 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -416,6 +416,12 @@
      Enabling this option enables the `ENABLE_PMF` build option as well.
      The PMF is used for collecting the statistics.
 
+*   `SEPARATE_CODE_AND_RODATA`: Whether code and read-only data should be
+    isolated on separate memory pages. This is a trade-off between security and
+    memory usage. See "Isolating code and read-only data on separate memory
+    pages" section in [Firmware Design]. This flag is disabled by default and
+    affects all BL images.
+
 #### ARM development platform specific build options
 
 *   `ARM_TSP_RAM_LOCATION`: location of the TSP binary. Options:
@@ -465,12 +471,13 @@
     match the frame used by the Non-Secure image (normally the Linux kernel).
     Default is true (access to the frame is allowed).
 
-*   `ARM_BOARD_OPTIMISE_MMAP`: Boolean option to enable or disable optimisation
-    of page table and MMU related macros `PLAT_ARM_MMAP_ENTRIES` and
-    `MAX_XLAT_TABLES`. By default this flag is 0, which means it uses the
-    default unoptimised values for these macros. ARM development platforms
-    that wish to optimise memory usage for page tables need to set this flag to 1
-    and must override the related macros.
+*   `ARM_BOARD_OPTIMISE_MEM`: Boolean option to enable or disable optimisation
+    of the memory reserved for each image. This affects the maximum size of each
+    BL image as well as the number of allocated memory regions and translation
+    tables. By default this flag is 0, which means it uses the default
+    unoptimised values for these macros. ARM development platforms that wish to
+    optimise memory usage need to set this flag to 1 and must override the
+    related macros.
 
 *   'ARM_BL31_IN_DRAM': Boolean option to select loading of BL31 in TZC secured
     DRAM. By default, BL31 is in the secure SRAM. Set this flag to 1 to load
diff --git a/drivers/arm/ccn/ccn_private.h b/drivers/arm/ccn/ccn_private.h
index fffa2ca..a5a6146 100644
--- a/drivers/arm/ccn/ccn_private.h
+++ b/drivers/arm/ccn/ccn_private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -104,7 +104,7 @@
 #define WAIT_FOR_DOMAIN_CTRL_OP_COMPLETION(region_id, stat_reg_offset,		\
 					   op_reg_offset, rn_id_map)		\
 	{									\
-		uint64_t status_reg;						\
+		unsigned long long status_reg;						\
 		do {								\
 			status_reg = ccn_reg_read((ccn_plat_desc->periphbase),	\
 						  (region_id),			\
@@ -208,7 +208,7 @@
 /*
  * Helper function to return number of set bits in bitmap
  */
-static inline unsigned int count_set_bits(uint64_t bitmap)
+static inline unsigned int count_set_bits(unsigned long long bitmap)
 {
 	unsigned int count = 0;
 
diff --git a/drivers/arm/gic/arm_gic.c b/drivers/arm/gic/arm_gic.c
index ecd5a93..82c5448 100644
--- a/drivers/arm/gic/arm_gic.c
+++ b/drivers/arm/gic/arm_gic.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -205,10 +205,14 @@
 
 	assert(g_irq_sec_ptr);
 	sec_ppi_sgi_mask = 0;
+
+	/* Ensure all SGIs and PPIs are Group0 to begin with */
+	gicd_write_igroupr(g_gicd_base, 0, 0);
+
 	for (index = 0; index < g_num_irqs; index++) {
 		irq_num = g_irq_sec_ptr[index];
 		if (irq_num < MIN_SPI_ID) {
-			/* We have an SGI or a PPI. They are Group0 at reset */
+			/* We have an SGI or a PPI */
 			sec_ppi_sgi_mask |= 1U << irq_num;
 			gicd_set_ipriorityr(g_gicd_base, irq_num,
 				GIC_HIGHEST_SEC_PRIORITY);
diff --git a/drivers/arm/gic/v3/gicv3_helpers.c b/drivers/arm/gic/v3/gicv3_helpers.c
index 07ae54c..0a81c86 100644
--- a/drivers/arm/gic/v3/gicv3_helpers.c
+++ b/drivers/arm/gic/v3/gicv3_helpers.c
@@ -250,7 +250,7 @@
 					uintptr_t gicr_base,
 					mpidr_hash_fn mpidr_to_core_pos)
 {
-	unsigned long mpidr;
+	u_register_t mpidr;
 	unsigned int proc_num;
 	unsigned long long typer_val;
 	uintptr_t rdistif_base = gicr_base;
@@ -320,7 +320,7 @@
 				     unsigned int int_grp)
 {
 	unsigned int index, irq_num;
-	uint64_t gic_affinity_val;
+	unsigned long long gic_affinity_val;
 
 	assert((int_grp == INTR_GROUP1S) || (int_grp == INTR_GROUP0));
 	/* If `num_ints` is not 0, ensure that `sec_intr_list` is not NULL */
diff --git a/drivers/arm/gic/v3/gicv3_private.h b/drivers/arm/gic/v3/gicv3_private.h
index 5e2409f..9aa8338 100644
--- a/drivers/arm/gic/v3/gicv3_private.h
+++ b/drivers/arm/gic/v3/gicv3_private.h
@@ -141,6 +141,7 @@
 
 static inline unsigned long long gicd_read_irouter(uintptr_t base, unsigned int id)
 {
+	assert(id >= MIN_SPI_ID);
 	return mmio_read_64(base + GICD_IROUTER + (id << 3));
 }
 
@@ -148,6 +149,7 @@
 				      unsigned int id,
 				      unsigned long long affinity)
 {
+	assert(id >= MIN_SPI_ID);
 	mmio_write_64(base + GICD_IROUTER + (id << 3), affinity);
 }
 
diff --git a/include/bl31/bl31.h b/include/bl31/bl31.h
index 96867b0..8352c49 100644
--- a/include/bl31/bl31.h
+++ b/include/bl31/bl31.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,11 +36,11 @@
 /*******************************************************************************
  * Function prototypes
  ******************************************************************************/
-void bl31_arch_setup(void);
 void bl31_next_el_arch_setup(uint32_t security_state);
 void bl31_set_next_image_type(uint32_t type);
 uint32_t bl31_get_next_image_type(void);
 void bl31_prepare_next_image_entry(void);
 void bl31_register_bl32_init(int32_t (*)(void));
+void bl31_warm_entrypoint(void);
 
 #endif /* __BL31_H__ */
diff --git a/include/common/asm_macros.S b/include/common/aarch64/asm_macros.S
similarity index 79%
rename from include/common/asm_macros.S
rename to include/common/aarch64/asm_macros.S
index d4bd11e..cc8f424 100644
--- a/include/common/asm_macros.S
+++ b/include/common/aarch64/asm_macros.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,6 +31,7 @@
 #define __ASM_MACROS_S__
 
 #include <arch.h>
+#include <asm_macros_common.S>
 
 
 	.macro	func_prologue
@@ -104,65 +105,6 @@
 	  .endif
 	.endm
 
-	/*
-	 * This macro is used to create a function label and place the
-	 * code into a separate text section based on the function name
-	 * to enable elimination of unused code during linking
-	 */
-	.macro func _name
-	.section .text.\_name, "ax"
-	.type \_name, %function
-	.func \_name
-	\_name:
-	.endm
-
-	/*
-	 * This macro is used to mark the end of a function.
-	 */
-	.macro endfunc _name
-	.endfunc
-	.size \_name, . - \_name
-	.endm
-
-	/*
-	 * Theses macros are used to create function labels for deprecated
-	 * APIs. If ERROR_DEPRECATED is non zero, the callers of these APIs
-	 * will fail to link and cause build failure.
-	 */
-#if ERROR_DEPRECATED
-	.macro func_deprecated _name
-	func deprecated\_name
-	.endm
-
-	.macro endfunc_deprecated _name
-	endfunc deprecated\_name
-	.endm
-#else
-	.macro func_deprecated _name
-	func \_name
-	.endm
-
-	.macro endfunc_deprecated _name
-	endfunc \_name
-	.endm
-#endif
-
-	/*
-	 * This macro declares an array of 1 or more stacks, properly
-	 * aligned and in the requested section
-	 */
-#define STACK_ALIGN	6
-
-	.macro declare_stack _name, _section, _size, _count
-	.if ((\_size & ((1 << STACK_ALIGN) - 1)) <> 0)
-	  .error "Stack size not correctly aligned"
-	.endif
-	.section    \_section, "aw", %nobits
-	.align STACK_ALIGN
-	\_name:
-	.space ((\_count) * (\_size)), 0
-	.endm
-
 #if ENABLE_PLAT_COMPAT
 	/*
 	 * This macro calculates the base address of an MP stack using the
diff --git a/include/common/assert_macros.S b/include/common/aarch64/assert_macros.S
similarity index 96%
rename from include/common/assert_macros.S
rename to include/common/aarch64/assert_macros.S
index cb6c78b..b7e536c 100644
--- a/include/common/assert_macros.S
+++ b/include/common/aarch64/assert_macros.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
diff --git a/include/common/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S
similarity index 100%
rename from include/common/el3_common_macros.S
rename to include/common/aarch64/el3_common_macros.S
diff --git a/include/common/asm_macros_common.S b/include/common/asm_macros_common.S
new file mode 100644
index 0000000..ee59a93
--- /dev/null
+++ b/include/common/asm_macros_common.S
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __ASM_MACROS_COMMON_S__
+#define __ASM_MACROS_COMMON_S__
+
+#include <arch.h>
+
+	/*
+	 * This macro is used to create a function label and place the
+	 * code into a separate text section based on the function name
+	 * to enable elimination of unused code during linking
+	 */
+	.macro func _name
+	.section .text.\_name, "ax"
+	.type \_name, %function
+	.func \_name
+	\_name:
+	.endm
+
+	/*
+	 * This macro is used to mark the end of a function.
+	 */
+	.macro endfunc _name
+	.endfunc
+	.size \_name, . - \_name
+	.endm
+
+	/*
+	 * Theses macros are used to create function labels for deprecated
+	 * APIs. If ERROR_DEPRECATED is non zero, the callers of these APIs
+	 * will fail to link and cause build failure.
+	 */
+#if ERROR_DEPRECATED
+	.macro func_deprecated _name
+	func deprecated\_name
+	.endm
+
+	.macro endfunc_deprecated _name
+	endfunc deprecated\_name
+	.endm
+#else
+	.macro func_deprecated _name
+	func \_name
+	.endm
+
+	.macro endfunc_deprecated _name
+	endfunc \_name
+	.endm
+#endif
+
+	/*
+	 * Helper assembler macro to count trailing zeros. The output is
+	 * populated in the `TZ_COUNT` symbol.
+	 */
+	.macro count_tz _value, _tz_count
+	.if \_value
+	  count_tz "(\_value >> 1)", "(\_tz_count + 1)"
+	.else
+	  .equ TZ_COUNT, (\_tz_count - 1)
+	.endif
+	.endm
+
+	/*
+	 * This macro declares an array of 1 or more stacks, properly
+	 * aligned and in the requested section
+	 */
+#define DEFAULT_STACK_ALIGN	(1 << 6)   /* In case the caller doesnt provide alignment */
+
+	.macro declare_stack _name, _section, _size, _count, _align=DEFAULT_STACK_ALIGN
+	count_tz \_align, 0
+	.if (\_align - (1 << TZ_COUNT))
+	  .error "Incorrect stack alignment specified (Must be a power of 2)."
+	.endif
+	.if ((\_size & ((1 << TZ_COUNT) - 1)) <> 0)
+	  .error "Stack size not correctly aligned"
+	.endif
+	.section    \_section, "aw", %nobits
+	.align TZ_COUNT
+	\_name:
+	.space ((\_count) * (\_size)), 0
+	.endm
+
+
+#endif /* __ASM_MACROS_COMMON_S__ */
diff --git a/include/common/bl_common.h b/include/common/bl_common.h
index f13dc31..3aa0836 100644
--- a/include/common/bl_common.h
+++ b/include/common/bl_common.h
@@ -137,28 +137,36 @@
 #include <cassert.h>
 #include <stdint.h>
 #include <stddef.h>
-
-#define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))
+#include <types.h>
+#include <utils.h> /* To retain compatibility */
 
 /*
  * Declarations of linker defined symbols to help determine memory layout of
  * BL images
  */
-extern unsigned long __RO_START__;
-extern unsigned long __RO_END__;
+#if SEPARATE_CODE_AND_RODATA
+extern uintptr_t __TEXT_START__;
+extern uintptr_t __TEXT_END__;
+extern uintptr_t __RODATA_START__;
+extern uintptr_t __RODATA_END__;
+#else
+extern uintptr_t __RO_START__;
+extern uintptr_t __RO_END__;
+#endif
+
 #if IMAGE_BL2
-extern unsigned long __BL2_END__;
+extern uintptr_t __BL2_END__;
 #elif IMAGE_BL2U
-extern unsigned long __BL2U_END__;
+extern uintptr_t __BL2U_END__;
 #elif IMAGE_BL31
-extern unsigned long __BL31_END__;
+extern uintptr_t __BL31_END__;
 #elif IMAGE_BL32
-extern unsigned long __BL32_END__;
+extern uintptr_t __BL32_END__;
 #endif /* IMAGE_BLX */
 
 #if USE_COHERENT_MEM
-extern unsigned long __COHERENT_RAM_START__;
-extern unsigned long __COHERENT_RAM_END__;
+extern uintptr_t __COHERENT_RAM_START__;
+extern uintptr_t __COHERENT_RAM_END__;
 #endif
 
 
@@ -167,21 +175,21 @@
  * memory is available for its use and how much is already used.
  ******************************************************************************/
 typedef struct meminfo {
-	uint64_t total_base;
+	uintptr_t total_base;
 	size_t total_size;
-	uint64_t free_base;
+	uintptr_t free_base;
 	size_t free_size;
 } meminfo_t;
 
 typedef struct aapcs64_params {
-	unsigned long arg0;
-	unsigned long arg1;
-	unsigned long arg2;
-	unsigned long arg3;
-	unsigned long arg4;
-	unsigned long arg5;
-	unsigned long arg6;
-	unsigned long arg7;
+	u_register_t arg0;
+	u_register_t arg1;
+	u_register_t arg2;
+	u_register_t arg3;
+	u_register_t arg4;
+	u_register_t arg5;
+	u_register_t arg6;
+	u_register_t arg7;
 } aapcs64_params_t;
 
 /***************************************************************************
@@ -277,7 +285,7 @@
 		__builtin_offsetof(entry_point_info_t, args), \
 		assert_BL31_args_offset_mismatch);
 
-CASSERT(sizeof(unsigned long) ==
+CASSERT(sizeof(uintptr_t) ==
 		__builtin_offsetof(entry_point_info_t, spsr) - \
 		__builtin_offsetof(entry_point_info_t, pc), \
 		assert_entrypoint_and_spsr_should_be_adjacent);
@@ -285,8 +293,8 @@
 /*******************************************************************************
  * Function & variable prototypes
  ******************************************************************************/
-unsigned long page_align(unsigned long, unsigned);
-unsigned long image_size(unsigned int image_id);
+uintptr_t page_align(uintptr_t, unsigned);
+size_t image_size(unsigned int image_id);
 int load_image(meminfo_t *mem_layout,
 	       unsigned int image_id,
 	       uintptr_t image_base,
@@ -300,8 +308,8 @@
 extern const char build_message[];
 extern const char version_string[];
 
-void reserve_mem(uint64_t *free_base, size_t *free_size,
-		uint64_t addr, size_t size);
+void reserve_mem(uintptr_t *free_base, size_t *free_size,
+		uintptr_t addr, size_t size);
 
 void print_entry_point_info(const entry_point_info_t *ep_info);
 
diff --git a/include/bl31/runtime_svc.h b/include/common/runtime_svc.h
similarity index 93%
rename from include/bl31/runtime_svc.h
rename to include/common/runtime_svc.h
index 03f906e..adafcee 100644
--- a/include/bl31/runtime_svc.h
+++ b/include/common/runtime_svc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -66,14 +66,14 @@
  * can be accessed using the handle pointer. The cookie parameter is reserved
  * for future use
  */
-typedef uint64_t (*rt_svc_handle_t)(uint32_t smc_fid,
-				  uint64_t x1,
-				  uint64_t x2,
-				  uint64_t x3,
-				  uint64_t x4,
+typedef uintptr_t (*rt_svc_handle_t)(uint32_t smc_fid,
+				  u_register_t x1,
+				  u_register_t x2,
+				  u_register_t x3,
+				  u_register_t x4,
 				  void *cookie,
 				  void *handle,
-				  uint64_t flags);
+				  u_register_t flags);
 typedef struct rt_svc_desc {
 	uint8_t start_oen;
 	uint8_t end_oen;
@@ -127,8 +127,8 @@
  * Function & variable prototypes
  ******************************************************************************/
 void runtime_svc_init(void);
-extern uint64_t __RT_SVC_DESCS_START__;
-extern uint64_t __RT_SVC_DESCS_END__;
+extern uintptr_t __RT_SVC_DESCS_START__;
+extern uintptr_t __RT_SVC_DESCS_END__;
 void init_crash_reporting(void);
 
 #endif /*__ASSEMBLY__*/
diff --git a/include/drivers/arm/gic_v3.h b/include/drivers/arm/gic_v3.h
index a1b6f1b..c5360ff 100644
--- a/include/drivers/arm/gic_v3.h
+++ b/include/drivers/arm/gic_v3.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -41,6 +41,7 @@
 
 #include <mmio.h>
 #include <stdint.h>
+#include <types.h>
 
 
 /* GICv3 Re-distributor interface registers & shifts */
@@ -74,7 +75,7 @@
 /*******************************************************************************
  * Function prototypes
  ******************************************************************************/
-uintptr_t gicv3_get_rdist(uintptr_t gicr_base, uint64_t mpidr);
+uintptr_t gicv3_get_rdist(uintptr_t gicr_base, u_register_t mpidr);
 
 /*******************************************************************************
  * GIC Redistributor interface accessors
diff --git a/include/drivers/arm/gicv3.h b/include/drivers/arm/gicv3.h
index ae6fd91..b7ad778 100644
--- a/include/drivers/arm/gicv3.h
+++ b/include/drivers/arm/gicv3.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -55,7 +55,11 @@
 #define GICD_SETSPI_SR		0x50
 #define GICD_CLRSPI_SR		0x50
 #define GICD_IGRPMODR		0xd00
-#define GICD_IROUTER		0x6100
+/*
+ * GICD_IROUTER<n> register is at 0x6000 + 8n, where n is the interrupt id and
+ * n >= 32, making the effective offset as 0x6100.
+ */
+#define GICD_IROUTER		0x6000
 #define GICD_PIDR2_GICV3	0xffe8
 
 #define IGRPMODR_SHIFT		5
@@ -170,6 +174,7 @@
 #ifndef __ASSEMBLY__
 
 #include <stdint.h>
+#include <types.h>
 
 #define gicv3_is_intr_id_special_identifier(id)	\
 	(((id) >= PENDING_G1S_INTID) && ((id) <= GIC_SPURIOUS_INTERRUPT))
@@ -234,7 +239,7 @@
  *    a hash function. Otherwise, the "Processor Number" field will be used to
  *    access the array elements.
  ******************************************************************************/
-typedef unsigned int (*mpidr_hash_fn)(unsigned long mpidr);
+typedef unsigned int (*mpidr_hash_fn)(u_register_t mpidr);
 
 typedef struct gicv3_driver_data {
 	uintptr_t gicd_base;
diff --git a/include/lib/aarch64/smcc_helpers.h b/include/lib/aarch64/smcc_helpers.h
new file mode 100644
index 0000000..617a5bc
--- /dev/null
+++ b/include/lib/aarch64/smcc_helpers.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SMCC_HELPERS_H__
+#define __SMCC_HELPERS_H__
+
+#include <smcc.h>
+
+#ifndef __ASSEMBLY__
+#include <context.h>
+
+/* Convenience macros to return from SMC handler */
+#define SMC_RET0(_h)	{					\
+	return (uint64_t) (_h);					\
+}
+#define SMC_RET1(_h, _x0)	{				\
+	write_ctx_reg(get_gpregs_ctx(_h), CTX_GPREG_X0, (_x0));	\
+	SMC_RET0(_h);						\
+}
+#define SMC_RET2(_h, _x0, _x1)	{				\
+	write_ctx_reg(get_gpregs_ctx(_h), CTX_GPREG_X1, (_x1));	\
+	SMC_RET1(_h, (_x0));					\
+}
+#define SMC_RET3(_h, _x0, _x1, _x2)	{			\
+	write_ctx_reg(get_gpregs_ctx(_h), CTX_GPREG_X2, (_x2));	\
+	SMC_RET2(_h, (_x0), (_x1));				\
+}
+#define SMC_RET4(_h, _x0, _x1, _x2, _x3)	{		\
+	write_ctx_reg(get_gpregs_ctx(_h), CTX_GPREG_X3, (_x3));	\
+	SMC_RET3(_h, (_x0), (_x1), (_x2));			\
+}
+
+/*
+ * Convenience macros to access general purpose registers using handle provided
+ * to SMC handler. These take the offset values defined in context.h
+ */
+#define SMC_GET_GP(_h, _g)					\
+	read_ctx_reg(get_gpregs_ctx(_h), (_g))
+#define SMC_SET_GP(_h, _g, _v)					\
+	write_ctx_reg(get_gpregs_ctx(_h), (_g), (_v))
+
+/*
+ * Convenience macros to access EL3 context registers using handle provided to
+ * SMC handler. These take the offset values defined in context.h
+ */
+#define SMC_GET_EL3(_h, _e)					\
+	read_ctx_reg(get_el3state_ctx(_h), (_e))
+#define SMC_SET_EL3(_h, _e, _v)					\
+	write_ctx_reg(get_el3state_ctx(_h), (_e), (_v))
+
+/* Return a UUID in the SMC return registers */
+#define SMC_UUID_RET(_h, _uuid)					\
+	SMC_RET4(handle, ((const uint32_t *) &(_uuid))[0],	\
+			 ((const uint32_t *) &(_uuid))[1],	\
+			 ((const uint32_t *) &(_uuid))[2],	\
+			 ((const uint32_t *) &(_uuid))[3])
+
+#endif /*__ASSEMBLY__*/
+#endif /* __SMCC_HELPERS_H__ */
diff --git a/include/lib/cpus/aarch64/cortex_a53.h b/include/lib/cpus/aarch64/cortex_a53.h
index 169d8f4..6976b80 100644
--- a/include/lib/cpus/aarch64/cortex_a53.h
+++ b/include/lib/cpus/aarch64/cortex_a53.h
@@ -57,6 +57,11 @@
 #define CPUECTLR_FPU_RET_CTRL_MASK	(0x7 << CPUECTLR_FPU_RET_CTRL_SHIFT)
 
 /*******************************************************************************
+ * CPU Memory Error Syndrome register specific definitions.
+ ******************************************************************************/
+#define CPUMERRSR_EL1			S3_1_C15_C2_2	/* Instruction def. */
+
+/*******************************************************************************
  * CPU Auxiliary Control register specific definitions.
  ******************************************************************************/
 #define CPUACTLR_EL1			S3_1_C15_C2_0	/* Instruction def. */
@@ -79,4 +84,9 @@
 #define L2ECTLR_RET_CTRL_SHIFT		0
 #define L2ECTLR_RET_CTRL_MASK		(0x7 << L2ECTLR_RET_CTRL_SHIFT)
 
+/*******************************************************************************
+ * L2 Memory Error Syndrome register specific definitions.
+ ******************************************************************************/
+#define L2MERRSR_EL1			S3_1_C15_C2_3	/* Instruction def. */
+
 #endif /* __CORTEX_A53_H__ */
diff --git a/include/lib/cpus/aarch64/cortex_a57.h b/include/lib/cpus/aarch64/cortex_a57.h
index ac4ae57..c5a218b 100644
--- a/include/lib/cpus/aarch64/cortex_a57.h
+++ b/include/lib/cpus/aarch64/cortex_a57.h
@@ -57,6 +57,11 @@
 #define CPUECTLR_CPU_RET_CTRL_MASK	(0x7 << CPUECTLR_CPU_RET_CTRL_SHIFT)
 
 /*******************************************************************************
+ * CPU Memory Error Syndrome register specific definitions.
+ ******************************************************************************/
+#define CPUMERRSR_EL1			S3_1_C15_C2_2	/* Instruction def. */
+
+/*******************************************************************************
  * CPU Auxiliary Control register specific definitions.
  ******************************************************************************/
 #define CPUACTLR_EL1			S3_1_C15_C2_0	/* Instruction def. */
@@ -90,4 +95,9 @@
 #define L2ECTLR_RET_CTRL_SHIFT		0
 #define L2ECTLR_RET_CTRL_MASK		(0x7 << L2ECTLR_RET_CTRL_SHIFT)
 
+/*******************************************************************************
+ * L2 Memory Error Syndrome register specific definitions.
+ ******************************************************************************/
+#define L2MERRSR_EL1			S3_1_C15_C2_3	/* Instruction def. */
+
 #endif /* __CORTEX_A57_H__ */
diff --git a/include/lib/cpus/aarch64/cortex_a72.h b/include/lib/cpus/aarch64/cortex_a72.h
index fa10ca9..01edf43 100644
--- a/include/lib/cpus/aarch64/cortex_a72.h
+++ b/include/lib/cpus/aarch64/cortex_a72.h
@@ -45,6 +45,11 @@
 #define CPUECTLR_L2_DPFTCH_DIST_MASK	(0x3 << 32)
 
 /*******************************************************************************
+ * CPU Memory Error Syndrome register specific definitions.
+ ******************************************************************************/
+#define CPUMERRSR_EL1			S3_1_C15_C2_2	/* Instruction def. */
+
+/*******************************************************************************
  * CPU Auxiliary Control register specific definitions.
  ******************************************************************************/
 #define CPUACTLR_EL1			S3_1_C15_C2_0	/* Instruction def. */
@@ -65,4 +70,9 @@
 #define L2_TAG_RAM_LATENCY_2_CYCLES	0x1
 #define L2_TAG_RAM_LATENCY_3_CYCLES	0x2
 
+/*******************************************************************************
+ * L2 Memory Error Syndrome register specific definitions.
+ ******************************************************************************/
+#define L2MERRSR_EL1			S3_1_C15_C2_3	/* Instruction def. */
+
 #endif /* __CORTEX_A72_H__ */
diff --git a/include/lib/cpus/aarch64/cortex_a73.h b/include/lib/cpus/aarch64/cortex_a73.h
index 2ad0467..13e114a 100644
--- a/include/lib/cpus/aarch64/cortex_a73.h
+++ b/include/lib/cpus/aarch64/cortex_a73.h
@@ -41,4 +41,9 @@
 
 #define CORTEX_A73_CPUECTLR_SMP_BIT	(1 << 6)
 
+/*******************************************************************************
+ * L2 Memory Error Syndrome register specific definitions.
+ ******************************************************************************/
+#define CORTEX_A73_L2MERRSR_EL1		S3_1_C15_C2_3   /* Instruction def. */
+
 #endif /* __CORTEX_A73_H__ */
diff --git a/include/common/context.h b/include/lib/el3_runtime/aarch64/context.h
similarity index 98%
rename from include/common/context.h
rename to include/lib/el3_runtime/aarch64/context.h
index ec47f2a..b528c03 100644
--- a/include/common/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -298,35 +298,35 @@
  */
 #define set_aapcs_args0(ctx, x0)				do {	\
 		write_ctx_reg(get_gpregs_ctx(ctx), CTX_GPREG_X0, x0);	\
-	} while (0);
+	} while (0)
 #define set_aapcs_args1(ctx, x0, x1)				do {	\
 		write_ctx_reg(get_gpregs_ctx(ctx), CTX_GPREG_X1, x1);	\
 		set_aapcs_args0(ctx, x0);				\
-	} while (0);
+	} while (0)
 #define set_aapcs_args2(ctx, x0, x1, x2)			do {	\
 		write_ctx_reg(get_gpregs_ctx(ctx), CTX_GPREG_X2, x2);	\
 		set_aapcs_args1(ctx, x0, x1);				\
-	} while (0);
+	} while (0)
 #define set_aapcs_args3(ctx, x0, x1, x2, x3)			do {	\
 		write_ctx_reg(get_gpregs_ctx(ctx), CTX_GPREG_X3, x3);	\
 		set_aapcs_args2(ctx, x0, x1, x2);			\
-	} while (0);
+	} while (0)
 #define set_aapcs_args4(ctx, x0, x1, x2, x3, x4)		do {	\
 		write_ctx_reg(get_gpregs_ctx(ctx), CTX_GPREG_X4, x4);	\
 		set_aapcs_args3(ctx, x0, x1, x2, x3);			\
-	} while (0);
+	} while (0)
 #define set_aapcs_args5(ctx, x0, x1, x2, x3, x4, x5)		do {	\
 		write_ctx_reg(get_gpregs_ctx(ctx), CTX_GPREG_X5, x5);	\
 		set_aapcs_args4(ctx, x0, x1, x2, x3, x4);		\
-	} while (0);
+	} while (0)
 #define set_aapcs_args6(ctx, x0, x1, x2, x3, x4, x5, x6)	do {	\
 		write_ctx_reg(get_gpregs_ctx(ctx), CTX_GPREG_X6, x6);	\
 		set_aapcs_args5(ctx, x0, x1, x2, x3, x4, x5);		\
-	} while (0);
+	} while (0)
 #define set_aapcs_args7(ctx, x0, x1, x2, x3, x4, x5, x6, x7)	do {	\
 		write_ctx_reg(get_gpregs_ctx(ctx), CTX_GPREG_X7, x7);	\
 		set_aapcs_args6(ctx, x0, x1, x2, x3, x4, x5, x6);	\
-	} while (0);
+	} while (0)
 
 /*******************************************************************************
  * Function prototypes
diff --git a/include/common/context_mgmt.h b/include/lib/el3_runtime/context_mgmt.h
similarity index 94%
rename from include/common/context_mgmt.h
rename to include/lib/el3_runtime/context_mgmt.h
index a76ecbe..672ea11 100644
--- a/include/common/context_mgmt.h
+++ b/include/lib/el3_runtime/context_mgmt.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,7 +32,6 @@
 #define __CM_H__
 
 #include <arch.h>
-#include <bl_common.h>
 
 /*******************************************************************************
  * Forward declarations
@@ -63,9 +62,9 @@
 void cm_prepare_el3_exit(uint32_t security_state);
 void cm_el1_sysregs_context_save(uint32_t security_state);
 void cm_el1_sysregs_context_restore(uint32_t security_state);
-void cm_set_elr_el3(uint32_t security_state, uint64_t entrypoint);
+void cm_set_elr_el3(uint32_t security_state, uintptr_t entrypoint);
 void cm_set_elr_spsr_el3(uint32_t security_state,
-			 uint64_t entrypoint, uint32_t spsr);
+			uintptr_t entrypoint, uint32_t spsr);
 void cm_write_scr_el3_bit(uint32_t security_state,
 			  uint32_t bit_pos,
 			  uint32_t value);
diff --git a/include/bl31/cpu_data.h b/include/lib/el3_runtime/cpu_data.h
similarity index 93%
rename from include/bl31/cpu_data.h
rename to include/lib/el3_runtime/cpu_data.h
index 2b506c7..4fc801b 100644
--- a/include/bl31/cpu_data.h
+++ b/include/lib/el3_runtime/cpu_data.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -78,9 +78,9 @@
  ******************************************************************************/
 typedef struct cpu_data {
 	void *cpu_context[2];
-	uint64_t cpu_ops_ptr;
+	uintptr_t cpu_ops_ptr;
 #if CRASH_REPORTING
-	uint64_t crash_buf[CPU_DATA_CRASH_BUF_SIZE >> 3];
+	u_register_t crash_buf[CPU_DATA_CRASH_BUF_SIZE >> 3];
 #endif
 	struct psci_cpu_data psci_svc_cpu_data;
 #if PLAT_PCPU_DATA_SIZE
@@ -123,14 +123,14 @@
 #define get_cpu_data_by_index(_ix, _m)	   _cpu_data_by_index(_ix)->_m
 #define set_cpu_data_by_index(_ix, _m, _v) _cpu_data_by_index(_ix)->_m = _v
 
-#define flush_cpu_data(_m)	   flush_dcache_range((uint64_t) 	  \
+#define flush_cpu_data(_m)	   flush_dcache_range((uintptr_t)	  \
 						      &(_cpu_data()->_m), \
 						      sizeof(_cpu_data()->_m))
-#define inv_cpu_data(_m)	   inv_dcache_range((uint64_t) 	  \
+#define inv_cpu_data(_m)	   inv_dcache_range((uintptr_t)	  	  \
 						      &(_cpu_data()->_m), \
 						      sizeof(_cpu_data()->_m))
 #define flush_cpu_data_by_index(_ix, _m)	\
-				   flush_dcache_range((uint64_t)	  \
+				   flush_dcache_range((uintptr_t)	  \
 					 &(_cpu_data_by_index(_ix)->_m),  \
 					 sizeof(_cpu_data_by_index(_ix)->_m))
 
diff --git a/include/bl31/services/psci.h b/include/lib/psci/psci.h
similarity index 88%
rename from include/bl31/services/psci.h
rename to include/lib/psci/psci.h
index 20aa52e..c3e9ef7 100644
--- a/include/bl31/services/psci.h
+++ b/include/lib/psci/psci.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -97,6 +97,12 @@
 #define PSCI_NUM_CALLS			18
 #endif
 
+/* The macros below are used to identify PSCI calls from the SMC function ID */
+#define PSCI_FID_MASK			0xffe0u
+#define PSCI_FID_VALUE			0u
+#define is_psci_fid(_fid) \
+	(((_fid) & PSCI_FID_MASK) == PSCI_FID_VALUE)
+
 /*******************************************************************************
  * PSCI Migrate and friends
  ******************************************************************************/
@@ -296,13 +302,13 @@
  * migrate capability etc.
  ******************************************************************************/
 typedef struct spd_pm_ops {
-	void (*svc_on)(uint64_t target_cpu);
-	int32_t (*svc_off)(uint64_t __unused);
-	void (*svc_suspend)(uint64_t max_off_pwrlvl);
-	void (*svc_on_finish)(uint64_t __unused);
-	void (*svc_suspend_finish)(uint64_t max_off_pwrlvl);
-	int32_t (*svc_migrate)(uint64_t from_cpu, uint64_t to_cpu);
-	int32_t (*svc_migrate_info)(uint64_t *resident_cpu);
+	void (*svc_on)(u_register_t target_cpu);
+	int32_t (*svc_off)(u_register_t __unused);
+	void (*svc_suspend)(u_register_t max_off_pwrlvl);
+	void (*svc_on_finish)(u_register_t __unused);
+	void (*svc_suspend_finish)(u_register_t max_off_pwrlvl);
+	int32_t (*svc_migrate)(u_register_t from_cpu, u_register_t to_cpu);
+	int32_t (*svc_migrate_info)(u_register_t *resident_cpu);
 	void (*svc_system_off)(void);
 	void (*svc_system_reset)(void);
 } spd_pm_ops_t;
@@ -326,19 +332,33 @@
 long psci_migrate_info_up_cpu(void);
 int psci_features(unsigned int psci_fid);
 void __dead2 psci_power_down_wfi(void);
-void psci_entrypoint(void);
-void psci_register_spd_pm_hook(const spd_pm_ops_t *);
-uint64_t psci_smc_handler(uint32_t smc_fid,
-			  uint64_t x1,
-			  uint64_t x2,
-			  uint64_t x3,
-			  uint64_t x4,
+void psci_arch_setup(void);
+
+/*
+ * The below API is deprecated. This is now replaced by bl31_warmboot_entry in
+ * AArch64.
+ */
+void psci_entrypoint(void) __deprecated;
+
+/*******************************************************************************
+ * Forward declarations
+ ******************************************************************************/
+struct entry_point_info;
+
+/******************************************************************************
+ * PSCI Library Interfaces
+ *****************************************************************************/
+u_register_t psci_smc_handler(uint32_t smc_fid,
+			  u_register_t x1,
+			  u_register_t x2,
+			  u_register_t x3,
+			  u_register_t x4,
 			  void *cookie,
 			  void *handle,
-			  uint64_t flags);
-
-/* PSCI setup function */
-int psci_setup(void);
+			  u_register_t flags);
+int psci_setup(uintptr_t mailbox_ep);
+void psci_warmboot_entrypoint(void);
+void psci_register_spd_pm_hook(const spd_pm_ops_t *pm);
 
 #endif /*__ASSEMBLY__*/
 
diff --git a/include/bl31/services/psci_compat.h b/include/lib/psci/psci_compat.h
similarity index 98%
rename from include/bl31/services/psci_compat.h
rename to include/lib/psci/psci_compat.h
index 24bd8dc..3554667 100644
--- a/include/bl31/services/psci_compat.h
+++ b/include/lib/psci/psci_compat.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
diff --git a/include/common/smcc_helpers.h b/include/lib/smcc.h
similarity index 71%
rename from include/common/smcc_helpers.h
rename to include/lib/smcc.h
index 6a07b01..c415ba1 100644
--- a/include/common/smcc_helpers.h
+++ b/include/lib/smcc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -28,8 +28,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef __SMCC_HELPERS_H__
-#define __SMCC_HELPERS_H__
+#ifndef __SMCC_H__
+#define __SMCC_H__
 
 /*******************************************************************************
  * Bit definitions inside the function id as per the SMC calling convention
@@ -83,7 +83,6 @@
 #ifndef __ASSEMBLY__
 
 #include <cassert.h>
-#include <context.h>
 #include <stdint.h>
 
 /* Various flags passed to SMC handlers */
@@ -93,45 +92,6 @@
 #define is_caller_non_secure(_f)	(!!(_f & SMC_FROM_NON_SECURE))
 #define is_caller_secure(_f)		(!(is_caller_non_secure(_f)))
 
-/* Convenience macros to return from SMC handler */
-#define SMC_RET0(_h)	{ \
-	return (uint64_t) (_h);		\
-}
-#define SMC_RET1(_h, _x0)	{ \
-	write_ctx_reg(get_gpregs_ctx(_h), CTX_GPREG_X0, (_x0)); \
-	SMC_RET0(_h);						\
-}
-#define SMC_RET2(_h, _x0, _x1)	{ \
-	write_ctx_reg(get_gpregs_ctx(_h), CTX_GPREG_X1, (_x1)); \
-	SMC_RET1(_h, (_x0)); \
-}
-#define SMC_RET3(_h, _x0, _x1, _x2)	{ \
-	write_ctx_reg(get_gpregs_ctx(_h), CTX_GPREG_X2, (_x2)); \
-	SMC_RET2(_h, (_x0), (_x1)); \
-}
-#define SMC_RET4(_h, _x0, _x1, _x2, _x3)	{ \
-	write_ctx_reg(get_gpregs_ctx(_h), CTX_GPREG_X3, (_x3)); \
-	SMC_RET3(_h, (_x0), (_x1), (_x2)); \
-}
-
-/*
- * Convenience macros to access general purpose registers using handle provided
- * to SMC handler. These takes the offset values defined in context.h
- */
-#define SMC_GET_GP(_h, _g) \
-	read_ctx_reg(get_gpregs_ctx(_h), (_g));
-#define SMC_SET_GP(_h, _g, _v) \
-	write_ctx_reg(get_gpregs_ctx(_h), (_g), (_v));
-
-/*
- * Convenience macros to access EL3 context registers using handle provided to
- * SMC handler. These takes the offset values defined in context.h
- */
-#define SMC_GET_EL3(_h, _e) \
-	read_ctx_reg(get_el3state_ctx(_h), (_e));
-#define SMC_SET_EL3(_h, _e, _v) \
-	write_ctx_reg(get_el3state_ctx(_h), (_e), (_v));
-
 /* The macro below is used to identify a Standard Service SMC call */
 #define is_std_svc_call(_fid)		((((_fid) >> FUNCID_OEN_SHIFT) & \
 					   FUNCID_OEN_MASK) == OEN_STD_START)
@@ -154,12 +114,5 @@
 		{ _n0, _n1, _n2, _n3, _n4, _n5 } \
 	}
 
-/* Return a UUID in the SMC return registers */
-#define SMC_UUID_RET(_h, _uuid) \
-	SMC_RET4(handle, ((const uint32_t *) &(_uuid))[0], \
-			 ((const uint32_t *) &(_uuid))[1], \
-			 ((const uint32_t *) &(_uuid))[2], \
-			 ((const uint32_t *) &(_uuid))[3])
-
 #endif /*__ASSEMBLY__*/
-#endif /* __SMCC_HELPERS_H__ */
+#endif /* __SMCC_H__ */
diff --git a/include/lib/stdlib/machine/_stdint.h b/include/lib/stdlib/machine/_stdint.h
index e36c659..9a4f35f 100644
--- a/include/lib/stdlib/machine/_stdint.h
+++ b/include/lib/stdlib/machine/_stdint.h
@@ -30,6 +30,11 @@
  * $FreeBSD$
  */
 
+/*
+ * Portions copyright (c) 2016, ARM Limited and Contributors.
+ * All rights reserved.
+ */
+
 #ifndef	_MACHINE__STDINT_H_
 #define	_MACHINE__STDINT_H_
 
@@ -38,12 +43,12 @@
 #define	INT8_C(c)		(c)
 #define	INT16_C(c)		(c)
 #define	INT32_C(c)		(c)
-#define	INT64_C(c)		(c ## L)
+#define	INT64_C(c)		(c ## LL)
 
 #define	UINT8_C(c)		(c)
 #define	UINT16_C(c)		(c)
 #define	UINT32_C(c)		(c ## U)
-#define	UINT64_C(c)		(c ## UL)
+#define	UINT64_C(c)		(c ## ULL)
 
 #define	INTMAX_C(c)		INT64_C(c)
 #define	UINTMAX_C(c)		UINT64_C(c)
@@ -60,19 +65,19 @@
 #define	INT8_MIN	(-0x7f-1)
 #define	INT16_MIN	(-0x7fff-1)
 #define	INT32_MIN	(-0x7fffffff-1)
-#define	INT64_MIN	(-0x7fffffffffffffffL-1)
+#define	INT64_MIN	(-0x7fffffffffffffffLL-1)
 
 /* Maximum values of exact-width signed integer types. */
 #define	INT8_MAX	0x7f
 #define	INT16_MAX	0x7fff
 #define	INT32_MAX	0x7fffffff
-#define	INT64_MAX	0x7fffffffffffffffL
+#define	INT64_MAX	0x7fffffffffffffffLL
 
 /* Maximum values of exact-width unsigned integer types. */
 #define	UINT8_MAX	0xff
 #define	UINT16_MAX	0xffff
 #define	UINT32_MAX	0xffffffffU
-#define	UINT64_MAX	0xffffffffffffffffUL
+#define	UINT64_MAX	0xffffffffffffffffULL
 
 /*
  * ISO/IEC 9899:1999
diff --git a/include/lib/utils.h b/include/lib/utils.h
new file mode 100644
index 0000000..0936cbb
--- /dev/null
+++ b/include/lib/utils.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __UTILS_H__
+#define __UTILS_H__
+
+/* Compute the number of elements in the given array */
+#define ARRAY_SIZE(a)				\
+	(sizeof(a) / sizeof((a)[0]))
+
+#define IS_POWER_OF_TWO(x)			\
+	(((x) & ((x) - 1)) == 0)
+
+/*
+ * The round_up() macro rounds up a value to the given boundary in a
+ * type-agnostic yet type-safe manner. The boundary must be a power of two.
+ * In other words, it computes the smallest multiple of boundary which is
+ * greater than or equal to value.
+ *
+ * round_down() is similar but rounds the value down instead.
+ */
+#define round_boundary(value, boundary)		\
+	((__typeof__(value))((boundary) - 1))
+
+#define round_up(value, boundary)		\
+	((((value) - 1) | round_boundary(value, boundary)) + 1)
+
+#define round_down(value, boundary)		\
+	((value) & ~round_boundary(value, boundary))
+
+/*
+ * Evaluates to 1 if (ptr + inc) overflows, 0 otherwise.
+ * Both arguments must be unsigned pointer values (i.e. uintptr_t).
+ */
+#define check_uptr_overflow(ptr, inc)		\
+	(((ptr) > UINTPTR_MAX - (inc)) ? 1 : 0)
+
+#endif /* __UTILS_H__ */
diff --git a/include/lib/xlat_tables.h b/include/lib/xlat_tables.h
index 7d57521..b51a1de 100644
--- a/include/lib/xlat_tables.h
+++ b/include/lib/xlat_tables.h
@@ -134,6 +134,8 @@
 #define MT_PERM_SHIFT	3
 /* Security state (SECURE/NS) */
 #define MT_SEC_SHIFT	4
+/* Access permissions for instruction execution (EXECUTE/EXECUTE_NEVER) */
+#define MT_EXECUTE_SHIFT	5
 
 /*
  * Memory mapping attributes
@@ -155,8 +157,21 @@
 
 	MT_SECURE	= 0 << MT_SEC_SHIFT,
 	MT_NS		= 1 << MT_SEC_SHIFT,
+
+	/*
+	 * Access permissions for instruction execution are only relevant for
+	 * normal read-only memory, i.e. MT_MEMORY | MT_RO. They are ignored
+	 * (and potentially overridden) otherwise:
+	 *  - Device memory is always marked as execute-never.
+	 *  - Read-write normal memory is always marked as execute-never.
+	 */
+	MT_EXECUTE		= 0 << MT_EXECUTE_SHIFT,
+	MT_EXECUTE_NEVER	= 1 << MT_EXECUTE_SHIFT,
 } mmap_attr_t;
 
+#define MT_CODE		(MT_MEMORY | MT_RO | MT_EXECUTE)
+#define MT_RO_DATA	(MT_MEMORY | MT_RO | MT_EXECUTE_NEVER)
+
 /*
  * Structure for specifying a single region of memory.
  */
diff --git a/include/plat/arm/board/common/board_arm_def.h b/include/plat/arm/board/common/board_arm_def.h
index d70fbb4..ad82923 100644
--- a/include/plat/arm/board/common/board_arm_def.h
+++ b/include/plat/arm/board/common/board_arm_def.h
@@ -61,10 +61,10 @@
 
 /*
  * The constants below are not optimised for memory usage. Platforms that wish
- * to optimise these constants should set `ARM_BOARD_OPTIMISE_MMAP` to 1 and
+ * to optimise these constants should set `ARM_BOARD_OPTIMISE_MEM` to 1 and
  * provide there own values.
  */
-#if !ARM_BOARD_OPTIMISE_MMAP
+#if !ARM_BOARD_OPTIMISE_MEM
 /*
  * PLAT_ARM_MMAP_ENTRIES depends on the number of entries in the
  * plat_arm_mmap array defined for each BL stage.
@@ -81,7 +81,29 @@
 # define MAX_XLAT_TABLES		5
 #endif
 
-#endif /* ARM_BOARD_OPTIMISE_MMAP */
+/*
+ * PLAT_ARM_MAX_BL1_RW_SIZE is calculated using the current BL1 RW debug size
+ * plus a little space for growth.
+ */
+#define PLAT_ARM_MAX_BL1_RW_SIZE	0xA000
+
+/*
+ * PLAT_ARM_MAX_BL2_SIZE is calculated using the current BL2 debug size plus a
+ * little space for growth.
+ */
+#if TRUSTED_BOARD_BOOT
+# define PLAT_ARM_MAX_BL2_SIZE		0x1D000
+#else
+# define PLAT_ARM_MAX_BL2_SIZE		0xF000
+#endif
+
+/*
+ * PLAT_ARM_MAX_BL31_SIZE is calculated using the current BL31 debug size plus a
+ * little space for growth.
+ */
+#define PLAT_ARM_MAX_BL31_SIZE		0x1D000
+
+#endif /* ARM_BOARD_OPTIMISE_MEM */
 
 #define MAX_IO_DEVICES			3
 #define MAX_IO_HANDLES			4
diff --git a/include/plat/arm/board/common/v2m_def.h b/include/plat/arm/board/common/v2m_def.h
index 888792e..7cee4e8 100644
--- a/include/plat/arm/board/common/v2m_def.h
+++ b/include/plat/arm/board/common/v2m_def.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -119,13 +119,26 @@
 #define V2M_SP810_CTRL_TIM2_SEL		(1 << 19)
 #define V2M_SP810_CTRL_TIM3_SEL		(1 << 21)
 
+/*
+ * The flash can be mapped either as read-only or read-write.
+ *
+ * If it is read-write then it should also be mapped as device memory because
+ * NOR flash programming involves sending a fixed, ordered sequence of commands.
+ *
+ * If it is read-only then it should also be mapped as:
+ * - Normal memory, because reading from NOR flash is transparent, it is like
+ *   reading from RAM.
+ * - Non-executable by default. If some parts of the flash need to be executable
+ *   then platform code is responsible for re-mapping the appropriate portion
+ *   of it as executable.
+ */
 #define V2M_MAP_FLASH0_RW		MAP_REGION_FLAT(V2M_FLASH0_BASE,\
 						V2M_FLASH0_SIZE,	\
 						MT_DEVICE | MT_RW | MT_SECURE)
 
 #define V2M_MAP_FLASH0_RO		MAP_REGION_FLAT(V2M_FLASH0_BASE,\
 						V2M_FLASH0_SIZE,	\
-						MT_MEMORY | MT_RO | MT_SECURE)
+						MT_RO_DATA | MT_SECURE)
 
 #define V2M_MAP_IOFPGA			MAP_REGION_FLAT(V2M_IOFPGA_BASE,\
 						V2M_IOFPGA_SIZE,		\
diff --git a/include/plat/arm/common/plat_arm.h b/include/plat/arm/common/plat_arm.h
index 2fe0a69..0ffdb5c 100644
--- a/include/plat/arm/common/plat_arm.h
+++ b/include/plat/arm/common/plat_arm.h
@@ -31,10 +31,10 @@
 #define __PLAT_ARM_H__
 
 #include <bakery_lock.h>
-#include <bl_common.h>
 #include <cassert.h>
 #include <cpu_data.h>
 #include <stdint.h>
+#include <utils.h>
 #include <xlat_tables.h>
 
 #define ARM_CASSERT_MMAP						\
@@ -45,23 +45,15 @@
 /*
  * Utility functions common to ARM standard platforms
  */
-
-void arm_configure_mmu_el1(unsigned long total_base,
-			unsigned long total_size,
-			unsigned long ro_start,
-			unsigned long ro_limit
-#if USE_COHERENT_MEM
-			, unsigned long coh_start,
-			unsigned long coh_limit
-#endif
-);
-void arm_configure_mmu_el3(unsigned long total_base,
-			unsigned long total_size,
-			unsigned long ro_start,
-			unsigned long ro_limit
+void arm_setup_page_tables(uintptr_t total_base,
+			size_t total_size,
+			uintptr_t code_start,
+			uintptr_t code_limit,
+			uintptr_t rodata_start,
+			uintptr_t rodata_limit
 #if USE_COHERENT_MEM
-			, unsigned long coh_start,
-			unsigned long coh_limit
+			, uintptr_t coh_start,
+			uintptr_t coh_limit
 #endif
 );
 
diff --git a/include/plat/common/common_def.h b/include/plat/common/common_def.h
index 9fac9fa..d6b7772 100644
--- a/include/plat/common/common_def.h
+++ b/include/plat/common/common_def.h
@@ -80,5 +80,44 @@
 	.ep_info.pc = BL2_BASE,				\
 }
 
-#endif /* __COMMON_DEF_H__ */
+/*
+ * The following constants identify the extents of the code & read-only data
+ * regions. These addresses are used by the MMU setup code and therefore they
+ * must be page-aligned.
+ *
+ * When the code and read-only data are mapped as a single atomic section
+ * (i.e. when SEPARATE_CODE_AND_RODATA=0) then we treat the whole section as
+ * code by specifying the read-only data section as empty.
+ *
+ * BL1 is different than the other images in the sense that its read-write data
+ * originally lives in Trusted ROM and needs to be relocated in Trusted SRAM at
+ * run-time. Therefore, the read-write data in ROM can be mapped with the same
+ * memory attributes as the read-only data region. For this reason, BL1 uses
+ * different macros.
+ *
+ * Note that BL1_ROM_END is not necessarily aligned on a page boundary as it
+ * just points to the end of BL1's actual content in Trusted ROM. Therefore it
+ * needs to be rounded up to the next page size in order to map the whole last
+ * page of it with the right memory attributes.
+ */
+#if SEPARATE_CODE_AND_RODATA
+#define BL_CODE_BASE		(unsigned long)(&__TEXT_START__)
+#define BL_CODE_LIMIT		(unsigned long)(&__TEXT_END__)
+#define BL_RO_DATA_BASE		(unsigned long)(&__RODATA_START__)
+#define BL_RO_DATA_LIMIT	(unsigned long)(&__RODATA_END__)
+
+#define BL1_CODE_LIMIT		BL_CODE_LIMIT
+#define BL1_RO_DATA_BASE	(unsigned long)(&__RODATA_START__)
+#define BL1_RO_DATA_LIMIT	round_up(BL1_ROM_END, PAGE_SIZE)
+#else
+#define BL_CODE_BASE		(unsigned long)(&__RO_START__)
+#define BL_CODE_LIMIT		(unsigned long)(&__RO_END__)
+#define BL_RO_DATA_BASE		0
+#define BL_RO_DATA_LIMIT	0
 
+#define BL1_CODE_LIMIT		round_up(BL1_ROM_END, PAGE_SIZE)
+#define BL1_RO_DATA_BASE	0
+#define BL1_RO_DATA_LIMIT	0
+#endif /* SEPARATE_CODE_AND_RODATA */
+
+#endif /* __COMMON_DEF_H__ */
diff --git a/include/plat/common/platform.h b/include/plat/common/platform.h
index 390721f..1d2a373 100644
--- a/include/plat/common/platform.h
+++ b/include/plat/common/platform.h
@@ -83,7 +83,7 @@
 /*******************************************************************************
  * Optional common functions (may be overridden)
  ******************************************************************************/
-unsigned long plat_get_my_stack(void);
+uintptr_t plat_get_my_stack(void);
 void plat_report_exception(unsigned long);
 int plat_crash_console_init(void);
 int plat_crash_console_putc(int c);
diff --git a/include/bl31/services/std_svc.h b/include/services/std_svc.h
similarity index 86%
rename from include/bl31/services/std_svc.h
rename to include/services/std_svc.h
index cbd5b62..0feb2ea 100644
--- a/include/bl31/services/std_svc.h
+++ b/include/services/std_svc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -42,10 +42,4 @@
 #define STD_SVC_VERSION_MAJOR		0x0
 #define STD_SVC_VERSION_MINOR		0x1
 
-/* The macros below are used to identify PSCI calls from the SMC function ID */
-#define PSCI_FID_MASK			0xffe0u
-#define PSCI_FID_VALUE			0u
-#define is_psci_fid(_fid) \
-	(((_fid) & PSCI_FID_MASK) == PSCI_FID_VALUE)
-
 #endif /* __STD_SVC_H__ */
diff --git a/lib/cpus/aarch64/cortex_a53.S b/lib/cpus/aarch64/cortex_a53.S
index bb56516..ed546e7 100644
--- a/lib/cpus/aarch64/cortex_a53.S
+++ b/lib/cpus/aarch64/cortex_a53.S
@@ -234,11 +234,13 @@
 	 */
 .section .rodata.cortex_a53_regs, "aS"
 cortex_a53_regs:  /* The ascii list of register names to be reported */
-	.asciz	"cpuectlr_el1", ""
+	.asciz	"cpuectlr_el1", "cpumerrsr_el1", "l2merrsr_el1", ""
 
 func cortex_a53_cpu_reg_dump
 	adr	x6, cortex_a53_regs
 	mrs	x8, CPUECTLR_EL1
+	mrs	x9, CPUMERRSR_EL1
+	mrs	x10, L2MERRSR_EL1
 	ret
 endfunc cortex_a53_cpu_reg_dump
 
diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S
index 60929a0..d6b181d 100644
--- a/lib/cpus/aarch64/cortex_a57.S
+++ b/lib/cpus/aarch64/cortex_a57.S
@@ -477,11 +477,13 @@
 	 */
 .section .rodata.cortex_a57_regs, "aS"
 cortex_a57_regs:  /* The ascii list of register names to be reported */
-	.asciz	"cpuectlr_el1", ""
+	.asciz	"cpuectlr_el1", "cpumerrsr_el1", "l2merrsr_el1", ""
 
 func cortex_a57_cpu_reg_dump
 	adr	x6, cortex_a57_regs
 	mrs	x8, CPUECTLR_EL1
+	mrs	x9, CPUMERRSR_EL1
+	mrs	x10, L2MERRSR_EL1
 	ret
 endfunc cortex_a57_cpu_reg_dump
 
diff --git a/lib/cpus/aarch64/cortex_a72.S b/lib/cpus/aarch64/cortex_a72.S
index eb37f2c..9f04fb7 100644
--- a/lib/cpus/aarch64/cortex_a72.S
+++ b/lib/cpus/aarch64/cortex_a72.S
@@ -231,11 +231,13 @@
 	 */
 .section .rodata.cortex_a72_regs, "aS"
 cortex_a72_regs:  /* The ascii list of register names to be reported */
-	.asciz	"cpuectlr_el1", ""
+	.asciz	"cpuectlr_el1", "cpumerrsr_el1", "l2merrsr_el1", ""
 
 func cortex_a72_cpu_reg_dump
 	adr	x6, cortex_a72_regs
 	mrs	x8, CPUECTLR_EL1
+	mrs	x9, CPUMERRSR_EL1
+	mrs	x10, L2MERRSR_EL1
 	ret
 endfunc cortex_a72_cpu_reg_dump
 
diff --git a/lib/cpus/aarch64/cortex_a73.S b/lib/cpus/aarch64/cortex_a73.S
index 70b4c6a..e1615db 100644
--- a/lib/cpus/aarch64/cortex_a73.S
+++ b/lib/cpus/aarch64/cortex_a73.S
@@ -144,11 +144,12 @@
 	 */
 .section .rodata.cortex_a73_regs, "aS"
 cortex_a73_regs:  /* The ascii list of register names to be reported */
-	.asciz	"cpuectlr_el1", ""
+	.asciz	"cpuectlr_el1", "l2merrsr_el1", ""
 
 func cortex_a73_cpu_reg_dump
 	adr	x6, cortex_a73_regs
 	mrs	x8, CORTEX_A73_CPUECTLR_EL1
+	mrs	x9, CORTEX_A73_L2MERRSR_EL1
 	ret
 endfunc cortex_a73_cpu_reg_dump
 
diff --git a/common/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
similarity index 99%
rename from common/aarch64/context.S
rename to lib/el3_runtime/aarch64/context.S
index d51daa7..7982e50 100644
--- a/common/aarch64/context.S
+++ b/lib/el3_runtime/aarch64/context.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
diff --git a/common/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
similarity index 98%
rename from common/context_mgmt.c
rename to lib/el3_runtime/aarch64/context_mgmt.c
index 3ccbd03..4527aa3 100644
--- a/common/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -285,7 +285,7 @@
  * This function populates ELR_EL3 member of 'cpu_context' pertaining to the
  * given security state with the given entrypoint
  ******************************************************************************/
-void cm_set_elr_el3(uint32_t security_state, uint64_t entrypoint)
+void cm_set_elr_el3(uint32_t security_state, uintptr_t entrypoint)
 {
 	cpu_context_t *ctx;
 	el3_state_t *state;
@@ -303,7 +303,7 @@
  * pertaining to the given security state
  ******************************************************************************/
 void cm_set_elr_spsr_el3(uint32_t security_state,
-			 uint64_t entrypoint, uint32_t spsr)
+			uintptr_t entrypoint, uint32_t spsr)
 {
 	cpu_context_t *ctx;
 	el3_state_t *state;
diff --git a/bl31/aarch64/cpu_data.S b/lib/el3_runtime/aarch64/cpu_data.S
similarity index 97%
rename from bl31/aarch64/cpu_data.S
rename to lib/el3_runtime/aarch64/cpu_data.S
index 0842825..2cc07ba 100644
--- a/bl31/aarch64/cpu_data.S
+++ b/lib/el3_runtime/aarch64/cpu_data.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
diff --git a/bl31/cpu_data_array.c b/lib/el3_runtime/cpu_data_array.c
similarity index 94%
rename from bl31/cpu_data_array.c
rename to lib/el3_runtime/cpu_data_array.c
index 4cba118..eba21a5 100644
--- a/bl31/cpu_data_array.c
+++ b/lib/el3_runtime/cpu_data_array.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
diff --git a/lib/locks/bakery/bakery_lock_normal.c b/lib/locks/bakery/bakery_lock_normal.c
index 45b870b..efc1b57 100644
--- a/lib/locks/bakery/bakery_lock_normal.c
+++ b/lib/locks/bakery/bakery_lock_normal.c
@@ -82,13 +82,13 @@
 
 #define write_cache_op(addr, cached)	\
 				do {	\
-					(cached ? dccvac((uint64_t)addr) :\
-						dcivac((uint64_t)addr));\
+					(cached ? dccvac((uintptr_t)addr) :\
+						dcivac((uintptr_t)addr));\
 						dsbish();\
 				} while (0)
 
 #define read_cache_op(addr, cached)	if (cached) \
-					    dccivac((uint64_t)addr)
+					    dccivac((uintptr_t)addr)
 
 static unsigned int bakery_get_ticket(bakery_lock_t *lock,
 						unsigned int me, int is_cached)
diff --git a/services/std_svc/psci/psci_helpers.S b/lib/psci/aarch64/psci_helpers.S
similarity index 83%
rename from services/std_svc/psci/psci_helpers.S
rename to lib/psci/aarch64/psci_helpers.S
index 6ccf943..ff250a0 100644
--- a/services/std_svc/psci/psci_helpers.S
+++ b/lib/psci/aarch64/psci_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,6 +35,10 @@
 
 	.globl	psci_do_pwrdown_cache_maintenance
 	.globl	psci_do_pwrup_cache_maintenance
+	.globl	psci_power_down_wfi
+#if !ERROR_DEPRECATED
+	.globl psci_entrypoint
+#endif
 
 /* -----------------------------------------------------------------------
  * void psci_do_pwrdown_cache_maintenance(unsigned int power level);
@@ -152,3 +156,25 @@
 	ldp	x29, x30, [sp], #16
 	ret
 endfunc psci_do_pwrup_cache_maintenance
+
+/* -----------------------------------------------------------------------
+ * void psci_power_down_wfi(void);
+ * This function is called to indicate to the power controller that it
+ * is safe to power down this cpu. It should not exit the wfi and will
+ * be released from reset upon power up.
+ * -----------------------------------------------------------------------
+ */
+func psci_power_down_wfi
+	dsb	sy		// ensure write buffer empty
+	wfi
+	bl	plat_panic_handler
+endfunc psci_power_down_wfi
+
+/* -----------------------------------------------------------------------
+ * void psci_entrypoint(void);
+ * The deprecated entry point for PSCI on warm boot for AArch64.
+ * -----------------------------------------------------------------------
+ */
+func_deprecated psci_entrypoint
+	b	bl31_warm_entrypoint
+endfunc_deprecated psci_entrypoint
diff --git a/services/std_svc/psci/psci_common.c b/lib/psci/psci_common.c
similarity index 98%
rename from services/std_svc/psci/psci_common.c
rename to lib/psci/psci_common.c
index 5090037..e87e8c0 100644
--- a/services/std_svc/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -252,8 +252,8 @@
  * function will be called after a cpu is powered on to find the local state
  * each power domain has emerged from.
  *****************************************************************************/
-static void psci_get_target_local_pwr_states(unsigned int end_pwrlvl,
-					     psci_power_state_t *target_state)
+void psci_get_target_local_pwr_states(unsigned int end_pwrlvl,
+				      psci_power_state_t *target_state)
 {
 	unsigned int parent_idx, lvl;
 	plat_local_state_t *pd_state = target_state->pwr_domain_state;
@@ -596,10 +596,10 @@
 			       uintptr_t entrypoint,
 			       u_register_t context_id)
 {
-	unsigned long ep_attr, sctlr;
+	u_register_t ep_attr, sctlr;
 	unsigned int daif, ee, mode;
-	unsigned long ns_scr_el3 = read_scr_el3();
-	unsigned long ns_sctlr_el1 = read_sctlr_el1();
+	u_register_t ns_scr_el3 = read_scr_el3();
+	u_register_t ns_sctlr_el1 = read_sctlr_el1();
 
 	sctlr = ns_scr_el3 & SCR_HCE_BIT ? read_sctlr_el2() : ns_sctlr_el1;
 	ee = 0;
@@ -683,7 +683,7 @@
  * code to enable the gic cpu interface and for a cluster it will enable
  * coherency at the interconnect level in addition to gic cpu interface.
  ******************************************************************************/
-void psci_power_up_finish(void)
+void psci_warmboot_entrypoint(void)
 {
 	unsigned int end_pwrlvl, cpu_idx = plat_my_core_pos();
 	psci_power_state_t state_info = { {PSCI_LOCAL_STATE_RUN} };
@@ -817,7 +817,7 @@
 	plat_local_state_type_t state_type;
 
 	/* This array maps to the PSCI_STATE_X definitions in psci.h */
-	static const char *psci_state_type_str[] = {
+	static const char * const psci_state_type_str[] = {
 		"ON",
 		"RETENTION",
 		"OFF",
@@ -839,9 +839,9 @@
 	for (idx = 0; idx < PLATFORM_CORE_COUNT; idx++) {
 		state = psci_get_cpu_local_state_by_idx(idx);
 		state_type = find_local_state_type(state);
-		INFO("  CPU Node : MPID 0x%lx, parent_node %d,"
+		INFO("  CPU Node : MPID 0x%llx, parent_node %d,"
 				" State %s (0x%x)\n",
-				psci_cpu_pd_nodes[idx].mpidr,
+				(unsigned long long)psci_cpu_pd_nodes[idx].mpidr,
 				psci_cpu_pd_nodes[idx].parent_node,
 				psci_state_type_str[state_type],
 				psci_get_cpu_local_state_by_idx(idx));
diff --git a/lib/psci/psci_lib.mk b/lib/psci/psci_lib.mk
new file mode 100644
index 0000000..662e14a
--- /dev/null
+++ b/lib/psci/psci_lib.mk
@@ -0,0 +1,54 @@
+#
+# Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# Neither the name of ARM nor the names of its contributors may be used
+# to endorse or promote products derived from this software without specific
+# prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+PSCI_LIB_SOURCES	:=	lib/el3_runtime/cpu_data_array.c	\
+				lib/el3_runtime/aarch64/context.S	\
+				lib/el3_runtime/aarch64/cpu_data.S	\
+				lib/el3_runtime/aarch64/context_mgmt.c	\
+				lib/cpus/aarch64/cpu_helpers.S		\
+				lib/locks/exclusive/spinlock.S		\
+				lib/psci/psci_off.c			\
+				lib/psci/psci_on.c			\
+				lib/psci/psci_suspend.c			\
+				lib/psci/psci_common.c			\
+				lib/psci/psci_main.c			\
+				lib/psci/psci_setup.c			\
+				lib/psci/psci_system_off.c		\
+				lib/psci/aarch64/psci_helpers.S
+
+ifeq (${USE_COHERENT_MEM}, 1)
+PSCI_LIB_SOURCES		+=	lib/locks/bakery/bakery_lock_coherent.c
+else
+PSCI_LIB_SOURCES		+=	lib/locks/bakery/bakery_lock_normal.c
+endif
+
+ifeq (${ENABLE_PSCI_STAT}, 1)
+PSCI_LIB_SOURCES		+=	lib/psci/psci_stat.c
+endif
diff --git a/services/std_svc/psci/psci_main.c b/lib/psci/psci_main.c
similarity index 88%
rename from services/std_svc/psci/psci_main.c
rename to lib/psci/psci_main.c
index 86f45ca..3ad3dd4 100644
--- a/services/std_svc/psci/psci_main.c
+++ b/lib/psci/psci_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,8 +33,7 @@
 #include <assert.h>
 #include <debug.h>
 #include <platform.h>
-#include <runtime_svc.h>
-#include <std_svc.h>
+#include <smcc.h>
 #include <string.h>
 #include "psci_private.h"
 
@@ -94,10 +93,14 @@
 	is_power_down_state = psci_get_pstate_type(power_state);
 
 	/* Sanity check the requested suspend levels */
-	assert (psci_validate_suspend_req(&state_info, is_power_down_state)
+	assert(psci_validate_suspend_req(&state_info, is_power_down_state)
 			== PSCI_E_SUCCESS);
 
 	target_pwrlvl = psci_find_target_suspend_lvl(&state_info);
+	if (target_pwrlvl == PSCI_INVALID_PWR_LVL) {
+		ERROR("Invalid target power level for suspend operation\n");
+		panic();
+	}
 
 	/* Fast path for CPU standby.*/
 	if (is_cpu_standby_req(is_power_down_state, target_pwrlvl)) {
@@ -217,7 +220,7 @@
 	 * The only error cpu_off can return is E_DENIED. So check if that's
 	 * indeed the case.
 	 */
-	assert (rc == PSCI_E_DENIED);
+	assert(rc == PSCI_E_DENIED);
 
 	return rc;
 }
@@ -327,21 +330,21 @@
 /*******************************************************************************
  * PSCI top level handler for servicing SMCs.
  ******************************************************************************/
-uint64_t psci_smc_handler(uint32_t smc_fid,
-			  uint64_t x1,
-			  uint64_t x2,
-			  uint64_t x3,
-			  uint64_t x4,
+u_register_t psci_smc_handler(uint32_t smc_fid,
+			  u_register_t x1,
+			  u_register_t x2,
+			  u_register_t x3,
+			  u_register_t x4,
 			  void *cookie,
 			  void *handle,
-			  uint64_t flags)
+			  u_register_t flags)
 {
 	if (is_caller_secure(flags))
-		SMC_RET1(handle, SMC_UNK);
+		return SMC_UNK;
 
 	/* Check the fid against the capabilities */
 	if (!(psci_caps & define_psci_cap(smc_fid)))
-		SMC_RET1(handle, SMC_UNK);
+		return SMC_UNK;
 
 	if (((smc_fid >> FUNCID_CC_SHIFT) & FUNCID_CC_MASK) == SMC_32) {
 		/* 32-bit PSCI function, clear top parameter bits */
@@ -352,31 +355,31 @@
 
 		switch (smc_fid) {
 		case PSCI_VERSION:
-			SMC_RET1(handle, psci_version());
+			return psci_version();
 
 		case PSCI_CPU_OFF:
-			SMC_RET1(handle, psci_cpu_off());
+			return psci_cpu_off();
 
 		case PSCI_CPU_SUSPEND_AARCH32:
-			SMC_RET1(handle, psci_cpu_suspend(x1, x2, x3));
+			return psci_cpu_suspend(x1, x2, x3);
 
 		case PSCI_CPU_ON_AARCH32:
-			SMC_RET1(handle, psci_cpu_on(x1, x2, x3));
+			return psci_cpu_on(x1, x2, x3);
 
 		case PSCI_AFFINITY_INFO_AARCH32:
-			SMC_RET1(handle, psci_affinity_info(x1, x2));
+			return psci_affinity_info(x1, x2);
 
 		case PSCI_MIG_AARCH32:
-			SMC_RET1(handle, psci_migrate(x1));
+			return psci_migrate(x1);
 
 		case PSCI_MIG_INFO_TYPE:
-			SMC_RET1(handle, psci_migrate_info_type());
+			return psci_migrate_info_type();
 
 		case PSCI_MIG_INFO_UP_CPU_AARCH32:
-			SMC_RET1(handle, psci_migrate_info_up_cpu());
+			return psci_migrate_info_up_cpu();
 
 		case PSCI_SYSTEM_SUSPEND_AARCH32:
-			SMC_RET1(handle, psci_system_suspend(x1, x2));
+			return psci_system_suspend(x1, x2);
 
 		case PSCI_SYSTEM_OFF:
 			psci_system_off();
@@ -387,14 +390,14 @@
 			/* We should never return from psci_system_reset() */
 
 		case PSCI_FEATURES:
-			SMC_RET1(handle, psci_features(x1));
+			return psci_features(x1);
 
 #if ENABLE_PSCI_STAT
 		case PSCI_STAT_RESIDENCY_AARCH32:
-			SMC_RET1(handle, psci_stat_residency(x1, x2));
+			return psci_stat_residency(x1, x2);
 
 		case PSCI_STAT_COUNT_AARCH32:
-			SMC_RET1(handle, psci_stat_count(x1, x2));
+			return psci_stat_count(x1, x2);
 #endif
 
 		default:
@@ -405,29 +408,29 @@
 
 		switch (smc_fid) {
 		case PSCI_CPU_SUSPEND_AARCH64:
-			SMC_RET1(handle, psci_cpu_suspend(x1, x2, x3));
+			return psci_cpu_suspend(x1, x2, x3);
 
 		case PSCI_CPU_ON_AARCH64:
-			SMC_RET1(handle, psci_cpu_on(x1, x2, x3));
+			return psci_cpu_on(x1, x2, x3);
 
 		case PSCI_AFFINITY_INFO_AARCH64:
-			SMC_RET1(handle, psci_affinity_info(x1, x2));
+			return psci_affinity_info(x1, x2);
 
 		case PSCI_MIG_AARCH64:
-			SMC_RET1(handle, psci_migrate(x1));
+			return psci_migrate(x1);
 
 		case PSCI_MIG_INFO_UP_CPU_AARCH64:
-			SMC_RET1(handle, psci_migrate_info_up_cpu());
+			return psci_migrate_info_up_cpu();
 
 		case PSCI_SYSTEM_SUSPEND_AARCH64:
-			SMC_RET1(handle, psci_system_suspend(x1, x2));
+			return psci_system_suspend(x1, x2);
 
 #if ENABLE_PSCI_STAT
 		case PSCI_STAT_RESIDENCY_AARCH64:
-			SMC_RET1(handle, psci_stat_residency(x1, x2));
+			return psci_stat_residency(x1, x2);
 
 		case PSCI_STAT_COUNT_AARCH64:
-			SMC_RET1(handle, psci_stat_count(x1, x2));
+			return psci_stat_count(x1, x2);
 #endif
 
 		default:
@@ -436,5 +439,5 @@
 	}
 
 	WARN("Unimplemented PSCI Call: 0x%x \n", smc_fid);
-	SMC_RET1(handle, SMC_UNK);
+	return SMC_UNK;
 }
diff --git a/services/std_svc/psci/psci_off.c b/lib/psci/psci_off.c
similarity index 98%
rename from services/std_svc/psci/psci_off.c
rename to lib/psci/psci_off.c
index 36dab49..471141d 100644
--- a/services/std_svc/psci/psci_off.c
+++ b/lib/psci/psci_off.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
diff --git a/services/std_svc/psci/psci_on.c b/lib/psci/psci_on.c
similarity index 97%
rename from services/std_svc/psci/psci_on.c
rename to lib/psci/psci_on.c
index c8c36cd..f4bb797 100644
--- a/services/std_svc/psci/psci_on.c
+++ b/lib/psci/psci_on.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,11 +32,9 @@
 #include <arch_helpers.h>
 #include <assert.h>
 #include <bl_common.h>
-#include <bl31.h>
 #include <debug.h>
 #include <context_mgmt.h>
 #include <platform.h>
-#include <runtime_svc.h>
 #include <stddef.h>
 #include "psci_private.h"
 
@@ -177,7 +175,7 @@
 	 * on have completed. Perform enough arch.initialization
 	 * to run in the non-secure address space.
 	 */
-	bl31_arch_setup();
+	psci_arch_setup();
 
 	/*
 	 * Lock the CPU spin lock to make sure that the context initialization
diff --git a/services/std_svc/psci/psci_private.h b/lib/psci/psci_private.h
similarity index 97%
rename from services/std_svc/psci/psci_private.h
rename to lib/psci/psci_private.h
index ffb0732..b795c8e 100644
--- a/services/std_svc/psci/psci_private.h
+++ b/lib/psci/psci_private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -192,7 +192,8 @@
 void psci_query_sys_suspend_pwrstate(psci_power_state_t *state_info);
 int psci_validate_mpidr(u_register_t mpidr);
 void psci_init_req_local_pwr_states(void);
-void psci_power_up_finish(void);
+void psci_get_target_local_pwr_states(unsigned int end_pwrlvl,
+				      psci_power_state_t *target_state);
 int psci_validate_entry_point(entry_point_info_t *ep,
 			uintptr_t entrypoint, u_register_t context_id);
 void psci_get_parent_pwr_domain_nodes(unsigned int cpu_idx,
@@ -214,7 +215,7 @@
 int psci_spd_migrate_info(u_register_t *mpidr);
 
 /* Private exported functions from psci_on.c */
-int psci_cpu_on_start(unsigned long target_cpu,
+int psci_cpu_on_start(u_register_t target_cpu,
 		      entry_point_info_t *ep);
 
 void psci_cpu_on_finish(unsigned int cpu_idx,
diff --git a/services/std_svc/psci/psci_setup.c b/lib/psci/psci_setup.c
similarity index 87%
rename from services/std_svc/psci/psci_setup.c
rename to lib/psci/psci_setup.c
index 975b257..d35e000 100644
--- a/services/std_svc/psci/psci_setup.c
+++ b/lib/psci/psci_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -184,15 +184,17 @@
 }
 
 /*******************************************************************************
- * This function initializes the power domain topology tree by querying the
- * platform. The power domain nodes higher than the CPU are populated in the
- * array psci_non_cpu_pd_nodes[] and the CPU power domains are populated in
- * psci_cpu_pd_nodes[]. The platform exports its static topology map through the
+ * This function does the architectural setup and takes the warm boot
+ * entry-point `mailbox_ep` as an argument. The function also initializes the
+ * power domain topology tree by querying the platform. The power domain nodes
+ * higher than the CPU are populated in the array psci_non_cpu_pd_nodes[] and
+ * the CPU power domains are populated in psci_cpu_pd_nodes[]. The platform
+ * exports its static topology map through the
  * populate_power_domain_topology_tree() API. The algorithm populates the
  * psci_non_cpu_pd_nodes and psci_cpu_pd_nodes iteratively by using this
- * topology map.  On a platform that implements two clusters of 2 cpus each, and
- * supporting 3 domain levels, the populated psci_non_cpu_pd_nodes would look
- * like this:
+ * topology map.  On a platform that implements two clusters of 2 cpus each,
+ * and supporting 3 domain levels, the populated psci_non_cpu_pd_nodes would
+ * look like this:
  *
  * ---------------------------------------------------
  * | system node | cluster 0 node  | cluster 1 node  |
@@ -204,10 +206,13 @@
  * |   CPU 0   |   CPU 1   |   CPU 2   |   CPU 3  |
  * ------------------------------------------------
  ******************************************************************************/
-int psci_setup(void)
+int psci_setup(uintptr_t mailbox_ep)
 {
 	const unsigned char *topology_tree;
 
+	/* Do the Architectural initialization */
+	psci_arch_setup();
+
 	/* Query the topology map from the platform */
 	topology_tree = plat_get_power_domain_tree_desc();
 
@@ -229,8 +234,8 @@
 	 */
 	psci_set_pwr_domains_to_run(PLAT_MAX_PWR_LVL);
 
-	plat_setup_psci_ops((uintptr_t)psci_entrypoint,
-					&psci_plat_pm_ops);
+	assert(mailbox_ep);
+	plat_setup_psci_ops(mailbox_ep, &psci_plat_pm_ops);
 	assert(psci_plat_pm_ops);
 
 	/* Initialize the psci capability */
@@ -259,3 +264,17 @@
 
 	return 0;
 }
+
+/*******************************************************************************
+ * This duplicates what the primary cpu did after a cold boot in BL1. The same
+ * needs to be done when a cpu is hotplugged in. This function could also over-
+ * ride any EL3 setup done by BL1 as this code resides in rw memory.
+ ******************************************************************************/
+void psci_arch_setup(void)
+{
+	/* Program the counter frequency */
+	write_cntfrq_el0(plat_get_syscnt_freq2());
+
+	/* Initialize the cpu_ops pointer. */
+	init_cpu_ops();
+}
diff --git a/services/std_svc/psci/psci_stat.c b/lib/psci/psci_stat.c
similarity index 98%
rename from services/std_svc/psci/psci_stat.c
rename to lib/psci/psci_stat.c
index 155bbb0..ecbe592 100644
--- a/services/std_svc/psci/psci_stat.c
+++ b/lib/psci/psci_stat.c
@@ -259,8 +259,10 @@
 
 	/* Find the highest power level */
 	pwrlvl = psci_find_target_suspend_lvl(&state_info);
-	if (pwrlvl == PSCI_INVALID_PWR_LVL)
-		return PSCI_E_INVALID_PARAMS;
+	if (pwrlvl == PSCI_INVALID_PWR_LVL) {
+		ERROR("Invalid target power level for PSCI statistics operation\n");
+		panic();
+	}
 
 	/* Get the index into the stats array */
 	local_state = state_info.pwr_domain_state[pwrlvl];
diff --git a/services/std_svc/psci/psci_suspend.c b/lib/psci/psci_suspend.c
similarity index 94%
rename from services/std_svc/psci/psci_suspend.c
rename to lib/psci/psci_suspend.c
index e6c8cd9..0887e3b 100644
--- a/services/std_svc/psci/psci_suspend.c
+++ b/lib/psci/psci_suspend.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -37,7 +37,6 @@
 #include <cpu_data.h>
 #include <debug.h>
 #include <platform.h>
-#include <runtime_svc.h>
 #include <stddef.h>
 #include "psci_private.h"
 
@@ -46,17 +45,25 @@
  * from standby/retention states at multiple power levels.
  ******************************************************************************/
 static void psci_suspend_to_standby_finisher(unsigned int cpu_idx,
-					     psci_power_state_t *state_info,
 					     unsigned int end_pwrlvl)
 {
+	psci_power_state_t state_info;
+
 	psci_acquire_pwr_domain_locks(end_pwrlvl,
 				cpu_idx);
 
 	/*
+	 * Find out which retention states this CPU has exited from until the
+	 * 'end_pwrlvl'. The exit retention state could be deeper than the entry
+	 * state as a result of state coordination amongst other CPUs post wfi.
+	 */
+	psci_get_target_local_pwr_states(end_pwrlvl, &state_info);
+
+	/*
 	 * Plat. management: Allow the platform to do operations
 	 * on waking up from retention.
 	 */
-	psci_plat_pm_ops->pwr_domain_suspend_finish(state_info);
+	psci_plat_pm_ops->pwr_domain_suspend_finish(&state_info);
 
 	/*
 	 * Set the requested and target state of this CPU and all the higher
@@ -223,7 +230,7 @@
 	 * After we wake up from context retaining suspend, call the
 	 * context retaining suspend finisher.
 	 */
-	psci_suspend_to_standby_finisher(idx, state_info, end_pwrlvl);
+	psci_suspend_to_standby_finisher(idx, end_pwrlvl);
 }
 
 /*******************************************************************************
diff --git a/services/std_svc/psci/psci_system_off.c b/lib/psci/psci_system_off.c
similarity index 96%
rename from services/std_svc/psci/psci_system_off.c
rename to lib/psci/psci_system_off.c
index 28315d6..de9ec64 100644
--- a/services/std_svc/psci/psci_system_off.c
+++ b/lib/psci/psci_system_off.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
diff --git a/lib/xlat_tables/aarch64/xlat_tables.c b/lib/xlat_tables/aarch64/xlat_tables.c
index 051e46a..f432237 100644
--- a/lib/xlat_tables/aarch64/xlat_tables.c
+++ b/lib/xlat_tables/aarch64/xlat_tables.c
@@ -33,11 +33,10 @@
 #include <assert.h>
 #include <cassert.h>
 #include <platform_def.h>
+#include <utils.h>
 #include <xlat_tables.h>
 #include "../xlat_tables_private.h"
 
-#define IS_POWER_OF_TWO(x)	(((x) & ((x) - 1)) == 0)
-
 /*
  * The virtual address space size must be a power of two (as set in TCR.T0SZ).
  * As we start the initial lookup at level 1, it must also be between 2 GB and
@@ -48,7 +47,6 @@
 CASSERT(ADDR_SPACE_SIZE >= (1ull << 31) && ADDR_SPACE_SIZE <= (1ull << 39) &&
 	IS_POWER_OF_TWO(ADDR_SPACE_SIZE), assert_valid_addr_space_size);
 
-#define UNSET_DESC	~0ul
 #define NUM_L1_ENTRIES (ADDR_SPACE_SIZE >> L1_XLAT_ADDRESS_SHIFT)
 
 static uint64_t l1_xlation_table[NUM_L1_ENTRIES]
diff --git a/lib/xlat_tables/xlat_tables_common.c b/lib/xlat_tables/xlat_tables_common.c
index fd10084..33784c2 100644
--- a/lib/xlat_tables/xlat_tables_common.c
+++ b/lib/xlat_tables/xlat_tables_common.c
@@ -31,11 +31,12 @@
 #include <arch.h>
 #include <arch_helpers.h>
 #include <assert.h>
-#include <bl_common.h>
 #include <cassert.h>
 #include <debug.h>
 #include <platform_def.h>
 #include <string.h>
+#include <types.h>
+#include <utils.h>
 #include <xlat_tables.h>
 
 #if LOG_LEVEL >= LOG_LEVEL_VERBOSE
@@ -52,7 +53,7 @@
 #define debug_print(...) ((void)0)
 #endif
 
-#define UNSET_DESC	~0ul
+#define UNSET_DESC	~0ull
 
 static uint64_t xlat_tables[MAX_XLAT_TABLES][XLAT_TABLE_ENTRIES]
 			__aligned(XLAT_TABLE_SIZE) __section("xlat_table");
@@ -194,37 +195,66 @@
 static uint64_t mmap_desc(unsigned attr, unsigned long long addr_pa,
 							int level)
 {
-	uint64_t desc = addr_pa;
+	uint64_t desc;
 	int mem_type;
 
-	desc |= level == 3 ? TABLE_DESC : BLOCK_DESC;
-
-	desc |= attr & MT_NS ? LOWER_ATTRS(NS) : 0;
-
-	desc |= attr & MT_RW ? LOWER_ATTRS(AP_RW) : LOWER_ATTRS(AP_RO);
-
+	desc = addr_pa;
+	desc |= (level == 3) ? TABLE_DESC : BLOCK_DESC;
+	desc |= (attr & MT_NS) ? LOWER_ATTRS(NS) : 0;
+	desc |= (attr & MT_RW) ? LOWER_ATTRS(AP_RW) : LOWER_ATTRS(AP_RO);
 	desc |= LOWER_ATTRS(ACCESS_FLAG);
 
+	/*
+	 * Deduce shareability domain and executability of the memory region
+	 * from the memory type.
+	 *
+	 * Data accesses to device memory and non-cacheable normal memory are
+	 * coherent for all observers in the system, and correspondingly are
+	 * always treated as being Outer Shareable. Therefore, for these 2 types
+	 * of memory, it is not strictly needed to set the shareability field
+	 * in the translation tables.
+	 */
 	mem_type = MT_TYPE(attr);
-	if (mem_type == MT_MEMORY) {
-		desc |= LOWER_ATTRS(ATTR_IWBWA_OWBWA_NTR_INDEX | ISH);
-		if (attr & MT_RW)
-			desc |= UPPER_ATTRS(XN);
-	} else if (mem_type == MT_NON_CACHEABLE) {
-		desc |= LOWER_ATTRS(ATTR_NON_CACHEABLE_INDEX | OSH);
-		if (attr & MT_RW)
-			desc |= UPPER_ATTRS(XN);
-	} else {
-		assert(mem_type == MT_DEVICE);
+	if (mem_type == MT_DEVICE) {
 		desc |= LOWER_ATTRS(ATTR_DEVICE_INDEX | OSH);
+		/*
+		 * Always map device memory as execute-never.
+		 * This is to avoid the possibility of a speculative instruction
+		 * fetch, which could be an issue if this memory region
+		 * corresponds to a read-sensitive peripheral.
+		 */
 		desc |= UPPER_ATTRS(XN);
+	} else { /* Normal memory */
+		/*
+		 * Always map read-write normal memory as execute-never.
+		 * (Trusted Firmware doesn't self-modify its code, therefore
+		 * R/W memory is reserved for data storage, which must not be
+		 * executable.)
+		 * Note that setting the XN bit here is for consistency only.
+		 * The enable_mmu_elx() function sets the SCTLR_EL3.WXN bit,
+		 * which makes any writable memory region to be treated as
+		 * execute-never, regardless of the value of the XN bit in the
+		 * translation table.
+		 *
+		 * For read-only memory, rely on the MT_EXECUTE/MT_EXECUTE_NEVER
+		 * attribute to figure out the value of the XN bit.
+		 */
+		if ((attr & MT_RW) || (attr & MT_EXECUTE_NEVER))
+			desc |= UPPER_ATTRS(XN);
+
+		if (mem_type == MT_MEMORY) {
+			desc |= LOWER_ATTRS(ATTR_IWBWA_OWBWA_NTR_INDEX | ISH);
+		} else {
+			assert(mem_type == MT_NON_CACHEABLE);
+			desc |= LOWER_ATTRS(ATTR_NON_CACHEABLE_INDEX | OSH);
+		}
 	}
 
 	debug_print((mem_type == MT_MEMORY) ? "MEM" :
 		((mem_type == MT_NON_CACHEABLE) ? "NC" : "DEV"));
 	debug_print(attr & MT_RW ? "-RW" : "-RO");
 	debug_print(attr & MT_NS ? "-NS" : "-S");
-
+	debug_print(attr & MT_EXECUTE_NEVER ? "-XN" : "-EXEC");
 	return desc;
 }
 
@@ -284,9 +314,9 @@
 	unsigned level_size_shift = L1_XLAT_ADDRESS_SHIFT - (level - 1) *
 						XLAT_TABLE_ENTRIES_SHIFT;
 	unsigned level_size = 1 << level_size_shift;
-	unsigned long long level_index_mask =
-		((unsigned long long) XLAT_TABLE_ENTRIES_MASK)
-		<< level_size_shift;
+	u_register_t level_index_mask =
+		(u_register_t)(((u_register_t) XLAT_TABLE_ENTRIES_MASK)
+		<< level_size_shift);
 
 	assert(level > 0 && level <= 3);
 
@@ -328,7 +358,7 @@
 			/* Area not covered by a region so need finer table */
 			uint64_t *new_table = xlat_tables[next_xlat++];
 			assert(next_xlat <= MAX_XLAT_TABLES);
-			desc = TABLE_DESC | (uint64_t)new_table;
+			desc = TABLE_DESC | (uintptr_t)new_table;
 
 			/* Recurse to fill in new table */
 			mm = init_xlation_table_inner(mm, base_va,
diff --git a/plat/arm/board/common/board_common.mk b/plat/arm/board/common/board_common.mk
index 6ddc0c9..a5636d5 100644
--- a/plat/arm/board/common/board_common.mk
+++ b/plat/arm/board/common/board_common.mk
@@ -61,8 +61,8 @@
 endif
 
 # This flag controls whether memory usage needs to be optimised
-ARM_BOARD_OPTIMISE_MMAP	?=	0
+ARM_BOARD_OPTIMISE_MEM	?=	0
 
 # Process flags
-$(eval $(call assert_boolean,ARM_BOARD_OPTIMISE_MMAP))
-$(eval $(call add_define,ARM_BOARD_OPTIMISE_MMAP))
+$(eval $(call assert_boolean,ARM_BOARD_OPTIMISE_MEM))
+$(eval $(call add_define,ARM_BOARD_OPTIMISE_MEM))
diff --git a/plat/arm/board/common/board_css_common.c b/plat/arm/board/common/board_css_common.c
index 62253f8..69b744d 100644
--- a/plat/arm/board/common/board_css_common.c
+++ b/plat/arm/board/common/board_css_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,9 +31,9 @@
 #include <plat_arm.h>
 
 /*
- * Table of regions for different BL stages to map using the MMU.
- * This doesn't include Trusted RAM as the 'mem_layout' argument passed to
- * arm_configure_mmu_elx() will give the available subset of that,
+ * Table of memory regions for different BL stages to map using the MMU.
+ * This doesn't include Trusted SRAM as arm_setup_page_tables() already
+ * takes care of mapping it.
  */
 #if IMAGE_BL1
 const mmap_region_t plat_arm_mmap[] = {
diff --git a/plat/arm/board/fvp/aarch64/fvp_helpers.S b/plat/arm/board/fvp/aarch64/fvp_helpers.S
index 884fee8..6a7ad23 100644
--- a/plat/arm/board/fvp/aarch64/fvp_helpers.S
+++ b/plat/arm/board/fvp/aarch64/fvp_helpers.S
@@ -127,7 +127,7 @@
 endfunc plat_secondary_cold_boot_setup
 
 	/* ---------------------------------------------------------------------
-	 * unsigned long plat_get_my_entrypoint (void);
+	 * uintptr_t plat_get_my_entrypoint (void);
 	 *
 	 * Main job of this routine is to distinguish between a cold and warm
 	 * boot. On FVP, this information can be queried from the power
diff --git a/plat/arm/board/fvp/drivers/pwrc/fvp_pwrc.c b/plat/arm/board/fvp/drivers/pwrc/fvp_pwrc.c
index e004281..c8df78c 100644
--- a/plat/arm/board/fvp/drivers/pwrc/fvp_pwrc.c
+++ b/plat/arm/board/fvp/drivers/pwrc/fvp_pwrc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -41,12 +41,12 @@
  */
 ARM_INSTANTIATE_LOCK
 
-unsigned int fvp_pwrc_get_cpu_wkr(unsigned long mpidr)
+unsigned int fvp_pwrc_get_cpu_wkr(u_register_t mpidr)
 {
 	return PSYSR_WK(fvp_pwrc_read_psysr(mpidr));
 }
 
-unsigned int fvp_pwrc_read_psysr(unsigned long mpidr)
+unsigned int fvp_pwrc_read_psysr(u_register_t mpidr)
 {
 	unsigned int rc;
 	arm_lock_get();
@@ -56,21 +56,21 @@
 	return rc;
 }
 
-void fvp_pwrc_write_pponr(unsigned long mpidr)
+void fvp_pwrc_write_pponr(u_register_t mpidr)
 {
 	arm_lock_get();
 	mmio_write_32(PWRC_BASE + PPONR_OFF, (unsigned int) mpidr);
 	arm_lock_release();
 }
 
-void fvp_pwrc_write_ppoffr(unsigned long mpidr)
+void fvp_pwrc_write_ppoffr(u_register_t mpidr)
 {
 	arm_lock_get();
 	mmio_write_32(PWRC_BASE + PPOFFR_OFF, (unsigned int) mpidr);
 	arm_lock_release();
 }
 
-void fvp_pwrc_set_wen(unsigned long mpidr)
+void fvp_pwrc_set_wen(u_register_t mpidr)
 {
 	arm_lock_get();
 	mmio_write_32(PWRC_BASE + PWKUPR_OFF,
@@ -78,7 +78,7 @@
 	arm_lock_release();
 }
 
-void fvp_pwrc_clr_wen(unsigned long mpidr)
+void fvp_pwrc_clr_wen(u_register_t mpidr)
 {
 	arm_lock_get();
 	mmio_write_32(PWRC_BASE + PWKUPR_OFF,
@@ -86,7 +86,7 @@
 	arm_lock_release();
 }
 
-void fvp_pwrc_write_pcoffr(unsigned long mpidr)
+void fvp_pwrc_write_pcoffr(u_register_t mpidr)
 {
 	arm_lock_get();
 	mmio_write_32(PWRC_BASE + PCOFFR_OFF, (unsigned int) mpidr);
diff --git a/plat/arm/board/fvp/drivers/pwrc/fvp_pwrc.h b/plat/arm/board/fvp/drivers/pwrc/fvp_pwrc.h
index 3dc9aad..1dbf128 100644
--- a/plat/arm/board/fvp/drivers/pwrc/fvp_pwrc.h
+++ b/plat/arm/board/fvp/drivers/pwrc/fvp_pwrc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -64,13 +64,13 @@
 /*******************************************************************************
  * Function & variable prototypes
  ******************************************************************************/
-void fvp_pwrc_write_pcoffr(unsigned long);
-void fvp_pwrc_write_ppoffr(unsigned long);
-void fvp_pwrc_write_pponr(unsigned long);
-void fvp_pwrc_set_wen(unsigned long);
-void fvp_pwrc_clr_wen(unsigned long);
-unsigned int fvp_pwrc_read_psysr(unsigned long);
-unsigned int fvp_pwrc_get_cpu_wkr(unsigned long);
+void fvp_pwrc_write_pcoffr(u_register_t);
+void fvp_pwrc_write_ppoffr(u_register_t);
+void fvp_pwrc_write_pponr(u_register_t);
+void fvp_pwrc_set_wen(u_register_t);
+void fvp_pwrc_clr_wen(u_register_t);
+unsigned int fvp_pwrc_read_psysr(u_register_t);
+unsigned int fvp_pwrc_get_cpu_wkr(u_register_t);
 
 #endif /*__ASSEMBLY__*/
 
diff --git a/plat/arm/board/fvp/fvp_common.c b/plat/arm/board/fvp/fvp_common.c
index 0f557af..002cff6 100644
--- a/plat/arm/board/fvp/fvp_common.c
+++ b/plat/arm/board/fvp/fvp_common.c
@@ -66,9 +66,12 @@
 
 
 /*
- * Table of regions for various BL stages to map using the MMU.
- * This doesn't include TZRAM as the 'mem_layout' argument passed to
- * arm_configure_mmu_elx() will give the available subset of that,
+ * Table of memory regions for various BL stages to map using the MMU.
+ * This doesn't include Trusted SRAM as arm_setup_page_tables() already
+ * takes care of mapping it.
+ *
+ * The flash needs to be mapped as writable in order to erase the FIP's Table of
+ * Contents in case of unrecoverable error (see plat_error_handler()).
  */
 #if IMAGE_BL1
 const mmap_region_t plat_arm_mmap[] = {
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index 1e906d8..d0898ad 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -146,26 +146,4 @@
 
 #define PLAT_ARM_G0_IRQS		ARM_G0_IRQS
 
-/*
- * PLAT_ARM_MAX_BL1_RW_SIZE is calculated using the current BL1 RW debug size
- * plus a little space for growth.
- */
-#define PLAT_ARM_MAX_BL1_RW_SIZE	0xA000
-
-/*
- * PLAT_ARM_MAX_BL2_SIZE is calculated using the current BL2 debug size plus a
- * little space for growth.
- */
-#if TRUSTED_BOARD_BOOT
-# define PLAT_ARM_MAX_BL2_SIZE		0x1D000
-#else
-# define PLAT_ARM_MAX_BL2_SIZE		0xC000
-#endif
-
-/*
- * PLAT_ARM_MAX_BL31_SIZE is calculated using the current BL31 debug size plus a
- * little space for growth.
- */
-#define PLAT_ARM_MAX_BL31_SIZE		0x1D000
-
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/plat/arm/board/juno/aarch64/juno_helpers.S b/plat/arm/board/juno/aarch64/juno_helpers.S
index 377b0cb..9291fa4 100644
--- a/plat/arm/board/juno/aarch64/juno_helpers.S
+++ b/plat/arm/board/juno/aarch64/juno_helpers.S
@@ -206,7 +206,7 @@
 endfunc plat_reset_handler
 
 	/* -----------------------------------------------------
-	 *  unsigned int plat_arm_calc_core_pos(uint64_t mpidr)
+	 *  unsigned int plat_arm_calc_core_pos(u_register_t mpidr)
 	 *  Helper function to calculate the core position.
 	 * -----------------------------------------------------
 	 */
diff --git a/plat/arm/board/juno/include/platform_def.h b/plat/arm/board/juno/include/platform_def.h
index a2cf036..c53e938 100644
--- a/plat/arm/board/juno/include/platform_def.h
+++ b/plat/arm/board/juno/include/platform_def.h
@@ -74,10 +74,10 @@
 #endif /* TRUSTED_BOARD_BOOT */
 
 /*
- * If ARM_BOARD_OPTIMISE_MMAP=0 then Juno uses the default, unoptimised values
+ * If ARM_BOARD_OPTIMISE_MEM=0 then Juno uses the default, unoptimised values
  * defined for ARM development platforms.
  */
-#if ARM_BOARD_OPTIMISE_MMAP
+#if ARM_BOARD_OPTIMISE_MEM
 /*
  * PLAT_ARM_MMAP_ENTRIES depends on the number of entries in the
  * plat_arm_mmap array defined for each BL stage.
@@ -107,7 +107,33 @@
 # define MAX_XLAT_TABLES		3
 #endif
 
-#endif /* ARM_BOARD_OPTIMISE_MMAP */
+/*
+ * PLAT_ARM_MAX_BL1_RW_SIZE is calculated using the current BL1 RW debug size
+ * plus a little space for growth.
+ */
+#if TRUSTED_BOARD_BOOT
+# define PLAT_ARM_MAX_BL1_RW_SIZE	0x9000
+#else
+# define PLAT_ARM_MAX_BL1_RW_SIZE	0x6000
+#endif
+
+/*
+ * PLAT_ARM_MAX_BL2_SIZE is calculated using the current BL2 debug size plus a
+ * little space for growth.
+ */
+#if TRUSTED_BOARD_BOOT
+# define PLAT_ARM_MAX_BL2_SIZE		0x1D000
+#else
+# define PLAT_ARM_MAX_BL2_SIZE		0xC000
+#endif
+
+/*
+ * PLAT_ARM_MAX_BL31_SIZE is calculated using the current BL31 debug size plus a
+ * little space for growth.
+ */
+#define PLAT_ARM_MAX_BL31_SIZE		0x1D000
+
+#endif /* ARM_BOARD_OPTIMISE_MEM */
 
 /* CCI related constants */
 #define PLAT_ARM_CCI_BASE		0x2c090000
@@ -183,30 +209,4 @@
 /* CSS SoC NIC-400 Global Programmers View (GPV) */
 #define PLAT_SOC_CSS_NIC400_BASE	0x2a000000
 
-/*
- * PLAT_ARM_MAX_BL1_RW_SIZE is calculated using the current BL1 RW debug size
- * plus a little space for growth.
- */
-#if TRUSTED_BOARD_BOOT
-# define PLAT_ARM_MAX_BL1_RW_SIZE	0x9000
-#else
-# define PLAT_ARM_MAX_BL1_RW_SIZE	0x6000
-#endif
-
-/*
- * PLAT_ARM_MAX_BL2_SIZE is calculated using the current BL2 debug size plus a
- * little space for growth.
- */
-#if TRUSTED_BOARD_BOOT
-# define PLAT_ARM_MAX_BL2_SIZE		0x1D000
-#else
-# define PLAT_ARM_MAX_BL2_SIZE		0xC000
-#endif
-
-/*
- * PLAT_ARM_MAX_BL31_SIZE is calculated using the current BL31 debug size plus a
- * little space for growth.
- */
-#define PLAT_ARM_MAX_BL31_SIZE		0x1D000
-
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/plat/arm/board/juno/platform.mk b/plat/arm/board/juno/platform.mk
index 4fda4ca..c1cfffc 100644
--- a/plat/arm/board/juno/platform.mk
+++ b/plat/arm/board/juno/platform.mk
@@ -79,7 +79,7 @@
 ENABLE_PLAT_COMPAT		:= 	0
 
 # Enable memory map related constants optimisation
-ARM_BOARD_OPTIMISE_MMAP		:=	1
+ARM_BOARD_OPTIMISE_MEM		:=	1
 
 include plat/arm/board/common/board_css.mk
 include plat/arm/common/arm_common.mk
diff --git a/plat/arm/common/aarch64/arm_helpers.S b/plat/arm/common/aarch64/arm_helpers.S
index a0338f1..d782020 100644
--- a/plat/arm/common/aarch64/arm_helpers.S
+++ b/plat/arm/common/aarch64/arm_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -49,7 +49,7 @@
 endfunc plat_my_core_pos
 
 	/* -----------------------------------------------------
-	 *  unsigned int plat_arm_calc_core_pos(uint64_t mpidr)
+	 *  unsigned int plat_arm_calc_core_pos(u_register_t mpidr)
 	 *  Helper function to calculate the core position.
 	 *  With this function: CorePos = (ClusterId * 4) +
 	 *  				  CoreId
diff --git a/plat/arm/common/arm_bl1_setup.c b/plat/arm/common/arm_bl1_setup.c
index 951f48a..c94f0cd 100644
--- a/plat/arm/common/arm_bl1_setup.c
+++ b/plat/arm/common/arm_bl1_setup.c
@@ -35,6 +35,8 @@
 #include <platform_def.h>
 #include <plat_arm.h>
 #include <sp805.h>
+#include <utils.h>
+#include <xlat_tables.h>
 #include "../../../bl1/bl1_private.h"
 
 
@@ -118,15 +120,18 @@
  *****************************************************************************/
 void arm_bl1_plat_arch_setup(void)
 {
-	arm_configure_mmu_el3(bl1_tzram_layout.total_base,
+	arm_setup_page_tables(bl1_tzram_layout.total_base,
 			      bl1_tzram_layout.total_size,
-			      BL1_RO_BASE,
-			      BL1_RO_LIMIT
+			      BL_CODE_BASE,
+			      BL1_CODE_LIMIT,
+			      BL1_RO_DATA_BASE,
+			      BL1_RO_DATA_LIMIT
 #if USE_COHERENT_MEM
 			      , BL1_COHERENT_RAM_BASE,
 			      BL1_COHERENT_RAM_LIMIT
 #endif
 			     );
+	enable_mmu_el3(0);
 }
 
 void bl1_plat_arch_setup(void)
diff --git a/plat/arm/common/arm_bl2_setup.c b/plat/arm/common/arm_bl2_setup.c
index 681dc8a..b6afaa7 100644
--- a/plat/arm/common/arm_bl2_setup.c
+++ b/plat/arm/common/arm_bl2_setup.c
@@ -36,16 +36,6 @@
 #include <plat_arm.h>
 #include <string.h>
 
-
-/*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned.  It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
- */
-#define BL2_RO_BASE (unsigned long)(&__RO_START__)
-#define BL2_RO_LIMIT (unsigned long)(&__RO_END__)
-
 #if USE_COHERENT_MEM
 /*
  * The next 2 constants identify the extents of the coherent memory region.
@@ -234,15 +224,18 @@
  ******************************************************************************/
 void arm_bl2_plat_arch_setup(void)
 {
-	arm_configure_mmu_el1(bl2_tzram_layout.total_base,
+	arm_setup_page_tables(bl2_tzram_layout.total_base,
 			      bl2_tzram_layout.total_size,
-			      BL2_RO_BASE,
-			      BL2_RO_LIMIT
+			      BL_CODE_BASE,
+			      BL_CODE_LIMIT,
+			      BL_RO_DATA_BASE,
+			      BL_RO_DATA_LIMIT
 #if USE_COHERENT_MEM
 			      , BL2_COHERENT_RAM_BASE,
 			      BL2_COHERENT_RAM_LIMIT
 #endif
 			      );
+	enable_mmu_el1(0);
 }
 
 void bl2_plat_arch_setup(void)
diff --git a/plat/arm/common/arm_bl2u_setup.c b/plat/arm/common/arm_bl2u_setup.c
index 5b7354b..de7d0c2 100644
--- a/plat/arm/common/arm_bl2u_setup.c
+++ b/plat/arm/common/arm_bl2u_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,16 +36,6 @@
 #include <plat_arm.h>
 #include <string.h>
 
-
-/*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned.  It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
- */
-#define BL2U_RO_BASE (unsigned long)(&__RO_START__)
-#define BL2U_RO_LIMIT (unsigned long)(&__RO_END__)
-
 #if USE_COHERENT_MEM
 /*
  * The next 2 constants identify the extents of the coherent memory region.
@@ -102,16 +92,19 @@
  ******************************************************************************/
 void arm_bl2u_plat_arch_setup(void)
 {
-	arm_configure_mmu_el1(BL2U_RO_LIMIT,
+	arm_setup_page_tables(BL2U_BASE,
 			      BL31_LIMIT,
-			      BL2U_RO_BASE,
-			      BL2U_RO_LIMIT
+			      BL_CODE_BASE,
+			      BL_CODE_LIMIT,
+			      BL_RO_DATA_BASE,
+			      BL_RO_DATA_LIMIT
 #if USE_COHERENT_MEM
 			      ,
 			      BL2U_COHERENT_RAM_BASE,
 			      BL2U_COHERENT_RAM_LIMIT
 #endif
 		);
+	enable_mmu_el1(0);
 }
 
 void bl2u_plat_arch_setup(void)
diff --git a/plat/arm/common/arm_bl31_setup.c b/plat/arm/common/arm_bl31_setup.c
index 8eb6818..4ed2477 100644
--- a/plat/arm/common/arm_bl31_setup.c
+++ b/plat/arm/common/arm_bl31_setup.c
@@ -38,17 +38,7 @@
 #include <plat_arm.h>
 #include <platform.h>
 
-
-/*
- * The next 3 constants identify the extents of the code, RO data region and the
- * limit of the BL31 image.  These addresses are used by the MMU setup code and
- * therefore they must be page-aligned.  It is the responsibility of the linker
- * script to ensure that __RO_START__, __RO_END__ & __BL31_END__ linker symbols
- * refer to page-aligned addresses.
- */
-#define BL31_RO_BASE (unsigned long)(&__RO_START__)
-#define BL31_RO_LIMIT (unsigned long)(&__RO_END__)
-#define BL31_END (unsigned long)(&__BL31_END__)
+#define BL31_END (uintptr_t)(&__BL31_END__)
 
 #if USE_COHERENT_MEM
 /*
@@ -58,8 +48,8 @@
  * __COHERENT_RAM_START__ and __COHERENT_RAM_END__ linker symbols
  * refer to page-aligned addresses.
  */
-#define BL31_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
-#define BL31_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#define BL31_COHERENT_RAM_BASE (uintptr_t)(&__COHERENT_RAM_START__)
+#define BL31_COHERENT_RAM_LIMIT (uintptr_t)(&__COHERENT_RAM_END__)
 #endif
 
 /*
@@ -140,11 +130,8 @@
 	 * Tell BL31 where the non-trusted software image
 	 * is located and the entry state information
 	 */
-#ifdef PRELOADED_BL33_BASE
-	bl33_image_ep_info.pc = PRELOADED_BL33_BASE;
-#else
 	bl33_image_ep_info.pc = plat_get_ns_image_entrypoint();
-#endif /* PRELOADED_BL33_BASE */
+
 	bl33_image_ep_info.spsr = arm_get_spsr_for_bl33_entry();
 	SET_SECURITY_STATE(bl33_image_ep_info.h.attr, NON_SECURE);
 
@@ -246,20 +233,25 @@
 }
 
 /*******************************************************************************
- * Perform the very early platform specific architectural setup here. At the
- * moment this is only intializes the mmu in a quick and dirty way.
+ * Perform the very early platform specific architectural setup shared between
+ * ARM standard platforms. This only does basic initialization. Later
+ * architectural setup (bl31_arch_setup()) does not do anything platform
+ * specific.
  ******************************************************************************/
 void arm_bl31_plat_arch_setup(void)
 {
-	arm_configure_mmu_el3(BL31_RO_BASE,
-			      (BL31_END - BL31_RO_BASE),
-			      BL31_RO_BASE,
-			      BL31_RO_LIMIT
+	arm_setup_page_tables(BL31_BASE,
+			      BL31_END - BL31_BASE,
+			      BL_CODE_BASE,
+			      BL_CODE_LIMIT,
+			      BL_RO_DATA_BASE,
+			      BL_RO_DATA_LIMIT
 #if USE_COHERENT_MEM
 			      , BL31_COHERENT_RAM_BASE,
 			      BL31_COHERENT_RAM_LIMIT
 #endif
 			      );
+	enable_mmu_el3(0);
 }
 
 void bl31_plat_arch_setup(void)
diff --git a/plat/arm/common/arm_cci.c b/plat/arm/common/arm_cci.c
index 41054c2..40cfb48 100644
--- a/plat/arm/common/arm_cci.c
+++ b/plat/arm/common/arm_cci.c
@@ -32,6 +32,7 @@
 #include <cci.h>
 #include <plat_arm.h>
 #include <platform_def.h>
+#include <utils.h>
 
 static const int cci_map[] = {
 	PLAT_ARM_CCI_CLUSTER0_SL_IFACE_IX,
diff --git a/plat/arm/common/aarch64/arm_common.c b/plat/arm/common/arm_common.c
similarity index 71%
rename from plat/arm/common/aarch64/arm_common.c
rename to plat/arm/common/arm_common.c
index c4cc80e..93355fe 100644
--- a/plat/arm/common/aarch64/arm_common.c
+++ b/plat/arm/common/arm_common.c
@@ -46,65 +46,77 @@
  * conflicts with the definition in plat/common. */
 #if ERROR_DEPRECATED
 #pragma weak plat_get_syscnt_freq2
-#else
-#pragma weak plat_get_syscnt_freq
 #endif
 
-/*******************************************************************************
- * Macro generating the code for the function setting up the pagetables as per
- * the platform memory map & initialize the mmu, for the given exception level
- ******************************************************************************/
+/*
+ * Set up the page tables for the generic and platform-specific memory regions.
+ * The extents of the generic memory regions are specified by the function
+ * arguments and consist of:
+ * - Trusted SRAM seen by the BL image;
+ * - Code section;
+ * - Read-only data section;
+ * - Coherent memory region, if applicable.
+ */
+void arm_setup_page_tables(uintptr_t total_base,
+			   size_t total_size,
+			   uintptr_t code_start,
+			   uintptr_t code_limit,
+			   uintptr_t rodata_start,
+			   uintptr_t rodata_limit
 #if USE_COHERENT_MEM
-#define DEFINE_CONFIGURE_MMU_EL(_el)					\
-	void arm_configure_mmu_el##_el(unsigned long total_base,	\
-				   unsigned long total_size,		\
-				   unsigned long ro_start,		\
-				   unsigned long ro_limit,		\
-				   unsigned long coh_start,		\
-				   unsigned long coh_limit)		\
-	{								\
-		mmap_add_region(total_base, total_base,			\
-				total_size,				\
-				MT_MEMORY | MT_RW | MT_SECURE);		\
-		mmap_add_region(ro_start, ro_start,			\
-				ro_limit - ro_start,			\
-				MT_MEMORY | MT_RO | MT_SECURE);		\
-		mmap_add_region(coh_start, coh_start,			\
-				coh_limit - coh_start,			\
-				MT_DEVICE | MT_RW | MT_SECURE);		\
-		mmap_add(plat_arm_get_mmap());				\
-		init_xlat_tables();					\
-									\
-		enable_mmu_el##_el(0);					\
-	}
-#else
-#define DEFINE_CONFIGURE_MMU_EL(_el)					\
-	void arm_configure_mmu_el##_el(unsigned long total_base,	\
-				   unsigned long total_size,		\
-				   unsigned long ro_start,		\
-				   unsigned long ro_limit)		\
-	{								\
-		mmap_add_region(total_base, total_base,			\
-				total_size,				\
-				MT_MEMORY | MT_RW | MT_SECURE);		\
-		mmap_add_region(ro_start, ro_start,			\
-				ro_limit - ro_start,			\
-				MT_MEMORY | MT_RO | MT_SECURE);		\
-		mmap_add(plat_arm_get_mmap());				\
-		init_xlat_tables();					\
-									\
-		enable_mmu_el##_el(0);					\
-	}
+			   ,
+			   uintptr_t coh_start,
+			   uintptr_t coh_limit
 #endif
+			   )
+{
+	/*
+	 * Map the Trusted SRAM with appropriate memory attributes.
+	 * Subsequent mappings will adjust the attributes for specific regions.
+	 */
+	VERBOSE("Trusted SRAM seen by this BL image: %p - %p\n",
+		(void *) total_base, (void *) (total_base + total_size));
+	mmap_add_region(total_base, total_base,
+			total_size,
+			MT_MEMORY | MT_RW | MT_SECURE);
 
-/* Define EL1 and EL3 variants of the function initialising the MMU */
-DEFINE_CONFIGURE_MMU_EL(1)
-DEFINE_CONFIGURE_MMU_EL(3)
+	/* Re-map the code section */
+	VERBOSE("Code region: %p - %p\n",
+		(void *) code_start, (void *) code_limit);
+	mmap_add_region(code_start, code_start,
+			code_limit - code_start,
+			MT_CODE | MT_SECURE);
 
+	/* Re-map the read-only data section */
+	VERBOSE("Read-only data region: %p - %p\n",
+		(void *) rodata_start, (void *) rodata_limit);
+	mmap_add_region(rodata_start, rodata_start,
+			rodata_limit - rodata_start,
+			MT_RO_DATA | MT_SECURE);
+
+#if USE_COHERENT_MEM
+	/* Re-map the coherent memory region */
+	VERBOSE("Coherent region: %p - %p\n",
+		(void *) coh_start, (void *) coh_limit);
+	mmap_add_region(coh_start, coh_start,
+			coh_limit - coh_start,
+			MT_DEVICE | MT_RW | MT_SECURE);
+#endif
+
+	/* Now (re-)map the platform-specific memory regions */
+	mmap_add(plat_arm_get_mmap());
+
+	/* Create the page tables to reflect the above mappings */
+	init_xlat_tables();
+}
 
 uintptr_t plat_get_ns_image_entrypoint(void)
 {
+#ifdef PRELOADED_BL33_BASE
+	return PRELOADED_BL33_BASE;
+#else
 	return PLAT_ARM_NS_IMAGE_OFFSET;
+#endif
 }
 
 /*******************************************************************************
@@ -173,15 +185,9 @@
 
 #ifdef ARM_SYS_CNTCTL_BASE
 
-#if ERROR_DEPRECATED
 unsigned int plat_get_syscnt_freq2(void)
 {
 	unsigned int counter_base_frequency;
-#else
-unsigned long long plat_get_syscnt_freq(void)
-{
-	unsigned long long counter_base_frequency;
-#endif /* ERROR_DEPRECATED */
 
 	/* Read the frequency from Frequency modes table */
 	counter_base_frequency = mmio_read_32(ARM_SYS_CNTCTL_BASE + CNTFID_OFF);
diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk
index bcb3f6f..03b9fe4 100644
--- a/plat/arm/common/arm_common.mk
+++ b/plat/arm/common/arm_common.mk
@@ -85,6 +85,11 @@
 # Enable PSCI_STAT_COUNT/RESIDENCY APIs on ARM platforms
 ENABLE_PSCI_STAT = 1
 
+# On ARM platforms, separate the code and read-only data sections to allow
+# mapping the former as executable and the latter as execute-never.
+SEPARATE_CODE_AND_RODATA	:=	1
+
+
 PLAT_INCLUDES		+=	-Iinclude/common/tbbr				\
 				-Iinclude/plat/arm/common			\
 				-Iinclude/plat/arm/common/aarch64
@@ -92,8 +97,8 @@
 
 PLAT_BL_COMMON_SOURCES	+=	lib/xlat_tables/xlat_tables_common.c		\
 				lib/xlat_tables/aarch64/xlat_tables.c		\
-				plat/arm/common/aarch64/arm_common.c		\
 				plat/arm/common/aarch64/arm_helpers.S		\
+				plat/arm/common/arm_common.c			\
 				plat/common/aarch64/plat_common.c
 
 BL1_SOURCES		+=	drivers/arm/sp805/sp805.c			\
@@ -123,7 +128,7 @@
 				plat/arm/common/arm_pm.c			\
 				plat/arm/common/arm_topology.c			\
 				plat/common/aarch64/platform_mp_stack.S		\
-				plat/common/aarch64/plat_psci_common.c
+				plat/common/plat_psci_common.c
 
 ifneq (${TRUSTED_BOARD_BOOT},0)
 
diff --git a/plat/arm/common/arm_io_storage.c b/plat/arm/common/arm_io_storage.c
index 153fdfe..42435a7 100644
--- a/plat/arm/common/arm_io_storage.c
+++ b/plat/arm/common/arm_io_storage.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -28,7 +28,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #include <assert.h>
-#include <bl_common.h>		/* For ARRAY_SIZE */
 #include <debug.h>
 #include <firmware_image_package.h>
 #include <io_driver.h>
@@ -37,6 +36,7 @@
 #include <io_storage.h>
 #include <platform_def.h>
 #include <string.h>
+#include <utils.h>
 
 /* IO devices */
 static const io_dev_connector_t *fip_dev_con;
diff --git a/plat/arm/common/tsp/arm_tsp_setup.c b/plat/arm/common/tsp/arm_tsp_setup.c
index 2a67fd1..09029f4 100644
--- a/plat/arm/common/tsp/arm_tsp_setup.c
+++ b/plat/arm/common/tsp/arm_tsp_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,16 +35,6 @@
 #include <platform_tsp.h>
 #include <plat_arm.h>
 
-
-/*
- * The next 3 constants identify the extents of the code & RO data region and
- * the limit of the BL32 image. These addresses are used by the MMU setup code
- * and therefore they must be page-aligned.  It is the responsibility of the
- * linker script to ensure that __RO_START__, __RO_END__ & & __BL32_END__
- * linker symbols refer to page-aligned addresses.
- */
-#define BL32_RO_BASE (unsigned long)(&__RO_START__)
-#define BL32_RO_LIMIT (unsigned long)(&__RO_END__)
 #define BL32_END (unsigned long)(&__BL32_END__)
 
 #if USE_COHERENT_MEM
@@ -98,13 +88,16 @@
  ******************************************************************************/
 void tsp_plat_arch_setup(void)
 {
-	arm_configure_mmu_el1(BL32_RO_BASE,
-			      (BL32_END - BL32_RO_BASE),
-			      BL32_RO_BASE,
-			      BL32_RO_LIMIT
+	arm_setup_page_tables(BL32_BASE,
+			      (BL32_END - BL32_BASE),
+			      BL_CODE_BASE,
+			      BL_CODE_LIMIT,
+			      BL_RO_DATA_BASE,
+			      BL_RO_DATA_LIMIT
 #if USE_COHERENT_MEM
 			      , BL32_COHERENT_RAM_BASE,
 			      BL32_COHERENT_RAM_LIMIT
 #endif
 			      );
+	enable_mmu_el1(0);
 }
diff --git a/plat/arm/css/common/aarch64/css_helpers.S b/plat/arm/css/common/aarch64/css_helpers.S
index 0763a3e..92b0e81 100644
--- a/plat/arm/css/common/aarch64/css_helpers.S
+++ b/plat/arm/css/common/aarch64/css_helpers.S
@@ -70,7 +70,7 @@
 endfunc plat_secondary_cold_boot_setup
 
 	/* ---------------------------------------------------------------------
-	 * unsigned long plat_get_my_entrypoint (void);
+	 * uintptr_t plat_get_my_entrypoint (void);
 	 *
 	 * Main job of this routine is to distinguish between a cold and a warm
 	 * boot. On CSS platforms, this distinction is based on the contents of
@@ -90,7 +90,7 @@
 endfunc plat_get_my_entrypoint
 
 	/* -----------------------------------------------------------
-	 * unsigned int css_calc_core_pos_swap_cluster(uint64_t mpidr)
+	 * unsigned int css_calc_core_pos_swap_cluster(u_register_t mpidr)
 	 * Utility function to calculate the core position by
 	 * swapping the cluster order. This is necessary in order to
 	 * match the format of the boot information passed by the SCP
diff --git a/plat/common/aarch64/plat_psci_common.c b/plat/common/aarch64/plat_psci_common.c
index 0748ef4..804da93 100644
--- a/plat/common/aarch64/plat_psci_common.c
+++ b/plat/common/aarch64/plat_psci_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -28,36 +28,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <arch.h>
-#include <assert.h>
-#include <platform.h>
-#include <psci.h>
-
-/*
- * The PSCI generic code uses this API to let the platform participate in state
- * coordination during a power management operation. It compares the platform
- * specific local power states requested by each cpu for a given power domain
- * and returns the coordinated target power state that the domain should
- * enter. A platform assigns a number to a local power state. This default
- * implementation assumes that the platform assigns these numbers in order of
- * increasing depth of the power state i.e. for two power states X & Y, if X < Y
- * then X represents a shallower power state than Y. As a result, the
- * coordinated target local power state for a power domain will be the minimum
- * of the requested local power states.
- */
-plat_local_state_t plat_get_target_pwr_state(unsigned int lvl,
-					     const plat_local_state_t *states,
-					     unsigned int ncpu)
-{
-	plat_local_state_t target = PLAT_MAX_OFF_STATE, temp;
-
-	assert(ncpu);
-
-	do {
-		temp = *states++;
-		if (temp < target)
-			target = temp;
-	} while (--ncpu);
-
-	return target;
-}
+#if !ERROR_DEPRECATED
+#include "../plat_psci_common.c"
+#endif
diff --git a/plat/common/aarch64/platform_mp_stack.S b/plat/common/aarch64/platform_mp_stack.S
index c719019..e3063d1 100644
--- a/plat/common/aarch64/platform_mp_stack.S
+++ b/plat/common/aarch64/platform_mp_stack.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -159,7 +159,7 @@
 endfunc_deprecated platform_set_stack
 
 	/* -----------------------------------------------------
-	 * unsigned long plat_get_my_stack ()
+	 * uintptr_t plat_get_my_stack ()
 	 *
 	 * For the current CPU, this function returns the stack
 	 * pointer for a stack allocated in device memory.
@@ -193,4 +193,5 @@
 	 * -----------------------------------------------------
 	 */
 declare_stack platform_normal_stacks, tzfw_normal_stacks, \
-		PLATFORM_STACK_SIZE, PLATFORM_CORE_COUNT
+		PLATFORM_STACK_SIZE, PLATFORM_CORE_COUNT, \
+		CACHE_WRITEBACK_GRANULE
diff --git a/plat/common/aarch64/platform_up_stack.S b/plat/common/aarch64/platform_up_stack.S
index c01534a..5b82630 100644
--- a/plat/common/aarch64/platform_up_stack.S
+++ b/plat/common/aarch64/platform_up_stack.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,7 +40,7 @@
 	.globl	platform_get_stack
 
 	/* -----------------------------------------------------
-	 * unsigned long plat_get_my_stack ()
+	 * uintptr_t plat_get_my_stack ()
 	 *
 	 * For cold-boot BL images, only the primary CPU needs a
 	 * stack. This function returns the stack pointer for a
@@ -99,4 +99,4 @@
 	 * -----------------------------------------------------
 	 */
 declare_stack platform_normal_stacks, tzfw_normal_stacks, \
-		PLATFORM_STACK_SIZE, 1
+		PLATFORM_STACK_SIZE, 1, CACHE_WRITEBACK_GRANULE
diff --git a/plat/common/plat_psci_common.c b/plat/common/plat_psci_common.c
new file mode 100644
index 0000000..3eb6886
--- /dev/null
+++ b/plat/common/plat_psci_common.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <assert.h>
+#include <platform.h>
+#include <psci.h>
+
+/*
+ * The PSCI generic code uses this API to let the platform participate in state
+ * coordination during a power management operation. It compares the platform
+ * specific local power states requested by each cpu for a given power domain
+ * and returns the coordinated target power state that the domain should
+ * enter. A platform assigns a number to a local power state. This default
+ * implementation assumes that the platform assigns these numbers in order of
+ * increasing depth of the power state i.e. for two power states X & Y, if X < Y
+ * then X represents a shallower power state than Y. As a result, the
+ * coordinated target local power state for a power domain will be the minimum
+ * of the requested local power states.
+ */
+plat_local_state_t plat_get_target_pwr_state(unsigned int lvl,
+					     const plat_local_state_t *states,
+					     unsigned int ncpu)
+{
+	plat_local_state_t target = PLAT_MAX_OFF_STATE, temp;
+
+	assert(ncpu);
+
+	do {
+		temp = *states++;
+		if (temp < target)
+			target = temp;
+	} while (--ncpu);
+
+	return target;
+}
diff --git a/plat/compat/plat_compat.mk b/plat/compat/plat_compat.mk
index c0c8ece..d9d50f6 100644
--- a/plat/compat/plat_compat.mk
+++ b/plat/compat/plat_compat.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -36,6 +36,6 @@
 
 PLAT_BL_COMMON_SOURCES	+=	plat/compat/aarch64/plat_helpers_compat.S
 
-BL31_SOURCES		+=	plat/common/aarch64/plat_psci_common.c	\
+BL31_SOURCES		+=	plat/common/plat_psci_common.c		\
 				plat/compat/plat_pm_compat.c		\
 				plat/compat/plat_topology_compat.c
diff --git a/plat/mediatek/mt8173/aarch64/platform_common.c b/plat/mediatek/mt8173/aarch64/platform_common.c
index 365df1b..70639ed 100644
--- a/plat/mediatek/mt8173/aarch64/platform_common.c
+++ b/plat/mediatek/mt8173/aarch64/platform_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -34,6 +34,7 @@
 #include <debug.h>
 #include <mt8173_def.h>
 #include <platform_def.h>
+#include <utils.h>
 #include <xlat_tables.h>
 
 static const int cci_map[] = {
diff --git a/plat/mediatek/mt8173/plat_mt_gic.c b/plat/mediatek/mt8173/plat_mt_gic.c
index c9bdaa9..402a0f4 100644
--- a/plat/mediatek/mt8173/plat_mt_gic.c
+++ b/plat/mediatek/mt8173/plat_mt_gic.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -30,6 +30,7 @@
 #include <arm_gic.h>
 #include <bl_common.h>
 #include <mt8173_def.h>
+#include <utils.h>
 
 const unsigned int mt_irq_sec_array[] = {
 	MT_IRQ_SEC_SGI_0,
diff --git a/plat/nvidia/tegra/common/tegra_common.mk b/plat/nvidia/tegra/common/tegra_common.mk
index 2ecf5f5..03ca773 100644
--- a/plat/nvidia/tegra/common/tegra_common.mk
+++ b/plat/nvidia/tegra/common/tegra_common.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -52,7 +52,7 @@
 				drivers/delay_timer/delay_timer.c		\
 				drivers/ti/uart/16550_console.S			\
 				plat/common/aarch64/platform_mp_stack.S		\
-				plat/common/aarch64/plat_psci_common.c		\
+				plat/common/plat_psci_common.c			\
 				${COMMON_DIR}/aarch64/tegra_helpers.S		\
 				${COMMON_DIR}/drivers/memctrl/memctrl.c		\
 				${COMMON_DIR}/drivers/pmc/pmc.c			\
diff --git a/plat/rockchip/common/aarch64/plat_helpers.S b/plat/rockchip/common/aarch64/plat_helpers.S
index 1bbb614..d06d4cb 100644
--- a/plat/rockchip/common/aarch64/plat_helpers.S
+++ b/plat/rockchip/common/aarch64/plat_helpers.S
@@ -35,6 +35,7 @@
 #include <cortex_a72.h>
 #include <plat_private.h>
 #include <platform_def.h>
+#include <plat_pmu_macros.S>
 
 	.globl	cpuson_entry_point
 	.globl	cpuson_flags
@@ -47,68 +48,6 @@
 	.globl	plat_my_core_pos
 	.globl	plat_reset_handler
 
-
-#define RK_REVISION(rev) RK_PLAT_CFG##rev
-#define RK_HANDLER(rev) plat_reset_handler_juno_r##rev
-#define JUMP_TO_HANDLER_IF_RK_R(revision)	\
-	jump_to_handler RK_REVISION(revision), RK_HANDLER(revision)
-
-	/*
-	 * Helper macro to jump to the given handler if the board revision
-	 * matches.
-	 * Expects the Juno board revision in x0.
-	 *
-	 */
-	.macro jump_to_handler _revision, _handler
-	cmp	x0, #\_revision
-	b.eq	\_handler
-	.endm
-
-	/*
-	 * Helper macro that reads the part number of the current CPU and jumps
-	 * to the given label if it matches the CPU MIDR provided.
-	 */
-	.macro  jump_if_cpu_midr _cpu_midr, _label
-	mrs	x0, midr_el1
-	ubfx	x0, x0, MIDR_PN_SHIFT, #12
-	cmp     w0, #((\_cpu_midr >> MIDR_PN_SHIFT) & MIDR_PN_MASK)
-	b.eq	\_label
-	.endm
-
-	/*
-	 * Platform reset handler for rockchip.
-	 * only A53 cores
-	 */
-func RK_HANDLER(0)
-	ret
-endfunc RK_HANDLER(0)
-
-	/*
-	 * Platform reset handler for rockchip.
-	 * - Cortex-A53 processor cluster;
-	 * - Cortex-A72 processor cluster.
-	 *
-	 * This handler does the following:
-	 * - Set the L2 Data RAM latency to 2 (i.e. 3 cycles) for Cortex-A72
-	 * - Set the L2 Tag RAM latency to 1 (i.e. 2 cycles) for Cortex-A72
-	 */
-func RK_HANDLER(1)
-	/*
-	 * Nothing to do on Cortex-A53.
-	 *
-	 */
-	jump_if_cpu_midr CORTEX_A72_MIDR, A72
-	ret
-
-A72:
-	/* Cortex-A72 specific settings */
-	mov	x0, #((2 << L2CTLR_DATA_RAM_LATENCY_SHIFT) |	\
-		     (0x1 << 5))
-	msr     L2CTLR_EL1, x0
-	isb
-	ret
-endfunc RK_HANDLER(1)
-
 	/*
 	 * void plat_reset_handler(void);
 	 *
@@ -117,22 +56,30 @@
 	 *
 	 */
 func plat_reset_handler
-
-	mov	x0, RK_PLAT_AARCH_CFG
-
-	JUMP_TO_HANDLER_IF_RK_R(0)
-	JUMP_TO_HANDLER_IF_RK_R(1)
-
-	/* SOC type is not supported */
-not_supported:
-	b	not_supported
+	mrs x0, midr_el1
+	ubfx x0, x0, MIDR_PN_SHIFT, #12
+	cmp w0, #((CORTEX_A72_MIDR >> MIDR_PN_SHIFT) & MIDR_PN_MASK)
+	b.eq	handler_a72
+	b	handler_end
+handler_a72:
+	/*
+	 * This handler does the following:
+	 * Set the L2 Data RAM latency for Cortex-A72.
+	 * Set the L2 Tag RAM latency to for Cortex-A72.
+	 */
+	mov x0, #((2 << L2CTLR_DATA_RAM_LATENCY_SHIFT) |	\
+			 (0x1 << 5))
+	msr	L2CTLR_EL1, x0
+	isb
+handler_end:
+	ret
 endfunc plat_reset_handler
 
 func plat_my_core_pos
 	mrs	x0, mpidr_el1
 	and	x1, x0, #MPIDR_CPU_MASK
 	and	x0, x0, #MPIDR_CLUSTER_MASK
-	add	x0, x1, x0, LSR #6
+	add	x0, x1, x0, LSR #PLAT_RK_CLST_TO_CPUID_SHIFT
 	ret
 endfunc plat_my_core_pos
 
@@ -192,30 +139,30 @@
 	.align	16
 func platform_cpu_warmboot
 	mrs	x0, MPIDR_EL1
-	and	x1, x0, #MPIDR_CPU_MASK
-	and	x0, x0, #MPIDR_CLUSTER_MASK
+	and	x19, x0, #MPIDR_CPU_MASK
+	and	x20, x0, #MPIDR_CLUSTER_MASK
+	mov	x0, x20
+	func_rockchip_clst_warmboot
 	/* --------------------------------------------------------------------
 	 * big cluster id is 1
 	 * big cores id is from 0-3, little cores id 4-7
 	 * --------------------------------------------------------------------
 	 */
-	add	x0, x1, x0, lsr #6
+	add	x21, x19, x20, lsr #PLAT_RK_CLST_TO_CPUID_SHIFT
 	/* --------------------------------------------------------------------
 	 * get per cpuup flag
          * --------------------------------------------------------------------
 	 */
 	adr	x4, cpuson_flags
-	add	x4, x4, x0, lsl #2
+	add	x4, x4, x21, lsl #2
 	ldr	w1, [x4]
 	/* --------------------------------------------------------------------
 	 * check cpuon reason
          * --------------------------------------------------------------------
 	 */
-	ldr	w3, =PMU_CPU_AUTO_PWRDN
-	cmp	w1, w3
+	cmp	w1, PMU_CPU_AUTO_PWRDN
 	b.eq	boot_entry
-	ldr	w3, =PMU_CPU_HOTPLUG
-	cmp	w1, w3
+	cmp	w1, PMU_CPU_HOTPLUG
 	b.eq	boot_entry
 	/* --------------------------------------------------------------------
 	 * If the boot core cpuson_flags or cpuson_entry_point is not
@@ -226,15 +173,13 @@
 	wfe
 	b	wfe_loop
 boot_entry:
-	mov	w1, #0
-	str	w1, [x4]
+	str	wzr, [x4]
 	/* --------------------------------------------------------------------
 	 * get per cpuup boot addr
 	 * --------------------------------------------------------------------
 	 */
 	adr	x5, cpuson_entry_point
-	ldr	x2, [x5, x0, lsl #3]
-
+	ldr	x2, [x5, x21, lsl #3]
 	br	x2
 endfunc platform_cpu_warmboot
 
@@ -252,3 +197,4 @@
 	.rept	PLATFORM_CORE_COUNT
 	.word	0
 	.endr
+rockchip_clst_warmboot_data
diff --git a/plat/rockchip/common/aarch64/platform_common.c b/plat/rockchip/common/aarch64/platform_common.c
index 6e9dab7..40cd29e 100644
--- a/plat/rockchip/common/aarch64/platform_common.c
+++ b/plat/rockchip/common/aarch64/platform_common.c
@@ -37,6 +37,7 @@
 #include <xlat_tables.h>
 #include <platform_def.h>
 #include <plat_private.h>
+#include <utils.h>
 
 #ifdef PLAT_RK_CCI_BASE
 static const int cci_map[] = {
diff --git a/plat/rockchip/common/drivers/pmu/pmu_com.h b/plat/rockchip/common/drivers/pmu/pmu_com.h
index 4cffb61..a6d3186 100644
--- a/plat/rockchip/common/drivers/pmu/pmu_com.h
+++ b/plat/rockchip/common/drivers/pmu/pmu_com.h
@@ -31,7 +31,7 @@
  * Use this macro to instantiate lock before it is used in below
  * rockchip_pd_lock_xxx() macros
  */
-DEFINE_BAKERY_LOCK(rockchip_pd_lock);
+DECLARE_BAKERY_LOCK(rockchip_pd_lock);
 
 /*
  * These are wrapper macros to the powe domain Bakery Lock API.
diff --git a/plat/rockchip/common/include/plat_private.h b/plat/rockchip/common/include/plat_private.h
index 031a341..7199899 100644
--- a/plat/rockchip/common/include/plat_private.h
+++ b/plat/rockchip/common/include/plat_private.h
@@ -35,6 +35,7 @@
 #include <mmio.h>
 #include <stdint.h>
 #include <xlat_tables.h>
+#include <psci.h>
 
 /******************************************************************************
  * For rockchip socs pm ops
@@ -45,6 +46,12 @@
 	int (*cores_pwr_dm_on_finish)(void);
 	int (*cores_pwr_dm_suspend)(void);
 	int (*cores_pwr_dm_resume)(void);
+	/* hlvl is used for clusters or system level */
+	int (*hlvl_pwr_dm_suspend)(uint32_t lvl, plat_local_state_t lvl_state);
+	int (*hlvl_pwr_dm_resume)(uint32_t lvl, plat_local_state_t lvl_state);
+	int (*hlvl_pwr_dm_off)(uint32_t lvl, plat_local_state_t lvl_state);
+	int (*hlvl_pwr_dm_on_finish)(uint32_t lvl,
+				     plat_local_state_t lvl_state);
 	int (*sys_pwr_dm_suspend)(void);
 	int (*sys_pwr_dm_resume)(void);
 	void (*sys_gbl_soft_reset)(void) __dead2;
@@ -109,6 +116,7 @@
 void plat_rockchip_pmu_init(void);
 void plat_rockchip_soc_init(void);
 void plat_setup_rockchip_pm_ops(struct rockchip_pm_ops_cb *ops);
+uintptr_t plat_get_sec_entrypoint(void);
 
 void platform_cpu_warmboot(void);
 
@@ -126,10 +134,12 @@
 extern const mmap_region_t plat_rk_mmap[];
 #endif /* __ASSEMBLY__ */
 
-/* only Cortex-A53 */
-#define RK_PLAT_CFG0	0
-
-/* include Cortex-A72 */
-#define RK_PLAT_CFG1	1
+/******************************************************************************
+ * cpu up status
+ * The bits of macro value is not more than 12 bits for cmp instruction!
+ ******************************************************************************/
+#define PMU_CPU_HOTPLUG		0xf00
+#define PMU_CPU_AUTO_PWRDN	0xf0
+#define PMU_CLST_RET	0xa5
 
 #endif /* __PLAT_PRIVATE_H__ */
diff --git a/bl31/cpu_data_array.c b/plat/rockchip/common/include/rockchip_sip_svc.h
similarity index 69%
copy from bl31/cpu_data_array.c
copy to plat/rockchip/common/include/rockchip_sip_svc.h
index 4cba118..9e31082 100644
--- a/bl31/cpu_data_array.c
+++ b/plat/rockchip/common/include/rockchip_sip_svc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -11,10 +11,6 @@
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -28,9 +24,24 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef __ROCKCHIP_SIP_SVC_H__
+#define __ROCKCHIP_SIP_SVC_H__
+
+/* SMC function IDs for SiP Service queries */
+#define SIP_SVC_CALL_COUNT		0x8200ff00
+#define SIP_SVC_UID			0x8200ff01
+#define SIP_SVC_VERSION			0x8200ff03
+
-#include <cassert.h>
-#include <cpu_data.h>
-#include <platform_def.h>
+/* rockchip SiP Service Calls version numbers */
+#define RK_SIP_SVC_VERSION_MAJOR	0x0
+#define RK_SIP_SVC_VERSION_MINOR	0x1
+
+/* Number of ROCKCHIP SiP Calls implemented */
+#define RK_COMMON_SIP_NUM_CALLS		0x3
+
+enum {
+	RK_SIP_E_SUCCESS = 0,
+	RK_SIP_E_INVALID_PARAM = -1
+};
 
-/* The per_cpu_ptr_cache_t space allocation */
-cpu_data_t percpu_data[PLATFORM_CORE_COUNT];
+#endif
diff --git a/plat/rockchip/common/plat_pm.c b/plat/rockchip/common/plat_pm.c
index d20a683..b6291bb 100644
--- a/plat/rockchip/common/plat_pm.c
+++ b/plat/rockchip/common/plat_pm.c
@@ -50,21 +50,6 @@
 
 static struct rockchip_pm_ops_cb *rockchip_ops;
 
-static void plat_rockchip_sys_pwr_domain_resume(void)
-{
-	if (rockchip_ops && rockchip_ops->sys_pwr_dm_resume)
-		rockchip_ops->sys_pwr_dm_resume();
-}
-
-static void plat_rockchip_cores_pwr_domain_resume(void)
-{
-	if (rockchip_ops && rockchip_ops->cores_pwr_dm_resume)
-		rockchip_ops->cores_pwr_dm_resume();
-
-	/* Program the gic per-cpu distributor or re-distributor interface */
-	plat_rockchip_gic_cpuif_enable();
-}
-
 /*******************************************************************************
  * Rockchip standard platform handler called to check the validity of the power
  * state parameter.
@@ -96,6 +81,10 @@
 		for (i = MPIDR_AFFLVL0; i <= pwr_lvl; i++)
 			req_state->pwr_domain_state[i] =
 					PLAT_MAX_OFF_STATE;
+
+		for (i = (pwr_lvl + 1); i <= PLAT_MAX_PWR_LVL; i++)
+			req_state->pwr_domain_state[i] =
+					PLAT_MAX_RET_STATE;
 	}
 
 	/* We expect the 'state id' to be zero */
@@ -154,14 +143,28 @@
  ******************************************************************************/
 void rockchip_pwr_domain_off(const psci_power_state_t *target_state)
 {
+	uint32_t lvl;
+	plat_local_state_t lvl_state;
+
 	assert(RK_CORE_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE);
 
 	plat_rockchip_gic_cpuif_disable();
 
 	if (RK_CLUSTER_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE)
 		plat_cci_disable();
-	if (rockchip_ops && rockchip_ops->cores_pwr_dm_off)
-		rockchip_ops->cores_pwr_dm_off();
+
+	if (!rockchip_ops || !rockchip_ops->cores_pwr_dm_off)
+		return;
+
+	rockchip_ops->cores_pwr_dm_off();
+
+	if (!rockchip_ops->hlvl_pwr_dm_off)
+		return;
+
+	for (lvl = MPIDR_AFFLVL1; lvl <= PLAT_MAX_PWR_LVL; lvl++) {
+		lvl_state = target_state->pwr_domain_state[lvl];
+		rockchip_ops->hlvl_pwr_dm_off(lvl, lvl_state);
+	}
 }
 
 /*******************************************************************************
@@ -170,17 +173,19 @@
  ******************************************************************************/
 void rockchip_pwr_domain_suspend(const psci_power_state_t *target_state)
 {
-	if (RK_CORE_PWR_STATE(target_state) == PLAT_MAX_RET_STATE)
-		return;
+	uint32_t lvl;
+	plat_local_state_t lvl_state;
 
-	assert(RK_CORE_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE);
+	if (RK_CORE_PWR_STATE(target_state) != PLAT_MAX_OFF_STATE)
+		return;
 
-	if (RK_SYSTEM_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE) {
-		if (rockchip_ops && rockchip_ops->sys_pwr_dm_suspend)
+	if (rockchip_ops) {
+		if (RK_SYSTEM_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE &&
+		    rockchip_ops->sys_pwr_dm_suspend) {
 			rockchip_ops->sys_pwr_dm_suspend();
-	} else {
-		if (rockchip_ops && rockchip_ops->cores_pwr_dm_suspend)
+		} else if (rockchip_ops->cores_pwr_dm_suspend) {
 			rockchip_ops->cores_pwr_dm_suspend();
+		}
 	}
 
 	/* Prevent interrupts from spuriously waking up this cpu */
@@ -189,6 +194,14 @@
 	/* Perform the common cluster specific operations */
 	if (RK_CLUSTER_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE)
 		plat_cci_disable();
+
+	if (!rockchip_ops || !rockchip_ops->hlvl_pwr_dm_suspend)
+		return;
+
+	for (lvl = MPIDR_AFFLVL1; lvl <= PLAT_MAX_PWR_LVL; lvl++) {
+		lvl_state = target_state->pwr_domain_state[lvl];
+		rockchip_ops->hlvl_pwr_dm_suspend(lvl, lvl_state);
+	}
 }
 
 /*******************************************************************************
@@ -198,10 +211,24 @@
  ******************************************************************************/
 void rockchip_pwr_domain_on_finish(const psci_power_state_t *target_state)
 {
+	uint32_t lvl;
+	plat_local_state_t lvl_state;
+
 	assert(RK_CORE_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE);
 
+	if (!rockchip_ops)
+		goto comm_finish;
+
+	if (rockchip_ops->hlvl_pwr_dm_on_finish) {
+		for (lvl = MPIDR_AFFLVL1; lvl <= PLAT_MAX_PWR_LVL; lvl++) {
+			lvl_state = target_state->pwr_domain_state[lvl];
+			rockchip_ops->hlvl_pwr_dm_on_finish(lvl, lvl_state);
+		}
+	}
+
-	if (rockchip_ops && rockchip_ops->cores_pwr_dm_on_finish)
+	if (rockchip_ops->cores_pwr_dm_on_finish)
 		rockchip_ops->cores_pwr_dm_on_finish();
+comm_finish:
 
 	/* Perform the common cluster specific operations */
 	if (RK_CLUSTER_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE) {
@@ -225,15 +252,37 @@
  ******************************************************************************/
 void rockchip_pwr_domain_suspend_finish(const psci_power_state_t *target_state)
 {
+	uint32_t lvl;
+	plat_local_state_t lvl_state;
+
 	/* Nothing to be done on waking up from retention from CPU level */
-	if (RK_CORE_PWR_STATE(target_state) == PLAT_MAX_RET_STATE)
+	if (RK_CORE_PWR_STATE(target_state) != PLAT_MAX_OFF_STATE)
 		return;
 
 	/* Perform system domain restore if woken up from system suspend */
-	if (RK_SYSTEM_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE)
-		plat_rockchip_sys_pwr_domain_resume();
-	else
-		plat_rockchip_cores_pwr_domain_resume();
+	if (!rockchip_ops)
+		goto comm_finish;
+
+	if (rockchip_ops->hlvl_pwr_dm_resume) {
+		for (lvl = MPIDR_AFFLVL1; lvl <= PLAT_MAX_PWR_LVL; lvl++) {
+			lvl_state = target_state->pwr_domain_state[lvl];
+			rockchip_ops->hlvl_pwr_dm_resume(lvl, lvl_state);
+		}
+	}
+
+	if (RK_SYSTEM_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE &&
+	    rockchip_ops->sys_pwr_dm_resume) {
+		rockchip_ops->sys_pwr_dm_resume();
+	} else if (rockchip_ops->cores_pwr_dm_resume) {
+		rockchip_ops->cores_pwr_dm_resume();
+	}
+
+comm_finish:
+	/*
+	 * Program the gic per-cpu distributor
+	 * or re-distributor interface
+	 */
+	plat_rockchip_gic_cpuif_enable();
 
 	/* Perform the common cluster specific operations */
 	if (RK_CLUSTER_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE) {
@@ -288,6 +337,12 @@
 	return 0;
 }
 
+uintptr_t plat_get_sec_entrypoint(void)
+{
+	assert(rockchip_sec_entrypoint);
+	return rockchip_sec_entrypoint;
+}
+
 void plat_setup_rockchip_pm_ops(struct rockchip_pm_ops_cb *ops)
 {
 	rockchip_ops = ops;
diff --git a/plat/rockchip/common/plat_topology.c b/plat/rockchip/common/plat_topology.c
index 911978a..8a13945 100644
--- a/plat/rockchip/common/plat_topology.c
+++ b/plat/rockchip/common/plat_topology.c
@@ -45,11 +45,13 @@
 {
 	unsigned int cluster_id, cpu_id;
 
-	cpu_id = MPIDR_AFFLVL0_VAL(mpidr);
-	cluster_id = MPIDR_AFFLVL1_VAL(mpidr);
+	cpu_id = mpidr & MPIDR_AFFLVL_MASK;
+	cluster_id = mpidr & MPIDR_CLUSTER_MASK;
 
-	if (cluster_id >= PLATFORM_CLUSTER_COUNT)
+	cpu_id += (cluster_id >> PLAT_RK_CLST_TO_CPUID_SHIFT);
+
+	if (cpu_id >= PLATFORM_CORE_COUNT)
 		return -1;
 
-	return ((cluster_id * PLATFORM_CLUSTER0_CORE_COUNT) + cpu_id);
+	return cpu_id;
 }
diff --git a/plat/rockchip/common/pmusram/pmu_sram_cpus_on.S b/plat/rockchip/common/pmusram/pmu_sram_cpus_on.S
index 9f94b0c..3378272 100644
--- a/plat/rockchip/common/pmusram/pmu_sram_cpus_on.S
+++ b/plat/rockchip/common/pmusram/pmu_sram_cpus_on.S
@@ -70,7 +70,9 @@
 psram_data:
 	.quad	PSRAM_DT_BASE
 sys_wakeup_entry:
+#if !ERROR_DEPRECATED
 	.quad	psci_entrypoint
+#endif
 pmu_cpuson_entrypoint_end:
 	.word	0
 endfunc pmu_cpuson_entrypoint
diff --git a/plat/rockchip/common/rockchip_gicv2.c b/plat/rockchip/common/rockchip_gicv2.c
index 3e1fa91..c2dca1f 100644
--- a/plat/rockchip/common/rockchip_gicv2.c
+++ b/plat/rockchip/common/rockchip_gicv2.c
@@ -31,6 +31,7 @@
 #include <bl_common.h>
 #include <gicv2.h>
 #include <platform_def.h>
+#include <utils.h>
 
 /******************************************************************************
  * The following functions are defined as weak to allow a platform to override
diff --git a/plat/rockchip/common/rockchip_gicv3.c b/plat/rockchip/common/rockchip_gicv3.c
index d197aba..7730896 100644
--- a/plat/rockchip/common/rockchip_gicv3.c
+++ b/plat/rockchip/common/rockchip_gicv3.c
@@ -32,6 +32,7 @@
 #include <gicv3.h>
 #include <platform.h>
 #include <platform_def.h>
+#include <utils.h>
 
 /******************************************************************************
  * The following functions are defined as weak to allow a platform to override
diff --git a/plat/rockchip/common/rockchip_sip_svc.c b/plat/rockchip/common/rockchip_sip_svc.c
new file mode 100644
index 0000000..cbc9105
--- /dev/null
+++ b/plat/rockchip/common/rockchip_sip_svc.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <debug.h>
+#include <mmio.h>
+#include <plat_sip_calls.h>
+#include <rockchip_sip_svc.h>
+#include <runtime_svc.h>
+#include <uuid.h>
+
+/* Rockchip SiP Service UUID */
+DEFINE_SVC_UUID(rk_sip_svc_uid,
+		0xe86fc7e2, 0x313e, 0x11e6, 0xb7, 0x0d,
+		0x8f, 0x88, 0xee, 0x74, 0x7b, 0x72);
+
+#pragma weak rockchip_plat_sip_handler
+uint64_t rockchip_plat_sip_handler(uint32_t smc_fid,
+				   uint64_t x1,
+				   uint64_t x2,
+				   uint64_t x3,
+				   uint64_t x4,
+				   void *cookie,
+				   void *handle,
+				   uint64_t flags)
+{
+	ERROR("%s: unhandled SMC (0x%x)\n", __func__, smc_fid);
+	SMC_RET1(handle, SMC_UNK);
+}
+
+/*
+ * This function is responsible for handling all SiP calls from the NS world
+ */
+uint64_t sip_smc_handler(uint32_t smc_fid,
+			 uint64_t x1,
+			 uint64_t x2,
+			 uint64_t x3,
+			 uint64_t x4,
+			 void *cookie,
+			 void *handle,
+			 uint64_t flags)
+{
+	uint32_t ns;
+
+	/* Determine which security state this SMC originated from */
+	ns = is_caller_non_secure(flags);
+	if (!ns)
+		SMC_RET1(handle, SMC_UNK);
+
+	switch (smc_fid) {
+	case SIP_SVC_CALL_COUNT:
+		/* Return the number of Rockchip SiP Service Calls. */
+		SMC_RET1(handle,
+			 RK_COMMON_SIP_NUM_CALLS + RK_PLAT_SIP_NUM_CALLS);
+
+	case SIP_SVC_UID:
+		/* Return UID to the caller */
+		SMC_UUID_RET(handle, rk_sip_svc_uid);
+		break;
+
+	case SIP_SVC_VERSION:
+		/* Return the version of current implementation */
+		SMC_RET2(handle, RK_SIP_SVC_VERSION_MAJOR,
+			RK_SIP_SVC_VERSION_MINOR);
+		break;
+
+	default:
+		return rockchip_plat_sip_handler(smc_fid, x1, x2, x3, x4,
+			cookie, handle, flags);
+	}
+}
+
+/* Define a runtime service descriptor for fast SMC calls */
+DECLARE_RT_SVC(
+	rockchip_sip_svc,
+	OEN_SIP_START,
+	OEN_SIP_END,
+	SMC_TYPE_FAST,
+	NULL,
+	sip_smc_handler
+);
diff --git a/bl31/cpu_data_array.c b/plat/rockchip/rk3368/drivers/pmu/plat_pmu_macros.S
similarity index 77%
copy from bl31/cpu_data_array.c
copy to plat/rockchip/rk3368/drivers/pmu/plat_pmu_macros.S
index 4cba118..5fd3c41 100644
--- a/bl31/cpu_data_array.c
+++ b/plat/rockchip/rk3368/drivers/pmu/plat_pmu_macros.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -11,10 +11,6 @@
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -28,9 +24,14 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <cassert.h>
-#include <cpu_data.h>
+#include <arch.h>
+#include <asm_macros.S>
 #include <platform_def.h>
 
-/* The per_cpu_ptr_cache_t space allocation */
-cpu_data_t percpu_data[PLATFORM_CORE_COUNT];
+.macro	func_rockchip_clst_warmboot
+	/* Nothing to do for rk3368 */
+.endm
+
+.macro rockchip_clst_warmboot_data
+	/* Nothing to do for rk3368 */
+.endm
diff --git a/plat/rockchip/rk3368/drivers/pmu/pmu.c b/plat/rockchip/rk3368/drivers/pmu/pmu.c
index 53d333b..fc44a4c 100644
--- a/plat/rockchip/rk3368/drivers/pmu/pmu.c
+++ b/plat/rockchip/rk3368/drivers/pmu/pmu.c
@@ -40,6 +40,8 @@
 #include <ddr_rk3368.h>
 #include <pmu_com.h>
 
+DEFINE_BAKERY_LOCK(rockchip_pd_lock);
+
 static struct psram_data_t *psram_sleep_cfg =
 	(struct psram_data_t *)PSRAM_DT_BASE;
 
diff --git a/bl31/cpu_data_array.c b/plat/rockchip/rk3368/include/plat_sip_calls.h
similarity index 64%
copy from bl31/cpu_data_array.c
copy to plat/rockchip/rk3368/include/plat_sip_calls.h
index 4cba118..a778f49 100644
--- a/bl31/cpu_data_array.c
+++ b/plat/rockchip/rk3368/include/plat_sip_calls.h
@@ -1,20 +1,16 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
  *
- * Redistributions in binary form must reproduce the above copyright notice,
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -28,9 +24,9 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <cassert.h>
-#include <cpu_data.h>
-#include <platform_def.h>
+#ifndef __PLAT_SIP_CALLS_H__
+#define __PLAT_SIP_CALLS_H__
+
+#define RK_PLAT_SIP_NUM_CALLS	0
 
-/* The per_cpu_ptr_cache_t space allocation */
-cpu_data_t percpu_data[PLATFORM_CORE_COUNT];
+#endif /* __PLAT_SIP_CALLS_H__ */
diff --git a/plat/rockchip/rk3368/include/platform_def.h b/plat/rockchip/rk3368/include/platform_def.h
index 299704d..5d801cf 100644
--- a/plat/rockchip/rk3368/include/platform_def.h
+++ b/plat/rockchip/rk3368/include/platform_def.h
@@ -74,6 +74,8 @@
 					 PLATFORM_CLUSTER_COUNT +	\
 					 PLATFORM_CORE_COUNT)
 
+#define PLAT_RK_CLST_TO_CPUID_SHIFT	8
+
 #define PLAT_MAX_PWR_LVL		MPIDR_AFFLVL2
 
 /*
@@ -144,6 +146,4 @@
 
 #define PLAT_RK_PRIMARY_CPU	0x0
 
-#define RK_PLAT_AARCH_CFG	RK_PLAT_CFG0
-
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/bl31/cpu_data_array.c b/plat/rockchip/rk3368/plat_sip_calls.c
similarity index 60%
copy from bl31/cpu_data_array.c
copy to plat/rockchip/rk3368/plat_sip_calls.c
index 4cba118..3d2f39a 100644
--- a/bl31/cpu_data_array.c
+++ b/plat/rockchip/rk3368/plat_sip_calls.c
@@ -1,20 +1,16 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
  *
- * Redistributions in binary form must reproduce the above copyright notice,
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -28,9 +24,24 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <cassert.h>
-#include <cpu_data.h>
-#include <platform_def.h>
+#include <debug.h>
+#include <mmio.h>
+#include <plat_sip_calls.h>
+#include <rockchip_sip_svc.h>
+#include <runtime_svc.h>
 
-/* The per_cpu_ptr_cache_t space allocation */
-cpu_data_t percpu_data[PLATFORM_CORE_COUNT];
+uint64_t rockchip_plat_sip_handler(uint32_t smc_fid,
+				   uint64_t x1,
+				   uint64_t x2,
+				   uint64_t x3,
+				   uint64_t x4,
+				   void *cookie,
+				   void *handle,
+				   uint64_t flags)
+{
+	switch (smc_fid) {
+	default:
+		ERROR("%s: unhandled SMC (0x%x)\n", __func__, smc_fid);
+		SMC_RET1(handle, SMC_UNK);
+	}
+}
diff --git a/plat/rockchip/rk3368/platform.mk b/plat/rockchip/rk3368/platform.mk
index 50eda32..1dca4c5 100644
--- a/plat/rockchip/rk3368/platform.mk
+++ b/plat/rockchip/rk3368/platform.mk
@@ -51,7 +51,7 @@
 PLAT_BL_COMMON_SOURCES	:=	lib/xlat_tables/xlat_tables_common.c		\
 				lib/xlat_tables/aarch64/xlat_tables.c		\
 				plat/common/aarch64/plat_common.c		\
-				plat/common/aarch64/plat_psci_common.c
+				plat/common/plat_psci_common.c
 
 BL31_SOURCES		+=	${RK_GIC_SOURCES}				\
 				drivers/arm/cci/cci.c				\
@@ -69,6 +69,8 @@
 				${RK_PLAT_COMMON}/plat_pm.c			\
 				${RK_PLAT_COMMON}/plat_topology.c		\
 				${RK_PLAT_COMMON}/aarch64/platform_common.c	\
+				${RK_PLAT_COMMON}/rockchip_sip_svc.c		\
+				${RK_PLAT_SOC}/plat_sip_calls.c			\
 				${RK_PLAT_SOC}/drivers/pmu/pmu.c		\
 				${RK_PLAT_SOC}/drivers/soc/soc.c		\
 				${RK_PLAT_SOC}/drivers/ddr/ddr_rk3368.c		\
diff --git a/plat/rockchip/rk3368/rk3368_def.h b/plat/rockchip/rk3368/rk3368_def.h
index 2242cee..01e4910 100644
--- a/plat/rockchip/rk3368/rk3368_def.h
+++ b/plat/rockchip/rk3368/rk3368_def.h
@@ -106,12 +106,6 @@
 #define PLAT_RK_CCI_CLUSTER1_SL_IFACE_IX	4
 
 /******************************************************************************
- * cpu up status
- ******************************************************************************/
-#define PMU_CPU_HOTPLUG		0xdeadbeaf
-#define PMU_CPU_AUTO_PWRDN	0xabcdef12
-
-/******************************************************************************
  * sgi, ppi
  ******************************************************************************/
 #define RK_IRQ_SEC_PHY_TIMER	29
diff --git a/plat/rockchip/rk3399/drivers/pmu/plat_pmu_macros.S b/plat/rockchip/rk3399/drivers/pmu/plat_pmu_macros.S
new file mode 100644
index 0000000..7241964
--- /dev/null
+++ b/plat/rockchip/rk3399/drivers/pmu/plat_pmu_macros.S
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <platform_def.h>
+
+	.globl	clst_warmboot_data
+
+#define PLL_MODE_SHIFT	(0x8)
+#define PLL_NORMAL_MODE	((0x3 << (PLL_MODE_SHIFT + 16)) | \
+						 (0x1 << PLL_MODE_SHIFT))
+#define MPIDR_CLST_L_BITS 0x0
+	/*
+	 * For different socs, if we want to speed up warmboot,
+	 * we need to config some regs here.
+	 * If scu was suspend, we must resume related clk
+	 * from slow (24M) mode to normal mode first.
+	 * X0: MPIDR_EL1 & MPIDR_CLUSTER_MASK
+	 */
+.macro	func_rockchip_clst_warmboot
+	adr	x4, clst_warmboot_data
+	lsr	x5, x0, #6
+	ldr	w3, [x4, x5]
+	str	wzr, [x4, x5]
+	cmp	w3, #PMU_CLST_RET
+	b.ne	clst_warmboot_end
+	ldr	w6, =(PLL_NORMAL_MODE)
+	/*
+	 * core_l offset is CRU_BASE + 0xc,
+	 * core_b offset is CRU_BASE + 0x2c
+	 */
+	ldr	x7, =(CRU_BASE + 0xc)
+	lsr	x2, x0, #3
+	str	w6, [x7, x2]
+clst_warmboot_end:
+.endm
+
+.macro rockchip_clst_warmboot_data
+clst_warmboot_data:
+	.rept	PLATFORM_CLUSTER_COUNT
+	.word	0
+	.endr
+.endm
diff --git a/plat/rockchip/rk3399/drivers/pmu/pmu.c b/plat/rockchip/rk3399/drivers/pmu/pmu.c
index 9b95621..72d71bf 100644
--- a/plat/rockchip/rk3399/drivers/pmu/pmu.c
+++ b/plat/rockchip/rk3399/drivers/pmu/pmu.c
@@ -46,6 +46,8 @@
 #include <pmu.h>
 #include <pmu_com.h>
 
+DEFINE_BAKERY_LOCK(rockchip_pd_lock);
+
 static struct psram_data_t *psram_sleep_cfg =
 	(struct psram_data_t *)PSRAM_DT_BASE;
 
@@ -67,6 +69,320 @@
 #endif
 ;/* coheront */
 
+static void pmu_bus_idle_req(uint32_t bus, uint32_t state)
+{
+	uint32_t bus_id = BIT(bus);
+	uint32_t bus_req;
+	uint32_t wait_cnt = 0;
+	uint32_t bus_state, bus_ack;
+
+	if (state)
+		bus_req = BIT(bus);
+	else
+		bus_req = 0;
+
+	mmio_clrsetbits_32(PMU_BASE + PMU_BUS_IDLE_REQ, bus_id, bus_req);
+
+	do {
+		bus_state = mmio_read_32(PMU_BASE + PMU_BUS_IDLE_ST) & bus_id;
+		bus_ack = mmio_read_32(PMU_BASE + PMU_BUS_IDLE_ACK) & bus_id;
+		wait_cnt++;
+	} while ((bus_state != bus_req || bus_ack != bus_req) &&
+		 (wait_cnt < MAX_WAIT_COUNT));
+
+	if (bus_state != bus_req || bus_ack != bus_req) {
+		INFO("%s:st=%x(%x)\n", __func__,
+		     mmio_read_32(PMU_BASE + PMU_BUS_IDLE_ST),
+		     bus_state);
+		INFO("%s:st=%x(%x)\n", __func__,
+		     mmio_read_32(PMU_BASE + PMU_BUS_IDLE_ACK),
+		     bus_ack);
+	}
+
+}
+
+struct pmu_slpdata_s pmu_slpdata;
+
+static void qos_save(void)
+{
+	if (pmu_power_domain_st(PD_GPU) == pmu_pd_on)
+		RESTORE_QOS(pmu_slpdata.gpu_qos, GPU);
+	if (pmu_power_domain_st(PD_ISP0) == pmu_pd_on) {
+		RESTORE_QOS(pmu_slpdata.isp0_m0_qos, ISP0_M0);
+		RESTORE_QOS(pmu_slpdata.isp0_m1_qos, ISP0_M1);
+	}
+	if (pmu_power_domain_st(PD_ISP1) == pmu_pd_on) {
+		RESTORE_QOS(pmu_slpdata.isp1_m0_qos, ISP1_M0);
+		RESTORE_QOS(pmu_slpdata.isp1_m1_qos, ISP1_M1);
+	}
+	if (pmu_power_domain_st(PD_VO) == pmu_pd_on) {
+		RESTORE_QOS(pmu_slpdata.vop_big_r, VOP_BIG_R);
+		RESTORE_QOS(pmu_slpdata.vop_big_w, VOP_BIG_W);
+		RESTORE_QOS(pmu_slpdata.vop_little, VOP_LITTLE);
+	}
+	if (pmu_power_domain_st(PD_HDCP) == pmu_pd_on)
+		RESTORE_QOS(pmu_slpdata.hdcp_qos, HDCP);
+	if (pmu_power_domain_st(PD_GMAC) == pmu_pd_on)
+		RESTORE_QOS(pmu_slpdata.gmac_qos, GMAC);
+	if (pmu_power_domain_st(PD_CCI) == pmu_pd_on) {
+		RESTORE_QOS(pmu_slpdata.cci_m0_qos, CCI_M0);
+		RESTORE_QOS(pmu_slpdata.cci_m1_qos, CCI_M1);
+	}
+	if (pmu_power_domain_st(PD_SD) == pmu_pd_on)
+		RESTORE_QOS(pmu_slpdata.sdmmc_qos, SDMMC);
+	if (pmu_power_domain_st(PD_EMMC) == pmu_pd_on)
+		RESTORE_QOS(pmu_slpdata.emmc_qos, EMMC);
+	if (pmu_power_domain_st(PD_SDIOAUDIO) == pmu_pd_on)
+		RESTORE_QOS(pmu_slpdata.sdio_qos, SDIO);
+	if (pmu_power_domain_st(PD_GIC) == pmu_pd_on)
+		RESTORE_QOS(pmu_slpdata.gic_qos, GIC);
+	if (pmu_power_domain_st(PD_RGA) == pmu_pd_on) {
+		RESTORE_QOS(pmu_slpdata.rga_r_qos, RGA_R);
+		RESTORE_QOS(pmu_slpdata.rga_w_qos, RGA_W);
+	}
+	if (pmu_power_domain_st(PD_IEP) == pmu_pd_on)
+		RESTORE_QOS(pmu_slpdata.iep_qos, IEP);
+	if (pmu_power_domain_st(PD_USB3) == pmu_pd_on) {
+		RESTORE_QOS(pmu_slpdata.usb_otg0_qos, USB_OTG0);
+		RESTORE_QOS(pmu_slpdata.usb_otg1_qos, USB_OTG1);
+	}
+	if (pmu_power_domain_st(PD_PERIHP) == pmu_pd_on) {
+		RESTORE_QOS(pmu_slpdata.usb_host0_qos, USB_HOST0);
+		RESTORE_QOS(pmu_slpdata.usb_host1_qos, USB_HOST1);
+		RESTORE_QOS(pmu_slpdata.perihp_nsp_qos, PERIHP_NSP);
+	}
+	if (pmu_power_domain_st(PD_PERILP) == pmu_pd_on) {
+		RESTORE_QOS(pmu_slpdata.dmac0_qos, DMAC0);
+		RESTORE_QOS(pmu_slpdata.dmac1_qos, DMAC1);
+		RESTORE_QOS(pmu_slpdata.dcf_qos, DCF);
+		RESTORE_QOS(pmu_slpdata.crypto0_qos, CRYPTO0);
+		RESTORE_QOS(pmu_slpdata.crypto1_qos, CRYPTO1);
+		RESTORE_QOS(pmu_slpdata.perilp_nsp_qos, PERILP_NSP);
+		RESTORE_QOS(pmu_slpdata.perilpslv_nsp_qos, PERILPSLV_NSP);
+		RESTORE_QOS(pmu_slpdata.peri_cm1_qos, PERI_CM1);
+	}
+	if (pmu_power_domain_st(PD_VDU) == pmu_pd_on)
+		RESTORE_QOS(pmu_slpdata.video_m0_qos, VIDEO_M0);
+	if (pmu_power_domain_st(PD_VCODEC) == pmu_pd_on) {
+		RESTORE_QOS(pmu_slpdata.video_m1_r_qos, VIDEO_M1_R);
+		RESTORE_QOS(pmu_slpdata.video_m1_w_qos, VIDEO_M1_W);
+	}
+}
+
+static void qos_restore(void)
+{
+	if (pmu_power_domain_st(PD_GPU) == pmu_pd_on)
+		SAVE_QOS(pmu_slpdata.gpu_qos, GPU);
+	if (pmu_power_domain_st(PD_ISP0) == pmu_pd_on) {
+		SAVE_QOS(pmu_slpdata.isp0_m0_qos, ISP0_M0);
+		SAVE_QOS(pmu_slpdata.isp0_m1_qos, ISP0_M1);
+	}
+	if (pmu_power_domain_st(PD_ISP1) == pmu_pd_on) {
+		SAVE_QOS(pmu_slpdata.isp1_m0_qos, ISP1_M0);
+		SAVE_QOS(pmu_slpdata.isp1_m1_qos, ISP1_M1);
+	}
+	if (pmu_power_domain_st(PD_VO) == pmu_pd_on) {
+		SAVE_QOS(pmu_slpdata.vop_big_r, VOP_BIG_R);
+		SAVE_QOS(pmu_slpdata.vop_big_w, VOP_BIG_W);
+		SAVE_QOS(pmu_slpdata.vop_little, VOP_LITTLE);
+	}
+	if (pmu_power_domain_st(PD_HDCP) == pmu_pd_on)
+		SAVE_QOS(pmu_slpdata.hdcp_qos, HDCP);
+	if (pmu_power_domain_st(PD_GMAC) == pmu_pd_on)
+		SAVE_QOS(pmu_slpdata.gmac_qos, GMAC);
+	if (pmu_power_domain_st(PD_CCI) == pmu_pd_on) {
+		SAVE_QOS(pmu_slpdata.cci_m0_qos, CCI_M0);
+		SAVE_QOS(pmu_slpdata.cci_m1_qos, CCI_M1);
+	}
+	if (pmu_power_domain_st(PD_SD) == pmu_pd_on)
+		SAVE_QOS(pmu_slpdata.sdmmc_qos, SDMMC);
+	if (pmu_power_domain_st(PD_EMMC) == pmu_pd_on)
+		SAVE_QOS(pmu_slpdata.emmc_qos, EMMC);
+	if (pmu_power_domain_st(PD_SDIOAUDIO) == pmu_pd_on)
+		SAVE_QOS(pmu_slpdata.sdio_qos, SDIO);
+	if (pmu_power_domain_st(PD_GIC) == pmu_pd_on)
+		SAVE_QOS(pmu_slpdata.gic_qos, GIC);
+	if (pmu_power_domain_st(PD_RGA) == pmu_pd_on) {
+		SAVE_QOS(pmu_slpdata.rga_r_qos, RGA_R);
+		SAVE_QOS(pmu_slpdata.rga_w_qos, RGA_W);
+	}
+	if (pmu_power_domain_st(PD_IEP) == pmu_pd_on)
+		SAVE_QOS(pmu_slpdata.iep_qos, IEP);
+	if (pmu_power_domain_st(PD_USB3) == pmu_pd_on) {
+		SAVE_QOS(pmu_slpdata.usb_otg0_qos, USB_OTG0);
+		SAVE_QOS(pmu_slpdata.usb_otg1_qos, USB_OTG1);
+	}
+	if (pmu_power_domain_st(PD_PERIHP) == pmu_pd_on) {
+		SAVE_QOS(pmu_slpdata.usb_host0_qos, USB_HOST0);
+		SAVE_QOS(pmu_slpdata.usb_host1_qos, USB_HOST1);
+		SAVE_QOS(pmu_slpdata.perihp_nsp_qos, PERIHP_NSP);
+	}
+	if (pmu_power_domain_st(PD_PERILP) == pmu_pd_on) {
+		SAVE_QOS(pmu_slpdata.dmac0_qos, DMAC0);
+		SAVE_QOS(pmu_slpdata.dmac1_qos, DMAC1);
+		SAVE_QOS(pmu_slpdata.dcf_qos, DCF);
+		SAVE_QOS(pmu_slpdata.crypto0_qos, CRYPTO0);
+		SAVE_QOS(pmu_slpdata.crypto1_qos, CRYPTO1);
+		SAVE_QOS(pmu_slpdata.perilp_nsp_qos, PERILP_NSP);
+		SAVE_QOS(pmu_slpdata.perilpslv_nsp_qos, PERILPSLV_NSP);
+		SAVE_QOS(pmu_slpdata.peri_cm1_qos, PERI_CM1);
+	}
+	if (pmu_power_domain_st(PD_VDU) == pmu_pd_on)
+		SAVE_QOS(pmu_slpdata.video_m0_qos, VIDEO_M0);
+	if (pmu_power_domain_st(PD_VCODEC) == pmu_pd_on) {
+		SAVE_QOS(pmu_slpdata.video_m1_r_qos, VIDEO_M1_R);
+		SAVE_QOS(pmu_slpdata.video_m1_w_qos, VIDEO_M1_W);
+	}
+}
+
+static int pmu_set_power_domain(uint32_t pd_id, uint32_t pd_state)
+{
+	uint32_t state;
+
+	if (pmu_power_domain_st(pd_id) == pd_state)
+		goto out;
+
+	if (pd_state == pmu_pd_on)
+		pmu_power_domain_ctr(pd_id, pd_state);
+
+	state = (pd_state == pmu_pd_off) ? BUS_IDLE : BUS_ACTIVE;
+
+	switch (pd_id) {
+	case PD_GPU:
+		pmu_bus_idle_req(BUS_ID_GPU, state);
+		break;
+	case PD_VIO:
+		pmu_bus_idle_req(BUS_ID_VIO, state);
+		break;
+	case PD_ISP0:
+		pmu_bus_idle_req(BUS_ID_ISP0, state);
+		break;
+	case PD_ISP1:
+		pmu_bus_idle_req(BUS_ID_ISP1, state);
+		break;
+	case PD_VO:
+		pmu_bus_idle_req(BUS_ID_VOPB, state);
+		pmu_bus_idle_req(BUS_ID_VOPL, state);
+		break;
+	case PD_HDCP:
+		pmu_bus_idle_req(BUS_ID_HDCP, state);
+		break;
+	case PD_TCPD0:
+		break;
+	case PD_TCPD1:
+		break;
+	case PD_GMAC:
+		pmu_bus_idle_req(BUS_ID_GMAC, state);
+		break;
+	case PD_CCI:
+		pmu_bus_idle_req(BUS_ID_CCIM0, state);
+		pmu_bus_idle_req(BUS_ID_CCIM1, state);
+		break;
+	case PD_SD:
+		pmu_bus_idle_req(BUS_ID_SD, state);
+		break;
+	case PD_EMMC:
+		pmu_bus_idle_req(BUS_ID_EMMC, state);
+		break;
+	case PD_EDP:
+		pmu_bus_idle_req(BUS_ID_EDP, state);
+		break;
+	case PD_SDIOAUDIO:
+		pmu_bus_idle_req(BUS_ID_SDIOAUDIO, state);
+		break;
+	case PD_GIC:
+		pmu_bus_idle_req(BUS_ID_GIC, state);
+		break;
+	case PD_RGA:
+		pmu_bus_idle_req(BUS_ID_RGA, state);
+		break;
+	case PD_VCODEC:
+		pmu_bus_idle_req(BUS_ID_VCODEC, state);
+		break;
+	case PD_VDU:
+		pmu_bus_idle_req(BUS_ID_VDU, state);
+		break;
+	case PD_IEP:
+		pmu_bus_idle_req(BUS_ID_IEP, state);
+		break;
+	case PD_USB3:
+		pmu_bus_idle_req(BUS_ID_USB3, state);
+		break;
+	case PD_PERIHP:
+		pmu_bus_idle_req(BUS_ID_PERIHP, state);
+		break;
+	default:
+		break;
+	}
+
+	if (pd_state == pmu_pd_off)
+		pmu_power_domain_ctr(pd_id, pd_state);
+
+out:
+	return 0;
+}
+
+static uint32_t pmu_powerdomain_state;
+
+static void pmu_power_domains_suspend(void)
+{
+	clk_gate_con_save();
+	clk_gate_con_disable();
+	qos_save();
+	pmu_powerdomain_state = mmio_read_32(PMU_BASE + PMU_PWRDN_ST);
+	pmu_set_power_domain(PD_GPU, pmu_pd_off);
+	pmu_set_power_domain(PD_TCPD0, pmu_pd_off);
+	pmu_set_power_domain(PD_TCPD1, pmu_pd_off);
+	pmu_set_power_domain(PD_VO, pmu_pd_off);
+	pmu_set_power_domain(PD_ISP0, pmu_pd_off);
+	pmu_set_power_domain(PD_ISP1, pmu_pd_off);
+	pmu_set_power_domain(PD_HDCP, pmu_pd_off);
+	pmu_set_power_domain(PD_SDIOAUDIO, pmu_pd_off);
+	pmu_set_power_domain(PD_GMAC, pmu_pd_off);
+	pmu_set_power_domain(PD_EDP, pmu_pd_off);
+	pmu_set_power_domain(PD_IEP, pmu_pd_off);
+	pmu_set_power_domain(PD_RGA, pmu_pd_off);
+	pmu_set_power_domain(PD_VCODEC, pmu_pd_off);
+	pmu_set_power_domain(PD_VDU, pmu_pd_off);
+	clk_gate_con_restore();
+}
+
+static void pmu_power_domains_resume(void)
+{
+	clk_gate_con_save();
+	clk_gate_con_disable();
+	if (!(pmu_powerdomain_state & BIT(PD_VDU)))
+		pmu_set_power_domain(PD_VDU, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_VCODEC)))
+		pmu_set_power_domain(PD_VCODEC, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_RGA)))
+		pmu_set_power_domain(PD_RGA, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_IEP)))
+		pmu_set_power_domain(PD_IEP, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_EDP)))
+		pmu_set_power_domain(PD_EDP, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_GMAC)))
+		pmu_set_power_domain(PD_GMAC, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_SDIOAUDIO)))
+		pmu_set_power_domain(PD_SDIOAUDIO, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_HDCP)))
+		pmu_set_power_domain(PD_HDCP, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_ISP1)))
+		pmu_set_power_domain(PD_ISP1, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_ISP0)))
+		pmu_set_power_domain(PD_ISP0, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_VO)))
+		pmu_set_power_domain(PD_VO, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_TCPD1)))
+		pmu_set_power_domain(PD_TCPD1, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_TCPD0)))
+		pmu_set_power_domain(PD_TCPD0, pmu_pd_on);
+	if (!(pmu_powerdomain_state & BIT(PD_GPU)))
+		pmu_set_power_domain(PD_GPU, pmu_pd_on);
+	qos_restore();
+	clk_gate_con_restore();
+}
+
 void rk3399_flash_l2_b(void)
 {
 	uint32_t wait_cnt = 0;
@@ -77,7 +393,7 @@
 	while (!(mmio_read_32(PMU_BASE + PMU_CORE_PWR_ST) &
 		 BIT(L2_FLUSHDONE_CLUSTER_B))) {
 		wait_cnt++;
-		if (!(wait_cnt % MAX_WAIT_CONUT))
+		if (wait_cnt >= MAX_WAIT_COUNT)
 			WARN("%s:reg %x,wait\n", __func__,
 			     mmio_read_32(PMU_BASE + PMU_CORE_PWR_ST));
 	}
@@ -103,7 +419,7 @@
 	while (!(mmio_read_32(PMU_BASE + PMU_CORE_PWR_ST) &
 		 BIT(STANDBY_BY_WFIL2_CLUSTER_B))) {
 		wait_cnt++;
-		if (!(wait_cnt % MAX_WAIT_CONUT))
+		if (wait_cnt >= MAX_WAIT_COUNT)
 			ERROR("%s:wait cluster-b l2(%x)\n", __func__,
 			      mmio_read_32(PMU_BASE + PMU_CORE_PWR_ST));
 	}
@@ -220,6 +536,78 @@
 	return 0;
 }
 
+static inline void clst_pwr_domain_suspend(plat_local_state_t lvl_state)
+{
+	uint32_t cpu_id = plat_my_core_pos();
+	uint32_t pll_id, clst_st_msk, clst_st_chk_msk, pmu_st;
+
+	assert(cpu_id < PLATFORM_CORE_COUNT);
+
+	if (lvl_state == PLAT_MAX_RET_STATE  ||
+	    lvl_state == PLAT_MAX_OFF_STATE) {
+		if (cpu_id < PLATFORM_CLUSTER0_CORE_COUNT) {
+			pll_id = ALPLL_ID;
+			clst_st_msk = CLST_L_CPUS_MSK;
+		} else {
+			pll_id = ABPLL_ID;
+			clst_st_msk = CLST_B_CPUS_MSK <<
+				       PLATFORM_CLUSTER0_CORE_COUNT;
+		}
+
+		clst_st_chk_msk = clst_st_msk & ~(BIT(cpu_id));
+
+		pmu_st = mmio_read_32(PMU_BASE + PMU_PWRDN_ST);
+
+		pmu_st &= clst_st_msk;
+
+		if (pmu_st == clst_st_chk_msk) {
+			mmio_write_32(CRU_BASE + CRU_PLL_CON(pll_id, 3),
+				      PLL_SLOW_MODE);
+
+			clst_warmboot_data[pll_id] = PMU_CLST_RET;
+
+			pmu_st = mmio_read_32(PMU_BASE + PMU_PWRDN_ST);
+			pmu_st &= clst_st_msk;
+			if (pmu_st == clst_st_chk_msk)
+				return;
+			/*
+			 * it is mean that others cpu is up again,
+			 * we must resume the cfg at once.
+			 */
+			mmio_write_32(CRU_BASE + CRU_PLL_CON(pll_id, 3),
+				      PLL_NOMAL_MODE);
+			clst_warmboot_data[pll_id] = 0;
+		}
+	}
+}
+
+static int clst_pwr_domain_resume(plat_local_state_t lvl_state)
+{
+	uint32_t cpu_id = plat_my_core_pos();
+	uint32_t pll_id, pll_st;
+
+	assert(cpu_id < PLATFORM_CORE_COUNT);
+
+	if (lvl_state == PLAT_MAX_RET_STATE ||
+	    lvl_state == PLAT_MAX_OFF_STATE) {
+		if (cpu_id < PLATFORM_CLUSTER0_CORE_COUNT)
+			pll_id = ALPLL_ID;
+		else
+			pll_id = ABPLL_ID;
+
+		pll_st = mmio_read_32(CRU_BASE + CRU_PLL_CON(pll_id, 3)) >>
+				 PLL_MODE_SHIFT;
+
+		if (pll_st != NORMAL_MODE) {
+			WARN("%s: clst (%d) is in error mode (%d)\n",
+			     __func__, pll_id, pll_st);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 static void nonboot_cpus_off(void)
 {
 	uint32_t boot_cpu, cpu;
@@ -258,6 +646,19 @@
 	return 0;
 }
 
+static int hlvl_pwr_domain_off(uint32_t lvl, plat_local_state_t lvl_state)
+{
+	switch (lvl) {
+	case MPIDR_AFFLVL1:
+		clst_pwr_domain_suspend(lvl_state);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 static int cores_pwr_domain_suspend(void)
 {
 	uint32_t cpu_id = plat_my_core_pos();
@@ -265,7 +666,7 @@
 	assert(cpu_id < PLATFORM_CORE_COUNT);
 	assert(cpuson_flags[cpu_id] == 0);
 	cpuson_flags[cpu_id] = PMU_CPU_AUTO_PWRDN;
-	cpuson_entry_point[cpu_id] = (uintptr_t)psci_entrypoint;
+	cpuson_entry_point[cpu_id] = plat_get_sec_entrypoint();
 	dsb();
 
 	cpus_power_domain_off(cpu_id, core_pwr_wfi_int);
@@ -273,12 +674,38 @@
 	return 0;
 }
 
+static int hlvl_pwr_domain_suspend(uint32_t lvl, plat_local_state_t lvl_state)
+{
+	switch (lvl) {
+	case MPIDR_AFFLVL1:
+		clst_pwr_domain_suspend(lvl_state);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 static int cores_pwr_domain_on_finish(void)
 {
 	uint32_t cpu_id = plat_my_core_pos();
 
-	/* Disable core_pm */
-	mmio_write_32(PMU_BASE + PMU_CORE_PM_CON(cpu_id), CORES_PM_DISABLE);
+	mmio_write_32(PMU_BASE + PMU_CORE_PM_CON(cpu_id),
+		      CORES_PM_DISABLE);
+	return 0;
+}
+
+static int hlvl_pwr_domain_on_finish(uint32_t lvl,
+				     plat_local_state_t lvl_state)
+{
+	switch (lvl) {
+	case MPIDR_AFFLVL1:
+		clst_pwr_domain_resume(lvl_state);
+		break;
+	default:
+		break;
+	}
 
 	return 0;
 }
@@ -293,10 +720,23 @@
 	return 0;
 }
 
+static int hlvl_pwr_domain_resume(uint32_t lvl, plat_local_state_t lvl_state)
+{
+	switch (lvl) {
+	case MPIDR_AFFLVL1:
+		clst_pwr_domain_resume(lvl_state);
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 static void sys_slp_config(void)
 {
 	uint32_t slp_mode_cfg = 0;
 
+	mmio_write_32(GRF_BASE + GRF_SOC_CON4, CCI_FORCE_WAKEUP);
 	mmio_write_32(PMU_BASE + PMU_CCI500_CON,
 		      BIT_WITH_WMSK(PMU_CLR_PREQ_CCI500_HW) |
 		      BIT_WITH_WMSK(PMU_CLR_QREQ_CCI500_HW) |
@@ -307,33 +747,148 @@
 		      BIT_WITH_WMSK(PMU_CLR_CORE_L_2GIC_HW) |
 		      BIT_WITH_WMSK(PMU_CLR_GIC2_CORE_L_HW));
 
-	mmio_write_32(PMUGRF_BASE + PMUGRF_GPIO1A_IOMUX,
-		      BIT_WITH_WMSK(AP_PWROFF));
-
 	slp_mode_cfg = BIT(PMU_PWR_MODE_EN) |
 		       BIT(PMU_POWER_OFF_REQ_CFG) |
 		       BIT(PMU_CPU0_PD_EN) |
 		       BIT(PMU_L2_FLUSH_EN) |
 		       BIT(PMU_L2_IDLE_EN) |
-		       BIT(PMU_SCU_PD_EN);
+		       BIT(PMU_SCU_PD_EN) |
+		       BIT(PMU_CCI_PD_EN) |
+		       BIT(PMU_CLK_CORE_SRC_GATE_EN) |
+		       BIT(PMU_PERILP_PD_EN) |
+		       BIT(PMU_CLK_PERILP_SRC_GATE_EN) |
+		       BIT(PMU_ALIVE_USE_LF) |
+		       BIT(PMU_SREF0_ENTER_EN) |
+		       BIT(PMU_SREF1_ENTER_EN) |
+		       BIT(PMU_DDRC0_GATING_EN) |
+		       BIT(PMU_DDRC1_GATING_EN) |
+		       BIT(PMU_DDRIO0_RET_EN) |
+		       BIT(PMU_DDRIO1_RET_EN) |
+		       BIT(PMU_DDRIO_RET_HW_DE_REQ) |
+		       BIT(PMU_PLL_PD_EN) |
+		       BIT(PMU_CLK_CENTER_SRC_GATE_EN) |
+		       BIT(PMU_OSC_DIS) |
+		       BIT(PMU_PMU_USE_LF);
 
-	mmio_setbits_32(PMU_BASE + PMU_WKUP_CFG4, PMU_CLUSTER_L_WKUP_EN);
-	mmio_setbits_32(PMU_BASE + PMU_WKUP_CFG4, PMU_CLUSTER_B_WKUP_EN);
-	mmio_clrbits_32(PMU_BASE + PMU_WKUP_CFG4, PMU_GPIO_WKUP_EN);
-
+	mmio_setbits_32(PMU_BASE + PMU_WKUP_CFG4, BIT(PMU_CLUSTER_L_WKUP_EN));
+	mmio_setbits_32(PMU_BASE + PMU_WKUP_CFG4, BIT(PMU_CLUSTER_B_WKUP_EN));
+	mmio_setbits_32(PMU_BASE + PMU_WKUP_CFG4, BIT(PMU_GPIO_WKUP_EN));
 	mmio_write_32(PMU_BASE + PMU_PWRMODE_CON, slp_mode_cfg);
 
+	mmio_write_32(PMU_BASE + PMU_SCU_L_PWRDN_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_SCU_L_PWRUP_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_SCU_B_PWRDN_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_SCU_B_PWRUP_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_CENTER_PWRDN_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_CENTER_PWRUP_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_WAKEUP_RST_CLR_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_OSC_CNT, CYCL_32K_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_DDRIO_PWRON_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_PLLLOCK_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_PLLRST_CNT, CYCL_24M_CNT_MS(3));
+	mmio_write_32(PMU_BASE + PMU_STABLE_CNT, CYCL_24M_CNT_MS(3));
+	mmio_clrbits_32(PMU_BASE + PMU_SFT_CON, BIT(PMU_24M_EN_CFG));
+
+	mmio_write_32(PMU_BASE + PMU_PLL_CON, PLL_PD_HW);
+	mmio_write_32(PMUGRF_BASE + PMUGRF_SOC_CON0, EXTERNAL_32K);
+	mmio_write_32(PMUGRF_BASE, IOMUX_CLK_32K); /*32k iomux*/
+}
+
+static void set_hw_idle(uint32_t hw_idle)
+{
+	mmio_setbits_32(PMU_BASE + PMU_BUS_CLR, hw_idle);
+}
+
+static void clr_hw_idle(uint32_t hw_idle)
+{
+	mmio_clrbits_32(PMU_BASE + PMU_BUS_CLR, hw_idle);
+}
+
+struct pwm_data_s pwm_data;
+
+/*
+ * Save the PWMs data.
+ */
+static void save_pwms(void)
+{
+	uint32_t i;
+
+	pwm_data.iomux_bitmask = 0;
+
+	/* Save all IOMUXes */
+	if (mmio_read_32(GRF_BASE + GRF_GPIO4C_IOMUX) & GPIO4C2_IOMUX_PWM)
+		pwm_data.iomux_bitmask |= PWM0_IOMUX_PWM_EN;
+	if (mmio_read_32(GRF_BASE + GRF_GPIO4C_IOMUX) & GPIO4C6_IOMUX_PWM)
+		pwm_data.iomux_bitmask |= PWM1_IOMUX_PWM_EN;
+	if (mmio_read_32(PMUGRF_BASE + PMUGRF_GPIO1C_IOMUX) &
+			 GPIO1C3_IOMUX_PWM)
+		pwm_data.iomux_bitmask |= PWM2_IOMUX_PWM_EN;
+	if (mmio_read_32(PMUGRF_BASE + PMUGRF_GPIO0A_IOMUX) &
+			 GPIO0A6_IOMUX_PWM)
+		pwm_data.iomux_bitmask |= PWM3_IOMUX_PWM_EN;
+
+	for (i = 0; i < 4; i++) {
+		/* Save cnt, period, duty and ctrl for PWM i */
+		pwm_data.cnt[i] = mmio_read_32(PWM_BASE + PWM_CNT(i));
+		pwm_data.duty[i] = mmio_read_32(PWM_BASE + PWM_PERIOD_HPR(i));
+		pwm_data.period[i] = mmio_read_32(PWM_BASE + PWM_DUTY_LPR(i));
+		pwm_data.ctrl[i] = mmio_read_32(PWM_BASE + PWM_CTRL(i));
+	}
+
-	mmio_write_32(PMU_BASE + PMU_STABLE_CNT, CYCL_24M_CNT_MS(5));
-	mmio_write_32(PMU_BASE + PMU_SCU_L_PWRDN_CNT, CYCL_24M_CNT_MS(2));
-	mmio_write_32(PMU_BASE + PMU_SCU_L_PWRUP_CNT, CYCL_24M_CNT_MS(2));
-	mmio_write_32(PMU_BASE + PMU_SCU_B_PWRDN_CNT, CYCL_24M_CNT_MS(2));
-	mmio_write_32(PMU_BASE + PMU_SCU_B_PWRUP_CNT, CYCL_24M_CNT_MS(2));
+	/* PWMs all IOMUXes switch to the gpio mode */
+	mmio_write_32(GRF_BASE + GRF_GPIO4C_IOMUX, GPIO4C2_IOMUX_GPIO);
+	mmio_write_32(GRF_BASE + GRF_GPIO4C_IOMUX, GPIO4C6_IOMUX_GPIO);
+	mmio_write_32(PMUGRF_BASE  + PMUGRF_GPIO1C_IOMUX, GPIO1C3_IOMUX_GPIO);
+	mmio_write_32(PMUGRF_BASE + PMUGRF_GPIO0A_IOMUX, GPIO0A6_IOMUX_GPIO);
 }
 
+/*
+ * Restore the PWMs data.
+ */
+static void restore_pwms(void)
+{
+	uint32_t i;
+
+	/* Restore all IOMUXes */
+	if (pwm_data.iomux_bitmask & PWM3_IOMUX_PWM_EN)
+		mmio_write_32(PMUGRF_BASE + PMUGRF_GPIO0A_IOMUX,
+			      GPIO0A6_IOMUX_PWM);
+	if (pwm_data.iomux_bitmask & PWM2_IOMUX_PWM_EN)
+		mmio_write_32(PMUGRF_BASE + PMUGRF_GPIO1C_IOMUX,
+			      GPIO1C3_IOMUX_PWM);
+	if (pwm_data.iomux_bitmask & PWM1_IOMUX_PWM_EN)
+		mmio_write_32(GRF_BASE + GRF_GPIO4C_IOMUX, GPIO4C6_IOMUX_PWM);
+	if (pwm_data.iomux_bitmask & PWM0_IOMUX_PWM_EN)
+		mmio_write_32(GRF_BASE + GRF_GPIO4C_IOMUX, GPIO4C2_IOMUX_PWM);
+
+	for (i = 0; i < 4; i++) {
+		/* Restore ctrl, duty, period and cnt for PWM i */
+		mmio_write_32(PWM_BASE + PWM_CTRL(i), pwm_data.ctrl[i]);
+		mmio_write_32(PWM_BASE + PWM_DUTY_LPR(i), pwm_data.period[i]);
+		mmio_write_32(PWM_BASE + PWM_PERIOD_HPR(i), pwm_data.duty[i]);
+		mmio_write_32(PWM_BASE + PWM_CNT(i), pwm_data.cnt[i]);
+	}
+}
+
 static int sys_pwr_domain_suspend(void)
 {
+	uint32_t wait_cnt = 0;
+	uint32_t status = 0;
+
+	pmu_power_domains_suspend();
+	set_hw_idle(BIT(PMU_CLR_CENTER1) |
+		    BIT(PMU_CLR_ALIVE) |
+		    BIT(PMU_CLR_MSCH0) |
+		    BIT(PMU_CLR_MSCH1) |
+		    BIT(PMU_CLR_CCIM0) |
+		    BIT(PMU_CLR_CCIM1) |
+		    BIT(PMU_CLR_CENTER) |
+		    BIT(PMU_CLR_PERILP) |
+		    BIT(PMU_CLR_PMU) |
+		    BIT(PMU_CLR_PERILPM0) |
+		    BIT(PMU_CLR_GIC));
+
 	sys_slp_config();
-	plls_suspend();
 	pmu_sgrf_rst_hld();
 
 	mmio_write_32(SGRF_BASE + SGRF_SOC_CON0_1(1),
@@ -347,42 +902,81 @@
 		      BIT_WITH_WMSK(PMU_PWRDWN_REQ_CORE_B_SW) |
 		      BIT_WITH_WMSK(PMU_PWRDWN_REQ_GIC2_CORE_B_SW));
 	dsb();
+	status = BIT(PMU_PWRDWN_REQ_CORE_B_2GIC_SW_ST) |
+		BIT(PMU_PWRDWN_REQ_CORE_B_SW_ST) |
+		BIT(PMU_PWRDWN_REQ_GIC2_CORE_B_SW_ST);
+	while ((mmio_read_32(PMU_BASE +
+	       PMU_ADB400_ST) & status) != status) {
+		wait_cnt++;
+		if (wait_cnt >= MAX_WAIT_COUNT) {
+			ERROR("%s:wait cluster-b l2(%x)\n", __func__,
+			      mmio_read_32(PMU_BASE + PMU_ADB400_ST));
+			panic();
+		}
+	}
 	mmio_setbits_32(PMU_BASE + PMU_PWRDN_CON, BIT(PMU_SCU_B_PWRDWN_EN));
 
+	save_pwms();
+
 	return 0;
 }
 
 static int sys_pwr_domain_resume(void)
 {
+	uint32_t wait_cnt = 0;
+	uint32_t status = 0;
+
+	restore_pwms();
+
 	pmu_sgrf_rst_hld();
 
 	mmio_write_32(SGRF_BASE + SGRF_SOC_CON0_1(1),
 		      (cpu_warm_boot_addr >> CPU_BOOT_ADDR_ALIGN) |
 		      CPU_BOOT_ADDR_WMASK);
 
-	plls_resume();
-
 	mmio_write_32(PMU_BASE + PMU_CCI500_CON,
 		      WMSK_BIT(PMU_CLR_PREQ_CCI500_HW) |
 		      WMSK_BIT(PMU_CLR_QREQ_CCI500_HW) |
 		      WMSK_BIT(PMU_QGATING_CCI500_CFG));
+	dsb();
+	mmio_clrbits_32(PMU_BASE + PMU_PWRDN_CON,
+			BIT(PMU_SCU_B_PWRDWN_EN));
 
 	mmio_write_32(PMU_BASE + PMU_ADB400_CON,
+		      WMSK_BIT(PMU_PWRDWN_REQ_CORE_B_2GIC_SW) |
+		      WMSK_BIT(PMU_PWRDWN_REQ_CORE_B_SW) |
+		      WMSK_BIT(PMU_PWRDWN_REQ_GIC2_CORE_B_SW) |
 		      WMSK_BIT(PMU_CLR_CORE_L_HW) |
 		      WMSK_BIT(PMU_CLR_CORE_L_2GIC_HW) |
 		      WMSK_BIT(PMU_CLR_GIC2_CORE_L_HW));
 
-	mmio_clrbits_32(PMU_BASE + PMU_PWRDN_CON,
-			BIT(PMU_SCU_B_PWRDWN_EN));
+	status = BIT(PMU_PWRDWN_REQ_CORE_B_2GIC_SW_ST) |
+		BIT(PMU_PWRDWN_REQ_CORE_B_SW_ST) |
+		BIT(PMU_PWRDWN_REQ_GIC2_CORE_B_SW_ST);
 
-	mmio_write_32(PMU_BASE + PMU_ADB400_CON,
-		      WMSK_BIT(PMU_PWRDWN_REQ_CORE_B_2GIC_SW) |
-		      WMSK_BIT(PMU_PWRDWN_REQ_CORE_B_SW) |
-		      WMSK_BIT(PMU_PWRDWN_REQ_GIC2_CORE_B_SW));
+	while ((mmio_read_32(PMU_BASE +
+	   PMU_ADB400_ST) & status)) {
+		wait_cnt++;
+		if (wait_cnt >= MAX_WAIT_COUNT) {
+			ERROR("%s:wait cluster-b l2(%x)\n", __func__,
+			      mmio_read_32(PMU_BASE + PMU_ADB400_ST));
+			panic();
+		}
+	}
 
 	pmu_scu_b_pwrup();
 
-	plat_rockchip_gic_cpuif_enable();
+	pmu_power_domains_resume();
+	clr_hw_idle(BIT(PMU_CLR_CENTER1) |
+				BIT(PMU_CLR_ALIVE) |
+				BIT(PMU_CLR_MSCH0) |
+				BIT(PMU_CLR_MSCH1) |
+				BIT(PMU_CLR_CCIM0) |
+				BIT(PMU_CLR_CCIM1) |
+				BIT(PMU_CLR_CENTER) |
+				BIT(PMU_CLR_PERILP) |
+				BIT(PMU_CLR_PMU) |
+				BIT(PMU_CLR_GIC));
 	return 0;
 }
 
@@ -434,6 +1028,10 @@
 	.cores_pwr_dm_on_finish = cores_pwr_domain_on_finish,
 	.cores_pwr_dm_suspend = cores_pwr_domain_suspend,
 	.cores_pwr_dm_resume = cores_pwr_domain_resume,
+	.hlvl_pwr_dm_suspend = hlvl_pwr_domain_suspend,
+	.hlvl_pwr_dm_resume = hlvl_pwr_domain_resume,
+	.hlvl_pwr_dm_off = hlvl_pwr_domain_off,
+	.hlvl_pwr_dm_on_finish = hlvl_pwr_domain_on_finish,
 	.sys_pwr_dm_suspend = sys_pwr_domain_suspend,
 	.sys_pwr_dm_resume = sys_pwr_domain_resume,
 	.sys_gbl_soft_reset = soc_soft_reset,
@@ -453,15 +1051,19 @@
 	for (cpu = 0; cpu < PLATFORM_CORE_COUNT; cpu++)
 		cpuson_flags[cpu] = 0;
 
+	for (cpu = 0; cpu < PLATFORM_CLUSTER_COUNT; cpu++)
+		clst_warmboot_data[cpu] = 0;
+
 	psram_sleep_cfg->ddr_func = 0x00;
 	psram_sleep_cfg->ddr_data = 0x00;
 	psram_sleep_cfg->ddr_flag = 0x00;
 	psram_sleep_cfg->boot_mpidr = read_mpidr_el1() & 0xffff;
 
-	/* cpu boot from pmusram */
+	/* config cpu's warm boot address */
 	mmio_write_32(SGRF_BASE + SGRF_SOC_CON0_1(1),
 		      (cpu_warm_boot_addr >> CPU_BOOT_ADDR_ALIGN) |
 		      CPU_BOOT_ADDR_WMASK);
+	mmio_write_32(PMU_BASE + PMU_NOC_AUTO_ENA, NOC_AUTO_ENABLE);
 
 	nonboot_cpus_off();
 
diff --git a/plat/rockchip/rk3399/drivers/pmu/pmu.h b/plat/rockchip/rk3399/drivers/pmu/pmu.h
index 8f935e9..ddb1c16 100644
--- a/plat/rockchip/rk3399/drivers/pmu/pmu.h
+++ b/plat/rockchip/rk3399/drivers/pmu/pmu.h
@@ -808,14 +808,158 @@
 #define PMU_NOC_AUTO_ENA	0xd8
 #define PMU_PWRDN_CON1		0xdc
 
+#define PMUGRF_GPIO0A_IOMUX	0x00
 #define PMUGRF_GPIO1A_IOMUX	0x10
+#define PMUGRF_GPIO1C_IOMUX	0x18
+
 #define AP_PWROFF		0x0a
+
+#define GPIO0A6_IOMUX_GPIO	BITS_WITH_WMASK(0, 3, 12)
+#define GPIO0A6_IOMUX_PWM	BITS_WITH_WMASK(1, 3, 12)
+#define GPIO1C3_IOMUX_GPIO	BITS_WITH_WMASK(0, 3, 6)
+#define GPIO1C3_IOMUX_PWM	BITS_WITH_WMASK(1, 3, 6)
+#define GPIO4C2_IOMUX_GPIO	BITS_WITH_WMASK(0, 3, 4)
+#define GPIO4C2_IOMUX_PWM	BITS_WITH_WMASK(1, 3, 4)
+#define GPIO4C6_IOMUX_GPIO	BITS_WITH_WMASK(0, 3, 12)
+#define GPIO4C6_IOMUX_PWM	BITS_WITH_WMASK(1, 3, 12)
 #define GPIO1A6_IOMUX		BITS_WITH_WMASK(0, 3, 12)
+
 #define TSADC_INT_PIN		38
 #define CORES_PM_DISABLE	0x0
+#define CPU_AXI_QOS_ID_COREID		0x00
+#define CPU_AXI_QOS_REVISIONID		0x04
+#define CPU_AXI_QOS_PRIORITY		0x08
+#define CPU_AXI_QOS_MODE		0x0c
+#define CPU_AXI_QOS_BANDWIDTH		0x10
+#define CPU_AXI_QOS_SATURATION		0x14
+#define CPU_AXI_QOS_EXTCONTROL		0x18
+#define CPU_AXI_QOS_NUM_REGS		0x07
+
+#define CPU_AXI_CCI_M0_QOS_BASE		0xffa50000
+#define CPU_AXI_CCI_M1_QOS_BASE		0xffad8000
+#define CPU_AXI_DMAC0_QOS_BASE		0xffa64200
+#define CPU_AXI_DMAC1_QOS_BASE		0xffa64280
+#define CPU_AXI_DCF_QOS_BASE		0xffa64180
+#define CPU_AXI_CRYPTO0_QOS_BASE	0xffa64100
+#define CPU_AXI_CRYPTO1_QOS_BASE	0xffa64080
+#define CPU_AXI_PMU_CM0_QOS_BASE	0xffa68000
+#define CPU_AXI_PERI_CM1_QOS_BASE	0xffa64300
+#define CPU_AXI_GIC_QOS_BASE		0xffa78000
+#define CPU_AXI_SDIO_QOS_BASE		0xffa76000
+#define CPU_AXI_SDMMC_QOS_BASE		0xffa74000
+#define CPU_AXI_EMMC_QOS_BASE		0xffa58000
+#define CPU_AXI_GMAC_QOS_BASE		0xffa5c000
+#define CPU_AXI_USB_OTG0_QOS_BASE	0xffa70000
+#define CPU_AXI_USB_OTG1_QOS_BASE	0xffa70080
+#define CPU_AXI_USB_HOST0_QOS_BASE	0xffa60100
+#define CPU_AXI_USB_HOST1_QOS_BASE	0xffa60180
+#define CPU_AXI_GPU_QOS_BASE		0xffae0000
+#define CPU_AXI_VIDEO_M0_QOS_BASE	0xffab8000
+#define CPU_AXI_VIDEO_M1_R_QOS_BASE	0xffac0000
+#define CPU_AXI_VIDEO_M1_W_QOS_BASE	0xffac0080
+#define CPU_AXI_RGA_R_QOS_BASE		0xffab0000
+#define CPU_AXI_RGA_W_QOS_BASE		0xffab0080
+#define CPU_AXI_IEP_QOS_BASE		0xffa98000
+#define CPU_AXI_VOP_BIG_R_QOS_BASE	0xffac8000
+#define CPU_AXI_VOP_BIG_W_QOS_BASE	0xffac8080
+#define CPU_AXI_VOP_LITTLE_QOS_BASE	0xffad0000
+#define CPU_AXI_ISP0_M0_QOS_BASE	0xffaa0000
+#define CPU_AXI_ISP0_M1_QOS_BASE	0xffaa0080
+#define CPU_AXI_ISP1_M0_QOS_BASE	0xffaa8000
+#define CPU_AXI_ISP1_M1_QOS_BASE	0xffaa8080
+#define CPU_AXI_HDCP_QOS_BASE		0xffa90000
+#define CPU_AXI_PERIHP_NSP_QOS_BASE	0xffad8080
+#define CPU_AXI_PERILP_NSP_QOS_BASE	0xffad8180
+#define CPU_AXI_PERILPSLV_NSP_QOS_BASE	0xffad8100
 
 #define PD_CTR_LOOP		500
 #define CHK_CPU_LOOP		500
-#define MAX_WAIT_CONUT		1000
+#define MAX_WAIT_COUNT		1000
+
+#define	GRF_SOC_CON4		0x0e210
+#define GRF_GPIO4C_IOMUX	0x0e028
+
+#define PMUGRF_SOC_CON0		0x0180
+
+#define CCI_FORCE_WAKEUP	WMSK_BIT(8)
+#define EXTERNAL_32K		WMSK_BIT(0)
+
+#define PLL_PD_HW		0xff
+#define IOMUX_CLK_32K		0x00030002
+#define NOC_AUTO_ENABLE		0x3fffffff
+
+#define SAVE_QOS(array, NAME) \
+	RK3399_CPU_AXI_SAVE_QOS(array, CPU_AXI_##NAME##_QOS_BASE)
+#define RESTORE_QOS(array, NAME) \
+	RK3399_CPU_AXI_RESTORE_QOS(array, CPU_AXI_##NAME##_QOS_BASE)
+
+#define RK3399_CPU_AXI_SAVE_QOS(array, base) do { \
+	array[0] = mmio_read_32(base + CPU_AXI_QOS_ID_COREID); \
+	array[1] = mmio_read_32(base + CPU_AXI_QOS_REVISIONID); \
+	array[2] = mmio_read_32(base + CPU_AXI_QOS_PRIORITY); \
+	array[3] = mmio_read_32(base + CPU_AXI_QOS_MODE); \
+	array[4] = mmio_read_32(base + CPU_AXI_QOS_BANDWIDTH); \
+	array[5] = mmio_read_32(base + CPU_AXI_QOS_SATURATION); \
+	array[6] = mmio_read_32(base + CPU_AXI_QOS_EXTCONTROL); \
+} while (0)
+
+#define RK3399_CPU_AXI_RESTORE_QOS(array, base) do { \
+	mmio_write_32(base + CPU_AXI_QOS_ID_COREID, array[0]); \
+	mmio_write_32(base + CPU_AXI_QOS_REVISIONID, array[1]); \
+	mmio_write_32(base + CPU_AXI_QOS_PRIORITY, array[2]); \
+	mmio_write_32(base + CPU_AXI_QOS_MODE, array[3]); \
+	mmio_write_32(base + CPU_AXI_QOS_BANDWIDTH, array[4]); \
+	mmio_write_32(base + CPU_AXI_QOS_SATURATION, array[5]); \
+	mmio_write_32(base + CPU_AXI_QOS_EXTCONTROL, array[6]); \
+} while (0)
+
+/* there are 4 PWMs on rk3399 */
+struct pwm_data_s {
+	uint32_t iomux_bitmask;
+	uint64_t cnt[4];
+	uint64_t duty[4];
+	uint64_t period[4];
+	uint64_t ctrl[4];
+};
+
+struct pmu_slpdata_s {
+	uint32_t cci_m0_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t cci_m1_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t dmac0_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t dmac1_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t dcf_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t crypto0_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t crypto1_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t pmu_cm0_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t peri_cm1_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t gic_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t sdmmc_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t gmac_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t emmc_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t usb_otg0_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t usb_otg1_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t usb_host0_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t usb_host1_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t gpu_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t video_m0_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t video_m1_r_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t video_m1_w_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t rga_r_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t rga_w_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t vop_big_r[CPU_AXI_QOS_NUM_REGS];
+	uint32_t vop_big_w[CPU_AXI_QOS_NUM_REGS];
+	uint32_t vop_little[CPU_AXI_QOS_NUM_REGS];
+	uint32_t iep_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t isp1_m0_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t isp1_m1_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t isp0_m0_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t isp0_m1_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t hdcp_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t perihp_nsp_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t perilp_nsp_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t perilpslv_nsp_qos[CPU_AXI_QOS_NUM_REGS];
+	uint32_t sdio_qos[CPU_AXI_QOS_NUM_REGS];
+};
 
+extern uint32_t clst_warmboot_data[PLATFORM_CLUSTER_COUNT];
 #endif /* __PMU_H__ */
diff --git a/plat/rockchip/rk3399/drivers/soc/soc.c b/plat/rockchip/rk3399/drivers/soc/soc.c
index bf2d441..9ccf90c 100644
--- a/plat/rockchip/rk3399/drivers/soc/soc.c
+++ b/plat/rockchip/rk3399/drivers/soc/soc.c
@@ -39,36 +39,9 @@
 
 /* Table of regions to map using the MMU.  */
 const mmap_region_t plat_rk_mmap[] = {
-	MAP_REGION_FLAT(GIC500_BASE, GIC500_SIZE,
+	MAP_REGION_FLAT(RK3399_DEV_RNG0_BASE, RK3399_DEV_RNG0_SIZE,
 			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(CCI500_BASE, CCI500_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(STIME_BASE, STIME_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(CRUS_BASE, CRUS_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(SGRF_BASE, SGRF_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(PMU_BASE, PMU_SIZE,
-			MT_DEVICE | MT_RW | MT_NS),
-	MAP_REGION_FLAT(PMUSRAM_BASE, PMUSRAM_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(RK3399_UART2_BASE, RK3399_UART2_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(PMUGRF_BASE, PMUGRF_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(GPIO0_BASE, GPIO0_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(GPIO1_BASE, GPIO1_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(GPIO2_BASE, GPIO2_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(GPIO3_BASE, GPIO3_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(GPIO4_BASE, GPIO4_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
-	MAP_REGION_FLAT(GRF_BASE, GRF_SIZE,
-			MT_DEVICE | MT_RW | MT_SECURE),
+
 	{ 0 }
 };
 
@@ -272,8 +245,7 @@
 
 	for (i = 0; i < CRU_CLKSEL_COUNT; i++)
 		slp_data.cru_clksel_con[i] =
-			mmio_read_32(CRU_BASE +
-				     CRU_CLKSEL_OFFSET + i * REG_SIZE);
+			mmio_read_32(CRU_BASE + CRU_CLKSEL_CON(i));
 
 	for (i = 0; i < PMUCRU_CLKSEL_CONUT; i++)
 		slp_data.pmucru_clksel_con[i] =
@@ -289,6 +261,43 @@
 	_pll_suspend(ALPLL_ID);
 }
 
+void clk_gate_con_save(void)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < PMUCRU_GATE_COUNT; i++)
+		slp_data.pmucru_gate_con[i] =
+			mmio_read_32(PMUCRU_BASE + PMUCRU_GATE_CON(i));
+
+	for (i = 0; i < CRU_GATE_COUNT; i++)
+		slp_data.cru_gate_con[i] =
+			mmio_read_32(CRU_BASE + CRU_GATE_CON(i));
+}
+
+void clk_gate_con_disable(void)
+{
+	uint32_t i;
+
+	for (i = 0; i < PMUCRU_GATE_COUNT; i++)
+		mmio_write_32(PMUCRU_BASE + PMUCRU_GATE_CON(i), REG_SOC_WMSK);
+
+	for (i = 0; i < CRU_GATE_COUNT; i++)
+		mmio_write_32(CRU_BASE + CRU_GATE_CON(i), REG_SOC_WMSK);
+}
+
+void clk_gate_con_restore(void)
+{
+	uint32_t i;
+
+	for (i = 0; i < PMUCRU_GATE_COUNT; i++)
+		mmio_write_32(PMUCRU_BASE + PMUCRU_GATE_CON(i),
+			      REG_SOC_WMSK | slp_data.pmucru_gate_con[i]);
+
+	for (i = 0; i < CRU_GATE_COUNT; i++)
+		mmio_write_32(CRU_BASE + CRU_GATE_CON(i),
+			      REG_SOC_WMSK | slp_data.cru_gate_con[i]);
+}
+
 static void set_plls_nobypass(uint32_t pll_id)
 {
 	if (pll_id == PPLL_ID)
@@ -304,7 +313,7 @@
 	int i;
 
 	for (i = 0; i < CRU_CLKSEL_COUNT; i++)
-		mmio_write_32((CRU_BASE + CRU_CLKSEL_OFFSET + i * REG_SIZE),
+		mmio_write_32((CRU_BASE + CRU_CLKSEL_CON(i)),
 			      REG_SOC_WMSK | slp_data.cru_clksel_con[i]);
 	for (i = 0; i < PMUCRU_CLKSEL_CONUT; i++)
 		mmio_write_32((PMUCRU_BASE +
@@ -351,7 +360,7 @@
 	 * so we do not hope the core to excute valid codes.
 	 */
 	while (1)
-	;
+		;
 }
 
 void plat_rockchip_soc_init(void)
diff --git a/plat/rockchip/rk3399/drivers/soc/soc.h b/plat/rockchip/rk3399/drivers/soc/soc.h
index 4c6f000..26c0df6 100644
--- a/plat/rockchip/rk3399/drivers/soc/soc.h
+++ b/plat/rockchip/rk3399/drivers/soc/soc.h
@@ -34,12 +34,8 @@
 #define GLB_SRST_FST_CFG_VAL	0xfdb9
 #define GLB_SRST_SND_CFG_VAL	0xeca8
 
-#define PMUCRU_PPLL_CON_OFFSET		0x000
-#define PMUCRU_PPLL_CON_BASE_ADDR	(PMUCRU_BASE + PMUCRU_PPLL_CON_OFFSET)
-#define PMUCRU_PPLL_CON_CONUT		0x06
-
-#define PMUCRU_PPLL_CON(num)		(PMUCRU_PPLL_CON_BASE_ADDR + num * 4)
-#define CRU_PLL_CON(pll_id, num)	(CRU_BASE + pll_id  * 0x20 + num * 4)
+#define PMUCRU_PPLL_CON(n)		((n) * 4)
+#define CRU_PLL_CON(pll_id, n)	((pll_id)  * 0x20 + (n) * 4)
 #define PLL_MODE_MSK			0x03
 #define PLL_MODE_SHIFT			0x08
 #define PLL_BYPASS_MSK			0x01
@@ -52,28 +48,30 @@
 #define NO_PLL_BYPASS			(0x00)
 #define NO_PLL_PWRDN			(0x00)
 
-#define PLL_SLOW_MODE		BITS_WITH_WMASK(SLOW_MODE,\
+#define PLL_SLOW_MODE			BITS_WITH_WMASK(SLOW_MODE,\
 						PLL_MODE_MSK, PLL_MODE_SHIFT)
-#define PLL_BYPASS_MODE		BITS_WITH_WMASK(PLL_BYPASS,\
-						PLL_BYPASS_MSK,\
-						PLL_BYPASS_SHIFT)
-#define PLL_NO_BYPASS_MODE	BITS_WITH_WMASK(NO_PLL_BYPASS,\
-						PLL_BYPASS_MSK,\
-						PLL_BYPASS_SHIFT)
-#define PLL_NOMAL_MODE		BITS_WITH_WMASK(NORMAL_MODE,\
+
+#define PLL_NOMAL_MODE			BITS_WITH_WMASK(NORMAL_MODE,\
 						PLL_MODE_MSK, PLL_MODE_SHIFT)
 
+#define PLL_BYPASS_MODE			BIT_WITH_WMSK(PLL_BYPASS_SHIFT)
+#define PLL_NO_BYPASS_MODE		WMSK_BIT(PLL_BYPASS_SHIFT)
+
 #define PLL_CON_COUNT			0x06
 #define CRU_CLKSEL_COUNT		0x108
-#define CRU_CLKSEL_OFFSET		0x300
+#define CRU_CLKSEL_CON(n)		(0x80 + (n) * 4)
 
 #define PMUCRU_CLKSEL_CONUT		0x06
 #define PMUCRU_CLKSEL_OFFSET		0x080
 #define REG_SIZE			0x04
 #define REG_SOC_WMSK			0xffff0000
-
 #define CLK_GATE_MASK			0x01
 
+#define PMUCRU_GATE_COUNT	0x03
+#define CRU_GATE_COUNT		0x23
+#define PMUCRU_GATE_CON(n)	(0x100 + (n) * 4)
+#define CRU_GATE_CON(n)	(0x300 + (n) * 4)
+
 enum plls_id {
 	ALPLL_ID = 0,
 	ABPLL_ID,
@@ -86,6 +84,9 @@
 	END_PLL_ID,
 };
 
+#define CLST_L_CPUS_MSK (0xf)
+#define CLST_B_CPUS_MSK (0x3)
+
 enum pll_work_mode {
 	SLOW_MODE = 0x00,
 	NORMAL_MODE = 0x01,
@@ -102,10 +103,13 @@
 	uint32_t plls_con[END_PLL_ID][PLL_CON_COUNT];
 	uint32_t pmucru_clksel_con[PMUCRU_CLKSEL_CONUT];
 	uint32_t cru_clksel_con[CRU_CLKSEL_COUNT];
+	uint32_t cru_gate_con[CRU_GATE_COUNT];
+	uint32_t pmucru_gate_con[PMUCRU_GATE_COUNT];
 };
 
 #define CYCL_24M_CNT_US(us)	(24 * us)
 #define CYCL_24M_CNT_MS(ms)	(ms * CYCL_24M_CNT_US(1000))
+#define CYCL_32K_CNT_MS(ms)	(ms * 32)
 
 /**************************************************
  * secure timer
@@ -261,5 +265,7 @@
 void __dead2 soc_global_soft_reset(void);
 void plls_resume(void);
 void plls_suspend(void);
-
+void clk_gate_con_save(void);
+void clk_gate_con_disable(void);
+void clk_gate_con_restore(void);
 #endif /* __SOC_H__ */
diff --git a/bl31/cpu_data_array.c b/plat/rockchip/rk3399/include/plat_sip_calls.h
similarity index 64%
copy from bl31/cpu_data_array.c
copy to plat/rockchip/rk3399/include/plat_sip_calls.h
index 4cba118..a778f49 100644
--- a/bl31/cpu_data_array.c
+++ b/plat/rockchip/rk3399/include/plat_sip_calls.h
@@ -1,20 +1,16 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
  *
- * Redistributions in binary form must reproduce the above copyright notice,
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -28,9 +24,9 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <cassert.h>
-#include <cpu_data.h>
-#include <platform_def.h>
+#ifndef __PLAT_SIP_CALLS_H__
+#define __PLAT_SIP_CALLS_H__
+
+#define RK_PLAT_SIP_NUM_CALLS	0
 
-/* The per_cpu_ptr_cache_t space allocation */
-cpu_data_t percpu_data[PLATFORM_CORE_COUNT];
+#endif /* __PLAT_SIP_CALLS_H__ */
diff --git a/plat/rockchip/rk3399/include/platform_def.h b/plat/rockchip/rk3399/include/platform_def.h
index 5f04db9..b494824 100644
--- a/plat/rockchip/rk3399/include/platform_def.h
+++ b/plat/rockchip/rk3399/include/platform_def.h
@@ -73,7 +73,7 @@
 #define PLATFORM_NUM_AFFS		(PLATFORM_SYSTEM_COUNT +	\
 					 PLATFORM_CLUSTER_COUNT +	\
 					 PLATFORM_CORE_COUNT)
-
+#define PLAT_RK_CLST_TO_CPUID_SHIFT	6
 #define PLAT_MAX_PWR_LVL		MPIDR_AFFLVL2
 
 /*
@@ -109,7 +109,7 @@
  ******************************************************************************/
 #define ADDR_SPACE_SIZE		(1ull << 32)
 #define MAX_XLAT_TABLES		20
-#define MAX_MMAP_REGIONS	20
+#define MAX_MMAP_REGIONS	25
 
 /*******************************************************************************
  * Declarations and constants to access the mailboxes safely. Each mailbox is
@@ -146,6 +146,4 @@
 
 #define PLAT_RK_PRIMARY_CPU		0x0
 
-#define RK_PLAT_AARCH_CFG		RK_PLAT_CFG1
-
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/bl31/cpu_data_array.c b/plat/rockchip/rk3399/plat_sip_calls.c
similarity index 60%
copy from bl31/cpu_data_array.c
copy to plat/rockchip/rk3399/plat_sip_calls.c
index 4cba118..3d2f39a 100644
--- a/bl31/cpu_data_array.c
+++ b/plat/rockchip/rk3399/plat_sip_calls.c
@@ -1,20 +1,16 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
  *
- * Redistributions in binary form must reproduce the above copyright notice,
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -28,9 +24,24 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <cassert.h>
-#include <cpu_data.h>
-#include <platform_def.h>
+#include <debug.h>
+#include <mmio.h>
+#include <plat_sip_calls.h>
+#include <rockchip_sip_svc.h>
+#include <runtime_svc.h>
 
-/* The per_cpu_ptr_cache_t space allocation */
-cpu_data_t percpu_data[PLATFORM_CORE_COUNT];
+uint64_t rockchip_plat_sip_handler(uint32_t smc_fid,
+				   uint64_t x1,
+				   uint64_t x2,
+				   uint64_t x3,
+				   uint64_t x4,
+				   void *cookie,
+				   void *handle,
+				   uint64_t flags)
+{
+	switch (smc_fid) {
+	default:
+		ERROR("%s: unhandled SMC (0x%x)\n", __func__, smc_fid);
+		SMC_RET1(handle, SMC_UNK);
+	}
+}
diff --git a/plat/rockchip/rk3399/platform.mk b/plat/rockchip/rk3399/platform.mk
index b0ce56f..142fe9e 100644
--- a/plat/rockchip/rk3399/platform.mk
+++ b/plat/rockchip/rk3399/platform.mk
@@ -50,7 +50,7 @@
 PLAT_BL_COMMON_SOURCES  :=	lib/xlat_tables/xlat_tables_common.c		\
 				lib/xlat_tables/aarch64/xlat_tables.c		\
                                 plat/common/aarch64/plat_common.c               \
-				plat/common/aarch64/plat_psci_common.c
+				plat/common/plat_psci_common.c
 
 BL31_SOURCES            +=      ${RK_GIC_SOURCES}                               \
                                 drivers/arm/cci/cci.c                           \
@@ -70,6 +70,8 @@
                                 ${RK_PLAT_COMMON}/plat_pm.c                     \
                                 ${RK_PLAT_COMMON}/plat_topology.c               \
                                 ${RK_PLAT_COMMON}/aarch64/platform_common.c        \
+				${RK_PLAT_COMMON}/rockchip_sip_svc.c		\
+				${RK_PLAT_SOC}/plat_sip_calls.c			\
 				${RK_PLAT_SOC}/drivers/gpio/rk3399_gpio.c	\
                                 ${RK_PLAT_SOC}/drivers/pmu/pmu.c                \
                                 ${RK_PLAT_SOC}/drivers/soc/soc.c
diff --git a/plat/rockchip/rk3399/rk3399_def.h b/plat/rockchip/rk3399/rk3399_def.h
index ed3a424..787ec0c 100644
--- a/plat/rockchip/rk3399/rk3399_def.h
+++ b/plat/rockchip/rk3399/rk3399_def.h
@@ -39,60 +39,82 @@
 #define SIZE_K(n)	((n) * 1024)
 #define SIZE_M(n)	((n) * 1024 * 1024)
 
-#define CCI500_BASE		0xffb00000
-#define CCI500_SIZE		SIZE_M(1)
+/* Register base address and size */
+#define MMIO_BASE		0xfe000000
 
-#define GIC500_BASE		0xfee00000
+#define GIC500_BASE		(MMIO_BASE + 0xe00000)
 #define GIC500_SIZE		SIZE_M(2)
 
-#define STIME_BASE		0xff860000
-#define STIME_SIZE		SIZE_K(64)
-
-#define CRUS_BASE		0xff750000
-#define CRUS_SIZE			SIZE_K(128)
+#define PMU_BASE		(MMIO_BASE + 0x1310000)
+#define PMU_SIZE		SIZE_K(64)
 
-#define SGRF_BASE		0xff330000
-#define SGRF_SIZE			SIZE_K(64)
+#define PMUGRF_BASE		(MMIO_BASE + 0x1320000)
+#define PMUGRF_SIZE		SIZE_K(64)
 
-#define PMU_BASE			0xff310000
-#define PMU_SIZE			SIZE_K(64)
+#define SGRF_BASE		(MMIO_BASE + 0x1330000)
+#define SGRF_SIZE		SIZE_K(64)
 
-#define PMUSRAM_BASE		0xff3b0000
+#define PMUSRAM_BASE		(MMIO_BASE + 0x13b0000)
 #define PMUSRAM_SIZE		SIZE_K(64)
 #define PMUSRAM_RSIZE		SIZE_K(8)
 
-#define PMUGRF_BASE		0xff320000
-#define PMUGRF_SIZE		SIZE_K(64)
+#define PWM_BASE		(MMIO_BASE + 0x1420000)
+#define PWM_SIZE		SIZE_K(64)
 
-#define GPIO0_BASE		0xff720000
+#define GPIO0_BASE		(MMIO_BASE + 0x1720000)
 #define GPIO0_SIZE		SIZE_K(64)
 
-#define GPIO1_BASE		0xff730000
+#define GPIO1_BASE		(MMIO_BASE + 0x1730000)
 #define GPIO1_SIZE		SIZE_K(64)
 
-#define GPIO2_BASE		0xff780000
+#define CRUS_BASE		(MMIO_BASE + 0x1750000)
+#define CRUS_SIZE		SIZE_K(128)
+
+#define GRF_BASE		(MMIO_BASE + 0x1770000)
+#define GRF_SIZE		SIZE_K(64)
+
+#define GPIO2_BASE		(MMIO_BASE + 0x1780000)
 #define GPIO2_SIZE		SIZE_K(32)
 
-#define GPIO3_BASE		0xff788000
+#define GPIO3_BASE		(MMIO_BASE + 0x1788000)
 #define GPIO3_SIZE		SIZE_K(32)
 
-#define GPIO4_BASE		0xff790000
+#define GPIO4_BASE		(MMIO_BASE + 0x1790000)
 #define GPIO4_SIZE		SIZE_K(32)
 
-#define GRF_BASE		0xff770000
-#define GRF_SIZE		SIZE_K(64)
+#define STIME_BASE		(MMIO_BASE + 0x1860000)
+#define STIME_SIZE		SIZE_K(64)
+
+#define SERVICE_NOC_0_BASE	(MMIO_BASE + 0x1a50000)
+#define NOC_0_SIZE		SIZE_K(192)
+
+#define SERVICE_NOC_1_BASE	(MMIO_BASE + 0x1a84000)
+#define NOC_1_SIZE		SIZE_K(16)
+
+#define SERVICE_NOC_2_BASE	(MMIO_BASE + 0x1a8c000)
+#define NOC_2_SIZE		SIZE_K(16)
+
+#define SERVICE_NOC_3_BASE	(MMIO_BASE + 0x1a90000)
+#define NOC_3_SIZE		SIZE_K(448)
+
+#define CCI500_BASE		(MMIO_BASE + 0x1b00000)
+#define CCI500_SIZE		SIZE_M(1)
+
+/* Aggregate of all devices in the first GB */
+#define RK3399_DEV_RNG0_BASE	MMIO_BASE
+#define RK3399_DEV_RNG0_SIZE	0x1d00000
 
 /*
  * include i2c pmu/audio, pwm0-3 rkpwm0-3 uart_dbg,mailbox scr
  * 0xff650000 -0xff6c0000
  */
-#define PD_BUS0_BASE		0xff650000
-#define PD_BUS0_SIZE		0x70000
+#define PD_BUS0_BASE		(MMIO_BASE + 0x1650000)
+#define PD_BUS0_SIZE		SIZE_K(448)
 
-#define PMUCRU_BASE		0xff750000
-#define CRU_BASE			0xff760000
+#define PMUCRU_BASE		(MMIO_BASE + 0x1750000)
+#define CRU_BASE		(MMIO_BASE + 0x1760000)
 
-#define COLD_BOOT_BASE		0xffff0000
+#define COLD_BOOT_BASE		(MMIO_BASE + 0x1ff0000)
 
 /**************************************************************************
  * UART related constants
@@ -119,12 +141,6 @@
 #define PLAT_RK_CCI_CLUSTER1_SL_IFACE_IX	1
 
 /******************************************************************************
- * cpu up status
- ******************************************************************************/
-#define PMU_CPU_HOTPLUG		0xdeadbeaf
-#define PMU_CPU_AUTO_PWRDN	0xabcdef12
-
-/******************************************************************************
  * sgi, ppi
  ******************************************************************************/
 #define ARM_IRQ_SEC_PHY_TIMER		29
@@ -145,4 +161,14 @@
 #define RK3399_G1S_IRQS			ARM_IRQ_SEC_PHY_TIMER
 #define RK3399_G0_IRQS			ARM_IRQ_SEC_SGI_6
 
+#define PWM0_IOMUX_PWM_EN		(1 << 0)
+#define PWM1_IOMUX_PWM_EN		(1 << 1)
+#define PWM2_IOMUX_PWM_EN		(1 << 2)
+#define PWM3_IOMUX_PWM_EN		(1 << 3)
+
+#define PWM_CNT(n)			0x0000 + 0x10 * n
+#define PWM_PERIOD_HPR(n)		0x0004 + 0x10 * n
+#define PWM_DUTY_LPR(n)			0x0008 + 0x10 * n
+#define PWM_CTRL(n)			0x000c + 0x10 * n
+
 #endif /* __PLAT_DEF_H__ */
diff --git a/plat/xilinx/zynqmp/bl31_zynqmp_setup.c b/plat/xilinx/zynqmp/bl31_zynqmp_setup.c
index 6f1a18b..d878b86 100644
--- a/plat/xilinx/zynqmp/bl31_zynqmp_setup.c
+++ b/plat/xilinx/zynqmp/bl31_zynqmp_setup.c
@@ -38,24 +38,7 @@
 #include <platform.h>
 #include "zynqmp_private.h"
 
-/*
- * Declarations of linker defined symbols which will help us find the layout
- * of trusted SRAM
- */
-extern unsigned long __RO_START__;
-extern unsigned long __RO_END__;
-
-extern unsigned long __COHERENT_RAM_START__;
-extern unsigned long __COHERENT_RAM_END__;
-
-/*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned.  It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
- */
-#define BL31_RO_BASE (unsigned long)(&__RO_START__)
-#define BL31_RO_LIMIT (unsigned long)(&__RO_END__)
+#define BL31_END (unsigned long)(&__BL31_END__)
 
 /*
  * The next 2 constants identify the extents of the coherent memory region.
@@ -147,18 +130,20 @@
 }
 
 /*
- * Perform the very early platform specific architectural setup here. At the
- * moment this is only intializes the MMU in a quick and dirty way.
+ * Perform the very early platform specific architectural setup here.
  */
 void bl31_plat_arch_setup(void)
 {
 	plat_arm_interconnect_init();
 	plat_arm_interconnect_enter_coherency();
 
-	arm_configure_mmu_el3(BL31_RO_BASE,
-			      BL31_COHERENT_RAM_LIMIT - BL31_RO_BASE,
-			      BL31_RO_BASE,
-			      BL31_RO_LIMIT,
+	arm_setup_page_tables(BL31_BASE,
+			      BL31_END - BL31_BASE,
+			      BL_CODE_BASE,
+			      BL_CODE_LIMIT,
+			      BL_RO_DATA_BASE,
+			      BL_RO_DATA_LIMIT,
 			      BL31_COHERENT_RAM_BASE,
 			      BL31_COHERENT_RAM_LIMIT);
+	enable_mmu_el3(0);
 }
diff --git a/plat/xilinx/zynqmp/include/platform_def.h b/plat/xilinx/zynqmp/include/platform_def.h
index 76a52de..a35bd12 100644
--- a/plat/xilinx/zynqmp/include/platform_def.h
+++ b/plat/xilinx/zynqmp/include/platform_def.h
@@ -100,7 +100,7 @@
  * Platform specific page table and MMU setup constants
  ******************************************************************************/
 #define ADDR_SPACE_SIZE			(1ull << 32)
-#define MAX_MMAP_REGIONS		6
+#define MAX_MMAP_REGIONS		7
 #if IMAGE_BL32
 # define MAX_XLAT_TABLES		5
 #else
diff --git a/plat/xilinx/zynqmp/platform.mk b/plat/xilinx/zynqmp/platform.mk
index ad87cd9..fe939c7 100644
--- a/plat/xilinx/zynqmp/platform.mk
+++ b/plat/xilinx/zynqmp/platform.mk
@@ -30,6 +30,7 @@
 PROGRAMMABLE_RESET_ADDRESS := 1
 PSCI_EXTENDED_STATE_ID := 1
 A53_DISABLE_NON_TEMPORAL_HINT := 0
+SEPARATE_CODE_AND_RODATA := 1
 
 ifdef ZYNQMP_ATF_MEM_BASE
     $(eval $(call add_define,ZYNQMP_ATF_MEM_BASE))
@@ -68,9 +69,9 @@
 				drivers/arm/gic/v2/gicv2_helpers.c		\
 				drivers/cadence/uart/cdns_console.S		\
 				drivers/console/console.S			\
-				plat/arm/common/aarch64/arm_common.c		\
 				plat/arm/common/aarch64/arm_helpers.S		\
 				plat/arm/common/arm_cci.c			\
+				plat/arm/common/arm_common.c			\
 				plat/arm/common/arm_gicv2.c			\
 				plat/common/plat_gicv2.c			\
 				plat/common/aarch64/plat_common.c		\
@@ -80,7 +81,7 @@
 BL31_SOURCES		+=	drivers/arm/cci/cci.c				\
 				lib/cpus/aarch64/aem_generic.S			\
 				lib/cpus/aarch64/cortex_a53.S			\
-				plat/common/aarch64/plat_psci_common.c		\
+				plat/common/plat_psci_common.c			\
 				plat/common/aarch64/platform_mp_stack.S		\
 				plat/xilinx/zynqmp/bl31_zynqmp_setup.c		\
 				plat/xilinx/zynqmp/plat_psci.c			\
diff --git a/plat/xilinx/zynqmp/pm_service/pm_client.c b/plat/xilinx/zynqmp/pm_service/pm_client.c
index d3396df..cf0d5f0 100644
--- a/plat/xilinx/zynqmp/pm_service/pm_client.c
+++ b/plat/xilinx/zynqmp/pm_service/pm_client.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -37,6 +37,7 @@
 #include <gicv2.h>
 #include <bl_common.h>
 #include <mmio.h>
+#include <utils.h>
 #include "pm_api_sys.h"
 #include "pm_client.h"
 #include "pm_ipi.h"
diff --git a/plat/xilinx/zynqmp/tsp/tsp_plat_setup.c b/plat/xilinx/zynqmp/tsp/tsp_plat_setup.c
index 58a3e2a..8e3ca62 100644
--- a/plat/xilinx/zynqmp/tsp/tsp_plat_setup.c
+++ b/plat/xilinx/zynqmp/tsp/tsp_plat_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,19 +35,8 @@
 #include <plat_arm.h>
 #include "../zynqmp_private.h"
 
-/*
- * The next 3 constants identify the extents of the code & RO data region and
- * the limit of the BL32 image. These addresses are used by the MMU setup code
- * and therefore they must be page-aligned.  It is the responsibility of the
- * linker script to ensure that __RO_START__, __RO_END__ & & __BL32_END__
- * linker symbols refer to page-aligned addresses.
- */
-#define BL32_RO_BASE (unsigned long)(&__RO_START__)
-#define BL32_RO_LIMIT (unsigned long)(&__RO_END__)
 #define BL32_END (unsigned long)(&__BL32_END__)
 
-
-#if USE_COHERENT_MEM
 /*
  * The next 2 constants identify the extents of the coherent memory region.
  * These addresses are used by the MMU setup code and therefore they must be
@@ -57,7 +46,6 @@
  */
 #define BL32_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
 #define BL32_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
-#endif
 
 /*******************************************************************************
  * Initialize the UART
@@ -90,13 +78,14 @@
  ******************************************************************************/
 void tsp_plat_arch_setup(void)
 {
-	arm_configure_mmu_el1(BL32_RO_BASE,
-			      (BL32_END - BL32_RO_BASE),
-			      BL32_RO_BASE,
-			      BL32_RO_LIMIT
-#if USE_COHERENT_MEM
-			      , BL32_COHERENT_RAM_BASE,
+	arm_setup_page_tables(BL32_BASE,
+			      BL32_END - BL32_BASE,
+			      BL_CODE_BASE,
+			      BL_CODE_LIMIT,
+			      BL_RO_DATA_BASE,
+			      BL_RO_DATA_LIMIT,
+			      BL32_COHERENT_RAM_BASE,
 			      BL32_COHERENT_RAM_LIMIT
-#endif
 			      );
+	enable_mmu_el1(0);
 }
diff --git a/services/std_svc/psci/psci_entry.S b/services/std_svc/psci/psci_entry.S
deleted file mode 100644
index f8c0afa..0000000
--- a/services/std_svc/psci/psci_entry.S
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <arch.h>
-#include <asm_macros.S>
-#include <el3_common_macros.S>
-#include <psci.h>
-#include <xlat_tables.h>
-
-	.globl	psci_entrypoint
-	.globl	psci_power_down_wfi
-
-	/* --------------------------------------------------------------------
-	 * This CPU has been physically powered up. It is either resuming from
-	 * suspend or has simply been turned on. In both cases, call the power
-	 * on finisher.
-	 * --------------------------------------------------------------------
-	 */
-func psci_entrypoint
-	/*
-	 * On the warm boot path, most of the EL3 initialisations performed by
-	 * 'el3_entrypoint_common' must be skipped:
-	 *
-	 *  - Only when the platform bypasses the BL1/BL31 entrypoint by
-	 *    programming the reset address do we need to set the CPU endianness.
-	 *    In other cases, we assume this has been taken care by the
-	 *    entrypoint code.
-	 *
-	 *  - No need to determine the type of boot, we know it is a warm boot.
-	 *
-	 *  - Do not try to distinguish between primary and secondary CPUs, this
-	 *    notion only exists for a cold boot.
-	 *
-	 *  - No need to initialise the memory or the C runtime environment,
-	 *    it has been done once and for all on the cold boot path.
-	 */
-	el3_entrypoint_common					\
-		_set_endian=PROGRAMMABLE_RESET_ADDRESS		\
-		_warm_boot_mailbox=0				\
-		_secondary_cold_boot=0				\
-		_init_memory=0					\
-		_init_c_runtime=0				\
-		_exception_vectors=runtime_exceptions
-
-	/* --------------------------------------------
-	 * Enable the MMU with the DCache disabled. It
-	 * is safe to use stacks allocated in normal
-	 * memory as a result. All memory accesses are
-	 * marked nGnRnE when the MMU is disabled. So
-	 * all the stack writes will make it to memory.
-	 * All memory accesses are marked Non-cacheable
-	 * when the MMU is enabled but D$ is disabled.
-	 * So used stack memory is guaranteed to be
-	 * visible immediately after the MMU is enabled
-	 * Enabling the DCache at the same time as the
-	 * MMU can lead to speculatively fetched and
-	 * possibly stale stack memory being read from
-	 * other caches. This can lead to coherency
-	 * issues.
-	 * --------------------------------------------
-	 */
-	mov	x0, #DISABLE_DCACHE
-	bl	bl31_plat_enable_mmu
-
-	bl	psci_power_up_finish
-
-	b	el3_exit
-endfunc psci_entrypoint
-
-	/* --------------------------------------------
-	 * This function is called to indicate to the
-	 * power controller that it is safe to power
-	 * down this cpu. It should not exit the wfi
-	 * and will be released from reset upon power
-	 * up. 'wfi_spill' is used to catch erroneous
-	 * exits from wfi.
-	 * --------------------------------------------
-	 */
-func psci_power_down_wfi
-	dsb	sy		// ensure write buffer empty
-	wfi
-	bl	plat_panic_handler
-endfunc psci_power_down_wfi
-
diff --git a/services/std_svc/std_svc_setup.c b/services/std_svc/std_svc_setup.c
index 6cb0319..06647e0 100644
--- a/services/std_svc/std_svc_setup.c
+++ b/services/std_svc/std_svc_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,6 +31,7 @@
 #include <debug.h>
 #include <psci.h>
 #include <runtime_svc.h>
+#include <smcc_helpers.h>
 #include <std_svc.h>
 #include <stdint.h>
 #include <uuid.h>
@@ -40,36 +41,27 @@
 		0x108d905b, 0xf863, 0x47e8, 0xae, 0x2d,
 		0xc0, 0xfb, 0x56, 0x41, 0xf6, 0xe2);
 
-/* Setup Standard Services */
-static int32_t std_svc_setup(void)
-{
-	/*
-	 * PSCI is the only specification implemented as a Standard Service.
-	 * Invoke PSCI setup from here
-	 */
-	return psci_setup();
-}
-
 /*
  * Top-level Standard Service SMC handler. This handler will in turn dispatch
  * calls to PSCI SMC handler
  */
-uint64_t std_svc_smc_handler(uint32_t smc_fid,
-			     uint64_t x1,
-			     uint64_t x2,
-			     uint64_t x3,
-			     uint64_t x4,
+uintptr_t std_svc_smc_handler(uint32_t smc_fid,
+			     u_register_t x1,
+			     u_register_t x2,
+			     u_register_t x3,
+			     u_register_t x4,
 			     void *cookie,
 			     void *handle,
-			     uint64_t flags)
+			     u_register_t flags)
 {
 	/*
 	 * Dispatch PSCI calls to PSCI SMC handler and return its return
 	 * value
 	 */
 	if (is_psci_fid(smc_fid)) {
-		return psci_smc_handler(smc_fid, x1, x2, x3, x4, cookie,
-				handle, flags);
+		SMC_RET1(handle,
+			psci_smc_handler(smc_fid, x1, x2, x3, x4,
+					cookie, handle, flags));
 	}
 
 	switch (smc_fid) {
@@ -101,6 +93,6 @@
 		OEN_STD_START,
 		OEN_STD_END,
 		SMC_TYPE_FAST,
-		std_svc_setup,
+		NULL,
 		std_svc_smc_handler
 );
diff --git a/tools/cert_create/src/main.c b/tools/cert_create/src/main.c
index c87d988..c58f41d 100644
--- a/tools/cert_create/src/main.c
+++ b/tools/cert_create/src/main.c
@@ -428,9 +428,11 @@
 			 */
 			switch (ext->type) {
 			case EXT_TYPE_NVCOUNTER:
-				nvctr = atoi(ext->arg);
-				CHECK_NULL(cert_ext, ext_new_nvcounter(ext_nid,
+				if (ext->arg) {
+					nvctr = atoi(ext->arg);
+					CHECK_NULL(cert_ext, ext_new_nvcounter(ext_nid,
 						EXT_CRIT, nvctr));
+				}
 				break;
 			case EXT_TYPE_HASH:
 				if (ext->arg == NULL) {