Merge pull request #1193 from jwerner-chromium/JW_coreboot

New console API and coreboot support [v4]
diff --git a/Makefile b/Makefile
index 3c3f5a4..c16cad7 100644
--- a/Makefile
+++ b/Makefile
@@ -125,6 +125,7 @@
 OD			:=	${CROSS_COMPILE}objdump
 NM			:=	${CROSS_COMPILE}nm
 PP			:=	${CROSS_COMPILE}gcc -E
+DTC			?=	dtc
 
 ifeq (${ARM_ARCH_MAJOR},7)
 target32-directive	= 	-target arm-none-eabi
@@ -159,10 +160,19 @@
 				-ffreestanding -fno-builtin -Wall -std=gnu99	\
 				-Os -ffunction-sections -fdata-sections
 
+GCC_V_OUTPUT		:=	$(shell $(CC) -v 2>&1)
+PIE_FOUND		:=	$(findstring --enable-default-pie,${GCC_V_OUTPUT})
+
+ifneq ($(PIE_FOUND),)
+TF_CFLAGS		+=	-fno-PIE
+endif
+
 TF_LDFLAGS		+=	--fatal-warnings -O1
 TF_LDFLAGS		+=	--gc-sections
 TF_LDFLAGS		+=	$(TF_LDFLAGS_$(ARCH))
 
+DTC_FLAGS		+=	-I dts -O dtb
+
 ################################################################################
 # Common sources and include directories
 ################################################################################
@@ -451,6 +461,10 @@
 endif
 endif
 
+ifdef FDT_SOURCES
+NEED_FDT := yes
+endif
+
 ################################################################################
 # Build options checks
 ################################################################################
@@ -487,6 +501,7 @@
 $(eval $(call assert_boolean,USE_COHERENT_MEM))
 $(eval $(call assert_boolean,USE_TBBR_DEFS))
 $(eval $(call assert_boolean,WARMBOOT_ENABLE_DCACHE_EARLY))
+$(eval $(call assert_boolean,BL2_AT_EL3))
 
 $(eval $(call assert_numeric,ARM_ARCH_MAJOR))
 $(eval $(call assert_numeric,ARM_ARCH_MINOR))
@@ -531,6 +546,7 @@
 $(eval $(call add_define,USE_COHERENT_MEM))
 $(eval $(call add_define,USE_TBBR_DEFS))
 $(eval $(call add_define,WARMBOOT_ENABLE_DCACHE_EARLY))
+$(eval $(call add_define,BL2_AT_EL3))
 
 # Define the EL3_PAYLOAD_BASE flag only if it is provided.
 ifdef EL3_PAYLOAD_BASE
@@ -553,7 +569,7 @@
 # Build targets
 ################################################################################
 
-.PHONY:	all msg_start clean realclean distclean cscope locate-checkpatch checkcodebase checkpatch fiptool fip fwu_fip certtool
+.PHONY:	all msg_start clean realclean distclean cscope locate-checkpatch checkcodebase checkpatch fiptool fip fwu_fip certtool dtbs
 .SUFFIXES:
 
 all: msg_start
@@ -572,10 +588,14 @@
 endif
 
 ifeq (${NEED_BL2},yes)
-$(if ${BL2}, $(eval $(call MAKE_TOOL_ARGS,2,${BL2},tb-fw)),\
-	$(eval $(call MAKE_BL,2,tb-fw)))
+ifeq (${BL2_AT_EL3}, 0)
+FIP_BL2_ARGS := tb-fw
 endif
 
+$(if ${BL2}, $(eval $(call MAKE_TOOL_ARGS,2,${BL2},${FIP_BL2_ARGS})),\
+	$(eval $(call MAKE_BL,2,${FIP_BL2_ARGS})))
+endif
+
 ifeq (${NEED_SCP_BL2},yes)
 $(eval $(call FIP_ADD_IMG,SCP_BL2,--scp-fw))
 endif
@@ -606,6 +626,13 @@
 $(eval $(call FWU_FIP_ADD_PAYLOAD,${BL2U_PATH},--ap-fwu-cfg))
 endif
 
+# Expand build macros for the different images
+ifeq (${NEED_FDT},yes)
+$(eval $(call MAKE_DTBS,$(BUILD_PLAT)/fdts,$(FDT_SOURCES)))
+$(eval $(call MAKE_FDT))
+dtbs: $(DTBS)
+endif
+
 locate-checkpatch:
 ifndef CHECKPATCH
 	$(error "Please set CHECKPATCH to point to the Linux checkpatch.pl file, eg: CHECKPATCH=../linux/scripts/checkpatch.pl")
@@ -733,6 +760,7 @@
 	@echo "  distclean      Remove all build artifacts for all platforms"
 	@echo "  certtool       Build the Certificate generation tool"
 	@echo "  fiptool        Build the Firmware Image Package (FIP) creation tool"
+	@echo "  dtbs           Build the Flattened device tree (if required for the platform)"
 	@echo ""
 	@echo "Note: most build targets require PLAT to be set to a specific platform."
 	@echo ""
diff --git a/bl1/bl1.mk b/bl1/bl1.mk
index a026499..41ee1a7 100644
--- a/bl1/bl1.mk
+++ b/bl1/bl1.mk
@@ -13,7 +13,10 @@
 				lib/cpus/errata_report.c		\
 				lib/el3_runtime/${ARCH}/context_mgmt.c	\
 				plat/common/plat_bl1_common.c		\
-				plat/common/${ARCH}/platform_up_stack.S
+				plat/common/${ARCH}/platform_up_stack.S \
+				${MBEDTLS_COMMON_SOURCES}		\
+				${MBEDTLS_CRYPTO_SOURCES}		\
+				${MBEDTLS_X509_SOURCES}
 
 ifeq (${ARCH},aarch64)
 BL1_SOURCES		+=	lib/el3_runtime/aarch64/context.S
diff --git a/bl2/aarch32/bl2_el3_entrypoint.S b/bl2/aarch32/bl2_el3_entrypoint.S
new file mode 100644
index 0000000..997b069
--- /dev/null
+++ b/bl2/aarch32/bl2_el3_entrypoint.S
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <bl_common.h>
+#include <el3_common_macros.S>
+
+
+	.globl	bl2_entrypoint
+	.globl	bl2_run_next_image
+
+
+func bl2_entrypoint
+	/* Save arguments r0-r3 from previous Boot loader */
+	mov	r9, r0
+	mov	r10, r1
+	mov	r11, r2
+	mov	r12, r3
+
+	el3_entrypoint_common					\
+		_init_sctlr=1					\
+		_warm_boot_mailbox=!PROGRAMMABLE_RESET_ADDRESS	\
+		_secondary_cold_boot=!COLD_BOOT_SINGLE_CPU	\
+		_init_memory=1					\
+		_init_c_runtime=1				\
+		_exception_vectors=bl2_vector_table
+
+	/*
+	 * Restore parameters from the Boot ROM
+	 */
+	mov	r0, r9
+	mov	r1, r10
+	mov	r2, r11
+	mov	r3, r12
+
+	bl	bl2_el3_early_platform_setup
+	bl	bl2_el3_plat_arch_setup
+
+	/* ---------------------------------------------
+	 * Jump to main function.
+	 * ---------------------------------------------
+	 */
+	bl	bl2_main
+
+	/* ---------------------------------------------
+	 * Should never reach this point.
+	 * ---------------------------------------------
+	 */
+	no_ret	plat_panic_handler
+
+endfunc bl2_entrypoint
+
+func bl2_run_next_image
+	mov	r8, r0
+
+	/*
+	 * MMU needs to be disabled because both BL2 and BL32 execute
+	 * in PL1, and therefore share the same address space.
+	 * BL32 will initialize the address space according to its
+	 * own requirement.
+	 */
+	bl	disable_mmu_icache_secure
+	stcopr	r0, TLBIALL
+	dsb	sy
+	isb
+	mov	r0, r8
+	bl	bl2_el3_plat_prepare_exit
+
+	/*
+	 * Extract PC and SPSR based on struct `entry_point_info_t`
+	 * and load it in LR and SPSR registers respectively.
+	 */
+	ldr	lr, [r8, #ENTRY_POINT_INFO_PC_OFFSET]
+	ldr	r1, [r8, #(ENTRY_POINT_INFO_PC_OFFSET + 4)]
+	msr	spsr, r1
+
+	add	r8, r8, #ENTRY_POINT_INFO_ARGS_OFFSET
+	ldm	r8, {r0, r1, r2, r3}
+	eret
+endfunc bl2_run_next_image
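For reference, the PC/SPSR extraction above hard-codes the layout of
entry_point_info_t. A simplified sketch of that structure (AArch64 flavour;
the AArch32 build uses a four-register aapcs32_params_t, and the
authoritative definition with its offset asserts lives in
include/common/bl_common.h):

	#include <stdint.h>

	typedef struct param_header {
		uint8_t type;
		uint8_t version;
		uint16_t size;
		uint32_t attr;
	} param_header_t;

	typedef struct aapcs64_params {
		uint64_t arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7;
	} aapcs64_params_t;

	typedef struct entry_point_info {
		param_header_t h;
		uintptr_t pc;		/* -> LR (AArch32) / ELR_EL3 (AArch64) */
		uint32_t spsr;		/* -> SPSR before the exception return */
		aapcs64_params_t args;	/* -> r0-r3 / x0-x7 of the next image */
	} entry_point_info_t;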
diff --git a/bl2/aarch32/bl2_el3_exceptions.S b/bl2/aarch32/bl2_el3_exceptions.S
new file mode 100644
index 0000000..11ddf37
--- /dev/null
+++ b/bl2/aarch32/bl2_el3_exceptions.S
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <bl_common.h>
+
+	.globl	bl2_vector_table
+
+vector_base bl2_vector_table
+	b	bl2_entrypoint
+	b	report_exception	/* Undef */
+	b	report_exception	/* SVC call */
+	b	report_exception	/* Prefetch abort */
+	b	report_exception	/* Data abort */
+	b	report_exception	/* Reserved */
+	b	report_exception	/* IRQ */
+	b	report_exception	/* FIQ */
diff --git a/bl2/aarch64/bl2_el3_entrypoint.S b/bl2/aarch64/bl2_el3_entrypoint.S
new file mode 100644
index 0000000..2d3efd1
--- /dev/null
+++ b/bl2/aarch64/bl2_el3_entrypoint.S
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <bl_common.h>
+#include <el3_common_macros.S>
+
+	.globl	bl2_entrypoint
+	.globl	bl2_vector_table
+	.globl	bl2_el3_run_image
+	.globl	bl2_run_next_image
+
+func bl2_entrypoint
+	/* Save arguments x0-x3 from previous Boot loader */
+	mov	x20, x0
+	mov	x21, x1
+	mov	x22, x2
+	mov	x23, x3
+
+	el3_entrypoint_common                                   \
+		_init_sctlr=1                                   \
+		_warm_boot_mailbox=!PROGRAMMABLE_RESET_ADDRESS  \
+		_secondary_cold_boot=!COLD_BOOT_SINGLE_CPU      \
+		_init_memory=1                                  \
+		_init_c_runtime=1                               \
+		_exception_vectors=bl2_el3_exceptions
+
+	/*
+	 * Restore parameters from the Boot ROM
+	 */
+	mov	x0, x20
+	mov	x1, x21
+	mov	x2, x22
+	mov	x3, x23
+
+	bl	bl2_el3_early_platform_setup
+	bl	bl2_el3_plat_arch_setup
+
+	/* ---------------------------------------------
+	 * Jump to main function.
+	 * ---------------------------------------------
+	 */
+	bl	bl2_main
+
+	/* ---------------------------------------------
+	 * Should never reach this point.
+	 * ---------------------------------------------
+	 */
+	no_ret	plat_panic_handler
+endfunc bl2_entrypoint
+
+func bl2_run_next_image
+	mov	x20, x0
+	/*
+	 * MMU needs to be disabled because both BL2 and BL31 execute
+	 * in EL3, and therefore share the same address space.
+	 * BL31 will initialize the address space according to its
+	 * own requirement.
+	 */
+	bl	disable_mmu_icache_el3
+	tlbi	alle3
+	bl	bl2_el3_plat_prepare_exit
+
+	ldp	x0, x1, [x20, #ENTRY_POINT_INFO_PC_OFFSET]
+	msr	elr_el3, x0
+	msr	spsr_el3, x1
+
+	ldp	x6, x7, [x20, #(ENTRY_POINT_INFO_ARGS_OFFSET + 0x30)]
+	ldp	x4, x5, [x20, #(ENTRY_POINT_INFO_ARGS_OFFSET + 0x20)]
+	ldp	x2, x3, [x20, #(ENTRY_POINT_INFO_ARGS_OFFSET + 0x10)]
+	ldp	x0, x1, [x20, #(ENTRY_POINT_INFO_ARGS_OFFSET + 0x0)]
+	eret
+endfunc bl2_run_next_image
diff --git a/bl2/aarch64/bl2_el3_exceptions.S b/bl2/aarch64/bl2_el3_exceptions.S
new file mode 100644
index 0000000..987f6e3
--- /dev/null
+++ b/bl2/aarch64/bl2_el3_exceptions.S
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <bl1.h>
+#include <bl_common.h>
+#include <context.h>
+
+/* -----------------------------------------------------------------------------
+ * Very simple stackless exception handlers used by BL2.
+ * -----------------------------------------------------------------------------
+ */
+	.globl	bl2_el3_exceptions
+
+vector_base bl2_el3_exceptions
+
+	/* -----------------------------------------------------
+	 * Current EL with SP0 : 0x0 - 0x200
+	 * -----------------------------------------------------
+	 */
+vector_entry SynchronousExceptionSP0
+	mov	x0, #SYNC_EXCEPTION_SP_EL0
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size SynchronousExceptionSP0
+
+vector_entry IrqSP0
+	mov	x0, #IRQ_SP_EL0
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size IrqSP0
+
+vector_entry FiqSP0
+	mov	x0, #FIQ_SP_EL0
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size FiqSP0
+
+vector_entry SErrorSP0
+	mov	x0, #SERROR_SP_EL0
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size SErrorSP0
+
+	/* -----------------------------------------------------
+	 * Current EL with SPx: 0x200 - 0x400
+	 * -----------------------------------------------------
+	 */
+vector_entry SynchronousExceptionSPx
+	mov	x0, #SYNC_EXCEPTION_SP_ELX
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size SynchronousExceptionSPx
+
+vector_entry IrqSPx
+	mov	x0, #IRQ_SP_ELX
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size IrqSPx
+
+vector_entry FiqSPx
+	mov	x0, #FIQ_SP_ELX
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size FiqSPx
+
+vector_entry SErrorSPx
+	mov	x0, #SERROR_SP_ELX
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size SErrorSPx
+
+	/* -----------------------------------------------------
+	 * Lower EL using AArch64 : 0x400 - 0x600
+	 * -----------------------------------------------------
+	 */
+vector_entry SynchronousExceptionA64
+	mov	x0, #SYNC_EXCEPTION_AARCH64
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size SynchronousExceptionA64
+
+vector_entry IrqA64
+	mov	x0, #IRQ_AARCH64
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size IrqA64
+
+vector_entry FiqA64
+	mov	x0, #FIQ_AARCH64
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size FiqA64
+
+vector_entry SErrorA64
+	mov	x0, #SERROR_AARCH64
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size SErrorA64
+
+	/* -----------------------------------------------------
+	 * Lower EL using AArch32 : 0x600 - 0x800
+	 * -----------------------------------------------------
+	 */
+vector_entry SynchronousExceptionA32
+	mov	x0, #SYNC_EXCEPTION_AARCH32
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size SynchronousExceptionA32
+
+vector_entry IrqA32
+	mov	x0, #IRQ_AARCH32
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size IrqA32
+
+vector_entry FiqA32
+	mov	x0, #FIQ_AARCH32
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size FiqA32
+
+vector_entry SErrorA32
+	mov	x0, #SERROR_AARCH32
+	bl	plat_report_exception
+	no_ret	plat_panic_handler
+	check_vector_size SErrorA32
diff --git a/bl2/bl2.mk b/bl2/bl2.mk
index 32e3284..9d75286 100644
--- a/bl2/bl2.mk
+++ b/bl2/bl2.mk
@@ -5,10 +5,12 @@
 #
 
 BL2_SOURCES		+=	bl2/bl2_main.c				\
-				bl2/${ARCH}/bl2_entrypoint.S		\
 				bl2/${ARCH}/bl2_arch_setup.c		\
 				lib/locks/exclusive/${ARCH}/spinlock.S	\
-				plat/common/${ARCH}/platform_up_stack.S
+				plat/common/${ARCH}/platform_up_stack.S	\
+				${MBEDTLS_COMMON_SOURCES}               \
+				${MBEDTLS_CRYPTO_SOURCES}		\
+				${MBEDTLS_X509_SOURCES}
 
 ifeq (${ARCH},aarch64)
 BL2_SOURCES		+=	common/aarch64/early_exceptions.S
@@ -20,4 +22,15 @@
 BL2_SOURCES		+=	bl2/bl2_image_load.c
 endif
 
+ifeq (${BL2_AT_EL3},0)
+BL2_SOURCES		+=	bl2/${ARCH}/bl2_entrypoint.S
 BL2_LINKERFILE		:=	bl2/bl2.ld.S
+
+else
+BL2_SOURCES		+=	bl2/${ARCH}/bl2_el3_entrypoint.S	\
+				bl2/${ARCH}/bl2_el3_exceptions.S	\
+				plat/common/plat_bl2_el3_common.c	\
+				lib/cpus/${ARCH}/cpu_helpers.S		\
+				lib/cpus/errata_report.c
+BL2_LINKERFILE		:=	bl2/bl2_el3.ld.S
+endif
diff --git a/bl2/bl2_el3.ld.S b/bl2/bl2_el3.ld.S
new file mode 100644
index 0000000..57709e3
--- /dev/null
+++ b/bl2/bl2_el3.ld.S
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <platform_def.h>
+#include <xlat_tables_defs.h>
+
+OUTPUT_FORMAT(PLATFORM_LINKER_FORMAT)
+OUTPUT_ARCH(PLATFORM_LINKER_ARCH)
+ENTRY(bl2_entrypoint)
+
+MEMORY {
+    RAM (rwx): ORIGIN = BL2_BASE, LENGTH = BL2_LIMIT - BL2_BASE
+}
+
+
+SECTIONS
+{
+    . = BL2_BASE;
+    ASSERT(. == ALIGN(PAGE_SIZE),
+           "BL2_BASE address is not aligned on a page boundary.")
+
+#if SEPARATE_CODE_AND_RODATA
+    .text . : {
+        __TEXT_START__ = .;
+        __TEXT_RESIDENT_START__ = .;
+        *bl2_el3_entrypoint.o(.text*)
+        *(.text.asm.*)
+        __TEXT_RESIDENT_END__ = .;
+        *(.text*)
+        *(.vectors)
+        . = NEXT(PAGE_SIZE);
+        __TEXT_END__ = .;
+     } >RAM
+
+    .rodata . : {
+        __RODATA_START__ = .;
+        *(.rodata*)
+
+        /* Ensure 8-byte alignment for descriptors and ensure inclusion */
+        . = ALIGN(8);
+        __PARSER_LIB_DESCS_START__ = .;
+        KEEP(*(.img_parser_lib_descs))
+        __PARSER_LIB_DESCS_END__ = .;
+
+        /*
+         * Ensure 8-byte alignment for cpu_ops so that its fields are also
+         * aligned. Also ensure cpu_ops inclusion.
+         */
+        . = ALIGN(8);
+        __CPU_OPS_START__ = .;
+        KEEP(*(cpu_ops))
+        __CPU_OPS_END__ = .;
+
+        . = NEXT(PAGE_SIZE);
+        __RODATA_END__ = .;
+    } >RAM
+
+    ASSERT(__TEXT_RESIDENT_END__ - __TEXT_RESIDENT_START__ <= PAGE_SIZE,
+          "Resident part of BL2 has exceeded its limit.")
+#else
+    ro . : {
+        __RO_START__ = .;
+        __TEXT_RESIDENT_START__ = .;
+        *bl2_el3_entrypoint.o(.text*)
+        *(.text.asm.*)
+        __TEXT_RESIDENT_END__ = .;
+        *(.text*)
+        *(.rodata*)
+
+        /*
+         * Ensure 8-byte alignment for cpu_ops so that its fields are also
+         * aligned. Also ensure cpu_ops inclusion.
+         */
+        . = ALIGN(8);
+        __CPU_OPS_START__ = .;
+        KEEP(*(cpu_ops))
+        __CPU_OPS_END__ = .;
+
+        /* Ensure 8-byte alignment for descriptors and ensure inclusion */
+        . = ALIGN(8);
+        __PARSER_LIB_DESCS_START__ = .;
+        KEEP(*(.img_parser_lib_descs))
+        __PARSER_LIB_DESCS_END__ = .;
+
+        *(.vectors)
+        __RO_END_UNALIGNED__ = .;
+        /*
+         * Memory page(s) mapped to this section will be marked as
+         * read-only, executable.  No RW data from the next section must
+         * creep in.  Ensure the rest of the current memory page is unused.
+         */
+        . = NEXT(PAGE_SIZE);
+
+        __RO_END__ = .;
+    } >RAM
+#endif
+
+    ASSERT(__CPU_OPS_END__ > __CPU_OPS_START__,
+          "cpu_ops not defined for this platform.")
+
+    /*
+     * Define a linker symbol to mark start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = . ;
+
+    /*
+     * .data must be placed at a lower address than the stacks if the stack
+     * protector is enabled. Alternatively, the .data.stack_protector_canary
+     * section can be placed independently of the main .data section.
+     */
+    .data . : {
+        __DATA_START__ = .;
+        *(.data*)
+        __DATA_END__ = .;
+    } >RAM
+
+    stacks (NOLOAD) : {
+        __STACKS_START__ = .;
+        *(tzfw_normal_stacks)
+        __STACKS_END__ = .;
+    } >RAM
+
+    /*
+     * The .bss section gets initialised to 0 at runtime.
+     * Its base address should be 16-byte aligned for better performance of the
+     * zero-initialization code.
+     */
+    .bss : ALIGN(16) {
+        __BSS_START__ = .;
+        *(SORT_BY_ALIGNMENT(.bss*))
+        *(COMMON)
+        __BSS_END__ = .;
+    } >RAM
+
+    /*
+     * The xlat_table section is for full, aligned page tables (4K).
+     * Removing them from .bss avoids forcing 4K alignment on
+     * the .bss section and eliminates the unnecessary zero init
+     */
+    xlat_table (NOLOAD) : {
+        *(xlat_table)
+    } >RAM
+
+#if USE_COHERENT_MEM
+    /*
+     * The base address of the coherent memory section must be page-aligned (4K)
+     * to guarantee that the coherent data are stored on their own pages and
+     * are not mixed with normal data.  This is required to set up the correct
+     * memory attributes for the coherent data page tables.
+     */
+    coherent_ram (NOLOAD) : ALIGN(PAGE_SIZE) {
+        __COHERENT_RAM_START__ = .;
+        *(tzfw_coherent_mem)
+        __COHERENT_RAM_END_UNALIGNED__ = .;
+        /*
+         * Memory page(s) mapped to this section will be marked
+         * as device memory.  No other unexpected data must creep in.
+         * Ensure the rest of the current memory page is unused.
+         */
+        . = NEXT(PAGE_SIZE);
+        __COHERENT_RAM_END__ = .;
+    } >RAM
+#endif
+
+    /*
+     * Define a linker symbol to mark end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
+    __BL2_END__ = .;
+
+    __BSS_SIZE__ = SIZEOF(.bss);
+
+#if USE_COHERENT_MEM
+    __COHERENT_RAM_UNALIGNED_SIZE__ =
+        __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
+#endif
+
+    ASSERT(. <= BL2_LIMIT, "BL2 image has exceeded its limit.")
+}
diff --git a/bl2/bl2_main.c b/bl2/bl2_main.c
index 018deb3..c85db2d 100644
--- a/bl2/bl2_main.c
+++ b/bl2/bl2_main.c
@@ -13,6 +13,11 @@
 #include <platform.h>
 #include "bl2_private.h"
 
+#ifdef AARCH32
+#define NEXT_IMAGE	"BL32"
+#else
+#define NEXT_IMAGE	"BL31"
+#endif
 
 /*******************************************************************************
  * The only thing to do in BL2 is to load further images and pass control to
@@ -49,6 +54,8 @@
 	disable_mmu_icache_secure();
 #endif /* AARCH32 */
 
+
+#if !BL2_AT_EL3
 	console_flush();
 
 	/*
@@ -57,4 +64,11 @@
 	 * be passed to next BL image as an argument.
 	 */
 	smc(BL1_SMC_RUN_IMAGE, (unsigned long)next_bl_ep_info, 0, 0, 0, 0, 0, 0);
+#else
+	NOTICE("BL2: Booting " NEXT_IMAGE "\n");
+	print_entry_point_info(next_bl_ep_info);
+	console_flush();
+
+	bl2_run_next_image(next_bl_ep_info);
+#endif
 }
diff --git a/bl2/bl2_private.h b/bl2/bl2_private.h
index 83b8047..ea2f33a 100644
--- a/bl2/bl2_private.h
+++ b/bl2/bl2_private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -17,5 +17,6 @@
  *****************************************/
 void bl2_arch_setup(void);
 struct entry_point_info *bl2_load_images(void);
+void bl2_run_next_image(const entry_point_info_t *bl_ep_info);
 
 #endif /* __BL2_PRIVATE_H__ */
diff --git a/bl31/aarch64/runtime_exceptions.S b/bl31/aarch64/runtime_exceptions.S
index d8fbb9b..9b7735f 100644
--- a/bl31/aarch64/runtime_exceptions.S
+++ b/bl31/aarch64/runtime_exceptions.S
@@ -14,6 +14,26 @@
 
 	.globl	runtime_exceptions
 
+	.globl	sync_exception_sp_el0
+	.globl	irq_sp_el0
+	.globl	fiq_sp_el0
+	.globl	serror_sp_el0
+
+	.globl	sync_exception_sp_elx
+	.globl	irq_sp_elx
+	.globl	fiq_sp_elx
+	.globl	serror_sp_elx
+
+	.globl	sync_exception_aarch64
+	.globl	irq_aarch64
+	.globl	fiq_aarch64
+	.globl	serror_aarch64
+
+	.globl	sync_exception_aarch32
+	.globl	irq_aarch32
+	.globl	fiq_aarch32
+	.globl	serror_aarch32
+
 	/* ---------------------------------------------------------------------
 	 * This macro handles Synchronous exceptions.
 	 * Only SMC exceptions are supported.
diff --git a/bl31/bl31.mk b/bl31/bl31.mk
index fdcc931..2db4856 100644
--- a/bl31/bl31.mk
+++ b/bl31/bl31.mk
@@ -51,13 +51,19 @@
 endif
 
 ifeq (${ENABLE_AMU},1)
-BL31_SOURCES		+=	lib/extensions/amu/aarch64/amu.c
+BL31_SOURCES		+=	lib/extensions/amu/aarch64/amu.c		\
+				lib/extensions/amu/aarch64/amu_helpers.S
 endif
 
 ifeq (${ENABLE_SVE_FOR_NS},1)
 BL31_SOURCES		+=	lib/extensions/sve/sve.c
 endif
 
+ifeq (${WORKAROUND_CVE_2017_5715},1)
+BL31_SOURCES		+=	lib/cpus/aarch64/workaround_cve_2017_5715_mmu.S		\
+				lib/cpus/aarch64/workaround_cve_2017_5715_bpiall.S
+endif
+
 BL31_LINKERFILE		:=	bl31/bl31.ld.S
 
 # Flag used to indicate if Crash reporting via console should be included
diff --git a/common/tf_printf.c b/common/tf_printf.c
index f73842a..d403983 100644
--- a/common/tf_printf.c
+++ b/common/tf_printf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -31,7 +31,8 @@
 		putchar(*str++);
 }
 
-static void unsigned_num_print(unsigned long long int unum, unsigned int radix)
+static void unsigned_num_print(unsigned long long int unum, unsigned int radix,
+			       char padc, int padn)
 {
 	/* Just need enough space to store 64 bit decimal integer */
 	unsigned char num_buf[20];
@@ -45,6 +46,12 @@
 			num_buf[i++] = 'a' + (rem - 0xa);
 	} while (unum /= radix);
 
+	if (padn > 0) {
+		while (i < padn--) {
+			putchar(padc);
+		}
+	}
+
 	while (--i >= 0)
 		putchar(num_buf[i]);
 }
@@ -63,6 +70,9 @@
  * %ll - long long int (64-bit on AArch64)
  * %z - size_t sized integer formats (64 bit on AArch64)
  *
+ * The following padding specifiers are supported by this printf
+ * %0NN - Left-pad the number with 0s (NN is a decimal number)
+ *
  * The print exits on all other formats specifiers other than valid
  * combinations of the above specifiers.
  *******************************************************************/
@@ -72,9 +82,12 @@
 	long long int num;
 	unsigned long long int unum;
 	char *str;
+	char padc = 0; /* Padding character */
+	int padn; /* Number of characters to pad */
 
 	while (*fmt) {
 		l_count = 0;
+		padn = 0;
 
 		if (*fmt == '%') {
 			fmt++;
@@ -87,10 +100,11 @@
 				if (num < 0) {
 					putchar('-');
 					unum = (unsigned long long int)-num;
+					padn--;
 				} else
 					unum = (unsigned long long int)num;
 
-				unsigned_num_print(unum, 10);
+				unsigned_num_print(unum, 10, padc, padn);
 				break;
 			case 's':
 				str = va_arg(args, char *);
@@ -98,14 +112,16 @@
 				break;
 			case 'p':
 				unum = (uintptr_t)va_arg(args, void *);
-				if (unum)
+				if (unum) {
 					tf_string_print("0x");
+					padn -= 2;
+				}
 
-				unsigned_num_print(unum, 16);
+				unsigned_num_print(unum, 16, padc, padn);
 				break;
 			case 'x':
 				unum = get_unum_va_args(args, l_count);
-				unsigned_num_print(unum, 16);
+				unsigned_num_print(unum, 16, padc, padn);
 				break;
 			case 'z':
 				if (sizeof(size_t) == 8)
@@ -119,8 +135,21 @@
 				goto loop;
 			case 'u':
 				unum = get_unum_va_args(args, l_count);
-				unsigned_num_print(unum, 10);
+				unsigned_num_print(unum, 10, padc, padn);
 				break;
+			case '0':
+				padc = '0';
+				padn = 0;
+				fmt++;
+
+				while (1) {
+					char ch = *fmt;
+					if (ch < '0' || ch > '9') {
+						goto loop;
+					}
+					padn = (padn * 10) + (ch - '0');
+					fmt++;
+				}
 			default:
 				/* Exit on any other format specifier */
 				return;
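Taken together with the parser above, a few calls illustrate the new padding
behaviour (expected output inferred from the code, not stated by the commit):

	/* The sign and the "0x" prefix eat into the pad width, per the
	 * padn adjustments above. */
	tf_printf("%08x\n", 0xb10U);		/* prints "00000b10" */
	tf_printf("%04d\n", -7);		/* prints "-007" */
	tf_printf("%p\n", (void *)0x4000);	/* prints "0x4000": padn goes
						 * negative, so no padding */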
diff --git a/docs/cpu-specific-build-macros.rst b/docs/cpu-specific-build-macros.rst
index f74b459..014817d 100644
--- a/docs/cpu-specific-build-macros.rst
+++ b/docs/cpu-specific-build-macros.rst
@@ -11,6 +11,15 @@
 operations framework to enable errata workarounds and to enable optimizations
 for a specific CPU on a platform.
 
+Security Vulnerability Workarounds
+----------------------------------
+
+ARM Trusted Firmware exports a series of build flags that control which
+security vulnerability workarounds are applied at runtime.
+
+-  ``WORKAROUND_CVE_2017_5715``: Enables the security workaround for
+   `CVE-2017-5715`_. Defaults to 1.
+
 CPU Errata Workarounds
 ----------------------
 
@@ -142,6 +151,7 @@
 
 *Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.*
 
+.. _CVE-2017-5715: http://www.cve.mitre.org/cgi-bin/cvename.cgi?name=2017-5715
 .. _Cortex-A53 MPCore Software Developers Errata Notice: http://infocenter.arm.com/help/topic/com.arm.doc.epm048406/Cortex_A53_MPCore_Software_Developers_Errata_Notice.pdf
 .. _Cortex-A57 MPCore Software Developers Errata Notice: http://infocenter.arm.com/help/topic/com.arm.doc.epm049219/cortex_a57_mpcore_software_developers_errata_notice.pdf
 .. _Cortex-A72 MPCore Software Developers Errata Notice: http://infocenter.arm.com/help/topic/com.arm.doc.epm012079/index.html
diff --git a/docs/diagrams/secure_sw_stack_sp.png b/docs/diagrams/secure_sw_stack_sp.png
new file mode 100644
index 0000000..5cb2ca7
--- /dev/null
+++ b/docs/diagrams/secure_sw_stack_sp.png
Binary files differ
diff --git a/docs/diagrams/secure_sw_stack_tos.png b/docs/diagrams/secure_sw_stack_tos.png
new file mode 100644
index 0000000..1f2d555
--- /dev/null
+++ b/docs/diagrams/secure_sw_stack_tos.png
Binary files differ
diff --git a/docs/firmware-design.rst b/docs/firmware-design.rst
index 405964d..1f8fcc8 100644
--- a/docs/firmware-design.rst
+++ b/docs/firmware-design.rst
@@ -418,6 +418,63 @@
 
 #. BL1 passes control to BL31 at the specified entrypoint at EL3.
 
+Running BL2 at EL3 execution level
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some platforms have a non-TF Boot ROM that expects the next boot stage
+to execute at EL3. On these platforms, TF BL1 is a waste of memory
+as its only purpose is to ensure TF BL2 is entered at S-EL1. To avoid
+this waste, a special mode enables BL2 to execute at EL3, which allows
+a non-TF Boot ROM to load and jump directly to BL2. This mode is selected
+when the build flag BL2_AT_EL3 is enabled. The main differences in this
+mode are:
+
+#. BL2 includes the reset code and the mailbox mechanism to differentiate
+   cold boot from warm boot. It runs at EL3 and performs the architectural
+   initialization required at EL3.
+
+#. BL2 no longer receives the meminfo information from BL1. This
+   information can be passed by the Boot ROM or be internal to the
+   BL2 image.
+
+#. Since BL2 executes at EL3, BL2 jumps directly to the next image,
+   instead of invoking the RUN_IMAGE SMC call.
+
+
+We assume three different types of Boot ROM support on the platform:
+
+#. The Boot ROM always jumps to the same address, for both cold
+   and warm boot. In this case, we will need to keep a resident part
+   of BL2 whose memory cannot be reclaimed by any other image. The
+   linker script defines the symbols __TEXT_RESIDENT_START__ and
+   __TEXT_RESIDENT_END__, which allow the platform to configure
+   the memory map correctly.
+#. The platform has some mechanism to indicate the jump address to the
+   Boot ROM. Platform code can then program the jump address with
+   psci_warmboot_entrypoint during cold boot.
+#. The platform has some mechanism to program the reset address using
+   the PROGRAMMABLE_RESET_ADDRESS feature. Platform code can then
+   program the reset address with psci_warmboot_entrypoint during
+   cold boot, bypassing the boot ROM for warm boot.
+
+In the last 2 cases, no part of BL2 needs to remain resident at
+runtime. In the first 2 cases, we expect the Boot ROM to be able to
+differentiate between warm and cold boot, to avoid loading BL2 again
+during warm boot.
+
+This functionality can be tested on FVP by loading the image directly
+into memory and changing the address where the system jumps at reset.
+For example::
+
+	-C cluster0.cpu0.RVBAR=0x4014000
+	--data cluster0.cpu0=bl2.bin@0x4014000
+
+With this configuration, FVP behaves like a platform of the first type,
+where the Boot ROM always jumps to the same address. For simplicity,
+BL32 is loaded in DRAM in this case, to avoid other images reclaiming
+BL2 memory.
+
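+For the second and third Boot ROM types above, the cold boot path only has
+to record the warm boot entry point. A minimal sketch, assuming a
+hypothetical WARMBOOT_ENTRY_REG mailbox register that the Boot ROM (or the
+reset logic) honours::
+
+	#include <mmio.h>
+
+	/* Warm boot entry provided by the PSCI library (assembly symbol). */
+	extern void psci_warmboot_entrypoint(void);
+
+	/* Hypothetical mailbox the Boot ROM reads to find the jump address. */
+	#define WARMBOOT_ENTRY_REG	0x1c010000UL
+
+	void plat_program_warmboot_entry(void)
+	{
+		/* Run once during cold boot; warm boots then bypass BL2. */
+		mmio_write_32(WARMBOOT_ENTRY_REG,
+			      (uint32_t)(uintptr_t)&psci_warmboot_entrypoint);
+	}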
+
 AArch64 BL31
 ~~~~~~~~~~~~
 
@@ -1868,9 +1925,11 @@
 
 The FIP layout consists of a table of contents (ToC) followed by payload data.
 The ToC itself has a header followed by one or more table entries. The ToC is
-terminated by an end marker entry. All ToC entries describe some payload data
-that has been appended to the end of the binary package. With the information
-provided in the ToC entry the corresponding payload data can be retrieved.
+terminated by an end marker entry; since this entry's size field is 0 bytes,
+its offset equals the total size of the FIP file. All ToC entries describe some
+payload data that has been appended to the end of the binary package. With the
+information provided in the ToC entry the corresponding payload data can be
+retrieved.
 
 ::
 
diff --git a/docs/plat/socionext-uniphier.rst b/docs/plat/socionext-uniphier.rst
index fb6ebe5..2c652ac 100644
--- a/docs/plat/socionext-uniphier.rst
+++ b/docs/plat/socionext-uniphier.rst
@@ -1,11 +1,12 @@
 ARM Trusted Firmware for Socionext UniPhier SoCs
 ================================================
 
+
 Socionext UniPhier ARMv8-A SoCs use ARM Trusted Firmware as the secure world
 firmware, supporting BL1, BL2, and BL31.
 
 UniPhier SoC family implements its internal boot ROM, so BL1 is used as pseudo
-ROM (i.e. runs in RAM). The internal boot ROM loads 64KB `1`_ image from a
+ROM (i.e. runs in RAM). The internal boot ROM loads a 64KB [1]_ image from a
 non-volatile storage to the on-chip SRAM. Unfortunately, BL1 does not fit in
 the 64KB limit if `Trusted Board Boot`_ (TBB) is enabled. To solve this problem,
 Socionext provides a first stage loader called `UniPhier BL`_. This loader runs
@@ -23,35 +24,33 @@
 fits in the 64KB limit. The concatenated image is loaded by the boot ROM
 (and verified if the chip fuses are blown).
 
-::
-
-     to the lowest common denominator.
 
 Boot Flow
 ---------
 
-#. The Boot ROM
+1. The Boot ROM
 
-This is hard-wired ROM, so never corrupted. It loads the UniPhier BL (with
-compressed-BL1 appended) into the on-chip SRAM. If the SoC fuses are blown,
-the image is verified by the SoC's own method.
+   This is hard-wired ROM, so never corrupted. It loads the UniPhier BL (with
+   compressed-BL1 appended) into the on-chip SRAM. If the SoC fuses are blown,
+   the image is verified by the SoC's own method.
 
-#. UniPhier BL
+2. UniPhier BL
 
-This runs in the on-chip SRAM. After the minimum SoC initialization and DRAM
-setup, it decompresses the appended BL1 image into the DRAM, then jumps to
-the BL1 entry.
+   This runs in the on-chip SRAM. After the minimum SoC initialization and DRAM
+   setup, it decompresses the appended BL1 image into the DRAM, then jumps to
+   the BL1 entry.
 
-#. BL1
+3. BL1
 
-This runs in the DRAM. It extracts BL2 from FIP (Firmware Image Package).
-If TBB is enabled, the BL2 is authenticated by the standard mechanism of ARM
-Trusted Firmware.
+   This runs in the DRAM. It extracts BL2 from FIP (Firmware Image Package).
+   If TBB is enabled, the BL2 is authenticated by the standard mechanism of ARM
+   Trusted Firmware.
 
-#. BL2, BL31, and more
+4. BL2, BL31, and more
 
-They all run in the DRAM, and are authenticated by the standard mechanism if
-TBB is enabled. See `Firmware Design`_ for details.
+   They all run in the DRAM, and are authenticated by the standard mechanism if
+   TBB is enabled. See `Firmware Design`_ for details.
+
 
 Basic Build
 -----------
@@ -63,59 +62,52 @@
 SoCs. The U-Boot image (``u-boot.bin``) must be built in advance. For the build
 procedure of U-Boot, refer to the document in the `U-Boot`_ project.
 
-To build minimum functionality for UniPhier (without TBB):
-
-::
+To build minimum functionality for UniPhier (without TBB)::
 
     make CROSS_COMPILE=<gcc-prefix> PLAT=uniphier BL33=<path-to-BL33> bl1_gzip fip
 
 Output images:
 
+- ``bl1.bin.gzip``
+- ``fip.bin``
+
--  ``bl1.bin.gzip``
--  ``fip.bin``
 
 Optional features
 -----------------
 
--  Trusted Board Boot
+- Trusted Board Boot
 
-`mbed TLS`_ is needed as the cryptographic and image parser modules.
-Refer to the `User Guide`_ for the appropriate version of mbed TLS.
+  `mbed TLS`_ is needed as the cryptographic and image parser modules.
+  Refer to the `User Guide`_ for the appropriate version of mbed TLS.
 
-To enable TBB, add the following options to the build command:
-
-::
+  To enable TBB, add the following options to the build command::
 
       TRUSTED_BOARD_BOOT=1 GENERATE_COT=1 MBEDTLS_DIR=<path-to-mbedtls>
 
--  System Control Processor (SCP)
+- System Control Processor (SCP)
 
-If desired, FIP can include an SCP BL2 image. If BL2 finds an SCP BL2 image
-in FIP, BL2 loads it into DRAM and kicks the SCP. Most of UniPhier boards
-still work without SCP, but SCP provides better power management support.
+  If desired, FIP can include an SCP BL2 image. If BL2 finds an SCP BL2 image
+  in FIP, BL2 loads it into DRAM and kicks the SCP. Most of UniPhier boards
+  still work without SCP, but SCP provides better power management support.
 
-To include SCP\_BL2, add the following option to the build command:
-
-::
+  To include SCP BL2, add the following option to the build command::
 
       SCP_BL2=<path-to-SCP>
 
--  BL32 (Secure Payload)
+- BL32 (Secure Payload)
 
-To enable BL32, add the following option to the build command:
-
-::
+  To enable BL32, add the following options to the build command::
 
       SPD=<spd> BL32=<path-to-BL32>
 
-If you use TSP for BL32, ``BL32=<path-to-BL32>`` is not required. Just add the
-following:
-
-::
+  If you use TSP for BL32, ``BL32=<path-to-BL32>`` is not required. Just add the
+  following::
 
       SPD=tspd
 
-.. _1: Some%20SoCs%20can%20load%2080KB,%20but%20the%20software%20implementation%20must%20be%20aligned
+
+.. [1] Some SoCs can load 80KB, but the software implementation must be aligned
+   to the lowest common denominator.
 .. _Trusted Board Boot: ../trusted-board-boot.rst
 .. _UniPhier BL: https://github.com/uniphier/uniphier-bl
 .. _Firmware Design: ../firmware-design.rst
diff --git a/docs/porting-guide.rst b/docs/porting-guide.rst
index 10a6da7..7683ded 100644
--- a/docs/porting-guide.rst
+++ b/docs/porting-guide.rst
@@ -549,6 +549,22 @@
    doesn't print anything to the console. If ``PLAT_LOG_LEVEL_ASSERT`` isn't
    defined, it defaults to ``LOG_LEVEL``.
 
+If the platform port uses the Activity Monitor Unit, the following constants
+may be defined:
+
+-  **PLAT\_AMU\_GROUP1\_COUNTERS\_MASK**
+   This mask reflects the set of group 1 counters that should be enabled.  The
+   maximum number of group 1 counters supported by AMUv1 is 16 so the mask
+   can be at most 0xffff. If the platform does not define this mask, no group 1
+   counters are enabled. If the platform defines this mask, the following
+   constant needs to also be defined.
+
+-  **PLAT\_AMU\_GROUP1\_NR\_COUNTERS**
+   This value is used to allocate an array to save and restore the counters
+   specified by ``PLAT_AMU_GROUP1_COUNTERS_MASK`` on CPU suspend.
+   This value should be equal to the highest bit position set in the
+   mask, plus 1.  The maximum number of group 1 counters in AMUv1 is 16.
+
 File : plat\_macros.S [mandatory]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -1128,6 +1144,9 @@
 for performing any remaining platform-specific setup that can occur after the
 MMU and data cache have been enabled.
 
+If support for multiple boot sources is required, it initializes the boot
+sequence used by plat\_try\_next\_boot\_source().
+
 In ARM standard platforms, this function initializes the storage abstraction
 layer used to load the next bootloader image.
 
@@ -1624,6 +1643,70 @@
 must return 0, otherwise it must return 1. The default implementation
 of this always returns 0.
 
+Boot Loader Stage 2 (BL2) at EL3
+--------------------------------
+
+When the platform has a non-TF Boot ROM it is desirable to jump
+directly to BL2 instead of TF BL1. In this case BL2 is expected to
+execute at EL3 instead of at S-EL1. Refer to the `Firmware
+Design`_ for more information.
+
+All mandatory functions of BL2 must be implemented, except the functions
+bl2\_early\_platform\_setup and bl2\_plat\_arch\_setup, because
+their work is now done by bl2\_el3\_early\_platform\_setup and
+bl2\_el3\_plat\_arch\_setup. These functions should generally implement
+the bl1\_plat\_xxx() and bl2\_plat\_xxx() functionality combined.
+
+
+Function : bl2\_el3\_early\_platform\_setup() [mandatory]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+	Argument : u_register_t, u_register_t, u_register_t, u_register_t
+	Return   : void
+
+This function executes with the MMU and data caches disabled. It is only called
+by the primary CPU. This function receives four parameters which can be used
+by the platform to pass any needed information from the Boot ROM to BL2.
+
+On ARM standard platforms, this function does the following:
+
+-  Initializes a UART (PL011 console), which enables access to the ``printf``
+   family of functions in BL2.
+
+-  Initializes the storage abstraction layer used to load further bootloader
+   images. It is necessary to do this early on platforms with a SCP\_BL2 image,
+   since the later ``bl2_platform_setup`` must be done after SCP\_BL2 is loaded.
+
+-  Initializes the private variables that define the memory layout used.
+
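+A skeletal platform implementation of this hook might look like the sketch
+below; the console parameters and the ``plat_io_setup()`` helper are
+illustrative stand-ins for real platform code::
+
+	void bl2_el3_early_platform_setup(u_register_t arg0, u_register_t arg1,
+					  u_register_t arg2, u_register_t arg3)
+	{
+		/* arg0..arg3 carry whatever the Boot ROM chose to pass on. */
+		console_init(PLAT_BOOT_UART_BASE, PLAT_BOOT_UART_CLK_IN_HZ,
+			     PLAT_CONSOLE_BAUDRATE);	/* enables tf_printf */
+		plat_io_setup();	/* storage layer for further images */
+	}
+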
+Function : bl2\_el3\_plat\_arch\_setup() [mandatory]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+	Argument : void
+	Return   : void
+
+This function executes with the MMU and data caches disabled. It is only called
+by the primary CPU.
+
+The purpose of this function is to perform any architectural initialization
+that varies across platforms.
+
+On ARM standard platforms, this function enables the MMU.
+
+Function : bl2\_el3\_plat\_prepare\_exit() [optional]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+	Argument : void
+	Return   : void
+
+This function is called prior to exiting BL2 and running the next image.
+It should be used to perform platform-specific clean up or bookkeeping
+operations before transferring control to the next image. This function
+runs with the MMU disabled.
+
 FWU Boot Loader Stage 2 (BL2U)
 ------------------------------
 
diff --git a/docs/secure-partition-manager-design.rst b/docs/secure-partition-manager-design.rst
new file mode 100644
index 0000000..05d4e8b
--- /dev/null
+++ b/docs/secure-partition-manager-design.rst
@@ -0,0 +1,825 @@
+*******************************
+Secure Partition Manager Design
+*******************************
+
+.. section-numbering::
+    :suffix: .
+
+.. contents::
+
+Background
+==========
+
+In some market segments that primarily deal with client-side devices like mobile
+phones, tablets, STBs and embedded devices, a Trusted OS instantiates trusted
+applications to provide security services like DRM, secure payment and
+authentication. The Global Platform TEE Client API specification defines the API
+used by Non-secure world applications to access these services. A Trusted OS
+fulfils the requirements of a security service as described above.
+
+Management services are typically implemented at the highest level of privilege
+in the system (i.e. EL3 in Arm Trusted Firmware). The service requirements are
+fulfilled by the execution environment provided by Arm Trusted Firmware.
+
+The following diagram illustrates the corresponding software stack:
+
+|Image 1|
+
+In other market segments that primarily deal with server-side devices (e.g. data
+centres and enterprise servers) the secure software stack typically does not
+include a Global Platform Trusted OS. Security functions are accessed through
+other interfaces (e.g. ACPI TCG TPM interface, UEFI runtime variable service).
+
+Placement of management and security functions with diverse requirements in a
+privileged Exception Level (i.e. EL3 or S-EL1) makes security auditing of
+firmware more difficult and does not allow unrelated services to be isolated
+from each other.
+
+Introduction
+============
+
+A **Secure Partition** is a software execution environment instantiated in
+S-EL0 that can be used to implement simple management and security services.
+Since S-EL0 is an unprivileged Exception Level, a Secure Partition relies on
+privileged firmware (i.e. Arm Trusted Firmware) to be granted access to system
+and processor resources. Essentially, it is a software sandbox in the Secure
+world that runs under the control of privileged software, provides one or more
+services and accesses the following system resources:
+
+- Memory and device regions in the system address map.
+
+- PE system registers.
+
+- A range of synchronous exceptions (e.g. SMC function identifiers).
+
+Note that currently the Arm Trusted Firmware only supports handling one Secure
+Partition.
+
+A Secure Partition enables Arm Trusted Firmware to implement only the essential
+secure services in EL3 and instantiate the rest in a partition in S-EL0.
+Furthermore, multiple Secure Partitions can be used to isolate unrelated
+services from each other.
+
+The following diagram illustrates the place of a Secure Partition in a typical
+ARMv8-A software stack. A single or multiple Secure Partitions provide secure
+services to software components in the Non-secure world and other Secure
+Partitions.
+
+|Image 2|
+
+The Arm Trusted Firmware build system is responsible for including the Secure
+Partition image in the FIP. During boot, BL2 includes support to authenticate
+and load the Secure Partition image. A BL31 component called **Secure Partition
+Manager (SPM)** is responsible for managing the partition. This is semantically
+similar to a hypervisor managing a virtual machine.
+
+The SPM is responsible for the following actions during boot:
+
+- Allocate resources requested by the Secure Partition.
+
+- Perform architectural and system setup required by the Secure Partition to
+  fulfil a service request.
+
+- Implement a standard interface that is used for initialising a Secure
+  Partition.
+
+The SPM is responsible for the following actions during runtime:
+
+- Implement a standard interface that is used by a Secure Partition to fulfil
+  service requests.
+
+- Implement a standard interface that is used by the Non-secure world for
+  accessing the services exported by a Secure Partition. A service can be
+  invoked through an SMC.
+
+Alternatively, a partition can be viewed as a thread of execution running under
+the control of the SPM. Hence common programming concepts described below are
+applicable to a partition.
+
+Description
+===========
+
+The previous section introduced some general aspects of the software
+architecture of a Secure Partition. This section describes the specific choices
+made in the current implementation of this software architecture. Subsequent
+revisions of the implementation will include a richer set of features that
+enable a more flexible architecture.
+
+Building Arm Trusted Firmware with Secure Partition support
+-----------------------------------------------------------
+
+SPM is supported on the Arm FVP exclusively at the moment. The current
+implementation supports inclusion of only a single Secure Partition in which a
+service always runs to completion (i.e. the requested services cannot be
+preempted to give control back to the Normal world).
+
+It is not currently possible for BL31 to integrate SPM support and a Secure
+Payload Dispatcher (SPD) at the same time; they are mutually exclusive. In the
+SPM bootflow, a Secure Partition image executing at S-EL0 replaces the Secure
+Payload image executing at S-EL1 (e.g. a Trusted OS). Both are referred to as
+BL32.
+
+A working prototype of an SP has been implemented by re-purposing the EDK2 code
+and tools, leveraging the concept of the *Standalone Management Mode (MM)* in
+the UEFI specification (see the PI v1.6 Volume 4: Management Mode Core
+Interface). This will be referred to as the *Standalone MM Secure Partition* in
+the rest of this document.
+
+To enable SPM support in the TF, the source code must be compiled with the build
+flag ``ENABLE_SPM=1``. On Arm platforms the build option ``ARM_BL31_IN_DRAM``
+can be used to select the location of BL31; both SRAM and DRAM are supported.
+Also, the location of the binary that contains the BL32 image
+(``BL32=path/to/image.bin``) must be specified.
+
+First, build the Standalone MM Secure Partition. To build it, refer to the
+`instructions in the EDK2 repository`_.
+
+Then build TF with SPM support and include the Standalone MM Secure Partition
+image in the FIP:
+
+::
+
+    BL32=path/to/standalone/mm/sp BL33=path/to/bl33.bin \
+    make PLAT=fvp ENABLE_SPM=1 fip all
+
+Describing Secure Partition resources
+-------------------------------------
+
+Arm Trusted Firmware exports a porting interface that enables a platform to
+specify the system resources required by the Secure Partition. Some instructions
+are given below. However, this interface is under development and it may change
+as new features are implemented.
+
+- A Secure Partition is considered a BL32 image, so the same defines that apply
+  to BL32 images apply to a Secure Partition: ``BL32_BASE`` and ``BL32_LIMIT``.
+
+- The following defines are needed to allocate space for the translation tables
+  used by the Secure Partition: ``PLAT_SP_IMAGE_MMAP_REGIONS`` and
+  ``PLAT_SP_IMAGE_MAX_XLAT_TABLES``.
+
+- The functions ``plat_get_secure_partition_mmap()`` and
+  ``plat_get_secure_partition_boot_info()`` have to be implemented. The file
+  ``plat/arm/board/fvp/fvp_common.c`` can be used as an example. It uses the
+  defines in ``include/plat/arm/common/arm_spm_def.h``.
+
+  - ``plat_get_secure_partition_mmap()`` returns an array of mmap regions that
+    describe the memory regions that the SPM needs to allocate for a Secure
+    Partition.
+
+  - ``plat_get_secure_partition_boot_info()`` returns a
+    ``secure_partition_boot_info_t`` struct that is populated by the platform
+    with information about the memory map of the Secure Partition.
+
+For an example of all the changes in context, you may refer to commit
+``e29efeb1b4``, in which the port for FVP was introduced.
+
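+As a rough sketch of the first of these functions (modelled on the FVP port;
+the region macros and attributes below are illustrative, not the exact FVP
+values)::
+
+	#include <xlat_tables_v2.h>
+
+	static const mmap_region_t plat_sp_mmap[] = {
+		/* Secure Partition image, executable from S-EL0. */
+		MAP_REGION_FLAT(PLAT_SP_IMAGE_BASE, PLAT_SP_IMAGE_SIZE,
+				MT_CODE | MT_SECURE | MT_USER),
+		/* Buffer shared with the Non-secure world. */
+		MAP_REGION_FLAT(PLAT_SP_NS_BUF_BASE, PLAT_SP_NS_BUF_SIZE,
+				MT_RW_DATA | MT_NS | MT_USER),
+		{0}	/* terminator */
+	};
+
+	const mmap_region_t *plat_get_secure_partition_mmap(void *cookie)
+	{
+		return plat_sp_mmap;
+	}
+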
+Accessing Secure Partition services
+-----------------------------------
+
+The `SMC Calling Convention`_ (*ARM DEN 0028B*) describes SMCs as a conduit for
+accessing services implemented in the Secure world. The ``MM_COMMUNICATE``
+interface defined in the `Management Mode Interface Specification`_ (*ARM DEN
+0060A*) is used to invoke a Secure Partition service as a Fast Call.
+
+The mechanism used to identify a service within the partition depends on the
+service implementation. It is assumed that the caller of the service will be
+able to discover this mechanism through standard platform discovery mechanisms
+like ACPI and Device Trees. For example, *Volume 4: Platform Initialisation
+Specification v1.6. Management Mode Core Interface* specifies that a GUID is
+used to identify a management mode service. A client populates the GUID in the
+``EFI_MM_COMMUNICATE_HEADER``. The header is populated in the communication
+buffer shared with the Secure Partition.
+
+A Fast Call appears to be atomic from the perspective of the caller and returns
+when the requested operation has completed. A service invoked through the
+``MM_COMMUNICATE`` SMC will run to completion in the partition on a given CPU.
+The SPM is responsible for guaranteeing this behaviour. This means that there
+can only be a single outstanding Fast Call in a partition on a given CPU.
+
+Exchanging data with the Secure Partition
+-----------------------------------------
+
+The exchange of data between the Non-secure world and the partition takes place
+through a shared memory region. The location of data in the shared memory area
+is passed as a parameter to the ``MM_COMMUNICATE`` SMC. The shared memory area
+is statically allocated by the SPM and is expected to be either implicitly known
+to the Non-secure world or discovered through a platform discovery mechanism
+e.g. ACPI table or device tree. It is possible for the Non-secure world to
+exchange data with a partition only if it has been populated in this shared
+memory area. The shared memory area is implemented as per the guidelines
+specified in Section 3.2.3 of the `Management Mode Interface Specification`_
+(*ARM DEN 0060A*).
+
+The format of data structures used to encapsulate data in the shared memory is
+agreed between the Non-secure world and the Secure Partition. For example, in
+the `Management Mode Interface specification`_ (*ARM DEN 0060A*), Section 4
+describes that the communication buffer shared between the Non-secure world and
+the Management Mode (MM) in the Secure world must be of the type
+``EFI_MM_COMMUNICATE_HEADER``. This data structure is defined in *Volume 4:
+Platform Initialisation Specification v1.6. Management Mode Core Interface*.
+Any caller of a MM service will have to use the ``EFI_MM_COMMUNICATE_HEADER``
+data structure.
+
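+Paraphrased in C (field names abridged; the normative definition is in the
+PI specification), the header looks roughly like this::
+
+	#include <stddef.h>
+	#include <stdint.h>
+
+	typedef struct { uint8_t b[16]; } efi_guid_t;	/* EFI_GUID, abridged */
+
+	typedef struct {
+		efi_guid_t header_guid;	/* selects the target MM handler */
+		size_t message_len;	/* length of data[], in bytes */
+		uint8_t data[];		/* service-specific payload */
+	} efi_mm_communicate_header_t;
+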
+Runtime model of the Secure Partition
+=====================================
+
+This section describes how the Secure Partition interfaces with the SPM.
+
+Interface with SPM
+------------------
+
+In order to instantiate one or more secure services in the Secure Partition in
+S-EL0, the SPM should define the following types of interfaces:
+
+- Interfaces that enable access to privileged operations from S-EL0. These
+  operations typically require access to system resources that are either shared
+  amongst multiple software components in the Secure world or cannot be directly
+  accessed from an unprivileged Exception Level.
+
+- Interfaces that establish the control path between the SPM and the Secure
+  Partition.
+
+This section describes the APIs currently exported by the SPM that enable a
+Secure Partition to initialise itself and export its services in S-EL0. These
+interfaces are not accessible from the Non-secure world.
+
+Conduit
+^^^^^^^
+
+The `SMC Calling Convention`_ (*ARM DEN 0028B*) specification describes the SMC
+and HVC conduits for accessing firmware services and their availability
+depending on the implemented Exception levels. In S-EL0, the Supervisor Call
+exception (SVC) is the only architectural mechanism available for unprivileged
+software to make a request for an operation implemented in privileged software.
+Hence, the SVC conduit must be used by the Secure Partition to access interfaces
+implemented by the SPM.
+
+An SVC causes an exception to be taken to S-EL1. Arm Trusted Firmware assumes
+ownership of S-EL1 and installs a simple exception vector table in S-EL1 that
+relays an SVC request from a Secure Partition as an SMC request to the SPM in
+EL3.
+Upon servicing the SMC request, Arm Trusted Firmware returns control directly to
+S-EL0 through an ERET instruction.
+
+Calling conventions
+^^^^^^^^^^^^^^^^^^^
+
+The `SMC Calling Convention`_ (*ARM DEN 0028B*) specification describes the
+32-bit and 64-bit calling conventions for the SMC and HVC conduits. The SVC
+conduit introduces the concept of SVC32 and SVC64 calling conventions. The SVC32
+and SVC64 calling conventions are equivalent to the 32-bit (SMC32) and the
+64-bit (SMC64) calling conventions respectively.
+
+Communication initiated by SPM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A service request is initiated from the SPM through an exception return
+instruction (ERET) to S-EL0. Later, the Secure Partition issues an SVC
+instruction to signal completion of the request. Some example use cases are
+given below:
+
+- A request to initialise the Secure Partition during system boot.
+
+- A request to handle a runtime service request.
+
+Communication initiated by Secure Partition
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A request is initiated from the Secure Partition by executing an SVC instruction.
+An ERET instruction is used by Arm Trusted Firmware to return to S-EL0 with the
+result of the request.
+
+For instance, a request to perform privileged operations on behalf of a
+partition (e.g.  management of memory attributes in the translation tables for
+the Secure EL1&0 translation regime).
+
+Interfaces
+^^^^^^^^^^
+
+The current implementation reserves function IDs for Fast Calls in the Standard
+Secure Service calls range (see `SMC Calling Convention`_ (*ARM DEN 0028B*)
+specification) for each API exported by the SPM. This section defines the
+function prototypes for each function ID. The function IDs specify whether one
+or both of the SVC32 and SVC64 calling conventions can be used to invoke the
+corresponding interface.
+
+Secure Partition Event Management
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Secure Partition provides an Event Management interface that is used by the
+SPM to delegate service requests to the Secure Partition. The interface also
+allows the Secure Partition to:
+
+- Register with the SPM a service that it provides.
+- Indicate completion of a service request delegated by the SPM.
+
+Miscellaneous interfaces
+------------------------
+
+``SPM_VERSION_AARCH32``
+^^^^^^^^^^^^^^^^^^^^^^^
+
+- Description
+
+  Returns the version of the interface exported by SPM.
+
+- Parameters
+
+  - **uint32** - Function ID
+
+    - SVC32 Version: **0x84000060**
+
+- Return parameters
+
+  - **int32** - Status
+
+    On success, the format of the value is as follows:
+
+    - Bit [31]: Must be 0
+    - Bits [30:16]: Major Version. Must be 0 for this revision of the SPM
+      interface.
+    - Bits [15:0]: Minor Version. Must be 1 for this revision of the SPM
+      interface.
+
+    On error, the format of the value is as follows:
+
+    - ``NOT_SUPPORTED``: SPM interface is not supported or not available for the
+      client.
+
+- Usage
+
+  This function returns the version of the Secure Partition Manager
+  implementation. The major version is 0 and the minor version is 1. The version
+  number is a 31-bit unsigned integer, with the upper 15 bits denoting the major
+  revision, and the lower 16 bits denoting the minor revision. The following
+  rules apply to the version numbering:
+
+  - Different major revision values indicate possibly incompatible functions.
+
+  - For two revisions, A and B, for which the major revision values are
+    identical, if the minor revision value of revision B is greater than the
+    minor revision value of revision A, then every function in revision A must
+    work in a compatible way with revision B. However, it is possible for
+    revision B to have a higher function count than revision A.
+
+- Implementation responsibilities
+
+  If this function returns a valid version number, all the functions that are
+  described subsequently must be implemented, unless it is explicitly stated
+  that a function is optional.
+
+See `Error Codes`_ for integer values that are associated with each return
+code.
+
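+Given the bit layout above, a caller might decode the result as in the
+following sketch (the macro and function names are illustrative)::
+
+	#include <stdint.h>
+
+	#define SPM_MAJOR_VER_SHIFT	16
+	#define SPM_MINOR_VER_MASK	0xffffu
+
+	/* Returns 1 if the SPM reports a compatible 0.x interface. */
+	static int spm_version_compatible(int32_t ret)
+	{
+		if (ret < 0)
+			return 0;	/* error code, e.g. NOT_SUPPORTED */
+		if ((((uint32_t)ret >> SPM_MAJOR_VER_SHIFT) & 0x7fffu) != 0u)
+			return 0;	/* unknown major revision */
+		return ((uint32_t)ret & SPM_MINOR_VER_MASK) >= 1u;
+	}
+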
+Secure Partition Initialisation
+-------------------------------
+
+The SPM is responsible for initialising the architectural execution context to
+enable initialisation of a service in S-EL0. The responsibilities of the SPM are
+listed below. At the end of initialisation, the partition issues a
+``SP_EVENT_COMPLETE_AARCH64`` call (described later) to signal readiness for
+handling requests for services implemented by the Secure Partition. The
+initialisation event is executed as a Fast Call.
+
+Entry point invocation
+^^^^^^^^^^^^^^^^^^^^^^
+
+The entry point for service requests that should be handled as Fast Calls is
+used as the target of the ERET instruction to start initialisation of the Secure
+Partition.
+
+Architectural Setup
+^^^^^^^^^^^^^^^^^^^
+
+At cold boot, system registers accessible from S-EL0 will be in their reset
+state unless otherwise specified. The SPM will perform the following
+architectural setup to enable execution in S-EL0.
+
+MMU setup
+^^^^^^^^^
+
+The platform port of a Secure Partition specifies to the SPM a list of regions
+that it needs access to and their attributes. The SPM validates this resource
+description and initialises the Secure EL1&0 translation regime as follows.
+
+1. Device regions are mapped with nGnRE attributes and Execute Never
+   instruction access permissions.
+
+2. Code memory regions are mapped with RO data and Executable instruction access
+   permissions.
+
+3. Read Only data memory regions are mapped with RO data and Execute Never
+   instruction access permissions.
+
+4. Read Write data memory regions are mapped with RW data and Execute Never
+   instruction access permissions.
+
+5. If the resource description does not explicitly describe the type of memory
+   regions then all memory regions will be marked with Code memory region
+   attributes.
+
+6. The ``UXN`` and ``PXN`` bits are set for regions that are not executable by
+   S-EL0 or S-EL1.
+
+System Register Setup
+^^^^^^^^^^^^^^^^^^^^^
+
+System registers that influence software execution in S-EL0 are set up by the
+SPM as follows:
+
+1. ``SCTLR_EL1``
+
+   - ``UCI=1``
+   - ``E0E=0``
+   - ``WXN=1``
+   - ``nTWE=1``
+   - ``nTWI=1``
+   - ``UCT=1``
+   - ``DZE=1``
+   - ``I=1``
+   - ``UMA=0``
+   - ``SA0=1``
+   - ``C=1``
+   - ``A=1``
+   - ``M=1``
+
+2. ``CPACR_EL1``
+
+   - ``FPEN=b'11``
+
+3. ``PSTATE``
+
+   - ``D,A,I,F=1``
+   - ``CurrentEL=0`` (EL0)
+   - ``SpSel=0`` (Thread mode)
+   - ``nRW=0`` (AArch64)
+
+General Purpose Register Setup
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+SPM will invoke the entry point of a service by executing an ERET instruction.
+This transition into S-EL0 is special since it is not in response to a previous
+request through an SVC instruction. This is the first entry into S-EL0. The
+general purpose register usage at the time of entry will be as specified in the
+"Return State" column of Table 3-1 in Section 3.1 "Register use in AArch64 SMC
+calls" of the `SMC Calling Convention`_ (*ARM DEN 0028B*) specification. In
+addition, certain other restrictions will be applied as described below.
+
+1. ``SP_EL0``
+
+   A non-zero value will indicate that the SPM has initialised the stack pointer
+   for the current CPU.
+
+   The value will be 0 otherwise.
+
+2. ``X4-X30``
+
+   The values of these registers will be 0.
+
+3. ``X0-X3``
+
+   Parameters passed by the SPM.
+
+   - ``X0``: Virtual address of a buffer shared between EL3 and S-EL0. The
+     buffer will be mapped in the Secure EL1&0 translation regime with read-only
+     memory attributes described earlier.
+
+   - ``X1``: Size of the buffer in bytes.
+
+   - ``X2``: Cookie value (*IMPLEMENTATION DEFINED*).
+
+   - ``X3``: Cookie value (*IMPLEMENTATION DEFINED*).
+
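+In C terms, the first entry into the Secure Partition could therefore be
+modelled with a prototype along the lines of the sketch below. The function
+name is hypothetical; only the register contents above are defined by the SPM:
+
+::
+
+    /* X0: shared buffer VA, X1: buffer size in bytes,
+     * X2, X3: IMPLEMENTATION DEFINED cookie values. */
+    void sp_entry_point(uintptr_t shared_buf, size_t buf_size,
+                        uint64_t cookie1, uint64_t cookie2);
+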
+Runtime Event Delegation
+------------------------
+
+The SPM receives requests for Secure Partition services through a synchronous
+invocation (i.e. an SMC from the Non-secure world). These requests are delegated
+to the partition by programming a return from the last
+``SP_EVENT_COMPLETE_AARCH64`` call received from the partition. The last call
+was made to signal either completion of Secure Partition initialisation or
+completion of a partition service request.
+
+``SP_EVENT_COMPLETE_AARCH64``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Description
+
+  Signal completion of the last SP service request.
+
+- Parameters
+
+  - **uint32** - Function ID
+
+    - SVC64 Version: **0xC4000061**
+
+  - **int32** - Event Status Code
+
+    Zero or a positive value indicates that the event was handled successfully.
+    The values depend upon the original event that was delegated to the Secure
+    Partition. They are described as follows.
+
+    - ``SUCCESS`` : Used to indicate that the Secure Partition was initialised
+      or a runtime request was handled successfully.
+
+    - Any other value greater than 0 is used to pass a specific Event Status
+      code in response to a runtime event.
+
+    A negative value indicates an error. The values of Event Status code depend
+    on the original event.
+
+- Return parameters
+
+  - **int32** - Event ID/Return Code
+
+    Zero or a positive value specifies the unique ID of the event being
+    delegated to the partition by the SPM.
+
+    In the current implementation, this parameter contains the function ID of
+    the ``MM_COMMUNICATE`` SMC. This value indicates to the partition that an
+    event has been delegated to it in response to an ``MM_COMMUNICATE`` request
+    from the Non-secure world.
+
+    A negative value indicates an error. The format of the value is as follows:
+
+    - ``NOT_SUPPORTED``: Function was called from the Non-secure world.
+
+    See `Error Codes`_ for integer values that are associated with each return
+    code.
+
+  - **uint32/uint64** - Event Context Address
+
+    Address of a buffer shared between the SPM and Secure Partition to pass
+    event specific information. The format of the data populated in the buffer
+    is implementation defined.
+
+    The buffer is mapped in the Secure EL1&0 translation regime with read-only
+    memory attributes described earlier.
+
+    For the SVC64 version, this parameter is a 64-bit Virtual Address (VA).
+
+    For the SVC32 version, this parameter is a 32-bit Virtual Address (VA).
+
+  - **uint32** - Event context size
+
+    Size of the memory starting at the Event Context Address.
+
+  - **uint32/uint64** - Event Cookie
+
+    This is an optional parameter. If unused, its value is SBZ.
+
+- Usage
+
+  This function signals to the SPM that the handling of the last event delegated
+  to a partition has completed. The partition is ready to handle its next event.
+  A return from this function is in response to the next event that will be
+  delegated to the partition. The return parameters describe the next event.
+
+- Caller responsibilities
+
+  A Secure Partition must only call ``SP_EVENT_COMPLETE_AARCH64`` to signal
+  completion of a request that was delegated to it by the SPM.
+
+- Callee responsibilities
+
+  When the SPM receives this call from a Secure Partition, the corresponding
+  syndrome information can be used to return control through an ERET
+  instruction to the instruction immediately after the call in the Secure
+  Partition context. This syndrome information comprises the general purpose
+  and system register values at the time the call was made.
+
+  The SPM must save this syndrome information and use it to delegate the next
+  event to the Secure Partition. The return parameters of this interface must
+  specify the properties of the event and be populated in ``X0-X3/W0-W3``
+  registers.
+
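+Taken together, this results in the event loop sketched below. ``sp_svc`` and
+``svc_results_t`` are hypothetical stand-ins for an SVC wrapper that returns
+the values of ``X0-X3``, and ``handle_event`` is a placeholder for the
+partition's own dispatch logic:
+
+::
+
+    #define SP_EVENT_COMPLETE_AARCH64    UINT64_C(0xC4000061)
+
+    /* Signal end of initialisation; the return is the first event. */
+    svc_results_t res = sp_svc(SP_EVENT_COMPLETE_AARCH64, SUCCESS, 0, 0);
+
+    while (1) {
+        int32_t event_id = (int32_t)res.x0; /* e.g. MM_COMMUNICATE FID */
+        uintptr_t ctx = res.x1;             /* event context address */
+        size_t ctx_size = res.x2;           /* event context size */
+
+        int32_t status = handle_event(event_id, ctx, ctx_size);
+
+        /* Complete this event; the return describes the next one. */
+        res = sp_svc(SP_EVENT_COMPLETE_AARCH64, status, 0, 0);
+    }
+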
+Secure Partition Memory Management
+----------------------------------
+
+A Secure Partition executes at S-EL0, which is an unprivileged Exception Level.
+The SPM is responsible for enabling access to regions of memory in the system
+address map from a Secure Partition. This is done by mapping these regions in
+the Secure EL1&0 Translation regime with appropriate memory attributes.
+Attributes refer to memory type, permission, cacheability and shareability
+attributes used in the Translation tables. The definitions of these attributes
+and their usage can be found in the `ARMv8 ARM`_ (*ARM DDI 0487*).
+
+All memory required by the Secure Partition is allocated upfront in the SPM,
+even before handing over to the Secure Partition for the first time. The initial
+access permissions of the memory regions are statically provided by the platform
+port and should allow the Secure Partition to run its initialisation code.
+
+However, they might not suit the final needs of the Secure Partition because its
+final memory layout might not be known until the Secure Partition initialises
+itself. As the Secure Partition initialises its runtime environment it might,
+for example, dynamically load some modules. For instance, a Secure Partition
+could implement a loader for a standard executable file format (e.g. a PE-COFF
+loader for loading executable files at runtime). These executable files will be
+a part of the Secure Partition image. The location of various sections in an
+executable file and their permission attributes (e.g. read-write data, read-only
+data and code) will be known only when the file is loaded into memory.
+
+In this case, the Secure Partition needs a way to change the access permissions
+of its memory regions. The SPM provides this feature through the
+``SP_MEMORY_ATTRIBUTES_SET_AARCH64`` SVC interface. This interface is available
+to the Secure Partition during a specific time window: from the first entry into
+the Secure Partition up to the first ``SP_EVENT_COMPLETE`` call that signals the
+Secure Partition has finished its initialisation. Once the initialisation is
+complete, the SPM does not allow changes to the memory attributes.
+
+This section describes the standard SVC interface that is implemented by the SPM
+to determine and change permission attributes of memory regions that belong to a
+Secure Partition.
+
+``SP_MEMORY_ATTRIBUTES_GET_AARCH64``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Description
+
+  Request the permission attributes of a memory region from S-EL0.
+
+- Parameters
+
+  - **uint32** Function ID
+
+    - SVC64 Version: **0xC4000064**
+
+  - **uint64** Base Address
+
+    This parameter is a 64-bit Virtual Address (VA).
+
+    There are no alignment restrictions on the Base Address. The permission
+    attributes of the translation granule it lies in are returned.
+
+- Return parameters
+
+  - **int32** - Memory Attributes/Return Code
+
+    On success the format of the Return Code is as follows:
+
+    - Bits[1:0] : Data access permission
+
+      - b'00 : No access
+      - b'01 : Read-Write access
+      - b'10 : Reserved
+      - b'11 : Read-only access
+
+    - Bit[2]: Instruction access permission
+
+      - b'0 : Executable
+      - b'1 : Non-executable
+
+    - Bits[30:3] : Reserved. SBZ.
+
+    - Bit[31]   : Must be 0
+
+    On failure the following error codes are returned:
+
+    - ``INVALID_PARAMETER``: The Secure Partition is not allowed to access the
+      memory region the Base Address lies in.
+
+    - ``NOT_SUPPORTED`` : The SPM does not support retrieval of attributes of
+      any memory page that is accessible by the Secure Partition, or the
+      function was called from the Non-secure world. Also returned if it is
+      used after ``SP_EVENT_COMPLETE_AARCH64``.
+
+    See `Error Codes`_ for integer values that are associated with each return
+    code.
+
+- Usage
+
+  This function is used to request the permission attributes for S-EL0 on a
+  memory region accessible from a Secure Partition. The size of the memory
+  region is equal to the Translation Granule size used in the Secure EL1&0
+  translation regime. Requests to retrieve other memory region attributes are
+  not currently supported.
+
+- Caller responsibilities
+
+  The caller must obtain the Translation Granule Size of the Secure EL1&0
+  translation regime from the SPM through an implementation defined method.
+
+- Callee responsibilities
+
+  The SPM must not return the memory access controls for a page of memory that
+  is not accessible from a Secure Partition.
+
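+A short sketch of querying and decoding the attributes of a page follows;
+``sp_svc`` is a hypothetical SVC wrapper returning ``X0`` and ``va`` is the
+address being queried:
+
+::
+
+    #define SP_MEMORY_ATTRIBUTES_GET_AARCH64    UINT64_C(0xC4000064)
+
+    int32_t attr = sp_svc(SP_MEMORY_ATTRIBUTES_GET_AARCH64, va, 0, 0);
+    if (attr >= 0) {
+        unsigned int data_ap = attr & 0x3; /* Bits[1:0]: 0 = no access,
+                                            * 1 = RW, 3 = RO */
+        int non_exec = (attr >> 2) & 0x1;  /* Bit[2]: 1 = Execute Never */
+    }
+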
+``SP_MEMORY_ATTRIBUTES_SET_AARCH64``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Description
+
+  Set the permission attributes of a memory region from S-EL0.
+
+- Parameters
+
+  - **uint32** - Function ID
+
+    - SVC64 Version: **0xC4000065**
+
+  - **uint64** - Base Address
+
+    This parameter is a 64-bit Virtual Address (VA).
+
+    The alignment of the Base Address must be greater than or equal to the size
+    of the Translation Granule Size used in the Secure EL1&0 translation
+    regime.
+
+  - **uint32** - Page count
+
+    Number of pages starting from the Base Address whose memory attributes
+    should be changed. The page size is equal to the Translation Granule Size.
+
+  - **uint32** - Memory Access Controls
+
+    - Bits[1:0] : Data access permission
+
+      - b'00 : No access
+      - b'01 : Read-Write access
+      - b'10 : Reserved
+      - b'11 : Read-only access
+
+    - Bit[2] : Instruction access permission
+
+      - b'0 : Executable
+      - b'1 : Non-executable
+
+    - Bits[31:3] : Reserved. SBZ.
+
+    A combination of attributes that marks the region with RW and Executable
+    permissions is prohibited. A request to mark a device memory region with
+    Executable permissions is also prohibited.
+
+- Return parameters
+
+  - **int32** - Return Code
+
+    - ``SUCCESS``: The Memory Access Controls were changed successfully.
+
+    - ``DENIED``: The SPM is servicing a request to change the attributes of a
+      memory region that overlaps with the region specified in this request.
+
+    - ``INVALID_PARAMETER``: An invalid combination of Memory Access Controls
+      has been specified, the Base Address is not correctly aligned, or the
+      Secure Partition is not allowed to access part or all of the memory
+      region specified in the call.
+
+    - ``NO_MEMORY``: The SPM does not have memory resources to change the
+      attributes of the memory region in the translation tables.
+
+    - ``NOT_SUPPORTED``: The SPM does not permit change of attributes of any
+      memory region that is accessible by the Secure Partition, or the function
+      was called from the Non-secure world. Also returned if it is used after
+      ``SP_EVENT_COMPLETE_AARCH64``.
+
+    See `Error Codes`_ for integer values that are associated with each return
+    code.
+
+- Usage
+
+  This function is used to change the permission attributes for S-EL0 on a
+  memory region accessible from a Secure Partition. The size of the memory
+  region is equal to the Translation Granule size used in the Secure EL1&0
+  translation regime. Requests to change other memory region attributes are not
+  currently supported.
+
+  This function is only available at boot time. This interface is revoked after
+  the Secure Partition sends the first ``SP_EVENT_COMPLETE_AARCH64`` to signal
+  that it is initialised and ready to receive run-time requests.
+
+- Caller responsibilities
+
+  The caller must obtain the Translation Granule Size of the Secure EL1&0
+  translation regime from the SPM through an implementation defined method.
+
+- Callee responsibilities
+
+  The SPM must preserve the original memory access controls of the region of
+  memory in case of an unsuccessful call. The SPM must preserve the consistency
+  of the Secure EL1&0 translation regime if this function is called on
+  different PEs concurrently and the memory regions specified overlap.
+
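+As an example, the loader scenario described earlier could use this interface
+during initialisation to re-mark a freshly loaded code section as read-only
+and executable. This is a sketch only: ``sp_svc``, ``text_va``, ``text_pages``
+and ``handle_error`` are hypothetical, and ``text_va`` is assumed to be
+aligned to the Translation Granule Size:
+
+::
+
+    #define SP_MEMORY_ATTRIBUTES_SET_AARCH64    UINT64_C(0xC4000065)
+    #define SP_MEM_ATTR_RO      (UINT32_C(3) << 0) /* Bits[1:0] = b'11 */
+    #define SP_MEM_ATTR_EXEC    (UINT32_C(0) << 2) /* Bit[2] = b'0 */
+
+    int32_t ret = sp_svc(SP_MEMORY_ATTRIBUTES_SET_AARCH64,
+                         text_va, text_pages,
+                         SP_MEM_ATTR_RO | SP_MEM_ATTR_EXEC);
+    if (ret != SUCCESS) {
+        /* e.g. DENIED, INVALID_PARAMETER, NO_MEMORY or NOT_SUPPORTED. */
+        handle_error(ret);
+    }
+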
+Error Codes
+-----------
+
+.. csv-table::
+   :header: "Name", "Value"
+
+   ``SUCCESS``,0
+   ``NOT_SUPPORTED``,-1
+   ``INVALID_PARAMETER``,-2
+   ``DENIED``,-3
+   ``NO_MEMORY``,-5
+   ``NOT_PRESENT``,-7
+
+--------------
+
+*Copyright (c) 2017, Arm Limited and Contributors. All rights reserved.*
+
+.. _ARMv8 ARM: https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile
+.. _instructions in the EDK2 repository: https://github.com/tianocore/edk2-staging/blob/AArch64StandaloneMm/HowtoBuild.MD
+.. _Management Mode Interface Specification: http://infocenter.arm.com/help/topic/com.arm.doc.den0060a/DEN0060A_ARM_MM_Interface_Specification.pdf
+.. _SDEI Specification: http://infocenter.arm.com/help/topic/com.arm.doc.den0054a/ARM_DEN0054A_Software_Delegated_Exception_Interface.pdf
+.. _SMC Calling Convention: http://infocenter.arm.com/help/topic/com.arm.doc.den0028b/ARM_DEN0028B_SMC_Calling_Convention.pdf
+
+.. |Image 1| image:: diagrams/secure_sw_stack_tos.png
+.. |Image 2| image:: diagrams/secure_sw_stack_sp.png
diff --git a/docs/spm-user-guide.rst b/docs/spm-user-guide.rst
deleted file mode 100644
index a3b64d9..0000000
--- a/docs/spm-user-guide.rst
+++ /dev/null
@@ -1,59 +0,0 @@
-ARM Trusted Firmware - SPM User Guide
-=====================================
-
-.. section-numbering::
-    :suffix: .
-
-.. contents::
-
-
-This document briefly presents the Secure Partition Management (SPM) support in
-the Arm Trusted Firmware (TF), specifically focusing on how to build Arm TF with
-SPM support.
-
-Overview of the SPM software stack
-----------------------------------
-
-SPM is supported on the Arm FVP exclusively at the moment.
-
-It is not currently possible for BL31 to integrate SPM support and a Secure
-Payload Dispatcher (SPD) at the same time; they are mutually exclusive. In the
-SPM bootflow, a Secure Partition (SP) image executing at Secure-EL0 replaces the
-Secure Payload image executing at Secure-EL1 (e.g. a Trusted OS). Both are
-referred to as BL32.
-
-A working prototype of a SP has been implemented by repurposing the EDK2 code
-and tools, leveraging the concept of the *Standalone Management Mode (MM)* in
-the UEFI specification (see the PI v1.6 Volume 4: Management Mode Core
-Interface). This will be referred to as the *Standalone MM Secure Partition* in
-the rest of this document.
-
-
-Building TF with SPM support
-----------------------------
-
-To enable SPM support in the TF, the source code must be compiled with the build
-flag ``ENABLE_SPM=1``. On Arm platforms the build option ``ARM_BL31_IN_DRAM``
-can be used to select the location of BL31, both SRAM and DRAM are supported.
-
-
-Using the Standalone MM SP
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-First, build the Standalone MM Secure Partition. To build it, refer to the
-`instructions in the EDK2 repository`_.
-
-Then build TF with SPM support and include the Standalone MM Secure Partition
-image in the FIP:
-
-::
-
-    BL32=path/to/standalone/mm/sp BL33=path/to/bl33.bin \
-    make PLAT=fvp ENABLE_SPM=1 fip all
-
-
---------------
-
-*Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.*
-
-.. _instructions in the EDK2 repository: https://github.com/tianocore/edk2-staging/blob/AArch64StandaloneMm/HowtoBuild.MD
diff --git a/docs/user-guide.rst b/docs/user-guide.rst
index 13f0964..ed5ba18 100644
--- a/docs/user-guide.rst
+++ b/docs/user-guide.rst
@@ -55,7 +55,7 @@
 
     sudo apt-get install build-essential gcc make git libssl-dev
 
-ARM TF has been tested with `Linaro Release 17.04`_.
+ARM TF has been tested with `Linaro Release 17.10`_.
 
 Download and install the AArch32 or AArch64 little-endian GCC cross compiler.
 The `Linaro Release Notes`_ documents which version of the compiler to use for a
@@ -245,6 +245,9 @@
    BL2U image. In this case, the BL2U in the ARM Trusted Firmware will not
    be built.
 
+-  ``BL2_AT_EL3``: This is an optional build option that enables the use of
+   BL2 at the EL3 exception level.
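+
+   For instance, on Arm platforms a BL2-at-EL3 build could be produced with a
+   command along these lines (illustrative only):
+
+   ::
+
+       make PLAT=fvp BL2_AT_EL3=1 all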
+
 -  ``BL31``: This is an optional build option which specifies the path to
    BL31 image for the ``fip`` target. In this case, the BL31 in the ARM
    Trusted Firmware will not be built.
@@ -1006,7 +1009,7 @@
    modules by checking out a recent version of the `mbed TLS Repository`_. It
    is important to use a version that is compatible with TF and fixes any
    known security vulnerabilities. See `mbed TLS Security Center`_ for more
-   information. The latest version of TF is tested with tag ``mbedtls-2.4.2``.
+   information. The latest version of TF is tested with tag ``mbedtls-2.6.0``.
 
    The ``drivers/auth/mbedtls/mbedtls_*.mk`` files contain the list of mbed TLS
    source files the modules depend upon.
@@ -1475,10 +1478,10 @@
 The latest version of the AArch64 build of ARM Trusted Firmware has been tested
 on the following ARM FVPs (64-bit host machine only).
 
-NOTE: Unless otherwise stated, the model version is Version 11.1 Build 11.1.22.
+NOTE: Unless otherwise stated, the model version is Version 11.2 Build 11.2.33.
 
 -  ``Foundation_Platform``
--  ``FVP_Base_AEMv8A-AEMv8A`` (Version 8.7, Build 0.8.8702)
+-  ``FVP_Base_AEMv8A-AEMv8A`` (Version 9.0, Build 0.8.9005)
 -  ``FVP_Base_Cortex-A35x4``
 -  ``FVP_Base_Cortex-A53x4``
 -  ``FVP_Base_Cortex-A57x4-A53x4``
@@ -1491,7 +1494,7 @@
 The latest version of the AArch32 build of ARM Trusted Firmware has been tested
 on the following ARM FVPs (64-bit host machine only).
 
--  ``FVP_Base_AEMv8A-AEMv8A`` (Version 8.7, Build 0.8.8702)
+-  ``FVP_Base_AEMv8A-AEMv8A`` (Version 9.0, Build 0.8.9005)
 -  ``FVP_Base_Cortex-A32x4``
 
 NOTE: The build numbers quoted above are those reported by launching the FVP
@@ -1868,10 +1871,10 @@
 
 .. _Linaro: `Linaro Release Notes`_
 .. _Linaro Release: `Linaro Release Notes`_
-.. _Linaro Release Notes: https://community.arm.com/tools/dev-platforms/b/documents/posts/linaro-release-notes-deprecated
-.. _Linaro Release 17.04: https://community.arm.com/tools/dev-platforms/b/documents/posts/linaro-release-notes-deprecated#LinaroRelease17.04
-.. _Linaro instructions: https://community.arm.com/dev-platforms/b/documents/posts/instructions-for-using-the-linaro-software-deliverables
-.. _Instructions for using Linaro's deliverables on Juno: https://community.arm.com/dev-platforms/b/documents/posts/using-linaros-deliverables-on-juno
+.. _Linaro Release Notes: https://community.arm.com/dev-platforms/w/docs/226/old-linaro-release-notes
+.. _Linaro Release 17.10: https://community.arm.com/dev-platforms/w/docs/226/old-linaro-release-notes#1710
+.. _Linaro instructions: https://community.arm.com/dev-platforms/w/docs/304/linaro-software-deliverables
+.. _Instructions for using Linaro's deliverables on Juno: https://community.arm.com/dev-platforms/w/docs/303/juno
 .. _ARM Platforms Portal: https://community.arm.com/dev-platforms/
 .. _Development Studio 5 (DS-5): http://www.arm.com/products/tools/software-tools/ds-5/index.php
 .. _Dia: https://wiki.gnome.org/Apps/Dia/Download
diff --git a/drivers/auth/mbedtls/mbedtls_common.mk b/drivers/auth/mbedtls/mbedtls_common.mk
index f2b6f6e..8c4123d 100644
--- a/drivers/auth/mbedtls/mbedtls_common.mk
+++ b/drivers/auth/mbedtls/mbedtls_common.mk
@@ -29,7 +29,4 @@
 				platform.c 				\
 				)
 
-BL1_SOURCES		+=	${MBEDTLS_COMMON_SOURCES}
-BL2_SOURCES		+=	${MBEDTLS_COMMON_SOURCES}
-
 endif
diff --git a/drivers/auth/mbedtls/mbedtls_crypto.mk b/drivers/auth/mbedtls/mbedtls_crypto.mk
index 8eb4873..6b15e71 100644
--- a/drivers/auth/mbedtls/mbedtls_crypto.mk
+++ b/drivers/auth/mbedtls/mbedtls_crypto.mk
@@ -89,6 +89,3 @@
 # Needs to be set to drive mbed TLS configuration correctly
 $(eval $(call add_define,TF_MBEDTLS_KEY_ALG_ID))
 $(eval $(call add_define,TF_MBEDTLS_HASH_ALG_ID))
-
-BL1_SOURCES			+=	${MBEDTLS_CRYPTO_SOURCES}
-BL2_SOURCES			+=	${MBEDTLS_CRYPTO_SOURCES}
diff --git a/drivers/auth/mbedtls/mbedtls_x509.mk b/drivers/auth/mbedtls/mbedtls_x509.mk
index 0f28b65..a6f72e6 100644
--- a/drivers/auth/mbedtls/mbedtls_x509.mk
+++ b/drivers/auth/mbedtls/mbedtls_x509.mk
@@ -11,6 +11,3 @@
 				x509.c 						\
 				x509_crt.c 					\
 				)
-
-BL1_SOURCES		+=	${MBEDTLS_X509_SOURCES}
-BL2_SOURCES		+=	${MBEDTLS_X509_SOURCES}
diff --git a/drivers/emmc/emmc.c b/drivers/emmc/emmc.c
index bcdc82c..92d1e87 100644
--- a/drivers/emmc/emmc.c
+++ b/drivers/emmc/emmc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  *
@@ -353,7 +353,9 @@
 	       (clk != 0) &&
 	       ((width == EMMC_BUS_WIDTH_1) ||
 		(width == EMMC_BUS_WIDTH_4) ||
-		(width == EMMC_BUS_WIDTH_8)));
+		(width == EMMC_BUS_WIDTH_8) ||
+		(width == EMMC_BUS_WIDTH_DDR_4) ||
+		(width == EMMC_BUS_WIDTH_DDR_8)));
 	ops = ops_ptr;
 	emmc_flags = flags;
 
diff --git a/drivers/io/io_block.c b/drivers/io/io_block.c
index 128246f..8226554 100644
--- a/drivers/io/io_block.c
+++ b/drivers/io/io_block.c
@@ -167,15 +167,98 @@
 	return 0;
 }
 
+/*
+ * This function allows the caller to read any number of bytes
+ * from any position. It hides from the caller that the low level
+ * driver can only read aligned blocks of data. For this reason
+ * we need to handle the use case where the first byte to be read is not
+ * aligned to the start of a block, the last byte to be read is also not
+ * aligned to the end of a block, and there are zero or more blocks-worth
+ * of data in between.
+ *
+ * In such a case we need to read more bytes than requested (i.e. full
+ * blocks) and strip out the leading bytes (aka skip) and the trailing
+ * bytes (aka padding). See the diagram below.
+ *
+ * cur->file_pos ------------
+ *                          |
+ * cur->base                |
+ *  |                       |
+ *  v                       v<----  length   ---->
+ *  --------------------------------------------------------------
+ * |           |         block#1    |        |   block#n          |
+ * |  block#0  |            +       |   ...  |     +              |
+ * |           | <- skip -> +       |        |     + <- padding ->|
+ *  ------------------------+----------------------+--------------
+ *             ^                                                  ^
+ *             |                                                  |
+ *             v    iteration#1                iteration#n        v
+ *              --------------------------------------------------
+ *             |                    |        |                    |
+ *             |<----  request ---->|  ...   |<----- request ---->|
+ *             |                    |        |                    |
+ *              --------------------------------------------------
+ *            /                   /          |                    |
+ *           /                   /           |                    |
+ *          /                   /            |                    |
+ *         /                   /             |                    |
+ *        /                   /              |                    |
+ *       /                   /               |                    |
+ *      /                   /                |                    |
+ *     /                   /                 |                    |
+ *    /                   /                  |                    |
+ *   /                   /                   |                    |
+ *  <---- request ------>                    <------ request  ----->
+ *  ---------------------                    -----------------------
+ *  |        |          |                    |          |           |
+ *  |<-skip->|<-nbytes->|           -------->|<-nbytes->|<-padding->|
+ *  |        |          |           |        |          |           |
+ *  ---------------------           |        -----------------------
+ *  ^        \           \          |        |          |
+ *  |         \           \         |        |          |
+ *  |          \           \        |        |          |
+ *  buf->offset \           \   buf->offset  |          |
+ *               \           \               |          |
+ *                \           \              |          |
+ *                 \           \             |          |
+ *                  \           \            |          |
+ *                   \           \           |          |
+ *                    \           \          |          |
+ *                     \           \         |          |
+ *                      --------------------------------
+ *                      |           |        |         |
+ * buffer-------------->|           | ...    |         |
+ *                      |           |        |         |
+ *                      --------------------------------
+ *                      <-count#1->|                   |
+ *                      <----------  count#n   -------->
+ *                      <----------  length  ---------->
+ *
+ * Additionally, the IO driver has an underlying buffer that is at least
+ * one block in size; when it is larger, a single iteration can transfer
+ * several blocks at once.
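+ *
+ * As a worked example (illustrative numbers: 512-byte blocks and an
+ * underlying buffer of at least three blocks): a read of 1000 bytes
+ * starting at file_pos 300 gives skip = 300 and a request rounded up
+ * to 1536 bytes; after the read, nbytes = 1236 and padding = 236, so
+ * nbytes - padding = 1000 bytes are copied to the user buffer.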
+ */
 static int block_read(io_entity_t *entity, uintptr_t buffer, size_t length,
 		      size_t *length_read)
 {
 	block_dev_state_t *cur;
 	io_block_spec_t *buf;
 	io_block_ops_t *ops;
-	size_t aligned_length, skip, count, left, padding, block_size;
 	int lba;
-	int buffer_not_aligned;
+	size_t block_size, left;
+	size_t nbytes;  /* number of bytes read in one iteration */
+	size_t request; /* number of requested bytes in one iteration */
+	size_t count;   /* number of bytes already read */
+	/*
+	 * number of leading bytes from start of the block
+	 * to the first byte to be read
+	 */
+	size_t skip;
+
+	/*
+	 * number of trailing bytes between the last byte
+	 * to be read and the end of the block
+	 */
+	size_t padding;
 
 	assert(entity->info != (uintptr_t)NULL);
 	cur = (block_dev_state_t *)entity->info;
@@ -186,102 +269,107 @@
 	       (length > 0) &&
 	       (ops->read != 0));
 
-	if ((buffer & (block_size - 1)) != 0) {
+	/*
+	 * We don't know the number of bytes that we are going
+	 * to read in every iteration, because it will depend
+	 * on the low level driver.
+	 */
+	count = 0;
+	for (left = length; left > 0; left -= nbytes) {
 		/*
-		 * buffer isn't aligned with block size.
-		 * Block device always relies on DMA operation.
-		 * It's better to make the buffer as block size aligned.
+		 * We must only request operations aligned to the block
+		 * size. Therefore if file_pos is not block-aligned,
+		 * we have to request the operation to start at the
+		 * previous block boundary and skip the leading bytes. And
+		 * similarly, the number of bytes requested must be a
+		 * block size multiple.
 		 */
-		buffer_not_aligned = 1;
-	} else {
-		buffer_not_aligned = 0;
-	}
+		skip = cur->file_pos & (block_size - 1);
 
-	skip = cur->file_pos % block_size;
-	aligned_length = ((skip + length) + (block_size - 1)) &
-			 ~(block_size - 1);
-	padding = aligned_length - (skip + length);
-	left = aligned_length;
-	do {
+		/*
+		 * Calculate the block number containing file_pos
+		 * - e.g. block 3.
+		 */
 		lba = (cur->file_pos + cur->base) / block_size;
-		if (left >= buf->length) {
+
+		if (skip + left > buf->length) {
 			/*
-			 * Since left is larger, it's impossible to padding.
-			 *
-			 * If buffer isn't aligned, we need to use aligned
-			 * buffer instead.
+			 * The underlying read buffer is too small to
+			 * read all the required data - limit to just
+			 * fill the buffer, and then read again.
 			 */
-			if (skip || buffer_not_aligned) {
-				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size, we need to use
-				 * block buffer to read block. Since block
-				 * device is always relied on DMA operation.
-				 */
-				count = ops->read(lba, buf->offset,
-						  buf->length);
-			} else {
-				count = ops->read(lba, buffer, buf->length);
-			}
-			assert(count == buf->length);
-			cur->file_pos += count - skip;
-			if (skip || buffer_not_aligned) {
-				/*
-				 * Since there's not aligned block size caused
-				 * by skip or not aligned buffer, block buffer
-				 * is used to store data.
-				 */
-				memcpy((void *)buffer,
-				       (void *)(buf->offset + skip),
-				       count - skip);
-			}
-			left = left - (count - skip);
+			request = buf->length;
 		} else {
-			if (skip || padding || buffer_not_aligned) {
-				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size, we have to read
-				 * full block by block buffer instead.
-				 * The size isn't aligned with block size.
-				 * Use block buffer to avoid overflow.
-				 *
-				 * If buffer isn't aligned, use block buffer
-				 * to avoid DMA error.
-				 */
-				count = ops->read(lba, buf->offset, left);
-			} else
-				count = ops->read(lba, buffer, left);
-			assert(count == left);
-			left = left - (skip + padding);
-			cur->file_pos += left;
-			if (skip || padding || buffer_not_aligned) {
-				/*
-				 * Since there's not aligned block size or
-				 * buffer, block buffer is used to store data.
-				 */
-				memcpy((void *)buffer,
-				       (void *)(buf->offset + skip),
-				       left);
-			}
-			/* It's already the last block operation */
-			left = 0;
+			/*
+			 * The underlying read buffer is big enough to
+			 * read all the required data. Calculate the
+			 * number of bytes to read to align with the
+			 * block size.
+			 */
+			request = skip + left;
+			request = (request + (block_size - 1)) & ~(block_size - 1);
+		}
+		request = ops->read(lba, buf->offset, request);
+
+		if (request <= skip) {
+			/*
+			 * We couldn't read enough bytes to jump over
+			 * the skip bytes, so we would have to read
+			 * the same block again, likely generating
+			 * the same error.
+			 */
+			return -EIO;
 		}
-		skip = cur->file_pos % block_size;
-	} while (left > 0);
-	*length_read = length;
+
+		/*
+		 * Need to remove skip and padding bytes, if any, from
+		 * the read data when copying to the user buffer.
+		 */
+		nbytes = request - skip;
+		padding = (nbytes > left) ? nbytes - left : 0;
+		nbytes -= padding;
+
+		memcpy((void *)(buffer + count),
+		       (void *)(buf->offset + skip),
+		       nbytes);
+
+		cur->file_pos += nbytes;
+		count += nbytes;
+	}
+	assert(count == length);
+	*length_read = count;
 
 	return 0;
 }
 
+/*
+ * This function allows the caller to write any number of bytes
+ * from any position. It hides from the caller that the low level
+ * driver can only write aligned blocks of data.
+ * See comments for block_read for more details.
+ */
 static int block_write(io_entity_t *entity, const uintptr_t buffer,
 		       size_t length, size_t *length_written)
 {
 	block_dev_state_t *cur;
 	io_block_spec_t *buf;
 	io_block_ops_t *ops;
-	size_t aligned_length, skip, count, left, padding, block_size;
 	int lba;
-	int buffer_not_aligned;
+	size_t block_size, left;
+	size_t nbytes;  /* number of bytes written in one iteration */
+	size_t request; /* number of requested bytes in one iteration */
+	size_t count;   /* number of bytes already written */
+	/*
+	 * number of leading bytes from start of the block
+	 * to the first byte to be written
+	 */
+	size_t skip;
+
+	/*
+	 * number of trailing bytes between the last byte
+	 * to be written and the end of the block
+	 */
+	size_t padding;
 
 	assert(entity->info != (uintptr_t)NULL);
 	cur = (block_dev_state_t *)entity->info;
@@ -293,75 +381,107 @@
 	       (ops->read != 0) &&
 	       (ops->write != 0));
 
-	if ((buffer & (block_size - 1)) != 0) {
+	/*
+	 * We don't know the number of bytes that we are going
+	 * to write in every iteration, because it will depend
+	 * on the low level driver.
+	 */
+	count = 0;
+	for (left = length; left > 0; left -= nbytes) {
 		/*
-		 * buffer isn't aligned with block size.
-		 * Block device always relies on DMA operation.
-		 * It's better to make the buffer as block size aligned.
+		 * We must only request operations aligned to the block
+		 * size. Therefore if file_pos is not block-aligned,
+		 * we have to request the operation to start at the
+		 * previous block boundary and skip the leading bytes. And
+		 * similarly, the number of bytes requested must be a
+		 * block size multiple.
 		 */
-		buffer_not_aligned = 1;
-	} else {
-		buffer_not_aligned = 0;
-	}
+		skip = cur->file_pos & (block_size - 1);
 
-	skip = cur->file_pos % block_size;
-	aligned_length = ((skip + length) + (block_size - 1)) &
-			 ~(block_size - 1);
-	padding = aligned_length - (skip + length);
-	left = aligned_length;
-	do {
+		/*
+		 * Calculate the block number containing file_pos
+		 * - e.g. block 3.
+		 */
 		lba = (cur->file_pos + cur->base) / block_size;
-		if (left >= buf->length) {
-			/* Since left is larger, it's impossible to padding. */
-			if (skip || buffer_not_aligned) {
-				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size or buffer isn't
-				 * aligned, we need to use block buffer to
-				 * write block.
-				 */
-				count = ops->read(lba, buf->offset,
-						  buf->length);
-				assert(count == buf->length);
-				memcpy((void *)(buf->offset + skip),
-				       (void *)buffer,
-				       count - skip);
-				count = ops->write(lba, buf->offset,
-						   buf->length);
-			} else
-				count = ops->write(lba, buffer, buf->length);
-			assert(count == buf->length);
-			cur->file_pos += count - skip;
-			left = left - (count - skip);
+
+		if (skip + left > buf->length) {
+			/*
+			 * The underlying buffer is too small to hold
+			 * all the required data - limit to just fill
+			 * the buffer, and then write again.
+			 */
+			request = buf->length;
 		} else {
-			if (skip || padding || buffer_not_aligned) {
+			/*
+			 * The underlying buffer is big enough to hold
+			 * all the required data. Calculate the number
+			 * of bytes to transfer to align with the
+			 * block size.
+			 */
+			request = skip + left;
+			request = (request + (block_size - 1)) & ~(block_size - 1);
+		}
+
+		/*
+		 * The number of bytes that we are going to write
+		 * from the user buffer will depend on the size
+		 * of the current request.
+		 */
+		nbytes = request - skip;
+		padding = (nbytes > left) ? nbytes - left : 0;
+		nbytes -= padding;
+
+		/*
+		 * If we have skip or padding bytes then we have to preserve
+		 * some existing content, which means that we have to
+		 * read before writing.
+		 */
+		if (skip > 0 || padding > 0) {
+			request = ops->read(lba, buf->offset, request);
+			/*
+			 * The read may return fewer bytes than
+			 * requested. Round down to the nearest block
+			 * boundary.
+			 */
+			request &= ~(block_size-1);
+			if (request <= skip) {
 				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size, we need to avoid
-				 * poluate data in the beginning. Reading and
-				 * skipping the beginning is the only way.
-				 * The size isn't aligned with block size.
-				 * Use block buffer to avoid overflow.
-				 *
-				 * If buffer isn't aligned, use block buffer
-				 * to avoid DMA error.
+				 * We couldn't read enough bytes to jump over
+				 * the skip bytes, so we would have to read
+				 * the same block again, likely generating
+				 * the same error.
 				 */
-				count = ops->read(lba, buf->offset, left);
-				assert(count == left);
-				memcpy((void *)(buf->offset + skip),
-				       (void *)buffer,
-				       left - skip - padding);
-				count = ops->write(lba, buf->offset, left);
-			} else
-				count = ops->write(lba, buffer, left);
-			assert(count == left);
-			cur->file_pos += left - (skip + padding);
-			/* It's already the last block operation */
-			left = 0;
+				return -EIO;
+			}
+			nbytes = request - skip;
+			padding = (nbytes > left) ? nbytes - left : 0;
+			nbytes -= padding;
 		}
-		skip = cur->file_pos % block_size;
-	} while (left > 0);
-	*length_written = length;
+
+		memcpy((void *)(buf->offset + skip),
+		       (void *)(buffer + count),
+		       nbytes);
+
+		request = ops->write(lba, buf->offset, request);
+		if (request <= skip)
+			return -EIO;
+
+		/*
+		 * The write operation may also have consumed fewer
+		 * bytes than requested, so again we have to calculate
+		 * the number of bytes consumed from the user
+		 * buffer.
+		 */
+		nbytes = request - skip;
+		padding = (nbytes > left) ? nbytes - left : 0;
+		nbytes -= padding;
+
+		cur->file_pos += nbytes;
+		count += nbytes;
+	}
+	assert(count == length);
+	*length_written = count;
+
 	return 0;
 }
 
diff --git a/drivers/synopsys/emmc/dw_mmc.c b/drivers/synopsys/emmc/dw_mmc.c
index e6904d1..701e6d5 100644
--- a/drivers/synopsys/emmc/dw_mmc.c
+++ b/drivers/synopsys/emmc/dw_mmc.c
@@ -146,7 +146,7 @@
 		if ((data & CMD_START) == 0)
 			break;
 		data = mmio_read_32(dw_params.reg_base + DWMMC_RINTSTS);
-		assert(data & INT_HLE);
+		assert((data & INT_HLE) == 0);
 	}
 }
 
diff --git a/include/bl32/payloads/tlk.h b/include/bl32/payloads/tlk.h
index 4e06bcd..941b6cc 100644
--- a/include/bl32/payloads/tlk.h
+++ b/include/bl32/payloads/tlk.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -20,6 +20,7 @@
  */
 #define TLK_REGISTER_LOGBUF	TLK_TOS_YIELD_FID(0x1)
 #define TLK_REGISTER_REQBUF	TLK_TOS_YIELD_FID(0x2)
+#define TLK_REGISTER_NS_DRAM	TLK_TOS_YIELD_FID(0x4)
 #define TLK_RESUME_FID		TLK_TOS_YIELD_FID(0x100)
 #define TLK_SYSTEM_SUSPEND	TLK_TOS_YIELD_FID(0xE001)
 #define TLK_SYSTEM_RESUME	TLK_TOS_YIELD_FID(0xE002)
diff --git a/include/common/aarch32/el3_common_macros.S b/include/common/aarch32/el3_common_macros.S
index 59e99f8..d654b65 100644
--- a/include/common/aarch32/el3_common_macros.S
+++ b/include/common/aarch32/el3_common_macros.S
@@ -260,9 +260,9 @@
 	 * ---------------------------------------------------------------------
 	 */
 	.if \_init_c_runtime
-#ifdef IMAGE_BL32
+#if defined(IMAGE_BL32) || (defined(IMAGE_BL2) && BL2_AT_EL3)
 		/* -----------------------------------------------------------------
-		 * Invalidate the RW memory used by the BL32 (SP_MIN) image. This
+		 * Invalidate the RW memory used by the image. This
 		 * includes the data and NOBITS sections. This is done to
 		 * safeguard against possible corruption of this memory by
 		 * dirty cache lines in a system cache as a result of use by
@@ -273,7 +273,7 @@
 		ldr	r1, =__RW_END__
 		sub	r1, r1, r0
 		bl	inv_dcache_range
-#endif /* IMAGE_BL32 */
+#endif
 
 		ldr	r0, =__BSS_START__
 		ldr	r1, =__BSS_SIZE__
diff --git a/include/common/aarch64/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S
index 63a0fa7..4ebf77b 100644
--- a/include/common/aarch64/el3_common_macros.S
+++ b/include/common/aarch64/el3_common_macros.S
@@ -13,7 +13,7 @@
 	/*
 	 * Helper macro to initialise EL3 registers we care about.
 	 */
-	.macro el3_arch_init_common _exception_vectors
+	.macro el3_arch_init_common
 	/* ---------------------------------------------------------------------
 	 * SCTLR_EL3 has already been initialised - read current value before
 	 * modifying.
@@ -50,14 +50,6 @@
 #endif /* IMAGE_BL31 */
 
 	/* ---------------------------------------------------------------------
-	 * Set the exception vectors.
-	 * ---------------------------------------------------------------------
-	 */
-	adr	x0, \_exception_vectors
-	msr	vbar_el3, x0
-	isb
-
-	/* ---------------------------------------------------------------------
 	 * Initialise SCR_EL3, setting all fields rather than relying on hw.
 	 * All fields are architecturally UNKNOWN on reset. The following fields
 	 * do not change during the TF lifetime. The remaining fields are set to
@@ -221,6 +213,14 @@
 	.endif /* _warm_boot_mailbox */
 
 	/* ---------------------------------------------------------------------
+	 * Set the exception vectors.
+	 * ---------------------------------------------------------------------
+	 */
+	adr	x0, \_exception_vectors
+	msr	vbar_el3, x0
+	isb
+
+	/* ---------------------------------------------------------------------
 	 * It is a cold boot.
 	 * Perform any processor specific actions upon reset e.g. cache, TLB
 	 * invalidations etc.
@@ -228,7 +228,7 @@
 	 */
 	bl	reset_handler
 
-	el3_arch_init_common \_exception_vectors
+	el3_arch_init_common
 
 	.if \_secondary_cold_boot
 		/* -------------------------------------------------------------
@@ -269,7 +269,7 @@
 	 * ---------------------------------------------------------------------
 	 */
 	.if \_init_c_runtime
-#ifdef IMAGE_BL31
+#if defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
 		/* -------------------------------------------------------------
 		 * Invalidate the RW memory used by the BL31 image. This
 		 * includes the data and NOBITS sections. This is done to
@@ -282,7 +282,7 @@
 		adr	x1, __RW_END__
 		sub	x1, x1, x0
 		bl	inv_dcache_range
-#endif /* IMAGE_BL31 */
+#endif
 
 		ldr	x0, =__BSS_START__
 		ldr	x1, =__BSS_SIZE__
diff --git a/include/common/asm_macros_common.S b/include/common/asm_macros_common.S
index 6a02e18..ca8c1ad 100644
--- a/include/common/asm_macros_common.S
+++ b/include/common/asm_macros_common.S
@@ -29,7 +29,7 @@
 	 * debugging experience.
 	 */
 	.cfi_sections .debug_frame
-	.section .text.\_name, "ax"
+	.section .text.asm.\_name, "ax"
 	.type \_name, %function
 	.func \_name
 	/*
diff --git a/include/drivers/emmc.h b/include/drivers/emmc.h
index 921f4cf..286c014 100644
--- a/include/drivers/emmc.h
+++ b/include/drivers/emmc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -25,6 +25,7 @@
 #define EMMC_CMD13			13
 #define EMMC_CMD17			17
 #define EMMC_CMD18			18
+#define EMMC_CMD21			21
 #define EMMC_CMD23			23
 #define EMMC_CMD24			24
 #define EMMC_CMD25			25
@@ -61,6 +62,8 @@
 #define EMMC_BUS_WIDTH_1		0
 #define EMMC_BUS_WIDTH_4		1
 #define EMMC_BUS_WIDTH_8		2
+#define EMMC_BUS_WIDTH_DDR_4		5
+#define EMMC_BUS_WIDTH_DDR_8		6
 #define EMMC_BOOT_MODE_BACKWARD		(0 << 3)
 #define EMMC_BOOT_MODE_HS_TIMING	(1 << 3)
 #define EMMC_BOOT_MODE_DDR		(2 << 3)
diff --git a/include/lib/aarch32/arch_helpers.h b/include/lib/aarch32/arch_helpers.h
index 0230195..beae5d0 100644
--- a/include/lib/aarch32/arch_helpers.h
+++ b/include/lib/aarch32/arch_helpers.h
@@ -287,6 +287,11 @@
 DEFINE_COPROCR_RW_FUNCS(amcntenclr0, AMCNTENCLR0)
 DEFINE_COPROCR_RW_FUNCS(amcntenclr1, AMCNTENCLR1)
 
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr00, AMEVCNTR00)
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr01, AMEVCNTR01)
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr02, AMEVCNTR02)
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr03, AMEVCNTR03)
+
 /*
  * TLBI operation prototypes
  */
diff --git a/include/lib/aarch64/arch.h b/include/lib/aarch64/arch.h
index 96e2d5f..91aa484 100644
--- a/include/lib/aarch64/arch.h
+++ b/include/lib/aarch64/arch.h
@@ -117,6 +117,9 @@
 #define ID_AA64PFR0_SVE_SHIFT	U(32)
 #define ID_AA64PFR0_SVE_MASK	U(0xf)
 #define ID_AA64PFR0_SVE_LENGTH	U(4)
+#define ID_AA64PFR0_CSV2_SHIFT	U(56)
+#define ID_AA64PFR0_CSV2_MASK	U(0xf)
+#define ID_AA64PFR0_CSV2_LENGTH	U(4)
 
 /* ID_AA64DFR0_EL1.PMS definitions (for ARMv8.2+) */
 #define ID_AA64DFR0_PMS_SHIFT	U(32)
@@ -337,6 +340,11 @@
 #define SPSR_T_ARM		U(0x0)
 #define SPSR_T_THUMB		U(0x1)
 
+#define SPSR_M_SHIFT		U(4)
+#define SPSR_M_MASK		U(0x1)
+#define SPSR_M_AARCH64		U(0x0)
+#define SPSR_M_AARCH32		U(0x1)
+
 #define DISABLE_ALL_EXCEPTIONS \
 		(DAIF_FIQ_BIT | DAIF_IRQ_BIT | DAIF_ABT_BIT | DAIF_DBG_BIT)
 
@@ -656,4 +664,45 @@
 #define AMEVTYPER02_EL0		S3_3_C13_C6_2
 #define AMEVTYPER03_EL0		S3_3_C13_C6_3
 
+/* Activity Monitor Group 1 Event Counter Registers */
+#define AMEVCNTR10_EL0		S3_3_C13_C12_0
+#define AMEVCNTR11_EL0		S3_3_C13_C12_1
+#define AMEVCNTR12_EL0		S3_3_C13_C12_2
+#define AMEVCNTR13_EL0		S3_3_C13_C12_3
+#define AMEVCNTR14_EL0		S3_3_C13_C12_4
+#define AMEVCNTR15_EL0		S3_3_C13_C12_5
+#define AMEVCNTR16_EL0		S3_3_C13_C12_6
+#define AMEVCNTR17_EL0		S3_3_C13_C12_7
+#define AMEVCNTR18_EL0		S3_3_C13_C13_0
+#define AMEVCNTR19_EL0		S3_3_C13_C13_1
+#define AMEVCNTR1A_EL0		S3_3_C13_C13_2
+#define AMEVCNTR1B_EL0		S3_3_C13_C13_3
+#define AMEVCNTR1C_EL0		S3_3_C13_C13_4
+#define AMEVCNTR1D_EL0		S3_3_C13_C13_5
+#define AMEVCNTR1E_EL0		S3_3_C13_C13_6
+#define AMEVCNTR1F_EL0		S3_3_C13_C13_7
+
+/* Activity Monitor Group 1 Event Type Registers */
+#define AMEVTYPER10_EL0		S3_3_C13_C14_0
+#define AMEVTYPER11_EL0		S3_3_C13_C14_1
+#define AMEVTYPER12_EL0		S3_3_C13_C14_2
+#define AMEVTYPER13_EL0		S3_3_C13_C14_3
+#define AMEVTYPER14_EL0		S3_3_C13_C14_4
+#define AMEVTYPER15_EL0		S3_3_C13_C14_5
+#define AMEVTYPER16_EL0		S3_3_C13_C14_6
+#define AMEVTYPER17_EL0		S3_3_C13_C14_7
+#define AMEVTYPER18_EL0		S3_3_C13_C15_0
+#define AMEVTYPER19_EL0		S3_3_C13_C15_1
+#define AMEVTYPER1A_EL0		S3_3_C13_C15_2
+#define AMEVTYPER1B_EL0		S3_3_C13_C15_3
+#define AMEVTYPER1C_EL0		S3_3_C13_C15_4
+#define AMEVTYPER1D_EL0		S3_3_C13_C15_5
+#define AMEVTYPER1E_EL0		S3_3_C13_C15_6
+#define AMEVTYPER1F_EL0		S3_3_C13_C15_7
+
+/* AMCGCR_EL0 definitions */
+#define AMCGCR_EL0_CG1NC_SHIFT	U(8)
+#define AMCGCR_EL0_CG1NC_LENGTH	U(8)
+#define AMCGCR_EL0_CG1NC_MASK	U(0xff)
+
 #endif /* __ARCH_H__ */
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index 831dfb0..485ed43 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -322,6 +322,7 @@
 DEFINE_RENAME_SYSREG_WRITE_FUNC(icc_eoir1_el1, ICC_EOIR1_EL1)
 DEFINE_RENAME_SYSREG_WRITE_FUNC(icc_sgi0r_el1, ICC_SGI0R_EL1)
 
+DEFINE_RENAME_SYSREG_RW_FUNCS(amcgcr_el0, AMCGCR_EL0)
 DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenclr0_el0, AMCNTENCLR0_EL0)
 DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenset0_el0, AMCNTENSET0_EL0)
 DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenclr1_el0, AMCNTENCLR1_EL0)
diff --git a/include/lib/cpus/aarch32/cpu_macros.S b/include/lib/cpus/aarch32/cpu_macros.S
index e2e4316..0f3a572 100644
--- a/include/lib/cpus/aarch32/cpu_macros.S
+++ b/include/lib/cpus/aarch32/cpu_macros.S
@@ -9,6 +9,10 @@
 #include <arch.h>
 #include <errata_report.h>
 
+#if defined(IMAGE_BL1) || defined(IMAGE_BL32)  || (defined(IMAGE_BL2) && BL2_AT_EL3)
+#define IMAGE_AT_EL3
+#endif
+
 #define CPU_IMPL_PN_MASK	(MIDR_IMPL_MASK << MIDR_IMPL_SHIFT) | \
 				(MIDR_PN_MASK << MIDR_PN_SHIFT)
 
@@ -38,7 +42,7 @@
 CPU_MIDR: /* cpu_ops midr */
 	.space  4
 /* Reset fn is needed during reset */
-#if defined(IMAGE_BL1) || defined(IMAGE_BL32)
+#if defined(IMAGE_AT_EL3)
 CPU_RESET_FUNC: /* cpu_ops reset_func */
 	.space  4
 #endif
@@ -54,7 +58,7 @@
 #if REPORT_ERRATA
 CPU_ERRATA_FUNC: /* CPU errata status printing function */
 	.space  4
-#ifdef IMAGE_BL32
+#if defined(IMAGE_BL32)
 CPU_ERRATA_LOCK:
 	.space	4
 CPU_ERRATA_PRINTED:
@@ -120,7 +124,7 @@
 	.align 2
 	.type cpu_ops_\_name, %object
 	.word \_midr
-#if defined(IMAGE_BL1) || defined(IMAGE_BL32)
+#if defined(IMAGE_AT_EL3)
 	.word \_resetfunc
 #endif
 #ifdef IMAGE_BL32
diff --git a/include/lib/cpus/aarch64/cortex_a75.h b/include/lib/cpus/aarch64/cortex_a75.h
index d68c957..940125d 100644
--- a/include/lib/cpus/aarch64/cortex_a75.h
+++ b/include/lib/cpus/aarch64/cortex_a75.h
@@ -50,7 +50,19 @@
  * CPUAMEVTYPER<n> register and are disabled by default. Platforms may
  * enable this with suitable programming.
  */
+#define CORTEX_A75_AMU_NR_COUNTERS	5
 #define CORTEX_A75_AMU_GROUP0_MASK	0x7
 #define CORTEX_A75_AMU_GROUP1_MASK	(0 << 3)
 
+#ifndef __ASSEMBLY__
+#include <stdint.h>
+
+uint64_t cortex_a75_amu_cnt_read(int idx);
+void cortex_a75_amu_cnt_write(int idx, uint64_t val);
+unsigned int cortex_a75_amu_read_cpuamcntenset_el0(void);
+unsigned int cortex_a75_amu_read_cpuamcntenclr_el0(void);
+void cortex_a75_amu_write_cpuamcntenset_el0(unsigned int mask);
+void cortex_a75_amu_write_cpuamcntenclr_el0(unsigned int mask);
+#endif /* __ASSEMBLY__ */
+
 #endif /* __CORTEX_A75_H__ */
diff --git a/include/lib/cpus/aarch64/cpu_macros.S b/include/lib/cpus/aarch64/cpu_macros.S
index a8c23e5..ccf5306 100644
--- a/include/lib/cpus/aarch64/cpu_macros.S
+++ b/include/lib/cpus/aarch64/cpu_macros.S
@@ -21,6 +21,10 @@
 /* Word size for 64-bit CPUs */
 #define CPU_WORD_SIZE			8
 
+#if defined(IMAGE_BL1) || defined(IMAGE_BL31) ||(defined(IMAGE_BL2) && BL2_AT_EL3)
+#define IMAGE_AT_EL3
+#endif
+
 /*
  * Whether errata status needs reporting. Errata status is printed in debug
  * builds for both BL1 and BL31 images.
@@ -38,7 +42,7 @@
 CPU_MIDR: /* cpu_ops midr */
 	.space  8
 /* Reset fn is needed in BL at reset vector */
-#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
+#if defined(IMAGE_AT_EL3)
 CPU_RESET_FUNC: /* cpu_ops reset_func */
 	.space  8
 #endif
@@ -54,7 +58,7 @@
 #if REPORT_ERRATA
 CPU_ERRATA_FUNC:
 	.space	8
-#ifdef IMAGE_BL31
+#if defined(IMAGE_BL31)
 CPU_ERRATA_LOCK:
 	.space	8
 CPU_ERRATA_PRINTED:
@@ -124,7 +128,7 @@
 	.align 3
 	.type cpu_ops_\_name, %object
 	.quad \_midr
-#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
+#if defined(IMAGE_AT_EL3)
 	.quad \_resetfunc
 #endif
 #ifdef IMAGE_BL31
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index 5889904..5e212ec 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -46,12 +46,26 @@
 #define CTX_GPREG_SP_EL0	U(0xf8)
 #define CTX_GPREGS_END		U(0x100)
 
+#if WORKAROUND_CVE_2017_5715
+#define CTX_CVE_2017_5715_OFFSET	(CTX_GPREGS_OFFSET + CTX_GPREGS_END)
+#define CTX_CVE_2017_5715_QUAD0		U(0x0)
+#define CTX_CVE_2017_5715_QUAD1		U(0x8)
+#define	CTX_CVE_2017_5715_QUAD2		U(0x10)
+#define CTX_CVE_2017_5715_QUAD3		U(0x18)
+#define CTX_CVE_2017_5715_QUAD4		U(0x20)
+#define CTX_CVE_2017_5715_QUAD5		U(0x28)
+#define CTX_CVE_2017_5715_END		U(0x30)
+#else
+#define CTX_CVE_2017_5715_OFFSET	CTX_GPREGS_OFFSET
+#define CTX_CVE_2017_5715_END		CTX_GPREGS_END
+#endif
+
 /*******************************************************************************
  * Constants that allow assembler code to access members of and the 'el3_state'
  * structure at their correct offsets. Note that some of the registers are only
  * 32-bits wide but are stored as 64-bit values for convenience
  ******************************************************************************/
-#define CTX_EL3STATE_OFFSET	(CTX_GPREGS_OFFSET + CTX_GPREGS_END)
+#define CTX_EL3STATE_OFFSET	(CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_END)
 #define CTX_SCR_EL3		U(0x0)
 #define CTX_RUNTIME_SP		U(0x8)
 #define CTX_SPSR_EL3		U(0x10)
@@ -186,6 +200,9 @@
 
 /* Constants to determine the size of individual context structures */
 #define CTX_GPREG_ALL		(CTX_GPREGS_END >> DWORD_SHIFT)
+#if WORKAROUND_CVE_2017_5715
+#define CTX_CVE_2017_5715_ALL	(CTX_CVE_2017_5715_END >> DWORD_SHIFT)
+#endif
 #define CTX_SYSREG_ALL		(CTX_SYSREGS_END >> DWORD_SHIFT)
 #if CTX_INCLUDE_FPREGS
 #define CTX_FPREG_ALL		(CTX_FPREGS_END >> DWORD_SHIFT)
@@ -201,6 +218,10 @@
  */
 DEFINE_REG_STRUCT(gp_regs, CTX_GPREG_ALL);
 
+#if WORKAROUND_CVE_2017_5715
+DEFINE_REG_STRUCT(cve_2017_5715_regs, CTX_CVE_2017_5715_ALL);
+#endif
+
 /*
  * AArch64 EL1 system register context structure for preserving the
  * architectural state during switches from one security state to
@@ -242,6 +263,9 @@
  */
 typedef struct cpu_context {
 	gp_regs_t gpregs_ctx;
+#if WORKAROUND_CVE_2017_5715
+	cve_2017_5715_regs_t cve_2017_5715_regs_ctx;
+#endif
 	el3_state_t el3state_ctx;
 	el1_sys_regs_t sysregs_ctx;
 #if CTX_INCLUDE_FPREGS
diff --git a/include/lib/el3_runtime/pubsub_events.h b/include/lib/el3_runtime/pubsub_events.h
index 9cfedb4..64b3f63 100644
--- a/include/lib/el3_runtime/pubsub_events.h
+++ b/include/lib/el3_runtime/pubsub_events.h
@@ -17,6 +17,13 @@
  */
 REGISTER_PUBSUB_EVENT(psci_cpu_on_finish);
 
+/*
+ * These events are published before/after a CPU has been powered down/up
+ * via the PSCI CPU SUSPEND API.
+ */
+REGISTER_PUBSUB_EVENT(psci_suspend_pwrdown_start);
+REGISTER_PUBSUB_EVENT(psci_suspend_pwrdown_finish);
+
 #ifdef AARCH64
 /*
  * These events are published by the AArch64 context management framework
diff --git a/include/lib/extensions/amu.h b/include/lib/extensions/amu.h
index bbefe8f..faa0ee1 100644
--- a/include/lib/extensions/amu.h
+++ b/include/lib/extensions/amu.h
@@ -7,9 +7,39 @@
 #ifndef __AMU_H__
 #define __AMU_H__
 
-/* Enable all group 0 counters */
+#include <sys/cdefs.h> /* for CASSERT() */
+#include <cassert.h>
+#include <platform_def.h>
+#include <stdint.h>
+
+/* All group 0 counters */
 #define AMU_GROUP0_COUNTERS_MASK	0xf
 
+#ifdef PLAT_AMU_GROUP1_COUNTERS_MASK
+#define AMU_GROUP1_COUNTERS_MASK	PLAT_AMU_GROUP1_COUNTERS_MASK
+#else
+#define AMU_GROUP1_COUNTERS_MASK	0
+#endif
+
+#ifdef PLAT_AMU_GROUP1_NR_COUNTERS
+#define AMU_GROUP1_NR_COUNTERS		PLAT_AMU_GROUP1_NR_COUNTERS
+#else
+#define AMU_GROUP1_NR_COUNTERS		0
+#endif
+
+CASSERT(AMU_GROUP1_COUNTERS_MASK <= 0xffff, invalid_amu_group1_counters_mask);
+CASSERT(AMU_GROUP1_NR_COUNTERS <= 16, invalid_amu_group1_nr_counters);
+
+int amu_supported(void);
 void amu_enable(int el2_unused);
 
+/* Group 0 configuration helpers */
+uint64_t amu_group0_cnt_read(int idx);
+void amu_group0_cnt_write(int idx, uint64_t val);
+
+/* Group 1 configuration helpers */
+uint64_t amu_group1_cnt_read(int idx);
+void amu_group1_cnt_write(int idx, uint64_t val);
+void amu_group1_set_evtype(int idx, unsigned int val);
+
 #endif /* __AMU_H__ */
diff --git a/include/lib/extensions/amu_private.h b/include/lib/extensions/amu_private.h
new file mode 100644
index 0000000..0c660bb
--- /dev/null
+++ b/include/lib/extensions/amu_private.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __AMU_PRIVATE_H__
+#define __AMU_PRIVATE_H__
+
+#include <stdint.h>
+
+uint64_t amu_group0_cnt_read_internal(int idx);
+void amu_group0_cnt_write_internal(int idx, uint64_t val);
+
+uint64_t amu_group1_cnt_read_internal(int idx);
+void amu_group1_cnt_write_internal(int idx, uint64_t val);
+void amu_group1_set_evtype_internal(int idx, unsigned int val);
+
+#endif /* __AMU_PRIVATE_H__ */
diff --git a/include/lib/utils.h b/include/lib/utils.h
index cfc8302..3d215c3 100644
--- a/include/lib/utils.h
+++ b/include/lib/utils.h
@@ -19,7 +19,7 @@
 
 #include <types.h>
 
-typedef struct mem_region_t {
+typedef struct mem_region {
 	uintptr_t base;
 	size_t nbytes;
 } mem_region_t;
diff --git a/include/lib/xlat_tables/xlat_tables_v2_helpers.h b/include/lib/xlat_tables/xlat_tables_v2_helpers.h
index 1be99b7..de1c2d4 100644
--- a/include/lib/xlat_tables/xlat_tables_v2_helpers.h
+++ b/include/lib/xlat_tables/xlat_tables_v2_helpers.h
@@ -168,7 +168,7 @@
  * This IMAGE_EL macro must not be used outside the library, and it is only
  * used in AArch64.
  */
-#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
+#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
 # define IMAGE_EL	3
 # define IMAGE_XLAT_DEFAULT_REGIME EL3_REGIME
 #else
diff --git a/include/plat/arm/board/common/board_arm_def.h b/include/plat/arm/board/common/board_arm_def.h
index 69eb727..888629e 100644
--- a/include/plat/arm/board/common/board_arm_def.h
+++ b/include/plat/arm/board/common/board_arm_def.h
@@ -90,7 +90,7 @@
  * PLAT_ARM_MAX_BL31_SIZE is calculated using the current BL31 debug size plus a
  * little space for growth.
  */
-#define PLAT_ARM_MAX_BL31_SIZE		0x1D000
+#define PLAT_ARM_MAX_BL31_SIZE		0x20000
 
 #ifdef AARCH32
 /*
diff --git a/include/plat/arm/common/arm_def.h b/include/plat/arm/common/arm_def.h
index 7887525..697a0b0 100644
--- a/include/plat/arm/common/arm_def.h
+++ b/include/plat/arm/common/arm_def.h
@@ -334,6 +334,11 @@
 #define BL2_BASE			(BL1_RW_BASE - PLAT_ARM_MAX_BL2_SIZE)
 #define BL2_LIMIT			BL1_RW_BASE
 
+#elif BL2_AT_EL3
+
+#define BL2_BASE			ARM_BL_RAM_BASE
+#define BL2_LIMIT			(ARM_BL_RAM_BASE + ARM_BL_RAM_SIZE)
+
 #elif defined(AARCH32) || JUNO_AARCH32_EL3_RUNTIME
 /*
  * Put BL2 just below BL32.
@@ -491,4 +496,15 @@
 #define ARM_SDEI_DS_EVENT_1		2001
 #define ARM_SDEI_DS_EVENT_2		2002
 
+#define ARM_SDEI_PRIVATE_EVENTS \
+	SDEI_DEFINE_EVENT_0(ARM_SDEI_SGI), \
+	SDEI_PRIVATE_EVENT(ARM_SDEI_DP_EVENT_0, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC), \
+	SDEI_PRIVATE_EVENT(ARM_SDEI_DP_EVENT_1, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC), \
+	SDEI_PRIVATE_EVENT(ARM_SDEI_DP_EVENT_2, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC)
+
+#define ARM_SDEI_SHARED_EVENTS \
+	SDEI_SHARED_EVENT(ARM_SDEI_DS_EVENT_0, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC), \
+	SDEI_SHARED_EVENT(ARM_SDEI_DS_EVENT_1, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC), \
+	SDEI_SHARED_EVENT(ARM_SDEI_DS_EVENT_2, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC)
+
 #endif /* __ARM_DEF_H__ */
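
These two macros are intended to be dropped straight into a platform's SDEI
mapping tables. A sketch, assuming the SDEI dispatcher's sdei_ev_map_t type and
REGISTER_SDEI_MAP registration macro (the array names are hypothetical):

    static sdei_ev_map_t plat_private_sdei[] = { ARM_SDEI_PRIVATE_EVENTS };
    static sdei_ev_map_t plat_shared_sdei[] = { ARM_SDEI_SHARED_EVENTS };

    REGISTER_SDEI_MAP(plat_private_sdei, plat_shared_sdei);
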
diff --git a/include/plat/arm/common/plat_arm.h b/include/plat/arm/common/plat_arm.h
index abd7395..dfd7a20 100644
--- a/include/plat/arm/common/plat_arm.h
+++ b/include/plat/arm/common/plat_arm.h
@@ -145,6 +145,10 @@
 uint32_t arm_get_spsr_for_bl33_entry(void);
 int arm_bl2_handle_post_image_load(unsigned int image_id);
 
+/* BL2 at EL3 functions */
+void arm_bl2_el3_early_platform_setup(void);
+void arm_bl2_el3_plat_arch_setup(void);
+
 /* BL2U utility functions */
 void arm_bl2u_early_platform_setup(struct meminfo *mem_layout,
 				void *plat_info);
diff --git a/include/plat/common/platform.h b/include/plat/common/platform.h
index f11bee9..0960105 100644
--- a/include/plat/common/platform.h
+++ b/include/plat/common/platform.h
@@ -235,6 +235,21 @@
  * Optional BL2 functions (may be overridden)
  ******************************************************************************/
 
+
+/*******************************************************************************
+ * Mandatory BL2 at EL3 functions: Must be implemented if BL2_AT_EL3 image is
+ * supported
+ ******************************************************************************/
+void bl2_el3_early_platform_setup(u_register_t arg0, u_register_t arg1,
+				  u_register_t arg2, u_register_t arg3);
+void bl2_el3_plat_arch_setup(void);
+
+
+/*******************************************************************************
+ * Optional BL2 at EL3 functions (may be overridden)
+ ******************************************************************************/
+void bl2_el3_plat_prepare_exit(void);
+
 /*******************************************************************************
  * Mandatory BL2U functions.
  ******************************************************************************/
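
A platform that opts in to BL2 at EL3 implements the two mandatory bl2_el3_*
hooks above much like their BL31 counterparts; a minimal sketch is shown below
(the comments only indicate typical duties; the FVP implementation added later
in this patch is the canonical example):

    void bl2_el3_early_platform_setup(u_register_t arg0, u_register_t arg1,
    				  u_register_t arg2, u_register_t arg3)
    {
    	/* e.g. initialise the console and discover the platform config */
    }

    void bl2_el3_plat_arch_setup(void)
    {
    	/* e.g. set up translation tables and enable the MMU for BL2 */
    }
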
diff --git a/include/services/mm_svc.h b/include/services/mm_svc.h
new file mode 100644
index 0000000..7a8a3eb
--- /dev/null
+++ b/include/services/mm_svc.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __MM_SVC_H__
+#define __MM_SVC_H__
+
+#include <utils_def.h>
+
+#define MM_VERSION_MAJOR	U(1)
+#define MM_VERSION_MAJOR_SHIFT	16
+#define MM_VERSION_MAJOR_MASK	U(0x7FFF)
+#define MM_VERSION_MINOR	U(0)
+#define MM_VERSION_MINOR_SHIFT	0
+#define MM_VERSION_MINOR_MASK	U(0xFFFF)
+#define MM_VERSION_FORM(major, minor)	(((major) << MM_VERSION_MAJOR_SHIFT) | (minor))
+#define MM_VERSION_COMPILED	MM_VERSION_FORM(MM_VERSION_MAJOR, MM_VERSION_MINOR)
+
+/*
+ * SMC IDs defined in [1] for accessing MM services from the Non-secure world.
+ * These FIDs occupy the range 0x40 - 0x5f.
+ * [1] DEN0060A_ARM_MM_Interface_Specification.pdf
+ */
+#define MM_VERSION_AARCH32		U(0x84000040)
+
+#define MM_COMMUNICATE_AARCH64		U(0xC4000041)
+#define MM_COMMUNICATE_AARCH32		U(0x84000041)
+
+#endif /* __MM_SVC_H__ */
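
Answering the version query takes nothing beyond the macros above. A sketch of
the relevant arm of an SMC handler, assuming the usual `handle` context pointer
and the SMC_RET1 return helper:

    if (smc_fid == MM_VERSION_AARCH32)
    	SMC_RET1(handle, MM_VERSION_COMPILED);
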
diff --git a/include/services/spm_svc.h b/include/services/spm_svc.h
index 738979e..8f872c3 100644
--- a/include/services/spm_svc.h
+++ b/include/services/spm_svc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -10,15 +10,14 @@
 #include <utils_def.h>
 
 #define SPM_VERSION_MAJOR	U(0)
+#define SPM_VERSION_MAJOR_SHIFT	16
+#define SPM_VERSION_MAJOR_MASK	U(0x7FFF)
 #define SPM_VERSION_MINOR	U(1)
-#define SPM_VERSION_FORM(major, minor)	((major << 16) | (minor))
+#define SPM_VERSION_MINOR_SHIFT	0
+#define SPM_VERSION_MINOR_MASK	U(0xFFFF)
+#define SPM_VERSION_FORM(major, minor)	(((major) << SPM_VERSION_MAJOR_SHIFT) | (minor))
 #define SPM_VERSION_COMPILED	SPM_VERSION_FORM(SPM_VERSION_MAJOR, SPM_VERSION_MINOR)
 
-#define SP_VERSION_MAJOR	U(1)
-#define SP_VERSION_MINOR	U(0)
-#define SP_VERSION_FORM(major, minor)	((major << 16) | (minor))
-#define SP_VERSION_COMPILED	SP_VERSION_FORM(SP_VERSION_MAJOR, SP_VERSION_MINOR)
-
 /* The macros below are used to identify SPM calls from the SMC function ID */
 #define SPM_FID_MASK			U(0xffff)
 #define SPM_FID_MIN_VALUE		U(0x40)
@@ -31,6 +30,7 @@
  * SMC IDs defined for accessing services implemented by the Secure Partition
  * Manager from the Secure Partition(s). These services enable a partition to
  * handle delegated events and request privileged operations from the manager.
+ * They occupy the range 0x60-0x7f.
  */
 #define SPM_VERSION_AARCH32			U(0x84000060)
 #define SP_EVENT_COMPLETE_AARCH64		U(0xC4000061)
@@ -51,16 +51,6 @@
 #define SP_MEMORY_ATTRIBUTES_EXEC		(U(0) << 2)
 #define SP_MEMORY_ATTRIBUTES_NON_EXEC		(U(1) << 2)
 
-/*
- * SMC IDs defined in [1] for accessing secure partition services from the
- * Non-secure world. These FIDs occupy the range 0x40 - 0x5f
- * [1] DEN0060A_ARM_MM_Interface_Specification.pdf
- */
-#define SP_VERSION_AARCH64		U(0xC4000040)
-#define SP_VERSION_AARCH32		U(0x84000040)
-
-#define MM_COMMUNICATE_AARCH64		U(0xC4000041)
-#define MM_COMMUNICATE_AARCH32		U(0x84000041)
 
 /* SPM error codes. */
 #define SPM_SUCCESS		0
diff --git a/lib/cpus/aarch32/cortex_a72.S b/lib/cpus/aarch32/cortex_a72.S
index 7550520..35b9bc2 100644
--- a/lib/cpus/aarch32/cortex_a72.S
+++ b/lib/cpus/aarch32/cortex_a72.S
@@ -109,7 +109,7 @@
 	orr64_imm	r0, r1, CORTEX_A72_ECTLR_SMP_BIT
 	stcopr16	r0, r1,	CORTEX_A72_ECTLR
 	isb
-	bx	lr
+	bx	r5
 endfunc cortex_a72_reset_func
 
 	/* ----------------------------------------------------
diff --git a/lib/cpus/aarch32/cpu_helpers.S b/lib/cpus/aarch32/cpu_helpers.S
index bfdc1e4..72e42c6 100644
--- a/lib/cpus/aarch32/cpu_helpers.S
+++ b/lib/cpus/aarch32/cpu_helpers.S
@@ -10,7 +10,7 @@
 #include <cpu_data.h>
 #include <cpu_macros.S>
 
-#if defined(IMAGE_BL1) || defined(IMAGE_BL32)
+#if defined(IMAGE_BL1) || defined(IMAGE_BL32) || (defined(IMAGE_BL2) && BL2_AT_EL3)
 	/*
 	 * The reset handler common to all platforms.  After a matching
 	 * cpu_ops structure entry is found, the corresponding reset_handler
@@ -42,7 +42,7 @@
 	bx	lr
 endfunc reset_handler
 
-#endif /* IMAGE_BL1 || IMAGE_BL32 */
+#endif
 
 #ifdef IMAGE_BL32 /* The core and cluster power down functions are needed only in BL32 */
 	/*
diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S
index a720e98..683be47 100644
--- a/lib/cpus/aarch64/cortex_a57.S
+++ b/lib/cpus/aarch64/cortex_a57.S
@@ -383,6 +383,11 @@
 	bl	errata_a57_859972_wa
 #endif
 
+#if IMAGE_BL31 && WORKAROUND_CVE_2017_5715
+	adr	x0, workaround_mmu_runtime_exceptions
+	msr	vbar_el3, x0
+#endif
+
 	/* ---------------------------------------------
 	 * Enable the SMP bit.
 	 * ---------------------------------------------
diff --git a/lib/cpus/aarch64/cortex_a72.S b/lib/cpus/aarch64/cortex_a72.S
index b034125..93821b7 100644
--- a/lib/cpus/aarch64/cortex_a72.S
+++ b/lib/cpus/aarch64/cortex_a72.S
@@ -110,6 +110,12 @@
 	mov	x0, x18
 	bl	errata_a72_859971_wa
 #endif
+
+#if IMAGE_BL31 && WORKAROUND_CVE_2017_5715
+	adr	x0, workaround_mmu_runtime_exceptions
+	msr	vbar_el3, x0
+#endif
+
 	/* ---------------------------------------------
 	 * Enable the SMP bit.
 	 * ---------------------------------------------
diff --git a/lib/cpus/aarch64/cortex_a73.S b/lib/cpus/aarch64/cortex_a73.S
index f642816..c43f07e 100644
--- a/lib/cpus/aarch64/cortex_a73.S
+++ b/lib/cpus/aarch64/cortex_a73.S
@@ -36,6 +36,11 @@
 endfunc cortex_a73_disable_smp
 
 func cortex_a73_reset_func
+#if IMAGE_BL31 && WORKAROUND_CVE_2017_5715
+	adr	x0, workaround_bpiall_vbar0_runtime_exceptions
+	msr	vbar_el3, x0
+#endif
+
 	/* ---------------------------------------------
 	 * Enable the SMP bit.
 	 * Clobbers : x0
diff --git a/lib/cpus/aarch64/cortex_a75.S b/lib/cpus/aarch64/cortex_a75.S
index 4cab9e4..e66ad06 100644
--- a/lib/cpus/aarch64/cortex_a75.S
+++ b/lib/cpus/aarch64/cortex_a75.S
@@ -11,7 +11,120 @@
 #include <plat_macros.S>
 #include <cortex_a75.h>
 
+	.globl	cortex_a75_amu_cnt_read
+	.globl	cortex_a75_amu_cnt_write
+	.globl	cortex_a75_amu_read_cpuamcntenset_el0
+	.globl	cortex_a75_amu_read_cpuamcntenclr_el0
+	.globl	cortex_a75_amu_write_cpuamcntenset_el0
+	.globl	cortex_a75_amu_write_cpuamcntenclr_el0
+
+/*
+ * uint64_t cortex_a75_amu_cnt_read(int idx);
+ *
+ * Given `idx`, read the corresponding AMU counter
+ * and return it in `x0`.
+ */
+func cortex_a75_amu_cnt_read
+	adr	x1, 1f
+	lsl	x0, x0, #3
+	add	x1, x1, x0
+	br	x1
+
+1:
+	mrs	x0, CPUAMEVCNTR0_EL0
+	ret
+	mrs	x0, CPUAMEVCNTR1_EL0
+	ret
+	mrs	x0, CPUAMEVCNTR2_EL0
+	ret
+	mrs	x0, CPUAMEVCNTR3_EL0
+	ret
+	mrs	x0, CPUAMEVCNTR4_EL0
+	ret
+endfunc cortex_a75_amu_cnt_read
+
+/*
+ * void cortex_a75_amu_cnt_write(int idx, uint64_t val);
+ *
+ * Given `idx`, write `val` to the corresponding AMU counter.
+ */
+func cortex_a75_amu_cnt_write
+	adr	x2, 1f
+	lsl	x0, x0, #3
+	add	x2, x2, x0
+	br	x2
+
+1:
+	msr	CPUAMEVCNTR0_EL0, x1
+	ret
+	msr	CPUAMEVCNTR1_EL0, x1
+	ret
+	msr	CPUAMEVCNTR2_EL0, x1
+	ret
+	msr	CPUAMEVCNTR3_EL0, x1
+	ret
+	msr	CPUAMEVCNTR4_EL0, x1
+	ret
+endfunc cortex_a75_amu_cnt_write
+
+/*
+ * unsigned int cortex_a75_amu_read_cpuamcntenset_el0(void);
+ *
+ * Read the `CPUAMCNTENSET_EL0` CPU register and return
+ * it in `x0`.
+ */
+func cortex_a75_amu_read_cpuamcntenset_el0
+	mrs	x0, CPUAMCNTENSET_EL0
+	ret
+endfunc cortex_a75_amu_read_cpuamcntenset_el0
+
+/*
+ * unsigned int cortex_a75_amu_read_cpuamcntenclr_el0(void);
+ *
+ * Read the `CPUAMCNTENCLR_EL0` CPU register and return
+ * it in `x0`.
+ */
+func cortex_a75_amu_read_cpuamcntenclr_el0
+	mrs	x0, CPUAMCNTENCLR_EL0
+	ret
+endfunc cortex_a75_amu_read_cpuamcntenclr_el0
+
+/*
+ * void cortex_a75_amu_write_cpuamcntenset_el0(unsigned int mask);
+ *
+ * Write `mask` to the `CPUAMCNTENSET_EL0` CPU register.
+ */
+func cortex_a75_amu_write_cpuamcntenset_el0
+	msr	CPUAMCNTENSET_EL0, x0
+	ret
+endfunc cortex_a75_amu_write_cpuamcntenset_el0
+
+/*
+ * void cortex_a75_amu_write_cpuamcntenclr_el0(unsigned int mask);
+ *
+ * Write `mask` to the `CPUAMCNTENCLR_EL0` CPU register.
+ */
+func cortex_a75_amu_write_cpuamcntenclr_el0
+	msr	CPUAMCNTENCLR_EL0, x0
+	ret
+endfunc cortex_a75_amu_write_cpuamcntenclr_el0
+
 func cortex_a75_reset_func
+#if IMAGE_BL31 && WORKAROUND_CVE_2017_5715
+	mrs	x0, id_aa64pfr0_el1
+	ubfx	x0, x0, #ID_AA64PFR0_CSV2_SHIFT, #ID_AA64PFR0_CSV2_LENGTH
+	/*
+	 * If the field equals 1, then branch targets trained in one
+	 * context cannot affect speculative execution in a different context.
+	 */
+	cmp	x0, #1
+	b.eq	1f
+
+	adr	x0, workaround_bpiall_vbar0_runtime_exceptions
+	msr	vbar_el3, x0
+1:
+#endif
+
 #if ENABLE_AMU
 	/* Make sure accesses from EL0/EL1 and EL2 are not trapped to EL3 */
 	mrs	x0, actlr_el3
diff --git a/lib/cpus/aarch64/cortex_a75_pubsub.c b/lib/cpus/aarch64/cortex_a75_pubsub.c
new file mode 100644
index 0000000..c1089a6
--- /dev/null
+++ b/lib/cpus/aarch64/cortex_a75_pubsub.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch_helpers.h>
+#include <assert.h>
+#include <cortex_a75.h>
+#include <platform.h>
+#include <pubsub_events.h>
+#include <stdint.h>
+
+struct amu_ctx {
+	uint64_t cnts[CORTEX_A75_AMU_NR_COUNTERS];
+	uint16_t mask;
+};
+
+static struct amu_ctx amu_ctxs[PLATFORM_CORE_COUNT];
+
+static void *cortex_a75_context_save(const void *arg)
+{
+	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
+	unsigned int midr;
+	unsigned int midr_mask;
+	int i;
+
+	midr = read_midr();
+	midr_mask = (MIDR_IMPL_MASK << MIDR_IMPL_SHIFT) |
+		(MIDR_PN_MASK << MIDR_PN_SHIFT);
+	if ((midr & midr_mask) != (CORTEX_A75_MIDR & midr_mask))
+		return 0;
+
+	/* Save counter configuration */
+	ctx->mask = cortex_a75_amu_read_cpuamcntenset_el0();
+
+	/* Ensure counters are disabled */
+	cortex_a75_amu_write_cpuamcntenclr_el0(ctx->mask);
+	isb();
+
+	/* Save counters */
+	for (i = 0; i < CORTEX_A75_AMU_NR_COUNTERS; i++)
+		ctx->cnts[i] = cortex_a75_amu_cnt_read(i);
+
+	return 0;
+}
+
+static void *cortex_a75_context_restore(const void *arg)
+{
+	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
+	unsigned int midr;
+	unsigned int midr_mask;
+	int i;
+
+	midr = read_midr();
+	midr_mask = (MIDR_IMPL_MASK << MIDR_IMPL_SHIFT) |
+		(MIDR_PN_MASK << MIDR_PN_SHIFT);
+	if ((midr & midr_mask) != (CORTEX_A75_MIDR & midr_mask))
+		return 0;
+
+	/* Counters were disabled in `cortex_a75_context_save()` */
+	assert(cortex_a75_amu_read_cpuamcntenset_el0() == 0);
+
+	/* Restore counters */
+	for (i = 0; i < CORTEX_A75_AMU_NR_COUNTERS; i++)
+		cortex_a75_amu_cnt_write(i, ctx->cnts[i]);
+	isb();
+
+	/* Restore counter configuration */
+	cortex_a75_amu_write_cpuamcntenset_el0(ctx->mask);
+
+	return 0;
+}
+
+SUBSCRIBE_TO_EVENT(psci_suspend_pwrdown_start, cortex_a75_context_save);
+SUBSCRIBE_TO_EVENT(psci_suspend_pwrdown_finish, cortex_a75_context_restore);
diff --git a/lib/cpus/aarch64/cpu_helpers.S b/lib/cpus/aarch64/cpu_helpers.S
index 2384553..ae1c3c2 100644
--- a/lib/cpus/aarch64/cpu_helpers.S
+++ b/lib/cpus/aarch64/cpu_helpers.S
@@ -7,7 +7,7 @@
 #include <arch.h>
 #include <asm_macros.S>
 #include <assert_macros.S>
-#ifdef IMAGE_BL31
+#if defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
 #include <cpu_data.h>
 #endif
 #include <cpu_macros.S>
@@ -15,7 +15,7 @@
 #include <errata_report.h>
 
  /* Reset fn is needed in BL at reset vector */
-#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
+#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
 	/*
 	 * The reset handler common to all platforms.  After a matching
 	 * cpu_ops structure entry is found, the corresponding reset_handler
@@ -47,7 +47,7 @@
 	ret
 endfunc reset_handler
 
-#endif /* IMAGE_BL1 || IMAGE_BL31 */
+#endif
 
 #ifdef IMAGE_BL31 /* The core and cluster power down functions are needed only in BL31 */
 	/*
diff --git a/lib/cpus/aarch64/workaround_cve_2017_5715_bpiall.S b/lib/cpus/aarch64/workaround_cve_2017_5715_bpiall.S
new file mode 100644
index 0000000..cd29266
--- /dev/null
+++ b/lib/cpus/aarch64/workaround_cve_2017_5715_bpiall.S
@@ -0,0 +1,372 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <context.h>
+
+	.globl	workaround_bpiall_vbar0_runtime_exceptions
+
+#define EMIT_BPIALL		0xee070fd5
+#define EMIT_MOV_R0_IMM(v)	0xe3a0000##v
+#define EMIT_SMC		0xe1600070
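+
+/*
+ * The EMIT_* values are pre-assembled AArch32 opcodes stored as data:
+ * EMIT_BPIALL encodes "mcr p15, 0, r0, c7, c5, 6" (BPIALL),
+ * EMIT_MOV_R0_IMM(v) encodes "mov r0, #v" and EMIT_SMC encodes "smc #0".
+ */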
+
+	.macro	enter_workaround _stub_name
+	/* Save GP regs */
+	stp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+	stp	x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
+	stp	x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
+	stp	x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
+	stp	x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
+	stp	x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10]
+	stp	x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12]
+	stp	x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14]
+	stp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]
+	stp	x18, x19, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X18]
+	stp	x20, x21, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X20]
+	stp	x22, x23, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X22]
+	stp	x24, x25, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X24]
+	stp	x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26]
+	stp	x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
+
+	adr	x4, \_stub_name
+
+	/*
+	 * Load SPSR_EL3 and VBAR_EL3.  SPSR_EL3 is set up to have
+	 * all interrupts masked in preparation for running the workaround
+	 * stub in S-EL1.  VBAR_EL3 points to the vector table that
+	 * will handle the SMC back from the workaround stub.
+	 */
+	ldp	x0, x1, [x4, #0]
+
+	/*
+	 * Load SCTLR_EL1 and ELR_EL3.  SCTLR_EL1 is configured to disable
+	 * the MMU in S-EL1.  ELR_EL3 points to the appropriate stub in S-EL1.
+	 */
+	ldp	x2, x3, [x4, #16]
+
+	mrs	x4, scr_el3
+	mrs	x5, spsr_el3
+	mrs	x6, elr_el3
+	mrs	x7, sctlr_el1
+	mrs	x8, esr_el3
+
+	/* Preserve system registers in the workaround context */
+	stp	x4, x5, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD0]
+	stp	x6, x7, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD2]
+	stp	x8, x30, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD4]
+
+	/*
+	 * Setting SCR_EL3 to all zeroes means that the NS, RW
+	 * and SMD bits are configured as expected.
+	 */
+	msr	scr_el3, xzr
+
+	/*
+	 * Reload system registers with the crafted values
+	 * in preparation for entry in S-EL1.
+	 */
+	msr	spsr_el3, x0
+	msr	vbar_el3, x1
+	msr	sctlr_el1, x2
+	msr	elr_el3, x3
+
+	eret
+	.endm
+
+	/* ---------------------------------------------------------------------
+	 * This vector table is used at runtime to enter the workaround at
+	 * AArch32 S-EL1 for Sync/IRQ/FIQ/SError exceptions.  If the workaround
+	 * is not enabled, the existing runtime exception vector table is used.
+	 * ---------------------------------------------------------------------
+	 */
+vector_base workaround_bpiall_vbar0_runtime_exceptions
+
+	/* ---------------------------------------------------------------------
+	 * Current EL with SP_EL0 : 0x0 - 0x200
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_bpiall_vbar0_sync_exception_sp_el0
+	b	sync_exception_sp_el0
+	/*
+	 * Since each vector table entry is 128 bytes, we can store the
+	 * stub context in the unused space to minimize memory footprint.
+	 */
+aarch32_stub_smc:
+	.word	EMIT_BPIALL
+	.word	EMIT_MOV_R0_IMM(1)
+	.word	EMIT_SMC
+aarch32_stub_ctx_smc:
+	/* Mask all interrupts and set AArch32 Supervisor mode */
+	.quad	(SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
+	         SPSR_M_AARCH32 << SPSR_M_SHIFT | \
+	         MODE32_svc << MODE32_SHIFT)
+
+	/*
+	 * VBAR_EL3 points to vbar1, which is the vector table
+	 * used while the workaround is executing.
+	 */
+	.quad	workaround_bpiall_vbar1_runtime_exceptions
+
+	/* Set up SCTLR_EL1 with MMU off and I$ on */
+	.quad	SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
+
+	/* ELR_EL3 is set up to point to the sync exception stub in AArch32 */
+	.quad	aarch32_stub_smc
+	check_vector_size workaround_bpiall_vbar0_sync_exception_sp_el0
+
+vector_entry workaround_bpiall_vbar0_irq_sp_el0
+	b	irq_sp_el0
+aarch32_stub_irq:
+	.word	EMIT_BPIALL
+	.word	EMIT_MOV_R0_IMM(2)
+	.word	EMIT_SMC
+aarch32_stub_ctx_irq:
+	.quad	(SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
+	         SPSR_M_AARCH32 << SPSR_M_SHIFT | \
+	         MODE32_svc << MODE32_SHIFT)
+	.quad	workaround_bpiall_vbar1_runtime_exceptions
+	.quad	SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
+	.quad	aarch32_stub_irq
+	check_vector_size workaround_bpiall_vbar0_irq_sp_el0
+
+vector_entry workaround_bpiall_vbar0_fiq_sp_el0
+	b	fiq_sp_el0
+aarch32_stub_fiq:
+	.word	EMIT_BPIALL
+	.word	EMIT_MOV_R0_IMM(4)
+	.word	EMIT_SMC
+aarch32_stub_ctx_fiq:
+	.quad	(SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
+	         SPSR_M_AARCH32 << SPSR_M_SHIFT | \
+	         MODE32_svc << MODE32_SHIFT)
+	.quad	workaround_bpiall_vbar1_runtime_exceptions
+	.quad	SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
+	.quad	aarch32_stub_fiq
+	check_vector_size workaround_bpiall_vbar0_fiq_sp_el0
+
+vector_entry workaround_bpiall_vbar0_serror_sp_el0
+	b	serror_sp_el0
+aarch32_stub_serror:
+	.word	EMIT_BPIALL
+	.word	EMIT_MOV_R0_IMM(8)
+	.word	EMIT_SMC
+aarch32_stub_ctx_serror:
+	.quad	(SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
+	         SPSR_M_AARCH32 << SPSR_M_SHIFT | \
+	         MODE32_svc << MODE32_SHIFT)
+	.quad	workaround_bpiall_vbar1_runtime_exceptions
+	.quad	SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
+	.quad	aarch32_stub_serror
+	check_vector_size workaround_bpiall_vbar0_serror_sp_el0
+
+	/* ---------------------------------------------------------------------
+	 * Current EL with SP_ELx: 0x200 - 0x400
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_bpiall_vbar0_sync_exception_sp_elx
+	b	sync_exception_sp_elx
+	check_vector_size workaround_bpiall_vbar0_sync_exception_sp_elx
+
+vector_entry workaround_bpiall_vbar0_irq_sp_elx
+	b	irq_sp_elx
+	check_vector_size workaround_bpiall_vbar0_irq_sp_elx
+
+vector_entry workaround_bpiall_vbar0_fiq_sp_elx
+	b	fiq_sp_elx
+	check_vector_size workaround_bpiall_vbar0_fiq_sp_elx
+
+vector_entry workaround_bpiall_vbar0_serror_sp_elx
+	b	serror_sp_elx
+	check_vector_size workaround_bpiall_vbar0_serror_sp_elx
+
+	/* ---------------------------------------------------------------------
+	 * Lower EL using AArch64 : 0x400 - 0x600
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_bpiall_vbar0_sync_exception_aarch64
+	enter_workaround aarch32_stub_ctx_smc
+	check_vector_size workaround_bpiall_vbar0_sync_exception_aarch64
+
+vector_entry workaround_bpiall_vbar0_irq_aarch64
+	enter_workaround aarch32_stub_ctx_irq
+	check_vector_size workaround_bpiall_vbar0_irq_aarch64
+
+vector_entry workaround_bpiall_vbar0_fiq_aarch64
+	enter_workaround aarch32_stub_ctx_fiq
+	check_vector_size workaround_bpiall_vbar0_fiq_aarch64
+
+vector_entry workaround_bpiall_vbar0_serror_aarch64
+	enter_workaround aarch32_stub_ctx_serror
+	check_vector_size workaround_bpiall_vbar0_serror_aarch64
+
+	/* ---------------------------------------------------------------------
+	 * Lower EL using AArch32 : 0x600 - 0x800
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_bpiall_vbar0_sync_exception_aarch32
+	enter_workaround aarch32_stub_ctx_smc
+	check_vector_size workaround_bpiall_vbar0_sync_exception_aarch32
+
+vector_entry workaround_bpiall_vbar0_irq_aarch32
+	enter_workaround aarch32_stub_ctx_irq
+	check_vector_size workaround_bpiall_vbar0_irq_aarch32
+
+vector_entry workaround_bpiall_vbar0_fiq_aarch32
+	enter_workaround aarch32_stub_ctx_fiq
+	check_vector_size workaround_bpiall_vbar0_fiq_aarch32
+
+vector_entry workaround_bpiall_vbar0_serror_aarch32
+	enter_workaround aarch32_stub_ctx_serror
+	check_vector_size workaround_bpiall_vbar0_serror_aarch32
+
+	/* ---------------------------------------------------------------------
+	 * This vector table is used while the workaround is executing.  It
+	 * installs a simple SMC handler to allow the Sync/IRQ/FIQ/SError
+	 * workaround stubs to enter EL3 from S-EL1.  It restores the previous
+	 * EL3 state before proceeding with the normal runtime exception vector.
+	 * ---------------------------------------------------------------------
+	 */
+vector_base workaround_bpiall_vbar1_runtime_exceptions
+
+	/* ---------------------------------------------------------------------
+	 * Current EL with SP_EL0 : 0x0 - 0x200 (UNUSED)
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_bpiall_vbar1_sync_exception_sp_el0
+	b	report_unhandled_exception
+	check_vector_size workaround_bpiall_vbar1_sync_exception_sp_el0
+
+vector_entry workaround_bpiall_vbar1_irq_sp_el0
+	b	report_unhandled_interrupt
+	check_vector_size workaround_bpiall_vbar1_irq_sp_el0
+
+vector_entry workaround_bpiall_vbar1_fiq_sp_el0
+	b	report_unhandled_interrupt
+	check_vector_size workaround_bpiall_vbar1_fiq_sp_el0
+
+vector_entry workaround_bpiall_vbar1_serror_sp_el0
+	b	report_unhandled_exception
+	check_vector_size workaround_bpiall_vbar1_serror_sp_el0
+
+	/* ---------------------------------------------------------------------
+	 * Current EL with SP_ELx: 0x200 - 0x400 (UNUSED)
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_bpiall_vbar1_sync_exception_sp_elx
+	b	report_unhandled_exception
+	check_vector_size workaround_bpiall_vbar1_sync_exception_sp_elx
+
+vector_entry workaround_bpiall_vbar1_irq_sp_elx
+	b	report_unhandled_interrupt
+	check_vector_size workaround_bpiall_vbar1_irq_sp_elx
+
+vector_entry workaround_bpiall_vbar1_fiq_sp_elx
+	b	report_unhandled_interrupt
+	check_vector_size workaround_bpiall_vbar1_fiq_sp_elx
+
+vector_entry workaround_bpiall_vbar1_serror_sp_elx
+	b	report_unhandled_exception
+	check_vector_size workaround_bpiall_vbar1_serror_sp_elx
+
+	/* ---------------------------------------------------------------------
+	 * Lower EL using AArch64 : 0x400 - 0x600 (UNUSED)
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_bpiall_vbar1_sync_exception_aarch64
+	b	report_unhandled_exception
+	check_vector_size workaround_bpiall_vbar1_sync_exception_aarch64
+
+vector_entry workaround_bpiall_vbar1_irq_aarch64
+	b	report_unhandled_interrupt
+	check_vector_size workaround_bpiall_vbar1_irq_aarch64
+
+vector_entry workaround_bpiall_vbar1_fiq_aarch64
+	b	report_unhandled_interrupt
+	check_vector_size workaround_bpiall_vbar1_fiq_aarch64
+
+vector_entry workaround_bpiall_vbar1_serror_aarch64
+	b	report_unhandled_exception
+	check_vector_size workaround_bpiall_vbar1_serror_aarch64
+
+	/* ---------------------------------------------------------------------
+	 * Lower EL using AArch32 : 0x600 - 0x800
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_bpiall_vbar1_sync_exception_aarch32
+	/* Restore register state from the workaround context */
+	ldp	x2, x3, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD0]
+	ldp	x4, x5, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD2]
+	ldp	x6, x30, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD4]
+
+	/* Apply the restored system register state */
+	msr	scr_el3, x2
+	msr	spsr_el3, x3
+	msr	elr_el3, x4
+	msr	sctlr_el1, x5
+	msr	esr_el3, x6
+
+	/*
+	 * The workaround is complete, so swap VBAR_EL3 back to point
+	 * to the workaround entry table in preparation for subsequent
+	 * Sync/IRQ/FIQ/SError exceptions.
+	 */
+	adr	x2, workaround_bpiall_vbar0_runtime_exceptions
+	msr	vbar_el3, x2
+
+	/*
+	 * Restore all GP regs except x0 and x1.  The value in x0
+	 * indicates the type of the original exception.
+	 */
+	ldp	x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
+	ldp	x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
+	ldp	x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
+	ldp	x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
+	ldp	x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10]
+	ldp	x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12]
+	ldp	x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14]
+	ldp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]
+	ldp	x18, x19, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X18]
+	ldp	x20, x21, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X20]
+	ldp	x22, x23, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X22]
+	ldp	x24, x25, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X24]
+	ldp	x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26]
+	ldp	x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
+
+	/*
+	 * Each of these handlers will first restore x0 and x1 from
+	 * the context and then branch to the common implementation for
+	 * each of the exception types.
+	 */
+	tbnz	x0, #1, workaround_bpiall_vbar1_irq
+	tbnz	x0, #2, workaround_bpiall_vbar1_fiq
+	tbnz	x0, #3, workaround_bpiall_vbar1_serror
+
+	/* Fallthrough case for Sync exception */
+	ldp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+	b	sync_exception_aarch64
+	check_vector_size workaround_bpiall_vbar1_sync_exception_aarch32
+
+vector_entry workaround_bpiall_vbar1_irq_aarch32
+	b	report_unhandled_interrupt
+workaround_bpiall_vbar1_irq:
+	ldp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+	b	irq_aarch64
+	check_vector_size workaround_bpiall_vbar1_irq_aarch32
+
+vector_entry workaround_bpiall_vbar1_fiq_aarch32
+	b	report_unhandled_interrupt
+workaround_bpiall_vbar1_fiq:
+	ldp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+	b	fiq_aarch64
+	check_vector_size workaround_bpiall_vbar1_fiq_aarch32
+
+vector_entry workaround_bpiall_vbar1_serror_aarch32
+	b	report_unhandled_exception
+workaround_bpiall_vbar1_serror:
+	ldp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+	b	serror_aarch64
+	check_vector_size workaround_bpiall_vbar1_serror_aarch32
diff --git a/lib/cpus/aarch64/workaround_cve_2017_5715_mmu.S b/lib/cpus/aarch64/workaround_cve_2017_5715_mmu.S
new file mode 100644
index 0000000..f478148
--- /dev/null
+++ b/lib/cpus/aarch64/workaround_cve_2017_5715_mmu.S
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <context.h>
+
+	.globl	workaround_mmu_runtime_exceptions
+
+vector_base workaround_mmu_runtime_exceptions
+
+	.macro	apply_workaround
+	stp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+	mrs	x0, sctlr_el3
+	/* Disable MMU */
+	bic	x1, x0, #SCTLR_M_BIT
+	msr	sctlr_el3, x1
+	isb
+	/* Restore MMU config */
+	msr	sctlr_el3, x0
+	isb
+	ldp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+	.endm
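+
+/*
+ * Note: briefly turning the MMU off and back on at EL3 has the side effect,
+ * on the cores whose reset functions install this vector table, of
+ * invalidating the branch predictor, which is what defeats the branch
+ * target injection attack.
+ */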
+
+	/* ---------------------------------------------------------------------
+	 * Current EL with SP_EL0 : 0x0 - 0x200
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_mmu_sync_exception_sp_el0
+	b	sync_exception_sp_el0
+	check_vector_size workaround_mmu_sync_exception_sp_el0
+
+vector_entry workaround_mmu_irq_sp_el0
+	b	irq_sp_el0
+	check_vector_size workaround_mmu_irq_sp_el0
+
+vector_entry workaround_mmu_fiq_sp_el0
+	b	fiq_sp_el0
+	check_vector_size workaround_mmu_fiq_sp_el0
+
+vector_entry workaround_mmu_serror_sp_el0
+	b	serror_sp_el0
+	check_vector_size workaround_mmu_serror_sp_el0
+
+	/* ---------------------------------------------------------------------
+	 * Current EL with SP_ELx: 0x200 - 0x400
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_mmu_sync_exception_sp_elx
+	b	sync_exception_sp_elx
+	check_vector_size workaround_mmu_sync_exception_sp_elx
+
+vector_entry workaround_mmu_irq_sp_elx
+	b	irq_sp_elx
+	check_vector_size workaround_mmu_irq_sp_elx
+
+vector_entry workaround_mmu_fiq_sp_elx
+	b	fiq_sp_elx
+	check_vector_size workaround_mmu_fiq_sp_elx
+
+vector_entry workaround_mmu_serror_sp_elx
+	b	serror_sp_elx
+	check_vector_size workaround_mmu_serror_sp_elx
+
+	/* ---------------------------------------------------------------------
+	 * Lower EL using AArch64 : 0x400 - 0x600
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_mmu_sync_exception_aarch64
+	apply_workaround
+	b	sync_exception_aarch64
+	check_vector_size workaround_mmu_sync_exception_aarch64
+
+vector_entry workaround_mmu_irq_aarch64
+	apply_workaround
+	b	irq_aarch64
+	check_vector_size workaround_mmu_irq_aarch64
+
+vector_entry workaround_mmu_fiq_aarch64
+	apply_workaround
+	b	fiq_aarch64
+	check_vector_size workaround_mmu_fiq_aarch64
+
+vector_entry workaround_mmu_serror_aarch64
+	apply_workaround
+	b	serror_aarch64
+	check_vector_size workaround_mmu_serror_aarch64
+
+	/* ---------------------------------------------------------------------
+	 * Lower EL using AArch32 : 0x600 - 0x800
+	 * ---------------------------------------------------------------------
+	 */
+vector_entry workaround_mmu_sync_exception_aarch32
+	apply_workaround
+	b	sync_exception_aarch32
+	check_vector_size workaround_mmu_sync_exception_aarch32
+
+vector_entry workaround_mmu_irq_aarch32
+	apply_workaround
+	b	irq_aarch32
+	check_vector_size workaround_mmu_irq_aarch32
+
+vector_entry workaround_mmu_fiq_aarch32
+	apply_workaround
+	b	fiq_aarch32
+	check_vector_size workaround_mmu_fiq_aarch32
+
+vector_entry workaround_mmu_serror_aarch32
+	apply_workaround
+	b	serror_aarch32
+	check_vector_size workaround_mmu_serror_aarch32
diff --git a/lib/cpus/cpu-ops.mk b/lib/cpus/cpu-ops.mk
index 31adfb4..3ba8c1f 100644
--- a/lib/cpus/cpu-ops.mk
+++ b/lib/cpus/cpu-ops.mk
@@ -16,6 +16,8 @@
 # It is enabled by default.
 A57_DISABLE_NON_TEMPORAL_HINT	?=1
 
+WORKAROUND_CVE_2017_5715	?=1
+
 # Process SKIP_A57_L1_FLUSH_PWR_DWN flag
 $(eval $(call assert_boolean,SKIP_A57_L1_FLUSH_PWR_DWN))
 $(eval $(call add_define,SKIP_A57_L1_FLUSH_PWR_DWN))
@@ -28,6 +30,9 @@
 $(eval $(call assert_boolean,A57_DISABLE_NON_TEMPORAL_HINT))
 $(eval $(call add_define,A57_DISABLE_NON_TEMPORAL_HINT))
 
+# Process WORKAROUND_CVE_2017_5715 flag
+$(eval $(call assert_boolean,WORKAROUND_CVE_2017_5715))
+$(eval $(call add_define,WORKAROUND_CVE_2017_5715))
 
 # CPU Errata Build flags.
 # These should be enabled by the platform if the erratum workaround needs to be
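
Note that the workaround is enabled by default; a platform whose cores are not
affected can opt out at build time, e.g. with
`make PLAT=<plat> WORKAROUND_CVE_2017_5715=0`.
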
diff --git a/lib/cpus/errata_report.c b/lib/cpus/errata_report.c
index 8d9f704..182679d 100644
--- a/lib/cpus/errata_report.c
+++ b/lib/cpus/errata_report.c
@@ -20,6 +20,8 @@
 # define BL_STRING	"BL31"
 #elif defined(AARCH32) && defined(IMAGE_BL32)
 # define BL_STRING	"BL32"
+#elif defined(IMAGE_BL2) && BL2_AT_EL3
+# define BL_STRING	"BL2"
 #else
 # error This image should not be printing errata status
 #endif
diff --git a/lib/extensions/amu/aarch32/amu.c b/lib/extensions/amu/aarch32/amu.c
index d450bd6..effc5bd 100644
--- a/lib/extensions/amu/aarch32/amu.c
+++ b/lib/extensions/amu/aarch32/amu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,26 +7,100 @@
 #include <amu.h>
 #include <arch.h>
 #include <arch_helpers.h>
+#include <assert.h>
+#include <platform.h>
+#include <pubsub_events.h>
+
+#define AMU_GROUP0_NR_COUNTERS	4
+
+struct amu_ctx {
+	uint64_t group0_cnts[AMU_GROUP0_NR_COUNTERS];
+};
+
+static struct amu_ctx amu_ctxs[PLATFORM_CORE_COUNT];
 
 void amu_enable(int el2_unused)
 {
 	uint64_t features;
 
 	features = read_id_pfr0() >> ID_PFR0_AMU_SHIFT;
-	if ((features & ID_PFR0_AMU_MASK) == 1) {
-		if (el2_unused) {
-			uint64_t v;
+	if ((features & ID_PFR0_AMU_MASK) != 1)
+		return;
 
-			/*
-			 * Non-secure access from EL0 or EL1 to the Activity Monitor
-			 * registers do not trap to EL2.
-			 */
-			v = read_hcptr();
-			v &= ~TAM_BIT;
-			write_hcptr(v);
-		}
+	if (el2_unused) {
+		uint64_t v;
 
-		/* Enable group 0 counters */
-		write_amcntenset0(AMU_GROUP0_COUNTERS_MASK);
+		/*
+		 * Non-secure accesses from EL0 or EL1 to the Activity Monitor
+		 * registers do not trap to EL2.
+		 */
+		v = read_hcptr();
+		v &= ~TAM_BIT;
+		write_hcptr(v);
 	}
+
+	/* Enable group 0 counters */
+	write_amcntenset0(AMU_GROUP0_COUNTERS_MASK);
 }
+
+static void *amu_context_save(const void *arg)
+{
+	struct amu_ctx *ctx;
+	uint64_t features;
+
+	features = read_id_pfr0() >> ID_PFR0_AMU_SHIFT;
+	if ((features & ID_PFR0_AMU_MASK) != 1)
+		return (void *)-1;
+
+	ctx = &amu_ctxs[plat_my_core_pos()];
+
+	/* Assert that group 0 counter configuration is what we expect */
+	assert(read_amcntenset0() == AMU_GROUP0_COUNTERS_MASK);
+
+	/*
+	 * Disable group 0 counters so that other observers, such as the SCP,
+	 * cannot sample counter values from the future via the memory-mapped view.
+	 */
+	write_amcntenclr0(AMU_GROUP0_COUNTERS_MASK);
+	isb();
+
+	ctx->group0_cnts[0] = read64_amevcntr00();
+	ctx->group0_cnts[1] = read64_amevcntr01();
+	ctx->group0_cnts[2] = read64_amevcntr02();
+	ctx->group0_cnts[3] = read64_amevcntr03();
+
+	return 0;
+}
+
+static void *amu_context_restore(const void *arg)
+{
+	struct amu_ctx *ctx;
+	uint64_t features;
+
+	features = read_id_pfr0() >> ID_PFR0_AMU_SHIFT;
+	if ((features & ID_PFR0_AMU_MASK) != 1)
+		return (void *)-1;
+
+	ctx = &amu_ctxs[plat_my_core_pos()];
+
+	/* Counters were disabled in `amu_context_save()` */
+	assert(read_amcntenset0() == 0);
+
+	/* Restore group 0 counters */
+	if (AMU_GROUP0_COUNTERS_MASK & (1U << 0))
+		write64_amevcntr00(ctx->group0_cnts[0]);
+	if (AMU_GROUP0_COUNTERS_MASK & (1U << 1))
+		write64_amevcntr01(ctx->group0_cnts[1]);
+	if (AMU_GROUP0_COUNTERS_MASK & (1U << 2))
+		write64_amevcntr02(ctx->group0_cnts[2]);
+	if (AMU_GROUP0_COUNTERS_MASK & (1U << 3))
+		write64_amevcntr03(ctx->group0_cnts[3]);
+	isb();
+
+	/* Enable group 0 counters */
+	write_amcntenset0(AMU_GROUP0_COUNTERS_MASK);
+
+	return 0;
+}
+
+SUBSCRIBE_TO_EVENT(psci_suspend_pwrdown_start, amu_context_save);
+SUBSCRIBE_TO_EVENT(psci_suspend_pwrdown_finish, amu_context_restore);
diff --git a/lib/extensions/amu/aarch64/amu.c b/lib/extensions/amu/aarch64/amu.c
index 007b349..d7645a9 100644
--- a/lib/extensions/amu/aarch64/amu.c
+++ b/lib/extensions/amu/aarch64/amu.c
@@ -1,40 +1,185 @@
 /*
- * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
 #include <amu.h>
+#include <amu_private.h>
 #include <arch.h>
 #include <arch_helpers.h>
+#include <assert.h>
+#include <platform.h>
+#include <pubsub_events.h>
 
-void amu_enable(int el2_unused)
+#define AMU_GROUP0_NR_COUNTERS	4
+
+struct amu_ctx {
+	uint64_t group0_cnts[AMU_GROUP0_NR_COUNTERS];
+	uint64_t group1_cnts[AMU_GROUP1_NR_COUNTERS];
+};
+
+static struct amu_ctx amu_ctxs[PLATFORM_CORE_COUNT];
+
+int amu_supported(void)
 {
 	uint64_t features;
 
 	features = read_id_aa64pfr0_el1() >> ID_AA64PFR0_AMU_SHIFT;
-	if ((features & ID_AA64PFR0_AMU_MASK) == 1) {
-		uint64_t v;
+	return (features & ID_AA64PFR0_AMU_MASK) == 1;
+}
+
+/*
+ * Enable counters.  This function is meant to be invoked
+ * by the context management library before exiting from EL3.
+ */
+void amu_enable(int el2_unused)
+{
+	uint64_t v;
 
-		if (el2_unused) {
-			/*
-			 * CPTR_EL2.TAM: Set to zero so any accesses to
-			 * the Activity Monitor registers do not trap to EL2.
-			 */
-			v = read_cptr_el2();
-			v &= ~CPTR_EL2_TAM_BIT;
-			write_cptr_el2(v);
-		}
+	if (!amu_supported())
+		return;
 
+	if (el2_unused) {
 		/*
-		 * CPTR_EL3.TAM: Set to zero so that any accesses to
-		 * the Activity Monitor registers do not trap to EL3.
+		 * CPTR_EL2.TAM: Set to zero so any accesses to
+		 * the Activity Monitor registers do not trap to EL2.
 		 */
-		v = read_cptr_el3();
-		v &= ~TAM_BIT;
-		write_cptr_el3(v);
-
-		/* Enable group 0 counters */
-		write_amcntenset0_el0(AMU_GROUP0_COUNTERS_MASK);
+		v = read_cptr_el2();
+		v &= ~CPTR_EL2_TAM_BIT;
+		write_cptr_el2(v);
 	}
+
+	/*
+	 * CPTR_EL3.TAM: Set to zero so that any accesses to
+	 * the Activity Monitor registers do not trap to EL3.
+	 */
+	v = read_cptr_el3();
+	v &= ~TAM_BIT;
+	write_cptr_el3(v);
+
+	/* Enable group 0 counters */
+	write_amcntenset0_el0(AMU_GROUP0_COUNTERS_MASK);
+	/* Enable group 1 counters */
+	write_amcntenset1_el0(AMU_GROUP1_COUNTERS_MASK);
+}
+
+/* Read the group 0 counter identified by the given `idx`. */
+uint64_t amu_group0_cnt_read(int idx)
+{
+	assert(amu_supported());
+	assert(idx >= 0 && idx < AMU_GROUP0_NR_COUNTERS);
+
+	return amu_group0_cnt_read_internal(idx);
+}
+
+/* Write the group 0 counter identified by the given `idx` with `val`. */
+void amu_group0_cnt_write(int idx, uint64_t val)
+{
+	assert(amu_supported());
+	assert(idx >= 0 && idx < AMU_GROUP0_NR_COUNTERS);
+
+	amu_group0_cnt_write_internal(idx, val);
+	isb();
+}
+
+/* Read the group 1 counter identified by the given `idx`. */
+uint64_t amu_group1_cnt_read(int idx)
+{
+	assert(amu_supported());
+	assert(idx >= 0 && idx < AMU_GROUP1_NR_COUNTERS);
+
+	return amu_group1_cnt_read_internal(idx);
+}
+
+/* Write the group 1 counter identified by the given `idx` with `val`. */
+void amu_group1_cnt_write(int idx, uint64_t val)
+{
+	assert(amu_supported());
+	assert(idx >= 0 && idx < AMU_GROUP1_NR_COUNTERS);
+
+	amu_group1_cnt_write_internal(idx, val);
+	isb();
 }
+
+/*
+ * Program the event type register for the given `idx` with
+ * the event number `val`.
+ */
+void amu_group1_set_evtype(int idx, unsigned int val)
+{
+	assert(amu_supported());
+	assert(idx >= 0 && idx < AMU_GROUP1_NR_COUNTERS);
+
+	amu_group1_set_evtype_internal(idx, val);
+	isb();
+}
+
+static void *amu_context_save(const void *arg)
+{
+	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
+	int i;
+
+	if (!amu_supported())
+		return (void *)-1;
+
+	/* Assert that group 0/1 counter configuration is what we expect */
+	assert(read_amcntenset0_el0() == AMU_GROUP0_COUNTERS_MASK &&
+	       read_amcntenset1_el0() == AMU_GROUP1_COUNTERS_MASK);
+
+	assert((sizeof(int) * 8) - __builtin_clz(AMU_GROUP1_COUNTERS_MASK)
+		<= AMU_GROUP1_NR_COUNTERS);
+
+	/*
+	 * Disable group 0/1 counters so that other observers, such as the SCP,
+	 * cannot sample counter values from the future via the memory-mapped view.
+	 */
+	write_amcntenclr0_el0(AMU_GROUP0_COUNTERS_MASK);
+	write_amcntenclr1_el0(AMU_GROUP1_COUNTERS_MASK);
+	isb();
+
+	/* Save group 0 counters */
+	for (i = 0; i < AMU_GROUP0_NR_COUNTERS; i++)
+		ctx->group0_cnts[i] = amu_group0_cnt_read(i);
+
+	/* Save group 1 counters */
+	for (i = 0; i < AMU_GROUP1_NR_COUNTERS; i++)
+		ctx->group1_cnts[i] = amu_group1_cnt_read(i);
+
+	return 0;
+}
+
+static void *amu_context_restore(const void *arg)
+{
+	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
+	int i;
+
+	if (!amu_supported())
+		return (void *)-1;
+
+	/* Counters were disabled in `amu_context_save()` */
+	assert(read_amcntenset0_el0() == 0 && read_amcntenset1_el0() == 0);
+
+	assert((sizeof(int) * 8) - __builtin_clz(AMU_GROUP1_COUNTERS_MASK)
+		<= AMU_GROUP1_NR_COUNTERS);
+
+	/* Restore group 0 counters */
+	for (i = 0; i < AMU_GROUP0_NR_COUNTERS; i++)
+		if (AMU_GROUP0_COUNTERS_MASK & (1U << i))
+			amu_group0_cnt_write(i, ctx->group0_cnts[i]);
+
+	/* Restore group 1 counters */
+	for (i = 0; i < AMU_GROUP1_NR_COUNTERS; i++)
+		if (AMU_GROUP1_COUNTERS_MASK & (1U << i))
+			amu_group1_cnt_write(i, ctx->group1_cnts[i]);
+	isb();
+
+	/* Restore group 0/1 counter configuration */
+	write_amcntenset0_el0(AMU_GROUP0_COUNTERS_MASK);
+	write_amcntenset1_el0(AMU_GROUP1_COUNTERS_MASK);
+
+	return 0;
+}
+
+SUBSCRIBE_TO_EVENT(psci_suspend_pwrdown_start, amu_context_save);
+SUBSCRIBE_TO_EVENT(psci_suspend_pwrdown_finish, amu_context_restore);
diff --git a/lib/extensions/amu/aarch64/amu_helpers.S b/lib/extensions/amu/aarch64/amu_helpers.S
new file mode 100644
index 0000000..e0b1f56
--- /dev/null
+++ b/lib/extensions/amu/aarch64/amu_helpers.S
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <assert_macros.S>
+#include <asm_macros.S>
+
+	.globl	amu_group0_cnt_read_internal
+	.globl	amu_group0_cnt_write_internal
+	.globl	amu_group1_cnt_read_internal
+	.globl	amu_group1_cnt_write_internal
+	.globl	amu_group1_set_evtype_internal
+
+/*
+ * uint64_t amu_group0_cnt_read_internal(int idx);
+ *
+ * Given `idx`, read the corresponding AMU counter
+ * and return it in `x0`.
+ */
+func amu_group0_cnt_read_internal
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	mov	x1, x0
+	lsr	x1, x1, #2
+	cmp	x1, #0
+	ASM_ASSERT(eq)
+#endif
+
+	/*
+	 * Given `idx`, calculate the address of the mrs/ret instruction pair
+	 * in the table below.
+	 */
+	adr	x1, 1f
+	lsl	x0, x0, #3		/* each mrs/ret sequence is 8 bytes */
+	add	x1, x1, x0
+	br	x1
+
+1:
+	mrs	x0, AMEVCNTR00_EL0	/* index 0 */
+	ret
+	mrs	x0, AMEVCNTR01_EL0	/* index 1 */
+	ret
+	mrs	x0, AMEVCNTR02_EL0	/* index 2 */
+	ret
+	mrs	x0, AMEVCNTR03_EL0	/* index 3 */
+	ret
+endfunc amu_group0_cnt_read_internal
+
+/*
+ * void amu_group0_cnt_write_internal(int idx, uint64_t val);
+ *
+ * Given `idx`, write `val` to the corresponding AMU counter.
+ */
+func amu_group0_cnt_write_internal
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	mov	x2, x0
+	lsr	x2, x2, #2
+	cmp	x2, #0
+	ASM_ASSERT(eq)
+#endif
+
+	/*
+	 * Given `idx`, calculate the address of the msr/ret instruction pair
+	 * in the table below.
+	 */
+	adr	x2, 1f
+	lsl	x0, x0, #3		/* each msr/ret sequence is 8 bytes */
+	add	x2, x2, x0
+	br	x2
+
+1:
+	msr	AMEVCNTR00_EL0, x1	/* index 0 */
+	ret
+	msr	AMEVCNTR01_EL0, x1	/* index 1 */
+	ret
+	msr	AMEVCNTR02_EL0, x1	/* index 2 */
+	ret
+	msr	AMEVCNTR03_EL0, x1	/* index 3 */
+	ret
+endfunc amu_group0_cnt_write_internal
+
+/*
+ * uint64_t amu_group1_cnt_read_internal(int idx);
+ *
+ * Given `idx`, read the corresponding AMU counter
+ * and return it in `x0`.
+ */
+func amu_group1_cnt_read_internal
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	mov	x1, x0
+	lsr	x1, x1, #4
+	cmp	x1, #0
+	ASM_ASSERT(eq)
+#endif
+
+	/*
+	 * Given `idx`, calculate the address of the mrs/ret instruction pair
+	 * in the table below.
+	 */
+	adr	x1, 1f
+	lsl	x0, x0, #3		/* each mrs/ret sequence is 8 bytes */
+	add	x1, x1, x0
+	br	x1
+
+1:
+	mrs	x0, AMEVCNTR10_EL0	/* index 0 */
+	ret
+	mrs	x0, AMEVCNTR11_EL0	/* index 1 */
+	ret
+	mrs	x0, AMEVCNTR12_EL0	/* index 2 */
+	ret
+	mrs	x0, AMEVCNTR13_EL0	/* index 3 */
+	ret
+	mrs	x0, AMEVCNTR14_EL0	/* index 4 */
+	ret
+	mrs	x0, AMEVCNTR15_EL0	/* index 5 */
+	ret
+	mrs	x0, AMEVCNTR16_EL0	/* index 6 */
+	ret
+	mrs	x0, AMEVCNTR17_EL0	/* index 7 */
+	ret
+	mrs	x0, AMEVCNTR18_EL0	/* index 8 */
+	ret
+	mrs	x0, AMEVCNTR19_EL0	/* index 9 */
+	ret
+	mrs	x0, AMEVCNTR1A_EL0	/* index 10 */
+	ret
+	mrs	x0, AMEVCNTR1B_EL0	/* index 11 */
+	ret
+	mrs	x0, AMEVCNTR1C_EL0	/* index 12 */
+	ret
+	mrs	x0, AMEVCNTR1D_EL0	/* index 13 */
+	ret
+	mrs	x0, AMEVCNTR1E_EL0	/* index 14 */
+	ret
+	mrs	x0, AMEVCNTR1F_EL0	/* index 15 */
+	ret
+endfunc amu_group1_cnt_read_internal
+
+/*
+ * void amu_group1_cnt_write_internal(int idx, uint64_t val);
+ *
+ * Given `idx`, write `val` to the corresponding AMU counter.
+ */
+func amu_group1_cnt_write_internal
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	mov	x2, x0
+	lsr	x2, x2, #4
+	cmp	x2, #0
+	ASM_ASSERT(eq)
+#endif
+
+	/*
+	 * Given `idx`, calculate the address of the msr/ret instruction pair
+	 * in the table below.
+	 */
+	adr	x2, 1f
+	lsl	x0, x0, #3		/* each msr/ret sequence is 8 bytes */
+	add	x2, x2, x0
+	br	x2
+
+1:
+	msr	AMEVCNTR10_EL0, x1	/* index 0 */
+	ret
+	msr	AMEVCNTR11_EL0, x1	/* index 1 */
+	ret
+	msr	AMEVCNTR12_EL0, x1	/* index 2 */
+	ret
+	msr	AMEVCNTR13_EL0, x1	/* index 3 */
+	ret
+	msr	AMEVCNTR14_EL0, x1	/* index 4 */
+	ret
+	msr	AMEVCNTR15_EL0, x1	/* index 5 */
+	ret
+	msr	AMEVCNTR16_EL0, x1	/* index 6 */
+	ret
+	msr	AMEVCNTR17_EL0, x1	/* index 7 */
+	ret
+	msr	AMEVCNTR18_EL0, x1	/* index 8 */
+	ret
+	msr	AMEVCNTR19_EL0, x1	/* index 9 */
+	ret
+	msr	AMEVCNTR1A_EL0, x1	/* index 10 */
+	ret
+	msr	AMEVCNTR1B_EL0, x1	/* index 11 */
+	ret
+	msr	AMEVCNTR1C_EL0, x1	/* index 12 */
+	ret
+	msr	AMEVCNTR1D_EL0, x1	/* index 13 */
+	ret
+	msr	AMEVCNTR1E_EL0, x1	/* index 14 */
+	ret
+	msr	AMEVCNTR1F_EL0, x1	/* index 15 */
+	ret
+endfunc amu_group1_cnt_write_internal
+
+/*
+ * void amu_group1_set_evtype_internal(int idx, unsigned int val);
+ *
+ * Program the AMU event type register indexed by `idx`
+ * with the value `val`.
+ */
+func amu_group1_set_evtype_internal
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	mov	x2, x0
+	lsr	x2, x2, #4
+	cmp	x2, #0
+	ASM_ASSERT(eq)
+
+	/* `val` should be in the range [0, 65535] */
+	mov	x2, x1
+	lsr	x2, x2, #16
+	cmp	x2, #0
+	ASM_ASSERT(eq)
+#endif
+
+	/*
+	 * Given `idx`, calculate the address of the msr/ret instruction pair
+	 * in the table below.
+	 */
+	adr	x2, 1f
+	lsl	x0, x0, #3		/* each msr/ret sequence is 8 bytes */
+	add	x2, x2, x0
+	br	x2
+
+1:
+	msr	AMEVTYPER10_EL0, x1	/* index 0 */
+	ret
+	msr	AMEVTYPER11_EL0, x1	/* index 1 */
+	ret
+	msr	AMEVTYPER12_EL0, x1	/* index 2 */
+	ret
+	msr	AMEVTYPER13_EL0, x1	/* index 3 */
+	ret
+	msr	AMEVTYPER14_EL0, x1	/* index 4 */
+	ret
+	msr	AMEVTYPER15_EL0, x1	/* index 5 */
+	ret
+	msr	AMEVTYPER16_EL0, x1	/* index 6 */
+	ret
+	msr	AMEVTYPER17_EL0, x1	/* index 7 */
+	ret
+	msr	AMEVTYPER18_EL0, x1	/* index 8 */
+	ret
+	msr	AMEVTYPER19_EL0, x1	/* index 9 */
+	ret
+	msr	AMEVTYPER1A_EL0, x1	/* index 10 */
+	ret
+	msr	AMEVTYPER1B_EL0, x1	/* index 11 */
+	ret
+	msr	AMEVTYPER1C_EL0, x1	/* index 12 */
+	ret
+	msr	AMEVTYPER1D_EL0, x1	/* index 13 */
+	ret
+	msr	AMEVTYPER1E_EL0, x1	/* index 14 */
+	ret
+	msr	AMEVTYPER1F_EL0, x1	/* index 15 */
+	ret
+endfunc amu_group1_set_evtype_internal
diff --git a/lib/psci/psci_suspend.c b/lib/psci/psci_suspend.c
index d949067..a77972d 100644
--- a/lib/psci/psci_suspend.c
+++ b/lib/psci/psci_suspend.c
@@ -14,6 +14,7 @@
 #include <debug.h>
 #include <platform.h>
 #include <pmf.h>
+#include <pubsub_events.h>
 #include <runtime_instr.h>
 #include <stddef.h>
 #include "psci_private.h"
@@ -68,6 +69,8 @@
 {
 	unsigned int max_off_lvl = psci_find_max_off_lvl(state_info);
 
+	PUBLISH_EVENT(psci_suspend_pwrdown_start);
+
 	/* Save PSCI target power level for the suspend finisher handler */
 	psci_set_suspend_pwrlvl(end_pwrlvl);
 
@@ -308,6 +311,8 @@
 	/* Invalidate the suspend level for the cpu */
 	psci_set_suspend_pwrlvl(PSCI_INVALID_PWR_LVL);
 
+	PUBLISH_EVENT(psci_suspend_pwrdown_finish);
+
 	/*
 	 * Generic management: Now we just need to retrieve the
 	 * information that we had stashed away during the suspend
diff --git a/lib/xlat_tables_v2/xlat_tables_internal.c b/lib/xlat_tables_v2/xlat_tables_internal.c
index 0acfacb..75c5a91 100644
--- a/lib/xlat_tables_v2/xlat_tables_internal.c
+++ b/lib/xlat_tables_v2/xlat_tables_internal.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -893,7 +893,7 @@
 			 * Check if the mapping function actually managed to map
 			 * anything. If not, just return now.
 			 */
-			if (mm_cursor->base_va >= end_va)
+			if (mm->base_va >= end_va)
 				return -ENOMEM;
 
 			/*
diff --git a/make_helpers/build_macros.mk b/make_helpers/build_macros.mk
index 7531f6d..a4fbc5a 100644
--- a/make_helpers/build_macros.mk
+++ b/make_helpers/build_macros.mk
@@ -101,7 +101,7 @@
 # FIP_ADD_PAYLOAD appends the command line arguments required by fiptool
 # to package a new payload. Optionally, it adds the dependency on this payload
 #   $(1) = payload filename (i.e. bl31.bin)
-#   $(2) = command line option for the specified payload (i.e. --bl31)
+#   $(2) = command line option for the specified payload (i.e. --soc-fw)
 #   $(3) = fip target dependency (optional) (i.e. bl31)
 define FIP_ADD_PAYLOAD
     $(eval FIP_ARGS += $(2) $(1))
@@ -121,14 +121,15 @@
 # using a build option. It also adds a dependency on the image file, aborting
 # the build if the file does not exist.
 #   $(1) = build option to specify the image filename (SCP_BL2, BL33, etc)
-#   $(2) = command line option for fiptool (scp_bl2, bl33, etc)
+#   $(2) = command line option for fiptool (--scp-fw, --nt-fw, etc)
 # Example:
-#   $(eval $(call FIP_ADD_IMG,BL33,--bl33))
+#   $(eval $(call FIP_ADD_IMG,BL33,--nt-fw))
 define FIP_ADD_IMG
     CRT_DEPS += check_$(1)
     FIP_DEPS += check_$(1)
     $(call FIP_ADD_PAYLOAD,$(value $(1)),$(2))
 
+.PHONY: check_$(1)
 check_$(1):
 	$$(if $(value $(1)),,$$(error "Platform '${PLAT}' requires $(1). Please set $(1) to point to the right file"))
 endef
@@ -154,14 +155,15 @@
 
 # FWU_FIP_ADD_IMG allows the platform to pack a binary image in the FWU FIP
 #   $(1) build option to specify the image filename (BL2U, NS_BL2U, etc)
-#   $(2) command line option for fiptool (bl2u, ns_bl2u, etc)
+#   $(2) command line option for fiptool (--ap-fwu-cfg, --fwu, etc)
 # Example:
-#   $(eval $(call FWU_FIP_ADD_IMG,BL2U,--bl2u))
+#   $(eval $(call FWU_FIP_ADD_IMG,BL2U,--ap-fwu-cfg))
 define FWU_FIP_ADD_IMG
     FWU_CRT_DEPS += check_$(1)
     FWU_FIP_DEPS += check_$(1)
     $(call FWU_FIP_ADD_PAYLOAD,$(value $(1)),$(2))
 
+.PHONY: check_$(1)
 check_$(1):
 	$$(if $(value $(1)),,$$(error "Platform '${PLAT}' requires $(1). Please set $(1) to point to the right file"))
 endef
@@ -336,3 +338,49 @@
 
 endef
 
+define SOURCES_TO_DTBS
+        $(notdir $(patsubst %.dts,%.dtb,$(filter %.dts,$(1))))
+endef
+
+# MAKE_FDT macro defines the targets and options to build each FDT binary
+# Arguments: (none)
+define MAKE_FDT
+        $(eval DTB_BUILD_DIR  := ${BUILD_PLAT}/fdts)
+        $(eval DTBS       := $(addprefix $(DTB_BUILD_DIR)/,$(call SOURCES_TO_DTBS,$(FDT_SOURCES))))
+        $(eval TEMP_DTB_DIRS := $(sort $(dir ${DTBS})))
+        # The $(dir ) function leaves a trailing / on the directory names
+        # Rip off the / to match directory names with make rule targets.
+        $(eval DTB_DIRS   := $(patsubst %/,%,$(TEMP_DTB_DIRS)))
+
+$(eval $(foreach objd,${DTB_DIRS},$(call MAKE_PREREQ_DIR,${objd},${BUILD_DIR})))
+
+fdt_dirs: ${DTB_DIRS}
+
+endef
+
+# MAKE_DTB generates a flattened device tree binary (DTB) from a device tree source
+#   $(1) = output directory
+#   $(2) = input dts
+define MAKE_DTB
+
+$(eval DOBJ := $(1)/$(patsubst %.dts,%.dtb,$(notdir $(2))))
+$(eval DEP := $(patsubst %.dtb,%.d,$(DOBJ)))
+
+$(DOBJ): $(2) | fdt_dirs
+	@echo "  DTC      $$<"
+	$$(Q)$$(DTC) $$(DTC_FLAGS) -d $(DEP) -o $$@ $$<
+
+-include $(DEP)
+
+endef
+
+# MAKE_DTBS defines build rules for the given flattened device tree sources
+#   $(1) = output directory
+#   $(2) = list of flattened device tree source files
+define MAKE_DTBS
+        $(eval DOBJS := $(filter %.dts,$(2)))
+        $(eval REMAIN := $(filter-out %.dts,$(2)))
+        $(eval $(foreach obj,$(DOBJS),$(call MAKE_DTB,$(1),$(obj))))
+
+        $(and $(REMAIN),$(error Unexpected source files present: $(REMAIN)))
+endef
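
With these macros in place, a platform makefile only needs to list its device
tree sources, e.g. `FDT_SOURCES += fdts/<board>.dts` (the path here is
illustrative); MAKE_FDT then derives the matching `$(BUILD_PLAT)/fdts/<board>.dtb`
target, and the MAKE_DTB rule invokes $(DTC) with a generated .d dependency
file, so edits to included .dtsi fragments retrigger the build.
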
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index eb1bb0a..a80a491 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -27,6 +27,9 @@
 # Base commit to perform code check on
 BASE_COMMIT			:= origin/master
 
+# Execute BL2 at EL3
+BL2_AT_EL3			:= 0
+
 # By default, consider that the platform may release several CPUs out of reset.
 # The platform Makefile is free to override this value.
 COLD_BOOT_SINGLE_CPU		:= 0
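
Keeping the default at 0 leaves existing platforms unaffected; a platform that
supports the new flow opts in per build, e.g. `make PLAT=fvp BL2_AT_EL3=1 all fip`.
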
diff --git a/make_helpers/tbbr/tbbr_tools.mk b/make_helpers/tbbr/tbbr_tools.mk
index 6e6e273..cda8d72 100644
--- a/make_helpers/tbbr/tbbr_tools.mk
+++ b/make_helpers/tbbr/tbbr_tools.mk
@@ -64,7 +64,9 @@
 $(if ${BL2},$(eval $(call CERT_ADD_CMD_OPT,${BL2},--tb-fw,true)),\
             $(eval $(call CERT_ADD_CMD_OPT,$(call IMG_BIN,2),--tb-fw,true)))
 $(eval $(call CERT_ADD_CMD_OPT,${BUILD_PLAT}/tb_fw.crt,--tb-fw-cert))
+ifeq (${BL2_AT_EL3}, 0)
 $(eval $(call FIP_ADD_PAYLOAD,${BUILD_PLAT}/tb_fw.crt,--tb-fw-cert))
+endif
 
 # Add the SCP_BL2 CoT (key cert + img cert + image)
 ifneq (${SCP_BL2},)
diff --git a/plat/arm/board/fvp/fvp_bl2_el3_setup.c b/plat/arm/board/fvp/fvp_bl2_el3_setup.c
new file mode 100644
index 0000000..69f2f7a
--- /dev/null
+++ b/plat/arm/board/fvp/fvp_bl2_el3_setup.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <plat_arm.h>
+#include "fvp_private.h"
+
+void bl2_el3_early_platform_setup(u_register_t arg0 __unused,
+				  u_register_t arg1 __unused,
+				  u_register_t arg2 __unused,
+				  u_register_t arg3 __unused)
+{
+	arm_bl2_el3_early_platform_setup();
+
+	/* Initialize the platform config for future decision making */
+	fvp_config_setup();
+
+	/*
+	 * Initialize Interconnect for this cluster during cold boot.
+	 * No need for locks as no other CPU is active.
+	 */
+	fvp_interconnect_init();
+	/*
+	 * Enable coherency in Interconnect for the primary CPU's cluster.
+	 */
+	fvp_interconnect_enable();
+}
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index 7080fc3..4ac0850 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -146,4 +146,7 @@
 
 #define PLAT_ARM_G0_IRQ_PROPS(grp)	ARM_G0_IRQ_PROPS(grp)
 
+#define PLAT_ARM_PRIVATE_SDEI_EVENTS	ARM_SDEI_PRIVATE_EVENTS
+#define PLAT_ARM_SHARED_SDEI_EVENTS	ARM_SDEI_SHARED_EVENTS
+
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index 7edbd3d..a257784 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -126,6 +126,13 @@
 				plat/arm/board/fvp/fvp_trusted_boot.c		\
 				${FVP_SECURITY_SOURCES}
 
+ifeq (${BL2_AT_EL3},1)
+BL2_SOURCES		+=	plat/arm/board/fvp/${ARCH}/fvp_helpers.S	\
+				plat/arm/board/fvp/fvp_bl2_el3_setup.c		\
+				${FVP_CPU_LIBS}					\
+				${FVP_INTERCONNECT_SOURCES}
+endif
+
 ifeq (${FVP_USE_SP804_TIMER},1)
 BL2_SOURCES		+=	drivers/arm/sp804/sp804_delay_timer.c
 endif
@@ -150,6 +157,10 @@
 # Enable Activity Monitor Unit extensions by default
 ENABLE_AMU			:=	1
 
+ifeq (${ENABLE_AMU},1)
+BL31_SOURCES		+=	lib/cpus/aarch64/cortex_a75_pubsub.c
+endif
+
 ifneq (${ENABLE_STACK_PROTECTOR},0)
 PLAT_BL_COMMON_SOURCES	+=	plat/arm/board/fvp/fvp_stack_protector.c
 endif
@@ -161,5 +172,9 @@
 # Add support for platform supplied linker script for BL31 build
 $(eval $(call add_define,PLAT_EXTRA_LD_SCRIPT))
 
+ifneq (${BL2_AT_EL3}, 0)
+    override BL1_SOURCES =
+endif
+
 include plat/arm/board/common/board_common.mk
 include plat/arm/common/arm_common.mk
diff --git a/plat/arm/board/juno/include/platform_def.h b/plat/arm/board/juno/include/platform_def.h
index e475ece..cac47f7 100644
--- a/plat/arm/board/juno/include/platform_def.h
+++ b/plat/arm/board/juno/include/platform_def.h
@@ -238,4 +238,7 @@
 /* CSS SoC NIC-400 Global Programmers View (GPV) */
 #define PLAT_SOC_CSS_NIC400_BASE	0x2a000000
 
+#define PLAT_ARM_PRIVATE_SDEI_EVENTS	ARM_SDEI_PRIVATE_EVENTS
+#define PLAT_ARM_SHARED_SDEI_EVENTS	ARM_SDEI_SHARED_EVENTS
+
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/plat/arm/common/aarch64/arm_sdei.c b/plat/arm/common/aarch64/arm_sdei.c
index 514800c..687b21d 100644
--- a/plat/arm/common/aarch64/arm_sdei.c
+++ b/plat/arm/common/aarch64/arm_sdei.c
@@ -11,23 +11,14 @@
 #include <sdei.h>
 
 /* Private event mappings */
-static sdei_ev_map_t arm_private_sdei[] = {
-	/* Event 0 */
-	SDEI_DEFINE_EVENT_0(ARM_SDEI_SGI),
-
-	/* Dynamic private events */
-	SDEI_PRIVATE_EVENT(ARM_SDEI_DP_EVENT_0, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC),
-	SDEI_PRIVATE_EVENT(ARM_SDEI_DP_EVENT_1, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC),
-	SDEI_PRIVATE_EVENT(ARM_SDEI_DP_EVENT_2, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC),
+static sdei_ev_map_t arm_sdei_private[] = {
+	PLAT_ARM_PRIVATE_SDEI_EVENTS
 };
 
 /* Shared event mappings */
-static sdei_ev_map_t arm_shared_sdei[] = {
-	/* Dynamic shared events */
-	SDEI_SHARED_EVENT(ARM_SDEI_DS_EVENT_0, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC),
-	SDEI_SHARED_EVENT(ARM_SDEI_DS_EVENT_1, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC),
-	SDEI_SHARED_EVENT(ARM_SDEI_DS_EVENT_2, SDEI_DYN_IRQ, SDEI_MAPF_DYNAMIC),
+static sdei_ev_map_t arm_sdei_shared[] = {
+	PLAT_ARM_SHARED_SDEI_EVENTS
 };
 
 /* Export ARM SDEI events */
-REGISTER_SDEI_MAP(arm_private_sdei, arm_shared_sdei);
+REGISTER_SDEI_MAP(arm_sdei_private, arm_sdei_shared);
diff --git a/plat/arm/common/arm_bl2_el3_setup.c b/plat/arm/common/arm_bl2_el3_setup.c
new file mode 100644
index 0000000..e70d115
--- /dev/null
+++ b/plat/arm/common/arm_bl2_el3_setup.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#include <console.h>
+#include <generic_delay_timer.h>
+#include <plat_arm.h>
+#include <platform.h>
+
+#pragma weak bl2_el3_early_platform_setup
+#pragma weak bl2_el3_plat_arch_setup
+#pragma weak bl2_el3_plat_prepare_exit
+
+static meminfo_t bl2_el3_tzram_layout;
+
+/*
+ * Perform ARM-specific early platform setup. At this moment we only initialize
+ * the console and the memory layout.
+ */
+void arm_bl2_el3_early_platform_setup(void)
+{
+	/* Initialize the console to provide early debug support */
+	console_init(PLAT_ARM_BOOT_UART_BASE, PLAT_ARM_BOOT_UART_CLK_IN_HZ,
+			ARM_CONSOLE_BAUDRATE);
+
+	/*
+	 * Allow BL2 to see the whole Trusted RAM. This is determined
+	 * statically since we cannot rely on BL1 passing this information
+	 * in the BL2_AT_EL3 case.
+	 */
+	bl2_el3_tzram_layout.total_base = ARM_BL_RAM_BASE;
+	bl2_el3_tzram_layout.total_size = ARM_BL_RAM_SIZE;
+
+	/* Initialise the IO layer and register platform IO devices */
+	plat_arm_io_setup();
+}
+
+void bl2_el3_early_platform_setup(u_register_t arg0 __unused,
+				  u_register_t arg1 __unused,
+				  u_register_t arg2 __unused,
+				  u_register_t arg3 __unused)
+{
+	arm_bl2_el3_early_platform_setup();
+
+	/*
+	 * Initialize Interconnect for this cluster during cold boot.
+	 * No need for locks as no other CPU is active.
+	 */
+	plat_arm_interconnect_init();
+	/*
+	 * Enable Interconnect coherency for the primary CPU's cluster.
+	 */
+	plat_arm_interconnect_enter_coherency();
+
+	generic_delay_timer_init();
+}
+
+/*******************************************************************************
+ * Perform the very early platform specific architectural setup here. At the
+ * moment this only initializes the MMU in a quick and dirty way.
+ ******************************************************************************/
+void arm_bl2_el3_plat_arch_setup(void)
+{
+	arm_setup_page_tables(bl2_el3_tzram_layout.total_base,
+			      bl2_el3_tzram_layout.total_size,
+			      BL_CODE_BASE,
+			      BL_CODE_END,
+			      BL_RO_DATA_BASE,
+			      BL_RO_DATA_END
+#if USE_COHERENT_MEM
+			      , BL_COHERENT_RAM_BASE,
+			      BL_COHERENT_RAM_END
+#endif
+			      );
+
+#ifdef AARCH32
+	enable_mmu_secure(0);
+#else
+	enable_mmu_el3(0);
+#endif
+}
+
+void bl2_el3_plat_arch_setup(void)
+{
+	arm_bl2_el3_plat_arch_setup();
+}
+
+void bl2_el3_plat_prepare_exit(void)
+{
+}
diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk
index 17acae5..e6ce18a 100644
--- a/plat/arm/common/arm_common.mk
+++ b/plat/arm/common/arm_common.mk
@@ -7,13 +7,9 @@
 ifeq (${ARCH}, aarch64)
   # On ARM standard platforms, the TSP can execute from Trusted SRAM, Trusted
   # DRAM (if available) or the TZC secured area of DRAM.
-  # Trusted SRAM is the default.
+  # TZC secured DRAM is the default.
 
-  ifneq (${TRUSTED_BOARD_BOOT},0)
-    ARM_TSP_RAM_LOCATION	?=	dram
-  else
-    ARM_TSP_RAM_LOCATION	?=	tsram
-  endif
+  ARM_TSP_RAM_LOCATION	?=	dram
 
   ifeq (${ARM_TSP_RAM_LOCATION}, tsram)
     ARM_TSP_RAM_LOCATION_ID = ARM_TRUSTED_SRAM_ID
@@ -154,6 +150,11 @@
 				drivers/io/io_storage.c				\
 				plat/arm/common/arm_bl2_setup.c			\
 				plat/arm/common/arm_io_storage.c
+
+ifeq (${BL2_AT_EL3},1)
+BL2_SOURCES		+=	plat/arm/common/arm_bl2_el3_setup.c
+endif
+
 ifeq (${LOAD_IMAGE_V2},1)
 # Because BL1/BL2 execute in AArch64 mode but BL32 in AArch32 we need to use
 # the AArch32 descriptors.
diff --git a/plat/common/plat_bl2_el3_common.c b/plat/common/plat_bl2_el3_common.c
new file mode 100644
index 0000000..358a02d
--- /dev/null
+++ b/plat/common/plat_bl2_el3_common.c
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bl_common.h>
+#include <debug.h>
+#include <errno.h>
+#include <platform_def.h>
+
+/*
+ * The following platform functions are weakly defined. They
+ * are default implementations that allow BL2 to compile in the
+ * absence of real definitions. Platforms may override them
+ * with more complex definitions.
+ */
+#pragma weak bl2_el3_plat_prepare_exit
+
+void bl2_el3_plat_prepare_exit(void)
+{
+}
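
Because bl2_el3_plat_prepare_exit is weak, a platform may provide a strong definition; a minimal sketch under that assumption (the body below is illustrative, not part of this patch):

    /* Hypothetical strong override of the weak default above. */
    void bl2_el3_plat_prepare_exit(void)
    {
    	/* e.g. quiesce a platform watchdog before handing off to BL31 */
    }
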
diff --git a/plat/hisilicon/hikey/platform.mk b/plat/hisilicon/hikey/platform.mk
index 18b5e15..524fa6a 100644
--- a/plat/hisilicon/hikey/platform.mk
+++ b/plat/hisilicon/hikey/platform.mk
@@ -120,3 +120,5 @@
 ERRATA_A53_836870		:=	1
 ERRATA_A53_843419		:=	1
 ERRATA_A53_855873		:=	1
+
+FIP_ALIGN			:=	512
diff --git a/plat/hisilicon/hikey960/aarch64/hikey960_helpers.S b/plat/hisilicon/hikey960/aarch64/hikey960_helpers.S
index c88f68e..d18399f 100644
--- a/plat/hisilicon/hikey960/aarch64/hikey960_helpers.S
+++ b/plat/hisilicon/hikey960/aarch64/hikey960_helpers.S
@@ -16,8 +16,6 @@
 	.globl	plat_crash_console_putc
 	.globl	plat_report_exception
 	.globl	plat_reset_handler
-	.globl	set_retention_ticks
-	.globl	clr_retention_ticks
 	.globl	clr_ex
 	.globl	nop
 
@@ -139,35 +137,6 @@
 endfunc plat_reset_handler
 
 	/* -----------------------------------------------------
-	 * void set_retention_ticks(unsigned int val);
-	 * Clobber list : x0
-	 * -----------------------------------------------------
-	 */
-func set_retention_ticks
-	mrs	x0, CORTEX_A53_ECTLR_EL1
-	bic	x0, x0, #CORTEX_A53_ECTLR_CPU_RET_CTRL_MASK
-	orr	x0, x0, #RETENTION_ENTRY_TICKS_8
-	msr	CORTEX_A53_ECTLR_EL1, x0
-	isb
-	dsb	sy
-	ret
-endfunc set_retention_ticks
-
-	/* -----------------------------------------------------
-	 * void clr_retention_ticks(unsigned int val);
-	 * Clobber list : x0
-	 * -----------------------------------------------------
-	 */
-func clr_retention_ticks
-	mrs	x0, CORTEX_A53_ECTLR_EL1
-	bic	x0, x0, #CORTEX_A53_ECTLR_CPU_RET_CTRL_MASK
-	msr	CORTEX_A53_ECTLR_EL1, x0
-	isb
-	dsb	sy
-	ret
-endfunc clr_retention_ticks
-
-	/* -----------------------------------------------------
 	 * void clrex(void);
 	 * -----------------------------------------------------
 	 */
diff --git a/plat/hisilicon/hikey960/hikey960_bl1_setup.c b/plat/hisilicon/hikey960/hikey960_bl1_setup.c
index 6dfada7..11f143a 100644
--- a/plat/hisilicon/hikey960/hikey960_bl1_setup.c
+++ b/plat/hisilicon/hikey960/hikey960_bl1_setup.c
@@ -519,6 +519,11 @@
 	set_audio_power_up();
 	set_pcie_power_up();
 	set_isp_srt_power_up();
+
+	/* set ISP_CORE_CTRL_S to non-secure mode */
+	mmio_write_32(0xe8583800, 0x7);
+	/* set ISP_SUB_CTRL_S to non-secure mode */
+	mmio_write_32(0xe8583804, 0xf);
 }
 
 static void hikey960_ufs_reset(void)
@@ -642,6 +647,8 @@
 	}
 	/* GPIO005 - PMU SSI, 10mA */
 	mmio_write_32(IOCG_006_REG, 2 << 4);
+	/* GPIO213 - PCIE_CLKREQ_N */
+	mmio_write_32(IOMG_AO_033_REG, 1);
 }
 
 /*
diff --git a/plat/hisilicon/hikey960/hikey960_pm.c b/plat/hisilicon/hikey960/hikey960_pm.c
index 078f0d8..6609530 100644
--- a/plat/hisilicon/hikey960/hikey960_pm.c
+++ b/plat/hisilicon/hikey960/hikey960_pm.c
@@ -26,38 +26,6 @@
 #define SYSTEM_PWR_STATE(state) \
 	((state)->pwr_domain_state[PLAT_MAX_PWR_LVL])
 
-#define PSTATE_WIDTH		4
-#define PSTATE_MASK		((1 << PSTATE_WIDTH) - 1)
-
-#define MAKE_PWRSTATE(lvl2_state, lvl1_state, lvl0_state, pwr_lvl, type) \
-		(((lvl2_state) << (PSTATE_ID_SHIFT + PSTATE_WIDTH * 2)) | \
-		 ((lvl1_state) << (PSTATE_ID_SHIFT + PSTATE_WIDTH)) | \
-		 ((lvl0_state) << (PSTATE_ID_SHIFT)) | \
-		 ((pwr_lvl) << PSTATE_PWR_LVL_SHIFT) | \
-		 ((type) << PSTATE_TYPE_SHIFT))
-
-/*
- * The table storing the valid idle power states. Ensure that the
- * array entries are populated in ascending order of state-id to
- * enable us to use binary search during power state validation.
- * The table must be terminated by a NULL entry.
- */
-const unsigned int hikey960_pwr_idle_states[] = {
-	/* State-id - 0x001 */
-	MAKE_PWRSTATE(PLAT_MAX_RUN_STATE, PLAT_MAX_RUN_STATE,
-		      PLAT_MAX_STB_STATE, MPIDR_AFFLVL0, PSTATE_TYPE_STANDBY),
-	/* State-id - 0x002 */
-	MAKE_PWRSTATE(PLAT_MAX_RUN_STATE, PLAT_MAX_RUN_STATE,
-		      PLAT_MAX_RET_STATE, MPIDR_AFFLVL0, PSTATE_TYPE_STANDBY),
-	/* State-id - 0x003 */
-	MAKE_PWRSTATE(PLAT_MAX_RUN_STATE, PLAT_MAX_RUN_STATE,
-		      PLAT_MAX_OFF_STATE, MPIDR_AFFLVL0, PSTATE_TYPE_POWERDOWN),
-	/* State-id - 0x033 */
-	MAKE_PWRSTATE(PLAT_MAX_RUN_STATE, PLAT_MAX_OFF_STATE,
-		      PLAT_MAX_OFF_STATE, MPIDR_AFFLVL1, PSTATE_TYPE_POWERDOWN),
-	0,
-};
-
 #define DMAC_GLB_REG_SEC	0x694
 #define AXI_CONF_BASE		0x820
 
@@ -66,24 +34,17 @@
 static void hikey960_pwr_domain_standby(plat_local_state_t cpu_state)
 {
 	unsigned long scr;
-	unsigned int val = 0;
-
-	assert(cpu_state == PLAT_MAX_STB_STATE ||
-	       cpu_state == PLAT_MAX_RET_STATE);
 
 	scr = read_scr_el3();
 
-	/* Enable Physical IRQ and FIQ to wake the CPU*/
+	/* Enable Physical IRQ and FIQ to wake the CPU */
 	write_scr_el3(scr | SCR_IRQ_BIT | SCR_FIQ_BIT);
 
-	if (cpu_state == PLAT_MAX_RET_STATE)
-		set_retention_ticks(val);
-
+	/* Add barrier before the CPU enters WFI state */
+	isb();
+	dsb();
 	wfi();
 
-	if (cpu_state == PLAT_MAX_RET_STATE)
-		clr_retention_ticks(val);
-
 	/*
 	 * Restore SCR to the original value, synchronisation of
 	 * scr_el3 is done by eret while el3_exit to save some
@@ -161,34 +122,38 @@
 int hikey960_validate_power_state(unsigned int power_state,
 			       psci_power_state_t *req_state)
 {
-	unsigned int state_id;
+	unsigned int pstate = psci_get_pstate_type(power_state);
+	unsigned int pwr_lvl = psci_get_pstate_pwrlvl(power_state);
 	int i;
 
 	assert(req_state);
 
-	/*
-	 *  Currently we are using a linear search for finding the matching
-	 *  entry in the idle power state array. This can be made a binary
-	 *  search if the number of entries justify the additional complexity.
-	 */
-	for (i = 0; !!hikey960_pwr_idle_states[i]; i++) {
-		if (power_state == hikey960_pwr_idle_states[i])
-			break;
-	}
-
-	/* Return error if entry not found in the idle state array */
-	if (!hikey960_pwr_idle_states[i])
+	if (pwr_lvl > PLAT_MAX_PWR_LVL)
 		return PSCI_E_INVALID_PARAMS;
 
-	i = 0;
-	state_id = psci_get_pstate_id(power_state);
+	/* Sanity check the requested state */
+	if (pstate == PSTATE_TYPE_STANDBY) {
+		/*
+		 * It's possible to enter standby only on power level 0.
+		 * Ignore any other power level.
+		 */
+		if (pwr_lvl != MPIDR_AFFLVL0)
+			return PSCI_E_INVALID_PARAMS;
 
-	/* Parse the State ID and populate the state info parameter */
-	while (state_id) {
-		req_state->pwr_domain_state[i++] = state_id & PSTATE_MASK;
-		state_id >>= PSTATE_WIDTH;
+		req_state->pwr_domain_state[MPIDR_AFFLVL0] =
+					PLAT_MAX_RET_STATE;
+	} else {
+		for (i = MPIDR_AFFLVL0; i <= pwr_lvl; i++)
+			req_state->pwr_domain_state[i] =
+					PLAT_MAX_OFF_STATE;
 	}
 
+	/*
+	 * We expect the 'state id' to be zero.
+	 */
+	if (psci_get_pstate_id(power_state))
+		return PSCI_E_INVALID_PARAMS;
+
 	return PSCI_E_SUCCESS;
 }
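
To make the new validation concrete, a worked sketch of the accepted encodings, using the standard TF-A PSCI helpers referenced above:

    /*
     * CPU standby: type = PSTATE_TYPE_STANDBY, power level = MPIDR_AFFLVL0,
     * state id = 0 -> req_state->pwr_domain_state[0] = PLAT_MAX_RET_STATE,
     * returns PSCI_E_SUCCESS.
     * Powerdown at level 1: levels 0..1 are set to PLAT_MAX_OFF_STATE.
     * Any non-zero state id, or pwr_lvl > PLAT_MAX_PWR_LVL, is rejected
     * with PSCI_E_INVALID_PARAMS.
     */
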
 
diff --git a/plat/hisilicon/hikey960/include/hi3660.h b/plat/hisilicon/hikey960/include/hi3660.h
index 83d1b36..ab7b8aa 100644
--- a/plat/hisilicon/hikey960/include/hi3660.h
+++ b/plat/hisilicon/hikey960/include/hi3660.h
@@ -335,6 +335,8 @@
 #define IOMG_AO_026_REG			(IOMG_AO_REG_BASE + 0x068)
 /* GPIO219: PD interrupt. pull up */
 #define IOMG_AO_039_REG			(IOMG_AO_REG_BASE + 0x09C)
+/* GPIO213: PCIE_CLKREQ_N */
+#define IOMG_AO_033_REG			(IOMG_AO_REG_BASE + 0x084)
 
 #define IOCG_AO_REG_BASE		0xFFF1187C
 /* GPIO219: PD interrupt. pull up */
diff --git a/plat/hisilicon/hikey960/include/platform_def.h b/plat/hisilicon/hikey960/include/platform_def.h
index 2ac7f2a..cb76090 100644
--- a/plat/hisilicon/hikey960/include/platform_def.h
+++ b/plat/hisilicon/hikey960/include/platform_def.h
@@ -31,10 +31,8 @@
 #define PLAT_NUM_PWR_DOMAINS		(PLATFORM_CORE_COUNT + \
 					 PLATFORM_CLUSTER_COUNT + 1)
 
-#define PLAT_MAX_RUN_STATE		0
-#define PLAT_MAX_STB_STATE		1
-#define PLAT_MAX_RET_STATE		2
-#define PLAT_MAX_OFF_STATE		3
+#define PLAT_MAX_RET_STATE		1
+#define PLAT_MAX_OFF_STATE		2
 
 #define MAX_IO_DEVICES			3
 #define MAX_IO_HANDLES			4
diff --git a/plat/hisilicon/hikey960/platform.mk b/plat/hisilicon/hikey960/platform.mk
index 695f092..cb97deb 100644
--- a/plat/hisilicon/hikey960/platform.mk
+++ b/plat/hisilicon/hikey960/platform.mk
@@ -101,3 +101,5 @@
 ERRATA_A53_836870		:=	1
 ERRATA_A53_843419		:=	1
 ERRATA_A53_855873		:=	1
+
+FIP_ALIGN			:=	512
diff --git a/plat/hisilicon/poplar/aarch64/platform_common.c b/plat/hisilicon/poplar/aarch64/platform_common.c
index a7dac4f..762bd84 100644
--- a/plat/hisilicon/poplar/aarch64/platform_common.c
+++ b/plat/hisilicon/poplar/aarch64/platform_common.c
@@ -25,9 +25,14 @@
 					DEVICE_SIZE,			\
 					MT_DEVICE | MT_RW | MT_SECURE)
 
+#define MAP_TSP_MEM	MAP_REGION_FLAT(TSP_SEC_MEM_BASE,		\
+					TSP_SEC_MEM_SIZE,		\
+					MT_MEMORY | MT_RW | MT_SECURE)
+
 static const mmap_region_t poplar_mmap[] = {
 	MAP_DDR,
 	MAP_DEVICE,
+	MAP_TSP_MEM,
 	{0}
 };
 
diff --git a/plat/hisilicon/poplar/bl2_plat_setup.c b/plat/hisilicon/poplar/bl2_plat_setup.c
index 1741475..db507c3 100644
--- a/plat/hisilicon/poplar/bl2_plat_setup.c
+++ b/plat/hisilicon/poplar/bl2_plat_setup.c
@@ -29,8 +29,10 @@
 typedef struct bl2_to_bl31_params_mem {
 	bl31_params_t		bl31_params;
 	image_info_t		bl31_image_info;
+	image_info_t		bl32_image_info;
 	image_info_t		bl33_image_info;
 	entry_point_info_t	bl33_ep_info;
+	entry_point_info_t	bl32_ep_info;
 	entry_point_info_t	bl31_ep_info;
 } bl2_to_bl31_params_mem_t;
 
@@ -61,6 +63,16 @@
 	SET_PARAM_HEAD(bl2_to_bl31_params->bl31_image_info,
 		       PARAM_IMAGE_BINARY, VERSION_1, 0);
 
+	/* Fill BL3-2 related information if it exists */
+#ifdef BL32_BASE
+	bl2_to_bl31_params->bl32_ep_info = &bl31_params_mem.bl32_ep_info;
+	SET_PARAM_HEAD(bl2_to_bl31_params->bl32_ep_info, PARAM_EP,
+		VERSION_1, 0);
+	bl2_to_bl31_params->bl32_image_info = &bl31_params_mem.bl32_image_info;
+	SET_PARAM_HEAD(bl2_to_bl31_params->bl32_image_info, PARAM_IMAGE_BINARY,
+		VERSION_1, 0);
+#endif
+
 	/* Fill BL3-3 related information */
 	bl2_to_bl31_params->bl33_ep_info = &bl31_params_mem.bl33_ep_info;
 	SET_PARAM_HEAD(bl2_to_bl31_params->bl33_ep_info,
@@ -89,6 +101,41 @@
 				     DISABLE_ALL_EXCEPTIONS);
 }
 
+/*******************************************************************************
+ * Before calling this function BL32 is loaded in memory and its entrypoint
+ * is set by load_image. This is a placeholder for the platform to change
+ * the entrypoint of BL32 and set SPSR and security state.
+ * On Poplar we only set the security state of the entrypoint.
+ ******************************************************************************/
+#ifdef BL32_BASE
+void bl2_plat_set_bl32_ep_info(image_info_t *bl32_image_info,
+					entry_point_info_t *bl32_ep_info)
+{
+	SET_SECURITY_STATE(bl32_ep_info->h.attr, SECURE);
+	/*
+	 * The Secure Payload Dispatcher service is responsible for
+	 * setting the SPSR prior to entry into the BL32 image.
+	 */
+	bl32_ep_info->spsr = 0;
+}
+
+/*******************************************************************************
+ * Populate the extents of memory available for loading BL32
+ ******************************************************************************/
+void bl2_plat_get_bl32_meminfo(meminfo_t *bl32_meminfo)
+{
+	/*
+	 * Populate the extents of memory available for loading BL32.
+	 */
+	bl32_meminfo->total_base = BL32_BASE;
+	bl32_meminfo->free_base = BL32_BASE;
+	bl32_meminfo->total_size =
+			(TSP_SEC_MEM_BASE + TSP_SEC_MEM_SIZE) - BL32_BASE;
+	bl32_meminfo->free_size =
+			(TSP_SEC_MEM_BASE + TSP_SEC_MEM_SIZE) - BL32_BASE;
+}
+#endif /* BL32_BASE */
+
 static uint32_t hisi_get_spsr_for_bl33_entry(void)
 {
 	unsigned long el_status;
@@ -159,5 +206,5 @@
 
 unsigned long plat_get_ns_image_entrypoint(void)
 {
-	return PLAT_ARM_NS_IMAGE_OFFSET;
+	return PLAT_POPLAR_NS_IMAGE_OFFSET;
 }
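
With the values this patch adds to platform_def.h (BL32_BASE = TSP_SEC_MEM_BASE = 0x03000000, TSP_SEC_MEM_SIZE = 0x01000000), the meminfo arithmetic above resolves to a 16MB window:

    /* total_size = free_size = (0x03000000 + 0x01000000) - 0x03000000
     *            = 0x01000000 (16MB), starting at BL32_BASE. */
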
diff --git a/plat/hisilicon/poplar/bl31_plat_setup.c b/plat/hisilicon/poplar/bl31_plat_setup.c
index b9a0e18..e3a5c50 100644
--- a/plat/hisilicon/poplar/bl31_plat_setup.c
+++ b/plat/hisilicon/poplar/bl31_plat_setup.c
@@ -32,11 +32,31 @@
 #define BL31_COHERENT_RAM_BASE	(unsigned long)(&__COHERENT_RAM_START__)
 #define BL31_COHERENT_RAM_LIMIT	(unsigned long)(&__COHERENT_RAM_END__)
 
+#define TZPC_SEC_ATTR_CTRL_VALUE (0x9DB98D45)
+
+static entry_point_info_t bl32_image_ep_info;
 static entry_point_info_t bl33_image_ep_info;
 
+static void hisi_tzpc_sec_init(void)
+{
+	mmio_write_32(HISI_TZPC_SEC_ATTR_CTRL, TZPC_SEC_ATTR_CTRL_VALUE);
+}
+
 entry_point_info_t *bl31_plat_get_next_image_ep_info(uint32_t type)
 {
-	return &bl33_image_ep_info;
+	entry_point_info_t *next_image_info;
+
+	assert(sec_state_is_valid(type));
+	next_image_info = (type == NON_SECURE)
+			? &bl33_image_ep_info : &bl32_image_ep_info;
+	/*
+	 * None of the images on this platform can have 0x0
+	 * as the entrypoint.
+	 */
+	if (next_image_info->pc)
+		return next_image_info;
+	else
+		return NULL;
 }
 
 void bl31_early_platform_setup(bl31_params_t *from_bl2,
@@ -47,6 +67,13 @@
 	/* Init console for crash report */
 	plat_crash_console_init();
 
+
+	/*
+	 * Copy BL32 (if populated by BL2) and BL33 entry point information.
+	 * They are stored in Secure RAM, in BL2's address space.
+	 */
+	if (from_bl2->bl32_ep_info)
+		bl32_image_ep_info = *from_bl2->bl32_ep_info;
 	bl33_image_ep_info = *from_bl2->bl33_ep_info;
 }
 
@@ -58,6 +85,9 @@
 	/* Init GIC distributor and CPU interface */
 	plat_arm_gic_driver_init();
 	plat_arm_gic_init();
+
+	/* Init security properties of IP blocks */
+	hisi_tzpc_sec_init();
 }
 
 void bl31_plat_runtime_setup(void)
diff --git a/plat/hisilicon/poplar/include/hi3798cv200.h b/plat/hisilicon/poplar/include/hi3798cv200.h
index 6318b9c..540d0aa 100644
--- a/plat/hisilicon/poplar/include/hi3798cv200.h
+++ b/plat/hisilicon/poplar/include/hi3798cv200.h
@@ -30,7 +30,7 @@
 #define TIMER20_BGLOAD			(SEC_TIMER2_BASE + 0x018)
 
 /* GPIO */
-#define	GPIO_MAX			(12)
+#define	GPIO_MAX			(13)
 #define	GPIO_BASE(x)			(x != 5 ?			\
 					0xf820000 + x * 0x1000 : 0xf8004000)
 
@@ -97,4 +97,7 @@
 /* Watchdog */
 #define HISI_WDG0_BASE			(0xF8A2C000)
 
+#define HISI_TZPC_BASE			(0xF8A80000)
+#define HISI_TZPC_SEC_ATTR_CTRL		(HISI_TZPC_BASE + 0x10)
+
 #endif	/* __HI3798cv200_H__ */
diff --git a/plat/hisilicon/poplar/include/platform_def.h b/plat/hisilicon/poplar/include/platform_def.h
index b7afe82..3d1ad9b 100644
--- a/plat/hisilicon/poplar/include/platform_def.h
+++ b/plat/hisilicon/poplar/include/platform_def.h
@@ -48,11 +48,55 @@
 #define TEE_SEC_MEM_BASE		(0x70000000)
 #define TEE_SEC_MEM_SIZE		(0x10000000)
 
+/* Memory location options for TSP */
+#define POPLAR_SRAM_ID	0
+#define POPLAR_DRAM_ID	1
+
+/*
+ * DDR for OP-TEE (30MB from 0x02200000 - 0x04000000) is divided into several
+ * regions:
+ *   - Secure DDR (default is the top 16MB) used by OP-TEE
+ *   - Non-secure DDR (4MB) reserved for OP-TEE's future use
+ *   - Secure DDR (4MB aligned on 4MB) for OP-TEE's "Secure Data Path" feature
+ *   - Non-secure DDR used by OP-TEE (shared memory and padding) (4MB)
+ *   - Non-secure DDR (2MB) reserved for OP-TEE's future use
+ */
+#define DDR_SEC_SIZE			0x01000000
+#define DDR_SEC_BASE			0x03000000
+
 #define BL_MEM_BASE			(BL1_RO_BASE)
 #define BL_MEM_LIMIT			(BL31_LIMIT)
 #define BL_MEM_SIZE			(BL_MEM_LIMIT - BL_MEM_BASE)
 
+/*
+ * BL3-2 specific defines.
+ */
+
+/*
+ * The TSP currently executes from the TZC secured area of DRAM.
+ */
+#define BL32_DRAM_BASE			0x03000000
+#define BL32_DRAM_LIMIT			0x04000000
+
+#if (POPLAR_TSP_RAM_LOCATION_ID == POPLAR_DRAM_ID)
+#define TSP_SEC_MEM_BASE		BL32_DRAM_BASE
+#define TSP_SEC_MEM_SIZE		(BL32_DRAM_LIMIT - BL32_DRAM_BASE)
+#define BL32_BASE			BL32_DRAM_BASE
+#define BL32_LIMIT			BL32_DRAM_LIMIT
+#elif (POPLAR_TSP_RAM_LOCATION_ID == POPLAR_SRAM_ID)
+#error "SRAM storage of TSP payload is currently unsupported"
+#else
+#error "Currently unsupported POPLAR_TSP_LOCATION_ID value"
+#endif
+
+/* BL32 is mandatory in AArch32 */
+#ifndef AARCH32
+#ifdef SPD_none
+#undef BL32_BASE
+#endif /* SPD_none */
+#endif
+
-#define PLAT_ARM_NS_IMAGE_OFFSET	0x37000000
+#define PLAT_POPLAR_NS_IMAGE_OFFSET	0x37000000
 
 /* Page table and MMU setup constants */
 #define ADDR_SPACE_SIZE			(1ull << 32)
diff --git a/plat/hisilicon/poplar/include/poplar_layout.h b/plat/hisilicon/poplar/include/poplar_layout.h
index 192bcb9..e0b5618 100644
--- a/plat/hisilicon/poplar/include/poplar_layout.h
+++ b/plat/hisilicon/poplar/include/poplar_layout.h
@@ -74,16 +74,16 @@
  * "OFFSET" is an offset to the start of a region relative to the
  * base of the "l-loader" TEXT section (also a multiple of page size).
  */
-#define LLOADER_TEXT_BASE		0x00001000	/* page aligned */
+#define LLOADER_TEXT_BASE		0x02001000	/* page aligned */
 #define BL1_OFFSET			0x0000D000	/* page multiple */
-#define FIP_BASE			0x00040000
+#define FIP_BASE			0x02040000
 
 #define BL1_RO_SIZE			0x00008000	/* page multiple */
 #define BL1_RW_SIZE			0x00008000	/* page multiple */
 #define BL1_SIZE			(BL1_RO_SIZE + BL1_RW_SIZE)
 #define BL2_SIZE			0x0000c000	/* page multiple */
 #define BL31_SIZE			0x00014000
-#define FIP_SIZE			0x00068000
+#define FIP_SIZE			0x000c0000  /* absolute max */
 
      /* BL1_OFFSET */			/* (Defined above) */
 #define BL1_BASE			(LLOADER_TEXT_BASE + BL1_OFFSET)
diff --git a/plat/hisilicon/poplar/plat_storage.c b/plat/hisilicon/poplar/plat_storage.c
index 623a61b..ab94cba 100644
--- a/plat/hisilicon/poplar/plat_storage.c
+++ b/plat/hisilicon/poplar/plat_storage.c
@@ -43,6 +43,10 @@
 	.uuid = UUID_EL3_RUNTIME_FIRMWARE_BL31,
 };
 
+static const io_uuid_spec_t bl32_uuid_spec = {
+	.uuid = UUID_SECURE_PAYLOAD_BL32,
+};
+
 static const io_uuid_spec_t bl33_uuid_spec = {
 	.uuid = UUID_NON_TRUSTED_FIRMWARE_BL33,
 };
@@ -69,6 +73,11 @@
 		(uintptr_t)&bl31_uuid_spec,
 		open_fip
 	},
+	[BL32_IMAGE_ID] = {
+		&fip_dev_handle,
+		(uintptr_t)&bl32_uuid_spec,
+		open_fip
+	},
 	[BL33_IMAGE_ID] = {
 		&fip_dev_handle,
 		(uintptr_t)&bl33_uuid_spec,
diff --git a/plat/hisilicon/poplar/platform.mk b/plat/hisilicon/poplar/platform.mk
index 28e0d1f..818e311 100644
--- a/plat/hisilicon/poplar/platform.mk
+++ b/plat/hisilicon/poplar/platform.mk
@@ -4,6 +4,17 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
+# On Poplar, the TSP can execute from TZC secure area in DRAM.
+POPLAR_TSP_RAM_LOCATION	:=	dram
+ifeq (${POPLAR_TSP_RAM_LOCATION}, dram)
+  POPLAR_TSP_RAM_LOCATION_ID = POPLAR_DRAM_ID
+else ifeq (${POPLAR_TSP_RAM_LOCATION}, sram)
+  POPLAR_TSP_RAM_LOCATION_ID := POPLAR_SRAM_ID
+else
+  $(error "Currently unsupported POPLAR_TSP_RAM_LOCATION value")
+endif
+$(eval $(call add_define,POPLAR_TSP_RAM_LOCATION_ID))
+
 NEED_BL33			:= yes
 
 COLD_BOOT_SINGLE_CPU		:= 1
diff --git a/plat/socionext/uniphier/include/platform_def.h b/plat/socionext/uniphier/include/platform_def.h
index b5dc16a..cc046eb 100644
--- a/plat/socionext/uniphier/include/platform_def.h
+++ b/plat/socionext/uniphier/include/platform_def.h
@@ -47,7 +47,7 @@
 #define BL32_LIMIT			(UNIPHIER_SEC_DRAM_LIMIT)
 
 #define UNIPHIER_BLOCK_BUF_SIZE		0x00400000
-#define UNIPHIER_BLOCK_BUF_BASE		((BL2_LIMIT) - \
+#define UNIPHIER_BLOCK_BUF_BASE		((BL2_BASE) - \
 					 (UNIPHIER_BLOCK_BUF_SIZE))
 
 #define PLAT_PHY_ADDR_SPACE_SIZE	(1ULL << 32)
diff --git a/plat/socionext/uniphier/platform.mk b/plat/socionext/uniphier/platform.mk
index 3c78054..e0ddfa8 100644
--- a/plat/socionext/uniphier/platform.mk
+++ b/plat/socionext/uniphier/platform.mk
@@ -6,7 +6,6 @@
 
 override COLD_BOOT_SINGLE_CPU	:= 1
 override ENABLE_PLAT_COMPAT	:= 0
-override ERROR_DEPRECATED	:= 1
 override LOAD_IMAGE_V2		:= 1
 override USE_COHERENT_MEM	:= 1
 override USE_TBBR_DEFS		:= 1
@@ -116,5 +115,5 @@
 .PHONY: bl1_gzip
 bl1_gzip: $(BUILD_PLAT)/bl1.bin.gzip
 %.gzip: %
-	@echo " GZIP     $@"
-	$(Q)(cat $< | gzip -n -f -9 > $@) || (rm -f $@ || false)
+	@echo "  GZIP    $@"
+	$(Q)gzip -n -f -9 $< --stdout > $@
diff --git a/plat/xilinx/zynqmp/ipi_mailbox_service/ipi_mailbox_svc.c b/plat/xilinx/zynqmp/ipi_mailbox_service/ipi_mailbox_svc.c
new file mode 100644
index 0000000..bfc19d3
--- /dev/null
+++ b/plat/xilinx/zynqmp/ipi_mailbox_service/ipi_mailbox_svc.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/*
+ * Top-level SMC handler for ZynqMP IPI Mailbox doorbell functions.
+ */
+
+#include <bakery_lock.h>
+#include <debug.h>
+#include <errno.h>
+#include <mmio.h>
+#include <runtime_svc.h>
+#include <string.h>
+#include "ipi_mailbox_svc.h"
+#include "../zynqmp_ipi.h"
+#include "../zynqmp_private.h"
+#include "../../../services/spd/trusty/smcall.h"
+
+/*********************************************************************
+ * Macros definitions
+ ********************************************************************/
+
+/* IPI SMC calls macros: */
+#define IPI_SMC_OPEN_IRQ_MASK		0x00000001U /* IRQ enable bit in IPI
+						     * open SMC call
+						     */
+#define IPI_SMC_NOTIFY_BLOCK_MASK	0x00000001U /* Flag to indicate if
+						     * IPI notification needs
+						     * to be blocking.
+						     */
+#define IPI_SMC_ENQUIRY_DIRQ_MASK	0x00000001U /* Flag to indicate if the
+						     * notification interrupt
+						     * is to be disabled.
+						     */
+#define IPI_SMC_ACK_EIRQ_MASK		0x00000001U /* Flag to indicate if the
+						     * notification interrupt
+						     * is to be enabled.
+						     */
+
+#define UNSIGNED32_MASK			0xFFFFFFFFU /* 32bit mask */
+
+/**
+ * ipi_smc_handler() - SMC handler for IPI SMC calls
+ *
+ * @smc_fid - Function identifier
+ * @x1 - x4 - Arguments
+ * @cookie  - Unused
+ * @handle  - Pointer to caller's context structure
+ *
+ * @return  - Unused
+ *
+ * Determines whether smc_fid is a valid and supported IPI SMC Function ID
+ * from the ipi_api_id list; otherwise completes the request with
+ * the unknown SMC Function ID.
+ *
+ * The SMC calls for the IPI mailbox service are forwarded from the SiP
+ * Service SMC handler function with the rt_svc_handle signature.
+ */
+uint64_t ipi_smc_handler(uint32_t smc_fid, uint64_t x1, uint64_t x2,
+			 uint64_t x3, uint64_t x4, void *cookie,
+			 void *handle, uint64_t flags)
+{
+	int ret;
+	uint32_t ipi_local_id;
+	uint32_t ipi_remote_id;
+	unsigned int is_secure;
+
+	ipi_local_id = x1 & UNSIGNED32_MASK;
+	ipi_remote_id = x2 & UNSIGNED32_MASK;
+
+	if (SMC_ENTITY(smc_fid) >= SMC_ENTITY_TRUSTED_APP)
+		is_secure = 1;
+	else
+		is_secure = 0;
+
+	/* Validate IPI mailbox access */
+	ret = ipi_mb_validate(ipi_local_id, ipi_remote_id, is_secure);
+	if (ret)
+		SMC_RET1(handle, ret);
+
+	switch (SMC_FUNCTION(smc_fid)) {
+	case IPI_MAILBOX_OPEN:
+		ipi_mb_open(ipi_local_id, ipi_remote_id);
+		SMC_RET1(handle, 0);
+	case IPI_MAILBOX_RELEASE:
+		ipi_mb_release(ipi_local_id, ipi_remote_id);
+		SMC_RET1(handle, 0);
+	case IPI_MAILBOX_STATUS_ENQUIRY:
+	{
+		int disable_irq;
+
+		disable_irq = (x3 & IPI_SMC_ENQUIRY_DIRQ_MASK) ? 1 : 0;
+		ret = ipi_mb_enquire_status(ipi_local_id, ipi_remote_id);
+		if ((ret & IPI_MB_STATUS_RECV_PENDING) && disable_irq)
+			ipi_mb_disable_irq(ipi_local_id, ipi_remote_id);
+		SMC_RET1(handle, ret);
+	}
+	case IPI_MAILBOX_NOTIFY:
+	{
+		uint32_t is_blocking;
+
+		is_blocking = (x3 & IPI_SMC_NOTIFY_BLOCK_MASK) ? 1 : 0;
+		ipi_mb_notify(ipi_local_id, ipi_remote_id, is_blocking);
+		SMC_RET1(handle, 0);
+	}
+	case IPI_MAILBOX_ACK:
+	{
+		int enable_irq;
+
+		enable_irq = (x3 & IPI_SMC_ACK_EIRQ_MASK) ? 1 : 0;
+		ipi_mb_ack(ipi_local_id, ipi_remote_id);
+		if (enable_irq)
+			ipi_mb_enable_irq(ipi_local_id, ipi_remote_id);
+		SMC_RET1(handle, 0);
+	}
+	case IPI_MAILBOX_ENABLE_IRQ:
+		ipi_mb_enable_irq(ipi_local_id, ipi_remote_id);
+		SMC_RET1(handle, 0);
+	case IPI_MAILBOX_DISABLE_IRQ:
+		ipi_mb_disable_irq(ipi_local_id, ipi_remote_id);
+		SMC_RET1(handle, 0);
+	default:
+		WARN("Unimplemented IPI service call: 0x%x\n", smc_fid);
+		SMC_RET1(handle, SMC_UNK);
+	}
+}
diff --git a/plat/xilinx/zynqmp/ipi_mailbox_service/ipi_mailbox_svc.h b/plat/xilinx/zynqmp/ipi_mailbox_service/ipi_mailbox_svc.h
new file mode 100644
index 0000000..387ffd2
--- /dev/null
+++ b/plat/xilinx/zynqmp/ipi_mailbox_service/ipi_mailbox_svc.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/* ZynqMP IPI mailbox doorbell service enums and defines */
+
+#ifndef _IPI_MAILBOX_SVC_H_
+#define _IPI_MAILBOX_SVC_H_
+
+#include <stdint.h>
+
+/*********************************************************************
+ * Enum definitions
+ ********************************************************************/
+
+/* IPI SMC function numbers enum definition */
+enum ipi_api_id {
+	/* IPI mailbox operations functions: */
+	IPI_MAILBOX_OPEN = 0x1000,
+	IPI_MAILBOX_RELEASE,
+	IPI_MAILBOX_STATUS_ENQUIRY,
+	IPI_MAILBOX_NOTIFY,
+	IPI_MAILBOX_ACK,
+	IPI_MAILBOX_ENABLE_IRQ,
+	IPI_MAILBOX_DISABLE_IRQ
+};
+
+/*********************************************************************
+ * IPI mailbox service APIs declarations
+ ********************************************************************/
+
+/* IPI SMC handler */
+uint64_t ipi_smc_handler(uint32_t smc_fid, uint64_t x1, uint64_t x2,
+			 uint64_t x3, uint64_t x4, void *cookie, void *handle,
+			 uint64_t flags);
+
+#endif /* _IPI_MAILBOX_SVC_H_ */
diff --git a/plat/xilinx/zynqmp/platform.mk b/plat/xilinx/zynqmp/platform.mk
index cb3b442..bdd194b 100644
--- a/plat/xilinx/zynqmp/platform.mk
+++ b/plat/xilinx/zynqmp/platform.mk
@@ -42,7 +42,8 @@
 PLAT_INCLUDES		:=	-Iinclude/plat/arm/common/			\
 				-Iinclude/plat/arm/common/aarch64/		\
 				-Iplat/xilinx/zynqmp/include/			\
-				-Iplat/xilinx/zynqmp/pm_service/
+				-Iplat/xilinx/zynqmp/pm_service/		\
+				-Iplat/xilinx/zynqmp/ipi_mailbox_service/
 
 PLAT_BL_COMMON_SOURCES	:=	lib/xlat_tables/xlat_tables_common.c		\
 				lib/xlat_tables/aarch64/xlat_tables.c		\
@@ -71,7 +72,9 @@
 				plat/xilinx/zynqmp/plat_startup.c		\
 				plat/xilinx/zynqmp/plat_topology.c		\
 				plat/xilinx/zynqmp/sip_svc_setup.c		\
+				plat/xilinx/zynqmp/zynqmp_ipi.c		\
 				plat/xilinx/zynqmp/pm_service/pm_svc_main.c	\
 				plat/xilinx/zynqmp/pm_service/pm_api_sys.c	\
 				plat/xilinx/zynqmp/pm_service/pm_ipi.c		\
-				plat/xilinx/zynqmp/pm_service/pm_client.c
+				plat/xilinx/zynqmp/pm_service/pm_client.c	\
+				plat/xilinx/zynqmp/ipi_mailbox_service/ipi_mailbox_svc.c
diff --git a/plat/xilinx/zynqmp/pm_service/pm_api_sys.c b/plat/xilinx/zynqmp/pm_service/pm_api_sys.c
index 90c670d..9e21067 100644
--- a/plat/xilinx/zynqmp/pm_service/pm_api_sys.c
+++ b/plat/xilinx/zynqmp/pm_service/pm_api_sys.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -542,7 +542,6 @@
  */
 void pm_get_callbackdata(uint32_t *data, size_t count)
 {
-
 	pm_ipi_buff_read_callb(data, count);
-	pm_ipi_irq_clear();
+	pm_ipi_irq_clear(primary_proc);
 }
diff --git a/plat/xilinx/zynqmp/pm_service/pm_common.h b/plat/xilinx/zynqmp/pm_service/pm_common.h
index 03351c2..5dcbb0d 100644
--- a/plat/xilinx/zynqmp/pm_service/pm_common.h
+++ b/plat/xilinx/zynqmp/pm_service/pm_common.h
@@ -21,13 +21,13 @@
 
 /**
  * pm_ipi - struct for capturing IPI-channel specific info
- * @mask	mask for enabling/disabling and triggering the IPI
- * @base	base address for IPI
+ * @apu_ipi_id	APU IPI agent ID
+ * @pmu_ipi_id	PMU IPI agent ID
  * @buffer_base	base address for payload buffer
  */
 struct pm_ipi {
-	const unsigned int mask;
-	const uintptr_t base;
+	const uint32_t apu_ipi_id;
+	const uint32_t pmu_ipi_id;
 	const uintptr_t buffer_base;
 };
 
diff --git a/plat/xilinx/zynqmp/pm_service/pm_ipi.c b/plat/xilinx/zynqmp/pm_service/pm_ipi.c
index fdffde7..58faf0e 100644
--- a/plat/xilinx/zynqmp/pm_service/pm_ipi.c
+++ b/plat/xilinx/zynqmp/pm_service/pm_ipi.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -8,28 +8,17 @@
 #include <bakery_lock.h>
 #include <mmio.h>
 #include <platform.h>
+#include "../zynqmp_ipi.h"
 #include "../zynqmp_private.h"
 #include "pm_ipi.h"
 
 /* IPI message buffers */
 #define IPI_BUFFER_BASEADDR	0xFF990000U
 
-#define IPI_BUFFER_RPU_0_BASE	(IPI_BUFFER_BASEADDR + 0x0U)
-#define IPI_BUFFER_RPU_1_BASE	(IPI_BUFFER_BASEADDR + 0x200U)
 #define IPI_BUFFER_APU_BASE	(IPI_BUFFER_BASEADDR + 0x400U)
-#define IPI_BUFFER_PL_0_BASE	(IPI_BUFFER_BASEADDR + 0x600U)
-#define IPI_BUFFER_PL_1_BASE	(IPI_BUFFER_BASEADDR + 0x800U)
-#define IPI_BUFFER_PL_2_BASE	(IPI_BUFFER_BASEADDR + 0xA00U)
-#define IPI_BUFFER_PL_3_BASE	(IPI_BUFFER_BASEADDR + 0xC00U)
 #define IPI_BUFFER_PMU_BASE	(IPI_BUFFER_BASEADDR + 0xE00U)
 
-#define IPI_BUFFER_TARGET_RPU_0_OFFSET	0x0U
-#define IPI_BUFFER_TARGET_RPU_1_OFFSET	0x40U
 #define IPI_BUFFER_TARGET_APU_OFFSET	0x80U
-#define IPI_BUFFER_TARGET_PL_0_OFFSET	0xC0U
-#define IPI_BUFFER_TARGET_PL_1_OFFSET	0x100U
-#define IPI_BUFFER_TARGET_PL_2_OFFSET	0x140U
-#define IPI_BUFFER_TARGET_PL_3_OFFSET	0x180U
 #define IPI_BUFFER_TARGET_PMU_OFFSET	0x1C0U
 
 #define IPI_BUFFER_MAX_WORDS	8
@@ -37,76 +26,33 @@
 #define IPI_BUFFER_REQ_OFFSET	0x0U
 #define IPI_BUFFER_RESP_OFFSET	0x20U
 
-/* IPI Base Address */
-#define IPI_BASEADDR		0XFF300000
-
-/* APU's IPI registers */
-#define IPI_APU_ISR		(IPI_BASEADDR + 0X00000010)
-#define IPI_APU_IER		(IPI_BASEADDR + 0X00000018)
-#define IPI_APU_IDR		(IPI_BASEADDR + 0X0000001C)
-#define IPI_APU_IXR_PMU_0_MASK		(1 << 16)
-
-#define IPI_TRIG_OFFSET		0
-#define IPI_OBS_OFFSET		4
-
-/* Power Management IPI interrupt number */
-#define PM_INT_NUM		0
-#define IPI_PMU_PM_INT_BASE	(IPI_PMU_0_TRIG + (PM_INT_NUM * 0x1000))
-#define IPI_PMU_PM_INT_MASK	(IPI_APU_IXR_PMU_0_MASK << PM_INT_NUM)
-#if (PM_INT_NUM < 0 || PM_INT_NUM > 3)
-	#error PM_INT_NUM value out of range
-#endif
-
-#define IPI_APU_MASK		1U
-
 DEFINE_BAKERY_LOCK(pm_secure_lock);
 
 const struct pm_ipi apu_ipi = {
-	.mask = IPI_APU_MASK,
-	.base = IPI_BASEADDR,
+	.apu_ipi_id = IPI_ID_APU,
+	.pmu_ipi_id = IPI_ID_PMU0,
 	.buffer_base = IPI_BUFFER_APU_BASE,
 };
 
 /**
  * pm_ipi_init() - Initialize IPI peripheral for communication with PMU
  *
+ * @proc	Pointer to the processor that is initiating the request
  * @return	On success, the initialization function must return 0.
  *		Any other return value will cause the framework to ignore
  *		the service
  *
  * Called from pm_setup initialization function
  */
-int pm_ipi_init(void)
+int pm_ipi_init(const struct pm_proc *proc)
 {
 	bakery_lock_init(&pm_secure_lock);
-
-	/* IPI Interrupts Clear & Disable */
-	mmio_write_32(IPI_APU_ISR, 0xffffffff);
-	mmio_write_32(IPI_APU_IDR, 0xffffffff);
+	ipi_mb_open(proc->ipi->apu_ipi_id, proc->ipi->pmu_ipi_id);
 
 	return 0;
 }
 
 /**
- * pm_ipi_wait() - wait for pmu to handle request
- * @proc	proc which is waiting for PMU to handle request
- */
-static enum pm_ret_status pm_ipi_wait(const struct pm_proc *proc)
-{
-	int status;
-
-	/* Wait until previous interrupt is handled by PMU */
-	do {
-		status = mmio_read_32(proc->ipi->base + IPI_OBS_OFFSET) &
-					IPI_PMU_PM_INT_MASK;
-		/* TODO: 1) Use timer to add delay between read attempts */
-		/* TODO: 2) Return PM_RET_ERR_TIMEOUT if this times out */
-	} while (status);
-
-	return PM_RET_SUCCESS;
-}
-
-/**
  * pm_ipi_send_common() - Sends IPI request to the PMU
  * @proc	Pointer to the processor that is initiating the request
  * @payload	API id and call arguments to be written in IPI buffer
@@ -124,16 +70,13 @@
 					IPI_BUFFER_TARGET_PMU_OFFSET +
 					IPI_BUFFER_REQ_OFFSET;
 
-	/* Wait until previous interrupt is handled by PMU */
-	pm_ipi_wait(proc);
-
 	/* Write payload into IPI buffer */
 	for (size_t i = 0; i < PAYLOAD_ARG_CNT; i++) {
 		mmio_write_32(buffer_base + offset, payload[i]);
 		offset += PAYLOAD_ARG_SIZE;
 	}
 	/* Generate IPI to PMU */
-	mmio_write_32(proc->ipi->base + IPI_TRIG_OFFSET, IPI_PMU_PM_INT_MASK);
+	ipi_mb_notify(proc->ipi->apu_ipi_id, proc->ipi->pmu_ipi_id, 1);
 
 	return PM_RET_SUCCESS;
 }
@@ -178,8 +121,6 @@
 				IPI_BUFFER_TARGET_PMU_OFFSET +
 				IPI_BUFFER_RESP_OFFSET;
 
-	pm_ipi_wait(proc);
-
 	/*
 	 * Read response from IPI buffer
 	 * buf-0: success or error+reason
@@ -250,17 +191,12 @@
 	return ret;
 }
 
-void pm_ipi_irq_enable(void)
-{
-	mmio_write_32(IPI_APU_IER, IPI_APU_IXR_PMU_0_MASK);
-}
-
-void pm_ipi_irq_disable(void)
+void pm_ipi_irq_enable(const struct pm_proc *proc)
 {
-	mmio_write_32(IPI_APU_IDR, IPI_APU_IXR_PMU_0_MASK);
+	ipi_mb_enable_irq(proc->ipi->apu_ipi_id, proc->ipi->pmu_ipi_id);
 }
 
-void pm_ipi_irq_clear(void)
+void pm_ipi_irq_clear(const struct pm_proc *proc)
 {
-	mmio_write_32(IPI_APU_ISR, IPI_APU_IXR_PMU_0_MASK);
+	ipi_mb_ack(proc->ipi->apu_ipi_id, proc->ipi->pmu_ipi_id);
 }
diff --git a/plat/xilinx/zynqmp/pm_service/pm_ipi.h b/plat/xilinx/zynqmp/pm_service/pm_ipi.h
index a76298b..e6b36f5 100644
--- a/plat/xilinx/zynqmp/pm_service/pm_ipi.h
+++ b/plat/xilinx/zynqmp/pm_service/pm_ipi.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -9,7 +9,7 @@
 
 #include "pm_common.h"
 
-int pm_ipi_init(void);
+int pm_ipi_init(const struct pm_proc *proc);
 
 enum pm_ret_status pm_ipi_send(const struct pm_proc *proc,
 			       uint32_t payload[PAYLOAD_ARG_CNT]);
@@ -17,8 +17,7 @@
 				    uint32_t payload[PAYLOAD_ARG_CNT],
 				    unsigned int *value, size_t count);
 void pm_ipi_buff_read_callb(unsigned int *value, size_t count);
-void pm_ipi_irq_enable(void);
-void pm_ipi_irq_disable(void);
-void pm_ipi_irq_clear(void);
+void pm_ipi_irq_enable(const struct pm_proc *proc);
+void pm_ipi_irq_clear(const struct pm_proc *proc);
 
 #endif /* _PM_IPI_H_ */
diff --git a/plat/xilinx/zynqmp/pm_service/pm_svc_main.c b/plat/xilinx/zynqmp/pm_service/pm_svc_main.c
index f4e679b..fb64bc5 100644
--- a/plat/xilinx/zynqmp/pm_service/pm_svc_main.c
+++ b/plat/xilinx/zynqmp/pm_service/pm_svc_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -49,22 +49,25 @@
  */
 int pm_setup(void)
 {
-	int status;
+	int status, ret;
 
 	if (!zynqmp_is_pmu_up())
 		return -ENODEV;
 
-	status = pm_ipi_init();
+	status = pm_ipi_init(primary_proc);
 
-	if (status == 0)
+	if (status >= 0) {
 		INFO("BL31: PM Service Init Complete: API v%d.%d\n",
 		     PM_VERSION_MAJOR, PM_VERSION_MINOR);
-	else
+		ret = 0;
+	} else {
 		INFO("BL31: PM Service Init Failed, Error Code %d!\n", status);
+		ret = status;
+	}
 
 	pm_down = status;
 
-	return status;
+	return ret;
 }
 
 /**
@@ -163,7 +166,7 @@
 		 * Even if we were wrong, it would not enable the IRQ in
 		 * the GIC.
 		 */
-		pm_ipi_irq_enable();
+		pm_ipi_irq_enable(primary_proc);
 		SMC_RET1(handle, (uint64_t)ret |
 			 ((uint64_t)pm_ctx.api_version << 32));
 
diff --git a/plat/xilinx/zynqmp/sip_svc_setup.c b/plat/xilinx/zynqmp/sip_svc_setup.c
index ae6ecaf..8b44eaa 100644
--- a/plat/xilinx/zynqmp/sip_svc_setup.c
+++ b/plat/xilinx/zynqmp/sip_svc_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -8,7 +8,9 @@
 
 #include <runtime_svc.h>
 #include <uuid.h>
+#include "ipi_mailbox_svc.h"
 #include "pm_svc_main.h"
+#include "zynqmp_ipi.h"
 
 /* SMC function IDs for SiP Service queries */
 #define ZYNQMP_SIP_SVC_CALL_COUNT	0x8200ff00
@@ -19,10 +21,12 @@
 #define SIP_SVC_VERSION_MAJOR	0
 #define SIP_SVC_VERSION_MINOR	1
 
-/* These macros are used to identify PM calls from the SMC function ID */
+/* These macros are used to identify PM and IPI calls from the SMC function ID */
 #define PM_FID_MASK	0xf000u
 #define PM_FID_VALUE	0u
+#define IPI_FID_VALUE	0x1000u
 #define is_pm_fid(_fid) (((_fid) & PM_FID_MASK) == PM_FID_VALUE)
+#define is_ipi_fid(_fid) (((_fid) & PM_FID_MASK) == IPI_FID_VALUE)
 
 /* SiP Service UUID */
 DEFINE_SVC_UUID(zynqmp_sip_uuid,
@@ -63,6 +67,12 @@
 				      flags);
 	}
 
+	/* Let IPI SMC handler deal with IPI-related requests */
+	if (is_ipi_fid(smc_fid)) {
+		return ipi_smc_handler(smc_fid, x1, x2, x3, x4, cookie, handle,
+				      flags);
+	}
+
 	switch (smc_fid) {
 	case ZYNQMP_SIP_SVC_CALL_COUNT:
 		/* PM functions + default functions */
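
A sketch of the resulting dispatch, using the function numbers defined in this patch:

    /* (smc_fid & PM_FID_MASK) == 0x0000 -> pm_smc_handler
     * (smc_fid & PM_FID_MASK) == 0x1000 -> ipi_smc_handler; e.g. function
     * number 0x1000 is IPI_MAILBOX_OPEN in enum ipi_api_id. */
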
diff --git a/plat/xilinx/zynqmp/zynqmp_ipi.c b/plat/xilinx/zynqmp/zynqmp_ipi.c
new file mode 100644
index 0000000..755a3b7
--- /dev/null
+++ b/plat/xilinx/zynqmp/zynqmp_ipi.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/*
+ * Zynq UltraScale+ MPSoC IPI agent registers access management
+ */
+
+#include <bakery_lock.h>
+#include <debug.h>
+#include <errno.h>
+#include <mmio.h>
+#include <runtime_svc.h>
+#include <string.h>
+#include "zynqmp_ipi.h"
+#include "../zynqmp_private.h"
+
+/*********************************************************************
+ * Macros definitions
+ ********************************************************************/
+
+/* IPI registers base address */
+#define IPI_REGS_BASE   0xFF300000U
+
+/* IPI registers offsets macros */
+#define IPI_TRIG_OFFSET 0x00U
+#define IPI_OBR_OFFSET  0x04U
+#define IPI_ISR_OFFSET  0x10U
+#define IPI_IMR_OFFSET  0x14U
+#define IPI_IER_OFFSET  0x18U
+#define IPI_IDR_OFFSET  0x1CU
+
+/* IPI register start offset */
+#define IPI_REG_BASE(I) (zynqmp_ipi_table[(I)].ipi_reg_base)
+
+/* IPI register bit mask */
+#define IPI_BIT_MASK(I) (zynqmp_ipi_table[(I)].ipi_bit_mask)
+
+/* IPI secure check */
+#define IPI_SECURE_MASK  0x1U
+#define IPI_IS_SECURE(I) ((zynqmp_ipi_table[(I)].secure_only & \
+			   IPI_SECURE_MASK) ? 1 : 0)
+
+/*********************************************************************
+ * Struct definitions
+ ********************************************************************/
+
+/* structure to maintain IPI configuration information */
+struct zynqmp_ipi_config {
+	unsigned int ipi_bit_mask;
+	unsigned int ipi_reg_base;
+	unsigned char secure_only;
+};
+
+/* ZynqMP IPI configuration table */
+static const struct zynqmp_ipi_config zynqmp_ipi_table[] = {
+	/* APU IPI */
+	{
+		.ipi_bit_mask = 0x1,
+		.ipi_reg_base = 0xFF300000,
+		.secure_only = 0,
+	},
+	/* RPU0 IPI */
+	{
+		.ipi_bit_mask = 0x100,
+		.ipi_reg_base = 0xFF310000,
+		.secure_only = 0,
+	},
+	/* RPU1 IPI */
+	{
+		.ipi_bit_mask = 0x200,
+		.ipi_reg_base = 0xFF320000,
+		.secure_only = 0,
+	},
+	/* PMU0 IPI */
+	{
+		.ipi_bit_mask = 0x10000,
+		.ipi_reg_base = 0xFF330000,
+		.secure_only = IPI_SECURE_MASK,
+	},
+	/* PMU1 IPI */
+	{
+		.ipi_bit_mask = 0x20000,
+		.ipi_reg_base = 0xFF331000,
+		.secure_only = IPI_SECURE_MASK,
+	},
+	/* PMU2 IPI */
+	{
+		.ipi_bit_mask = 0x40000,
+		.ipi_reg_base = 0xFF332000,
+		.secure_only = IPI_SECURE_MASK,
+	},
+	/* PMU3 IPI */
+	{
+		.ipi_bit_mask = 0x80000,
+		.ipi_reg_base = 0xFF333000,
+		.secure_only = IPI_SECURE_MASK,
+	},
+	/* PL0 IPI */
+	{
+		.ipi_bit_mask = 0x1000000,
+		.ipi_reg_base = 0xFF340000,
+		.secure_only = 0,
+	},
+	/* PL1 IPI */
+	{
+		.ipi_bit_mask = 0x2000000,
+		.ipi_reg_base = 0xFF350000,
+		.secure_only = 0,
+	},
+	/* PL2 IPI */
+	{
+		.ipi_bit_mask = 0x4000000,
+		.ipi_reg_base = 0xFF360000,
+		.secure_only = 0,
+	},
+	/* PL3 IPI */
+	{
+		.ipi_bit_mask = 0x8000000,
+		.ipi_reg_base = 0xFF370000,
+		.secure_only = 0,
+	},
+};
+
+/* is_ipi_mb_within_range() - verify if IPI mailbox is within range
+ *
+ * @local  - local IPI ID
+ * @remote - remote IPI ID
+ *
+ * return - 1 if within range, 0 if not
+ */
+static inline int is_ipi_mb_within_range(uint32_t local, uint32_t remote)
+{
+	int ret = 1;
+	uint32_t ipi_total = ARRAY_SIZE(zynqmp_ipi_table);
+
+	if (remote >= ipi_total || local >= ipi_total)
+		ret = 0;
+
+	return ret;
+}
+
+/**
+ * ipi_mb_validate() - validate IPI mailbox access
+ *
+ * @local  - local IPI ID
+ * @remote - remote IPI ID
+ * @is_secure - indicate if the requester is from secure software
+ *
+ * return - 0 success, negative value for errors
+ */
+int ipi_mb_validate(uint32_t local, uint32_t remote, unsigned int is_secure)
+{
+	int ret = 0;
+
+	if (!is_ipi_mb_within_range(local, remote))
+		ret = -EINVAL;
+	else if (IPI_IS_SECURE(local) && !is_secure)
+		ret = -EPERM;
+	else if (IPI_IS_SECURE(remote) && !is_secure)
+		ret = -EPERM;
+
+	return ret;
+}
+
+/**
+ * ipi_mb_open() - Open IPI mailbox.
+ *
+ * @local  - local IPI ID
+ * @remote - remote IPI ID
+ *
+ */
+void ipi_mb_open(uint32_t local, uint32_t remote)
+{
+	mmio_write_32(IPI_REG_BASE(local) + IPI_IDR_OFFSET,
+		      IPI_BIT_MASK(remote));
+	mmio_write_32(IPI_REG_BASE(local) + IPI_ISR_OFFSET,
+		      IPI_BIT_MASK(remote));
+}
+
+/**
+ * ipi_mb_release() - Release IPI mailbox.
+ *
+ * @local  - local IPI ID
+ * @remote - remote IPI ID
+ *
+ */
+void ipi_mb_release(uint32_t local, uint32_t remote)
+{
+	mmio_write_32(IPI_REG_BASE(local) + IPI_IDR_OFFSET,
+		      IPI_BIT_MASK(remote));
+}
+
+/**
+ * ipi_mb_enquire_status() - Enquire IPI mailbox status
+ *
+ * @local  - local IPI ID
+ * @remote - remote IPI ID
+ *
+ * return - 0 idle, positive value for pending sending or receiving,
+ *          negative value for errors
+ */
+int ipi_mb_enquire_status(uint32_t local, uint32_t remote)
+{
+	int ret = 0;
+	uint32_t status;
+
+	status = mmio_read_32(IPI_REG_BASE(local) + IPI_OBR_OFFSET);
+	if (status & IPI_BIT_MASK(remote))
+		ret |= IPI_MB_STATUS_SEND_PENDING;
+	status = mmio_read_32(IPI_REG_BASE(local) + IPI_ISR_OFFSET);
+	if (status & IPI_BIT_MASK(remote))
+		ret |= IPI_MB_STATUS_RECV_PENDING;
+
+	return ret;
+}
+
+/* ipi_mb_notify() - Trigger IPI mailbox notification
+ *
+ * @local - local IPI ID
+ * @remote - remote IPI ID
+ * @is_blocking - whether to trigger the notification in blocking mode.
+ *
+ * It sets the remote bit in the IPI agent trigger register.
+ *
+ */
+void ipi_mb_notify(uint32_t local, uint32_t remote, uint32_t is_blocking)
+{
+	uint32_t status;
+
+	mmio_write_32(IPI_REG_BASE(local) + IPI_TRIG_OFFSET,
+		      IPI_BIT_MASK(remote));
+	if (is_blocking) {
+		do {
+			status = mmio_read_32(IPI_REG_BASE(local) +
+					      IPI_OBR_OFFSET);
+		} while (status & IPI_BIT_MASK(remote));
+	}
+}
+
+/* ipi_mb_ack() - Ack IPI mailbox notification from the other end
+ *
+ * @local - local IPI ID
+ * @remote - remote IPI ID
+ *
+ * It will clear the remote bit in the ISR register.
+ *
+ */
+void ipi_mb_ack(uint32_t local, uint32_t remote)
+{
+	mmio_write_32(IPI_REG_BASE(local) + IPI_ISR_OFFSET,
+		      IPI_BIT_MASK(remote));
+}
+
+/* ipi_mb_disable_irq() - Disable IPI mailbox notification interrupt
+ *
+ * @local - local IPI ID
+ * @remote - remote IPI ID
+ *
+ * It will mask the remote bit in the IDR register.
+ *
+ */
+void ipi_mb_disable_irq(uint32_t local, uint32_t remote)
+{
+	mmio_write_32(IPI_REG_BASE(local) + IPI_IDR_OFFSET,
+		      IPI_BIT_MASK(remote));
+}
+
+/* ipi_mb_enable_irq() - Enable IPI mailbox notification interrupt
+ *
+ * @local - local IPI ID
+ * @remote - remote IPI ID
+ *
+ * It will set the remote bit in the IER register.
+ *
+ */
+void ipi_mb_enable_irq(uint32_t local, uint32_t remote)
+{
+	mmio_write_32(IPI_REG_BASE(local) + IPI_IER_OFFSET,
+		      IPI_BIT_MASK(remote));
+}
diff --git a/plat/xilinx/zynqmp/zynqmp_ipi.h b/plat/xilinx/zynqmp/zynqmp_ipi.h
new file mode 100644
index 0000000..0544ddb
--- /dev/null
+++ b/plat/xilinx/zynqmp/zynqmp_ipi.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/* ZynqMP IPI management enums and defines */
+
+#ifndef _ZYNQMP_IPI_H_
+#define _ZYNQMP_IPI_H_
+
+#include <stdint.h>
+
+/*********************************************************************
+ * IPI agent IDs macros
+ ********************************************************************/
+#define IPI_ID_APU	0U
+#define IPI_ID_RPU0	1U
+#define IPI_ID_RPU1	2U
+#define IPI_ID_PMU0	3U
+#define IPI_ID_PMU1	4U
+#define IPI_ID_PMU2	5U
+#define IPI_ID_PMU3	6U
+#define IPI_ID_PL0	7U
+#define IPI_ID_PL1	8U
+#define IPI_ID_PL2	9U
+#define IPI_ID_PL3	10U
+
+/*********************************************************************
+ * IPI mailbox status macros
+ ********************************************************************/
+#define IPI_MB_STATUS_IDLE		0
+#define IPI_MB_STATUS_SEND_PENDING	1
+#define IPI_MB_STATUS_RECV_PENDING	2
+
+/*********************************************************************
+ * IPI mailbox call is secure or not macros
+ ********************************************************************/
+#define IPI_MB_CALL_NOTSECURE	0
+#define IPI_MB_CALL_SECURE	1
+
+/*********************************************************************
+ * IPI APIs declarations
+ ********************************************************************/
+
+/* Validate IPI mailbox access */
+int ipi_mb_validate(uint32_t local, uint32_t remote, unsigned int is_secure);
+
+/* Open the IPI mailbox */
+void ipi_mb_open(uint32_t local, uint32_t remote);
+
+/* Release the IPI mailbox */
+void ipi_mb_release(uint32_t local, uint32_t remote);
+
+/* Enquire IPI mailbox status */
+int ipi_mb_enquire_status(uint32_t local, uint32_t remote);
+
+/* Trigger notification on the IPI mailbox */
+void ipi_mb_notify(uint32_t local, uint32_t remote, uint32_t is_blocking);
+
+/* Ack IPI mailbox notification */
+void ipi_mb_ack(uint32_t local, uint32_t remote);
+
+/* Disable IPI mailbox notification interrupt */
+void ipi_mb_disable_irq(uint32_t local, uint32_t remote);
+
+/* Enable IPI mailbox notification interrupt */
+void ipi_mb_enable_irq(uint32_t local, uint32_t remote);
+
+#endif /* _ZYNQMP_IPI_H_ */
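
A minimal usage sketch of this API, mirroring the sequence pm_ipi.c follows in this patch (error handling elided):

    /* Secure-world caller driving the PMU0 mailbox over the APU agent. */
    if (ipi_mb_validate(IPI_ID_APU, IPI_ID_PMU0, IPI_MB_CALL_SECURE) == 0) {
    	ipi_mb_open(IPI_ID_APU, IPI_ID_PMU0);      /* clear stale state */
    	ipi_mb_notify(IPI_ID_APU, IPI_ID_PMU0, 1); /* blocking trigger  */
    	ipi_mb_ack(IPI_ID_APU, IPI_ID_PMU0);       /* clear the ISR bit */
    	ipi_mb_release(IPI_ID_APU, IPI_ID_PMU0);
    }
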
diff --git a/services/spd/tlkd/tlkd_main.c b/services/spd/tlkd/tlkd_main.c
index 78e9853..cb68bff 100644
--- a/services/spd/tlkd/tlkd_main.c
+++ b/services/spd/tlkd/tlkd_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -193,12 +193,14 @@
 	 * b. register shared memory with the SP for passing args
 	 *    required for maintaining sessions with the Trusted
 	 *    Applications.
-	 * c. open/close sessions
-	 * d. issue commands to the Trusted Apps
-	 * e. resume the preempted yielding SMC call.
+	 * c. register non-secure world's memory map with the OS
+	 * d. open/close sessions
+	 * e. issue commands to the Trusted Apps
+	 * f. resume the preempted yielding SMC call.
 	 */
 	case TLK_REGISTER_LOGBUF:
 	case TLK_REGISTER_REQBUF:
+	case TLK_REGISTER_NS_DRAM:
 	case TLK_OPEN_TA_SESSION:
 	case TLK_CLOSE_TA_SESSION:
 	case TLK_TA_LAUNCH_OP:
diff --git a/services/std_svc/spm/spm_main.c b/services/std_svc/spm/spm_main.c
index 00f3a30..d31fad6 100644
--- a/services/std_svc/spm/spm_main.c
+++ b/services/std_svc/spm/spm_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -10,6 +10,7 @@
 #include <context_mgmt.h>
 #include <debug.h>
 #include <errno.h>
+#include <mm_svc.h>
 #include <platform.h>
 #include <runtime_svc.h>
 #include <secure_partition.h>
@@ -29,7 +30,6 @@
  * Secure Partition context information.
  ******************************************************************************/
 static secure_partition_context_t sp_ctx;
-unsigned int sp_init_in_progress;
 
 /*******************************************************************************
  * Replace the S-EL1 re-entry information with S-EL0 re-entry
@@ -48,7 +48,7 @@
  * 2. Saves the current C runtime state (callee-saved registers) on the stack
  *    frame and saves a reference to this state.
  * 3. Calls el3_exit() so that the EL3 system and general purpose registers
- *    from the sp_ctx->cpu_ctx are used to enter the secure payload image.
+ *    from the sp_ctx->cpu_ctx are used to enter the secure partition image.
  ******************************************************************************/
 static uint64_t spm_synchronous_sp_entry(secure_partition_context_t *sp_ctx_ptr)
 {
@@ -75,7 +75,7 @@
 
 /*******************************************************************************
  * This function takes a Secure partition context pointer and:
- * 1. Saves the S-EL1 system register context tp sp_ctx->cpu_ctx.
+ * 1. Saves the S-EL1 system register context to sp_ctx->cpu_ctx.
  * 2. Restores the current C runtime state (callee saved registers) from the
  *    stack frame using the reference to this state saved in
  *    spm_secure_partition_enter().
@@ -101,7 +101,7 @@
  * This function passes control to the Secure Partition image (BL32) for the
  * first time on the primary cpu after a cold boot. It assumes that a valid
  * secure context has already been created by spm_setup() which can be directly
- * used. This function performs a synchronous entry into the Secure payload.
+ * used. This function performs a synchronous entry into the Secure partition.
  * The SP passes control back to this routine through an SMC.
  ******************************************************************************/
 int32_t spm_init(void)
@@ -126,21 +126,28 @@
 	secure_partition_setup();
 
 	/*
-	 * Arrange for an entry into the secure payload.
+	 * Make all CPUs use the same secure context.
 	 */
-	sp_init_in_progress = 1;
+	for (unsigned int i = 0; i < PLATFORM_CORE_COUNT; i++) {
+		cm_set_context_by_index(i, &sp_ctx.cpu_ctx, SECURE);
+	}
+
+	/*
+	 * Arrange for an entry into the secure partition.
+	 */
+	sp_ctx.sp_init_in_progress = 1;
 	rc = spm_synchronous_sp_entry(&sp_ctx);
 	assert(rc == 0);
-	sp_init_in_progress = 0;
+	sp_ctx.sp_init_in_progress = 0;
 	VERBOSE("SP_MEMORY_ATTRIBUTES_SET_AARCH64 availability has been revoked\n");
 
 	return rc;
 }
 
 /*******************************************************************************
- * Given a secure payload entrypoint info pointer, entry point PC & pointer to
+ * Given a secure partition entrypoint info pointer, entry point PC & pointer to
  * a context data structure, this function will initialize the SPM context and
- * entry point info for the secure payload
+ * entry point info for the secure partition.
  ******************************************************************************/
 void spm_init_sp_ep_state(struct entry_point_info *sp_ep_info,
 			  uint64_t pc,
@@ -161,7 +168,7 @@
 	SET_PARAM_HEAD(sp_ep_info, PARAM_EP, VERSION_1, ep_attr);
 
 	sp_ep_info->pc = pc;
-	/* The SPM payload runs in S-EL0 */
+	/* The secure partition runs in S-EL0. */
 	sp_ep_info->spsr = SPSR_64(MODE_EL0,
 				   MODE_SP_EL0,
 				   DISABLE_ALL_EXCEPTIONS);
@@ -350,7 +357,7 @@
 
 		switch (smc_fid) {
 
-		case  SPM_VERSION_AARCH32:
+		case SPM_VERSION_AARCH32:
 			SMC_RET1(handle, SPM_VERSION_COMPILED);
 
 		case SP_EVENT_COMPLETE_AARCH64:
@@ -358,7 +365,7 @@
 			cm_el1_sysregs_context_save(SECURE);
 			spm_setup_next_eret_into_sel0(handle);
 
-			if (sp_init_in_progress) {
+			if (sp_ctx.sp_init_in_progress) {
 				/*
 				 * SPM reports completion. The SPM must have
 				 * initiated the original request through a
@@ -370,6 +377,9 @@
 				assert(0);
 			}
 
+			/* Release the Secure Partition context */
+			spin_unlock(&sp_ctx.lock);
+
 			/*
 			 * This is the result from the Secure partition of an
 			 * earlier request. Copy the result into the non-secure
@@ -391,7 +401,7 @@
 		case SP_MEMORY_ATTRIBUTES_GET_AARCH64:
 			INFO("Received SP_MEMORY_ATTRIBUTES_GET_AARCH64 SMC\n");
 
-			if (!sp_init_in_progress) {
+			if (!sp_ctx.sp_init_in_progress) {
 				WARN("SP_MEMORY_ATTRIBUTES_GET_AARCH64 is available at boot time only\n");
 				SMC_RET1(handle, SPM_NOT_SUPPORTED);
 			}
@@ -400,7 +410,7 @@
 		case SP_MEMORY_ATTRIBUTES_SET_AARCH64:
 			INFO("Received SP_MEMORY_ATTRIBUTES_SET_AARCH64 SMC\n");
 
-			if (!sp_init_in_progress) {
+			if (!sp_ctx.sp_init_in_progress) {
 				WARN("SP_MEMORY_ATTRIBUTES_SET_AARCH64 is available at boot time only\n");
 				SMC_RET1(handle, SPM_NOT_SUPPORTED);
 			}
@@ -414,16 +424,37 @@
 
 		switch (smc_fid) {
 
-		case  SP_VERSION_AARCH64:
-		case  SP_VERSION_AARCH32:
-			SMC_RET1(handle, SP_VERSION_COMPILED);
+		case MM_VERSION_AARCH32:
+			SMC_RET1(handle, MM_VERSION_COMPILED);
 
 		case MM_COMMUNICATE_AARCH32:
 		case MM_COMMUNICATE_AARCH64:
+		{
+			uint64_t mm_cookie = x1;
+			uint64_t comm_buffer_address = x2;
+			uint64_t comm_size_address = x3;
+
+			/* Cookie. Reserved for future use. It must be zero. */
+			if (mm_cookie != 0) {
+				ERROR("MM_COMMUNICATE: cookie is not zero\n");
+				SMC_RET1(handle, SPM_INVALID_PARAMETER);
+			}
+
+			if (comm_buffer_address == 0) {
+				ERROR("MM_COMMUNICATE: comm_buffer_address is zero\n");
+				SMC_RET1(handle, SPM_INVALID_PARAMETER);
+			}
+
+			if (comm_size_address != 0) {
+				VERBOSE("MM_COMMUNICATE: comm_size_address is not 0, though 0 is recommended.\n");
+			}
 
 			/* Save the Normal world context */
 			cm_el1_sysregs_context_save(NON_SECURE);
 
+			/* Lock the Secure Partition context. */
+			spin_lock(&sp_ctx.lock);
+
 			/*
 			 * Restore the secure world context and prepare for
 			 * entry in S-EL0
@@ -432,14 +463,9 @@
 			cm_el1_sysregs_context_restore(SECURE);
 			cm_set_next_eret_context(SECURE);
 
-			/* Cookie. Reserved for future use. It must be zero. */
-			assert(x1 == 0);
-
-			if (x3 != 0) {
-				VERBOSE("MM_COMMUNICATE_AARCH32/64: X3 is not 0 as recommended.\n");
-			}
-
-			SMC_RET4(&sp_ctx.cpu_ctx, smc_fid, x1, x2, x3);
+			SMC_RET4(&sp_ctx.cpu_ctx, smc_fid, comm_buffer_address,
+				 comm_size_address, plat_my_core_pos());
+		}
 
 		case SP_MEMORY_ATTRIBUTES_GET_AARCH64:
 		case SP_MEMORY_ATTRIBUTES_SET_AARCH64:
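
The spin lock added above serializes every normal-world request against the
single secure partition context now shared by all CPUs. A condensed sketch
of the protocol (illustration only; control actually transfers via
ERET/SMC, not direct calls):

	/* MM_COMMUNICATE handler (request from the normal world): */
	spin_lock(&sp_ctx.lock);	/* claim the one SP context */
	/* ... enter S-EL0; the partition services the request ... */

	/* SP_EVENT_COMPLETE handler (reply from the secure partition): */
	spin_unlock(&sp_ctx.lock);	/* let the next CPU in */
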
diff --git a/services/std_svc/spm/spm_private.h b/services/std_svc/spm/spm_private.h
index 16993e8..1d16b45 100644
--- a/services/std_svc/spm/spm_private.h
+++ b/services/std_svc/spm/spm_private.h
@@ -32,6 +32,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <spinlock.h>
 #include <stdint.h>
 #include <xlat_tables_v2.h>
 
@@ -43,6 +44,8 @@
 typedef struct secure_partition_context {
 	uint64_t c_rt_ctx;
 	cpu_context_t cpu_ctx;
+	unsigned int sp_init_in_progress;
+	spinlock_t lock;
 } secure_partition_context_t;
 
 uint64_t spm_secure_partition_enter(uint64_t *c_rt_ctx);
diff --git a/tools/fiptool/fiptool.c b/tools/fiptool/fiptool.c
index 1dcb7e8..33c451e 100644
--- a/tools/fiptool/fiptool.c
+++ b/tools/fiptool/fiptool.c
@@ -492,7 +492,7 @@
 	fip_toc_header_t *toc_header;
 	fip_toc_entry_t *toc_entry;
 	char *buf;
-	uint64_t entry_offset, buf_size, payload_size = 0;
+	uint64_t entry_offset, buf_size, payload_size = 0, pad_size;
 	size_t nr_images = 0;
 
 	for (desc = image_desc_head; desc != NULL; desc = desc->next)
@@ -526,9 +526,13 @@
 		entry_offset += image->toc_e.size;
 	}
 
-	/* Append a null uuid entry to mark the end of ToC entries. */
+	/*
+	 * Append a null uuid entry to mark the end of ToC entries.
+	 * NOTE: the offset address of the last toc_entry must match the
+	 * FIP size.
+	 */
 	memset(toc_entry, 0, sizeof(*toc_entry));
-	toc_entry->offset_address = entry_offset;
+	toc_entry->offset_address = (entry_offset + align - 1) & ~(align - 1);
 
 	/* Generate the FIP file. */
 	fp = fopen(filename, "wb");
@@ -555,6 +559,13 @@
 		xfwrite(image->buffer, image->toc_e.size, fp, filename);
 	}
 
+	if (fseek(fp, entry_offset, SEEK_SET))
+		log_errx("Failed to set file position");
+
+	pad_size = toc_entry->offset_address - entry_offset;
+	while (pad_size--)
+		fputc(0x0, fp);
+
 	fclose(fp);
 	return 0;
 }
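
The terminating ToC entry above is rounded up with the usual power-of-two
mask trick, and the gap after the last image is zero-padded so that offset
matches the file size. A worked illustration (the align_up() helper is
hypothetical; it assumes align is a power of two, which fiptool validates
when parsing the alignment option):

	static uint64_t align_up(uint64_t value, uint64_t align)
	{
		/* align is a power of two, so the mask clears the low bits. */
		return (value + align - 1) & ~(align - 1);
	}

	/*
	 * E.g. align_up(0x1234, 16) == 0x1240, so 12 zero bytes of padding
	 * are written after the last image.
	 */
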