diff --git a/.gitignore b/.gitignore
index c0d5183..2f9c89d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,9 @@
 # Ignore header files copied.
 tools/fiptool/firmware_image_package.h
 tools/fiptool/uuid.h
+
+# GNU GLOBAL files
+GPATH
+GRTAGS
+GSYMS
+GTAGS
diff --git a/Makefile b/Makefile
index 9e148fb..9f900db 100644
--- a/Makefile
+++ b/Makefile
@@ -111,7 +111,7 @@
 
 # Default build string (git branch and commit)
 ifeq (${BUILD_STRING},)
-        BUILD_STRING	:=	$(shell git log -n 1 --pretty=format:"%h")
+        BUILD_STRING	:=	$(shell git describe --always --dirty --tags 2> /dev/null)
 endif
 VERSION_STRING		:=	v${VERSION_MAJOR}.${VERSION_MINOR}(${BUILD_TYPE}):${BUILD_STRING}
 
@@ -346,11 +346,6 @@
         endif
 endif
 
-# Make sure PMF is enabled if PSCI STAT is enabled.
-ifeq (${ENABLE_PSCI_STAT},1)
-ENABLE_PMF			:= 1
-endif
-
 ifneq (${FIP_ALIGN},0)
 FIP_ARGS += --align ${FIP_ALIGN}
 endif
@@ -397,6 +392,9 @@
 $(eval $(call assert_boolean,TRUSTED_BOARD_BOOT))
 $(eval $(call assert_boolean,USE_COHERENT_MEM))
 
+$(eval $(call assert_numeric,ARM_ARCH_MAJOR))
+$(eval $(call assert_numeric,ARM_ARCH_MINOR))
+
 ################################################################################
 # Add definitions to the cpp preprocessor based on the current build options.
 # This is done after including the platform specific makefile to allow the
@@ -404,6 +402,8 @@
 ################################################################################
 
 $(eval $(call add_define,ARM_CCI_PRODUCT_ID))
+$(eval $(call add_define,ARM_ARCH_MAJOR))
+$(eval $(call add_define,ARM_ARCH_MINOR))
 $(eval $(call add_define,ARM_GIC_ARCH))
 $(eval $(call add_define,ASM_ASSERTION))
 $(eval $(call add_define,COLD_BOOT_SINGLE_CPU))
diff --git a/bl1/bl1.ld.S b/bl1/bl1.ld.S
index b9554d1..b69065e 100644
--- a/bl1/bl1.ld.S
+++ b/bl1/bl1.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -133,7 +133,8 @@
 
     /*
      * The .bss section gets initialised to 0 at runtime.
-     * Its base address must be 16-byte aligned.
+     * Its base address should be 16-byte aligned for better performance of the
+     * zero-initialization code.
      */
     .bss : ALIGN(16) {
         __BSS_START__ = .;
diff --git a/bl1/bl1_fwu.c b/bl1/bl1_fwu.c
index 1cc7daf..f7fae68 100644
--- a/bl1/bl1_fwu.c
+++ b/bl1/bl1_fwu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -335,7 +335,7 @@
 		 */
 		if (image_desc->state == IMAGE_STATE_COPIED) {
 			/* Clear the memory.*/
-			memset((void *)base_addr, 0, total_size);
+			zero_normalmem((void *)base_addr, total_size);
 			flush_dcache_range(base_addr, total_size);
 
 			/* Indicate that image can be copied again*/
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index 25363ac..31f7787 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -94,12 +94,12 @@
 	 */
 	ldr	x0, =__BSS_START__
 	ldr	x1, =__BSS_SIZE__
-	bl	zeromem16
+	bl	zeromem
 
 #if USE_COHERENT_MEM
 	ldr	x0, =__COHERENT_RAM_START__
 	ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
-	bl	zeromem16
+	bl	zeromem
 #endif
 
 	/* --------------------------------------------
diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S
index fa694de..b9275f3 100644
--- a/bl2/bl2.ld.S
+++ b/bl2/bl2.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -113,7 +113,8 @@
 
     /*
      * The .bss section gets initialised to 0 at runtime.
-     * Its base address must be 16-byte aligned.
+     * Its base address should be 16-byte aligned for better performance of the
+     * zero-initialization code.
      */
     .bss : ALIGN(16) {
         __BSS_START__ = .;
diff --git a/bl2u/aarch64/bl2u_entrypoint.S b/bl2u/aarch64/bl2u_entrypoint.S
index 1175c6f..9fa84bf 100644
--- a/bl2u/aarch64/bl2u_entrypoint.S
+++ b/bl2u/aarch64/bl2u_entrypoint.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -94,7 +94,7 @@
 	 */
 	ldr	x0, =__BSS_START__
 	ldr	x1, =__BSS_SIZE__
-	bl	zeromem16
+	bl	zeromem
 
 	/* --------------------------------------------
 	 * Allocate a stack whose memory will be marked
diff --git a/bl2u/bl2u.ld.S b/bl2u/bl2u.ld.S
index d72589f..91e8556 100644
--- a/bl2u/bl2u.ld.S
+++ b/bl2u/bl2u.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -100,7 +100,8 @@
 
     /*
      * The .bss section gets initialised to 0 at runtime.
-     * Its base address must be 16-byte aligned.
+     * Its base address should be 16-byte aligned for better performance of the
+     * zero-initialization code.
      */
     .bss : ALIGN(16) {
         __BSS_START__ = .;
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 9a05e6c..e5d6232 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -158,7 +158,8 @@
 
     /*
      * The .bss section gets initialised to 0 at runtime.
-     * Its base address must be 16-byte aligned.
+     * Its base address should be 16-byte aligned for better performance of the
+     * zero-initialization code.
      */
     .bss (NOLOAD) : ALIGN(16) {
         __BSS_START__ = .;
diff --git a/bl32/sp_min/sp_min.ld.S b/bl32/sp_min/sp_min.ld.S
index e0e23e8..f1d4d0b 100644
--- a/bl32/sp_min/sp_min.ld.S
+++ b/bl32/sp_min/sp_min.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -134,9 +134,10 @@
 
     /*
      * The .bss section gets initialised to 0 at runtime.
-     * Its base address must be 16-byte aligned.
+     * Its base address should be 8-byte aligned for better performance of the
+     * zero-initialization code.
      */
-    .bss (NOLOAD) : ALIGN(16) {
+    .bss (NOLOAD) : ALIGN(8) {
         __BSS_START__ = .;
         *(.bss*)
         *(COMMON)
diff --git a/bl32/sp_min/sp_min_main.c b/bl32/sp_min/sp_min_main.c
index 02663a2..f34716e 100644
--- a/bl32/sp_min/sp_min_main.c
+++ b/bl32/sp_min/sp_min_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,6 +45,7 @@
 #include <stdint.h>
 #include <string.h>
 #include <types.h>
+#include <utils.h>
 #include "sp_min_private.h"
 
 /* Pointers to per-core cpu contexts */
@@ -203,7 +204,7 @@
 	smc_set_next_ctx(NON_SECURE);
 
 	next_smc_ctx = smc_get_next_ctx();
-	memset(next_smc_ctx, 0, sizeof(smc_ctx_t));
+	zeromem(next_smc_ctx, sizeof(smc_ctx_t));
 
 	copy_cpu_ctx_to_smc_stx(get_regs_ctx(cm_get_context(NON_SECURE)),
 			next_smc_ctx);
diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S
index 4c296d4..bdb882a 100644
--- a/bl32/tsp/aarch64/tsp_entrypoint.S
+++ b/bl32/tsp/aarch64/tsp_entrypoint.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -120,12 +120,12 @@
 	 */
 	ldr	x0, =__BSS_START__
 	ldr	x1, =__BSS_SIZE__
-	bl	zeromem16
+	bl	zeromem
 
 #if USE_COHERENT_MEM
 	ldr	x0, =__COHERENT_RAM_START__
 	ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
-	bl	zeromem16
+	bl	zeromem
 #endif
 
 	/* --------------------------------------------
diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S
index 7e24f66..d93e3bb 100644
--- a/bl32/tsp/tsp.ld.S
+++ b/bl32/tsp/tsp.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -104,7 +104,8 @@
 
     /*
      * The .bss section gets initialised to 0 at runtime.
-     * Its base address must be 16-byte aligned.
+     * Its base address should be 16-byte aligned for better performance of the
+     * zero-initialization code.
      */
     .bss : ALIGN(16) {
         __BSS_START__ = .;
diff --git a/common/bl_common.c b/common/bl_common.c
index 47bdad5..1d66530 100644
--- a/common/bl_common.c
+++ b/common/bl_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -347,7 +347,7 @@
 				 image_data->image_size);
 	if (rc != 0) {
 		/* Authentication error, zero memory and flush it right away. */
-		memset((void *)image_data->image_base, 0x00,
+		zero_normalmem((void *)image_data->image_base,
 		       image_data->image_size);
 		flush_dcache_range(image_data->image_base,
 				   image_data->image_size);
@@ -543,7 +543,7 @@
 				 image_data->image_size);
 	if (rc != 0) {
 		/* Authentication error, zero memory and flush it right away. */
-		memset((void *)image_data->image_base, 0x00,
+		zero_normalmem((void *)image_data->image_base,
 		       image_data->image_size);
 		flush_dcache_range(image_data->image_base,
 				   image_data->image_size);
diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index bd6e2f6..523fa55 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -16,8 +16,9 @@
 11. [Use of coherent memory in Trusted Firmware](#11--use-of-coherent-memory-in-trusted-firmware)
 12. [Isolating code and read-only data on separate memory pages](#12--isolating-code-and-read-only-data-on-separate-memory-pages)
 13. [Performance Measurement Framework](#13--performance-measurement-framework)
-14. [Code Structure](#14--code-structure)
-15. [References](#15--references)
+14. [ARMv8 Architecture Extensions](#14--armv8-architecture-extensions)
+15. [Code Structure](#15--code-structure)
+16. [References](#16--references)
 
 
 1.  Introduction
@@ -1342,7 +1343,7 @@
 
 The following linker symbols are defined for this purpose:
 
-*   `__BSS_START__`          Must be aligned on a 16-byte boundary.
+*   `__BSS_START__`
 *   `__BSS_SIZE__`
 *   `__COHERENT_RAM_START__` Must be aligned on a page-size boundary.
 *   `__COHERENT_RAM_END__`   Must be aligned on a page-size boundary.
@@ -2208,7 +2209,39 @@
 5.  `pmf_helpers.h` is an internal header used by `pmf.h`.
 
 
-14.  Code Structure
+14.  ARMv8 Architecture Extensions
+----------------------------------
+
+ARM Trusted Firmware makes use of ARMv8 Architecture Extensions where
+applicable. This section lists the usage of Architecture Extensions, and build
+flags controlling them.
+
+In general, and unless individually mentioned, the build options
+`ARM_ARCH_MAJOR` and `ARM_ARCH_MINOR` select the Architecture Extension to
+target when building ARM Trusted Firmware. Subsequent ARM Architecture
+Extensions are backward compatible with previous versions.
+
+The build system only requires that `ARM_ARCH_MAJOR` and `ARM_ARCH_MINOR` have a
+valid numeric value. These build options only control whether or not
+Architecture Extension-specific code is included in the build. Otherwise, ARM
+Trusted Firmware targets the base ARMv8.0 architecture; i.e. as if
+`ARM_ARCH_MAJOR` == 8 and `ARM_ARCH_MINOR` == 0, which are also their respective
+default values.
+
+See also the _Summary of build options_ in [User Guide].
+
+For details on the Architecture Extension and available features, please refer
+to the respective Architecture Extension Supplement.
+
+### ARMv8.1
+
+This Architecture Extension is targeted when `ARM_ARCH_MAJOR` > 8, or when
+`ARM_ARCH_MAJOR` == 8 and `ARM_ARCH_MINOR` >= 1.
+
+*  The Compare and Swap instruction is used to implement spinlocks. Otherwise,
+   the load-/store-exclusive instruction pair is used.
+
+15.  Code Structure
 -------------------
 
 Trusted Firmware code is logically divided between the three boot loader
@@ -2252,7 +2285,7 @@
 kernel at boot time. These can be found in the `fdts` directory.
 
 
-15.  References
+16.  References
 ---------------
 
 1.  Trusted Board Boot Requirements CLIENT PDD (ARM DEN 0006B-5). Available
diff --git a/docs/plat/nvidia-tegra.md b/docs/plat/nvidia-tegra.md
index b29532c..f82085b 100644
--- a/docs/plat/nvidia-tegra.md
+++ b/docs/plat/nvidia-tegra.md
@@ -62,6 +62,22 @@
 Platforms wanting to use different TZDRAM_BASE, can add 'TZDRAM_BASE=<value>'
 to the build command line.
 
+The Tegra platform code expects a pointer to the following platform specific
+structure via 'x1' register from the BL2 layer which is used by the
+bl31_early_platform_setup() handler to extract the TZDRAM carveout base and
+size for loading the Trusted OS and the UART port ID to be used. The Tegra
+memory controller driver programs this base/size in order to restrict NS
+accesses.
+
+typedef struct plat_params_from_bl2 {
+	/* TZ memory size */
+	uint64_t tzdram_size;
+	/* TZ memory base */
+	uint64_t tzdram_base;
+	/* UART port ID */
+	int uart_id;
+} plat_params_from_bl2_t;
+
 Power Management
 ================
 The PSCI implementation expects each platform to expose the 'power state'
diff --git a/docs/plat/xilinx-zynqmp.md b/docs/plat/xilinx-zynqmp.md
index 09546b0..d2dc8b7 100644
--- a/docs/plat/xilinx-zynqmp.md
+++ b/docs/plat/xilinx-zynqmp.md
@@ -12,12 +12,12 @@
 
 To build:
 ```bash
-make ERROR_DEPRECATED=1 RESET_TO_BL31=1 CROSS_COMPILE=aarch64-none-elf- PLAT=zynqmp bl31
+make ERROR_DEPRECATED=1 CROSS_COMPILE=aarch64-none-elf- PLAT=zynqmp bl31
 ```
 
 To build bl32 TSP you have to rebuild bl31 too:
 ```bash
-make ERROR_DEPRECATED=1 RESET_TO_BL31=1 CROSS_COMPILE=aarch64-none-elf- PLAT=zynqmp SPD=tspd bl31 bl32
+make ERROR_DEPRECATED=1 CROSS_COMPILE=aarch64-none-elf- PLAT=zynqmp SPD=tspd bl31 bl32
 ```
 
 # ZynqMP platform specific build options
diff --git a/docs/porting-guide.md b/docs/porting-guide.md
index e8486f1..a5e5966 100644
--- a/docs/porting-guide.md
+++ b/docs/porting-guide.md
@@ -1707,9 +1707,55 @@
 convert the power-state parameter (possibly encoding a composite power state)
 passed in a PSCI `CPU_SUSPEND` call to this representation.
 
-The following functions must be implemented to initialize PSCI functionality in
-the ARM Trusted Firmware.
+The following functions form part of platform port of PSCI functionality.
+
+
+### Function : plat_psci_stat_accounting_start() [optional]
+
+    Argument : const psci_power_state_t *
+    Return   : void
+
+This is an optional hook that platforms can implement for residency statistics
+accounting before entering a low power state.  The `pwr_domain_state` field of
+`state_info` (first argument) can be inspected if stat accounting is done
+differently at CPU level versus higher levels.  As an example, if the element at
+index 0 (CPU power level) in the `pwr_domain_state` array indicates a power down
+state, special hardware logic may be programmed in order to keep track of the
+residency statistics.  For higher levels (array indices > 0), the residency
+statistics could be tracked in software using PMF.  If `ENABLE_PMF` is set, the
+default implementation will use PMF to capture timestamps.
+
+### Function : plat_psci_stat_accounting_stop() [optional]
+
+    Argument : const psci_power_state_t *
+    Return   : void
+
+This is an optional hook that platforms can implement for residency statistics
+accounting after exiting from a low power state.  The `pwr_domain_state` field
+of `state_info` (first argument) can be inspected if stat accounting is done
+differently at CPU level versus higher levels.  As an example, if the element at
+index 0 (CPU power level) in the `pwr_domain_state` array indicates a power down
+state, special hardware logic may be programmed in order to keep track of the
+residency statistics.  For higher levels (array indices > 0), the residency
+statistics could be tracked in software using PMF.  If `ENABLE_PMF` is set, the
+default implementation will use PMF to capture timestamps.
+
+### Function : plat_psci_stat_get_residency() [optional]
+
+    Argument : unsigned int, const psci_power_state_t *, int
+    Return   : u_register_t
 
+This is an optional interface that is invoked after resuming from a low power
+state and provides the time spent resident in that low power state by the power
+domain at a particular power domain level.  When a CPU wakes up from suspend,
+all its parent power domain levels are also woken up.  The generic PSCI code
+invokes this function for each parent power domain that is resumed and it is
+identified by the `lvl` (first argument) parameter.  The `state_info` (second
+argument) describes the low power state that the power domain has resumed from.
+The current CPU is the first CPU in the power domain to resume from the low
+power state and the `last_cpu_idx` (third argument) is the index of the last
+CPU in the power domain to suspend and may be needed to calculate the residency
+for that power domain.
 
 ### Function : plat_get_target_pwr_state() [optional]
 
diff --git a/docs/user-guide.md b/docs/user-guide.md
index ebdb5a2..091aeba 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -37,6 +37,9 @@
 *   Linux kernel image
 *   Root filesystem
 
+Note: the ARM TF v1.3 release was tested with Linaro Release 16.06, and the
+latest version of ARM TF is tested with Linaro Release 16.12.
+
 This document also assumes that the user is familiar with the FVP models and
 the different command line options available to launch the model.
 
@@ -181,6 +184,14 @@
     is used to determine the number of valid slave interfaces available in the
     ARM CCI driver. Default is 400 (that is, CCI-400).
 
+*   `ARM_ARCH_MAJOR`: The major version of ARM Architecture to target when
+    compiling ARM Trusted Firmware. Its value must be numeric, and defaults to
+    8. See also, _ARMv8 Architecture Extensions_ in [Firmware Design].
+
+*   `ARM_ARCH_MINOR`: The minor version of ARM Architecture to target when
+    compiling ARM Trusted Firmware. Its value must be numeric, and defaults
+    to 0. See also, _ARMv8 Architecture Extensions_ in [Firmware Design].
+
 *   `ARM_GIC_ARCH`: Choice of ARM GIC architecture version used by the ARM
     Legacy GIC driver for implementing the platform GIC API. This API is used
     by the interrupt management framework. Default is 2 (that is, version 2.0).
@@ -274,8 +285,9 @@
 
 *   `ENABLE_PSCI_STAT`: Boolean option to enable support for optional PSCI
      functions `PSCI_STAT_RESIDENCY` and `PSCI_STAT_COUNT`. Default is 0.
-     Enabling this option enables the `ENABLE_PMF` build option as well.
-     The PMF is used for collecting the statistics.
+     In the absence of an alternate stat collection backend, `ENABLE_PMF` must
+     be enabled. If `ENABLE_PMF` is set, the residency statistics are tracked in
+     software.
 
 *   `ENABLE_RUNTIME_INSTRUMENTATION`: Boolean option to enable runtime
     instrumentation which injects timestamp collection points into
@@ -878,8 +890,8 @@
 a single FIP binary. It assumes that a [Linaro Release][Linaro Release Notes]
 has been installed.
 
-Note currently [Linaro Release][Linaro Release Notes] only includes pre-built
-binaries for AArch64. For AArch32, pre-built binaries are not available.
+Note: Linaro Release 16.06 only includes pre-built binaries for AArch64. For
+AArch32, pre-built binaries are only available from Linaro Release 16.12.
 
 Note: follow the full instructions for one platform before switching to a
 different one. Mixing instructions for different platforms may result in
@@ -1099,19 +1111,19 @@
 9.  Running the software on FVP
 -------------------------------
 
-The AArch64 build of this version of ARM Trusted Firmware has been tested on
-the following ARM FVPs (64-bit host machine only).
+The latest version of the AArch64 build of ARM Trusted Firmware has been tested
+on the following ARM FVPs (64-bit host machine only).
 
 *   `Foundation_Platform` (Version 10.2, Build 10.2.20)
-*   `FVP_Base_AEMv8A-AEMv8A` (Version 7.7, Build 0.8.7701)
-*   `FVP_Base_Cortex-A57x4-A53x4` (Version 7.7, Build 0.8.7701)
-*   `FVP_Base_Cortex-A57x1-A53x1` (Version 7.7, Build 0.8.7701)
-*   `FVP_Base_Cortex-A57x2-A53x4` (Version 7.7, Build 0.8.7701)
+*   `FVP_Base_AEMv8A-AEMv8A` (Version 8.2, Build 0.8.8202)
+*   `FVP_Base_Cortex-A57x4-A53x4` (Version 8.2, Build 0.8.8202)
+*   `FVP_Base_Cortex-A57x1-A53x1` (Version 8.2, Build 0.8.8202)
+*   `FVP_Base_Cortex-A57x2-A53x4` (Version 8.2, Build 0.8.8202)
 
-The AArch32 build of this version of ARM Trusted Firmware has been tested on
-the following ARM FVPs (64-bit host machine only).
+The latest version of the AArch32 build of ARM Trusted Firmware has been tested
+on the following ARM FVPs (64-bit host machine only).
 
-*   `FVP_Base_AEMv8A-AEMv8A` (Version 7.7, Build 0.8.7701)
+*   `FVP_Base_AEMv8A-AEMv8A` (Version 8.2, Build 0.8.8202)
 *   `FVP_Base_Cortex-A32x4` (Version 10.1, Build 10.1.32)
 
 NOTE: The build numbers quoted above are those reported by launching the FVP
@@ -1131,6 +1143,9 @@
 parameter options. A brief description of the important ones that affect the ARM
 Trusted Firmware and normal world software behavior is provided below.
 
+Note: the instructions in the following sections assume that Linaro Release
+16.06 is being used.
+
 ### Obtaining the Flattened Device Trees
 
 Depending on the FVP configuration and Linux configuration used, different
@@ -1436,12 +1451,12 @@
 
 - - - - - - - - - - - - - - - - - - - - - - - - - -
 
-_Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved._
+_Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved._
 
 
 [Firmware Design]:             firmware-design.md
 [ARM FVP website]:             https://developer.arm.com/products/system-design/fixed-virtual-platforms
-[Linaro Release Notes]:        https://community.arm.com/docs/DOC-10952#jive_content_id_Linaro_Release_1606
+[Linaro Release Notes]:        https://community.arm.com/tools/dev-platforms/b/documents/posts/linaro-release-notes-deprecated
 [ARM Platforms Portal]:        https://community.arm.com/groups/arm-development-platforms
 [Linaro SW Instructions]:      https://community.arm.com/docs/DOC-10803
 [Juno Instructions]:           https://community.arm.com/docs/DOC-10804
diff --git a/drivers/auth/mbedtls/mbedtls_crypto.c b/drivers/auth/mbedtls/mbedtls_crypto.c
index 11d3ede..1a96e8f 100644
--- a/drivers/auth/mbedtls/mbedtls_crypto.c
+++ b/drivers/auth/mbedtls/mbedtls_crypto.c
@@ -217,7 +217,7 @@
 	}
 
 	/* Compare values */
-	rc = timingsafe_bcmp(data_hash, hash, mbedtls_md_get_size(md_info));
+	rc = memcmp(data_hash, hash, mbedtls_md_get_size(md_info));
 	if (rc != 0) {
 		return CRYPTO_ERR_HASH;
 	}
diff --git a/drivers/auth/mbedtls/mbedtls_x509_parser.c b/drivers/auth/mbedtls/mbedtls_x509_parser.c
index f9485de..092c346 100644
--- a/drivers/auth/mbedtls/mbedtls_x509_parser.c
+++ b/drivers/auth/mbedtls/mbedtls_x509_parser.c
@@ -43,6 +43,7 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+#include <utils.h>
 
 /* mbed TLS headers */
 #include <mbedtls/asn1.h>
@@ -71,7 +72,7 @@
 {
 #define ZERO_AND_CLEAN(x)					\
 	do {							\
-		memset(&x, 0, sizeof(x));			\
+		zeromem(&x, sizeof(x));				\
 		clean_dcache_range((uintptr_t)&x, sizeof(x));	\
 	} while (0);
 
@@ -111,7 +112,7 @@
 			     MBEDTLS_ASN1_SEQUENCE);
 
 	while (p < end) {
-		memset(&extn_oid, 0x0, sizeof(extn_oid));
+		zeromem(&extn_oid, sizeof(extn_oid));
 		is_critical = 0; /* DEFAULT FALSE */
 
 		mbedtls_asn1_get_tag(&p, end, &len, MBEDTLS_ASN1_CONSTRUCTED |
@@ -392,7 +393,7 @@
 	if (sig_alg1.len != sig_alg2.len) {
 		return IMG_PARSER_ERR_FORMAT;
 	}
-	if (0 != timingsafe_bcmp(sig_alg1.p, sig_alg2.p, sig_alg1.len)) {
+	if (0 != memcmp(sig_alg1.p, sig_alg2.p, sig_alg1.len)) {
 		return IMG_PARSER_ERR_FORMAT;
 	}
 	memcpy(&sig_alg, &sig_alg1, sizeof(sig_alg));
diff --git a/drivers/emmc/emmc.c b/drivers/emmc/emmc.c
index 3fae2a1..1c1ea82 100644
--- a/drivers/emmc/emmc.c
+++ b/drivers/emmc/emmc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,6 +36,7 @@
 #include <emmc.h>
 #include <errno.h>
 #include <string.h>
+#include <utils.h>
 
 static const emmc_ops_t *ops;
 static unsigned int emmc_ocr_value;
@@ -53,7 +54,7 @@
 	int ret;
 
 	do {
-		memset(&cmd, 0, sizeof(emmc_cmd_t));
+		zeromem(&cmd, sizeof(emmc_cmd_t));
 		cmd.cmd_idx = EMMC_CMD13;
 		cmd.cmd_arg = EMMC_FIX_RCA << RCA_SHIFT_OFFSET;
 		cmd.resp_type = EMMC_RESPONSE_R1;
@@ -71,7 +72,7 @@
 	emmc_cmd_t cmd;
 	int ret, state;
 
-	memset(&cmd, 0, sizeof(emmc_cmd_t));
+	zeromem(&cmd, sizeof(emmc_cmd_t));
 	cmd.cmd_idx = EMMC_CMD6;
 	cmd.cmd_arg = EXTCSD_WRITE_BYTES | EXTCSD_CMD(ext_cmd) |
 		      EXTCSD_VALUE(value) | 1;
@@ -107,14 +108,14 @@
 	ops->init();
 
 	/* CMD0: reset to IDLE */
-	memset(&cmd, 0, sizeof(emmc_cmd_t));
+	zeromem(&cmd, sizeof(emmc_cmd_t));
 	cmd.cmd_idx = EMMC_CMD0;
 	ret = ops->send_cmd(&cmd);
 	assert(ret == 0);
 
 	while (1) {
 		/* CMD1: get OCR register */
-		memset(&cmd, 0, sizeof(emmc_cmd_t));
+		zeromem(&cmd, sizeof(emmc_cmd_t));
 		cmd.cmd_idx = EMMC_CMD1;
 		cmd.cmd_arg = OCR_SECTOR_MODE | OCR_VDD_MIN_2V7 |
 			      OCR_VDD_MIN_1V7;
@@ -127,14 +128,14 @@
 	}
 
 	/* CMD2: Card Identification */
-	memset(&cmd, 0, sizeof(emmc_cmd_t));
+	zeromem(&cmd, sizeof(emmc_cmd_t));
 	cmd.cmd_idx = EMMC_CMD2;
 	cmd.resp_type = EMMC_RESPONSE_R2;
 	ret = ops->send_cmd(&cmd);
 	assert(ret == 0);
 
 	/* CMD3: Set Relative Address */
-	memset(&cmd, 0, sizeof(emmc_cmd_t));
+	zeromem(&cmd, sizeof(emmc_cmd_t));
 	cmd.cmd_idx = EMMC_CMD3;
 	cmd.cmd_arg = EMMC_FIX_RCA << RCA_SHIFT_OFFSET;
 	cmd.resp_type = EMMC_RESPONSE_R1;
@@ -142,7 +143,7 @@
 	assert(ret == 0);
 
 	/* CMD9: CSD Register */
-	memset(&cmd, 0, sizeof(emmc_cmd_t));
+	zeromem(&cmd, sizeof(emmc_cmd_t));
 	cmd.cmd_idx = EMMC_CMD9;
 	cmd.cmd_arg = EMMC_FIX_RCA << RCA_SHIFT_OFFSET;
 	cmd.resp_type = EMMC_RESPONSE_R2;
@@ -151,7 +152,7 @@
 	memcpy(&emmc_csd, &cmd.resp_data, sizeof(cmd.resp_data));
 
 	/* CMD7: Select Card */
-	memset(&cmd, 0, sizeof(emmc_cmd_t));
+	zeromem(&cmd, sizeof(emmc_cmd_t));
 	cmd.cmd_idx = EMMC_CMD7;
 	cmd.cmd_arg = EMMC_FIX_RCA << RCA_SHIFT_OFFSET;
 	cmd.resp_type = EMMC_RESPONSE_R1;
@@ -181,7 +182,7 @@
 	assert(ret == 0);
 
 	if (is_cmd23_enabled()) {
-		memset(&cmd, 0, sizeof(emmc_cmd_t));
+		zeromem(&cmd, sizeof(emmc_cmd_t));
 		/* set block count */
 		cmd.cmd_idx = EMMC_CMD23;
 		cmd.cmd_arg = size / EMMC_BLOCK_SIZE;
@@ -189,7 +190,7 @@
 		ret = ops->send_cmd(&cmd);
 		assert(ret == 0);
 
-		memset(&cmd, 0, sizeof(emmc_cmd_t));
+		zeromem(&cmd, sizeof(emmc_cmd_t));
 		cmd.cmd_idx = EMMC_CMD18;
 	} else {
 		if (size > EMMC_BLOCK_SIZE)
@@ -213,7 +214,7 @@
 
 	if (is_cmd23_enabled() == 0) {
 		if (size > EMMC_BLOCK_SIZE) {
-			memset(&cmd, 0, sizeof(emmc_cmd_t));
+			zeromem(&cmd, sizeof(emmc_cmd_t));
 			cmd.cmd_idx = EMMC_CMD12;
 			ret = ops->send_cmd(&cmd);
 			assert(ret == 0);
@@ -240,17 +241,17 @@
 
 	if (is_cmd23_enabled()) {
 		/* set block count */
-		memset(&cmd, 0, sizeof(emmc_cmd_t));
+		zeromem(&cmd, sizeof(emmc_cmd_t));
 		cmd.cmd_idx = EMMC_CMD23;
 		cmd.cmd_arg = size / EMMC_BLOCK_SIZE;
 		cmd.resp_type = EMMC_RESPONSE_R1;
 		ret = ops->send_cmd(&cmd);
 		assert(ret == 0);
 
-		memset(&cmd, 0, sizeof(emmc_cmd_t));
+		zeromem(&cmd, sizeof(emmc_cmd_t));
 		cmd.cmd_idx = EMMC_CMD25;
 	} else {
-		memset(&cmd, 0, sizeof(emmc_cmd_t));
+		zeromem(&cmd, sizeof(emmc_cmd_t));
 		if (size > EMMC_BLOCK_SIZE)
 			cmd.cmd_idx = EMMC_CMD25;
 		else
@@ -272,7 +273,7 @@
 
 	if (is_cmd23_enabled() == 0) {
 		if (size > EMMC_BLOCK_SIZE) {
-			memset(&cmd, 0, sizeof(emmc_cmd_t));
+			zeromem(&cmd, sizeof(emmc_cmd_t));
 			cmd.cmd_idx = EMMC_CMD12;
 			ret = ops->send_cmd(&cmd);
 			assert(ret == 0);
@@ -291,21 +292,21 @@
 	assert(ops != 0);
 	assert((size != 0) && ((size % EMMC_BLOCK_SIZE) == 0));
 
-	memset(&cmd, 0, sizeof(emmc_cmd_t));
+	zeromem(&cmd, sizeof(emmc_cmd_t));
 	cmd.cmd_idx = EMMC_CMD35;
 	cmd.cmd_arg = lba;
 	cmd.resp_type = EMMC_RESPONSE_R1;
 	ret = ops->send_cmd(&cmd);
 	assert(ret == 0);
 
-	memset(&cmd, 0, sizeof(emmc_cmd_t));
+	zeromem(&cmd, sizeof(emmc_cmd_t));
 	cmd.cmd_idx = EMMC_CMD36;
 	cmd.cmd_arg = lba + (size / EMMC_BLOCK_SIZE) - 1;
 	cmd.resp_type = EMMC_RESPONSE_R1;
 	ret = ops->send_cmd(&cmd);
 	assert(ret == 0);
 
-	memset(&cmd, 0, sizeof(emmc_cmd_t));
+	zeromem(&cmd, sizeof(emmc_cmd_t));
 	cmd.cmd_idx = EMMC_CMD38;
 	cmd.resp_type = EMMC_RESPONSE_R1B;
 	ret = ops->send_cmd(&cmd);
diff --git a/drivers/io/io_block.c b/drivers/io/io_block.c
index 4ec59bc..a855581 100644
--- a/drivers/io/io_block.c
+++ b/drivers/io/io_block.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,6 +36,7 @@
 #include <io_storage.h>
 #include <platform_def.h>
 #include <string.h>
+#include <utils.h>
 
 typedef struct {
 	io_block_dev_spec_t	*dev_spec;
@@ -135,8 +136,8 @@
 	result = find_first_block_state(state->dev_spec, &index);
 	if (result ==  0) {
 		/* free if device info is valid */
-		memset(state, 0, sizeof(block_dev_state_t));
-		memset(dev_info, 0, sizeof(io_dev_info_t));
+		zeromem(state, sizeof(block_dev_state_t));
+		zeromem(dev_info, sizeof(io_dev_info_t));
 		--block_dev_count;
 	}
 
diff --git a/drivers/io/io_fip.c b/drivers/io/io_fip.c
index 99cf15b..6724fc3 100644
--- a/drivers/io/io_fip.c
+++ b/drivers/io/io_fip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,6 +40,7 @@
 #include <platform_def.h>
 #include <stdint.h>
 #include <string.h>
+#include <utils.h>
 #include <uuid.h>
 
 /* Useful for printing UUIDs when debugging.*/
@@ -351,7 +352,7 @@
 	 * If we had malloc() we would free() here.
 	 */
 	if (current_file.entry.offset_address != 0) {
-		memset(&current_file, 0, sizeof(current_file));
+		zeromem(&current_file, sizeof(current_file));
 	}
 
 	/* Clear the Entity info. */
diff --git a/drivers/io/io_memmap.c b/drivers/io/io_memmap.c
index fe39652..5104fb1 100644
--- a/drivers/io/io_memmap.c
+++ b/drivers/io/io_memmap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,7 @@
 #include <io_driver.h>
 #include <io_storage.h>
 #include <string.h>
+#include <utils.h>
 
 /* As we need to be able to keep state for seek, only one file can be open
  * at a time. Make this a structure and point to the entity->info. When we
@@ -118,13 +119,14 @@
 
 
 /* Open a file on the memmap device */
-/* TODO: Can we do any sensible limit checks on requested memory */
 static int memmap_block_open(io_dev_info_t *dev_info, const uintptr_t spec,
 			     io_entity_t *entity)
 {
 	int result = -ENOMEM;
 	const io_block_spec_t *block_spec = (io_block_spec_t *)spec;
 
+	assert(block_spec->length >= 0);
+
 	/* Since we need to track open state for seek() we only allow one open
 	 * spec at a time. When we have dynamic memory we can malloc and set
 	 * entity->info.
@@ -152,13 +154,19 @@
 static int memmap_block_seek(io_entity_t *entity, int mode, ssize_t offset)
 {
 	int result = -ENOENT;
+	file_state_t *fp;
 
 	/* We only support IO_SEEK_SET for the moment. */
 	if (mode == IO_SEEK_SET) {
 		assert(entity != NULL);
 
-		/* TODO: can we do some basic limit checks on seek? */
-		((file_state_t *)entity->info)->file_pos = offset;
+		fp = (file_state_t *) entity->info;
+
+		/* Assert that new file position is valid */
+		assert((offset >= 0) && (offset < fp->size));
+
+		/* Reset file position */
+		fp->file_pos = offset;
 		result = 0;
 	}
 
@@ -183,18 +191,24 @@
 			     size_t length, size_t *length_read)
 {
 	file_state_t *fp;
+	size_t pos_after;
 
 	assert(entity != NULL);
 	assert(buffer != (uintptr_t)NULL);
 	assert(length_read != NULL);
 
-	fp = (file_state_t *)entity->info;
+	fp = (file_state_t *) entity->info;
+
+	/* Assert that file position is valid for this read operation */
+	pos_after = fp->file_pos + length;
+	assert((pos_after >= fp->file_pos) && (pos_after <= fp->size));
 
 	memcpy((void *)buffer, (void *)(fp->base + fp->file_pos), length);
 
 	*length_read = length;
-	/* advance the file 'cursor' for incremental reads */
-	fp->file_pos += length;
+
+	/* Set file position after read */
+	fp->file_pos = pos_after;
 
 	return 0;
 }
@@ -205,19 +219,24 @@
 			      size_t length, size_t *length_written)
 {
 	file_state_t *fp;
+	size_t pos_after;
 
 	assert(entity != NULL);
 	assert(buffer != (uintptr_t)NULL);
 	assert(length_written != NULL);
 
-	fp = (file_state_t *)entity->info;
+	fp = (file_state_t *) entity->info;
+
+	/* Assert that file position is valid for this write operation */
+	pos_after = fp->file_pos + length;
+	assert((pos_after >= fp->file_pos) && (pos_after <= fp->size));
 
 	memcpy((void *)(fp->base + fp->file_pos), (void *)buffer, length);
 
 	*length_written = length;
 
-	/* advance the file 'cursor' for incremental writes */
-	fp->file_pos += length;
+	/* Set file position after write */
+	fp->file_pos = pos_after;
 
 	return 0;
 }
@@ -231,7 +250,7 @@
 	entity->info = 0;
 
 	/* This would be a mem free() if we had malloc.*/
-	memset((void *)&current_file, 0, sizeof(current_file));
+	zeromem((void *)&current_file, sizeof(current_file));
 
 	return 0;
 }
diff --git a/drivers/io/io_semihosting.c b/drivers/io/io_semihosting.c
index 30ca99c..e33a044 100644
--- a/drivers/io/io_semihosting.c
+++ b/drivers/io/io_semihosting.c
@@ -95,7 +95,7 @@
 		const uintptr_t spec, io_entity_t *entity)
 {
 	int result = -ENOENT;
-	long sh_result = -1;
+	long sh_result;
 	const io_file_spec_t *file_spec = (const io_file_spec_t *)spec;
 
 	assert(file_spec != NULL);
@@ -151,7 +151,7 @@
 		size_t *length_read)
 {
 	int result = -ENOENT;
-	long sh_result = -1;
+	long sh_result;
 	size_t bytes = length;
 	long file_handle;
 
@@ -176,7 +176,7 @@
 static int sh_file_write(io_entity_t *entity, const uintptr_t buffer,
 		size_t length, size_t *length_written)
 {
-	long sh_result = -1;
+	long sh_result;
 	long file_handle;
 	size_t bytes = length;
 
@@ -197,7 +197,7 @@
 /* Close a file on the semi-hosting device */
 static int sh_file_close(io_entity_t *entity)
 {
-	long sh_result = -1;
+	long sh_result;
 	long file_handle;
 
 	assert(entity != NULL);
diff --git a/drivers/partition/gpt.c b/drivers/partition/gpt.c
index 9240d5a..05f13f3 100644
--- a/drivers/partition/gpt.c
+++ b/drivers/partition/gpt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,7 @@
 #include <errno.h>
 #include <gpt.h>
 #include <string.h>
+#include <utils.h>
 
 static int unicode_to_ascii(unsigned short *str_in, unsigned char *str_out)
 {
@@ -65,7 +66,7 @@
 		return -EINVAL;
 	}
 
-	memset(entry, 0, sizeof(partition_entry_t));
+	zeromem(entry, sizeof(partition_entry_t));
 	result = unicode_to_ascii(gpt_entry->name, (uint8_t *)entry->name);
 	if (result != 0) {
 		return result;
diff --git a/include/common/aarch32/el3_common_macros.S b/include/common/aarch32/el3_common_macros.S
index 463a080..f6b7527 100644
--- a/include/common/aarch32/el3_common_macros.S
+++ b/include/common/aarch32/el3_common_macros.S
@@ -98,6 +98,11 @@
 	orr	r0, r0, #FPEXC_EN_BIT
 	vmsr	FPEXC, r0
 	isb
+
+	/* Disable secure self-hosted invasive debug. */
+	ldr	r0, =SDCR_DEF_VAL
+	stcopr	r0, SDCR
+
 	.endm
 
 /* -----------------------------------------------------------------------------
diff --git a/include/common/aarch64/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S
index cbfa6ee..e085f9f 100644
--- a/include/common/aarch64/el3_common_macros.S
+++ b/include/common/aarch64/el3_common_macros.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -79,10 +79,11 @@
 	msr	scr_el3, x0
 
 	/* ---------------------------------------------------------------------
-	 * Reset registers that may have architecturally unknown reset values
+	 * Disable secure self-hosted invasive debug.
 	 * ---------------------------------------------------------------------
 	 */
-	msr	mdcr_el3, xzr
+	mov_imm	x0, MDCR_DEF_VAL
+	msr	mdcr_el3, x0
 
 	/* ---------------------------------------------------------------------
 	 * Enable External Aborts and SError Interrupts now that the exception
@@ -252,12 +253,12 @@
 
 		ldr	x0, =__BSS_START__
 		ldr	x1, =__BSS_SIZE__
-		bl	zeromem16
+		bl	zeromem
 
 #if USE_COHERENT_MEM
 		ldr	x0, =__COHERENT_RAM_START__
 		ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
-		bl	zeromem16
+		bl	zeromem
 #endif
 
 #ifdef IMAGE_BL1
diff --git a/include/lib/aarch32/arch.h b/include/lib/aarch32/arch.h
index 170fa84..8525c7b 100644
--- a/include/lib/aarch32/arch.h
+++ b/include/lib/aarch32/arch.h
@@ -125,6 +125,14 @@
 #define SCTLR_AFE_BIT		(1 << 29)
 #define SCTLR_TE_BIT		(1 << 30)
 
+/* SDCR definitions */
+#define SDCR_SPD(x)		((x) << 14)
+#define SDCR_SPD_LEGACY		0x0
+#define SDCR_SPD_DISABLE	0x2
+#define SDCR_SPD_ENABLE		0x3
+
+#define SDCR_DEF_VAL		SDCR_SPD(SDCR_SPD_DISABLE)
+
 /* HSCTLR definitions */
 #define HSCTLR_RES1 	((1 << 29) | (1 << 28) | (1 << 23) | (1 << 22)	\
 			| (1 << 18) | (1 << 16) | (1 << 11) | (1 << 4)	\
@@ -345,6 +353,7 @@
 /* System register defines The format is: coproc, opt1, CRn, CRm, opt2 */
 #define SCR		p15, 0, c1, c1, 0
 #define SCTLR		p15, 0, c1, c0, 0
+#define SDCR		p15, 0, c1, c3, 1
 #define MPIDR		p15, 0, c0, c0, 5
 #define MIDR		p15, 0, c0, c0, 0
 #define VBAR		p15, 0, c12, c0, 0
diff --git a/include/lib/aarch64/arch.h b/include/lib/aarch64/arch.h
index 3f71824..5876ce8 100644
--- a/include/lib/aarch64/arch.h
+++ b/include/lib/aarch64/arch.h
@@ -195,6 +195,15 @@
 #define SCR_NS_BIT		(1 << 0)
 #define SCR_VALID_BIT_MASK	0x2f8f
 
+/* MDCR definitions */
+#define MDCR_SPD32(x)		((x) << 14)
+#define MDCR_SPD32_LEGACY	0x0
+#define MDCR_SPD32_DISABLE	0x2
+#define MDCR_SPD32_ENABLE	0x3
+#define MDCR_SDD_BIT		(1 << 16)
+
+#define MDCR_DEF_VAL		(MDCR_SDD_BIT | MDCR_SPD32(MDCR_SPD32_DISABLE))
+
 /* HCR definitions */
 #define HCR_RW_BIT		(1ull << 31)
 #define HCR_AMO_BIT		(1 << 5)
diff --git a/include/lib/cpus/aarch64/denver.h b/include/lib/cpus/aarch64/denver.h
index c7bee80..e083533 100644
--- a/include/lib/cpus/aarch64/denver.h
+++ b/include/lib/cpus/aarch64/denver.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,10 +31,24 @@
 #ifndef __DENVER_H__
 #define __DENVER_H__
 
-/* MIDR for Denver v1.0 */
-#define DENVER_1_0_MIDR			0x4E0F0000
+/* MIDR values for Denver */
+#define DENVER_MIDR_PN0			0x4E0F0000
+#define DENVER_MIDR_PN1			0x4E0F0010
+#define DENVER_MIDR_PN2			0x4E0F0020
+#define DENVER_MIDR_PN3			0x4E0F0030
+#define DENVER_MIDR_PN4			0x4E0F0040
+
+/* Implementer code in the MIDR register */
+#define DENVER_IMPL			0x4E
 
 /* CPU state ids - implementation defined */
 #define DENVER_CPU_STATE_POWER_DOWN	0x3
 
+#ifndef __ASSEMBLY__
+
+/* Disable Dynamic Code Optimisation */
+void denver_disable_dco(void);
+
+#endif
+
 #endif /* __DENVER_H__ */
diff --git a/include/lib/pmf/pmf.h b/include/lib/pmf/pmf.h
index 7c33387..d5415f4 100644
--- a/include/lib/pmf/pmf.h
+++ b/include/lib/pmf/pmf.h
@@ -37,13 +37,13 @@
 /*
  * Constants used for/by PMF services.
  */
-#define PMF_ARM_TIF_IMPL_ID	(0x41000000)
+#define PMF_ARM_TIF_IMPL_ID	0x41
 #define PMF_TID_SHIFT		0
 #define PMF_TID_MASK		(0xFF << PMF_TID_SHIFT)
 #define PMF_SVC_ID_SHIFT	10
 #define PMF_SVC_ID_MASK		(0x3F << PMF_SVC_ID_SHIFT)
 #define PMF_IMPL_ID_SHIFT	24
-#define PMF_IMPL_ID_MASK	(0xFF << PMF_IMPL_ID_SHIFT)
+#define PMF_IMPL_ID_MASK	(0xFFU << PMF_IMPL_ID_SHIFT)
 
 /*
  * Flags passed to PMF_REGISTER_SERVICE
diff --git a/include/lib/utils.h b/include/lib/utils.h
index b6bc9af..69bbb43 100644
--- a/include/lib/utils.h
+++ b/include/lib/utils.h
@@ -80,4 +80,35 @@
 # define ULL(_x)	(_x##ull)
 #endif
 
+/*
+ * C code should be put in this part of the header to avoid breaking ASM files
+ * or linker scripts including it.
+ */
+#if !(defined(__LINKER__) || defined(__ASSEMBLY__))
+
+#include <types.h>
+
+/*
+ * Fill a region of normal memory of size "length" in bytes with zero bytes.
+ *
+ * WARNING: This function can only operate on normal memory. This means that
+ *          the MMU must be enabled when using this function. Otherwise, use
+ *          zeromem.
+ */
+void zero_normalmem(void *mem, u_register_t length);
+
+/*
+ * Fill a region of memory of size "length" in bytes with null bytes.
+ *
+ * Unlike zero_normalmem, this function has no restriction on the type of
+ * memory targeted and can be used for any device memory as well as normal
+ * memory. This function must be used instead of zero_normalmem when MMU is
+ * disabled.
+ *
+ * NOTE: When data cache and MMU are enabled, prefer zero_normalmem for faster
+ *       zeroing.
+ */
+void zeromem(void *mem, u_register_t length);
+#endif /* !(defined(__LINKER__) || defined(__ASSEMBLY__)) */
+
 #endif /* __UTILS_H__ */
diff --git a/include/plat/arm/css/common/css_def.h b/include/plat/arm/css/common/css_def.h
index a2fe0d5..7cfaf59 100644
--- a/include/plat/arm/css/common/css_def.h
+++ b/include/plat/arm/css/common/css_def.h
@@ -101,6 +101,13 @@
 #define SSC_VERSION_DESIGNER_ID_MASK		0xff
 #define SSC_VERSION_PART_NUM_MASK		0xfff
 
+/* SSC debug configuration registers */
+#define SSC_DBGCFG_SET		0x14
+#define SSC_DBGCFG_CLR		0x18
+
+#define SPIDEN_INT_CLR_SHIFT	6
+#define SPIDEN_SEL_SET_SHIFT	7
+
 #ifndef __ASSEMBLY__
 
 /* SSC_VERSION related accessors */
diff --git a/include/plat/common/platform.h b/include/plat/common/platform.h
index f904292..73bb643 100644
--- a/include/plat/common/platform.h
+++ b/include/plat/common/platform.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -254,6 +254,11 @@
 /*******************************************************************************
  * Optional PSCI functions (BL31).
  ******************************************************************************/
+void plat_psci_stat_accounting_start(const psci_power_state_t *state_info);
+void plat_psci_stat_accounting_stop(const psci_power_state_t *state_info);
+u_register_t plat_psci_stat_get_residency(unsigned int lvl,
+			const psci_power_state_t *state_info,
+			int last_cpu_index);
 plat_local_state_t plat_get_target_pwr_state(unsigned int lvl,
 			const plat_local_state_t *states,
 			unsigned int ncpu);
diff --git a/lib/aarch32/misc_helpers.S b/lib/aarch32/misc_helpers.S
index bf4084a..dc84799 100644
--- a/lib/aarch32/misc_helpers.S
+++ b/lib/aarch32/misc_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -34,6 +34,7 @@
 
 	.globl	smc
 	.globl	zeromem
+	.globl	zero_normalmem
 	.globl	memcpy4
 	.globl	disable_mmu_icache_secure
 	.globl	disable_mmu_secure
@@ -50,30 +51,108 @@
 endfunc smc
 
 /* -----------------------------------------------------------------------
- * void zeromem(void *mem, unsigned int length);
+ * void zeromem(void *mem, unsigned int length)
  *
- * Initialise a memory region to 0.
- * The memory address and length must be 4-byte aligned.
+ * Initialise a region in normal memory to 0. This function complies with the
+ * AAPCS and can be called from C code.
+ *
  * -----------------------------------------------------------------------
  */
 func zeromem
-#if ASM_ASSERTION
-	tst	r0, #0x3
-	ASM_ASSERT(eq)
-	tst	r1, #0x3
-	ASM_ASSERT(eq)
-#endif
-	add	r2, r0, r1
-	mov	r1, #0
-z_loop:
-	cmp	r2, r0
-	beq	z_end
-	str	r1, [r0], #4
-	b	z_loop
-z_end:
+	/*
+	 * Readable names for registers
+	 *
+	 * Registers r0 and r1 hold the arguments on entry (start address and
+	 * length), so cursor and length must stay mapped to them. stop_address
+	 * takes over r1 once length has been consumed.
+	 */
+	cursor       .req r0 /* Start address and then current address */
+	length       .req r1 /* Length in bytes of the region to zero out */
+	/*
+	 * Reusing the r1 register as length is only used at the beginning of
+	 * the function.
+	 */
+	stop_address .req r1  /* Address past the last zeroed byte */
+	zeroreg1     .req r2  /* Source register filled with 0 */
+	zeroreg2     .req r3  /* Source register filled with 0 */
+	tmp	     .req r12 /* Temporary scratch register */
+
+	mov	zeroreg1, #0
+
+	/* stop_address is the address just past the last byte to zero */
+	add	stop_address, cursor, length
+
+	/*
+	 * Length cannot be used anymore as it shares the same register with
+	 * stop_address.
+	 */
+	.unreq	length
+
+	/*
+	 * If the start address is already aligned to 8 bytes, skip this loop.
+	 */
+	tst	cursor, #(8-1)
+	beq	.Lzeromem_8bytes_aligned
+
+	/* Calculate the next address aligned to 8 bytes */
+	orr	tmp, cursor, #(8-1)
+	adds	tmp, tmp, #1
+	/* If it overflows, fallback to byte per byte zeroing */
+	beq	.Lzeromem_1byte_aligned
+	/* If the next aligned address is after the stop address, fall back */
+	cmp	tmp, stop_address
+	bhs	.Lzeromem_1byte_aligned
+
+	/* zero byte per byte */
+1:
+	strb	zeroreg1, [cursor], #1
+	cmp	cursor, tmp
+	bne	1b
+
+	/* zero 8 bytes at a time */
+.Lzeromem_8bytes_aligned:
+
+	/* Calculate the last 8 bytes aligned address. */
+	bic	tmp, stop_address, #(8-1)
+
+	cmp	cursor, tmp
+	bhs	2f
+
+	mov	zeroreg2, #0
+1:
+	stmia	cursor!, {zeroreg1, zeroreg2}
+	cmp	cursor, tmp
+	blo	1b
+2:
+
+	/* zero byte per byte */
+.Lzeromem_1byte_aligned:
+	cmp	cursor, stop_address
+	beq	2f
+1:
+	strb	zeroreg1, [cursor], #1
+	cmp	cursor, stop_address
+	bne	1b
+2:
 	bx	lr
+
+	.unreq	cursor
+	/*
+	 * length is already unreq'ed to reuse the register for another
+	 * variable.
+	 */
+	.unreq	stop_address
+	.unreq	zeroreg1
+	.unreq	zeroreg2
+	.unreq	tmp
 endfunc zeromem
 
+/*
+ * AArch32 does not have special ways of zeroing normal memory as AArch64 does
+ * using the DC ZVA instruction, so we just alias zero_normalmem to zeromem.
+ */
+.equ	zero_normalmem, zeromem
+
 /* --------------------------------------------------------------------------
  * void memcpy4(void *dest, const void *src, unsigned int length)
  *
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index 574146f..84265e0 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -37,6 +37,8 @@
 	.globl	eret
 	.globl	smc
 
+	.globl	zero_normalmem
+	.globl	zeromem
 	.globl	zeromem16
 	.globl	memcpy16
 
@@ -80,31 +82,358 @@
  *
  * Initialise a memory region to 0.
  * The memory address must be 16-byte aligned.
+ * NOTE: This function is deprecated and zeromem should be used instead.
  * -----------------------------------------------------------------------
  */
-func zeromem16
+.equ	zeromem16, zeromem
+
+/* -----------------------------------------------------------------------
+ * void zero_normalmem(void *mem, u_register_t length);
+ *
+ * Initialise a region in normal memory to 0. This function complies with the
+ * AAPCS and can be called from C code.
+ *
+ * NOTE: MMU must be enabled when using this function as it can only operate on
+ *       normal memory. It is intended to be mainly used from C code when MMU
+ *       is usually enabled.
+ * -----------------------------------------------------------------------
+ */
+.equ	zero_normalmem, zeromem_dczva
+
+/* -----------------------------------------------------------------------
+ * void zeromem(void *mem, u_register_t length);
+ *
+ * Initialise a region of device memory to 0. This function complies with the
+ * AAPCS and can be called from C code.
+ *
+ * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
+ *       used instead for faster zeroing.
+ *
+ * -----------------------------------------------------------------------
+ */
+func zeromem
+	/* x2 is the address past the last zeroed address */
+	add	x2, x0, x1
+	/*
+	 * Uses the fallback path that does not use DC ZVA instruction and
+	 * therefore does not need enabled MMU
+	 */
+	b	.Lzeromem_dczva_fallback_entry
+endfunc zeromem
+
+/* -----------------------------------------------------------------------
+ * void zeromem_dczva(void *mem, u_register_t length);
+ *
+ * Fill a region of normal memory of size "length" in bytes with null bytes.
+ * The MMU must be enabled and the memory must be of
+ * normal type. This is because this function internally uses the DC ZVA
+ * instruction, which generates an Alignment fault if used on any type of
+ * Device memory (see section D3.4.9 of the ARMv8 ARM, issue k). When the MMU
+ * is disabled, all memory behaves like Device-nGnRnE memory (see section
+ * D4.2.8), hence the requirement on the MMU being enabled.
+ * NOTE: The code assumes that the block size as defined in DCZID_EL0
+ *       register is at least 16 bytes.
+ *
+ * -----------------------------------------------------------------------
+ */
+func zeromem_dczva
+
+	/*
+	 * The function consists of a series of loops that zero memory one byte
+	 * at a time, 16 bytes at a time or using the DC ZVA instruction to
+	 * zero aligned blocks of bytes, each assumed to be at least 16 bytes.
+	 * In the case where the DC ZVA instruction cannot be used or if the
+	 * first 16 bytes loop would overflow, there is a fallback path that does
+	 * not use DC ZVA.
+	 * Note: The fallback path is also used by the zeromem function that
+	 *       branches to it directly.
+	 *
+	 *              +---------+   zeromem_dczva
+	 *              |  entry  |
+	 *              +----+----+
+	 *                   |
+	 *                   v
+	 *              +---------+
+	 *              | checks  |>o-------+ (If any check fails, fallback)
+	 *              +----+----+         |
+	 *                   |              |---------------+
+	 *                   v              | Fallback path |
+	 *            +------+------+       |---------------+
+	 *            | 1 byte loop |       |
+	 *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
+	 *                   |              |
+	 *                   v              |
+	 *           +-------+-------+      |
+	 *           | 16 bytes loop |      |
+	 *           +-------+-------+      |
+	 *                   |              |
+	 *                   v              |
+	 *            +------+------+ .Lzeromem_dczva_blocksize_aligned
+	 *            | DC ZVA loop |       |
+	 *            +------+------+       |
+	 *       +--------+  |              |
+	 *       |        |  |              |
+	 *       |        v  v              |
+	 *       |   +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
+	 *       |   | 16 bytes loop |      |
+	 *       |   +-------+-------+      |
+	 *       |           |              |
+	 *       |           v              |
+	 *       |    +------+------+ .Lzeromem_dczva_final_1byte_aligned
+	 *       |    | 1 byte loop |       |
+	 *       |    +-------------+       |
+	 *       |           |              |
+	 *       |           v              |
+	 *       |       +---+--+           |
+	 *       |       | exit |           |
+	 *       |       +------+           |
+	 *       |			    |
+	 *       |           +--------------+    +------------------+ zeromem
+	 *       |           |  +----------------| zeromem function |
+	 *       |           |  |                +------------------+
+	 *       |           v  v
+	 *       |    +-------------+ .Lzeromem_dczva_fallback_entry
+	 *       |    | 1 byte loop |
+	 *       |    +------+------+
+	 *       |           |
+	 *       +-----------+
+	 */
+
+	/*
+	 * Readable names for registers
+	 *
+	 * Registers x0, x1 and x2 are also set by zeromem which
+	 * branches into the fallback path directly, so cursor, length and
+	 * stop_address should not be retargeted to other registers.
+	 */
+	cursor       .req x0 /* Start address and then current address */
+	length       .req x1 /* Length in bytes of the region to zero out */
+	/* Reusing x1 as length is never used after block_mask is set */
+	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
+	stop_address .req x2 /* Address past the last zeroed byte */
+	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
+	tmp1         .req x4
+	tmp2         .req x5
+
 #if ASM_ASSERTION
-	tst	x0, #0xf
-	ASM_ASSERT(eq)
+	/*
+	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
+	 * register value and panic if the MMU is disabled.
+	 */
+#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
+	mrs	tmp1, sctlr_el3
+#else
+	mrs	tmp1, sctlr_el1
 #endif
-	add	x2, x0, x1
-/* zero 16 bytes at a time */
-z_loop16:
-	sub	x3, x2, x0
-	cmp	x3, #16
-	b.lt	z_loop1
-	stp	xzr, xzr, [x0], #16
-	b	z_loop16
-/* zero byte per byte */
-z_loop1:
-	cmp	x0, x2
-	b.eq	z_end
-	strb	wzr, [x0], #1
-	b	z_loop1
-z_end:
+
+	tst	tmp1, #SCTLR_M_BIT
+	ASM_ASSERT(ne)
+#endif /* ASM_ASSERTION */
+
+	/* stop_address is the address just past the last byte to zero */
+	add	stop_address, cursor, length
+
+	/*
+	 * Read DCZID_EL0; its 4 lowest bits hold log2(<block size in words>)
+	 * (see encoding of dczid_el0 reg)
+	 */
+	mrs	block_size, dczid_el0
+
+	/*
+	 * Select the 4 lowest bits and convert the extracted log2(<block size
+	 * in words>) to <block size in bytes>
+	 */
+	ubfx	block_size, block_size, #0, #4
+	mov	tmp2, #(1 << 2)
+	lsl	block_size, tmp2, block_size
+
+#if ASM_ASSERTION
+	/*
+	 * Assumes block size is at least 16 bytes to avoid manual realignment
+	 * of the cursor at the end of the DCZVA loop.
+	 */
+	cmp	block_size, #16
+	ASM_ASSERT(hs)
+#endif
+	/*
+	 * Not worth doing all the setup for a region less than a block and
+	 * protects against zeroing a whole block when the area to zero is
+	 * smaller than that. Also, as it is assumed that the block size is at
+	 * least 16 bytes, this also protects the initial aligning loops from
+	 * trying to zero 16 bytes when length is less than 16.
+	 */
+	cmp	length, block_size
+	b.lo	.Lzeromem_dczva_fallback_entry
+
+	/*
+	 * Calculate the bitmask of the block alignment. It will never
+	 * underflow as the block size is between 4 bytes and 2kB.
+	 * block_mask = block_size - 1
+	 */
+	sub	block_mask, block_size, #1
+
+	/*
+	 * length alias should not be used after this point unless it is
+	 * defined as a register other than block_mask's.
+	 */
+	 .unreq length
+
+	/*
+	 * If the start address is already aligned to zero block size, go
+	 * straight to the cache zeroing loop. This is safe because at this
+	 * point, the length cannot be smaller than a block size.
+	 */
+	tst	cursor, block_mask
+	b.eq	.Lzeromem_dczva_blocksize_aligned
+
+	/*
+	 * Calculate the first block-size-aligned address. It is assumed that
+	 * the zero block size is at least 16 bytes. This address is the last
+	 * address of this initial loop.
+	 */
+	orr	tmp1, cursor, block_mask
+	add	tmp1, tmp1, #1
+
+	/*
+	 * If the addition overflows, skip the cache zeroing loops. This is
+	 * quite unlikely however.
+	 */
+	cbz	tmp1, .Lzeromem_dczva_fallback_entry
+
+	/*
+	 * If the first block-size-aligned address is past the last address,
+	 * fallback to the simpler code.
+	 */
+	cmp	tmp1, stop_address
+	b.hi	.Lzeromem_dczva_fallback_entry
+
+	/*
+	 * If the start address is already aligned to 16 bytes, skip this loop.
+	 * It is safe to do this because tmp1 (the stop address of the initial
+	 * 16 bytes loop) will never be greater than the final stop address.
+	 */
+	tst	cursor, #0xf
+	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end
+
+	/* Calculate the next address aligned to 16 bytes */
+	orr	tmp2, cursor, #0xf
+	add	tmp2, tmp2, #1
+	/* If it overflows, fallback to the simple path (unlikely) */
+	cbz	tmp2, .Lzeromem_dczva_fallback_entry
+	/*
+	 * Next aligned address cannot be after the stop address because the
+	 * length cannot be smaller than 16 at this point.
+	 */
+
+	/* First loop: zero byte per byte */
+1:
+	strb	wzr, [cursor], #1
+	cmp	cursor, tmp2
+	b.ne	1b
+.Lzeromem_dczva_initial_1byte_aligned_end:
+
+	/*
+	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
+	 * before being able to use the code that deals with block-size-aligned
+	 * addresses.
+	 */
+	cmp	cursor, tmp1
+	b.hs	2f
+1:
+	stp	xzr, xzr, [cursor], #16
+	cmp	cursor, tmp1
+	b.lo	1b
+2:
+
+	/*
+	 * Third loop: zero a block at a time using DC ZVA cache block zeroing
+	 * instruction.
+	 */
+.Lzeromem_dczva_blocksize_aligned:
+	/*
+	 * Calculate the last block-size-aligned address. If the result equals
+	 * to the start address, the loop will exit immediately.
+	 */
+	bic	tmp1, stop_address, block_mask
+
+	cmp	cursor, tmp1
+	b.hs	2f
+1:
+	/* Zero the block containing the cursor */
+	dc	zva, cursor
+	/* Increment the cursor by the size of a block */
+	add	cursor, cursor, block_size
+	cmp	cursor, tmp1
+	b.lo	1b
+2:
+
+	/*
+	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
+	 * remaining area
+	 */
+.Lzeromem_dczva_final_16bytes_aligned:
+	/*
+	 * Calculate the last 16 bytes aligned address. It is assumed that the
+	 * block size will never be smaller than 16 bytes so that the current
+	 * cursor is aligned to at least 16 bytes boundary.
+	 */
+	bic	tmp1, stop_address, #15
+
+	cmp	cursor, tmp1
+	b.hs	2f
+1:
+	stp	xzr, xzr, [cursor], #16
+	cmp	cursor, tmp1
+	b.lo	1b
+2:
+
+	/* Fifth and final loop: zero byte per byte */
+.Lzeromem_dczva_final_1byte_aligned:
+	cmp	cursor, stop_address
+	b.eq	2f
+1:
+	strb	wzr, [cursor], #1
+	cmp	cursor, stop_address
+	b.ne	1b
+2:
 	ret
-endfunc zeromem16
+
+	/* Fallback for unaligned start addresses */
+.Lzeromem_dczva_fallback_entry:
+	/*
+	 * If the start address is already aligned to 16 bytes, skip this loop.
+	 */
+	tst	cursor, #0xf
+	b.eq	.Lzeromem_dczva_final_16bytes_aligned
+
+	/* Calculate the next address aligned to 16 bytes */
+	orr	tmp1, cursor, #15
+	add	tmp1, tmp1, #1
+	/* If it overflows, fallback to byte per byte zeroing */
+	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
+	/* If the next aligned address is after the stop address, fall back */
+	cmp	tmp1, stop_address
+	b.hs	.Lzeromem_dczva_final_1byte_aligned
+
+	/* Fallback entry loop: zero byte per byte */
+1:
+	strb	wzr, [cursor], #1
+	cmp	cursor, tmp1
+	b.ne	1b
+
+	b	.Lzeromem_dczva_final_16bytes_aligned
 
+	.unreq	cursor
+	/*
+	 * length is already unreq'ed to reuse the register for another
+	 * variable.
+	 */
+	.unreq	stop_address
+	.unreq	block_size
+	.unreq	block_mask
+	.unreq	tmp1
+	.unreq	tmp2
+endfunc zeromem_dczva
 
 /* --------------------------------------------------------------------------
  * void memcpy16(void *dest, const void *src, unsigned int length)
diff --git a/lib/cpus/aarch64/denver.S b/lib/cpus/aarch64/denver.S
index 0b61440..3e238a1 100644
--- a/lib/cpus/aarch64/denver.S
+++ b/lib/cpus/aarch64/denver.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,6 +35,8 @@
 #include <cpu_macros.S>
 #include <plat_macros.S>
 
+	.global	denver_disable_dco
+
 	/* ---------------------------------------------
 	 * Disable debug interfaces
 	 * ---------------------------------------------
@@ -111,23 +113,7 @@
 
 	mov	x19, x30
 
-	/* ----------------------------------------------------
-	 * We enter the 'core power gated with ARM state not
-	 * retained' power state during CPU power down. We let
-	 * DCO know that we expect to enter this power state
-	 * by writing to the ACTLR_EL1 register.
- 	 * ----------------------------------------------------
- 	 */
-	mov	x0, #DENVER_CPU_STATE_POWER_DOWN
-	msr	actlr_el1, x0
-
 	/* ---------------------------------------------
-	 * Force DCO to be quiescent
-	 * ---------------------------------------------
-	 */
-	bl	denver_disable_dco
-
-	/* ---------------------------------------------
 	 * Force the debug interfaces to be quiescent
 	 * ---------------------------------------------
 	 */
@@ -163,7 +149,27 @@
 	ret
 endfunc denver_cpu_reg_dump
 
+declare_cpu_ops denver, DENVER_MIDR_PN0, \
+	denver_reset_func, \
+	denver_core_pwr_dwn, \
+	denver_cluster_pwr_dwn
+
+declare_cpu_ops denver, DENVER_MIDR_PN1, \
+	denver_reset_func, \
+	denver_core_pwr_dwn, \
+	denver_cluster_pwr_dwn
+
+declare_cpu_ops denver, DENVER_MIDR_PN2, \
+	denver_reset_func, \
+	denver_core_pwr_dwn, \
+	denver_cluster_pwr_dwn
+
+declare_cpu_ops denver, DENVER_MIDR_PN3, \
+	denver_reset_func, \
+	denver_core_pwr_dwn, \
+	denver_cluster_pwr_dwn
+
-declare_cpu_ops denver, DENVER_1_0_MIDR, \
+declare_cpu_ops denver, DENVER_MIDR_PN4, \
 	denver_reset_func, \
 	denver_core_pwr_dwn, \
 	denver_cluster_pwr_dwn
diff --git a/lib/el3_runtime/aarch32/context_mgmt.c b/lib/el3_runtime/aarch32/context_mgmt.c
index 51b7759..df22eaf 100644
--- a/lib/el3_runtime/aarch32/context_mgmt.c
+++ b/lib/el3_runtime/aarch32/context_mgmt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -38,6 +38,7 @@
 #include <platform_def.h>
 #include <smcc_helpers.h>
 #include <string.h>
+#include <utils.h>
 
 /*******************************************************************************
  * Context management library initialisation routine. This library is used by
@@ -84,7 +85,7 @@
 	security_state = GET_SECURITY_STATE(ep->h.attr);
 
 	/* Clear any residual register values from the context */
-	memset(ctx, 0, sizeof(*ctx));
+	zeromem(ctx, sizeof(*ctx));
 
 	reg_ctx = get_regs_ctx(ctx);
 
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index e26950d..5cce879 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -39,6 +39,7 @@
 #include <platform_def.h>
 #include <smcc_helpers.h>
 #include <string.h>
+#include <utils.h>
 
 
 /*******************************************************************************
@@ -91,7 +92,7 @@
 	security_state = GET_SECURITY_STATE(ep->h.attr);
 
 	/* Clear any residual register values from the context */
-	memset(ctx, 0, sizeof(*ctx));
+	zeromem(ctx, sizeof(*ctx));
 
 	/*
 	 * Base the context SCR on the current value, adjust for entry point
diff --git a/lib/locks/exclusive/aarch64/spinlock.S b/lib/locks/exclusive/aarch64/spinlock.S
index 1ca5912..bdc9ea0 100644
--- a/lib/locks/exclusive/aarch64/spinlock.S
+++ b/lib/locks/exclusive/aarch64/spinlock.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,10 +33,69 @@
 	.globl	spin_lock
 	.globl	spin_unlock
 
+#if (ARM_ARCH_MAJOR > 8) || ((ARM_ARCH_MAJOR == 8) && (ARM_ARCH_MINOR >= 1))
 
+/*
+ * When compiled for ARMv8.1 or later, choose spin locks based on Compare and
+ * Swap instruction.
+ */
+# define USE_CAS	1
+
+/*
+ * Lock contenders using CAS, upon failing to acquire the lock, wait with the
+ * monitor in open state. Therefore, a normal store upon unlocking won't
+ * generate an SEV. Use explicit SEV instruction with CAS unlock.
+ */
+# define COND_SEV()	sev
+
+#else
+
+# define USE_CAS	0
+
+/*
+ * Lock contenders using exclusive pairs, upon failing to acquire the lock, wait
+ * with the monitor in exclusive state. A normal store upon unlocking will
+ * implicitly generate an event; so, no explicit SEV with unlock is required.
+ */
+# define COND_SEV()
+
+#endif
+
+#if USE_CAS
+
+	.arch	armv8.1-a
+
+/*
+ * Acquire lock using Compare and Swap instruction.
+ *
+ * Compare for 0 with acquire semantics, and swap 1. Wait until CAS returns
+ * 0.
+ *
+ * void spin_lock(spinlock_t *lock);
+ */
 func spin_lock
 	mov	w2, #1
 	sevl
+1:
+	wfe
+	mov	w1, wzr
+	casa	w1, w2, [x0]
+	cbnz	w1, 1b
+	ret
+endfunc spin_lock
+
+	.arch	armv8-a
+
+#else /* !USE_CAS */
+
+/*
+ * Acquire lock using load-/store-exclusive instruction pair.
+ *
+ * void spin_lock(spinlock_t *lock);
+ */
+func spin_lock
+	mov	w2, #1
+	sevl
 l1:	wfe
 l2:	ldaxr	w1, [x0]
 	cbnz	w1, l1
@@ -45,8 +104,17 @@
 	ret
 endfunc spin_lock
 
+#endif /* USE_CAS */
 
+/*
+ * Release lock previously acquired by spin_lock.
+ *
+ * Unconditionally write 0, and conditionally generate an event.
+ *
+ * void spin_unlock(spinlock_t *lock);
+ */
 func spin_unlock
 	stlr	wzr, [x0]
+	COND_SEV()
 	ret
 endfunc spin_unlock
diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index 68cdd6e..9fdce49 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -37,6 +37,7 @@
 #include <debug.h>
 #include <platform.h>
 #include <string.h>
+#include <utils.h>
 #include "psci_private.h"
 
 /*
@@ -622,7 +623,7 @@
 	SET_PARAM_HEAD(ep, PARAM_EP, VERSION_1, ep_attr);
 
 	ep->pc = entrypoint;
-	memset(&ep->args, 0, sizeof(ep->args));
+	zeromem(&ep->args, sizeof(ep->args));
 	ep->args.arg0 = context_id;
 
 	mode = scr & SCR_HCE_BIT ? MODE32_hyp : MODE32_svc;
@@ -659,7 +660,7 @@
 	SET_PARAM_HEAD(ep, PARAM_EP, VERSION_1, ep_attr);
 
 	ep->pc = entrypoint;
-	memset(&ep->args, 0, sizeof(ep->args));
+	zeromem(&ep->args, sizeof(ep->args));
 	ep->args.arg0 = context_id;
 
 	/*
@@ -760,13 +761,7 @@
 				      cpu_idx);
 
 #if ENABLE_PSCI_STAT
-	/*
-	 * Capture power up time-stamp.
-	 * No cache maintenance is required as caches are off
-	 * and writes are direct to the main memory.
-	 */
-	PMF_CAPTURE_TIMESTAMP(psci_svc, PSCI_STAT_ID_EXIT_LOW_PWR,
-		PMF_NO_CACHE_MAINT);
+	plat_psci_stat_accounting_stop(&state_info);
 #endif
 
 	psci_get_target_local_pwr_states(end_pwrlvl, &state_info);
@@ -801,7 +796,7 @@
 	 * Since caches are now enabled, it's necessary to do cache
 	 * maintenance before reading that same data.
 	 */
-	psci_stats_update_pwr_up(end_pwrlvl, &state_info, PMF_CACHE_MAINT);
+	psci_stats_update_pwr_up(end_pwrlvl, &state_info);
 #endif
 
 	/*
@@ -957,7 +952,7 @@
 {
 	psci_power_state_t state_info;
 
-	memset(&state_info, 0, sizeof(state_info));
+	zeromem(&state_info, sizeof(state_info));
 	psci_get_target_local_pwr_states(PLAT_MAX_PWR_LVL, &state_info);
 
 	return psci_find_target_suspend_lvl(&state_info);
diff --git a/lib/psci/psci_main.c b/lib/psci/psci_main.c
index 0a3a60a..5e166b5 100644
--- a/lib/psci/psci_main.c
+++ b/lib/psci/psci_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -117,13 +117,7 @@
 		psci_set_cpu_local_state(cpu_pd_state);
 
 #if ENABLE_PSCI_STAT
-		/*
-		 * Capture time-stamp before CPU standby
-		 * No cache maintenance is needed as caches
-		 * are ON through out the CPU standby operation.
-		 */
-		PMF_CAPTURE_TIMESTAMP(psci_svc, PSCI_STAT_ID_ENTER_LOW_PWR,
-			PMF_NO_CACHE_MAINT);
+		plat_psci_stat_accounting_start(&state_info);
 #endif
 
 #if ENABLE_RUNTIME_INSTRUMENTATION
@@ -144,13 +138,10 @@
 #endif
 
 #if ENABLE_PSCI_STAT
-		/* Capture time-stamp after CPU standby */
-		PMF_CAPTURE_TIMESTAMP(psci_svc, PSCI_STAT_ID_EXIT_LOW_PWR,
-			PMF_NO_CACHE_MAINT);
+		plat_psci_stat_accounting_stop(&state_info);
 
 		/* Update PSCI stats */
-		psci_stats_update_pwr_up(PSCI_CPU_PWR_LVL, &state_info,
-			PMF_NO_CACHE_MAINT);
+		psci_stats_update_pwr_up(PSCI_CPU_PWR_LVL, &state_info);
 #endif
 
 		return PSCI_E_SUCCESS;
diff --git a/lib/psci/psci_off.c b/lib/psci/psci_off.c
index 897bf31..394aaa3 100644
--- a/lib/psci/psci_off.c
+++ b/lib/psci/psci_off.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -137,13 +137,7 @@
 	psci_plat_pm_ops->pwr_domain_off(&state_info);
 
 #if ENABLE_PSCI_STAT
-	/*
-	 * Capture time-stamp while entering low power state.
-	 * No cache maintenance needed because caches are off
-	 * and writes are direct to main memory.
-	 */
-	PMF_CAPTURE_TIMESTAMP(psci_svc, PSCI_STAT_ID_ENTER_LOW_PWR,
-		PMF_NO_CACHE_MAINT);
+	plat_psci_stat_accounting_start(&state_info);
 #endif
 
 exit:
diff --git a/lib/psci/psci_private.h b/lib/psci/psci_private.h
index 781b3b5..ca8291e 100644
--- a/lib/psci/psci_private.h
+++ b/lib/psci/psci_private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,7 +35,6 @@
 #include <bakery_lock.h>
 #include <bl_common.h>
 #include <cpu_data.h>
-#include <pmf.h>
 #include <psci.h>
 #include <spinlock.h>
 
@@ -106,15 +105,6 @@
 #define is_cpu_standby_req(is_power_down_state, retn_lvl) \
 		(((!(is_power_down_state)) && ((retn_lvl) == 0)) ? 1 : 0)
 
-/* Following are used as ID's to capture time-stamp */
-#define PSCI_STAT_ID_ENTER_LOW_PWR		0
-#define PSCI_STAT_ID_EXIT_LOW_PWR		1
-#define PSCI_STAT_TOTAL_IDS			2
-
-/* Declare PMF service functions for PSCI */
-PMF_DECLARE_CAPTURE_TIMESTAMP(psci_svc)
-PMF_DECLARE_GET_TIMESTAMP(psci_svc)
-
 /*******************************************************************************
  * The following two data structures implement the power domain tree. The tree
  * is used to track the state of all the nodes i.e. power domain instances
@@ -246,8 +236,7 @@
 void psci_stats_update_pwr_down(unsigned int end_pwrlvl,
 			const psci_power_state_t *state_info);
 void psci_stats_update_pwr_up(unsigned int end_pwrlvl,
-			const psci_power_state_t *state_info,
-			unsigned int flags);
+			const psci_power_state_t *state_info);
 u_register_t psci_stat_residency(u_register_t target_cpu,
 			unsigned int power_state);
 u_register_t psci_stat_count(u_register_t target_cpu,
diff --git a/lib/psci/psci_stat.c b/lib/psci/psci_stat.c
index ecbe592..d8034a5 100644
--- a/lib/psci/psci_stat.c
+++ b/lib/psci/psci_stat.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -38,9 +38,6 @@
 #define PLAT_MAX_PWR_LVL_STATES 2
 #endif
 
-/* Ticks elapsed in one second by a signal of 1 MHz */
-#define MHZ_TICKS_PER_SEC 1000000
-
 /* Following structure is used for PSCI STAT */
 typedef struct psci_stat {
 	u_register_t residency;
@@ -62,28 +59,7 @@
 static psci_stat_t psci_non_cpu_stat[PSCI_NUM_NON_CPU_PWR_DOMAINS]
 				[PLAT_MAX_PWR_LVL_STATES];
 
-/* Register PMF PSCI service */
-PMF_REGISTER_SERVICE(psci_svc, PMF_PSCI_STAT_SVC_ID,
-	 PSCI_STAT_TOTAL_IDS, PMF_STORE_ENABLE)
-
-/* The divisor to use to convert raw timestamp into microseconds */
-u_register_t residency_div;
-
 /*
- * This macro calculates the stats residency in microseconds,
- * taking in account the wrap around condition.
- */
-#define calc_stat_residency(_pwrupts, _pwrdnts, _res)		\
-	do {							\
-		if (_pwrupts < _pwrdnts)			\
-			_res = UINT64_MAX - _pwrdnts + _pwrupts;\
-		else						\
-			_res = _pwrupts - _pwrdnts;		\
-		/* Convert timestamp into microseconds */	\
-		_res = _res/residency_div;			\
-	} while (0)
-
-/*
  * This functions returns the index into the `psci_stat_t` array given the
  * local power state and power domain level. If the platform implements the
  * `get_pwr_lvl_state_idx` pm hook, then that will be used to return the index.
@@ -150,44 +126,23 @@
  * It is called with caches enabled and locks acquired(for NON-CPU domain)
  ******************************************************************************/
 void psci_stats_update_pwr_up(unsigned int end_pwrlvl,
-			const psci_power_state_t *state_info,
-			unsigned int flags)
+			const psci_power_state_t *state_info)
 {
 	int parent_idx, cpu_idx = plat_my_core_pos();
 	int lvl, stat_idx;
 	plat_local_state_t local_state;
-	unsigned long long pwrup_ts = 0, pwrdn_ts = 0;
 	u_register_t residency;
 
 	assert(end_pwrlvl <= PLAT_MAX_PWR_LVL);
 	assert(state_info);
 
-	/* Initialize the residency divisor if not already initialized */
-	if (!residency_div) {
-		/* Pre-calculate divisor so that it can be directly used to
-		   convert time-stamp into microseconds */
-		residency_div = read_cntfrq_el0() / MHZ_TICKS_PER_SEC;
-		assert(residency_div);
-	}
-
-	/* Get power down time-stamp for current CPU */
-	PMF_GET_TIMESTAMP_BY_INDEX(psci_svc, PSCI_STAT_ID_ENTER_LOW_PWR,
-			cpu_idx, flags, pwrdn_ts);
-
-	/* In the case of 1st power on just return */
-	if (!pwrdn_ts)
-		return;
-
-	/* Get power up time-stamp for current CPU */
-	PMF_GET_TIMESTAMP_BY_INDEX(psci_svc, PSCI_STAT_ID_EXIT_LOW_PWR,
-			cpu_idx, flags, pwrup_ts);
-
 	/* Get the index into the stats array */
 	local_state = state_info->pwr_domain_state[PSCI_CPU_PWR_LVL];
 	stat_idx = get_stat_idx(local_state, PSCI_CPU_PWR_LVL);
 
-	/* Calculate stats residency */
-	calc_stat_residency(pwrup_ts, pwrdn_ts, residency);
+	/* Call into platform interface to calculate residency. */
+	residency = plat_psci_stat_get_residency(PSCI_CPU_PWR_LVL,
+	    state_info, cpu_idx);
 
 	/* Update CPU stats. */
 	psci_cpu_stat[cpu_idx][stat_idx].residency += residency;
@@ -207,10 +162,9 @@
 
 		assert(last_cpu_in_non_cpu_pd[parent_idx] != -1);
 
-		/* Get power down time-stamp for last CPU */
-		PMF_GET_TIMESTAMP_BY_INDEX(psci_svc, PSCI_STAT_ID_ENTER_LOW_PWR,
-				last_cpu_in_non_cpu_pd[parent_idx],
-				flags, pwrdn_ts);
+		/* Call into platform interface to calculate residency. */
+		residency = plat_psci_stat_get_residency(lvl, state_info,
+		    last_cpu_in_non_cpu_pd[parent_idx]);
 
 		/* Initialize back to reset value */
 		last_cpu_in_non_cpu_pd[parent_idx] = -1;
@@ -218,9 +172,6 @@
 		/* Get the index into the stats array */
 		stat_idx = get_stat_idx(local_state, lvl);
 
-		/* Calculate stats residency */
-		calc_stat_residency(pwrup_ts, pwrdn_ts, residency);
-
 		/* Update non cpu stats */
 		psci_non_cpu_stat[parent_idx][stat_idx].residency += residency;
 		psci_non_cpu_stat[parent_idx][stat_idx].count++;
diff --git a/lib/psci/psci_suspend.c b/lib/psci/psci_suspend.c
index dc2ab77..302116b 100644
--- a/lib/psci/psci_suspend.c
+++ b/lib/psci/psci_suspend.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -211,13 +211,7 @@
 	psci_plat_pm_ops->pwr_domain_suspend(state_info);
 
 #if ENABLE_PSCI_STAT
-	/*
-	 * Capture time-stamp while entering low power state.
-	 * No cache maintenance needed because caches are off
-	 * and writes are direct to main memory.
-	 */
-	PMF_CAPTURE_TIMESTAMP(psci_svc, PSCI_STAT_ID_ENTER_LOW_PWR,
-		PMF_NO_CACHE_MAINT);
+	plat_psci_stat_accounting_start(state_info);
 #endif
 
 exit:
@@ -257,6 +251,10 @@
 	    PMF_NO_CACHE_MAINT);
 #endif
 
+#if ENABLE_PSCI_STAT
+	plat_psci_stat_accounting_start(state_info);
+#endif
+
 	/*
 	 * We will reach here if only retention/standby states have been
 	 * requested at multiple power levels. This means that the cpu
@@ -264,6 +262,11 @@
 	 */
 	wfi();
 
+#if ENABLE_PSCI_STAT
+	plat_psci_stat_accounting_stop(state_info);
+	psci_stats_update_pwr_up(end_pwrlvl, state_info);
+#endif
+
 #if ENABLE_RUNTIME_INSTRUMENTATION
 	PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
 	    RT_INSTR_EXIT_HW_LOW_PWR,
diff --git a/make_helpers/build_macros.mk b/make_helpers/build_macros.mk
index 2312d2c..93db2d6 100644
--- a/make_helpers/build_macros.mk
+++ b/make_helpers/build_macros.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -81,6 +81,16 @@
     $(and $(patsubst 0,,$(value $(1))),$(patsubst 1,,$(value $(1))),$(error $(1) must be boolean))
 endef
 
+0-9 := 0 1 2 3 4 5 6 7 8 9
+
+# Function to verify that option $(1) is non-empty and made only of digits 0-9
+define assert_numeric
+$(if $($(1)),,$(error $(1) must not be empty))
+$(eval __numeric := $($(1)))
+$(foreach d,$(0-9),$(eval __numeric := $(subst $(d),,$(__numeric))))
+$(if $(__numeric),$(error $(1) must be numeric))
+endef
+
 # IMG_LINKERFILE defines the linker script corresponding to a BL stage
 #   $(1) = BL stage (2, 30, 31, 32, 33)
 define IMG_LINKERFILE
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index 0b93dfc..b47ea46 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -44,6 +44,10 @@
 # port can change this value if needed.
 ARM_CCI_PRODUCT_ID		:= 400
 
+# ARM Architecture major and minor versions: 8.0 by default.
+ARM_ARCH_MAJOR			:= 8
+ARM_ARCH_MINOR			:= 0
+
 # Determine the version of ARM GIC architecture to use for interrupt management
 # in EL3. The platform port can change this value if needed.
 ARM_GIC_ARCH			:= 2
diff --git a/plat/arm/board/juno/juno_security.c b/plat/arm/board/juno/juno_security.c
index 202342a..70637d6 100644
--- a/plat/arm/board/juno/juno_security.c
+++ b/plat/arm/board/juno/juno_security.c
@@ -60,16 +60,34 @@
 }
 
 /*******************************************************************************
+ * Initialize debug configuration.
+ ******************************************************************************/
+static void init_debug_cfg(void)
+{
+#if !DEBUG
+	/* Set internal drive selection for SPIDEN. */
+	mmio_write_32(SSC_REG_BASE + SSC_DBGCFG_SET,
+		1U << SPIDEN_SEL_SET_SHIFT);
+
+	/* Drive SPIDEN LOW to disable invasive debug of secure state. */
+	mmio_write_32(SSC_REG_BASE + SSC_DBGCFG_CLR,
+		1U << SPIDEN_INT_CLR_SHIFT);
+#endif
+}
+
+/*******************************************************************************
  * Initialize the secure environment.
  ******************************************************************************/
 void plat_arm_security_setup(void)
 {
+	/* Initialize debug configuration */
+	init_debug_cfg();
 	/* Initialize the TrustZone Controller */
 	arm_tzc400_setup();
 	/* Do ARM CSS internal NIC setup */
 	css_init_nic400();
 	/* Do ARM CSS SoC security setup */
 	soc_css_security_setup();
-	/* Initialize the SMMU SSD tables*/
+	/* Initialize the SMMU SSD tables */
 	init_mmu401();
 }
diff --git a/plat/arm/common/arm_bl2_setup.c b/plat/arm/common/arm_bl2_setup.c
index 5f30708..007108d 100644
--- a/plat/arm/common/arm_bl2_setup.c
+++ b/plat/arm/common/arm_bl2_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -38,6 +38,7 @@
 #include <plat_arm.h>
 #include <platform_def.h>
 #include <string.h>
+#include <utils.h>
 
 /* Data structure which holds the extents of the trusted SRAM for BL2 */
 static meminfo_t bl2_tzram_layout __aligned(CACHE_WRITEBACK_GRANULE);
@@ -123,7 +124,7 @@
 	 * Initialise the memory for all the arguments that needs to
 	 * be passed to BL31
 	 */
-	memset(&bl31_params_mem, 0, sizeof(bl2_to_bl31_params_mem_t));
+	zeromem(&bl31_params_mem, sizeof(bl2_to_bl31_params_mem_t));
 
 	/* Assign memory for TF related information */
 	bl2_to_bl31_params = &bl31_params_mem.bl31_params;
diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk
index c2f28f9..4628a43 100644
--- a/plat/arm/common/arm_common.mk
+++ b/plat/arm/common/arm_common.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -92,6 +92,7 @@
 
 # Enable PSCI_STAT_COUNT/RESIDENCY APIs on ARM platforms
 ENABLE_PSCI_STAT		:=	1
+ENABLE_PMF			:=	1
 
 # On ARM platforms, separate the code and read-only data sections to allow
 # mapping the former as executable and the latter as execute-never.
diff --git a/plat/arm/css/common/css_bl2_setup.c b/plat/arm/css/common/css_bl2_setup.c
index 11ca342..5361d89 100644
--- a/plat/arm/css/common/css_bl2_setup.c
+++ b/plat/arm/css/common/css_bl2_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -98,7 +98,7 @@
 	 *  - restoring the SCP boot configuration.
 	 */
 	VERBOSE("BL2: Restoring SCP reset data in Trusted SRAM\n");
-	memset((void *) ARM_TRUSTED_SRAM_BASE, 0, 128);
+	zero_normalmem((void *)ARM_TRUSTED_SRAM_BASE, 128);
 	mmio_write_32(SCP_BOOT_CFG_ADDR, scp_boot_config);
 }
 #endif /* EL3_PAYLOAD_BASE */
diff --git a/plat/arm/css/drivers/scpi/css_scpi.c b/plat/arm/css/drivers/scpi/css_scpi.c
index f419abd..65ae978 100644
--- a/plat/arm/css/drivers/scpi/css_scpi.c
+++ b/plat/arm/css/drivers/scpi/css_scpi.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -34,6 +34,7 @@
 #include <debug.h>
 #include <platform.h>
 #include <string.h>
+#include <utils.h>
 #include "css_mhu.h"
 #include "css_scpi.h"
 
@@ -204,7 +205,8 @@
 	scpi_secure_message_start();
 
 	/* Populate request headers */
-	cmd = memset(SCPI_CMD_HEADER_AP_TO_SCP, 0, sizeof(*cmd));
+	zeromem(SCPI_CMD_HEADER_AP_TO_SCP, sizeof(*cmd));
+	cmd = SCPI_CMD_HEADER_AP_TO_SCP;
 	cmd->id = SCPI_CMD_GET_CSS_POWER_STATE;
 
 	/*
diff --git a/plat/common/plat_psci_common.c b/plat/common/plat_psci_common.c
index 3eb6886..0e00faa 100644
--- a/plat/common/plat_psci_common.c
+++ b/plat/common/plat_psci_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,8 +31,125 @@
 #include <arch.h>
 #include <assert.h>
 #include <platform.h>
+#include <pmf.h>
 #include <psci.h>
 
+#if ENABLE_PSCI_STAT && ENABLE_PMF
+#pragma weak plat_psci_stat_accounting_start
+#pragma weak plat_psci_stat_accounting_stop
+#pragma weak plat_psci_stat_get_residency
+
+/* Ticks elapsed in one second by a signal of 1 MHz */
+#define MHZ_TICKS_PER_SEC 1000000
+
+/* Following are used as ID's to capture time-stamp */
+#define PSCI_STAT_ID_ENTER_LOW_PWR		0
+#define PSCI_STAT_ID_EXIT_LOW_PWR		1
+#define PSCI_STAT_TOTAL_IDS			2
+
+PMF_REGISTER_SERVICE(psci_svc, PMF_PSCI_STAT_SVC_ID, PSCI_STAT_TOTAL_IDS,
+	PMF_STORE_ENABLE)
+
+/*
+ * Calculate the stats residency in microseconds, taking into account the
+ * wrap around condition (the wrapping tick itself is counted, hence "+ 1").
+ */
+static u_register_t calc_stat_residency(unsigned long long pwrupts,
+	unsigned long long pwrdnts)
+{
+	/* The divisor to use to convert raw timestamp into microseconds. */
+	u_register_t residency_div;
+	u_register_t res;
+
+	/*
+	 * Calculate divisor so that it can be directly used to
+	 * convert time-stamp into microseconds.
+	 */
+	residency_div = read_cntfrq_el0() / MHZ_TICKS_PER_SEC;
+	assert(residency_div);
+
+	if (pwrupts < pwrdnts)
+		res = UINT64_MAX - pwrdnts + pwrupts + 1;
+	else
+		res = pwrupts - pwrdnts;
+
+	return res / residency_div;
+}
+
+/*
+ * Capture timestamp before entering a low power state.
+ * No cache maintenance is required when capturing the timestamp.
+ * Cache maintenance may be needed when reading these timestamps.
+ */
+void plat_psci_stat_accounting_start(
+	__unused const psci_power_state_t *state_info)
+{
+	assert(state_info);
+	PMF_CAPTURE_TIMESTAMP(psci_svc, PSCI_STAT_ID_ENTER_LOW_PWR,
+		PMF_NO_CACHE_MAINT);
+}
+
+/*
+ * Capture timestamp after exiting a low power state.
+ * No cache maintenance is required when capturing the timestamp.
+ * Cache maintenance may be needed when reading these timestamps.
+ */
+void plat_psci_stat_accounting_stop(
+	__unused const psci_power_state_t *state_info)
+{
+	assert(state_info);
+	PMF_CAPTURE_TIMESTAMP(psci_svc, PSCI_STAT_ID_EXIT_LOW_PWR,
+		PMF_NO_CACHE_MAINT);
+}
+
+/*
+ * Return the residency, in microseconds, between the low-power entry of CPU
+ * `last_cpu_idx` (a valid core index) and the exit of the calling CPU.
+ */
+u_register_t plat_psci_stat_get_residency(unsigned int lvl,
+	const psci_power_state_t *state_info,
+	int last_cpu_idx)
+{
+	plat_local_state_t state;
+	unsigned long long pwrup_ts = 0, pwrdn_ts = 0;
+	unsigned int pmf_flags;
+
+	assert(lvl >= PSCI_CPU_PWR_LVL && lvl <= PLAT_MAX_PWR_LVL);
+	assert(state_info);
+	assert(last_cpu_idx >= 0 && last_cpu_idx < PLATFORM_CORE_COUNT);
+
+	if (lvl == PSCI_CPU_PWR_LVL)
+		assert(last_cpu_idx == plat_my_core_pos());
+
+	/*
+	 * If power down is requested, then timestamp capture will
+	 * be with caches OFF.  Hence we have to do cache maintenance
+	 * when reading the timestamp.
+	 */
+	state = state_info->pwr_domain_state[PSCI_CPU_PWR_LVL];
+	if (is_local_state_off(state)) {
+		pmf_flags = PMF_CACHE_MAINT;
+	} else {
+		assert(is_local_state_retn(state));
+		pmf_flags = PMF_NO_CACHE_MAINT;
+	}
+
+	PMF_GET_TIMESTAMP_BY_INDEX(psci_svc,
+		PSCI_STAT_ID_ENTER_LOW_PWR,
+		last_cpu_idx,
+		pmf_flags,
+		pwrdn_ts);
+
+	PMF_GET_TIMESTAMP_BY_INDEX(psci_svc,
+		PSCI_STAT_ID_EXIT_LOW_PWR,
+		plat_my_core_pos(),
+		pmf_flags,
+		pwrup_ts);
+
+	return calc_stat_residency(pwrup_ts, pwrdn_ts);
+}
+#endif /* ENABLE_PSCI_STAT && ENABLE_PMF */
+
 /*
  * The PSCI generic code uses this API to let the platform participate in state
  * coordination during a power management operation. It compares the platform
diff --git a/plat/mediatek/mt6795/bl31.ld.S b/plat/mediatek/mt6795/bl31.ld.S
index 44510a7..472cd2e 100644
--- a/plat/mediatek/mt6795/bl31.ld.S
+++ b/plat/mediatek/mt6795/bl31.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -113,7 +113,8 @@
 
     /*
      * The .bss section gets initialised to 0 at runtime.
-     * Its base address must be 16-byte aligned.
+     * Its base address should be 16-byte aligned for better performance of the
+     * zero-initialization code.
      */
     .bss (NOLOAD) : ALIGN(16) {
         __BSS_START__ = .;
diff --git a/plat/nvidia/tegra/common/aarch64/tegra_helpers.S b/plat/nvidia/tegra/common/aarch64/tegra_helpers.S
index 905c4c5..6851b15 100644
--- a/plat/nvidia/tegra/common/aarch64/tegra_helpers.S
+++ b/plat/nvidia/tegra/common/aarch64/tegra_helpers.S
@@ -35,6 +35,22 @@
 #include <cortex_a53.h>
 #include <tegra_def.h>
 
+#define MIDR_PN_CORTEX_A57		0xD07
+
+/*******************************************************************************
+ * Implementation defined ACTLR_EL3 bit definitions
+ ******************************************************************************/
+#define ACTLR_EL3_L2ACTLR_BIT		(1 << 6)
+#define ACTLR_EL3_L2ECTLR_BIT		(1 << 5)
+#define ACTLR_EL3_L2CTLR_BIT		(1 << 4)
+#define ACTLR_EL3_CPUECTLR_BIT		(1 << 1)
+#define ACTLR_EL3_CPUACTLR_BIT		(1 << 0)
+#define ACTLR_EL3_ENABLE_ALL_ACCESS	(ACTLR_EL3_L2ACTLR_BIT | \
+					 ACTLR_EL3_L2ECTLR_BIT | \
+					 ACTLR_EL3_L2CTLR_BIT | \
+					 ACTLR_EL3_CPUECTLR_BIT | \
+					 ACTLR_EL3_CPUACTLR_BIT)
+
 	/* Global functions */
 	.globl	plat_is_my_cpu_primary
 	.globl	plat_my_core_pos
@@ -50,6 +66,7 @@
 	.globl	tegra_sec_entry_point
 	.globl	ns_image_entrypoint
 	.globl	tegra_bl31_phys_base
+	.globl	tegra_console_base
 
 	/* ---------------------
 	 * Common CPU init code
@@ -57,7 +74,18 @@
 	 */
 .macro	cpu_init_common
 
+	/* ------------------------------------------------
+	 * We enable processor retention and L2/CPUECTLR NS
+	 * access for A57 CPUs only.
+	 * ------------------------------------------------
+	 */
+	mrs	x0, midr_el1
+	mov	x1, #(MIDR_PN_MASK << MIDR_PN_SHIFT)
+	and	x0, x0, x1
+	lsr	x0, x0, #MIDR_PN_SHIFT
+	cmp	x0, #MIDR_PN_CORTEX_A57
+	b.ne	1f
+
-#if ENABLE_L2_DYNAMIC_RETENTION
 	/* ---------------------------
 	 * Enable processor retention
 	 * ---------------------------
@@ -68,18 +96,14 @@
 	orr	x0, x0, x1
 	msr	L2ECTLR_EL1, x0
 	isb
-#endif
 
-#if ENABLE_CPU_DYNAMIC_RETENTION
 	mrs	x0, CPUECTLR_EL1
 	mov	x1, #RETENTION_ENTRY_TICKS_512 << CPUECTLR_CPU_RET_CTRL_SHIFT
 	bic	x0, x0, #CPUECTLR_CPU_RET_CTRL_MASK
 	orr	x0, x0, x1
 	msr	CPUECTLR_EL1, x0
 	isb
-#endif
 
-#if ENABLE_NS_L2_CPUECTRL_RW_ACCESS
 	/* -------------------------------------------------------
 	 * Enable L2 and CPU ECTLR RW access from non-secure world
 	 * -------------------------------------------------------
@@ -88,13 +112,12 @@
 	msr	actlr_el3, x0
 	msr	actlr_el2, x0
 	isb
-#endif
 
 	/* --------------------------------
 	 * Enable the cycle count register
 	 * --------------------------------
 	 */
-	mrs	x0, pmcr_el0
+1:	mrs	x0, pmcr_el0
 	ubfx	x0, x0, #11, #5		// read PMCR.N field
 	mov	x1, #1
 	lsl	x0, x1, x0
@@ -158,6 +181,20 @@
 endfunc plat_get_my_entrypoint
 
 	/* -----------------------------------------------------
+	 * int platform_get_core_pos(int mpidr);
+	 *
+	 * With this function: CorePos = (ClusterId * 4) +
+	 *                                CoreId
+	 * -----------------------------------------------------
+	 */
+func platform_get_core_pos
+	and	x1, x0, #MPIDR_CPU_MASK		// x1 = CoreId bits of the mpidr argument
+	and	x0, x0, #MPIDR_CLUSTER_MASK	// x0 = cluster field, still at its MPIDR bit position
+	add	x0, x1, x0, LSR #6		// CoreId + (cluster field >> 6); ClusterId * 4 if the field starts at bit 8 - TODO confirm
+	ret
+endfunc platform_get_core_pos
+
+	/* -----------------------------------------------------
 	 * void plat_secondary_cold_boot_setup (void);
 	 *
 	 * This function performs any platform specific actions
@@ -190,7 +227,8 @@
 	 * ---------------------------------------------
 	 */
 func plat_crash_console_init
-	mov_imm	x0, TEGRA_BOOT_UART_BASE
+	adr	x0, tegra_console_base
+	ldr	x0, [x0]
 	mov_imm	x1, TEGRA_BOOT_UART_CLK_IN_HZ
 	mov_imm	x2, TEGRA_CONSOLE_BAUDRATE
 	b	console_core_init
@@ -204,7 +242,8 @@
 	 * ---------------------------------------------
 	 */
 func plat_crash_console_putc
-	mov_imm	x1, TEGRA_BOOT_UART_BASE
+	adr	x1, tegra_console_base
+	ldr	x1, [x1]
 	b	console_core_putc
 endfunc plat_crash_console_putc
 
@@ -366,3 +405,10 @@
 	 */
 tegra_bl31_phys_base:
 	.quad	0
+
+	/* --------------------------------------------------
+	 * UART controller base for console init
+	 * --------------------------------------------------
+	 */
+tegra_console_base:
+	.quad	0
diff --git a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c b/plat/nvidia/tegra/common/drivers/memctrl/memctrl_v1.c
similarity index 88%
rename from plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
rename to plat/nvidia/tegra/common/drivers/memctrl/memctrl_v1.c
index 40d1bab..c417050 100644
--- a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
+++ b/plat/nvidia/tegra/common/drivers/memctrl/memctrl_v1.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,14 +31,14 @@
 #include <arch_helpers.h>
 #include <assert.h>
 #include <debug.h>
-#include <mmio.h>
 #include <memctrl.h>
+#include <memctrl_v1.h>
+#include <mmio.h>
 #include <string.h>
 #include <tegra_def.h>
+#include <utils.h>
 #include <xlat_tables.h>
 
-extern void zeromem16(void *mem, unsigned int length);
-
 #define TEGRA_GPU_RESET_REG_OFFSET	0x28c
 #define  GPU_RESET_BIT			(1 << 24)
 
@@ -55,7 +55,7 @@
 	 * Setup the Memory controller to allow only secure accesses to
 	 * the TZDRAM carveout
 	 */
-	INFO("Configuring SMMU\n");
+	INFO("Tegra Memory Controller (v1)\n");
 
 	/* allow translations for all MC engines */
 	tegra_mc_write_32(MC_SMMU_TRANSLATION_ENABLE_0_0,
@@ -90,6 +90,14 @@
 }
 
 /*
+ * Restore Memory Controller settings after "System Suspend"
+ */
+void tegra_memctrl_restore_settings(void)
+{
+	tegra_memctrl_setup();
+}
+
+/*
  * Secure the BL31 DRAM aperture.
  *
  * phys_base = physical base of TZDRAM aperture
@@ -107,6 +115,20 @@
 	tegra_mc_write_32(MC_SECURITY_CFG1_0, size_in_bytes >> 20);
 }
 
+/*
+ * Secure the BL31 TZRAM aperture.
+ *
+ * phys_base = physical base of TZRAM aperture
+ * size_in_bytes = size of aperture in bytes
+ */
+void tegra_memctrl_tzram_setup(uint64_t phys_base, uint32_t size_in_bytes)
+{
+	/*
+	 * The v1 hardware controller does not have any registers
+	 * for setting up the on-chip TZRAM.
+	 */
+}
+
 static void tegra_clear_videomem(uintptr_t non_overlap_area_start,
 				 unsigned long long non_overlap_area_size)
 {
@@ -114,13 +136,13 @@
 	 * Perform cache maintenance to ensure that the non-overlapping area is
 	 * zeroed out. The first invalidation of this range ensures that
 	 * possible evictions of dirty cache lines do not interfere with the
-	 * 'zeromem16' operation. Other CPUs could speculatively prefetch the
+	 * 'zeromem' operation. Other CPUs could speculatively prefetch the
 	 * main memory contents of this area between the first invalidation and
-	 * the 'zeromem16' operation. The second invalidation ensures that any
+	 * the 'zeromem' operation. The second invalidation ensures that any
 	 * such cache lines are removed as well.
 	 */
 	inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
-	zeromem16((void *)non_overlap_area_start, non_overlap_area_size);
+	zeromem((void *)non_overlap_area_start, non_overlap_area_size);
 	inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
 }
 
diff --git a/plat/nvidia/tegra/common/tegra_bl31_setup.c b/plat/nvidia/tegra/common/tegra_bl31_setup.c
index 0fd7c82..72da4b3 100644
--- a/plat/nvidia/tegra/common/tegra_bl31_setup.c
+++ b/plat/nvidia/tegra/common/tegra_bl31_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -37,12 +37,14 @@
 #include <cortex_a57.h>
 #include <cortex_a53.h>
 #include <debug.h>
+#include <denver.h>
 #include <errno.h>
 #include <memctrl.h>
 #include <mmio.h>
 #include <platform.h>
 #include <platform_def.h>
 #include <stddef.h>
+#include <tegra_def.h>
 #include <tegra_private.h>
 
 /*******************************************************************************
@@ -54,6 +56,7 @@
 extern unsigned long __BL31_END__;
 
 extern uint64_t tegra_bl31_phys_base;
+extern uint64_t tegra_console_base;
 
 /*
  * The next 3 constants identify the extents of the code, RO data region and the
@@ -110,28 +113,47 @@
 {
 	plat_params_from_bl2_t *plat_params =
 		(plat_params_from_bl2_t *)plat_params_from_bl2;
+#if LOG_LEVEL >= LOG_LEVEL_INFO	/* 'impl' is read only by the INFO() print at the end of this function */
+	int impl = (read_midr() >> MIDR_IMPL_SHIFT) & MIDR_IMPL_MASK;
+#endif /* LOG_LEVEL >= LOG_LEVEL_INFO */
 
 	/*
-	 * Configure the UART port to be used as the console
+	 * Copy BL3-3, BL3-2 entry point information.
+	 * They are stored in Secure RAM, in BL2's address space.
 	 */
-	console_init(TEGRA_BOOT_UART_BASE, TEGRA_BOOT_UART_CLK_IN_HZ,
-			TEGRA_CONSOLE_BAUDRATE);
+	assert(from_bl2->bl33_ep_info);
+	bl33_image_ep_info = *from_bl2->bl33_ep_info;
 
-	/* Initialise crash console */
-	plat_crash_console_init();
+	if (from_bl2->bl32_ep_info)
+		bl32_image_ep_info = *from_bl2->bl32_ep_info;
 
 	/*
-	 * Copy BL3-3, BL3-2 entry point information.
-	 * They are stored in Secure RAM, in BL2's address space.
+	 * Parse platform specific parameters - TZDRAM aperture base and size
 	 */
-	bl33_image_ep_info = *from_bl2->bl33_ep_info;
-	bl32_image_ep_info = *from_bl2->bl32_ep_info;
+	assert(plat_params);
+	plat_bl31_params_from_bl2.tzdram_base = plat_params->tzdram_base;
+	plat_bl31_params_from_bl2.tzdram_size = plat_params->tzdram_size;
+	plat_bl31_params_from_bl2.uart_id = plat_params->uart_id;
 
 	/*
-	 * Parse platform specific parameters - TZDRAM aperture size
+	 * Get the base address of the UART controller to be used for the
+	 * console
 	 */
-	if (plat_params)
-		plat_bl31_params_from_bl2.tzdram_size = plat_params->tzdram_size;
+	assert(plat_params->uart_id);
+	tegra_console_base = plat_get_console_from_id(plat_params->uart_id);
+
+	/*
+	 * Configure the UART port to be used as the console
+	 */
+	assert(tegra_console_base);
+	console_init(tegra_console_base, TEGRA_BOOT_UART_CLK_IN_HZ,
+		TEGRA_CONSOLE_BAUDRATE);
+
+	/* Initialise crash console */
+	plat_crash_console_init();
+
+	INFO("BL3-1: Boot CPU: %s Processor [%lx]\n", (impl == DENVER_IMPL) ?
+		"Denver" : "ARM", read_mpidr());
 }
 
 /*******************************************************************************
@@ -159,18 +181,36 @@
 	/*
 	 * Do initial security configuration to allow DRAM/device access.
 	 */
-	tegra_memctrl_tzdram_setup(tegra_bl31_phys_base,
+	tegra_memctrl_tzdram_setup(plat_bl31_params_from_bl2.tzdram_base,
 			plat_bl31_params_from_bl2.tzdram_size);
 
+	/*
+	 * Set up the TZRAM memory aperture to allow only secure world
+	 * access
+	 */
+	tegra_memctrl_tzram_setup(TEGRA_TZRAM_BASE, TEGRA_TZRAM_SIZE);
+
 	/* Set the next EL to be AArch64 */
 	tmp_reg = SCR_RES1_BITS | SCR_RW_BIT;
 	write_scr(tmp_reg);
 
 	/* Initialize the gic cpu and distributor interfaces */
 	tegra_gic_setup();
+
+	INFO("BL3-1: Tegra platform setup complete\n");
 }
 
 /*******************************************************************************
+ * Perform any BL3-1 platform runtime setup prior to BL3-1 cold boot exit
+ ******************************************************************************/
+void bl31_plat_runtime_setup(void)
+{
+	/* Initialize the runtime console */
+	console_init(tegra_console_base, TEGRA_BOOT_UART_CLK_IN_HZ,
+		TEGRA_CONSOLE_BAUDRATE);
+}
+
+/*******************************************************************************
  * Perform the very early platform specific architectural setup here. At the
  * moment this only intializes the mmu in a quick and dirty way.
  ******************************************************************************/
@@ -185,6 +225,7 @@
 #if USE_COHERENT_MEM
 	unsigned long coh_start, coh_size;
 #endif
+	plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
 
 	/* add memory regions */
 	mmap_add_region(total_base, total_base,
@@ -194,6 +235,14 @@
 			ro_size,
 			MT_MEMORY | MT_RO | MT_SECURE);
 
+	/* map TZDRAM used by BL31 as coherent memory */
+	if (TEGRA_TZRAM_BASE == tegra_bl31_phys_base) {
+		mmap_add_region(params_from_bl2->tzdram_base,
+				params_from_bl2->tzdram_base,
+				BL31_SIZE,
+				MT_DEVICE | MT_RW | MT_SECURE);
+	}
+
 #if USE_COHERENT_MEM
 	coh_start = total_base + (BL_COHERENT_RAM_BASE - BL31_RO_BASE);
 	coh_size = BL_COHERENT_RAM_END - BL_COHERENT_RAM_BASE;
@@ -215,6 +264,8 @@
 
 	/* enable the MMU */
 	enable_mmu_el3(0);
+
+	INFO("BL3-1: Tegra: MMU enabled\n");
 }
 
 /*******************************************************************************
diff --git a/plat/nvidia/tegra/common/tegra_common.mk b/plat/nvidia/tegra/common/tegra_common.mk
index 3c07032..c9e9255 100644
--- a/plat/nvidia/tegra/common/tegra_common.mk
+++ b/plat/nvidia/tegra/common/tegra_common.mk
@@ -54,9 +54,7 @@
 				plat/common/aarch64/platform_mp_stack.S		\
 				plat/common/plat_psci_common.c			\
 				${COMMON_DIR}/aarch64/tegra_helpers.S		\
-				${COMMON_DIR}/drivers/memctrl/memctrl.c		\
 				${COMMON_DIR}/drivers/pmc/pmc.c			\
-				${COMMON_DIR}/drivers/flowctrl/flowctrl.c	\
 				${COMMON_DIR}/tegra_bl31_setup.c		\
 				${COMMON_DIR}/tegra_delay_timer.c		\
 				${COMMON_DIR}/tegra_gic.c			\
diff --git a/plat/nvidia/tegra/common/tegra_pm.c b/plat/nvidia/tegra/common/tegra_pm.c
index 6fb3e9c..f5ef3e7 100644
--- a/plat/nvidia/tegra/common/tegra_pm.c
+++ b/plat/nvidia/tegra/common/tegra_pm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -54,7 +54,9 @@
 #pragma weak tegra_soc_pwr_domain_on
 #pragma weak tegra_soc_pwr_domain_off
 #pragma weak tegra_soc_pwr_domain_on_finish
+#pragma weak tegra_soc_pwr_domain_power_down_wfi
 #pragma weak tegra_soc_prepare_system_reset
+#pragma weak tegra_soc_prepare_system_off
 
 int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
 {
@@ -76,11 +78,22 @@
 	return PSCI_E_SUCCESS;
 }
 
+int tegra_soc_pwr_domain_power_down_wfi(const psci_power_state_t *target_state)
+{
+	return PSCI_E_SUCCESS;
+}
+
 int tegra_soc_prepare_system_reset(void)
 {
 	return PSCI_E_SUCCESS;
 }
 
+__dead2 void tegra_soc_prepare_system_off(void)
+{
+	ERROR("Tegra System Off: operation not handled.\n");
+	panic();
+}
+
 /*******************************************************************************
  * This handler is called by the PSCI implementation during the `SYSTEM_SUSPEND`
  * call to get the `power_state` parameter. This allows the platform to encode
@@ -129,7 +142,7 @@
 }
 
 /*******************************************************************************
- * Handler called when called when a power domain is about to be suspended. The
+ * Handler called when a power domain is about to be suspended. The
  * target_state encodes the power state that each level should transition to.
  ******************************************************************************/
 void tegra_pwr_domain_suspend(const psci_power_state_t *target_state)
@@ -141,6 +154,24 @@
 }
 
 /*******************************************************************************
+ * Handler called at the end of the power domain suspend sequence. The
+ * target_state encodes the power state that each level should transition to.
+ ******************************************************************************/
+__dead2 void tegra_pwr_domain_power_down_wfi(const psci_power_state_t
+					     *target_state)
+{
+	/* call the chip's power down handler */
+	tegra_soc_pwr_domain_power_down_wfi(target_state);
+
+	/* enter power down state */
+	wfi();
+
+	/* we can never reach here */
+	ERROR("%s: operation not handled.\n", __func__);
+	panic();
+}
+
+/*******************************************************************************
  * Handler called when a power domain has just been powered on after
  * being turned off earlier. The target_state encodes the low power state that
  * each level has woken up from.
@@ -161,20 +192,16 @@
 			PSTATE_ID_SOC_POWERDN) {
 
 		/*
-		 * Lock scratch registers which hold the CPU vectors.
-		 */
-		tegra_pmc_lock_cpu_vectors();
-
-		/*
-		 * SMMU configuration.
+		 * Restore Memory Controller settings as it loses state
+		 * during system suspend.
 		 */
-		tegra_memctrl_setup();
+		tegra_memctrl_restore_settings();
 
 		/*
 		 * Security configuration to allow DRAM/device access.
 		 */
 		plat_params = bl31_get_plat_params();
-		tegra_memctrl_tzdram_setup(tegra_bl31_phys_base,
+		tegra_memctrl_tzdram_setup(plat_params->tzdram_base,
 			plat_params->tzdram_size);
 	}
 
@@ -199,8 +226,9 @@
  ******************************************************************************/
 __dead2 void tegra_system_off(void)
 {
-	ERROR("Tegra System Off: operation not handled.\n");
-	panic();
+	INFO("Powering down system...\n");
+
+	tegra_soc_prepare_system_off();
 }
 
 /*******************************************************************************
@@ -208,6 +236,8 @@
  ******************************************************************************/
 __dead2 void tegra_system_reset(void)
 {
+	INFO("Restarting system...\n");
+
 	/* per-SoC system reset handler */
 	tegra_soc_prepare_system_reset();
 
@@ -223,13 +253,8 @@
 int32_t tegra_validate_power_state(unsigned int power_state,
 				   psci_power_state_t *req_state)
 {
-	int pwr_lvl = psci_get_pstate_pwrlvl(power_state);
-
 	assert(req_state);
 
-	if (pwr_lvl > PLAT_MAX_PWR_LVL)
-		return PSCI_E_INVALID_PARAMS;
-
 	return tegra_soc_validate_power_state(power_state, req_state);
 }
 
@@ -258,6 +283,7 @@
 	.pwr_domain_suspend		= tegra_pwr_domain_suspend,
 	.pwr_domain_on_finish		= tegra_pwr_domain_on_finish,
 	.pwr_domain_suspend_finish	= tegra_pwr_domain_suspend_finish,
+	.pwr_domain_pwr_down_wfi	= tegra_pwr_domain_power_down_wfi,
 	.system_off			= tegra_system_off,
 	.system_reset			= tegra_system_reset,
 	.validate_power_state		= tegra_validate_power_state,
diff --git a/plat/nvidia/tegra/common/tegra_sip_calls.c b/plat/nvidia/tegra/common/tegra_sip_calls.c
index de36a3c..3bcd441 100644
--- a/plat/nvidia/tegra/common/tegra_sip_calls.c
+++ b/plat/nvidia/tegra/common/tegra_sip_calls.c
@@ -32,28 +32,32 @@
 #include <arch_helpers.h>
 #include <assert.h>
 #include <bl_common.h>
-#include <context_mgmt.h>
 #include <debug.h>
 #include <errno.h>
 #include <memctrl.h>
 #include <runtime_svc.h>
 #include <tegra_private.h>
 
-#define NS_SWITCH_AARCH32	1
-#define SCR_RW_BITPOS		__builtin_ctz(SCR_RW_BIT)
-
 /*******************************************************************************
- * Tegra SiP SMCs
+ * Common Tegra SiP SMCs
  ******************************************************************************/
 #define TEGRA_SIP_NEW_VIDEOMEM_REGION		0x82000003
-#define TEGRA_SIP_AARCH_SWITCH			0x82000004
 
 /*******************************************************************************
- * SPSR settings for AARCH32/AARCH64 modes
+ * SoC specific SiP handler
  ******************************************************************************/
-#define SPSR32		SPSR_MODE32(MODE32_svc, SPSR_T_ARM, SPSR_E_LITTLE, \
-			DAIF_FIQ_BIT | DAIF_IRQ_BIT | DAIF_ABT_BIT)
-#define SPSR64		SPSR_64(MODE_EL2, MODE_SP_ELX, DISABLE_ALL_EXCEPTIONS)
+#pragma weak plat_sip_handler
+int plat_sip_handler(uint32_t smc_fid,
+		     uint64_t x1,
+		     uint64_t x2,
+		     uint64_t x3,
+		     uint64_t x4,
+		     void *cookie,
+		     void *handle,
+		     uint64_t flags)
+{
+	return -ENOTSUP;
+}
 
 /*******************************************************************************
  * This function is responsible for handling all SiP calls from the NS world
@@ -75,6 +79,11 @@
 	if (!ns)
 		SMC_RET1(handle, SMC_UNK);
 
+	/* Check if this is a SoC specific SiP */
+	err = plat_sip_handler(smc_fid, x1, x2, x3, x4, cookie, handle, flags);
+	if (err == 0)
+		SMC_RET1(handle, err);
+
 	switch (smc_fid) {
 
 	case TEGRA_SIP_NEW_VIDEOMEM_REGION:
@@ -102,29 +111,6 @@
 		/* new video memory carveout settings */
 		tegra_memctrl_videomem_setup(x1, x2);
 
-		SMC_RET1(handle, 0);
-		break;
-
-	case TEGRA_SIP_AARCH_SWITCH:
-
-		/* clean up the high bits */
-		x1 = (uint32_t)x1;
-		x2 = (uint32_t)x2;
-
-		if (!x1 || x2 > NS_SWITCH_AARCH32) {
-			ERROR("%s: invalid parameters\n", __func__);
-			SMC_RET1(handle, SMC_UNK);
-		}
-
-		/* x1 = ns entry point */
-		cm_set_elr_spsr_el3(NON_SECURE, x1,
-			(x2 == NS_SWITCH_AARCH32) ? SPSR32 : SPSR64);
-
-		/* switch NS world mode */
-		cm_write_scr_el3_bit(NON_SECURE, SCR_RW_BITPOS, !x2);
-
-		INFO("CPU switched to AARCH%s mode\n",
-			(x2 == NS_SWITCH_AARCH32) ? "32" : "64");
 		SMC_RET1(handle, 0);
 		break;
 
diff --git a/plat/nvidia/tegra/include/drivers/memctrl.h b/plat/nvidia/tegra/include/drivers/memctrl.h
index 26c8057..a3f0875 100644
--- a/plat/nvidia/tegra/include/drivers/memctrl.h
+++ b/plat/nvidia/tegra/include/drivers/memctrl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,55 +31,10 @@
 #ifndef __MEMCTRL_H__
 #define __MEMCTRL_H__
 
-#include <mmio.h>
-#include <tegra_def.h>
-
-/* SMMU registers */
-#define MC_SMMU_CONFIG_0			0x10
-#define  MC_SMMU_CONFIG_0_SMMU_ENABLE_DISABLE	0
-#define  MC_SMMU_CONFIG_0_SMMU_ENABLE_ENABLE	1
-#define MC_SMMU_TLB_CONFIG_0			0x14
-#define  MC_SMMU_TLB_CONFIG_0_RESET_VAL		0x20000010
-#define MC_SMMU_PTC_CONFIG_0			0x18
-#define  MC_SMMU_PTC_CONFIG_0_RESET_VAL		0x2000003f
-#define MC_SMMU_TLB_FLUSH_0			0x30
-#define  TLB_FLUSH_VA_MATCH_ALL			0
-#define  TLB_FLUSH_ASID_MATCH_DISABLE		0
-#define  TLB_FLUSH_ASID_MATCH_SHIFT		31
-#define  MC_SMMU_TLB_FLUSH_ALL		\
-	 (TLB_FLUSH_VA_MATCH_ALL | 	\
-	 (TLB_FLUSH_ASID_MATCH_DISABLE << TLB_FLUSH_ASID_MATCH_SHIFT))
-#define MC_SMMU_PTC_FLUSH_0			0x34
-#define  MC_SMMU_PTC_FLUSH_ALL			0
-#define MC_SMMU_ASID_SECURITY_0			0x38
-#define  MC_SMMU_ASID_SECURITY			0
-#define MC_SMMU_TRANSLATION_ENABLE_0_0		0x228
-#define MC_SMMU_TRANSLATION_ENABLE_1_0		0x22c
-#define MC_SMMU_TRANSLATION_ENABLE_2_0		0x230
-#define MC_SMMU_TRANSLATION_ENABLE_3_0		0x234
-#define MC_SMMU_TRANSLATION_ENABLE_4_0		0xb98
-#define  MC_SMMU_TRANSLATION_ENABLE		(~0)
-
-/* TZDRAM carveout configuration registers */
-#define MC_SECURITY_CFG0_0			0x70
-#define MC_SECURITY_CFG1_0			0x74
-
-/* Video Memory carveout configuration registers */
-#define MC_VIDEO_PROTECT_BASE			0x648
-#define MC_VIDEO_PROTECT_SIZE_MB		0x64c
-
-static inline uint32_t tegra_mc_read_32(uint32_t off)
-{
-	return mmio_read_32(TEGRA_MC_BASE + off);
-}
-
-static inline void tegra_mc_write_32(uint32_t off, uint32_t val)
-{
-	mmio_write_32(TEGRA_MC_BASE + off, val);
-}
-
 void tegra_memctrl_setup(void);
+void tegra_memctrl_restore_settings(void);
 void tegra_memctrl_tzdram_setup(uint64_t phys_base, uint32_t size_in_bytes);
+void tegra_memctrl_tzram_setup(uint64_t phys_base, uint32_t size_in_bytes);
 void tegra_memctrl_videomem_setup(uint64_t phys_base, uint32_t size_in_bytes);
 
 #endif /* __MEMCTRL_H__ */
diff --git a/plat/nvidia/tegra/include/drivers/memctrl_v1.h b/plat/nvidia/tegra/include/drivers/memctrl_v1.h
new file mode 100644
index 0000000..e44a9ea
--- /dev/null
+++ b/plat/nvidia/tegra/include/drivers/memctrl_v1.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __MEMCTRLV1_H__
+#define __MEMCTRLV1_H__
+
+#include <mmio.h>
+#include <tegra_def.h>
+
+/* SMMU registers */
+#define MC_SMMU_CONFIG_0			0x10
+#define  MC_SMMU_CONFIG_0_SMMU_ENABLE_DISABLE	0
+#define  MC_SMMU_CONFIG_0_SMMU_ENABLE_ENABLE	1
+#define MC_SMMU_TLB_CONFIG_0			0x14
+#define  MC_SMMU_TLB_CONFIG_0_RESET_VAL		0x20000010
+#define MC_SMMU_PTC_CONFIG_0			0x18
+#define  MC_SMMU_PTC_CONFIG_0_RESET_VAL		0x2000003f
+#define MC_SMMU_TLB_FLUSH_0			0x30
+#define  TLB_FLUSH_VA_MATCH_ALL			0
+#define  TLB_FLUSH_ASID_MATCH_DISABLE		0
+#define  TLB_FLUSH_ASID_MATCH_SHIFT		31
+#define  MC_SMMU_TLB_FLUSH_ALL		\
+	 (TLB_FLUSH_VA_MATCH_ALL | 	\
+	 (TLB_FLUSH_ASID_MATCH_DISABLE << TLB_FLUSH_ASID_MATCH_SHIFT))
+#define MC_SMMU_PTC_FLUSH_0			0x34
+#define  MC_SMMU_PTC_FLUSH_ALL			0
+#define MC_SMMU_ASID_SECURITY_0			0x38
+#define  MC_SMMU_ASID_SECURITY			0
+#define MC_SMMU_TRANSLATION_ENABLE_0_0		0x228
+#define MC_SMMU_TRANSLATION_ENABLE_1_0		0x22c
+#define MC_SMMU_TRANSLATION_ENABLE_2_0		0x230
+#define MC_SMMU_TRANSLATION_ENABLE_3_0		0x234
+#define MC_SMMU_TRANSLATION_ENABLE_4_0		0xb98
+#define  MC_SMMU_TRANSLATION_ENABLE		(~0)
+
+/* TZDRAM carveout configuration registers */
+#define MC_SECURITY_CFG0_0			0x70
+#define MC_SECURITY_CFG1_0			0x74
+
+/* Video Memory carveout configuration registers */
+#define MC_VIDEO_PROTECT_BASE			0x648
+#define MC_VIDEO_PROTECT_SIZE_MB		0x64c
+
+static inline uint32_t tegra_mc_read_32(uint32_t off)
+{
+	return mmio_read_32(TEGRA_MC_BASE + off);
+}
+
+static inline void tegra_mc_write_32(uint32_t off, uint32_t val)
+{
+	mmio_write_32(TEGRA_MC_BASE + off, val);
+}
+
+#endif /* __MEMCTRLV1_H__ */
diff --git a/plat/nvidia/tegra/include/platform_def.h b/plat/nvidia/tegra/include/platform_def.h
index cd06d93..ad245e2 100644
--- a/plat/nvidia/tegra/include/platform_def.h
+++ b/plat/nvidia/tegra/include/platform_def.h
@@ -53,12 +53,6 @@
 					 PLATFORM_CLUSTER_COUNT + 1)
 
 /*******************************************************************************
- * Platform power states
- ******************************************************************************/
-#define PLAT_MAX_RET_STATE		1
-#define PLAT_MAX_OFF_STATE		(PSTATE_ID_SOC_POWERDN + 1)
-
-/*******************************************************************************
  * Platform console related constants
  ******************************************************************************/
 #define TEGRA_CONSOLE_BAUDRATE		115200
@@ -74,7 +68,7 @@
 /*******************************************************************************
  * BL31 specific defines.
  ******************************************************************************/
-#define BL31_SIZE			0x20000
+#define BL31_SIZE			0x40000
 #define BL31_BASE			TZDRAM_BASE
 #define BL31_LIMIT			(TZDRAM_BASE + BL31_SIZE - 1)
 #define BL32_BASE			(TZDRAM_BASE + BL31_SIZE)
@@ -84,8 +78,6 @@
  * Platform specific page table and MMU setup constants
  ******************************************************************************/
 #define ADDR_SPACE_SIZE			(1ull << 32)
-#define MAX_XLAT_TABLES			3
-#define MAX_MMAP_REGIONS		8
 
 /*******************************************************************************
  * Some data must be aligned on the biggest cache line size in the platform.
diff --git a/plat/nvidia/tegra/include/t132/tegra_def.h b/plat/nvidia/tegra/include/t132/tegra_def.h
index 683c903..318f4de 100644
--- a/plat/nvidia/tegra/include/t132/tegra_def.h
+++ b/plat/nvidia/tegra/include/t132/tegra_def.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,6 +40,15 @@
 #define PSTATE_ID_SOC_POWERDN	0xD
 
 /*******************************************************************************
+ * Platform power states (used by PSCI framework)
+ *
+ * - PLAT_MAX_RET_STATE should be less than the lowest PSTATE_ID
+ * - PLAT_MAX_OFF_STATE should be greater than the highest PSTATE_ID
+ ******************************************************************************/
+#define PLAT_MAX_RET_STATE		1
+#define PLAT_MAX_OFF_STATE		(PSTATE_ID_SOC_POWERDN + 1)
+
+/*******************************************************************************
  * GIC memory map
  ******************************************************************************/
 #define TEGRA_GICD_BASE			0x50041000
@@ -71,6 +80,15 @@
 #define TEGRA_EVP_BASE			0x6000F000
 
 /*******************************************************************************
+ * Tegra UART controller base addresses
+ ******************************************************************************/
+#define TEGRA_UARTA_BASE		0x70006000
+#define TEGRA_UARTB_BASE		0x70006040
+#define TEGRA_UARTC_BASE		0x70006200
+#define TEGRA_UARTD_BASE		0x70006300
+#define TEGRA_UARTE_BASE		0x70006400
+
+/*******************************************************************************
  * Tegra Power Mgmt Controller constants
  ******************************************************************************/
 #define TEGRA_PMC_BASE			0x7000E400
@@ -80,4 +98,10 @@
  ******************************************************************************/
 #define TEGRA_MC_BASE			0x70019000
 
+/*******************************************************************************
+ * Tegra TZRAM constants
+ ******************************************************************************/
+#define TEGRA_TZRAM_BASE		0x7C010000
+#define TEGRA_TZRAM_SIZE		0x10000
+
 #endif /* __TEGRA_DEF_H__ */
diff --git a/plat/nvidia/tegra/include/t210/tegra_def.h b/plat/nvidia/tegra/include/t210/tegra_def.h
index 750e6e3..ce85427 100644
--- a/plat/nvidia/tegra/include/t210/tegra_def.h
+++ b/plat/nvidia/tegra/include/t210/tegra_def.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -48,18 +48,13 @@
 #define PLAT_SYS_SUSPEND_STATE_ID	PSTATE_ID_SOC_POWERDN
 
 /*******************************************************************************
- * Implementation defined ACTLR_EL3 bit definitions
+ * Platform power states (used by PSCI framework)
+ *
+ * - PLAT_MAX_RET_STATE should be less than the lowest PSTATE_ID
+ * - PLAT_MAX_OFF_STATE should be greater than the highest PSTATE_ID
  ******************************************************************************/
-#define ACTLR_EL3_L2ACTLR_BIT		(1 << 6)
-#define ACTLR_EL3_L2ECTLR_BIT		(1 << 5)
-#define ACTLR_EL3_L2CTLR_BIT		(1 << 4)
-#define ACTLR_EL3_CPUECTLR_BIT		(1 << 1)
-#define ACTLR_EL3_CPUACTLR_BIT		(1 << 0)
-#define ACTLR_EL3_ENABLE_ALL_ACCESS	(ACTLR_EL3_L2ACTLR_BIT | \
-					 ACTLR_EL3_L2ECTLR_BIT | \
-					 ACTLR_EL3_L2CTLR_BIT | \
-					 ACTLR_EL3_CPUECTLR_BIT | \
-					 ACTLR_EL3_CPUACTLR_BIT)
+#define PLAT_MAX_RET_STATE		1
+#define PLAT_MAX_OFF_STATE		(PSTATE_ID_SOC_POWERDN + 1)
 
 /*******************************************************************************
  * GIC memory map
@@ -110,6 +105,15 @@
 #define TEGRA_EVP_BASE			0x6000F000
 
 /*******************************************************************************
+ * Tegra UART controller base addresses
+ ******************************************************************************/
+#define TEGRA_UARTA_BASE		0x70006000
+#define TEGRA_UARTB_BASE		0x70006040
+#define TEGRA_UARTC_BASE		0x70006200
+#define TEGRA_UARTD_BASE		0x70006300
+#define TEGRA_UARTE_BASE		0x70006400
+
+/*******************************************************************************
  * Tegra Power Mgmt Controller constants
  ******************************************************************************/
 #define TEGRA_PMC_BASE			0x7000E400
@@ -119,4 +123,10 @@
  ******************************************************************************/
 #define TEGRA_MC_BASE			0x70019000
 
+/*******************************************************************************
+ * Tegra TZRAM constants
+ ******************************************************************************/
+#define TEGRA_TZRAM_BASE		0x7C010000
+#define TEGRA_TZRAM_SIZE		0x10000
+
 #endif /* __TEGRA_DEF_H__ */
diff --git a/plat/nvidia/tegra/include/tegra_private.h b/plat/nvidia/tegra/include/tegra_private.h
index cf75d9f..75416ec 100644
--- a/plat/nvidia/tegra/include/tegra_private.h
+++ b/plat/nvidia/tegra/include/tegra_private.h
@@ -43,7 +43,12 @@
 #define TEGRA_DRAM_END		0x27FFFFFFF
 
 typedef struct plat_params_from_bl2 {
+	/* TZ memory size */
 	uint64_t tzdram_size;
+	/* TZ memory base */
+	uint64_t tzdram_base;
+	/* UART port ID */
+	int uart_id;
 } plat_params_from_bl2_t;
 
 /* Declarations for plat_psci_handlers.c */
@@ -52,6 +57,7 @@
 
 /* Declarations for plat_setup.c */
 const mmap_region_t *plat_get_mmio_map(void);
+uint32_t plat_get_console_from_id(int id);
 
 /* Declarations for plat_secondary.c */
 void plat_secondary_setup(void);
diff --git a/plat/nvidia/tegra/platform.mk b/plat/nvidia/tegra/platform.mk
index cec7caf..756899c 100644
--- a/plat/nvidia/tegra/platform.mk
+++ b/plat/nvidia/tegra/platform.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -30,6 +30,9 @@
 
 SOC_DIR			:=	plat/nvidia/tegra/soc/${TARGET_SOC}
 
+# Enable PSCI v1.0 extended state ID format
+PSCI_EXTENDED_STATE_ID	:=	1
+
 # Disable the PSCI platform compatibility layer
 ENABLE_PLAT_COMPAT	:=	0
 
diff --git a/plat/nvidia/tegra/soc/t132/plat_psci_handlers.c b/plat/nvidia/tegra/soc/t132/plat_psci_handlers.c
index 48a2fba..f05f3d0 100644
--- a/plat/nvidia/tegra/soc/t132/plat_psci_handlers.c
+++ b/plat/nvidia/tegra/soc/t132/plat_psci_handlers.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -59,36 +59,15 @@
 int32_t tegra_soc_validate_power_state(unsigned int power_state,
 					psci_power_state_t *req_state)
 {
-	int pwr_lvl = psci_get_pstate_pwrlvl(power_state);
 	int state_id = psci_get_pstate_id(power_state);
 	int cpu = read_mpidr() & MPIDR_CPU_MASK;
 
-	if (pwr_lvl > PLAT_MAX_PWR_LVL)
-		return PSCI_E_INVALID_PARAMS;
-
-	/* Sanity check the requested afflvl */
-	if (psci_get_pstate_type(power_state) == PSTATE_TYPE_STANDBY) {
-		/*
-		 * It's possible to enter standby only on affinity level 0 i.e.
-		 * a cpu on Tegra. Ignore any other affinity level.
-		 */
-		if (pwr_lvl != MPIDR_AFFLVL0)
-			return PSCI_E_INVALID_PARAMS;
-
-		/* power domain in standby state */
-		req_state->pwr_domain_state[pwr_lvl] = PLAT_MAX_RET_STATE;
-
-		return PSCI_E_SUCCESS;
-	}
-
 	/*
 	 * Sanity check the requested state id, power level and CPU number.
 	 * Currently T132 only supports SYSTEM_SUSPEND on last standing CPU
 	 * i.e. CPU 0
 	 */
-	if ((pwr_lvl != PLAT_MAX_PWR_LVL) ||
-	    (state_id != PSTATE_ID_SOC_POWERDN) ||
-	    (cpu != 0)) {
+	if ((state_id != PSTATE_ID_SOC_POWERDN) || (cpu != 0)) {
 		ERROR("unsupported state id @ power level\n");
 		return PSCI_E_INVALID_PARAMS;
 	}
@@ -128,9 +107,26 @@
 	return PSCI_E_SUCCESS;
 }
 
+int tegra_soc_pwr_domain_on_finish(const psci_power_state_t *target_state)
+{
+	/*
+	 * Lock scratch registers which hold the CPU vectors
+	 */
+	tegra_pmc_lock_cpu_vectors();
+
+	return PSCI_E_SUCCESS;
+}
+
 int tegra_soc_pwr_domain_off(const psci_power_state_t *target_state)
 {
 	tegra_fc_cpu_off(read_mpidr() & MPIDR_CPU_MASK);
+
+	/* Disable DCO operations */
+	denver_disable_dco();
+
+	/* Power down the CPU */
+	write_actlr_el1(DENVER_CPU_STATE_POWER_DOWN);
+
 	return PSCI_E_SUCCESS;
 }
 
@@ -149,7 +145,10 @@
 	/* Program FC to enter suspend state */
 	tegra_fc_cpu_powerdn(read_mpidr());
 
-	/* Suspend DCO operations */
+	/* Disable DCO operations */
+	denver_disable_dco();
+
+	/* Program the suspend state ID */
 	write_actlr_el1(target_state->pwr_domain_state[PLAT_MAX_PWR_LVL]);
 
 	return PSCI_E_SUCCESS;
diff --git a/plat/nvidia/tegra/soc/t132/plat_setup.c b/plat/nvidia/tegra/soc/t132/plat_setup.c
index 0d66413..337a2c5 100644
--- a/plat/nvidia/tegra/soc/t132/plat_setup.c
+++ b/plat/nvidia/tegra/soc/t132/plat_setup.c
@@ -78,3 +78,31 @@
 {
 	return 12000000;
 }
+
+/*******************************************************************************
+ * Maximum supported UART controllers
+ ******************************************************************************/
+#define TEGRA132_MAX_UART_PORTS		5
+
+/*******************************************************************************
+ * This variable holds the UART port base addresses
+ ******************************************************************************/
+static uint32_t tegra132_uart_addresses[TEGRA132_MAX_UART_PORTS + 1] = {
+	0,	/* undefined - treated as an error case */
+	TEGRA_UARTA_BASE,
+	TEGRA_UARTB_BASE,
+	TEGRA_UARTC_BASE,
+	TEGRA_UARTD_BASE,
+	TEGRA_UARTE_BASE,
+};
+
+/*******************************************************************************
+ * Return the UART base address for console port 'id', or 0 if 'id' is invalid
+ ******************************************************************************/
+uint32_t plat_get_console_from_id(int id)
+{
+	/* 'id' is signed: reject negatives too, they would index before the table */
+	if ((id < 0) || (id > TEGRA132_MAX_UART_PORTS))
+		return 0;
+	return tegra132_uart_addresses[id];
+}
diff --git a/plat/nvidia/tegra/soc/t132/plat_sip_calls.c b/plat/nvidia/tegra/soc/t132/plat_sip_calls.c
new file mode 100644
index 0000000..6c89944
--- /dev/null
+++ b/plat/nvidia/tegra/soc/t132/plat_sip_calls.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bl_common.h>
+#include <context_mgmt.h>
+#include <debug.h>
+#include <errno.h>
+#include <tegra_private.h>
+
+#define NS_SWITCH_AARCH32	1
+#define SCR_RW_BITPOS		__builtin_ctz(SCR_RW_BIT)
+
+/*******************************************************************************
+ * Tegra132 SiP SMCs
+ ******************************************************************************/
+#define TEGRA_SIP_AARCH_SWITCH			0x82000004
+
+/*******************************************************************************
+ * SPSR settings for AARCH32/AARCH64 modes
+ ******************************************************************************/
+#define SPSR32		SPSR_MODE32(MODE32_svc, SPSR_T_ARM, SPSR_E_LITTLE, \
+			DAIF_FIQ_BIT | DAIF_IRQ_BIT | DAIF_ABT_BIT)
+#define SPSR64		SPSR_64(MODE_EL2, MODE_SP_ELX, DISABLE_ALL_EXCEPTIONS)
+
+/*******************************************************************************
+ * Handle T132 SiP SMCs: 0 on success, -EINVAL on bad args, -ENOTSUP otherwise
+ ******************************************************************************/
+int plat_sip_handler(uint32_t smc_fid,
+		     uint64_t x1,
+		     uint64_t x2,
+		     uint64_t x3,
+		     uint64_t x4,
+		     void *cookie,
+		     void *handle,
+		     uint64_t flags)
+{
+	switch (smc_fid) {
+
+	case TEGRA_SIP_AARCH_SWITCH:
+
+		/* keep only the low 32 bits of x1 and x2 */
+		x1 = (uint32_t)x1;
+		x2 = (uint32_t)x2;
+
+		if (!x1 || x2 > NS_SWITCH_AARCH32) {
+			ERROR("%s: invalid parameters\n", __func__);
+			return -EINVAL;
+		}
+
+		/* x1 = ns entry point, x2 picks the SPSR (SPSR32 if 1) */
+		cm_set_elr_spsr_el3(NON_SECURE, x1,
+			(x2 == NS_SWITCH_AARCH32) ? SPSR32 : SPSR64);
+
+		/* write !x2 to the SCR RW bit: 1 with SPSR64, 0 with SPSR32 */
+		cm_write_scr_el3_bit(NON_SECURE, SCR_RW_BITPOS, !x2);
+
+		INFO("CPU switched to AARCH%s mode\n",
+			(x2 == NS_SWITCH_AARCH32) ? "32" : "64");
+		return 0;
+
+	default:
+		ERROR("%s: unhandled SMC (0x%x)\n", __func__, smc_fid);
+		break;
+	}
+
+	return -ENOTSUP;
+}
diff --git a/plat/nvidia/tegra/soc/t132/platform_t132.mk b/plat/nvidia/tegra/soc/t132/platform_t132.mk
index 69d6296..6b9fce3 100644
--- a/plat/nvidia/tegra/soc/t132/platform_t132.mk
+++ b/plat/nvidia/tegra/soc/t132/platform_t132.mk
@@ -28,9 +28,6 @@
 # POSSIBILITY OF SUCH DAMAGE.
 #
 
-TEGRA_BOOT_UART_BASE		:= 0x70006300
-$(eval $(call add_define,TEGRA_BOOT_UART_BASE))
-
 TZDRAM_BASE			:= 0xF5C00000
 $(eval $(call add_define,TZDRAM_BASE))
 
@@ -40,7 +37,16 @@
 PLATFORM_MAX_CPUS_PER_CLUSTER	:= 2
 $(eval $(call add_define,PLATFORM_MAX_CPUS_PER_CLUSTER))
 
+MAX_XLAT_TABLES			:= 3
+$(eval $(call add_define,MAX_XLAT_TABLES))
+
+MAX_MMAP_REGIONS		:= 8
+$(eval $(call add_define,MAX_MMAP_REGIONS))
+
 BL31_SOURCES		+=	lib/cpus/aarch64/denver.S		\
+				${COMMON_DIR}/drivers/flowctrl/flowctrl.c	\
+				${COMMON_DIR}/drivers/memctrl/memctrl_v1.c	\
 				${SOC_DIR}/plat_psci_handlers.c		\
+				${SOC_DIR}/plat_sip_calls.c		\
 				${SOC_DIR}/plat_setup.c			\
 				${SOC_DIR}/plat_secondary.c
diff --git a/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c b/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c
index b184063..95fb93f 100644
--- a/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c
+++ b/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -58,39 +58,14 @@
 int32_t tegra_soc_validate_power_state(unsigned int power_state,
 					psci_power_state_t *req_state)
 {
-	int pwr_lvl = psci_get_pstate_pwrlvl(power_state);
 	int state_id = psci_get_pstate_id(power_state);
 
-	if (pwr_lvl > PLAT_MAX_PWR_LVL) {
-		ERROR("%s: unsupported power_state (0x%x)\n", __func__,
-			power_state);
-		return PSCI_E_INVALID_PARAMS;
-	}
-
-	/* Sanity check the requested afflvl */
-	if (psci_get_pstate_type(power_state) == PSTATE_TYPE_STANDBY) {
-		/*
-		 * It's possible to enter standby only on affinity level 0 i.e.
-		 * a cpu on Tegra. Ignore any other affinity level.
-		 */
-		if (pwr_lvl != MPIDR_AFFLVL0)
-			return PSCI_E_INVALID_PARAMS;
-
-		/* power domain in standby state */
-		req_state->pwr_domain_state[pwr_lvl] = PLAT_MAX_RET_STATE;
-
-		return PSCI_E_SUCCESS;
-	}
-
 	/* Sanity check the requested state id */
 	switch (state_id) {
 	case PSTATE_ID_CORE_POWERDN:
 		/*
 		 * Core powerdown request only for afflvl 0
 		 */
-		if (pwr_lvl != MPIDR_AFFLVL0)
-			goto error;
-
 		req_state->pwr_domain_state[MPIDR_AFFLVL0] = state_id & 0xff;
 
 		break;
@@ -100,9 +75,6 @@
 		/*
 		 * Cluster powerdown/idle request only for afflvl 1
 		 */
-		if (pwr_lvl != MPIDR_AFFLVL1)
-			goto error;
-
 		req_state->pwr_domain_state[MPIDR_AFFLVL1] = state_id;
 		req_state->pwr_domain_state[MPIDR_AFFLVL0] = PLAT_MAX_OFF_STATE;
 
@@ -112,9 +84,6 @@
 		/*
 		 * System powerdown request only for afflvl 2
 		 */
-		if (pwr_lvl != PLAT_MAX_PWR_LVL)
-			goto error;
-
 		for (int i = MPIDR_AFFLVL0; i < PLAT_MAX_PWR_LVL; i++)
 			req_state->pwr_domain_state[i] = PLAT_MAX_OFF_STATE;
 
@@ -129,10 +98,6 @@
 	}
 
 	return PSCI_E_SUCCESS;
-
-error:
-	ERROR("%s: unsupported state id (%d)\n", __func__, state_id);
-	return PSCI_E_INVALID_PARAMS;
 }
 
 int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
@@ -190,6 +155,11 @@
 			PLAT_SYS_SUSPEND_STATE_ID) {
 
 		/*
+		 * Lock scratch registers which hold the CPU vectors
+		 */
+		tegra_pmc_lock_cpu_vectors();
+
+		/*
 		 * Enable WRAP to INCR burst type conversions for
 		 * incoming requests on the AXI slave ports.
 		 */
diff --git a/plat/nvidia/tegra/soc/t210/plat_setup.c b/plat/nvidia/tegra/soc/t210/plat_setup.c
index 70a55c6..246faf8 100644
--- a/plat/nvidia/tegra/soc/t210/plat_setup.c
+++ b/plat/nvidia/tegra/soc/t210/plat_setup.c
@@ -84,3 +84,31 @@
 {
 	return 19200000;
 }
+
+/*******************************************************************************
+ * Maximum supported UART controllers
+ ******************************************************************************/
+#define TEGRA210_MAX_UART_PORTS		5
+
+/*******************************************************************************
+ * This variable holds the UART port base addresses
+ ******************************************************************************/
+static uint32_t tegra210_uart_addresses[TEGRA210_MAX_UART_PORTS + 1] = {
+	0,	/* undefined - treated as an error case */
+	TEGRA_UARTA_BASE,
+	TEGRA_UARTB_BASE,
+	TEGRA_UARTC_BASE,
+	TEGRA_UARTD_BASE,
+	TEGRA_UARTE_BASE,
+};
+
+/*******************************************************************************
+ * Return the UART base address for console port 'id', or 0 if 'id' is invalid
+ ******************************************************************************/
+uint32_t plat_get_console_from_id(int id)
+{
+	/* 'id' is signed: reject negatives too, they would index before the table */
+	if ((id < 0) || (id > TEGRA210_MAX_UART_PORTS))
+		return 0;
+	return tegra210_uart_addresses[id];
+}
diff --git a/plat/nvidia/tegra/soc/t210/platform_t210.mk b/plat/nvidia/tegra/soc/t210/platform_t210.mk
index 5001629..2c908f9 100644
--- a/plat/nvidia/tegra/soc/t210/platform_t210.mk
+++ b/plat/nvidia/tegra/soc/t210/platform_t210.mk
@@ -28,32 +28,28 @@
 # POSSIBILITY OF SUCH DAMAGE.
 #
 
-TEGRA_BOOT_UART_BASE 			:= 0x70006000
-$(eval $(call add_define,TEGRA_BOOT_UART_BASE))
-
 TZDRAM_BASE				:= 0xFDC00000
 $(eval $(call add_define,TZDRAM_BASE))
 
 ERRATA_TEGRA_INVALIDATE_BTB_AT_BOOT	:= 1
 $(eval $(call add_define,ERRATA_TEGRA_INVALIDATE_BTB_AT_BOOT))
 
-ENABLE_NS_L2_CPUECTRL_RW_ACCESS		:= 1
-$(eval $(call add_define,ENABLE_NS_L2_CPUECTRL_RW_ACCESS))
-
-ENABLE_L2_DYNAMIC_RETENTION		:= 1
-$(eval $(call add_define,ENABLE_L2_DYNAMIC_RETENTION))
-
-ENABLE_CPU_DYNAMIC_RETENTION		:= 1
-$(eval $(call add_define,ENABLE_CPU_DYNAMIC_RETENTION))
-
 PLATFORM_CLUSTER_COUNT			:= 2
 $(eval $(call add_define,PLATFORM_CLUSTER_COUNT))
 
 PLATFORM_MAX_CPUS_PER_CLUSTER		:= 4
 $(eval $(call add_define,PLATFORM_MAX_CPUS_PER_CLUSTER))
 
+MAX_XLAT_TABLES				:= 3
+$(eval $(call add_define,MAX_XLAT_TABLES))
+
+MAX_MMAP_REGIONS			:= 8
+$(eval $(call add_define,MAX_MMAP_REGIONS))
+
 BL31_SOURCES		+=	lib/cpus/aarch64/cortex_a53.S		\
 				lib/cpus/aarch64/cortex_a57.S		\
+				${COMMON_DIR}/drivers/flowctrl/flowctrl.c	\
+				${COMMON_DIR}/drivers/memctrl/memctrl_v1.c	\
 				${SOC_DIR}/plat_psci_handlers.c		\
 				${SOC_DIR}/plat_setup.c			\
 				${SOC_DIR}/plat_secondary.c
diff --git a/plat/qemu/qemu_bl2_setup.c b/plat/qemu/qemu_bl2_setup.c
index dba3bee..738d671 100644
--- a/plat/qemu/qemu_bl2_setup.c
+++ b/plat/qemu/qemu_bl2_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,7 +35,7 @@
 #include <platform_def.h>
 #include "qemu_private.h"
 #include <string.h>
-
+#include <utils.h>
 
 /*
  * The next 2 constants identify the extents of the code & RO data region.
@@ -91,7 +91,7 @@
 	 * Initialise the memory for all the arguments that needs to
 	 * be passed to BL3-1
 	 */
-	memset(&bl31_params_mem, 0, sizeof(bl2_to_bl31_params_mem_t));
+	zeromem(&bl31_params_mem, sizeof(bl2_to_bl31_params_mem_t));
 
 	/* Assign memory for TF related information */
 	bl2_to_bl31_params = &bl31_params_mem.bl31_params;
diff --git a/plat/rockchip/rk3399/drivers/dram/dram_spec_timing.c b/plat/rockchip/rk3399/drivers/dram/dram_spec_timing.c
index 8a5704e..6288de4 100644
--- a/plat/rockchip/rk3399/drivers/dram/dram_spec_timing.c
+++ b/plat/rockchip/rk3399/drivers/dram/dram_spec_timing.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,6 +31,7 @@
 #include <string.h>
 #include <stdint.h>
 #include <dram.h>
+#include <utils.h>
 #include "dram_spec_timing.h"
 
 static const uint8_t ddr3_cl_cwl[][7] = {
@@ -228,7 +229,7 @@
 	uint32_t ddr_capability_per_die = get_max_die_capability(timing_config);
 	uint32_t tmp;
 
-	memset((void *)pdram_timing, 0, sizeof(struct dram_timing_t));
+	zeromem((void *)pdram_timing, sizeof(struct dram_timing_t));
 	pdram_timing->mhz = nmhz;
 	pdram_timing->al = 0;
 	pdram_timing->bl = timing_config->bl;
@@ -444,7 +445,7 @@
 	uint32_t ddr_capability_per_die = get_max_die_capability(timing_config);
 	uint32_t tmp, trp_tmp, trppb_tmp, tras_tmp, twr_tmp, bl_tmp;
 
-	memset((void *)pdram_timing, 0, sizeof(struct dram_timing_t));
+	zeromem((void *)pdram_timing, sizeof(struct dram_timing_t));
 	pdram_timing->mhz = nmhz;
 	pdram_timing->al = 0;
 	pdram_timing->bl = timing_config->bl;
@@ -684,7 +685,7 @@
 	uint32_t ddr_capability_per_die = get_max_die_capability(timing_config);
 	uint32_t tmp, trp_tmp, trppb_tmp, tras_tmp, twr_tmp, bl_tmp;
 
-	memset((void *)pdram_timing, 0, sizeof(struct dram_timing_t));
+	zeromem((void *)pdram_timing, sizeof(struct dram_timing_t));
 	pdram_timing->mhz = nmhz;
 	pdram_timing->al = 0;
 	pdram_timing->bl = timing_config->bl;
@@ -980,7 +981,7 @@
 	uint32_t ddr_capability_per_die = get_max_die_capability(timing_config);
 	uint32_t tmp, trp_tmp, trppb_tmp, tras_tmp;
 
-	memset((void *)pdram_timing, 0, sizeof(struct dram_timing_t));
+	zeromem((void *)pdram_timing, sizeof(struct dram_timing_t));
 	pdram_timing->mhz = nmhz;
 	pdram_timing->al = 0;
 	pdram_timing->bl = timing_config->bl;
diff --git a/plat/xilinx/zynqmp/pm_service/pm_client.c b/plat/xilinx/zynqmp/pm_service/pm_client.c
index e102b4f..0fe17b5 100644
--- a/plat/xilinx/zynqmp/pm_service/pm_client.c
+++ b/plat/xilinx/zynqmp/pm_service/pm_client.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,6 +40,7 @@
 #include <bl_common.h>
 #include <mmio.h>
 #include <string.h>
+#include <utils.h>
 #include "pm_api_sys.h"
 #include "pm_client.h"
 #include "pm_ipi.h"
@@ -188,7 +189,7 @@
 	uint8_t pm_wakeup_nodes_set[NODE_MAX];
 	uintptr_t isenabler1 = BASE_GICD_BASE + GICD_ISENABLER + 4;
 
-	memset(&pm_wakeup_nodes_set, 0, sizeof(pm_wakeup_nodes_set));
+	zeromem(&pm_wakeup_nodes_set, sizeof(pm_wakeup_nodes_set));
 
 	for (reg_num = 0; reg_num < NUM_GICD_ISENABLER; reg_num++) {
 		uint32_t base_irq = reg_num << ISENABLER_SHIFT;
diff --git a/services/spd/opteed/opteed_common.c b/services/spd/opteed/opteed_common.c
index 2f20b7c..910f900 100644
--- a/services/spd/opteed/opteed_common.c
+++ b/services/spd/opteed/opteed_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,7 @@
 #include <bl_common.h>
 #include <context_mgmt.h>
 #include <string.h>
+#include <utils.h>
 #include "opteed_private.h"
 
 /*******************************************************************************
@@ -73,7 +74,7 @@
 						      DAIF_FIQ_BIT |
 							DAIF_IRQ_BIT |
 							DAIF_ABT_BIT);
-	memset(&optee_entry_point->args, 0, sizeof(optee_entry_point->args));
+	zeromem(&optee_entry_point->args, sizeof(optee_entry_point->args));
 }
 
 /*******************************************************************************
diff --git a/services/spd/trusty/trusty.c b/services/spd/trusty/trusty.c
index 78a68ba..b21ce71 100644
--- a/services/spd/trusty/trusty.c
+++ b/services/spd/trusty/trusty.c
@@ -395,7 +395,7 @@
 DECLARE_RT_SVC(
 	trusty_std,
 
-	OEN_TOS_START,
+	OEN_TAP_START,
 	SMC_ENTITY_SECURE_MONITOR,
 	SMC_TYPE_STD,
 	NULL,
diff --git a/services/spd/tspd/tspd_common.c b/services/spd/tspd/tspd_common.c
index 3dcefea..70959d7 100644
--- a/services/spd/tspd/tspd_common.c
+++ b/services/spd/tspd/tspd_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,6 +35,7 @@
 #include <debug.h>
 #include <string.h>
 #include <tsp.h>
+#include <utils.h>
 #include "tspd_private.h"
 
 /*******************************************************************************
@@ -78,7 +79,7 @@
 	tsp_entry_point->spsr = SPSR_64(MODE_EL1,
 					MODE_SP_ELX,
 					DISABLE_ALL_EXCEPTIONS);
-	memset(&tsp_entry_point->args, 0, sizeof(tsp_entry_point->args));
+	zeromem(&tsp_entry_point->args, sizeof(tsp_entry_point->args));
 }
 
 /*******************************************************************************
diff --git a/tools/cert_create/include/key.h b/tools/cert_create/include/key.h
index f60997f..433f72c 100644
--- a/tools/cert_create/include/key.h
+++ b/tools/cert_create/include/key.h
@@ -73,6 +73,7 @@
 /* Exported API */
 int key_init(void);
 key_t *key_get_by_opt(const char *opt);
+int key_new(key_t *key);
 int key_create(key_t *key, int type);
 int key_load(key_t *key, unsigned int *err_code);
 int key_store(key_t *key);
diff --git a/tools/cert_create/src/cert.c b/tools/cert_create/src/cert.c
index a559832..375c66b 100644
--- a/tools/cert_create/src/cert.c
+++ b/tools/cert_create/src/cert.c
@@ -103,10 +103,10 @@
 	cert_t *issuer_cert = &certs[cert->issuer];
 	EVP_PKEY *ikey = keys[issuer_cert->key].key;
 	X509 *issuer = issuer_cert->x;
-	X509 *x = NULL;
-	X509_EXTENSION *ex = NULL;
-	X509_NAME *name = NULL;
-	ASN1_INTEGER *sno = NULL;
+	X509 *x;
+	X509_EXTENSION *ex;
+	X509_NAME *name;
+	ASN1_INTEGER *sno;
 	int i, num;
 
 	/* Create the certificate structure */
@@ -202,7 +202,7 @@
 
 cert_t *cert_get_by_opt(const char *opt)
 {
-	cert_t *cert = NULL;
+	cert_t *cert;
 	unsigned int i;
 
 	for (i = 0; i < num_certs; i++) {
diff --git a/tools/cert_create/src/ext.c b/tools/cert_create/src/ext.c
index 3f56edb..a50919e 100644
--- a/tools/cert_create/src/ext.c
+++ b/tools/cert_create/src/ext.c
@@ -181,13 +181,13 @@
 X509_EXTENSION *ext_new_hash(int nid, int crit, const EVP_MD *md,
 		unsigned char *buf, size_t len)
 {
-	X509_EXTENSION *ex = NULL;
-	ASN1_OCTET_STRING *octet = NULL;
-	HASH *hash = NULL;
-	ASN1_OBJECT *algorithm = NULL;
-	X509_ALGOR *x509_algor = NULL;
+	X509_EXTENSION *ex;
+	ASN1_OCTET_STRING *octet;
+	HASH *hash;
+	ASN1_OBJECT *algorithm;
+	X509_ALGOR *x509_algor;
 	unsigned char *p = NULL;
-	int sz = -1;
+	int sz;
 
 	/* OBJECT_IDENTIFIER with hash algorithm */
 	algorithm = OBJ_nid2obj(md->type);
@@ -254,16 +254,15 @@
  */
 X509_EXTENSION *ext_new_nvcounter(int nid, int crit, int value)
 {
-	X509_EXTENSION *ex = NULL;
-	ASN1_INTEGER *counter = NULL;
+	X509_EXTENSION *ex;
+	ASN1_INTEGER *counter;
 	unsigned char *p = NULL;
-	int sz = -1;
+	int sz;
 
 	/* Encode counter */
 	counter = ASN1_INTEGER_new();
 	ASN1_INTEGER_set(counter, value);
-	sz = i2d_ASN1_INTEGER(counter, NULL);
-	i2d_ASN1_INTEGER(counter, &p);
+	sz = i2d_ASN1_INTEGER(counter, &p);
 
 	/* Create the extension */
 	ex = ext_new(nid, crit, p, sz);
@@ -292,9 +291,9 @@
  */
 X509_EXTENSION *ext_new_key(int nid, int crit, EVP_PKEY *k)
 {
-	X509_EXTENSION *ex = NULL;
-	unsigned char *p = NULL;
-	int sz = -1;
+	X509_EXTENSION *ex;
+	unsigned char *p;
+	int sz;
 
 	/* Encode key */
 	BIO *mem = BIO_new(BIO_s_mem());
@@ -316,7 +315,7 @@
 
 ext_t *ext_get_by_opt(const char *opt)
 {
-	ext_t *ext = NULL;
+	ext_t *ext;
 	unsigned int i;
 
 	/* Sequential search. This is not a performance concern since the number
diff --git a/tools/cert_create/src/key.c b/tools/cert_create/src/key.c
index a7ee759..ce0e4da 100644
--- a/tools/cert_create/src/key.c
+++ b/tools/cert_create/src/key.c
@@ -49,7 +49,7 @@
 /*
  * Create a new key container
  */
-static int key_new(key_t *key)
+int key_new(key_t *key)
 {
 	/* Create key pair container */
 	key->key = EVP_PKEY_new();
@@ -62,7 +62,7 @@
 
 static int key_create_rsa(key_t *key)
 {
-	RSA *rsa = NULL;
+	RSA *rsa;
 
 	rsa = RSA_generate_key(RSA_KEY_BITS, RSA_F4, NULL, NULL);
 	if (rsa == NULL) {
@@ -83,7 +83,7 @@
 #ifndef OPENSSL_NO_EC
 static int key_create_ecdsa(key_t *key)
 {
-	EC_KEY *ec = NULL;
+	EC_KEY *ec;
 
 	ec = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);
 	if (ec == NULL) {
@@ -123,11 +123,6 @@
 		return 0;
 	}
 
-	/* Create OpenSSL key container */
-	if (!key_new(key)) {
-		return 0;
-	}
-
 	if (key_create_fn[type]) {
 		return key_create_fn[type](key);
 	}
@@ -137,14 +132,8 @@
 
 int key_load(key_t *key, unsigned int *err_code)
 {
-	FILE *fp = NULL;
-	EVP_PKEY *k = NULL;
-
-	/* Create OpenSSL key container */
-	if (!key_new(key)) {
-		*err_code = KEY_ERR_MALLOC;
-		return 0;
-	}
+	FILE *fp;
+	EVP_PKEY *k;
 
 	if (key->fn) {
 		/* Load key from file */
@@ -173,7 +162,7 @@
 
 int key_store(key_t *key)
 {
-	FILE *fp = NULL;
+	FILE *fp;
 
 	if (key->fn) {
 		fp = fopen(key->fn, "w");
@@ -196,7 +185,6 @@
 {
 	cmd_opt_t cmd_opt;
 	key_t *key;
-	int rc = 0;
 	unsigned int i;
 
 	for (i = 0; i < num_keys; i++) {
@@ -211,12 +199,12 @@
 		}
 	}
 
-	return rc;
+	return 0;
 }
 
 key_t *key_get_by_opt(const char *opt)
 {
-	key_t *key = NULL;
+	key_t *key;
 	unsigned int i;
 
 	/* Sequential search. This is not a performance concern since the number
diff --git a/tools/cert_create/src/main.c b/tools/cert_create/src/main.c
index c58f41d..c9c9622 100644
--- a/tools/cert_create/src/main.c
+++ b/tools/cert_create/src/main.c
@@ -134,7 +134,6 @@
 	printf("\t%s [OPTIONS]\n\n", cmd);
 
 	printf("Available options:\n");
-	i = 0;
 	opt = long_opt;
 	while (opt->name) {
 		p = line;
@@ -261,12 +260,12 @@
 
 int main(int argc, char *argv[])
 {
-	STACK_OF(X509_EXTENSION) * sk = NULL;
-	X509_EXTENSION *cert_ext = NULL;
-	ext_t *ext = NULL;
-	key_t *key = NULL;
-	cert_t *cert = NULL;
-	FILE *file = NULL;
+	STACK_OF(X509_EXTENSION) * sk;
+	X509_EXTENSION *cert_ext;
+	ext_t *ext;
+	key_t *key;
+	cert_t *cert;
+	FILE *file;
 	int i, j, ext_nid, nvctr;
 	int c, opt_idx = 0;
 	const struct option *cmd_opt;
@@ -367,6 +366,11 @@
 
 	/* Load private keys from files (or generate new ones) */
 	for (i = 0 ; i < num_keys ; i++) {
+		if (!key_new(&keys[i])) {
+			ERROR("Failed to allocate key container\n");
+			exit(1);
+		}
+
 		/* First try to load the key from disk */
 		if (key_load(&keys[i], &err_code)) {
 			/* Key loaded successfully */
@@ -374,11 +378,7 @@
 		}
 
 		/* Key not loaded. Check the error code */
-		if (err_code == KEY_ERR_MALLOC) {
-			/* Cannot allocate memory. Abort. */
-			ERROR("Malloc error while loading '%s'\n", keys[i].fn);
-			exit(1);
-		} else if (err_code == KEY_ERR_LOAD) {
+		if (err_code == KEY_ERR_LOAD) {
 			/* File exists, but it does not contain a valid private
 			 * key. Abort. */
 			ERROR("Error loading '%s'\n", keys[i].fn);
diff --git a/tools/fiptool/fiptool.c b/tools/fiptool/fiptool.c
index 865aeae..f3f831b 100644
--- a/tools/fiptool/fiptool.c
+++ b/tools/fiptool/fiptool.c
@@ -52,8 +52,6 @@
 #define OPT_PLAT_TOC_FLAGS 1
 #define OPT_ALIGN 2
 
-static image_desc_t *lookup_image_desc_from_uuid(const uuid_t *uuid);
-static image_t *lookup_image_from_uuid(const uuid_t *uuid);
 static int info_cmd(int argc, char *argv[]);
 static void info_usage(void);
 static int create_cmd(int argc, char *argv[]);
@@ -822,11 +820,9 @@
 	printf("\n");
 	printf("Options:\n");
 	printf("  --align <value>\t\tEach image is aligned to <value> (default: 1).\n");
-	printf("  --blob uuid=...,file=...\tAdd an image with the given UUID "
-	    "pointed to by file.\n");
-	printf("  --plat-toc-flags <value>\t16-bit platform specific flag field "
-	    "occupying bits 32-47 in 64-bit ToC header.\n");
-	fputc('\n', stderr);
+	printf("  --blob uuid=...,file=...\tAdd an image with the given UUID pointed to by file.\n");
+	printf("  --plat-toc-flags <value>\t16-bit platform specific flag field occupying bits 32-47 in 64-bit ToC header.\n");
+	printf("\n");
 	printf("Specific images are packed with the following options:\n");
 	for (; toc_entry->cmdline_name != NULL; toc_entry++)
 		printf("  --%-16s FILENAME\t%s\n", toc_entry->cmdline_name,
@@ -938,12 +934,10 @@
 	printf("\n");
 	printf("Options:\n");
 	printf("  --align <value>\t\tEach image is aligned to <value> (default: 1).\n");
-	printf("  --blob uuid=...,file=...\tAdd or update an image "
-	    "with the given UUID pointed to by file.\n");
+	printf("  --blob uuid=...,file=...\tAdd or update an image with the given UUID pointed to by file.\n");
 	printf("  --out FIP_FILENAME\t\tSet an alternative output FIP file.\n");
-	printf("  --plat-toc-flags <value>\t16-bit platform specific flag field "
-	    "occupying bits 32-47 in 64-bit ToC header.\n");
-	fputc('\n', stderr);
+	printf("  --plat-toc-flags <value>\t16-bit platform specific flag field occupying bits 32-47 in 64-bit ToC header.\n");
+	printf("\n");
 	printf("Specific images are packed with the following options:\n");
 	for (; toc_entry->cmdline_name != NULL; toc_entry++)
 		printf("  --%-16s FILENAME\t%s\n", toc_entry->cmdline_name,
@@ -1076,17 +1070,15 @@
 	printf("fiptool unpack [opts] FIP_FILENAME\n");
 	printf("\n");
 	printf("Options:\n");
-	printf("  --blob uuid=...,file=...\tUnpack an image with the given UUID "
-	    "to file.\n");
-	printf("  --force\t\t\tIf the output file already exists, use --force to "
-	    "overwrite it.\n");
+	printf("  --blob uuid=...,file=...\tUnpack an image with the given UUID to file.\n");
+	printf("  --force\t\t\tIf the output file already exists, use --force to overwrite it.\n");
 	printf("  --out path\t\t\tSet the output directory path.\n");
-	fputc('\n', stderr);
+	printf("\n");
 	printf("Specific images are unpacked with the following options:\n");
 	for (; toc_entry->cmdline_name != NULL; toc_entry++)
 		printf("  --%-16s FILENAME\t%s\n", toc_entry->cmdline_name,
 		    toc_entry->name);
-	fputc('\n', stderr);
+	printf("\n");
 	printf("If no options are provided, all images will be unpacked.\n");
 	exit(1);
 }
@@ -1207,10 +1199,9 @@
 	printf("Options:\n");
 	printf("  --align <value>\tEach image is aligned to <value> (default: 1).\n");
 	printf("  --blob uuid=...\tRemove an image with the given UUID.\n");
-	printf("  --force\t\tIf the output FIP file already exists, use --force to "
-	    "overwrite it.\n");
+	printf("  --force\t\tIf the output FIP file already exists, use --force to overwrite it.\n");
 	printf("  --out FIP_FILENAME\tSet an alternative output FIP file.\n");
-	fputc('\n', stderr);
+	printf("\n");
 	printf("Specific images are removed with the following options:\n");
 	for (; toc_entry->cmdline_name != NULL; toc_entry++)
 		printf("  --%-16s\t%s\n", toc_entry->cmdline_name,
@@ -1258,7 +1249,7 @@
 	printf("usage: fiptool [--verbose] <command> [<args>]\n");
 	printf("Global options supported:\n");
 	printf("  --verbose\tEnable verbose output for all commands.\n");
-	fputc('\n', stderr);
+	printf("\n");
 	printf("Commands supported:\n");
 	printf("  info\t\tList images contained in FIP.\n");
 	printf("  create\tCreate a new FIP with the given images.\n");
