Merge pull request #393 from mtk09422/misc-updates

mt8173: Update SPM and fix watchdog setting
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index 987d30e..1d26229 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -82,6 +82,20 @@
 	b.ne	_panic
 
 	/* ---------------------------------------------
+	 * Invalidate the RW memory used by the BL2
+	 * image. This includes the data and NOBITS
+	 * sections. This is done to safeguard against
+	 * possible corruption of this memory by dirty
+	 * cache lines in a system cache as a result of
+	 * use by an earlier boot loader stage.
+	 * ---------------------------------------------
+	 */
+	adr	x0, __RW_START__
+	adr	x1, __RW_END__
+	sub	x1, x1, x0
+	bl	inv_dcache_range
+
+	/* ---------------------------------------------
 	 * Zero out NOBITS sections. There are 2 of them:
 	 *   - the .bss section;
 	 *   - the coherent memory section.
diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S
index 33588e6..a660bda 100644
--- a/bl2/bl2.ld.S
+++ b/bl2/bl2.ld.S
@@ -68,6 +68,12 @@
         __RO_END__ = .;
     } >RAM
 
+    /*
+     * Define a linker symbol to mark start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = . ;
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
@@ -121,6 +127,11 @@
     } >RAM
 #endif
 
+    /*
+     * Define a linker symbol to mark end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
     __BL2_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 5ba0f9c..636b1d2 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -113,5 +113,22 @@
 	 */
 	bl	bl31_main
 
+	/* -------------------------------------------------------------
+	 * Clean the .data & .bss sections to main memory. This ensures
+	 * that any global data which was initialised by the primary CPU
+	 * is visible to secondary CPUs before they enable their data
+	 * caches and participate in coherency.
+	 * -------------------------------------------------------------
+	 */
+	adr	x0, __DATA_START__
+	adr	x1, __DATA_END__
+	sub	x1, x1, x0
+	bl	clean_dcache_range
+
+	adr	x0, __BSS_START__
+	adr	x1, __BSS_END__
+	sub	x1, x1, x0
+	bl	clean_dcache_range
+
 	b	el3_exit
 endfunc bl31_entrypoint
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 3327f31..e572f9b 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -81,6 +81,12 @@
     ASSERT(__CPU_OPS_END__ > __CPU_OPS_START__,
            "cpu_ops not defined for this platform.")
 
+    /*
+     * Define a linker symbol to mark start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = . ;
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
@@ -101,10 +107,31 @@
      * The .bss section gets initialised to 0 at runtime.
      * Its base address must be 16-byte aligned.
      */
-    .bss : ALIGN(16) {
+    .bss (NOLOAD) : ALIGN(16) {
         __BSS_START__ = .;
         *(.bss*)
         *(COMMON)
+#if !USE_COHERENT_MEM
+        /*
+         * Bakery locks are stored in normal .bss memory
+         *
+         * Each lock's data is spread across multiple cache lines, one per CPU,
+         * but multiple locks can share the same cache line.
+         * The compiler will allocate enough memory for one CPU's bakery locks;
+         * the remaining cache lines are allocated by the linker script.
+         */
+        . = ALIGN(CACHE_WRITEBACK_GRANULE);
+        __BAKERY_LOCK_START__ = .;
+        *(bakery_lock)
+        . = ALIGN(CACHE_WRITEBACK_GRANULE);
+        __PERCPU_BAKERY_LOCK_SIZE__ = ABSOLUTE(. - __BAKERY_LOCK_START__);
+        . = . + (__PERCPU_BAKERY_LOCK_SIZE__ * (PLATFORM_CORE_COUNT - 1));
+        __BAKERY_LOCK_END__ = .;
+#ifdef PLAT_PERCPU_BAKERY_LOCK_SIZE
+    ASSERT(__PERCPU_BAKERY_LOCK_SIZE__ == PLAT_PERCPU_BAKERY_LOCK_SIZE,
+        "PLAT_PERCPU_BAKERY_LOCK_SIZE does not match bakery lock requirements");
+#endif
+#endif
         __BSS_END__ = .;
     } >RAM
 
@@ -126,6 +153,12 @@
      */
     coherent_ram (NOLOAD) : ALIGN(4096) {
         __COHERENT_RAM_START__ = .;
+        /*
+         * Bakery locks are stored in coherent memory
+         *
+         * Each lock's data is contiguous and fully allocated by the compiler
+         */
+        *(bakery_lock)
         *(tzfw_coherent_mem)
         __COHERENT_RAM_END_UNALIGNED__ = .;
         /*
@@ -138,6 +171,11 @@
     } >RAM
 #endif
 
+    /*
+     * Define a linker symbol to mark end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
     __BL31_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
diff --git a/bl31/bl31_main.c b/bl31/bl31_main.c
index a1a3710..a244a5c 100644
--- a/bl31/bl31_main.c
+++ b/bl31/bl31_main.c
@@ -87,9 +87,6 @@
 	INFO("BL3-1: Initializing runtime services\n");
 	runtime_svc_init();
 
-	/* Clean caches before re-entering normal world */
-	dcsw_op_all(DCCSW);
-
 	/*
 	 * All the cold boot actions on the primary cpu are done. We now need to
 	 * decide which is the next image (BL32 or BL33) and how to execute it.
diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S
index 4e8da74..9732ff2 100644
--- a/bl32/tsp/aarch64/tsp_entrypoint.S
+++ b/bl32/tsp/aarch64/tsp_entrypoint.S
@@ -99,6 +99,20 @@
 	isb
 
 	/* ---------------------------------------------
+	 * Invalidate the RW memory used by the BL32
+	 * image. This includes the data and NOBITS
+	 * sections. This is done to safeguard against
+	 * possible corruption of this memory by dirty
+	 * cache lines in a system cache as a result of
+	 * use by an earlier boot loader stage.
+	 * ---------------------------------------------
+	 */
+	adr	x0, __RW_START__
+	adr	x1, __RW_END__
+	sub	x1, x1, x0
+	bl	inv_dcache_range
+
+	/* ---------------------------------------------
 	 * Zero out NOBITS sections. There are 2 of them:
 	 *   - the .bss section;
 	 *   - the coherent memory section.
diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S
index d411ad0..41c4b4a 100644
--- a/bl32/tsp/tsp.ld.S
+++ b/bl32/tsp/tsp.ld.S
@@ -62,6 +62,12 @@
         __RO_END__ = .;
     } >RAM
 
+    /*
+     * Define a linker symbol to mark start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = . ;
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
@@ -119,6 +125,11 @@
     } >RAM
 #endif
 
+    /*
+     * Define a linker symbol to mark the end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
     __BL32_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index 18f634f..41fb7c0 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -1523,38 +1523,52 @@
 The below sections analyze the data structures allocated in the coherent memory
 region and the changes required to allocate them in normal memory.
 
-### PSCI Affinity map nodes
+### Coherent memory usage in PSCI implementation
 
-The `psci_aff_map` data structure stores the hierarchial node information for
-each affinity level in the system including the PSCI states associated with them.
-By default, this data structure is allocated in the coherent memory region in
-the Trusted Firmware because it can be accessed by multiple CPUs, either with
-their caches enabled or disabled.
+The `psci_non_cpu_pd_nodes` data structure stores the platform's power domain
+tree information for state management of power domains. By default, this data
+structure is allocated in the coherent memory region in the Trusted Firmware
+because it can be accessed by multiple CPUs, either with caches enabled or
+disabled.
 
-	typedef struct aff_map_node {
-		unsigned long mpidr;
-		unsigned char ref_count;
-		unsigned char state;
-		unsigned char level;
-	#if USE_COHERENT_MEM
-		bakery_lock_t lock;
-	#else
-		unsigned char aff_map_index;
-	#endif
-	} aff_map_node_t;
+    typedef struct non_cpu_pwr_domain_node {
+        /*
+         * Index of the first CPU power domain node level 0 which has this
+         * node as its parent.
+         */
+        unsigned int cpu_start_idx;
+
+        /*
+         * Number of CPU power domains which are siblings of the domain
+         * indexed by 'cpu_start_idx' i.e. all the domains in the range
+         * 'cpu_start_idx -> cpu_start_idx + ncpus' have this node as their
+         * parent.
+         */
+        unsigned int ncpus;
+
+        /*
+         * Index of the parent power domain node.
+         * TODO: Figure out whether using a pointer is more efficient.
+         */
+        unsigned int parent_node;
+
+        plat_local_state_t local_state;
+
+        unsigned char level;
+
+        /* For indexing the psci_lock array */
+        unsigned char lock_index;
+    } non_cpu_pd_node_t;
 
 In order to move this data structure to normal memory, the use of each of its
-fields must be analyzed. Fields like `mpidr` and `level` are only written once
-during cold boot. Hence removing them from coherent memory involves only doing
-a clean and invalidate of the cache lines after these fields are written.
+fields must be analyzed. Fields like `cpu_start_idx`, `ncpus`, `parent_node`,
+`level` and `lock_index` are only written once during cold boot. Hence removing
+them from coherent memory involves only doing a clean and invalidate of the
+cache lines after these fields are written.
 
-The fields `state` and `ref_count` can be concurrently accessed by multiple
-CPUs in different cache states. A Lamport's Bakery lock is used to ensure mutual
-exlusion to these fields. As a result, it is possible to move these fields out
-of coherent memory by performing software cache maintenance on them. The field
-`lock` is the bakery lock data structure when `USE_COHERENT_MEM` is enabled.
-The `aff_map_index` is used to identify the bakery lock when `USE_COHERENT_MEM`
-is disabled.
+The field `local_state` can be concurrently accessed by multiple CPUs in
+different cache states. A Lamport's Bakery lock (`psci_locks`) is used to
+ensure mutual exclusion for this field, and a clean and invalidate is needed
+after it is written.
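+
+A minimal sketch (not the exact Trusted Firmware code; `node` and
+`target_state` are placeholder names) of such an update followed by cache
+maintenance using the `flush_dcache_range()` helper is:
+
+    node->local_state = target_state;
+    flush_dcache_range((uint64_t) &node->local_state,
+                       sizeof(node->local_state));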
 
 ### Bakery lock data
 
@@ -1563,9 +1577,13 @@
 defined as follows:
 
     typedef struct bakery_lock {
-        int owner;
-        volatile char entering[BAKERY_LOCK_MAX_CPUS];
-        volatile unsigned number[BAKERY_LOCK_MAX_CPUS];
+        /*
+         * The lock_data is a bit-field of 2 members:
+         * Bit[0]       : choosing. This field is set when the CPU is
+         *                choosing its bakery number.
+         * Bits[1 - 15] : number. This is the bakery number allocated.
+         */
+        volatile uint16_t lock_data[BAKERY_LOCK_MAX_CPUS];
     } bakery_lock_t;
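+
+A minimal sketch of how the two fields could be unpacked (hypothetical helper
+macros, derived only from the layout described above) is:
+
+    #define BAKERY_CHOOSING(d)   ((d) & 0x1)
+    #define BAKERY_NUMBER(d)     (((d) >> 1) & 0x7FFF)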
 
 It is a characteristic of Lamport's Bakery algorithm that the volatile per-CPU
@@ -1589,17 +1607,14 @@
 
 To use bakery locks when `USE_COHERENT_MEM` is disabled, the lock data structure
 has been redesigned. The changes utilise the characteristic of Lamport's Bakery
-algorithm mentioned earlier. The per-CPU fields of the new lock structure are
-aligned such that they are allocated on separate cache lines. The per-CPU data
-framework in Trusted Firmware is used to achieve this. This enables software to
+algorithm mentioned earlier. The `bakery_lock` structure only allocates the
+memory for a single CPU. The macro `DEFINE_BAKERY_LOCK` allocates all the bakery
+locks needed by a CPU into the `bakery_lock` section. The linker allocates the
+memory for the other cores by taking the total size of the `bakery_lock` section
+and multiplying it by (PLATFORM_CORE_COUNT - 1). This enables software to
 perform software cache maintenance on the lock data structure without running
 into coherency issues associated with mismatched attributes.
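+
+With this redesign the lock API is the same whether or not `USE_COHERENT_MEM`
+is enabled. A minimal usage sketch (hypothetical lock and function names) is:
+
+    DEFINE_BAKERY_LOCK(plat_foo_lock);
+
+    void plat_foo_critical_op(void)
+    {
+        bakery_lock_get(&plat_foo_lock);
+        /* ... critical section ... */
+        bakery_lock_release(&plat_foo_lock);
+    }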
 
-The per-CPU data framework enables consolidation of data structures on the
-fewest cache lines possible. This saves memory as compared to the scenario where
-each data structure is separately aligned to the cache line boundary to achieve
-the same effect.
-
 The bakery lock data structure `bakery_info_t` is defined for use when
 `USE_COHERENT_MEM` is disabled as follows:
 
@@ -1615,12 +1630,10 @@
 
 The `bakery_info_t` represents a single per-CPU field of one lock and
 the combination of corresponding `bakery_info_t` structures for all CPUs in the
-system represents the complete bakery lock. It is embedded in the per-CPU
-data framework `cpu_data` as shown below:
+system represents the complete bakery lock. The memory layout for a system
+with 'N' bakery locks is as follows:
 
-      CPU0 cpu_data
-    ------------------
-    | ....           |
+    bakery_lock section start
     |----------------|
     | `bakery_info_t`| <-- Lock_0 per-CPU field
     |    Lock_0      |     for CPU0
@@ -1633,12 +1646,11 @@
     | `bakery_info_t`| <-- Lock_N per-CPU field
     |    Lock_N      |     for CPU0
     ------------------
-
-
-      CPU1 cpu_data
+    |    XXXXX       |
+    | Padding to     |
+    | next Cache WB  | <--- Calculate PERCPU_BAKERY_LOCK_SIZE, allocate
+    |  Granule       |       contiguous memory for remaining CPUs.
     ------------------
-    | ....           |
-    |----------------|
     | `bakery_info_t`| <-- Lock_0 per-CPU field
     |    Lock_0      |     for CPU1
     |----------------|
@@ -1650,14 +1662,20 @@
     | `bakery_info_t`| <-- Lock_N per-CPU field
     |    Lock_N      |     for CPU1
     ------------------
+    |    XXXXX       |
+    | Padding to     |
+    | next Cache WB  |
+    |  Granule       |
+    ------------------
 
-Consider a system of 2 CPUs with 'N' bakery locks as shown above.  For an
+Consider a system of 2 CPUs with 'N' bakery locks as shown above. For an
 operation on Lock_N, the corresponding `bakery_info_t` in both CPU0 and CPU1
-`cpu_data` need to be fetched and appropriate cache operations need to be
-performed for each access.
+`bakery_lock` regions needs to be fetched and appropriate cache operations
+need to be performed for each access.
 
-For multiple bakery locks, an array of `bakery_info_t` is declared in `cpu_data`
-and each lock is given an `id` to identify it in the array.
+On ARM platforms, bakery locks are used by the PSCI implementation
+(`psci_locks`) and by the power controller driver (`arm_lock`).
+
 
 ### Non Functional Impact of removing coherent memory
 
@@ -1680,10 +1698,9 @@
 As mentioned earlier, almost a page of memory can be saved by disabling
 `USE_COHERENT_MEM`. Each platform needs to consider these trade-offs to decide
 whether coherent memory should be used. If a platform disables
-`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it should
-reserve memory in `cpu_data` by defining the macro `PLAT_PCPU_DATA_SIZE` (see
-the [Porting Guide]). Refer to the reference platform code for examples.
-
+`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it can
+optionally define the macro `PLAT_PERCPU_BAKERY_LOCK_SIZE` (see the [Porting
+Guide]). Refer to the reference platform code for examples.
 
 12.  Code Structure
 -------------------
diff --git a/docs/porting-guide.md b/docs/porting-guide.md
index 6846ddf..50d36ea 100644
--- a/docs/porting-guide.md
+++ b/docs/porting-guide.md
@@ -76,21 +76,24 @@
 stage. In ARM standard platforms, each BL stage configures the MMU in
 the platform-specific architecture setup function, `blX_plat_arch_setup()`.
 
-If the build option `USE_COHERENT_MEM` is enabled, each platform must allocate a
+If the build option `USE_COHERENT_MEM` is enabled, each platform can allocate a
 block of identity mapped secure memory with Device-nGnRE attributes aligned to
-page boundary (4K) for each BL stage. This memory is identified by the section
-name `tzfw_coherent_mem` so that its possible for the firmware to place
-variables in it using the following C code directive:
+page boundary (4K) for each BL stage. All sections which allocate coherent
+memory are grouped under `coherent_ram`. For example, bakery locks are placed
+in a section identified by the name `bakery_lock` inside `coherent_ram` so that
+it is possible for the firmware to place variables in it using the following C
+code directive:
 
-    __attribute__ ((section("tzfw_coherent_mem")))
+    __attribute__ ((section("bakery_lock")))
 
 Or alternatively the following assembler code directive:
 
-    .section tzfw_coherent_mem
+    .section bakery_lock
 
-The `tzfw_coherent_mem` section is used to allocate any data structures that are
-accessed both when a CPU is executing with its MMU and caches enabled, and when
-it's running with its MMU and caches disabled. Examples are given below.
+The `coherent_ram` section is the sum of all sections, such as `bakery_lock`,
+that are used to allocate any data structures that are accessed both when a CPU
+is executing with its MMU and caches enabled, and when it's running with its
+MMU and caches disabled. Examples are given below.
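+
+For instance, the ARM standard platform code in this patch places the power
+controller driver's lock in the `bakery_lock` section through the
+`DEFINE_BAKERY_LOCK()` macro:
+
+    #define ARM_INSTANTIATE_LOCK	DEFINE_BAKERY_LOCK(arm_lock);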
 
 The following variables, functions and constants must be defined by the platform
 for the firmware to work correctly.
@@ -1150,6 +1153,24 @@
 modes table.
 
 
+### #define : PLAT_PERCPU_BAKERY_LOCK_SIZE [optional]
+
+   When `USE_COHERENT_MEM = 0`, this constant defines the total memory (in
+   bytes) aligned to the cache line boundary that should be allocated per-cpu to
+   accommodate all the bakery locks.
+
+   If this constant is not defined when `USE_COHERENT_MEM = 0`, the linker
+   calculates the size of the `bakery_lock` input section, aligns it to the
+   nearest `CACHE_WRITEBACK_GRANULE`, multiplies it by `PLATFORM_CORE_COUNT`
+   and stores the result in a linker symbol. Defining this constant allows a
+   platform to avoid relying on the linker and to provide a more efficient
+   mechanism for accessing per-cpu bakery lock information.
+
+   If this constant is defined and its value is not equal to the value
+   calculated by the linker then a link time assertion is raised. A compile time
+   assertion is raised if the value of the constant is not aligned to the cache
+   line boundary.
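+
+   As an example, the ARM standard platforms in this patch reserve one cache
+   writeback granule per CPU:
+
+       #define PLAT_PERCPU_BAKERY_LOCK_SIZE		(1 * CACHE_WRITEBACK_GRANULE)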
+
 3.3 Power State Coordination Interface (in BL3-1)
 ------------------------------------------------
 
diff --git a/drivers/arm/ccn/ccn.c b/drivers/arm/ccn/ccn.c
new file mode 100644
index 0000000..aef891b
--- /dev/null
+++ b/drivers/arm/ccn/ccn.c
@@ -0,0 +1,530 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <assert.h>
+#include <bakery_lock.h>
+#include <ccn.h>
+#include <debug.h>
+#include <errno.h>
+#include <mmio.h>
+#include "ccn_private.h"
+
+static const ccn_desc_t *ccn_plat_desc;
+#if IMAGE_BL31
+DEFINE_BAKERY_LOCK(ccn_lock);
+#endif
+
+/*******************************************************************************
+ * This function takes the base address of the CCN's programmer's view (PV), a
+ * region ID of one of the 256 regions (0-255) and a register offset within the
+ * region. It converts the first two parameters into a base address and uses it
+ * to read the register at the offset.
+ ******************************************************************************/
+static inline unsigned long long ccn_reg_read(uintptr_t periphbase,
+			     unsigned int region_id,
+			     unsigned int register_offset)
+{
+	uintptr_t region_base;
+
+	assert(periphbase);
+	assert(region_id < REGION_ID_LIMIT);
+
+	region_base = periphbase + region_id_to_base(region_id);
+	return mmio_read_64(region_base + register_offset);
+}
+
+/*******************************************************************************
+ * This function takes the base address of the CCN's programmer's view (PV), a
+ * region ID of one of the 256 regions (0-255), a register offset within the
+ * region and a value. It converts the first two parameters into a base address
+ * and uses it to write the value in the register at the offset.
+ ******************************************************************************/
+static inline void ccn_reg_write(uintptr_t periphbase,
+			  unsigned int region_id,
+			  unsigned int register_offset,
+			  unsigned long long value)
+{
+	uintptr_t region_base;
+
+	assert(periphbase);
+	assert(region_id < REGION_ID_LIMIT);
+
+	region_base = periphbase + region_id_to_base(region_id);
+	mmio_write_64(region_base + register_offset, value);
+}
+
+#if DEBUG
+
+typedef struct rn_info {
+		unsigned char node_desc[MAX_RN_NODES];
+	} rn_info_t;
+
+/*******************************************************************************
+ * This function takes the base address of the CCN's programmer's view (PV) and
+ * the node ID of a Request Node (RN-D or RN-I). It returns the maximum number
+ * of master interfaces resident on that node. This number is equal to the least
+ * significant two bits of the node type ID + 1.
+ ******************************************************************************/
+static unsigned int ccn_get_rni_mcount(uintptr_t periphbase,
+				       unsigned int rn_id)
+{
+	unsigned int rn_type_id;
+
+	/* Use the node id to find the type of RN-I/D node */
+	rn_type_id = get_node_type(ccn_reg_read(periphbase,
+						rn_id + RNI_REGION_ID_START,
+						REGION_ID_OFFSET));
+
+	/* Return the number of master interfaces based on the node type */
+	return rn_type_id_to_master_cnt(rn_type_id);
+}
+
+/*******************************************************************************
+ * This function reads the CCN registers to find the following information about
+ * the ACE/ACELite/ACELite+DVM/CHI interfaces resident on the various types of
+ * Request Nodes (RN-Fs, RN-Is and RN-Ds) in the system:
+ *
+ * 1. The total number of such interfaces that this CCN IP supports. This is the
+ *    cumulative number of interfaces across all Request node types. It is
+ *    passed back as the return value of this function.
+ *
+ * 2. The maximum number of interfaces of a type resident on a Request node of
+ *    one of the three types. This information is populated in the 'info'
+ *    array provided by the caller as described next.
+ *
+ *    The array has 64 entries. Each entry corresponds to a Request node. The
+ *    Miscellaneous node's programmer's view has RN-F, RN-I and RN-D ID
+ *    registers. For each RN-I and RN-D ID indicated as being present in these
+ *    registers, its identification register (offset 0xFF00) is read. This
+ *    register specifies the maximum number of master interfaces the node
+ *    supports. For RN-Fs it is assumed that there can be only a single fully
+ *    coherent master resident on each node. The counts for each type of node
+ *    are used to populate the array entry at the index corresponding to the node
+ *    ID i.e. rn_info[node ID] = <number of master interfaces>
+ ******************************************************************************/
+static unsigned int ccn_get_rn_master_info(uintptr_t periphbase,
+					   rn_info_t *info)
+{
+	unsigned int num_masters = 0;
+	rn_types_t rn_type;
+
+	assert (info);
+
+	for (rn_type = RN_TYPE_RNF; rn_type < NUM_RN_TYPES; rn_type++) {
+		unsigned int mn_reg_off, node_id;
+		unsigned long long rn_bitmap;
+
+		/*
+		 * RN-F, RN-I and RN-D node registers in the MN region occupy
+		 * contiguous offsets that are 16 bytes apart.
+		 */
+		mn_reg_off = MN_RNF_NODEID_OFFSET + (rn_type << 4);
+		rn_bitmap = ccn_reg_read(periphbase, MN_REGION_ID, mn_reg_off);
+
+		FOR_EACH_PRESENT_NODE_ID(node_id, rn_bitmap) {
+			unsigned int node_mcount;
+
+			/*
+			 * An RN-F does not have a node type since it does not
+			 * export a programmer's interface. It can only have a
+			 * single fully coherent master residing on it. If the
+			 * offset of the MN (Miscellaneous Node) register points
+			 * to a RN-I/D node then the master count is set to the
+			 * maximum number of master interfaces that can possibly
+			 * reside on the node.
+			 */
+			node_mcount = (mn_reg_off == MN_RNF_NODEID_OFFSET ? 1 :
+				       ccn_get_rni_mcount(periphbase, node_id));
+
+			/*
+			 * Use this value to increment the maximum possible
+			 * master interfaces in the system.
+			 */
+			num_masters += node_mcount;
+
+			/*
+			 * Update the entry in 'info' for this node ID with
+			 * the maximum number of masters that can sit on
+			 * it. This information will be used to validate the
+			 * node information passed by the platform later.
+			 */
+			info->node_desc[node_id] = node_mcount;
+		}
+	}
+
+	return num_masters;
+}
+
+/*******************************************************************************
+ * This function validates parameters passed by the platform (in a debug build).
+ * It collects information about the maximum number of master interfaces that:
+ * a) the CCN IP can accommodate and
+ * b) can exist on each Request node.
+ * It compares this with the information provided by the platform to determine
+ * the validity of the latter.
+ ******************************************************************************/
+static void ccn_validate_plat_params(const ccn_desc_t *plat_desc)
+{
+	unsigned int master_id, num_rn_masters;
+	rn_info_t info = { {0} };
+
+	assert(plat_desc);
+	assert(plat_desc->periphbase);
+	assert(plat_desc->master_to_rn_id_map);
+	assert(plat_desc->num_masters);
+	assert(plat_desc->num_masters < CCN_MAX_RN_MASTERS);
+
+	/*
+	 * Find the number and properties of fully coherent, IO coherent and IO
+	 * coherent + DVM master interfaces
+	 */
+	num_rn_masters = ccn_get_rn_master_info(plat_desc->periphbase, &info);
+	assert(plat_desc->num_masters < num_rn_masters);
+
+	/*
+	 * Iterate through the Request nodes specified by the platform.
+	 * Decrement the count of the masters in the 'info' array for each
+	 * Request node encountered. If the count would drop below 0 then the
+	 * platform's view of this aspect of CCN configuration is incorrect.
+	 */
+	for (master_id = 0; master_id < plat_desc->num_masters; master_id++) {
+		unsigned int node_id;
+
+		node_id = plat_desc->master_to_rn_id_map[master_id];
+		assert(node_id < MAX_RN_NODES);
+		assert(info.node_desc[node_id]);
+		info.node_desc[node_id]--;
+	}
+}
+#endif /* DEBUG */
+
+/*******************************************************************************
+ * This function validates parameters passed by the platform (in a debug build)
+ * and initialises its internal data structures. A lock is used to prevent
+ * simultaneous runtime operations (BL31 only) that add and remove Request
+ * nodes from coherency.
+ ******************************************************************************/
+void ccn_init(const ccn_desc_t *plat_desc)
+{
+#if DEBUG
+	ccn_validate_plat_params(plat_desc);
+#endif
+
+	ccn_plat_desc = plat_desc;
+}
+
+/*******************************************************************************
+ * This function converts a bit map of master interface IDs to a bit map of the
+ * Request node IDs that they reside on.
+ ******************************************************************************/
+static unsigned long long ccn_master_to_rn_id_map(unsigned long long master_map)
+{
+	unsigned long long rn_id_map = 0;
+	unsigned int node_id, iface_id;
+
+	assert(master_map);
+	assert(ccn_plat_desc);
+
+	FOR_EACH_PRESENT_MASTER_INTERFACE(iface_id, master_map) {
+
+		/* Convert the master ID into the node ID */
+		node_id = ccn_plat_desc->master_to_rn_id_map[iface_id];
+
+		/* Set the bit corresponding to this node ID */
+		rn_id_map |= (1UL << node_id);
+	}
+
+	return rn_id_map;
+}
+
+/*******************************************************************************
+ * This function executes the necessary operations to add or remove Request node
+ * IDs specified in the 'rn_id_map' bitmap from the snoop/DVM domains specified
+ * in the 'hn_id_map'. The 'region_id' specifies the ID of the first HN-F/HN-I
+ * on which the operation should be performed. 'op_reg_offset' specifies the
+ * type of operation (add/remove). 'stat_reg_offset' specifies the register
+ * which should be polled to determine if the operation has completed or not.
+ ******************************************************************************/
+static void ccn_snoop_dvm_do_op(unsigned long long rn_id_map,
+				unsigned long long hn_id_map,
+				unsigned int region_id,
+				unsigned int op_reg_offset,
+				unsigned int stat_reg_offset)
+{
+	unsigned int start_region_id;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+#if IMAGE_BL31
+	bakery_lock_get(&ccn_lock);
+#endif
+	start_region_id = region_id;
+	FOR_EACH_PRESENT_REGION_ID(start_region_id, hn_id_map) {
+		ccn_reg_write(ccn_plat_desc->periphbase,
+			      start_region_id,
+			      op_reg_offset,
+			      rn_id_map);
+	}
+
+	start_region_id = region_id;
+
+	FOR_EACH_PRESENT_REGION_ID(start_region_id, hn_id_map) {
+		WAIT_FOR_DOMAIN_CTRL_OP_COMPLETION(start_region_id,
+						   stat_reg_offset,
+						   op_reg_offset,
+						   rn_id_map);
+	}
+
+#if IMAGE_BL31
+	bakery_lock_release(&ccn_lock);
+#endif
+}
+
+/*******************************************************************************
+ * This function reads the bitmap of Home nodes on the basis of the
+ * 'mn_hn_id_reg_offset' parameter from the Miscellaneous node's (MN)
+ * programmer's view. The MN has a register which carries the bitmap of present
+ * Home nodes of each type i.e. HN-Fs, HN-Is & HN-Ds. It calls
+ * 'ccn_snoop_dvm_do_op()' with this information to perform the actual
+ * operation.
+ ******************************************************************************/
+static void ccn_snoop_dvm_domain_common(unsigned long long rn_id_map,
+					unsigned int hn_op_reg_offset,
+					unsigned int hn_stat_reg_offset,
+					unsigned int mn_hn_id_reg_offset,
+					unsigned int hn_region_id)
+{
+	unsigned long long mn_hn_id_map;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+	mn_hn_id_map = ccn_reg_read(ccn_plat_desc->periphbase,
+				    MN_REGION_ID,
+				    mn_hn_id_reg_offset);
+	ccn_snoop_dvm_do_op(rn_id_map,
+			    mn_hn_id_map,
+			    hn_region_id,
+			    hn_op_reg_offset,
+			    hn_stat_reg_offset);
+}
+
+/*******************************************************************************
+ * The following functions provide the boot and runtime API to the platform for
+ * adding and removing master interfaces from the snoop/DVM domains. A bitmap of
+ * master interfaces IDs is passed as a parameter. It is converted into a bitmap
+ * of Request node IDs using the mapping provided by the platform while
+ * initialising the driver.
+ * For example, consider a dual cluster system where the clusters have values 0
+ * & 1 in the affinity level 1 field of their respective MPIDRs. While
+ * initialising this driver, the platform provides the mapping between each
+ * cluster and the corresponding Request node. To add or remove a cluster from
+ * the snoop and dvm domain, the bit position corresponding to the cluster ID
+ * should be set in the 'master_iface_map' i.e. to remove both clusters the
+ * bitmap would equal 0x3 (binary 11).
+ ******************************************************************************/
+void ccn_enter_snoop_dvm_domain(unsigned long long master_iface_map)
+{
+	unsigned long long rn_id_map;
+
+	rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    HNF_SDC_SET_OFFSET,
+				    HNF_SDC_STAT_OFFSET,
+				    MN_HNF_NODEID_OFFSET,
+				    HNF_REGION_ID_START);
+
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    MN_DDC_SET_OFF,
+				    MN_DDC_STAT_OFFSET,
+				    MN_HNI_NODEID_OFFSET,
+				    MN_REGION_ID);
+}
+
+void ccn_exit_snoop_dvm_domain(unsigned long long master_iface_map)
+{
+	unsigned long long rn_id_map;
+
+	rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    HNF_SDC_CLR_OFFSET,
+				    HNF_SDC_STAT_OFFSET,
+				    MN_HNF_NODEID_OFFSET,
+				    HNF_REGION_ID_START);
+
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    MN_DDC_CLR_OFFSET,
+				    MN_DDC_STAT_OFFSET,
+				    MN_HNI_NODEID_OFFSET,
+				    MN_REGION_ID);
+}
+
+void ccn_enter_dvm_domain(unsigned long long master_iface_map)
+{
+	unsigned long long rn_id_map;
+
+	rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    MN_DDC_SET_OFF,
+				    MN_DDC_STAT_OFFSET,
+				    MN_HNI_NODEID_OFFSET,
+				    MN_REGION_ID);
+}
+
+void ccn_exit_dvm_domain(unsigned long long master_iface_map)
+{
+	unsigned long long rn_id_map;
+
+	rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    MN_DDC_CLR_OFFSET,
+				    MN_DDC_STAT_OFFSET,
+				    MN_HNI_NODEID_OFFSET,
+				    MN_REGION_ID);
+}
+
+/*******************************************************************************
+ * This function returns the run mode of all the L3 cache partitions in the
+ * system. The state is expected to be one of NO_L3, SF_ONLY, L3_HAM or
+ * L3_FAM. Instead of comparing the states reported by all HN-Fs, the state of
+ * the first present HN-F node is reported. Since the driver does not export an
+ * interface to program them separately, there is no reason to perform this
+ * check. An HN-F could report that the L3 cache is transitioning from one mode
+ * to another e.g. HNF_PM_NOL3_2_SFONLY. In this case, the function waits for
+ * the transition to complete and reports the final state.
+ ******************************************************************************/
+unsigned int ccn_get_l3_run_mode(void)
+{
+	unsigned long long hnf_pstate_stat;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+	/*
+	 * Wait for an L3 cache partition to enter any run mode. The pstate
+	 * parameter is read from an HN-F P-state status register. A non-zero
+	 * value in bits[1:0] means that the cache is transitioning to a run
+	 * mode.
+	 */
+	do {
+		hnf_pstate_stat = ccn_reg_read(ccn_plat_desc->periphbase,
+					       HNF_REGION_ID_START,
+					       HNF_PSTATE_STAT_OFFSET);
+	} while (hnf_pstate_stat & 0x3);
+
+	return PSTATE_TO_RUN_MODE(hnf_pstate_stat);
+}
+
+/*******************************************************************************
+ * This function sets the run mode of all the L3 cache partitions in the
+ * system to one of NO_L3, SF_ONLY, L3_HAM or L3_FAM depending upon the state
+ * specified by the 'mode' argument.
+ ******************************************************************************/
+void ccn_set_l3_run_mode(unsigned int mode)
+{
+	unsigned long long mn_hnf_id_map, hnf_pstate_stat;
+	unsigned int region_id;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+	assert(mode <= CCN_L3_RUN_MODE_FAM);
+
+	mn_hnf_id_map = ccn_reg_read(ccn_plat_desc->periphbase,
+				     MN_REGION_ID,
+				     MN_HNF_NODEID_OFFSET);
+	region_id = HNF_REGION_ID_START;
+
+	/* Program the desired run mode */
+	FOR_EACH_PRESENT_REGION_ID(region_id, mn_hnf_id_map) {
+		ccn_reg_write(ccn_plat_desc->periphbase,
+			      region_id,
+			      HNF_PSTATE_REQ_OFFSET,
+			      mode);
+	}
+
+	/* Wait for the caches to transition to the run mode */
+	region_id = HNF_REGION_ID_START;
+	FOR_EACH_PRESENT_REGION_ID(region_id, mn_hnf_id_map) {
+		/*
+		 * Wait for an L3 cache partition to enter a target run
+		 * mode. The pstate parameter is read from an HN-F P-state
+		 * status register.
+		 */
+		do {
+			hnf_pstate_stat = ccn_reg_read(ccn_plat_desc->periphbase,
+					       region_id,
+					       HNF_PSTATE_STAT_OFFSET);
+		} while (((hnf_pstate_stat & HNF_PSTATE_MASK) >> 2) != mode);
+	}
+}
+
+/*******************************************************************************
+ * This function configures the system address map and provides an option to
+ * enable the 3SN striping mode of Slave node operation. The Slave node IDs and
+ * the Top Address bit1 and bit0 are provided as parameters to this function.
+ * This configuration is needed only if the network contains a single SN-F or
+ * three SN-Fs and must be completed before the first request by the system to
+ * normal memory.
+ ******************************************************************************/
+void ccn_program_sys_addrmap(unsigned int sn0_id,
+		 unsigned int sn1_id,
+		 unsigned int sn2_id,
+		 unsigned int top_addr_bit0,
+		 unsigned int top_addr_bit1,
+		 unsigned char three_sn_en)
+{
+	unsigned long long mn_hnf_id_map, hnf_sam_ctrl_value;
+	unsigned int region_id;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+	mn_hnf_id_map = ccn_reg_read(ccn_plat_desc->periphbase,
+				     MN_REGION_ID,
+				     MN_HNF_NODEID_OFFSET);
+	region_id = HNF_REGION_ID_START;
+	hnf_sam_ctrl_value = MAKE_HNF_SAM_CTRL_VALUE(sn0_id,
+						     sn1_id,
+						     sn2_id,
+						     top_addr_bit0,
+						     top_addr_bit1,
+						     three_sn_en);
+
+	FOR_EACH_PRESENT_REGION_ID(region_id, mn_hnf_id_map) {
+
+		/* Program the SAM control register */
+		ccn_reg_write(ccn_plat_desc->periphbase,
+			      region_id,
+			      HNF_SAM_CTRL_OFFSET,
+			      hnf_sam_ctrl_value);
+	}
+
+}
diff --git a/drivers/arm/ccn/ccn_private.h b/drivers/arm/ccn/ccn_private.h
new file mode 100644
index 0000000..e92e870
--- /dev/null
+++ b/drivers/arm/ccn/ccn_private.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CCN_PRIVATE_H__
+#define __CCN_PRIVATE_H__
+
+/*
+ * A CCN implementation can have a maximum of 64 Request nodes with node IDs
+ * from 0-63. These IDs are split across the three types of Request nodes
+ * i.e. RN-F, RN-D and RN-I.
+ */
+#define MAX_RN_NODES		64
+
+/* Enum used to loop through the 3 types of Request nodes */
+typedef enum rn_types {
+	RN_TYPE_RNF = 0,
+	RN_TYPE_RNI,
+	RN_TYPE_RND,
+	NUM_RN_TYPES
+} rn_types_t;
+
+/* Macro to convert a region id to its base address */
+#define region_id_to_base(id)	((id) << 16)
+
+/*
+ * Macro to calculate the number of master interfaces resident on a RN-I/RN-D.
+ * Value of first two bits of the RN-I/D node type + 1 == Maximum number of
+ * ACE-Lite or ACE-Lite+DVM interfaces supported on this node. E.g.
+ *
+ * 0x14 : RN-I with 1 ACE-Lite interface
+ * 0x15 : RN-I with 2 ACE-Lite interfaces
+ * 0x16 : RN-I with 3 ACE-Lite interfaces
+ */
+#define rn_type_id_to_master_cnt(id)	(((id) & 0x3) + 1)
+
+/*
+ * Constants used to identify a region in the programmer's view. These are
+ * common for all regions.
+ */
+#define REGION_ID_LIMIT		256
+#define REGION_ID_OFFSET	0xFF00
+
+#define REGION_NODE_ID_SHIFT	8
+#define REGION_NODE_ID_MASK	0x7f
+#define get_node_id(id_reg)	(((id_reg) >> REGION_NODE_ID_SHIFT) \
+				 & REGION_NODE_ID_MASK)
+
+#define REGION_NODE_TYPE_SHIFT	0
+#define REGION_NODE_TYPE_MASK	0x1f
+#define get_node_type(id_reg)	(((id_reg) >> REGION_NODE_TYPE_SHIFT) \
+				 & REGION_NODE_TYPE_MASK)
+
+/* Common offsets of registers to enter or exit a snoop/dvm domain */
+#define DOMAIN_CTRL_STAT_OFFSET	0x0200
+#define DOMAIN_CTRL_SET_OFFSET	0x0210
+#define DOMAIN_CTRL_CLR_OFFSET	0x0220
+
+/*
+ * This macro is used to determine if an operation to add or remove a Request
+ * node from the snoop/dvm domain has completed. 'rn_id_map' is a bit map of
+ * nodes. It was used to program the SET or CLEAR control register. The type of
+ * register is specified by 'op_reg_offset'. 'status_reg' is the bit map of
+ * nodes currently present in the snoop/dvm domain. 'rn_id_map' and 'status_reg'
+ * are logically ANDed and the result is stored back in 'status_reg'. There
+ * are two outcomes of this operation:
+ *
+ * 1. If the DOMAIN_CTRL_SET_OFFSET register was programmed, then the set bits in
+ *    'rn_id_map' should appear in 'status_reg' when the operation completes. So
+ *    after the AND operation, at some point of time 'status_reg' should equal
+ *    'rn_id_map'.
+ *
+ * 2. If the DOMAIN_CTRL_CLR_OFFSET register was programmed, then the set bits in
+ *    'rn_id_map' should disappear in 'status_reg' when the operation
+ *    completes. So after the AND operation, at some point of time 'status_reg'
+ *    should equal 0.
+ */
+#define WAIT_FOR_DOMAIN_CTRL_OP_COMPLETION(region_id, stat_reg_offset,		\
+					   op_reg_offset, rn_id_map)		\
+	{									\
+		uint64_t status_reg;						\
+		do {								\
+			status_reg = ccn_reg_read((ccn_plat_desc->periphbase),	\
+						  (region_id),			\
+						  (stat_reg_offset));		\
+			status_reg &= (rn_id_map);				\
+		} while ((op_reg_offset) == DOMAIN_CTRL_SET_OFFSET ?		\
+			 (rn_id_map) != status_reg : status_reg);		\
+	}
+
+/*
+ * Region ID of the Miscellaneous Node is always 0 as it is located at the base of
+ * the programmer's view.
+ */
+#define MN_REGION_ID		0
+
+#define MN_REGION_ID_START	0
+#define DEBUG_REGION_ID_START	1
+#define HNI_REGION_ID_START	8
+#define SBSX_REGION_ID_START	16
+#define HNF_REGION_ID_START	32
+#define XP_REGION_ID_START	64
+#define RNI_REGION_ID_START	128
+
+/* Selected register offsets from the base of a HNF region */
+#define HNF_CFG_CTRL_OFFSET	0x0000
+#define HNF_SAM_CTRL_OFFSET	0x0008
+#define HNF_PSTATE_REQ_OFFSET	0x0010
+#define HNF_PSTATE_STAT_OFFSET	0x0018
+#define HNF_SDC_STAT_OFFSET	DOMAIN_CTRL_STAT_OFFSET
+#define HNF_SDC_SET_OFFSET	DOMAIN_CTRL_SET_OFFSET
+#define HNF_SDC_CLR_OFFSET	DOMAIN_CTRL_CLR_OFFSET
+#define HNF_AUX_CTRL_OFFSET	0x0500
+
+/* Selected register offsets from the base of a MN region */
+#define MN_SAR_OFFSET		0x0000
+#define MN_RNF_NODEID_OFFSET	0x0180
+#define MN_RNI_NODEID_OFFSET	0x0190
+#define MN_RND_NODEID_OFFSET	0x01A0
+#define MN_HNF_NODEID_OFFSET	0x01B0
+#define MN_HNI_NODEID_OFFSET	0x01C0
+#define MN_SN_NODEID_OFFSET	0x01D0
+#define MN_DDC_STAT_OFFSET	DOMAIN_CTRL_STAT_OFFSET
+#define MN_DDC_SET_OFF		DOMAIN_CTRL_SET_OFFSET
+#define MN_DDC_CLR_OFFSET	DOMAIN_CTRL_CLR_OFFSET
+#define MN_ID_OFFSET		REGION_ID_OFFSET
+
+/* HNF System Address Map register bit masks and shifts */
+#define HNF_SAM_CTRL_SN_ID_MASK		0x7f
+#define HNF_SAM_CTRL_SN0_ID_SHIFT	0
+#define HNF_SAM_CTRL_SN1_ID_SHIFT	8
+#define HNF_SAM_CTRL_SN2_ID_SHIFT	16
+
+#define HNF_SAM_CTRL_TAB0_MASK		0x3fUL
+#define HNF_SAM_CTRL_TAB0_SHIFT		48
+#define HNF_SAM_CTRL_TAB1_MASK		0x3fUL
+#define HNF_SAM_CTRL_TAB1_SHIFT		56
+
+#define HNF_SAM_CTRL_3SN_ENB_SHIFT	32
+#define HNF_SAM_CTRL_3SN_ENB_MASK	0x01UL
+
+/*
+ * Macro to create a value suitable for programming into a HNF SAM Control
+ * register for enabling 3SN striping.
+ */
+#define MAKE_HNF_SAM_CTRL_VALUE(sn0, sn1, sn2, tab0, tab1, three_sn_en)     \
+	((((sn0) & HNF_SAM_CTRL_SN_ID_MASK) << HNF_SAM_CTRL_SN0_ID_SHIFT) | \
+	 (((sn1) & HNF_SAM_CTRL_SN_ID_MASK) << HNF_SAM_CTRL_SN1_ID_SHIFT) | \
+	 (((sn2) & HNF_SAM_CTRL_SN_ID_MASK) << HNF_SAM_CTRL_SN2_ID_SHIFT) | \
+	 (((tab0) & HNF_SAM_CTRL_TAB0_MASK) << HNF_SAM_CTRL_TAB0_SHIFT)   | \
+	 (((tab1) & HNF_SAM_CTRL_TAB1_MASK) << HNF_SAM_CTRL_TAB1_SHIFT)   | \
+	 (((three_sn_en) & HNF_SAM_CTRL_3SN_ENB_MASK) << HNF_SAM_CTRL_3SN_ENB_SHIFT))
+
+/* Mask to read the power state value from an HN-F P-state register */
+#define HNF_PSTATE_MASK		0xf
+
+/* Macro to extract the run mode from a p-state value */
+#define PSTATE_TO_RUN_MODE(pstate)	(((pstate) & HNF_PSTATE_MASK) >> 2)
+
+/*
+ * Helper macro that iterates through a given bit map. In each iteration,
+ * it returns the position of the next set bit.
+ * It can be used by other utility macros to iterate through all nodes
+ * or masters given a bit map of them.
+ */
+#define FOR_EACH_BIT(bit_pos, bit_map)			\
+	for (bit_pos = __builtin_ctzll(bit_map);	\
+	     bit_map;					\
+	     bit_map &= ~(1UL << bit_pos),		\
+	     bit_pos = __builtin_ctzll(bit_map))
+
+/*
+ * Utility macro that iterates through a bit map of node IDs. In each
+ * iteration, it returns the ID of the next present node in the bit map. Node
+ * ID of a present node == Position of set bit == Number of zeroes trailing the
+ * bit.
+ */
+#define FOR_EACH_PRESENT_NODE_ID(node_id, bit_map)	\
+		FOR_EACH_BIT(node_id, bit_map)
+
+/*
+ * Helper function to return the number of set bits in a bitmap
+ */
+static inline unsigned int count_set_bits(uint64_t bitmap)
+{
+	unsigned int count = 0;
+
+	for (; bitmap; bitmap &= bitmap - 1)
+		++count;
+
+	return count;
+}
+
+/*
+ * Utility macro that iterates through a bit map of node IDs. In each iteration,
+ * it returns the ID of the next present region corresponding to a node present
+ * in the bit map. The region ID of a present node lies between the passed
+ * region ID and the passed region ID plus the number of set bits in the
+ * bitmap, i.e. the number of present nodes.
+ */
+#define FOR_EACH_PRESENT_REGION_ID(region_id, bit_map)				\
+	for (unsigned long long region_id_limit = count_set_bits(bit_map)	\
+							+ region_id;		\
+	    region_id < region_id_limit;					\
+	    region_id++)
+
+/*
+ * Same macro as FOR_EACH_PRESENT_NODE_ID, but renamed to indicate it traverses
+ * through a bit map of master interfaces.
+ */
+#define FOR_EACH_PRESENT_MASTER_INTERFACE(iface_id, bit_map)	\
+			FOR_EACH_BIT(iface_id, bit_map)
+#endif /* __CCN_PRIVATE_H__ */
diff --git a/include/bl31/services/psci.h b/include/bl31/services/psci.h
index 30a53ca..6298a40 100644
--- a/include/bl31/services/psci.h
+++ b/include/bl31/services/psci.h
@@ -251,9 +251,6 @@
 
 	/* The local power state of this CPU */
 	plat_local_state_t local_state;
-#if !USE_COHERENT_MEM
-	bakery_info_t pcpu_bakery_info[PSCI_NUM_NON_CPU_PWR_DOMAINS];
-#endif
 } psci_cpu_data_t;
 
 /*******************************************************************************
diff --git a/include/common/el3_common_macros.S b/include/common/el3_common_macros.S
index 7946e72..87e172e 100644
--- a/include/common/el3_common_macros.S
+++ b/include/common/el3_common_macros.S
@@ -214,6 +214,21 @@
 	 * ---------------------------------------------------------------------
 	 */
 	.if \_init_c_runtime
+#if IMAGE_BL31
+		/* -------------------------------------------------------------
+		 * Invalidate the RW memory used by the BL31 image. This
+		 * includes the data and NOBITS sections. This is done to
+		 * safeguard against possible corruption of this memory by
+		 * dirty cache lines in a system cache as a result of use by
+		 * an earlier boot loader stage.
+		 * -------------------------------------------------------------
+		 */
+		adr	x0, __RW_START__
+		adr	x1, __RW_END__
+		sub	x1, x1, x0
+		bl	inv_dcache_range
+#endif /* IMAGE_BL31 */
+
 		ldr	x0, =__BSS_START__
 		ldr	x1, =__BSS_SIZE__
 		bl	zeromem16
diff --git a/include/drivers/arm/ccn.h b/include/drivers/arm/ccn.h
new file mode 100644
index 0000000..2361596
--- /dev/null
+++ b/include/drivers/arm/ccn.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CCN_H__
+#define __CCN_H__
+
+/*
+ * This macro defines the maximum number of master interfaces that reside on
+ * Request nodes which the CCN driver can accommodate. The driver APIs to add
+ * and remove Request nodes from snoop/dvm domains take a bit map of master
+ * interfaces as inputs. The largest C data type that can be used is a 64-bit
+ * unsigned integer. Hence the value of 64. The platform will have to ensure
+ * that the master interfaces are numbered from 0-63.
+ */
+#define CCN_MAX_RN_MASTERS	64
+
+/*
+ * The following constants define the various run modes that the platform can
+ * request the CCN driver to place the L3 cache in. These map to the
+ * programmable P-State values in a HN-F P-state register.
+ */
+#define CCN_L3_RUN_MODE_NOL3	0x0	/* HNF_PM_NOL3 */
+#define CCN_L3_RUN_MODE_SFONLY	0x1	/* HNF_PM_SFONLY */
+#define CCN_L3_RUN_MODE_HAM	0x2	/* HNF_PM_HALF */
+#define CCN_L3_RUN_MODE_FAM	0x3	/* HNF_PM_FULL */
+
+/*
+ * The following macro takes the value returned from a read of a HN-F P-state
+ * status register and returns the retention state value.
+ */
+#define CCN_GET_RETENTION_STATE(pstate)	((pstate >> 4) & 0x3)
+
+/*
+ * The following macro takes the value returned from a read of a HN-F P-state
+ * status register and returns the run state value.
+ */
+#define CCN_GET_RUN_STATE(pstate)	(pstate & 0xf)
+
+#ifndef __ASSEMBLY__
+#include <stdint.h>
+
+/*
+ * This structure describes some of the implementation defined attributes of the
+ * CCN IP. It is used by the platform port to specify these attributes in order
+ * to initialise the CCN driver. The attributes are described below.
+ *
+ * 1. The 'num_masters' field specifies the total number of master interfaces
+ *    resident on Request nodes.
+ *
+ * 2. The 'master_to_rn_id_map' field is a pointer to an array in which each
+ *    index corresponds to a master interface and its value corresponds to the
+ *    Request node on which the master interface resides.
+ *    This field is not simply defined as an array of size CCN_MAX_RN_MASTERS.
+ *    In reality, a platform will have far fewer master interfaces than
+ *    CCN_MAX_RN_MASTERS. With an array of this size, it would also have to
+ *    set the unused entries to a suitable value. Zeroing the array would not
+ *    be enough since 0 is also a valid node id. Hence, such an array is not
+ *    used.
+ *
+ * 3. The 'periphbase' field is the base address of the programmer's view of the
+ *    CCN IP.
+ */
+typedef struct ccn_desc {
+	unsigned int num_masters;
+	const unsigned char *master_to_rn_id_map;
+	uintptr_t periphbase;
+} ccn_desc_t;
+
+
+void ccn_init(const ccn_desc_t *plat_ccn_desc);
+void ccn_enter_snoop_dvm_domain(unsigned long long master_iface_map);
+void ccn_exit_snoop_dvm_domain(unsigned long long master_iface_map);
+void ccn_enter_dvm_domain(unsigned long long master_iface_map);
+void ccn_exit_dvm_domain(unsigned long long master_iface_map);
+void ccn_set_l3_run_mode(unsigned int mode);
+void ccn_program_sys_addrmap(unsigned int sn0_id,
+		 unsigned int sn1_id,
+		 unsigned int sn2_id,
+		 unsigned int top_addr_bit0,
+		 unsigned int top_addr_bit1,
+		 unsigned char three_sn_en);
+unsigned int ccn_get_l3_run_mode(void);
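+
+/*
+ * Illustrative platform usage sketch (the names and values below, such as
+ * PLAT_CCN_BASE and the Request node IDs, are hypothetical):
+ *
+ *	static const unsigned char master_to_rn_id_map[] = {1, 9};
+ *
+ *	static const ccn_desc_t plat_ccn_desc = {
+ *		.num_masters = 2,
+ *		.master_to_rn_id_map = master_to_rn_id_map,
+ *		.periphbase = PLAT_CCN_BASE
+ *	};
+ *
+ *	ccn_init(&plat_ccn_desc);
+ *	ccn_enter_snoop_dvm_domain(1 << 0);
+ */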
+
+#endif /* __ASSEMBLY__ */
+#endif /* __CCN_H__ */
diff --git a/include/drivers/arm/nic_400.h b/include/drivers/arm/nic_400.h
new file mode 100644
index 0000000..1031662
--- /dev/null
+++ b/include/drivers/arm/nic_400.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NIC_400_H__
+#define __NIC_400_H__
+
+/*
+ * Address of slave 'n' security setting in the NIC-400 address region
+ * control
+ */
+#define NIC400_ADDR_CTRL_SECURITY_REG(n)	(0x8 + (n) * 4)
+
+#endif /* __NIC_400_H__ */
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index b7ab3da..d01ea31 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -145,6 +145,7 @@
 DEFINE_SYSOP_TYPE_PARAM_FUNC(at, s12e0w)
 
 void flush_dcache_range(uint64_t, uint64_t);
+void clean_dcache_range(uint64_t, uint64_t);
 void inv_dcache_range(uint64_t, uint64_t);
 void dcsw_op_louis(uint32_t);
 void dcsw_op_all(uint32_t);
diff --git a/include/lib/bakery_lock.h b/include/lib/bakery_lock.h
index 2e1afa2..86adb9c 100644
--- a/include/lib/bakery_lock.h
+++ b/include/lib/bakery_lock.h
@@ -56,6 +56,11 @@
  * External bakery lock interface.
  ****************************************************************************/
 #if USE_COHERENT_MEM
+/*
+ * Bakery locks are stored in coherent memory
+ *
+ * Each lock's data is contiguous and fully allocated by the compiler
+ */
 
 typedef struct bakery_lock {
 	/*
@@ -67,12 +72,15 @@
 	volatile uint16_t lock_data[BAKERY_LOCK_MAX_CPUS];
 } bakery_lock_t;
 
-void bakery_lock_init(bakery_lock_t *bakery);
-void bakery_lock_get(bakery_lock_t *bakery);
-void bakery_lock_release(bakery_lock_t *bakery);
-int bakery_lock_try(bakery_lock_t *bakery);
-
 #else
+/*
+ * Bakery locks are stored in normal .bss memory
+ *
+ * Each lock's data is spread across multiple cache lines, one per CPU,
+ * but multiple locks can share the same cache line.
+ * The compiler will allocate enough memory for one CPU's bakery locks;
+ * the remaining cache lines are allocated by the linker script.
+ */
 
 typedef struct bakery_info {
 	/*
@@ -84,9 +92,19 @@
 	volatile uint16_t lock_data;
 } bakery_info_t;
 
-void bakery_lock_get(unsigned int id, unsigned int offset);
-void bakery_lock_release(unsigned int id, unsigned int offset);
+typedef bakery_info_t bakery_lock_t;
 
 #endif /* __USE_COHERENT_MEM__ */
+
+inline void bakery_lock_init(bakery_lock_t *bakery) {}
+void bakery_lock_get(bakery_lock_t *bakery);
+void bakery_lock_release(bakery_lock_t *bakery);
+
+#define DEFINE_BAKERY_LOCK(_name) bakery_lock_t _name \
+			__attribute__ ((section("bakery_lock")))
+
+#define DECLARE_BAKERY_LOCK(_name) extern bakery_lock_t _name
+
+
 #endif /* __ASSEMBLY__ */
 #endif /* __BAKERY_LOCK_H__ */
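
With this interface a lock is declared the same way in both memory configurations:
DEFINE_BAKERY_LOCK() provides the storage (the linker script is expected to map the
"bakery_lock" input section into coherent memory or .bss as appropriate), and the
pointer-based get/release calls are common to both. A minimal usage sketch with a
hypothetical lock name and critical section:

#include <bakery_lock.h>

/* Hypothetical lock protecting a shared platform resource. */
DEFINE_BAKERY_LOCK(plat_pwrc_lock);

void plat_pwrc_write_seq(void)
{
	bakery_lock_get(&plat_pwrc_lock);
	/* ... access the shared power controller registers ... */
	bakery_lock_release(&plat_pwrc_lock);
}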
diff --git a/include/plat/arm/common/arm_def.h b/include/plat/arm/common/arm_def.h
index 377bfaa..c236970 100644
--- a/include/plat/arm/common/arm_def.h
+++ b/include/plat/arm/common/arm_def.h
@@ -173,10 +173,6 @@
 
 #define ARM_CONSOLE_BAUDRATE		115200
 
-/* TZC related constants */
-#define ARM_TZC_BASE			0x2a4a0000
-
-
 /******************************************************************************
  * Required platform porting definitions common to all ARM standard platforms
  *****************************************************************************/
@@ -210,14 +206,6 @@
  */
 #define CACHE_WRITEBACK_GRANULE		(1 << ARM_CACHE_WRITEBACK_SHIFT)
 
-#if !USE_COHERENT_MEM
-/*
- * Size of the per-cpu data in bytes that should be reserved in the generic
- * per-cpu data structure for the ARM platform port.
- */
-#define PLAT_PCPU_DATA_SIZE		2
-#endif
-
 
 /*******************************************************************************
  * BL1 specific defines.
@@ -305,4 +293,10 @@
 #define TSP_IRQ_SEC_PHY_TIMER		ARM_IRQ_SEC_PHY_TIMER
 
 
+/*
+ * One cache line needed for bakery locks on ARM platforms
+ */
+#define PLAT_PERCPU_BAKERY_LOCK_SIZE		(1 * CACHE_WRITEBACK_GRANULE)
+
+
 #endif /* __ARM_DEF_H__ */
diff --git a/include/plat/arm/common/plat_arm.h b/include/plat/arm/common/plat_arm.h
index 823212c..ad41f4f 100644
--- a/include/plat/arm/common/plat_arm.h
+++ b/include/plat/arm/common/plat_arm.h
@@ -71,14 +71,11 @@
 );
 
 #if IMAGE_BL31
-#if USE_COHERENT_MEM
-
 /*
  * Use this macro to instantiate lock before it is used in below
  * arm_lock_xxx() macros
  */
-#define ARM_INSTANTIATE_LOCK	bakery_lock_t arm_lock	\
-	__attribute__ ((section("tzfw_coherent_mem")));
+#define ARM_INSTANTIATE_LOCK	DEFINE_BAKERY_LOCK(arm_lock);
 
 /*
  * These are wrapper macros to the Coherent Memory Bakery Lock API.
@@ -89,58 +86,9 @@
 
 #else
 
-/*******************************************************************************
- * Constants to specify how many bakery locks this platform implements. These
- * are used if the platform chooses not to use coherent memory for bakery lock
- * data structures.
- ******************************************************************************/
-#define ARM_MAX_BAKERIES	1
-#define ARM_PWRC_BAKERY_ID	0
-
-/* Empty definition */
-#define ARM_INSTANTIATE_LOCK
-
-/*******************************************************************************
- * Definition of structure which holds platform specific per-cpu data. Currently
- * it holds only the bakery lock information for each cpu.
- ******************************************************************************/
-typedef struct arm_cpu_data {
-	bakery_info_t pcpu_bakery_info[ARM_MAX_BAKERIES];
-} arm_cpu_data_t;
-
-/* Macro to define the offset of bakery_info_t in arm_cpu_data_t */
-#define ARM_CPU_DATA_LOCK_OFFSET	__builtin_offsetof\
-					    (arm_cpu_data_t, pcpu_bakery_info)
-
-
-/*******************************************************************************
- * Helper macros for bakery lock api when using the above arm_cpu_data_t for
- * bakery lock data structures. It assumes that the bakery_info is at the
- * beginning of the platform specific per-cpu data.
- ******************************************************************************/
-#define arm_lock_init()		/* No init required */
-#define arm_lock_get()		bakery_lock_get(ARM_PWRC_BAKERY_ID,	\
-					CPU_DATA_PLAT_PCPU_OFFSET +	\
-					ARM_CPU_DATA_LOCK_OFFSET)
-#define arm_lock_release()	bakery_lock_release(ARM_PWRC_BAKERY_ID,	\
-					CPU_DATA_PLAT_PCPU_OFFSET +	\
-					ARM_CPU_DATA_LOCK_OFFSET)
-
 /*
- * Ensure that the size of the platform specific per-cpu data structure and
- * the size of the memory allocated in generic per-cpu data for the platform
- * are the same.
+ * Empty macros for BL stages other than BL3-1
  */
-CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(arm_cpu_data_t),
-	arm_pcpu_data_size_mismatch);
-
-#endif /* USE_COHERENT_MEM */
-
-#else
-
-/*
-* Dummy macros for all other BL stages other than BL3-1
-*/
 #define ARM_INSTANTIATE_LOCK
 #define arm_lock_init()
 #define arm_lock_get()
diff --git a/include/plat/arm/css/common/css_def.h b/include/plat/arm/css/common/css_def.h
index 157a22f..38ff9dd 100644
--- a/include/plat/arm/css/common/css_def.h
+++ b/include/plat/arm/css/common/css_def.h
@@ -111,6 +111,9 @@
 
 /* TZC related constants */
 #define PLAT_ARM_TZC_FILTERS		REG_ATTR_FILTER_BIT_ALL
+#define PLAT_ARM_TZC_BASE		0x2a4a0000
 
+/* System timer related constants */
+#define PLAT_ARM_NSTIMER_FRAME_ID	1
 
 #endif /* __CSS_DEF_H__ */
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index 0dbab1b..476b906 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -32,6 +32,7 @@
 #include <asm_macros.S>
 
 	.globl	flush_dcache_range
+	.globl	clean_dcache_range
 	.globl	inv_dcache_range
 	.globl	dcsw_op_louis
 	.globl	dcsw_op_all
@@ -39,25 +40,39 @@
 	.globl	dcsw_op_level2
 	.globl	dcsw_op_level3
 
-	/* ------------------------------------------
-	 * Clean+Invalidate from base address till
-	 * size. 'x0' = addr, 'x1' = size
-	 * ------------------------------------------
-	 */
-func flush_dcache_range
+/*
+ * This macro can be used for implementing various data cache operations `op`
+ */
+.macro do_dcache_maintenance_by_mva op
 	dcache_line_size x2, x3
 	add	x1, x0, x1
 	sub	x3, x2, #1
 	bic	x0, x0, x3
-flush_loop:
-	dc	civac, x0
+loop_\op:
+	dc	\op, x0
 	add	x0, x0, x2
 	cmp	x0, x1
-	b.lo    flush_loop
+	b.lo    loop_\op
 	dsb	sy
 	ret
+.endm
+	/* ------------------------------------------
+	 * Clean+Invalidate from base address till
+	 * size. 'x0' = addr, 'x1' = size
+	 * ------------------------------------------
+	 */
+func flush_dcache_range
+	do_dcache_maintenance_by_mva civac
 endfunc flush_dcache_range
 
+	/* ------------------------------------------
+	 * Clean from base address till size.
+	 * 'x0' = addr, 'x1' = size
+	 * ------------------------------------------
+	 */
+func clean_dcache_range
+	do_dcache_maintenance_by_mva cvac
+endfunc clean_dcache_range
 
 	/* ------------------------------------------
 	 * Invalidate from base address till
@@ -65,17 +80,7 @@
 	 * ------------------------------------------
 	 */
 func inv_dcache_range
-	dcache_line_size x2, x3
-	add	x1, x0, x1
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-inv_loop:
-	dc	ivac, x0
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo    inv_loop
-	dsb	sy
-	ret
+	do_dcache_maintenance_by_mva ivac
 endfunc inv_dcache_range
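
For readers less comfortable with the assembly, the macro is roughly the following loop
in C: align the start address down to a cache-line boundary, issue one `dc` operation per
line until the end of the range is passed, then barrier. Illustration only;
cache_line_size() and dsb_sy() are assumed stand-ins for the CTR_EL0-derived line size
and the trailing `dsb sy`, and the real routines must stay in assembly so they can run
with the MMU and caches in any state.

/* Assumed helpers, declared only to make the sketch self-contained. */
extern unsigned long long cache_line_size(void);	/* line size from CTR_EL0 */
extern void dsb_sy(void);				/* 'dsb sy' barrier */

typedef void (*dc_op_t)(unsigned long long mva);	/* issues dc civac/cvac/ivac */

static void dcache_op_range(unsigned long long addr, unsigned long long size,
			    dc_op_t op)
{
	unsigned long long line = cache_line_size();
	unsigned long long end = addr + size;

	addr &= ~(line - 1ULL);		/* bic x0, x0, x3 */
	do {
		op(addr);		/* dc <op>, x0 */
		addr += line;
	} while (addr < end);		/* b.lo loop_<op> */
	dsb_sy();
}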
 
 
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index 5f80b59..e7c246e 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -141,9 +141,6 @@
 
 /* ---------------------------------------------------------------------------
  * Disable the MMU at EL3
- * This is implemented in assembler to ensure that the data cache is cleaned
- * and invalidated after the MMU is disabled without any intervening cacheable
- * data accesses
  * ---------------------------------------------------------------------------
  */
 
@@ -154,8 +151,8 @@
 	bic	x0, x0, x1
 	msr	sctlr_el3, x0
 	isb				// ensure MMU is off
-	mov	x0, #DCCISW		// DCache clean and invalidate
-	b	dcsw_op_all
+	dsb	sy
+	ret
 endfunc disable_mmu_el3
 
 
diff --git a/lib/locks/bakery/bakery_lock_coherent.c b/lib/locks/bakery/bakery_lock_coherent.c
index 1c60dba..f221222 100644
--- a/lib/locks/bakery/bakery_lock_coherent.c
+++ b/lib/locks/bakery/bakery_lock_coherent.c
@@ -63,16 +63,6 @@
 	assert(entry < BAKERY_LOCK_MAX_CPUS);		\
 } while (0)
 
-/* Initialize Bakery Lock to reset all ticket values */
-void bakery_lock_init(bakery_lock_t *bakery)
-{
-	assert(bakery);
-
-	/* All ticket values need to be 0 */
-	memset(bakery, 0, sizeof(*bakery));
-}
-
-
 /* Obtain a ticket for a given CPU */
 static unsigned int bakery_get_ticket(bakery_lock_t *bakery, unsigned int me)
 {
diff --git a/lib/locks/bakery/bakery_lock_normal.c b/lib/locks/bakery/bakery_lock_normal.c
index 3ca76e0..45b870b 100644
--- a/lib/locks/bakery/bakery_lock_normal.c
+++ b/lib/locks/bakery/bakery_lock_normal.c
@@ -56,12 +56,29 @@
  * accesses regardless of status of address translation.
  */
 
-/* This macro assumes that the bakery_info array is located at the offset specified */
-#define get_my_bakery_info(offset, id)		\
-	(((bakery_info_t *) (((uint8_t *)_cpu_data()) + offset)) + id)
+#ifdef PLAT_PERCPU_BAKERY_LOCK_SIZE
+/*
+ * Verify that the platform defined value for the per-cpu space for bakery locks is
+ * a multiple of the cache line size, to prevent multiple CPUs writing to the same
+ * bakery lock cache line
+ *
+ * Using this value, if provided, rather than the linker generated value results in
+ * more efficient code
+ */
+CASSERT((PLAT_PERCPU_BAKERY_LOCK_SIZE & (CACHE_WRITEBACK_GRANULE - 1)) == 0, \
+	PLAT_PERCPU_BAKERY_LOCK_SIZE_not_cacheline_multiple);
+#define PERCPU_BAKERY_LOCK_SIZE (PLAT_PERCPU_BAKERY_LOCK_SIZE)
+#else
+/*
+ * Use the linker-defined symbol which has evaluated the size requirement.
+ * This is not as efficient as using a platform-defined constant.
+ */
+extern void *__PERCPU_BAKERY_LOCK_SIZE__;
+#define PERCPU_BAKERY_LOCK_SIZE ((uintptr_t)&__PERCPU_BAKERY_LOCK_SIZE__)
+#endif
 
-#define get_bakery_info_by_index(offset, id, ix)	\
-	(((bakery_info_t *) (((uint8_t *)_cpu_data_by_index(ix)) + offset)) + id)
+#define get_bakery_info(cpu_ix, lock)	\
+	(bakery_info_t *)((uintptr_t)lock + cpu_ix * PERCPU_BAKERY_LOCK_SIZE)
 
 #define write_cache_op(addr, cached)	\
 				do {	\
@@ -73,7 +90,7 @@
 #define read_cache_op(addr, cached)	if (cached) \
 					    dccivac((uint64_t)addr)
 
-static unsigned int bakery_get_ticket(int id, unsigned int offset,
+static unsigned int bakery_get_ticket(bakery_lock_t *lock,
 						unsigned int me, int is_cached)
 {
 	unsigned int my_ticket, their_ticket;
@@ -84,7 +101,7 @@
 	 * Obtain a reference to the bakery information for this cpu and ensure
 	 * it is not NULL.
 	 */
-	my_bakery_info = get_my_bakery_info(offset, id);
+	my_bakery_info = get_bakery_info(me, lock);
 	assert(my_bakery_info);
 
 	/*
@@ -115,7 +132,7 @@
 		 * Get a reference to the other contender's bakery info and
 		 * ensure that a stale copy is not read.
 		 */
-		their_bakery_info = get_bakery_info_by_index(offset, id, they);
+		their_bakery_info = get_bakery_info(they, lock);
 		assert(their_bakery_info);
 
 		read_cache_op(their_bakery_info, is_cached);
@@ -141,7 +158,7 @@
 	return my_ticket;
 }
 
-void bakery_lock_get(unsigned int id, unsigned int offset)
+void bakery_lock_get(bakery_lock_t *lock)
 {
 	unsigned int they, me, is_cached;
 	unsigned int my_ticket, my_prio, their_ticket;
@@ -153,7 +170,7 @@
 	is_cached = read_sctlr_el3() & SCTLR_C_BIT;
 
 	/* Get a ticket */
-	my_ticket = bakery_get_ticket(id, offset, me, is_cached);
+	my_ticket = bakery_get_ticket(lock, me, is_cached);
 
 	/*
 	 * Now that we got our ticket, compute our priority value, then compare
@@ -168,7 +185,7 @@
 		 * Get a reference to the other contender's bakery info and
 		 * ensure that a stale copy is not read.
 		 */
-		their_bakery_info = get_bakery_info_by_index(offset, id, they);
+		their_bakery_info = get_bakery_info(they, lock);
 		assert(their_bakery_info);
 
 		/* Wait for the contender to get their ticket */
@@ -199,12 +216,12 @@
 	/* Lock acquired */
 }
 
-void bakery_lock_release(unsigned int id, unsigned int offset)
+void bakery_lock_release(bakery_lock_t *lock)
 {
 	bakery_info_t *my_bakery_info;
 	unsigned int is_cached = read_sctlr_el3() & SCTLR_C_BIT;
 
-	my_bakery_info = get_my_bakery_info(offset, id);
+	my_bakery_info = get_bakery_info(plat_my_core_pos(), lock);
 	assert(bakery_ticket_number(my_bakery_info->lock_data));
 
 	my_bakery_info->lock_data = 0;
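
In this scheme the lock symbol itself is CPU 0's slot; CPU n's copy of the same lock
lives n * PERCPU_BAKERY_LOCK_SIZE bytes further on, in the per-CPU cache lines the linker
script reserves. A worked illustration of the arithmetic behind get_bakery_info(),
assuming a 64-byte CACHE_WRITEBACK_GRANULE, four CPUs, and two locks A and B defined with
DEFINE_BAKERY_LOCK():

/*
 * Assumed layout (offsets from __BAKERY_LOCK_START__, illustration only):
 *
 *   +0x00 : CPU 0 -> A.lock_data, B.lock_data   (compiler-allocated cache line)
 *   +0x40 : CPU 1 -> A.lock_data, B.lock_data   (linker-allocated)
 *   +0x80 : CPU 2 -> ...
 *   +0xc0 : CPU 3 -> ...
 *
 * get_bakery_info(cpu_ix, &A) therefore reduces to:
 *   (bakery_info_t *)((uintptr_t)&A + cpu_ix * 0x40)
 */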
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index c2a7d6a..155216a 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -85,6 +85,9 @@
 #define PLAT_ARM_CCI_CLUSTER0_SL_IFACE_IX	3
 #define PLAT_ARM_CCI_CLUSTER1_SL_IFACE_IX	4
 
+/* System timer related constants */
+#define PLAT_ARM_NSTIMER_FRAME_ID		1
+
 /* TrustZone controller related constants
  *
  * Currently only filters 0 and 2 are connected on Base FVP.
@@ -100,6 +103,7 @@
  * Give access to the CPUs and Virtio. Some devices
  * would normally use the default ID so allow that too.
  */
+#define PLAT_ARM_TZC_BASE		0x2a4a0000
 #define PLAT_ARM_TZC_FILTERS		REG_ATTR_FILTER_BIT(0)
 
 #define PLAT_ARM_TZC_NS_DEV_ACCESS	(				\
diff --git a/plat/arm/board/juno/juno_security.c b/plat/arm/board/juno/juno_security.c
index 1de38c3..f9386ca 100644
--- a/plat/arm/board/juno/juno_security.c
+++ b/plat/arm/board/juno/juno_security.c
@@ -29,6 +29,7 @@
  */
 
 #include <mmio.h>
+#include <nic_400.h>
 #include <plat_arm.h>
 #include <soc_css.h>
 #include "juno_def.h"
@@ -48,12 +49,25 @@
 }
 
 /*******************************************************************************
+ * Program CSS-NIC400 to allow non-secure access to some CSS regions.
+ ******************************************************************************/
+static void css_init_nic400(void)
+{
+	/* Note: This is the NIC-400 device on the CSS */
+	mmio_write_32(PLAT_SOC_CSS_NIC400_BASE +
+		NIC400_ADDR_CTRL_SECURITY_REG(CSS_NIC400_SLAVE_BOOTSECURE),
+		~0);
+}
+
+/*******************************************************************************
  * Initialize the secure environment.
  ******************************************************************************/
 void plat_arm_security_setup(void)
 {
 	/* Initialize the TrustZone Controller */
 	arm_tzc_setup();
+	/* Do ARM CSS internal NIC setup */
+	css_init_nic400();
 	/* Do ARM CSS SoC security setup */
 	soc_css_security_setup();
 	/* Initialize the SMMU SSD tables*/
diff --git a/plat/arm/common/arm_bl31_setup.c b/plat/arm/common/arm_bl31_setup.c
index 3fda2ef..899463e 100644
--- a/plat/arm/common/arm_bl31_setup.c
+++ b/plat/arm/common/arm_bl31_setup.c
@@ -40,6 +40,7 @@
 #include <mmio.h>
 #include <plat_arm.h>
 #include <platform.h>
+#include <platform_def.h>
 
 
 /*
@@ -219,9 +220,9 @@
 	reg_val = (1 << CNTACR_RPCT_SHIFT) | (1 << CNTACR_RVCT_SHIFT);
 	reg_val |= (1 << CNTACR_RFRQ_SHIFT) | (1 << CNTACR_RVOFF_SHIFT);
 	reg_val |= (1 << CNTACR_RWVT_SHIFT) | (1 << CNTACR_RWPT_SHIFT);
-	mmio_write_32(ARM_SYS_TIMCTL_BASE + CNTACR_BASE(1), reg_val);
+	mmio_write_32(ARM_SYS_TIMCTL_BASE + CNTACR_BASE(PLAT_ARM_NSTIMER_FRAME_ID), reg_val);
 
-	reg_val = (1 << CNTNSAR_NS_SHIFT(1));
+	reg_val = (1 << CNTNSAR_NS_SHIFT(PLAT_ARM_NSTIMER_FRAME_ID));
 	mmio_write_32(ARM_SYS_TIMCTL_BASE + CNTNSAR, reg_val);
 
 	/* Initialize power controller before setting up topology */
diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk
index 1234619..eb5ae11 100644
--- a/plat/arm/common/arm_common.mk
+++ b/plat/arm/common/arm_common.mk
@@ -74,6 +74,7 @@
 				plat/common/aarch64/plat_common.c
 
 BL1_SOURCES		+=	drivers/arm/cci/cci.c				\
+				drivers/arm/ccn/ccn.c				\
 				drivers/io/io_fip.c				\
 				drivers/io/io_memmap.c				\
 				drivers/io/io_storage.c				\
@@ -91,6 +92,7 @@
 				plat/common/aarch64/platform_up_stack.S
 
 BL31_SOURCES		+=	drivers/arm/cci/cci.c				\
+				drivers/arm/ccn/ccn.c				\
 				drivers/arm/gic/arm_gic.c			\
 				drivers/arm/gic/gic_v2.c			\
 				drivers/arm/gic/gic_v3.c			\
diff --git a/plat/arm/common/arm_security.c b/plat/arm/common/arm_security.c
index 8bee4fe..990d8d4 100644
--- a/plat/arm/common/arm_security.c
+++ b/plat/arm/common/arm_security.c
@@ -47,7 +47,7 @@
 {
 	INFO("Configuring TrustZone Controller\n");
 
-	tzc_init(ARM_TZC_BASE);
+	tzc_init(PLAT_ARM_TZC_BASE);
 
 	/* Disable filters. */
 	tzc_disable_filters();
diff --git a/plat/arm/soc/common/soc_css_security.c b/plat/arm/soc/common/soc_css_security.c
index 36f59ea..37fd37c 100644
--- a/plat/arm/soc/common/soc_css_security.c
+++ b/plat/arm/soc/common/soc_css_security.c
@@ -30,17 +30,10 @@
 
 #include <board_css_def.h>
 #include <mmio.h>
+#include <nic_400.h>
 #include <platform_def.h>
 #include <soc_css_def.h>
 
-/*
- * Address of slave 'n' security setting in the NIC-400 address region
- * control
- * TODO: Ideally this macro should be moved in a "nic-400.h" header file but
- * it would be the only thing in there so it's not worth it at the moment.
- */
-#define NIC400_ADDR_CTRL_SECURITY_REG(n)	(0x8 + (n) * 4)
-
 void soc_css_init_nic400(void)
 {
 	/*
@@ -70,13 +63,6 @@
 		NIC400_ADDR_CTRL_SECURITY_REG(SOC_CSS_NIC400_BOOTSEC_BRIDGE),
 		~SOC_CSS_NIC400_BOOTSEC_BRIDGE_UART1);
 
-	/*
-	 * Allow non-secure access to some CSS regions.
-	 * Note: This is the NIC-400 device on the CSS
-	 */
-	mmio_write_32(PLAT_SOC_CSS_NIC400_BASE +
-		NIC400_ADDR_CTRL_SECURITY_REG(CSS_NIC400_SLAVE_BOOTSECURE),
-		~0);
 }
 
 
diff --git a/plat/mediatek/mt8173/drivers/spm/spm.c b/plat/mediatek/mt8173/drivers/spm/spm.c
index f67daea..7c6d72b 100644
--- a/plat/mediatek/mt8173/drivers/spm/spm.c
+++ b/plat/mediatek/mt8173/drivers/spm/spm.c
@@ -53,7 +53,8 @@
 static int spm_dormant_sta = CPU_DORMANT_RESET;
 #endif
 
-static bakery_lock_t spm_lock __attribute__ ((section("tzfw_coherent_mem")));
+DEFINE_BAKERY_LOCK(spm_lock);
+
 static int spm_hotplug_ready __attribute__ ((section("tzfw_coherent_mem")));
 static int spm_mcdi_ready __attribute__ ((section("tzfw_coherent_mem")));
 static int spm_suspend_ready __attribute__ ((section("tzfw_coherent_mem")));
diff --git a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
index 0d8e370..40d1bab 100644
--- a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
+++ b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
@@ -107,6 +107,23 @@
 	tegra_mc_write_32(MC_SECURITY_CFG1_0, size_in_bytes >> 20);
 }
 
+static void tegra_clear_videomem(uintptr_t non_overlap_area_start,
+				 unsigned long long non_overlap_area_size)
+{
+	/*
+	 * Perform cache maintenance to ensure that the non-overlapping area is
+	 * zeroed out. The first invalidation of this range ensures that
+	 * possible evictions of dirty cache lines do not interfere with the
+	 * 'zeromem16' operation. Other CPUs could speculatively prefetch the
+	 * main memory contents of this area between the first invalidation and
+	 * the 'zeromem16' operation. The second invalidation ensures that any
+	 * such cache lines are removed as well.
+	 */
+	inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
+	zeromem16((void *)non_overlap_area_start, non_overlap_area_size);
+	inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
+}
+
 /*
  * Program the Video Memory carveout region
  *
@@ -118,7 +135,7 @@
 	uintptr_t vmem_end_old = video_mem_base + (video_mem_size << 20);
 	uintptr_t vmem_end_new = phys_base + size_in_bytes;
 	uint32_t regval;
-	uint64_t size;
+	unsigned long long non_overlap_area_size;
 
 	/*
 	 * The GPU is the user of the Video Memory region. In order to
@@ -155,15 +172,15 @@
 
 	disable_mmu_el3();
 	if (phys_base > vmem_end_old || video_mem_base > vmem_end_new) {
-		zeromem16((void *)video_mem_base, video_mem_size << 20);
+		tegra_clear_videomem(video_mem_base, video_mem_size << 20);
 	} else {
 		if (video_mem_base < phys_base) {
-			size = phys_base - video_mem_base;
-			zeromem16((void *)video_mem_base, size);
+			non_overlap_area_size = phys_base - video_mem_base;
+			tegra_clear_videomem(video_mem_base, non_overlap_area_size);
 		}
 		if (vmem_end_old > vmem_end_new) {
-			size = vmem_end_old - vmem_end_new;
-			zeromem16((void *)vmem_end_new, size);
+			non_overlap_area_size = vmem_end_old - vmem_end_new;
+			tegra_clear_videomem(vmem_end_new, non_overlap_area_size);
 		}
 	}
 	enable_mmu_el3(0);
diff --git a/services/std_svc/psci/psci_common.c b/services/std_svc/psci/psci_common.c
index e12df04..7332695 100644
--- a/services/std_svc/psci/psci_common.c
+++ b/services/std_svc/psci/psci_common.c
@@ -78,6 +78,8 @@
 #endif
 ;
 
+DEFINE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
+
 cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
 
 /*******************************************************************************
diff --git a/services/std_svc/psci/psci_on.c b/services/std_svc/psci/psci_on.c
index cf1a782..c37adc2 100644
--- a/services/std_svc/psci/psci_on.c
+++ b/services/std_svc/psci/psci_on.c
@@ -203,7 +203,4 @@
 	 * call to set this cpu on its way.
 	 */
 	cm_prepare_el3_exit(NON_SECURE);
-
-	/* Clean caches before re-entering normal world */
-	dcsw_op_louis(DCCSW);
 }
diff --git a/services/std_svc/psci/psci_private.h b/services/std_svc/psci/psci_private.h
index 9b55d9f..8c028a7 100644
--- a/services/std_svc/psci/psci_private.h
+++ b/services/std_svc/psci/psci_private.h
@@ -42,23 +42,12 @@
  * The following helper macros abstract the interface to the Bakery
  * Lock API.
  */
-#if USE_COHERENT_MEM
-#define psci_lock_init(non_cpu_pd_node, idx)	\
-	bakery_lock_init(&(non_cpu_pd_node)[(idx)].lock)
-#define psci_lock_get(non_cpu_pd_node)		\
-	bakery_lock_get(&((non_cpu_pd_node)->lock))
-#define psci_lock_release(non_cpu_pd_node)	\
-	bakery_lock_release(&((non_cpu_pd_node)->lock))
-#else
 #define psci_lock_init(non_cpu_pd_node, idx)			\
 	((non_cpu_pd_node)[(idx)].lock_index = (idx))
 #define psci_lock_get(non_cpu_pd_node)				\
-	bakery_lock_get((non_cpu_pd_node)->lock_index, 		\
-			CPU_DATA_PSCI_LOCK_OFFSET)
+	bakery_lock_get(&psci_locks[(non_cpu_pd_node)->lock_index])
 #define psci_lock_release(non_cpu_pd_node)			\
-	bakery_lock_release((non_cpu_pd_node)->lock_index,	\
-			     CPU_DATA_PSCI_LOCK_OFFSET)
-#endif
+	bakery_lock_release(&psci_locks[(non_cpu_pd_node)->lock_index])
 
 /*
  * The PSCI capability which are provided by the generic code but does not
@@ -140,12 +129,9 @@
 	plat_local_state_t local_state;
 
 	unsigned char level;
-#if USE_COHERENT_MEM
-	bakery_lock_t lock;
-#else
-	/* For indexing the bakery_info array in per CPU data */
+
+	/* For indexing the psci_locks array */
 	unsigned char lock_index;
-#endif
 } non_cpu_pd_node_t;
 
 typedef struct cpu_pwr_domain_node {
@@ -174,6 +160,9 @@
 extern cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
 extern unsigned int psci_caps;
 
+/* One bakery lock is required for each non-cpu power domain */
+DECLARE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
+
 /*******************************************************************************
  * SPD's power management hooks registered with PSCI
  ******************************************************************************/
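
Each non-CPU power domain node now carries just an index into the statically allocated
psci_locks[] array, so the generic PSCI code takes a node's lock through the common
pointer-based bakery API. A hedged sketch of the resulting call pattern (the function is
hypothetical; psci_non_cpu_pd_nodes is assumed to be the node array already declared by
the PSCI implementation):

/* Hypothetical caller: serialise an update to a non-CPU power domain node. */
static void psci_touch_non_cpu_node(unsigned int node_idx)
{
	non_cpu_pd_node_t *node = &psci_non_cpu_pd_nodes[node_idx];

	psci_lock_get(node);		/* bakery_lock_get(&psci_locks[node->lock_index]) */
	/* ... read or modify the domain's power state bookkeeping ... */
	psci_lock_release(node);	/* bakery_lock_release(&psci_locks[node->lock_index]) */
}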
diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c
index 94fe630..cd1bb09 100644
--- a/services/std_svc/psci/psci_setup.c
+++ b/services/std_svc/psci/psci_setup.c
@@ -181,12 +181,6 @@
 
 	/* Validate the sanity of array exported by the platform */
 	assert(j == PLATFORM_CORE_COUNT);
-
-#if !USE_COHERENT_MEM
-	/* Flush the non CPU power domain data to memory */
-	flush_dcache_range((uintptr_t) &psci_non_cpu_pd_nodes,
-			   sizeof(psci_non_cpu_pd_nodes));
-#endif
 }
 
 /*******************************************************************************
@@ -227,18 +221,6 @@
 	psci_cpu_pd_nodes[plat_my_core_pos()].mpidr =
 		read_mpidr() & MPIDR_AFFINITY_MASK;
 
-#if !USE_COHERENT_MEM
-	/*
-	 * The psci_non_cpu_pd_nodes only needs flushing when it's not allocated in
-	 * coherent memory.
-	 */
-	flush_dcache_range((uintptr_t) &psci_non_cpu_pd_nodes,
-			   sizeof(psci_non_cpu_pd_nodes));
-#endif
-
-	flush_dcache_range((uintptr_t) &psci_cpu_pd_nodes,
-			   sizeof(psci_cpu_pd_nodes));
-
 	psci_init_req_local_pwr_states();
 
 	/*
diff --git a/services/std_svc/psci/psci_suspend.c b/services/std_svc/psci/psci_suspend.c
index 675ef9e..bd0c5db 100644
--- a/services/std_svc/psci/psci_suspend.c
+++ b/services/std_svc/psci/psci_suspend.c
@@ -261,7 +261,4 @@
 	 * call to set this cpu on its way.
 	 */
 	cm_prepare_el3_exit(NON_SECURE);
-
-	/* Clean caches before re-entering normal world */
-	dcsw_op_louis(DCCSW);
 }