Merge pull request #394 from achingupta/ag/ccn_driver

Support for ARM CoreLink CCN interconnects
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index 987d30e..1d26229 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -82,6 +82,20 @@
 	b.ne	_panic
 
 	/* ---------------------------------------------
+	 * Invalidate the RW memory used by the BL2
+	 * image. This includes the data and NOBITS
+	 * sections. This is done to safeguard against
+	 * possible corruption of this memory by dirty
+	 * cache lines in a system cache as a result of
+	 * use by an earlier boot loader stage.
+	 * ---------------------------------------------
+	 */
+	adr	x0, __RW_START__
+	adr	x1, __RW_END__
+	sub	x1, x1, x0
+	bl	inv_dcache_range
+
+	/* ---------------------------------------------
 	 * Zero out NOBITS sections. There are 2 of them:
 	 *   - the .bss section;
 	 *   - the coherent memory section.
diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S
index 33588e6..a660bda 100644
--- a/bl2/bl2.ld.S
+++ b/bl2/bl2.ld.S
@@ -68,6 +68,12 @@
         __RO_END__ = .;
     } >RAM
 
+    /*
+     * Define a linker symbol to mark start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = . ;
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
@@ -121,6 +127,11 @@
     } >RAM
 #endif
 
+    /*
+     * Define a linker symbol to mark end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
     __BL2_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 5ba0f9c..636b1d2 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -113,5 +113,22 @@
 	 */
 	bl	bl31_main
 
+	/* -------------------------------------------------------------
+	 * Clean the .data & .bss sections to main memory. This ensures
+	 * that any global data which was initialised by the primary CPU
+	 * is visible to secondary CPUs before they enable their data
+	 * caches and participate in coherency.
+	 * -------------------------------------------------------------
+	 */
+	adr	x0, __DATA_START__
+	adr	x1, __DATA_END__
+	sub	x1, x1, x0
+	bl	clean_dcache_range
+
+	adr	x0, __BSS_START__
+	adr	x1, __BSS_END__
+	sub	x1, x1, x0
+	bl	clean_dcache_range
+
 	b	el3_exit
 endfunc bl31_entrypoint
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 0639d81..7250791 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -81,6 +81,12 @@
     ASSERT(__CPU_OPS_END__ > __CPU_OPS_START__,
            "cpu_ops not defined for this platform.")
 
+    /*
+     * Define a linker symbol to mark start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = . ;
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
@@ -165,6 +171,11 @@
     } >RAM
 #endif
 
+    /*
+     * Define a linker symbol to mark end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
     __BL31_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
diff --git a/bl31/bl31_main.c b/bl31/bl31_main.c
index a1a3710..a244a5c 100644
--- a/bl31/bl31_main.c
+++ b/bl31/bl31_main.c
@@ -87,9 +87,6 @@
 	INFO("BL3-1: Initializing runtime services\n");
 	runtime_svc_init();
 
-	/* Clean caches before re-entering normal world */
-	dcsw_op_all(DCCSW);
-
 	/*
 	 * All the cold boot actions on the primary cpu are done. We now need to
 	 * decide which is the next image (BL32 or BL33) and how to execute it.
diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S
index 4e8da74..9732ff2 100644
--- a/bl32/tsp/aarch64/tsp_entrypoint.S
+++ b/bl32/tsp/aarch64/tsp_entrypoint.S
@@ -99,6 +99,20 @@
 	isb
 
 	/* ---------------------------------------------
+	 * Invalidate the RW memory used by the BL32
+	 * image. This includes the data and NOBITS
+	 * sections. This is done to safeguard against
+	 * possible corruption of this memory by dirty
+	 * cache lines in a system cache as a result of
+	 * use by an earlier boot loader stage.
+	 * ---------------------------------------------
+	 */
+	adr	x0, __RW_START__
+	adr	x1, __RW_END__
+	sub	x1, x1, x0
+	bl	inv_dcache_range
+
+	/* ---------------------------------------------
 	 * Zero out NOBITS sections. There are 2 of them:
 	 *   - the .bss section;
 	 *   - the coherent memory section.
diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S
index d411ad0..41c4b4a 100644
--- a/bl32/tsp/tsp.ld.S
+++ b/bl32/tsp/tsp.ld.S
@@ -62,6 +62,12 @@
         __RO_END__ = .;
     } >RAM
 
+    /*
+     * Define a linker symbol to mark start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = . ;
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
@@ -119,6 +125,11 @@
     } >RAM
 #endif
 
+    /*
+     * Define a linker symbol to mark the end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
     __BL32_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
diff --git a/drivers/arm/ccn/ccn.c b/drivers/arm/ccn/ccn.c
new file mode 100644
index 0000000..aef891b
--- /dev/null
+++ b/drivers/arm/ccn/ccn.c
@@ -0,0 +1,530 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <assert.h>
+#include <bakery_lock.h>
+#include <ccn.h>
+#include <debug.h>
+#include <errno.h>
+#include <mmio.h>
+#include "ccn_private.h"
+
+static const ccn_desc_t *ccn_plat_desc;
+#if IMAGE_BL31
+DEFINE_BAKERY_LOCK(ccn_lock);
+#endif
+
+/*******************************************************************************
+ * This function takes the base address of the CCN's programmer's view (PV), a
+ * region ID of one of the 256 regions (0-255) and a register offset within the
+ * region. It converts the first two parameters into a base address and uses it
+ * to read the register at the offset.
+ ******************************************************************************/
+static inline unsigned long long ccn_reg_read(uintptr_t periphbase,
+			     unsigned int region_id,
+			     unsigned int register_offset)
+{
+	uintptr_t region_base;
+
+	assert(periphbase);
+	assert(region_id < REGION_ID_LIMIT);
+
+	region_base = periphbase + region_id_to_base(region_id);
+	return mmio_read_64(region_base + register_offset);
+}
+
+/*******************************************************************************
+ * This function takes the base address of the CCN's programmer's view (PV), a
+ * region ID of one of the 256 regions (0-255), a register offset within the
+ * region and a value. It converts the first two parameters into a base address
+ * and uses it to write the value in the register at the offset.
+ ******************************************************************************/
+static inline void ccn_reg_write(uintptr_t periphbase,
+			  unsigned int region_id,
+			  unsigned int register_offset,
+			  unsigned long long value)
+{
+	uintptr_t region_base;
+
+	assert(periphbase);
+	assert(region_id < REGION_ID_LIMIT);
+
+	region_base = periphbase + region_id_to_base(region_id);
+	mmio_write_64(region_base + register_offset, value);
+}
+
+#if DEBUG
+
+typedef struct rn_info {
+		unsigned char node_desc[MAX_RN_NODES];
+	} rn_info_t;
+
+/*******************************************************************************
+ * This function takes the base address of the CCN's programmer's view (PV) and
+ * the node ID of a Request Node (RN-D or RN-I). It returns the maximum number
+ * of master interfaces resident on that node. This number is equal to the least
+ * significant two bits of the node type ID + 1.
+ ******************************************************************************/
+static unsigned int ccn_get_rni_mcount(uintptr_t periphbase,
+				       unsigned int rn_id)
+{
+	unsigned int rn_type_id;
+
+	/* Read the RN-I/D region's ID register and extract its node type */
+	rn_type_id = get_node_type(ccn_reg_read(periphbase,
+						rn_id + RNI_REGION_ID_START,
+						REGION_ID_OFFSET));
+
+	/* Return the number of master interfaces based on node type */
+	return rn_type_id_to_master_cnt(rn_type_id);
+}
+
+/*******************************************************************************
+ * This function reads the CCN registers to find the following information about
+ * the ACE/ACELite/ACELite+DVM/CHI interfaces resident on the various types of
+ * Request Nodes (RN-Fs, RN-Is and RN-Ds) in the system:
+ *
+ * 1. The total number of such interfaces that this CCN IP supports. This is the
+ *    cumulative number of interfaces across all Request node types. It is
+ *    passed back as the return value of this function.
+ *
+ * 2. The maximum number of interfaces of a type resident on a Request node of
+ *    one of the three types. This information is populated in the 'info'
+ *    array provided by the caller as described next.
+ *
+ *    The array has 64 entries. Each entry corresponds to a Request node. The
+ *    Miscellaneous node's programmer's view has RN-F, RN-I and RN-D ID
+ *    registers. For each RN-I and RN-D ID indicated as being present in these
+ *    registers, its identification register (offset 0xFF00) is read. This
+ *    register specifies the maximum number of master interfaces the node
+ *    supports. For RN-Fs it is assumed that there can be only a single fully
+ *    coherent master resident on each node. The counts for each type of node
+ *    are used to populate the array entry at the index corresponding to the
+ *    node ID i.e. rn_info[node ID] = <number of master interfaces>
+ ******************************************************************************/
+static unsigned int ccn_get_rn_master_info(uintptr_t periphbase,
+					   rn_info_t *info)
+{
+	unsigned int num_masters = 0;
+	rn_types_t rn_type;
+
+	assert (info);
+
+	for (rn_type = RN_TYPE_RNF; rn_type < NUM_RN_TYPES; rn_type++) {
+		unsigned int mn_reg_off, node_id;
+		unsigned long long rn_bitmap;
+
+		/*
+		 * RN-F, RN-I, RN-D node registers in the MN region occupy
+		 * consecutive offsets that are 16 bytes apart.
+		 */
+		mn_reg_off = MN_RNF_NODEID_OFFSET + (rn_type << 4);
+		rn_bitmap = ccn_reg_read(periphbase, MN_REGION_ID, mn_reg_off);
+
+		FOR_EACH_PRESENT_NODE_ID(node_id, rn_bitmap) {
+			unsigned int node_mcount;
+
+			/*
+			 * A RN-F does not have a node type since it does not
+			 * export a programmer's interface. It can only have a
+			 * single fully coherent master residing on it. If the
+			 * offset of the MN(Miscellaneous Node) register points
+			 * to a RN-I/D node then the master count is set to the
+			 * maximum number of master interfaces that can possibly
+			 * reside on the node.
+			 */
+			node_mcount = (mn_reg_off == MN_RNF_NODEID_OFFSET ? 1 :
+				       ccn_get_rni_mcount(periphbase, node_id));
+
+			/*
+			 * Use this value to increment the maximum possible
+			 * master interfaces in the system.
+			 */
+			num_masters += node_mcount;
+
+			/*
+			 * Update the entry in 'info' for this node ID with
+			 * the maximum number of masters that can sit on
+			 * it. This information will be used to validate the
+			 * node information passed by the platform later.
+			 */
+			info->node_desc[node_id] = node_mcount;
+		}
+	}
+
+	return num_masters;
+}
+
+/*******************************************************************************
+ * This function validates parameters passed by the platform (in a debug build).
+ * It collects information about the maximum number of master interfaces that:
+ * a) the CCN IP can accommodate and
+ * b) can exist on each Request node.
+ * It compares this with the information provided by the platform to determine
+ * the validity of the latter.
+ ******************************************************************************/
+static void ccn_validate_plat_params(const ccn_desc_t *plat_desc)
+{
+	unsigned int master_id, num_rn_masters;
+	rn_info_t info = { {0} };
+
+	assert(plat_desc);
+	assert(plat_desc->periphbase);
+	assert(plat_desc->master_to_rn_id_map);
+	assert(plat_desc->num_masters);
+	assert(plat_desc->num_masters < CCN_MAX_RN_MASTERS);
+
+	/*
+	 * Find the number and properties of fully coherent, IO coherent and IO
+	 * coherent + DVM master interfaces
+	 */
+	num_rn_masters = ccn_get_rn_master_info(plat_desc->periphbase, &info);
+	assert(plat_desc->num_masters < num_rn_masters);
+
+	/*
+	 * Iterate through the Request nodes specified by the platform.
+	 * Decrement the count of the masters in the 'info' array for each
+	 * Request node encountered. If the count would drop below 0 then the
+	 * platform's view of this aspect of CCN configuration is incorrect.
+	 */
+	for (master_id = 0; master_id < plat_desc->num_masters; master_id++) {
+		unsigned int node_id;
+
+		node_id = plat_desc->master_to_rn_id_map[master_id];
+		assert(node_id < MAX_RN_NODES);
+		assert(info.node_desc[node_id]);
+		info.node_desc[node_id]--;
+	}
+}
+#endif /* DEBUG */
+
+/*******************************************************************************
+ * This function validates parameters passed by the platform (in a debug build)
+ * and initialises its internal data structures. A lock is required to prevent
+ * simultaneous CCN operations at runtime (only BL31) to add and remove Request
+ * nodes from coherency.
+ ******************************************************************************/
+void ccn_init(const ccn_desc_t *plat_desc)
+{
+#if DEBUG
+	ccn_validate_plat_params(plat_desc);
+#endif
+
+	ccn_plat_desc = plat_desc;
+}
+
+/*******************************************************************************
+ * This function converts a bit map of master interface IDs to a bit map of the
+ * Request node IDs that they reside on.
+ ******************************************************************************/
+static unsigned long long ccn_master_to_rn_id_map(unsigned long long master_map)
+{
+	unsigned long long rn_id_map = 0;
+	unsigned int node_id, iface_id;
+
+	assert(master_map);
+	assert(ccn_plat_desc);
+
+	FOR_EACH_PRESENT_MASTER_INTERFACE(iface_id, master_map) {
+		assert(iface_id < ccn_plat_desc->num_masters);
+		/* Look up the Request node this master interface sits on */
+		node_id = ccn_plat_desc->master_to_rn_id_map[iface_id];
+
+		/* 64-bit constant: node IDs go up to MAX_RN_NODES - 1 (63) */
+		rn_id_map |= (1ULL << node_id);
+	}
+
+	return rn_id_map;
+}
+
+/*******************************************************************************
+ * This function executes the necessary operations to add or remove Request node
+ * IDs specified in the 'rn_id_map' bitmap from the snoop/DVM domains specified
+ * in the 'hn_id_map'. The 'region_id' specifies the ID of the first HN-F/HN-I
+ * on which the operation should be performed. 'op_reg_offset' specifies the
+ * type of operation (add/remove). 'stat_reg_offset' specifies the register
+ * which should be polled to determine if the operation has completed or not.
+ ******************************************************************************/
+static void ccn_snoop_dvm_do_op(unsigned long long rn_id_map,
+				unsigned long long hn_id_map,
+				unsigned int region_id,
+				unsigned int op_reg_offset,
+				unsigned int stat_reg_offset)
+{
+	unsigned int start_region_id;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+#if IMAGE_BL31
+	bakery_lock_get(&ccn_lock);
+#endif
+	start_region_id = region_id;
+	FOR_EACH_PRESENT_REGION_ID(start_region_id, hn_id_map) {
+		ccn_reg_write(ccn_plat_desc->periphbase,
+			      start_region_id,
+			      op_reg_offset,
+			      rn_id_map);
+	}
+
+	start_region_id = region_id;
+
+	FOR_EACH_PRESENT_REGION_ID(start_region_id, hn_id_map) {
+		WAIT_FOR_DOMAIN_CTRL_OP_COMPLETION(start_region_id,
+						   stat_reg_offset,
+						   op_reg_offset,
+						   rn_id_map);
+	}
+
+#if IMAGE_BL31
+	bakery_lock_release(&ccn_lock);
+#endif
+}
+
+/*******************************************************************************
+ * This function reads the bitmap of Home nodes on the basis of the
+ * 'mn_hn_id_reg_offset' parameter from the Miscellaneous node's (MN)
+ * programmer's view. The MN has a register which carries the bitmap of present
+ * Home nodes of each type i.e. HN-Fs, HN-Is & HN-Ds. It calls
+ * 'ccn_snoop_dvm_do_op()' with this information to perform the actual
+ * operation.
+ ******************************************************************************/
+static void ccn_snoop_dvm_domain_common(unsigned long long rn_id_map,
+					unsigned int hn_op_reg_offset,
+					unsigned int hn_stat_reg_offset,
+					unsigned int mn_hn_id_reg_offset,
+					unsigned int hn_region_id)
+{
+	unsigned long long mn_hn_id_map;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+	mn_hn_id_map = ccn_reg_read(ccn_plat_desc->periphbase,
+				    MN_REGION_ID,
+				    mn_hn_id_reg_offset);
+	ccn_snoop_dvm_do_op(rn_id_map,
+			    mn_hn_id_map,
+			    hn_region_id,
+			    hn_op_reg_offset,
+			    hn_stat_reg_offset);
+}
+
+/*******************************************************************************
+ * The following functions provide the boot and runtime API to the platform for
+ * adding and removing master interfaces from the snoop/DVM domains. A bitmap of
+ * master interfaces IDs is passed as a parameter. It is converted into a bitmap
+ * of Request node IDs using the mapping provided by the platform while
+ * initialising the driver.
+ * For example, consider a dual cluster system where the clusters have values 0
+ * & 1 in the affinity level 1 field of their respective MPIDRs. While
+ * initialising this driver, the platform provides the mapping between each
+ * cluster and the corresponding Request node. To add or remove a cluster from
+ * the snoop and dvm domain, the bit position corresponding to the cluster ID
+ * should be set in the 'master_iface_map' i.e. to remove both clusters the
+ * bitmap would equal 0x3 (binary 11).
+ ******************************************************************************/
+void ccn_enter_snoop_dvm_domain(unsigned long long master_iface_map)
+{
+	unsigned long long rn_id_map;
+
+	rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    HNF_SDC_SET_OFFSET,
+				    HNF_SDC_STAT_OFFSET,
+				    MN_HNF_NODEID_OFFSET,
+				    HNF_REGION_ID_START);
+
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    MN_DDC_SET_OFF,
+				    MN_DDC_STAT_OFFSET,
+				    MN_HNI_NODEID_OFFSET,
+				    MN_REGION_ID);
+}
+
+void ccn_exit_snoop_dvm_domain(unsigned long long master_iface_map)
+{
+	unsigned long long rn_id_map;
+
+	rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    HNF_SDC_CLR_OFFSET,
+				    HNF_SDC_STAT_OFFSET,
+				    MN_HNF_NODEID_OFFSET,
+				    HNF_REGION_ID_START);
+
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    MN_DDC_CLR_OFFSET,
+				    MN_DDC_STAT_OFFSET,
+				    MN_HNI_NODEID_OFFSET,
+				    MN_REGION_ID);
+}
+
+void ccn_enter_dvm_domain(unsigned long long master_iface_map)
+{
+	unsigned long long rn_id_map;
+
+	rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    MN_DDC_SET_OFF,
+				    MN_DDC_STAT_OFFSET,
+				    MN_HNI_NODEID_OFFSET,
+				    MN_REGION_ID);
+}
+
+void ccn_exit_dvm_domain(unsigned long long master_iface_map)
+{
+	unsigned long long rn_id_map;
+
+	rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+	ccn_snoop_dvm_domain_common(rn_id_map,
+				    MN_DDC_CLR_OFFSET,
+				    MN_DDC_STAT_OFFSET,
+				    MN_HNI_NODEID_OFFSET,
+				    MN_REGION_ID);
+}
+
+/*******************************************************************************
+ * This function returns the run mode of all the L3 cache partitions in the
+ * system. The state is expected to be one of NO_L3, SF_ONLY, L3_HAM or
+ * L3_FAM. Instead of comparing the states reported by all HN-Fs, the state of
+ * the first present HN-F node is reported. Since the driver does not export an
+ * interface to program them separately, there is no reason to perform this
+ * check. An HN-F could report that the L3 cache is transitioning from one mode
+ * to another e.g. HNF_PM_NOL3_2_SFONLY. In this case, the function waits for
+ * the transition to complete and reports the final state.
+ ******************************************************************************/
+unsigned int ccn_get_l3_run_mode(void)
+{
+	unsigned long long hnf_pstate_stat;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+	/*
+	 * Wait for a L3 cache partition to enter any run mode. The P-state
+	 * value is read from the P-state status register of the first HN-F
+	 * region. A non-zero value in bits[1:0] means that the cache is
+	 * still transitioning to a run mode, so keep polling until it clears.
+	 */
+	do {
+		hnf_pstate_stat = ccn_reg_read(ccn_plat_desc->periphbase,
+					       HNF_REGION_ID_START,
+					       HNF_PSTATE_STAT_OFFSET);
+	} while (hnf_pstate_stat & 0x3);
+
+	return PSTATE_TO_RUN_MODE(hnf_pstate_stat);
+}
+
+/*******************************************************************************
+ * This function sets the run mode of all the L3 cache partitions in the
+ * system to one of NO_L3, SF_ONLY, L3_HAM or L3_FAM depending upon the state
+ * specified by the 'mode' argument.
+ ******************************************************************************/
+void ccn_set_l3_run_mode(unsigned int mode)
+{
+	unsigned long long mn_hnf_id_map, hnf_pstate_stat;
+	unsigned int region_id;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+	assert(mode <= CCN_L3_RUN_MODE_FAM);
+
+	mn_hnf_id_map = ccn_reg_read(ccn_plat_desc->periphbase,
+				     MN_REGION_ID,
+				     MN_HNF_NODEID_OFFSET);
+	region_id = HNF_REGION_ID_START;
+
+	/* Program the desired run mode into every present HN-F region */
+	FOR_EACH_PRESENT_REGION_ID(region_id, mn_hnf_id_map) {
+		ccn_reg_write(ccn_plat_desc->periphbase,
+			      region_id,
+			      HNF_PSTATE_REQ_OFFSET,
+			      mode);
+	}
+
+	/* Wait for the caches to transition to the run mode */
+	region_id = HNF_REGION_ID_START;
+	FOR_EACH_PRESENT_REGION_ID(region_id, mn_hnf_id_map) {
+		/*
+		 * Wait for a L3 cache partition to enter the target run
+		 * mode. The P-state value is read from this HN-F's P-state
+		 * status register and decoded with PSTATE_TO_RUN_MODE().
+		 */
+		do {
+			hnf_pstate_stat = ccn_reg_read(ccn_plat_desc->periphbase,
+					       region_id,
+					       HNF_PSTATE_STAT_OFFSET);
+		} while (PSTATE_TO_RUN_MODE(hnf_pstate_stat) != mode);
+	}
+}
+
+/*******************************************************************************
+ * This function configures system address map and provides option to enable the
+ * 3SN striping mode of Slave node operation. The Slave node IDs and the Top
+ * Address bit1 and bit0 are provided as parameters to this function. This
+ * configuration is needed only if network contains a single SN-F or 3 SN-F and
+ * must be completed before the first request by the system to normal memory.
+ ******************************************************************************/
+void ccn_program_sys_addrmap(unsigned int sn0_id,
+		 unsigned int sn1_id,
+		 unsigned int sn2_id,
+		 unsigned int top_addr_bit0,
+		 unsigned int top_addr_bit1,
+		 unsigned char three_sn_en)
+{
+	unsigned long long mn_hnf_id_map, hnf_sam_ctrl_value;
+	unsigned int region_id;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+	mn_hnf_id_map = ccn_reg_read(ccn_plat_desc->periphbase,
+				     MN_REGION_ID,
+				     MN_HNF_NODEID_OFFSET);
+	region_id = HNF_REGION_ID_START;
+	hnf_sam_ctrl_value = MAKE_HNF_SAM_CTRL_VALUE(sn0_id,
+						     sn1_id,
+						     sn2_id,
+						     top_addr_bit0,
+						     top_addr_bit1,
+						     three_sn_en);
+
+	FOR_EACH_PRESENT_REGION_ID(region_id, mn_hnf_id_map) {
+
+		/* Program the SAM control register */
+		ccn_reg_write(ccn_plat_desc->periphbase,
+			      region_id,
+			      HNF_SAM_CTRL_OFFSET,
+			      hnf_sam_ctrl_value);
+	}
+
+}
diff --git a/drivers/arm/ccn/ccn_private.h b/drivers/arm/ccn/ccn_private.h
new file mode 100644
index 0000000..e92e870
--- /dev/null
+++ b/drivers/arm/ccn/ccn_private.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CCN_PRIVATE_H__
+#define __CCN_PRIVATE_H__
+
+/*
+ * A CCN implementation can have a maximum of 64 Request nodes with node IDs
+ * from 0-63. These IDs are split across the three types of Request nodes
+ * i.e. RN-F, RN-D and RN-I.
+ */
+#define MAX_RN_NODES		64
+
+/* Enum used to loop through the 3 types of Request nodes */
+typedef enum rn_types {
+	RN_TYPE_RNF = 0,
+	RN_TYPE_RNI,
+	RN_TYPE_RND,
+	NUM_RN_TYPES
+} rn_types_t;
+
+/* Macro to convert a region id to its base address */
+#define region_id_to_base(id)	((id) << 16)
+
+/*
+ * Macro to calculate the number of master interfaces resident on a RN-I/RN-D.
+ * Value of first two bits of the RN-I/D node type + 1 == Maximum number of
+ * ACE-Lite or ACE-Lite+DVM interfaces supported on this node. E.g.
+ *
+ * 0x14 : RN-I with 1 ACE-Lite interface
+ * 0x15 : RN-I with 2 ACE-Lite interfaces
+ * 0x16 : RN-I with 3 ACE-Lite interfaces
+ */
+#define rn_type_id_to_master_cnt(id)	(((id) & 0x3) + 1)
+
+/*
+ * Constants used to identify a region in the programmer's view. These are
+ * common for all regions.
+ */
+#define REGION_ID_LIMIT		256
+#define REGION_ID_OFFSET	0xFF00
+
+#define REGION_NODE_ID_SHIFT	8
+#define REGION_NODE_ID_MASK	0x7f
+#define get_node_id(id_reg)	(((id_reg) >> REGION_NODE_ID_SHIFT) \
+				 & REGION_NODE_ID_MASK)
+
+#define REGION_NODE_TYPE_SHIFT	0
+#define REGION_NODE_TYPE_MASK	0x1f
+#define get_node_type(id_reg)	(((id_reg) >> REGION_NODE_TYPE_SHIFT) \
+				 & REGION_NODE_TYPE_MASK)
+
+/* Common offsets of registers to enter or exit a snoop/dvm domain */
+#define DOMAIN_CTRL_STAT_OFFSET	0x0200
+#define DOMAIN_CTRL_SET_OFFSET	0x0210
+#define DOMAIN_CTRL_CLR_OFFSET	0x0220
+
+/*
+ * These macros are used to determine if an operation to add or remove a Request
+ * node from the snoop/dvm domain has completed. 'rn_id_map' is a bit map of
+ * nodes. It was used to program the SET or CLEAR control register. The type of
+ * register is specified by 'op_reg_offset'. 'status_reg' is the bit map of
+ * nodes currently present in the snoop/dvm domain. 'rn_id_map' and 'status_reg'
+ * are logically ANDed and the result is stored back in the 'status_reg'. There
+ * are two outcomes of this operation:
+ *
+ * 1. If the DOMAIN_CTRL_SET_OFFSET register was programmed, then the set bits in
+ *    'rn_id_map' should appear in 'status_reg' when the operation completes. So
+ *    after the AND operation, at some point of time 'status_reg' should equal
+ *    'rn_id_map'.
+ *
+ * 2. If the DOMAIN_CTRL_CLR_OFFSET register was programmed, then the set bits in
+ *    'rn_id_map' should disappear in 'status_reg' when the operation
+ *    completes. So after the AND operation, at some point of time 'status_reg'
+ *    should equal 0.
+ */
+#define WAIT_FOR_DOMAIN_CTRL_OP_COMPLETION(region_id, stat_reg_offset,		\
+					   op_reg_offset, rn_id_map)		\
+	do {									\
+		uint64_t status_reg;						\
+		do {								\
+			status_reg = ccn_reg_read((ccn_plat_desc->periphbase),	\
+						  (region_id),			\
+						  (stat_reg_offset));		\
+			status_reg &= (rn_id_map);				\
+		} while ((op_reg_offset) == DOMAIN_CTRL_SET_OFFSET ?		\
+			 (rn_id_map) != status_reg : status_reg);		\
+	} while (0)
+
+/*
+ * Region ID of the Miscellaneous Node is always 0 as its located at the base of
+ * the programmer's view.
+ */
+#define MN_REGION_ID		0
+
+#define MN_REGION_ID_START	0
+#define DEBUG_REGION_ID_START	1
+#define HNI_REGION_ID_START	8
+#define SBSX_REGION_ID_START	16
+#define HNF_REGION_ID_START	32
+#define XP_REGION_ID_START	64
+#define RNI_REGION_ID_START	128
+
+/* Selected register offsets from the base of a HNF region */
+#define HNF_CFG_CTRL_OFFSET	0x0000
+#define HNF_SAM_CTRL_OFFSET	0x0008
+#define HNF_PSTATE_REQ_OFFSET	0x0010
+#define HNF_PSTATE_STAT_OFFSET	0x0018
+#define HNF_SDC_STAT_OFFSET	DOMAIN_CTRL_STAT_OFFSET
+#define HNF_SDC_SET_OFFSET	DOMAIN_CTRL_SET_OFFSET
+#define HNF_SDC_CLR_OFFSET	DOMAIN_CTRL_CLR_OFFSET
+#define HNF_AUX_CTRL_OFFSET	0x0500
+
+/* Selected register offsets from the base of a MN region */
+#define MN_SAR_OFFSET		0x0000
+#define MN_RNF_NODEID_OFFSET	0x0180
+#define MN_RNI_NODEID_OFFSET	0x0190
+#define MN_RND_NODEID_OFFSET	0x01A0
+#define MN_HNF_NODEID_OFFSET	0x01B0
+#define MN_HNI_NODEID_OFFSET	0x01C0
+#define MN_SN_NODEID_OFFSET	0x01D0
+#define MN_DDC_STAT_OFFSET	DOMAIN_CTRL_STAT_OFFSET
+#define MN_DDC_SET_OFF		DOMAIN_CTRL_SET_OFFSET
+#define MN_DDC_CLR_OFFSET	DOMAIN_CTRL_CLR_OFFSET
+#define MN_ID_OFFSET		REGION_ID_OFFSET
+
+/* HNF System Address Map register bit masks and shifts */
+#define HNF_SAM_CTRL_SN_ID_MASK		0x7f
+#define HNF_SAM_CTRL_SN0_ID_SHIFT	0
+#define HNF_SAM_CTRL_SN1_ID_SHIFT	8
+#define HNF_SAM_CTRL_SN2_ID_SHIFT	16
+
+#define HNF_SAM_CTRL_TAB0_MASK		0x3fUL
+#define HNF_SAM_CTRL_TAB0_SHIFT		48
+#define HNF_SAM_CTRL_TAB1_MASK		0x3fUL
+#define HNF_SAM_CTRL_TAB1_SHIFT		56
+
+#define HNF_SAM_CTRL_3SN_ENB_SHIFT	32
+#define HNF_SAM_CTRL_3SN_ENB_MASK	0x01UL
+
+/*
+ * Macro to create a value suitable for programming into a HNF SAM Control
+ * register for enabling 3SN striping.
+ */
+#define MAKE_HNF_SAM_CTRL_VALUE(sn0, sn1, sn2, tab0, tab1, three_sn_en)     \
+	((((sn0) & HNF_SAM_CTRL_SN_ID_MASK) << HNF_SAM_CTRL_SN0_ID_SHIFT) | \
+	 (((sn1) & HNF_SAM_CTRL_SN_ID_MASK) << HNF_SAM_CTRL_SN1_ID_SHIFT) | \
+	 (((sn2) & HNF_SAM_CTRL_SN_ID_MASK) << HNF_SAM_CTRL_SN2_ID_SHIFT) | \
+	 (((tab0) & HNF_SAM_CTRL_TAB0_MASK) << HNF_SAM_CTRL_TAB0_SHIFT)   | \
+	 (((tab1) & HNF_SAM_CTRL_TAB1_MASK) << HNF_SAM_CTRL_TAB1_SHIFT)   | \
+	 (((three_sn_en) & HNF_SAM_CTRL_3SN_ENB_MASK) << HNF_SAM_CTRL_3SN_ENB_SHIFT))
+
+/* Mask to read the power state value from an HN-F P-state register */
+#define HNF_PSTATE_MASK		0xf
+
+/* Macro to extract the run mode from a p-state value */
+#define PSTATE_TO_RUN_MODE(pstate)	(((pstate) & HNF_PSTATE_MASK) >> 2)
+
+/*
+ * Helper macro that iterates through a given 64-bit bit map. In each
+ * iteration, 'bit_pos' is set to the position of the lowest set bit, which is
+ * then cleared, so 'bit_map' is consumed (left as 0) when the loop finishes.
+ * It is used by other utility macros to iterate through nodes or masters.
+ */
+#define FOR_EACH_BIT(bit_pos, bit_map)			\
+	for (bit_pos = __builtin_ctzll(bit_map);	\
+	     bit_map;					\
+	     bit_map &= ~(1ULL << (bit_pos)),		\
+	     bit_pos = __builtin_ctzll(bit_map))
+
+/*
+ * Utility macro that iterates through a bit map of node IDs. In each
+ * iteration, it returns the ID of the next present node in the bit map. Node
+ * ID of a present node == Position of set bit == Number of zeroes trailing the
+ * bit.
+ */
+#define FOR_EACH_PRESENT_NODE_ID(node_id, bit_map)	\
+		FOR_EACH_BIT(node_id, bit_map)
+
+/*
+ * Helper function to return number of set bits in bitmap
+ */
+static inline unsigned int count_set_bits(uint64_t bitmap)
+{
+	unsigned int count = 0;
+
+	for (; bitmap; bitmap &= bitmap - 1)
+		++count;
+
+	return count;
+}
+
+/*
+ * Utility macro that iterates through a bit map of node IDs. In each iteration,
+ * it returns the ID of the next present region corresponding to a node present
+ * in the bit map. Region ID of a present node is in between passed region id
+ * and region id + number of set bits in the bitmap i.e. the number of present
+ * nodes.
+ */
+#define FOR_EACH_PRESENT_REGION_ID(region_id, bit_map)				\
+	for (unsigned long long region_id_limit = count_set_bits(bit_map)	\
+							+ region_id;		\
+	    region_id < region_id_limit;					\
+	    region_id++)
+
+/*
+ * Same macro as FOR_EACH_PRESENT_NODE_ID, but renamed to indicate that it
+ * traverses a bit map of master interfaces.
+ */
+#define FOR_EACH_PRESENT_MASTER_INTERFACE(iface_id, bit_map)	\
+			FOR_EACH_BIT(iface_id, bit_map)
+#endif /* __CCN_PRIVATE_H__ */
diff --git a/include/common/el3_common_macros.S b/include/common/el3_common_macros.S
index 7946e72..87e172e 100644
--- a/include/common/el3_common_macros.S
+++ b/include/common/el3_common_macros.S
@@ -214,6 +214,21 @@
 	 * ---------------------------------------------------------------------
 	 */
 	.if \_init_c_runtime
+#if IMAGE_BL31
+		/* -------------------------------------------------------------
+		 * Invalidate the RW memory used by the BL31 image. This
+		 * includes the data and NOBITS sections. This is done to
+		 * safeguard against possible corruption of this memory by
+		 * dirty cache lines in a system cache as a result of use by
+		 * an earlier boot loader stage.
+		 * -------------------------------------------------------------
+		 */
+		adr	x0, __RW_START__
+		adr	x1, __RW_END__
+		sub	x1, x1, x0
+		bl	inv_dcache_range
+#endif /* IMAGE_BL31 */
+
 		ldr	x0, =__BSS_START__
 		ldr	x1, =__BSS_SIZE__
 		bl	zeromem16
diff --git a/include/drivers/arm/ccn.h b/include/drivers/arm/ccn.h
new file mode 100644
index 0000000..2361596
--- /dev/null
+++ b/include/drivers/arm/ccn.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CCN_H__
+#define __CCN_H__
+
+/*
+ * This macro defines the maximum number of master interfaces that reside on
+ * Request nodes which the CCN driver can accommodate. The driver APIs to add
+ * and remove Request nodes from snoop/dvm domains take a bit map of master
+ * interfaces as inputs. The largest C data type that can be used is a 64-bit
+ * unsigned integer. Hence the value of 64. The platform will have to ensure
+ * that the master interfaces are numbered from 0-63.
+ */
+#define CCN_MAX_RN_MASTERS	64
+
+/*
+ * The following constants define the various run modes that the platform can
+ * request the CCN driver to place the L3 cache in. These map to the
+ * programmable P-State values in a HN-F P-state register.
+ */
+#define CCN_L3_RUN_MODE_NOL3	0x0	/* HNF_PM_NOL3 */
+#define CCN_L3_RUN_MODE_SFONLY	0x1	/* HNF_PM_SFONLY */
+#define CCN_L3_RUN_MODE_HAM	0x2	/* HNF_PM_HALF */
+#define CCN_L3_RUN_MODE_FAM	0x3	/* HNF_PM_FULL */
+
+/*
+ * The following macro takes the value returned from a read of a HN-F P-state
+ * status register and returns the retention state value held in bits [5:4].
+ */
+#define CCN_GET_RETENTION_STATE(pstate)	(((pstate) >> 4) & 0x3)
+
+/*
+ * The following macro takes the value returned from a read of a HN-F P-state
+ * status register and returns the run state value held in bits [3:0].
+ */
+#define CCN_GET_RUN_STATE(pstate)	((pstate) & 0xf)
+
+#ifndef __ASSEMBLY__
+#include <stdint.h>
+
+/*
+ * This structure describes some of the implementation defined attributes of the
+ * CCN IP. It is used by the platform port to specify these attributes in order
+ * to initialise the CCN driver. The attributes are described below.
+ *
+ * 1. The 'num_masters' field specifies the total number of master interfaces
+ *    resident on Request nodes.
+ *
+ * 2. The 'master_to_rn_id_map' field is a pointer to an array in which each
+ *    index corresponds to a master interface and its value corresponds to the
+ *    Request node on which the master interface resides.
+ *    This field is not simply defined as an array of size CCN_MAX_RN_MASTERS.
+ *    In reality, a platform will have far fewer master interfaces than
+ *    CCN_MAX_RN_MASTERS. With an array of this size, it would also have to
+ *    set the unused entries to a suitable value. Zeroing the array would not
+ *    be enough since 0 is also a valid node id. Hence, such an array is not
+ *    used.
+ *
+ * 3. The 'periphbase' field is the base address of the programmer's view of the
+ *    CCN IP.
+ */
+typedef struct ccn_desc {
+	unsigned int num_masters;	/* Master interfaces on Request nodes */
+	const unsigned char *master_to_rn_id_map; /* Master id -> RN id */
+	uintptr_t periphbase;		/* Base of CCN programmer's view */
+} ccn_desc_t;
+
+
+void ccn_init(const ccn_desc_t *plat_ccn_desc);
+void ccn_enter_snoop_dvm_domain(unsigned long long master_iface_map);
+void ccn_exit_snoop_dvm_domain(unsigned long long master_iface_map);
+void ccn_enter_dvm_domain(unsigned long long master_iface_map);
+void ccn_exit_dvm_domain(unsigned long long master_iface_map);
+void ccn_set_l3_run_mode(unsigned int mode);
+void ccn_program_sys_addrmap(unsigned int sn0_id,
+		 unsigned int sn1_id,
+		 unsigned int sn2_id,
+		 unsigned int top_addr_bit0,
+		 unsigned int top_addr_bit1,
+		 unsigned char three_sn_en);
+unsigned int ccn_get_l3_run_mode(void);
+
+#endif /* __ASSEMBLY__ */
+#endif /* __CCN_H__ */
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index b7ab3da..d01ea31 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -145,6 +145,7 @@
 DEFINE_SYSOP_TYPE_PARAM_FUNC(at, s12e0w)
 
 void flush_dcache_range(uint64_t, uint64_t);
+void clean_dcache_range(uint64_t, uint64_t);
 void inv_dcache_range(uint64_t, uint64_t);
 void dcsw_op_louis(uint32_t);
 void dcsw_op_all(uint32_t);
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index 0dbab1b..476b906 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -32,6 +32,7 @@
 #include <asm_macros.S>
 
 	.globl	flush_dcache_range
+	.globl	clean_dcache_range
 	.globl	inv_dcache_range
 	.globl	dcsw_op_louis
 	.globl	dcsw_op_all
@@ -39,25 +40,39 @@
 	.globl	dcsw_op_level2
 	.globl	dcsw_op_level3
 
-	/* ------------------------------------------
-	 * Clean+Invalidate from base address till
-	 * size. 'x0' = addr, 'x1' = size
-	 * ------------------------------------------
-	 */
-func flush_dcache_range
+/*
+ * This macro can be used for implementing various data cache operations `op`
+ */
+.macro do_dcache_maintenance_by_mva op
 	dcache_line_size x2, x3
 	add	x1, x0, x1
 	sub	x3, x2, #1
 	bic	x0, x0, x3
-flush_loop:
-	dc	civac, x0
+loop_\op:
+	dc	\op, x0
 	add	x0, x0, x2
 	cmp	x0, x1
-	b.lo    flush_loop
+	b.lo    loop_\op
 	dsb	sy
 	ret
+.endm
+	/* ------------------------------------------
+	 * Clean+Invalidate from base address till
+	 * size. 'x0' = addr, 'x1' = size
+	 * ------------------------------------------
+	 */
+func flush_dcache_range
+	do_dcache_maintenance_by_mva civac
 endfunc flush_dcache_range
 
+	/* ------------------------------------------
+	 * Clean from base address till size.
+	 * 'x0' = addr, 'x1' = size
+	 * ------------------------------------------
+	 */
+func clean_dcache_range
+	do_dcache_maintenance_by_mva cvac
+endfunc clean_dcache_range
 
 	/* ------------------------------------------
 	 * Invalidate from base address till
@@ -65,17 +80,7 @@
 	 * ------------------------------------------
 	 */
 func inv_dcache_range
-	dcache_line_size x2, x3
-	add	x1, x0, x1
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-inv_loop:
-	dc	ivac, x0
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo    inv_loop
-	dsb	sy
-	ret
+	do_dcache_maintenance_by_mva ivac
 endfunc inv_dcache_range
 
 
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index 5f80b59..e7c246e 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -141,9 +141,6 @@
 
 /* ---------------------------------------------------------------------------
  * Disable the MMU at EL3
- * This is implemented in assembler to ensure that the data cache is cleaned
- * and invalidated after the MMU is disabled without any intervening cacheable
- * data accesses
  * ---------------------------------------------------------------------------
  */
 
@@ -154,8 +151,8 @@
 	bic	x0, x0, x1
 	msr	sctlr_el3, x0
 	isb				// ensure MMU is off
-	mov	x0, #DCCISW		// DCache clean and invalidate
-	b	dcsw_op_all
+	dsb	sy
+	ret
 endfunc disable_mmu_el3
 
 
diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk
index 1234619..eb5ae11 100644
--- a/plat/arm/common/arm_common.mk
+++ b/plat/arm/common/arm_common.mk
@@ -74,6 +74,7 @@
 				plat/common/aarch64/plat_common.c
 
 BL1_SOURCES		+=	drivers/arm/cci/cci.c				\
+				drivers/arm/ccn/ccn.c				\
 				drivers/io/io_fip.c				\
 				drivers/io/io_memmap.c				\
 				drivers/io/io_storage.c				\
@@ -91,6 +92,7 @@
 				plat/common/aarch64/platform_up_stack.S
 
 BL31_SOURCES		+=	drivers/arm/cci/cci.c				\
+				drivers/arm/ccn/ccn.c				\
 				drivers/arm/gic/arm_gic.c			\
 				drivers/arm/gic/gic_v2.c			\
 				drivers/arm/gic/gic_v3.c			\
diff --git a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
index 0d8e370..40d1bab 100644
--- a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
+++ b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
@@ -107,6 +107,23 @@
 	tegra_mc_write_32(MC_SECURITY_CFG1_0, size_in_bytes >> 20);
 }
 
+static void tegra_clear_videomem(uintptr_t non_overlap_area_start,
+				 unsigned long long non_overlap_area_size)
+{
+	/*
+	 * Zero out the given non-overlapping area, bracketed by cache
+	 * maintenance. The first invalidation of this range ensures that
+	 * possible evictions of dirty cache lines do not interfere with the
+	 * 'zeromem16' operation. Other CPUs could speculatively prefetch the
+	 * main memory contents of this area between the first invalidation and
+	 * the 'zeromem16' operation. The second invalidation ensures that any
+	 * such cache lines are removed as well.
+	 */
+	inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
+	zeromem16((void *)non_overlap_area_start, non_overlap_area_size);
+	inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
+}
+
 /*
  * Program the Video Memory carveout region
  *
@@ -118,7 +135,7 @@
 	uintptr_t vmem_end_old = video_mem_base + (video_mem_size << 20);
 	uintptr_t vmem_end_new = phys_base + size_in_bytes;
 	uint32_t regval;
-	uint64_t size;
+	unsigned long long non_overlap_area_size;
 
 	/*
 	 * The GPU is the user of the Video Memory region. In order to
@@ -155,15 +172,15 @@
 
 	disable_mmu_el3();
 	if (phys_base > vmem_end_old || video_mem_base > vmem_end_new) {
-		zeromem16((void *)video_mem_base, video_mem_size << 20);
+		tegra_clear_videomem(video_mem_base, video_mem_size << 20);
 	} else {
 		if (video_mem_base < phys_base) {
-			size = phys_base - video_mem_base;
-			zeromem16((void *)video_mem_base, size);
+			non_overlap_area_size = phys_base - video_mem_base;
+			tegra_clear_videomem(video_mem_base, non_overlap_area_size);
 		}
 		if (vmem_end_old > vmem_end_new) {
-			size = vmem_end_old - vmem_end_new;
-			zeromem16((void *)vmem_end_new, size);
+			non_overlap_area_size = vmem_end_old - vmem_end_new;
+			tegra_clear_videomem(vmem_end_new, non_overlap_area_size);
 		}
 	}
 	enable_mmu_el3(0);
diff --git a/services/std_svc/psci/psci_on.c b/services/std_svc/psci/psci_on.c
index cf1a782..c37adc2 100644
--- a/services/std_svc/psci/psci_on.c
+++ b/services/std_svc/psci/psci_on.c
@@ -203,7 +203,4 @@
 	 * call to set this cpu on its way.
 	 */
 	cm_prepare_el3_exit(NON_SECURE);
-
-	/* Clean caches before re-entering normal world */
-	dcsw_op_louis(DCCSW);
 }
diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c
index 7a80187..cd1bb09 100644
--- a/services/std_svc/psci/psci_setup.c
+++ b/services/std_svc/psci/psci_setup.c
@@ -221,18 +221,6 @@
 	psci_cpu_pd_nodes[plat_my_core_pos()].mpidr =
 		read_mpidr() & MPIDR_AFFINITY_MASK;
 
-#if !USE_COHERENT_MEM
-	/*
-	 * The psci_non_cpu_pd_nodes only needs flushing when it's not allocated in
-	 * coherent memory.
-	 */
-	flush_dcache_range((uintptr_t) &psci_non_cpu_pd_nodes,
-			   sizeof(psci_non_cpu_pd_nodes));
-#endif
-
-	flush_dcache_range((uintptr_t) &psci_cpu_pd_nodes,
-			   sizeof(psci_cpu_pd_nodes));
-
 	psci_init_req_local_pwr_states();
 
 	/*
diff --git a/services/std_svc/psci/psci_suspend.c b/services/std_svc/psci/psci_suspend.c
index 675ef9e..bd0c5db 100644
--- a/services/std_svc/psci/psci_suspend.c
+++ b/services/std_svc/psci/psci_suspend.c
@@ -261,7 +261,4 @@
 	 * call to set this cpu on its way.
 	 */
 	cm_prepare_el3_exit(NON_SECURE);
-
-	/* Clean caches before re-entering normal world */
-	dcsw_op_louis(DCCSW);
 }