Merge pull request #1040 from sliai/support-opteed-header

Support opteed header
diff --git a/Makefile b/Makefile
index 23ebcaa..b57b212 100644
--- a/Makefile
+++ b/Makefile
@@ -465,7 +465,6 @@
 # platform to overwrite the default options
 ################################################################################
 
-$(eval $(call add_define,ARM_CCI_PRODUCT_ID))
 $(eval $(call add_define,ARM_ARCH_MAJOR))
 $(eval $(call add_define,ARM_ARCH_MINOR))
 $(eval $(call add_define,ARM_GIC_ARCH))
diff --git a/bl2u/aarch32/bl2u_entrypoint.S b/bl2u/aarch32/bl2u_entrypoint.S
index 1fa669e..7fb64f3 100644
--- a/bl2u/aarch32/bl2u_entrypoint.S
+++ b/bl2u/aarch32/bl2u_entrypoint.S
@@ -32,7 +32,7 @@
 	 * ---------------------------------------------
 	 */
 	mov	r11, r1
-	mov	r12, r2
+	mov	r10, r2
 
 	/* ---------------------------------------------
 	 * Set the exception vector to something sane.
@@ -107,7 +107,7 @@
 	 * ---------------------------------------------
 	 */
 	mov	r0, r11
-	mov	r1, r12
+	mov	r1, r10
 	bl	bl2u_early_platform_setup
 	bl	bl2u_plat_arch_setup
 
diff --git a/docs/diagrams/generate_xlat_images.sh b/docs/diagrams/generate_xlat_images.sh
new file mode 100755
index 0000000..9daef5f
--- /dev/null
+++ b/docs/diagrams/generate_xlat_images.sh
@@ -0,0 +1,26 @@
+#! /bin/bash
+
+#
+# This script generates the image file used in the ARM Trusted Firmware
+# Translation Tables Library V2 Design document from the 'xlat_align.dia' file.
+#
+
+set -e
+
+# Usage: generate_image <dia_filename> <layers> <image_filename>
+function generate_image
+{
+	dia				\
+		--show-layers=$2	\
+		--filter=svg		\
+		--export=$3		\
+		$1
+
+}
+
+generate_image			\
+	xlat_align.dia		\
+	bg,translations		\
+	xlat_align.svg
+
+inkscape -z xlat_align.svg -e xlat_align.png -d 45
diff --git a/docs/diagrams/xlat_align.dia b/docs/diagrams/xlat_align.dia
new file mode 100644
index 0000000..bd88c0c
--- /dev/null
+++ b/docs/diagrams/xlat_align.dia
Binary files differ
diff --git a/docs/diagrams/xlat_align.png b/docs/diagrams/xlat_align.png
new file mode 100644
index 0000000..cffd3c1
--- /dev/null
+++ b/docs/diagrams/xlat_align.png
Binary files differ
diff --git a/docs/firmware-design.rst b/docs/firmware-design.rst
index f50890b..4c3c420 100644
--- a/docs/firmware-design.rst
+++ b/docs/firmware-design.rst
@@ -25,6 +25,10 @@
 management framework and its design can be found in ARM Trusted Firmware
 Interrupt Management Design guide [4]_.
 
+The ARM Trusted Firmware also implements a library for setting up and managing
+the translation tables. The details of this library can be found in
+`Xlat_tables design`_.
+
 The ARM Trusted Firmware can be built to support either AArch64 or AArch32
 execution state.
 
@@ -2418,7 +2422,7 @@
 
 --------------
 
-*Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.*
+*Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.*
 
 .. _Reset Design: ./reset-design.rst
 .. _Porting Guide: ./porting-guide.rst
@@ -2436,5 +2440,6 @@
 .. _User Guide: ./user-guide.rst
 .. _SMC Calling Convention PDD: http://infocenter.arm.com/help/topic/com.arm.doc.den0028b/ARM_DEN0028B_SMC_Calling_Convention.pdf
 .. _ARM Trusted Firmware Interrupt Management Design guide: ./interrupt-framework-design.rst
+.. _Xlat_tables design: xlat-tables-lib-v2-design.rst
 
 .. |Image 1| image:: diagrams/rt-svc-descs-layout.png?raw=true
diff --git a/docs/user-guide.rst b/docs/user-guide.rst
index 7f949d4..fe86f23 100644
--- a/docs/user-guide.rst
+++ b/docs/user-guide.rst
@@ -73,6 +73,10 @@
 
 -  For debugging, ARM `Development Studio 5 (DS-5)`_.
 
+-  To create and modify the diagram files included in the documentation, `Dia`_.
+   This tool can be found in most Linux distributions. Inkscape is needed to
+   generate the actual *.png files.
+
 Getting the Trusted Firmware source code
 ----------------------------------------
 
@@ -213,10 +217,6 @@
    It can take either ``aarch64`` or ``aarch32`` as values. By default, it is
    defined to ``aarch64``.
 
--  ``ARM_CCI_PRODUCT_ID``: Choice of ARM CCI product used by the platform. This
-   is used to determine the number of valid slave interfaces available in the
-   ARM CCI driver. Default is 400 (that is, CCI-400).
-
 -  ``ARM_ARCH_MAJOR``: The major version of ARM Architecture to target when
    compiling ARM Trusted Firmware. Its value must be numeric, and defaults to
    8 . See also, *ARMv8 Architecture Extensions* in `Firmware Design`_.
@@ -231,10 +231,10 @@
    This build option is deprecated.
 
 -  ``ARM_PLAT_MT``: This flag determines whether the ARM platform layer has to
-   cater for the multi-threading ``MT`` bit when accessing MPIDR. When this
-   flag is set, the functions which deal with MPIDR assume that the ``MT`` bit
-   in MPIDR is set and access the bit-fields in MPIDR accordingly. Default
-   value of this flag is 0.
+   cater for the multi-threading ``MT`` bit when accessing MPIDR. When this flag
+   is set, the functions which deal with MPIDR assume that the ``MT`` bit in
+   MPIDR is set and access the bit-fields in MPIDR accordingly. Default value of
+   this flag is 0. Note that this option is not used on FVP platforms.
 
 -  ``BL2``: This is an optional build option which specifies the path to BL2
    image for the ``fip`` target. In this case, the BL2 in the ARM Trusted
@@ -683,6 +683,10 @@
    -  ``FVP_CCN`` : The CCN driver is selected. This is the default
       if ``FVP_CLUSTER_COUNT`` > 2.
 
+-  ``FVP_MAX_PE_PER_CPU``: Sets the maximum number of PEs implemented on any CPU
+   in the system. This option defaults to 1. Note that the build option
+   ``ARM_PLAT_MT`` doesn't have any effect on FVP platforms.
+
 -  ``FVP_USE_GIC_DRIVER`` : Selects the GIC driver to be built. Options:
 
    -  ``FVP_GIC600`` : The GIC600 implementation of GICv3 is selected
@@ -1745,6 +1749,7 @@
 .. _Instructions for using Linaro's deliverables on Juno: https://community.arm.com/dev-platforms/b/documents/posts/using-linaros-deliverables-on-juno
 .. _ARM Platforms Portal: https://community.arm.com/dev-platforms/
 .. _Development Studio 5 (DS-5): http://www.arm.com/products/tools/software-tools/ds-5/index.php
+.. _Dia: https://wiki.gnome.org/Apps/Dia/Download
 .. _here: psci-lib-integration-guide.rst
 .. _Trusted Board Boot: trusted-board-boot.rst
 .. _Secure-EL1 Payloads and Dispatchers: firmware-design.rst#user-content-secure-el1-payloads-and-dispatchers
diff --git a/docs/xlat-tables-lib-v2-design.rst b/docs/xlat-tables-lib-v2-design.rst
new file mode 100644
index 0000000..3006ce7
--- /dev/null
+++ b/docs/xlat-tables-lib-v2-design.rst
@@ -0,0 +1,370 @@
+Translation Tables Library Design
+=================================
+
+
+.. section-numbering::
+    :suffix: .
+
+.. contents::
+
+
+This document describes the design of the translation tables library (version 2)
+used by the ARM Trusted Firmware. This library provides APIs to create page
+tables based on a description of the memory layout, as well as setting up system
+registers related to the Memory Management Unit (MMU) and performing the
+required Translation Lookaside Buffer (TLB) maintenance operations.
+
+More specifically, some use cases that this library aims to support are:
+
+#. Statically allocate translation tables and populate them (at run-time) based
+   on a description of the memory layout. The memory layout is typically
+   provided by the platform port as a list of memory regions;
+
+#. Support for generating translation tables pertaining to a different
+   translation regime than the exception level the library code is executing at;
+
+#. Support for dynamic mapping and unmapping of regions, even while the MMU is
+   on. This can be used to temporarily map some memory regions and unmap them
+   later on when no longer needed;
+
+#. Support for non-identity virtual to physical mappings to compress the virtual
+   address space;
+
+#. Support for changing memory attributes of memory regions at run-time.
+
+
+About version 1 and version 2
+-----------------------------
+
+This document focuses on version 2 of the library, whose sources are available
+in the `lib/xlat\_tables\_v2`_ directory. Version 1 of the library can still be
+found in `lib/xlat\_tables`_ directory but it is less flexible and doesn't
+support dynamic mapping. Although potential bug fixes will be applied to both
+versions, future features enhancements will focus on version 2 and might not be
+back-ported to version 1. Therefore, it is recommended to use version 2,
+especially for new platform ports.
+
+However, please note that version 2 is still in active development and is not
+considered stable yet. Hence, compatibility breaks might be introduced.
+
+From this point onwards, this document will implicitly refer to version 2 of the
+library.
+
+
+Design concepts and interfaces
+------------------------------
+
+This section presents some of the key concepts and data structures used in the
+translation tables library.
+
+`mmap` regions
+~~~~~~~~~~~~~~
+
+An ``mmap_region`` is an abstract, concise way to represent a memory region to
+map. It is one of the key interfaces to the library. It is identified by:
+
+- its physical base address;
+- its virtual base address;
+- its size;
+- its attributes.
+
+See the ``struct mmap_region`` type in `xlat\_tables\_v2.h`_.
+
+The user usually provides a list of such mmap regions to map and lets the
+library transpose that in a set of translation tables. As a result, the library
+might create new translation tables, update or split existing ones.
+
+The region attributes specify the type of memory (for example device or cached
+normal memory) as well as the memory access permissions (read-only or
+read-write, executable or not, secure or non-secure, and so on). See the
+``mmap_attr_t`` enumeration type in `xlat\_tables\_v2.h`_.
+
+
+Translation Context
+~~~~~~~~~~~~~~~~~~~
+
+The library can create or modify translation tables pertaining to a different
+translation regime than the exception level the library code is executing at.
+For example, the library might be used by EL3 software (for instance BL31) to
+create translation tables pertaining to the S-EL1&0 translation regime.
+
+This flexibility comes from the use of *translation contexts*. A *translation
+context* constitutes the superset of information used by the library to track
+the status of a set of translation tables for a given translation regime.
+
+The library internally allocates a default translation context, which pertains
+to the translation regime of the current exception level. Additional contexts
+may be explicitly allocated and initialized using the
+``REGISTER_XLAT_CONTEXT()`` macro. Separate APIs are provided to act either on
+the default translation context or on an alternative one.
+
+To register a translation context, the user must provide the library with the
+following information:
+
+* A name.
+
+  The resulting translation context variable will be called after this name, to
+  which ``_xlat_ctx`` is appended. For example, if the macro name parameter is
+  ``foo``, the context variable name will be ``foo_xlat_ctx``.
+
+* The maximum number of `mmap` regions to map.
+
+  Should account for both static and dynamic regions, if applicable.
+
+* The number of sub-translation tables to allocate.
+
+  Number of translation tables to statically allocate for this context,
+  excluding the initial lookup level translation table, which is always
+  allocated. For example, if the initial lookup level is 1, this parameter would
+  specify the number of level-2 and level-3 translation tables to pre-allocate
+  for this context.
+
+* The size of the virtual address space.
+
+  Size in bytes of the virtual address space to map using this context. This
+  will incidentally determine the number of entries in the initial lookup level
+  translation table : the library will allocate as many entries as is required
+  to map the entire virtual address space.
+
+* The size of the physical address space.
+
+  Size in bytes of the physical address space to map using this context.
+
+The default translation context is internally initialized using information
+coming (for the most part) from platform-specific defines:
+
+- name: hard-coded to ``tf`` ; hence the name of the default context variable is
+  ``tf_xlat_ctx``;
+- number of `mmap` regions: ``MAX_MMAP_REGIONS``;
+- number of sub-translation tables: ``MAX_XLAT_TABLES``;
+- size of the virtual address space: ``PLAT_VIRT_ADDR_SPACE_SIZE``;
+- size of the physical address space: ``PLAT_PHY_ADDR_SPACE_SIZE``.
+
+Please refer to the `Porting Guide`_ for more details about these macros.
+
+
+Static and dynamic memory regions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The library optionally supports dynamic memory mapping. This feature may be
+enabled using the ``PLAT_XLAT_TABLES_DYNAMIC`` platform build flag.
+
+When dynamic memory mapping is enabled, the library categorises mmap regions as
+*static* or *dynamic*.
+
+- *Static regions* are fixed for the lifetime of the system. They can only be
+  added early on, before the translation tables are created and populated. They
+  cannot be removed afterwards.
+
+- *Dynamic regions* can be added or removed any time.
+
+When the dynamic memory mapping feature is disabled, only static regions exist.
+
+The dynamic memory mapping feature may be used to map and unmap transient memory
+areas. This is useful when the user needs to access some memory for a fixed
+period of time, after which the memory may be discarded and reclaimed. For
+example, a memory region that is only required at boot time while the system is
+initializing, or to temporarily share a memory buffer between the normal world
+and trusted world. Note that it is up to the caller to ensure that these regions
+are not accessed concurrently while the regions are being added or removed.
+
+Although this feature provides some level of dynamic memory allocation, this
+does not allow dynamically allocating an arbitrary amount of memory at an
+arbitrary memory location. The user is still required to declare at compile-time
+the limits of these allocations ; the library will deny any mapping request that
+does not fit within this pre-allocated pool of memory.
+
+
+Library APIs
+------------
+
+The external APIs exposed by this library are declared and documented in the
+`xlat\_tables\_v2.h`_ header file. This should be the reference point for
+getting information about the usage of the different APIs this library
+provides. This section just provides some extra details and clarifications.
+
+Although the ``mmap_region`` structure is a publicly visible type, it is not
+recommended to populate these structures by hand. Instead, wherever APIs expect
+function arguments of type ``mmap_region_t``, these should be constructed using
+the ``MAP_REGION*()`` family of helper macros. This is to limit the risk of
+compatibility breaks, should the ``mmap_region`` structure type evolve in the
+future.
+
+As explained earlier in this document, when the dynamic mapping feature is
+disabled, there is no notion of dynamic regions. Conceptually, there are only
+static regions. For this reason (and to retain backward compatibility with the
+version 1 of the library), the APIs that map static regions do not embed the
+word *static* in their functions names (for example ``mmap_add_region()``), in
+contrast with the dynamic regions APIs (for example
+``mmap_add_dynamic_region()``).
+
+Although the definition of static and dynamic regions is not based on the state
+of the MMU, the two are still related in some way. Static regions can only be
+added before ``init_xlat_tables()`` is called and ``init_xlat_tables()`` must be
+called while the MMU is still off. As a result, static regions cannot be added
+once the MMU has been enabled. Dynamic regions can be added with the MMU on or
+off. In practice, the usual call flow would look like this:
+
+#. The MMU is initially off.
+
+#. Add some static regions, add some dynamic regions.
+
+#. Initialize translation tables based on the list of mmap regions (using one of
+   the ``init_xlat_tables*()`` APIs).
+
+#. At this point, it is no longer possible to add static regions. Dynamic
+   regions can still be added or removed.
+
+#. Enable the MMU.
+
+#. Dynamic regions can continue to be added or removed.
+
+Because static regions are added early on at boot time and are all in the
+control of the platform initialization code, the ``mmap_add*()`` family of APIs
+are not expected to fail. They do not return any error code.
+
+Nonetheless, these APIs will check upfront whether the region can be
+successfully added before updating the translation context structure. If the
+library detects that there is insufficient memory to meet the request, or that
+the new region will overlap another one in an invalid way, or if any other
+unexpected error is encountered, they will print an error message on the UART.
+Additionally, when asserts are enabled (typically in debug builds), an assertion
+will be triggered. Otherwise, the function call will just return straight away,
+without adding the offending memory region.
+
+
+Library limitations
+-------------------
+
+Dynamic regions are not allowed to overlap each other. Static regions are
+allowed to overlap as long as one of them is fully contained inside the other
+one. This is allowed for backwards compatibility with the previous behaviour in
+the version 1 of the library.
+
+
+Implementation details
+----------------------
+
+Code structure
+~~~~~~~~~~~~~~
+
+The library is divided into 2 modules:
+
+The core module
+    Provides the main functionality of the library.
+
+    See `xlat\_tables\_internal.c`_.
+
+The architectural module
+    Provides functions that are dependent on the current execution state
+    (AArch32/AArch64), such as the functions used for TLB invalidation or MMU
+    setup.
+
+    See `aarch32/xlat\_tables\_arch.c`_ and `aarch64/xlat\_tables\_arch.c`_.
+
+Core module
+~~~~~~~~~~~
+
+All the APIs in this module work on a translation context. The translation
+context contains the list of ``mmap_region``, which holds the information of all
+the regions that are mapped at any given time. Whenever there is a request to
+map (resp. unmap) a memory region, it is added to (resp. removed from) the
+``mmap_region`` list.
+
+The mmap regions list is a conceptual way to represent the memory layout. At
+some point, the library has to convert this information into actual translation
+tables to program into the MMU.
+
+Before the ``init_xlat_tables()`` API is called, the library only acts on the
+mmap regions list. Adding a static or dynamic region at this point through one
+of the ``mmap_add*()`` APIs does not affect the translation tables in any way,
+they only get registered in the internal mmap region list. It is only when the
+user calls the ``init_xlat_tables()`` that the translation tables are populated
+in memory based on the list of mmap regions registered so far. This is an
+optimization that allows creation of the initial set of translation tables in
+one go, rather than having to edit them every time while the MMU is disabled.
+
+After the ``init_xlat_tables()`` API has been called, only dynamic regions can
+be added. Changes to the translation tables (as well as the mmap regions list)
+will take effect immediately.
+
+The mapping function is implemented as a recursive algorithm. It is however
+bound by the level of depth of the translation tables (the ARMv8-A architecture
+allows up to 4 lookup levels).
+
+By default, the algorithm will attempt to minimize the number of translation
+tables created to satisfy the user's request. It will favour mapping a region
+using the biggest possible blocks, only creating a sub-table if it is strictly
+necessary. This is to reduce the memory footprint of the firmware.
+
+The most common reason for needing a sub-table is when a specific mapping
+requires a finer granularity. Misaligned regions also require a finer
+granularity than what the user may had originally expected, using a lot more
+memory than expected. The reason is that all levels of translation are
+restricted to address translations of the same granularity as the size of the
+blocks of that level.  For example, for a 4 KiB page size, a level 2 block entry
+can only translate up to a granularity of 2 MiB. If the Physical Address is not
+aligned to 2 MiB then additional level 3 tables are also needed.
+
+Note that not every translation level allows any type of descriptor. Depending
+on the page size, levels 0 and 1 of translation may only allow table
+descriptors. If a block entry could be able to describe a translation, but that
+level does not allow block descriptors, a table descriptor will have to be used
+instead, as well as additional tables at the next level.
+
+|Alignment Example|
+
+The mmap regions are sorted in a way that simplifies the code that maps
+them. Even though this ordering is only strictly needed for overlapping static
+regions, it must also be applied for dynamic regions to maintain a consistent
+order of all regions at all times. As each new region is mapped, existing
+entries in the translation tables are checked to ensure consistency. Please
+refer to the comments in the source code of the core module for more details
+about the sorting algorithm in use.
+
+The library takes care of performing TLB maintenance operations when required.
+For example, when the user requests removing a dynamic region, the library
+invalidates all TLB entries associated to that region to ensure that these
+changes are visible to subsequent execution, including speculative execution,
+that uses the changed translation table entries.
+
+A counter-example is the initialization of translation tables. In this case,
+explicit TLB maintenance is not required. The ARMv8-A architecture guarantees
+that all TLBs are disabled from reset and their contents have no effect on
+address translation at reset [#tlb-reset-ref]_. Therefore, the TLBs invalidation
+is deferred to the ``enable_mmu*()`` family of functions, just before the MMU is
+turned on.
+
+TLB invalidation is not required when adding dynamic regions either. Dynamic
+regions are not allowed to overlap existing memory region. Therefore, if the
+dynamic mapping request is deemed legitimate, it automatically concerns memory
+that was not mapped in this translation regime and the library will have
+initialized its corresponding translation table entry to an invalid
+descriptor. Given that the TLBs are not architecturally permitted to hold any
+invalid translation table entry [#tlb-no-invalid-entry]_, this means that this
+mapping cannot be cached in the TLBs.
+
+.. [#tlb-reset-ref] See section D4.8 `Translation Lookaside Buffers (TLBs)`, subsection `TLB behavior at reset` in ARMv8-A, rev B.a.
+
+.. [#tlb-no-invalid-entry] See section D4.9.1 `General TLB maintenance requirements` in ARMv8-A, rev B.a.
+
+Architectural module
+~~~~~~~~~~~~~~~~~~~~
+
+This module contains functions that have different implementations for AArch32
+and AArch64. For example, it provides APIs to perform TLB maintenance operations,
+enable the MMU or calculate the Physical Address Space size. They do not need a
+translation context to work on.
+
+--------------
+
+*Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.*
+
+.. _lib/xlat\_tables\_v2: ../lib/xlat_tables_v2
+.. _lib/xlat\_tables: ../lib/xlat_tables
+.. _xlat\_tables\_v2.h: ../include/lib/xlat_tables/xlat_tables_v2.h
+.. _xlat\_tables\_internal.c: ../lib/xlat_tables_v2/xlat_tables_internal.c
+.. _aarch32/xlat\_tables\_arch.c: ../lib/xlat_tables_v2/aarch32/xlat_tables_arch.c
+.. _aarch64/xlat\_tables\_arch.c: ../lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
+.. _Porting Guide: porting-guide.rst
+.. |Alignment Example| image:: ./diagrams/xlat_align.png?raw=true
diff --git a/drivers/arm/cci/cci.c b/drivers/arm/cci/cci.c
index 04530b0..e156838 100644
--- a/drivers/arm/cci/cci.c
+++ b/drivers/arm/cci/cci.c
@@ -11,11 +11,26 @@
 #include <mmio.h>
 #include <stdint.h>
 
-static uintptr_t g_cci_base;
-static unsigned int g_max_master_id;
-static const int *g_cci_slave_if_map;
+#define MAKE_CCI_PART_NUMBER(hi, lo)	((hi << 8) | lo)
+#define CCI_PART_LO_MASK		0xff
+#define CCI_PART_HI_MASK		0xf
+
+/* CCI part number codes read from Peripheral ID registers 0 and 1 */
+#define CCI400_PART_NUM		0x420
+#define CCI500_PART_NUM		0x422
+#define CCI550_PART_NUM		0x423
+
+#define CCI400_SLAVE_PORTS	5
+#define CCI500_SLAVE_PORTS	7
+#define CCI550_SLAVE_PORTS	7
+
+static uintptr_t cci_base;
+static const int *cci_slave_if_map;
 
 #if ENABLE_ASSERTIONS
+static unsigned int max_master_id;
+static int cci_num_slave_ports;
+
 static int validate_cci_map(const int *map)
 {
 	unsigned int valid_cci_map = 0;
@@ -23,13 +38,13 @@
 	int i;
 
 	/* Validate the map */
-	for (i = 0; i <= g_max_master_id; i++) {
+	for (i = 0; i <= max_master_id; i++) {
 		slave_if_id = map[i];
 
 		if (slave_if_id < 0)
 			continue;
 
-		if (slave_if_id >= CCI_SLAVE_INTERFACE_COUNT) {
+		if (slave_if_id >= cci_num_slave_ports) {
 			ERROR("Slave interface ID is invalid\n");
 			return 0;
 		}
@@ -48,70 +63,105 @@
 
 	return 1;
 }
+
+/*
+ * Read CCI part number from Peripheral ID registers
+ */
+static unsigned int read_cci_part_number(uintptr_t base)
+{
+	unsigned int part_lo, part_hi;
+
+	part_lo = mmio_read_32(base + PERIPHERAL_ID0) & CCI_PART_LO_MASK;
+	part_hi = mmio_read_32(base + PERIPHERAL_ID1) & CCI_PART_HI_MASK;
+
+	return MAKE_CCI_PART_NUMBER(part_hi, part_lo);
+}
+
+/*
+ * Identify a CCI device, and return the number of slaves. Return -1 for an
+ * unidentified device.
+ */
+static int get_slave_ports(unsigned int part_num)
+{
+	/* Macro to match CCI products */
+#define RET_ON_MATCH(product) \
+	case CCI ## product ## _PART_NUM: \
+		return CCI ## product ## _SLAVE_PORTS
+
+	switch (part_num) {
+
+	RET_ON_MATCH(400);
+	RET_ON_MATCH(500);
+	RET_ON_MATCH(550);
+
+	default:
+		return -1;
+	}
+
+#undef RET_ON_MATCH
+}
 #endif /* ENABLE_ASSERTIONS */
 
-void cci_init(uintptr_t cci_base,
-		const int *map,
-		unsigned int num_cci_masters)
+void cci_init(uintptr_t base, const int *map, unsigned int num_cci_masters)
 {
 	assert(map);
-	assert(cci_base);
+	assert(base);
 
-	g_cci_base = cci_base;
+	cci_base = base;
+	cci_slave_if_map = map;
 
+#if ENABLE_ASSERTIONS
 	/*
 	 * Master Id's are assigned from zero, So in an array of size n
 	 * the max master id is (n - 1).
 	 */
-	g_max_master_id = num_cci_masters - 1;
+	max_master_id = num_cci_masters - 1;
+	cci_num_slave_ports = get_slave_ports(read_cci_part_number(base));
+#endif
+	assert(cci_num_slave_ports >= 0);
 
 	assert(validate_cci_map(map));
-	g_cci_slave_if_map = map;
 }
 
 void cci_enable_snoop_dvm_reqs(unsigned int master_id)
 {
-	int slave_if_id;
-
-	assert(g_cci_base);
-	assert(master_id <= g_max_master_id);
+	int slave_if_id = cci_slave_if_map[master_id];
 
-	slave_if_id = g_cci_slave_if_map[master_id];
-	assert((slave_if_id < CCI_SLAVE_INTERFACE_COUNT) && (slave_if_id >= 0));
+	assert(master_id <= max_master_id);
+	assert((slave_if_id < cci_num_slave_ports) && (slave_if_id >= 0));
+	assert(cci_base);
 
 	/*
 	 * Enable Snoops and DVM messages, no need for Read/Modify/Write as
 	 * rest of bits are write ignore
 	 */
-	mmio_write_32(g_cci_base +
-		      SLAVE_IFACE_OFFSET(slave_if_id) +
-		      SNOOP_CTRL_REG, DVM_EN_BIT | SNOOP_EN_BIT);
+	mmio_write_32(cci_base +
+		      SLAVE_IFACE_OFFSET(slave_if_id) + SNOOP_CTRL_REG,
+		      DVM_EN_BIT | SNOOP_EN_BIT);
 
 	/* Wait for the dust to settle down */
-	while (mmio_read_32(g_cci_base + STATUS_REG) & CHANGE_PENDING_BIT)
+	while (mmio_read_32(cci_base + STATUS_REG) & CHANGE_PENDING_BIT)
 		;
 }
 
 void cci_disable_snoop_dvm_reqs(unsigned int master_id)
 {
-	int slave_if_id;
-
-	assert(g_cci_base);
-	assert(master_id <= g_max_master_id);
+	int slave_if_id = cci_slave_if_map[master_id];
 
-	slave_if_id = g_cci_slave_if_map[master_id];
-	assert((slave_if_id < CCI_SLAVE_INTERFACE_COUNT) && (slave_if_id >= 0));
+	assert(master_id <= max_master_id);
+	assert((slave_if_id < cci_num_slave_ports) && (slave_if_id >= 0));
+	assert(cci_base);
 
 	/*
 	 * Disable Snoops and DVM messages, no need for Read/Modify/Write as
 	 * rest of bits are write ignore.
 	 */
-	mmio_write_32(g_cci_base +
-		      SLAVE_IFACE_OFFSET(slave_if_id) +
-		      SNOOP_CTRL_REG, ~(DVM_EN_BIT | SNOOP_EN_BIT));
+	mmio_write_32(cci_base +
+		      SLAVE_IFACE_OFFSET(slave_if_id) + SNOOP_CTRL_REG,
+		      ~(DVM_EN_BIT | SNOOP_EN_BIT));
 
 	/* Wait for the dust to settle down */
-	while (mmio_read_32(g_cci_base + STATUS_REG) & CHANGE_PENDING_BIT)
+	while (mmio_read_32(cci_base + STATUS_REG) & CHANGE_PENDING_BIT)
 		;
 }
 
diff --git a/drivers/arm/smmu/smmu_v3.c b/drivers/arm/smmu/smmu_v3.c
new file mode 100644
index 0000000..cfe8c2a
--- /dev/null
+++ b/drivers/arm/smmu/smmu_v3.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <mmio.h>
+#include <smmu_v3.h>
+
+/* Test for pending invalidate */
+#define INVAL_PENDING(base)	\
+	smmuv3_read_s_init(base) & SMMU_S_INIT_INV_ALL_MASK
+
+static inline uint32_t smmuv3_read_s_idr1(uintptr_t base)
+{
+	return mmio_read_32(base + SMMU_S_IDR1);
+}
+
+static inline uint32_t smmuv3_read_s_init(uintptr_t base)
+{
+	return mmio_read_32(base + SMMU_S_INIT);
+}
+
+static inline void smmuv3_write_s_init(uintptr_t base, uint32_t value)
+{
+	mmio_write_32(base + SMMU_S_INIT, value);
+}
+
+/*
+ * Initialize the SMMU by invalidating all secure caches and TLBs.
+ *
+ * Returns 0 on success, and -1 on failure.
+ */
+int smmuv3_init(uintptr_t smmu_base)
+{
+	uint32_t idr1_reg;
+
+	/*
+	 * Invalidation of secure caches and TLBs is required only if the SMMU
+	 * supports secure state. If not, it's implementation defined as to how
+	 * SMMU_S_INIT register is accessed.
+	 */
+	idr1_reg = smmuv3_read_s_idr1(smmu_base);
+	if (!((idr1_reg >> SMMU_S_IDR1_SECURE_IMPL_SHIFT) &
+			SMMU_S_IDR1_SECURE_IMPL_MASK)) {
+		return -1;
+	}
+
+	/* Initiate invalidation, and wait for it to finish */
+	smmuv3_write_s_init(smmu_base, SMMU_S_INIT_INV_ALL_MASK);
+	while (INVAL_PENDING(smmu_base))
+		;
+
+	return 0;
+}
diff --git a/fdts/fvp-base-gicv3-psci-1t.dtb b/fdts/fvp-base-gicv3-psci-1t.dtb
new file mode 100644
index 0000000..23d360f
--- /dev/null
+++ b/fdts/fvp-base-gicv3-psci-1t.dtb
Binary files differ
diff --git a/fdts/fvp-base-gicv3-psci-1t.dts b/fdts/fvp-base-gicv3-psci-1t.dts
new file mode 100644
index 0000000..36fbd44
--- /dev/null
+++ b/fdts/fvp-base-gicv3-psci-1t.dts
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/dts-v1/;
+
+/include/ "fvp-base-gicv3-psci-common.dtsi"
+
+&CPU0 {
+	reg = <0x0 0x0>;
+};
+
+&CPU1 {
+	reg = <0x0 0x100>;
+};
+
+&CPU2 {
+	reg = <0x0 0x200>;
+};
+
+&CPU3 {
+	reg = <0x0 0x300>;
+};
+
+&CPU4 {
+	reg = <0x0 0x10000>;
+};
+
+&CPU5 {
+	reg = <0x0 0x10100>;
+};
+
+&CPU6 {
+	reg = <0x0 0x10200>;
+};
+
+&CPU7 {
+	reg = <0x0 0x10300>;
+};
diff --git a/fdts/fvp-base-gicv3-psci-common.dtsi b/fdts/fvp-base-gicv3-psci-common.dtsi
new file mode 100644
index 0000000..2ef2df8
--- /dev/null
+++ b/fdts/fvp-base-gicv3-psci-common.dtsi
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/memreserve/ 0x80000000 0x00010000;
+
+/ {
+};
+
+/ {
+	model = "FVP Base";
+	compatible = "arm,vfp-base", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	psci {
+		compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
+		method = "smc";
+		cpu_suspend = <0xc4000001>;
+		cpu_off = <0x84000002>;
+		cpu_on = <0xc4000003>;
+		sys_poweroff = <0x84000008>;
+		sys_reset = <0x84000009>;
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu-map {
+			cluster0 {
+				core0 {
+					cpu = <&CPU0>;
+				};
+				core1 {
+					cpu = <&CPU1>;
+				};
+				core2 {
+					cpu = <&CPU2>;
+				};
+				core3 {
+					cpu = <&CPU3>;
+				};
+			};
+
+			cluster1 {
+				core0 {
+					cpu = <&CPU4>;
+				};
+				core1 {
+					cpu = <&CPU5>;
+				};
+				core2 {
+					cpu = <&CPU6>;
+				};
+				core3 {
+					cpu = <&CPU7>;
+				};
+			};
+		};
+
+		idle-states {
+			entry-method = "arm,psci";
+
+			CPU_SLEEP_0: cpu-sleep-0 {
+				compatible = "arm,idle-state";
+				local-timer-stop;
+				arm,psci-suspend-param = <0x0010000>;
+				entry-latency-us = <40>;
+				exit-latency-us = <100>;
+				min-residency-us = <150>;
+			};
+
+			CLUSTER_SLEEP_0: cluster-sleep-0 {
+				compatible = "arm,idle-state";
+				local-timer-stop;
+				arm,psci-suspend-param = <0x1010000>;
+				entry-latency-us = <500>;
+				exit-latency-us = <1000>;
+				min-residency-us = <2500>;
+			};
+		};
+
+		CPU0:cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x0>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		CPU1:cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x1>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		CPU2:cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x2>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		CPU3:cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x3>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		CPU4:cpu@100 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x100>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		CPU5:cpu@101 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x101>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		CPU6:cpu@102 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x102>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		CPU7:cpu@103 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x103>;
+			enable-method = "psci";
+			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		L2_0: l2-cache0 {
+			compatible = "cache";
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x00000000 0x80000000 0 0x7F000000>,
+		      <0x00000008 0x80000000 0 0x80000000>;
+	};
+
+	gic: interrupt-controller@2f000000 {
+		compatible = "arm,gic-v3";
+		#interrupt-cells = <3>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+		interrupt-controller;
+		reg = <0x0 0x2f000000 0 0x10000>,	// GICD
+		      <0x0 0x2f100000 0 0x200000>,	// GICR
+		      <0x0 0x2c000000 0 0x2000>,	// GICC
+		      <0x0 0x2c010000 0 0x2000>,	// GICH
+		      <0x0 0x2c02f000 0 0x2000>;	// GICV
+		interrupts = <1 9 4>;
+
+		its: its@2f020000 {
+			compatible = "arm,gic-v3-its";
+			msi-controller;
+			reg = <0x0 0x2f020000 0x0 0x20000>; // GITS
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <1 13 0xff01>,
+			     <1 14 0xff01>,
+			     <1 11 0xff01>,
+			     <1 10 0xff01>;
+		clock-frequency = <100000000>;
+	};
+
+	timer@2a810000 {
+			compatible = "arm,armv7-timer-mem";
+			reg = <0x0 0x2a810000 0x0 0x10000>;
+			clock-frequency = <100000000>;
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges;
+			frame@2a830000 {
+				frame-number = <1>;
+				interrupts = <0 26 4>;
+				reg = <0x0 0x2a830000 0x0 0x10000>;
+			};
+	};
+
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <0 60 4>,
+			     <0 61 4>,
+			     <0 62 4>,
+			     <0 63 4>;
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		/include/ "rtsm_ve-motherboard.dtsi"
+	};
+
+	panels {
+		panel@0 {
+			compatible	= "panel";
+			mode		= "XVGA";
+			refresh		= <60>;
+			xres		= <1024>;
+			yres		= <768>;
+			pixclock	= <15748>;
+			left_margin	= <152>;
+			right_margin	= <48>;
+			upper_margin	= <23>;
+			lower_margin	= <3>;
+			hsync_len	= <104>;
+			vsync_len	= <4>;
+			sync		= <0>;
+			vmode		= "FB_VMODE_NONINTERLACED";
+			tim2		= "TIM2_BCD", "TIM2_IPC";
+			cntl		= "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+			caps		= "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+			bpp		= <16>;
+		};
+	};
+};
diff --git a/fdts/fvp-base-gicv3-psci.dts b/fdts/fvp-base-gicv3-psci.dts
index d81bfbb..3ea429c 100644
--- a/fdts/fvp-base-gicv3-psci.dts
+++ b/fdts/fvp-base-gicv3-psci.dts
@@ -6,265 +6,4 @@
 
 /dts-v1/;
 
-/memreserve/ 0x80000000 0x00010000;
-
-/ {
-};
-
-/ {
-	model = "FVP Base";
-	compatible = "arm,vfp-base", "arm,vexpress";
-	interrupt-parent = <&gic>;
-	#address-cells = <2>;
-	#size-cells = <2>;
-
-	chosen { };
-
-	aliases {
-		serial0 = &v2m_serial0;
-		serial1 = &v2m_serial1;
-		serial2 = &v2m_serial2;
-		serial3 = &v2m_serial3;
-	};
-
-	psci {
-		compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
-		method = "smc";
-		cpu_suspend = <0xc4000001>;
-		cpu_off = <0x84000002>;
-		cpu_on = <0xc4000003>;
-		sys_poweroff = <0x84000008>;
-		sys_reset = <0x84000009>;
-	};
-
-	cpus {
-		#address-cells = <2>;
-		#size-cells = <0>;
-
-		cpu-map {
-			cluster0 {
-				core0 {
-					cpu = <&CPU0>;
-				};
-				core1 {
-					cpu = <&CPU1>;
-				};
-				core2 {
-					cpu = <&CPU2>;
-				};
-				core3 {
-					cpu = <&CPU3>;
-				};
-			};
-
-			cluster1 {
-				core0 {
-					cpu = <&CPU4>;
-				};
-				core1 {
-					cpu = <&CPU5>;
-				};
-				core2 {
-					cpu = <&CPU6>;
-				};
-				core3 {
-					cpu = <&CPU7>;
-				};
-			};
-		};
-
-		idle-states {
-			entry-method = "arm,psci";
-
-			CPU_SLEEP_0: cpu-sleep-0 {
-				compatible = "arm,idle-state";
-				local-timer-stop;
-				arm,psci-suspend-param = <0x0010000>;
-				entry-latency-us = <40>;
-				exit-latency-us = <100>;
-				min-residency-us = <150>;
-			};
-
-			CLUSTER_SLEEP_0: cluster-sleep-0 {
-				compatible = "arm,idle-state";
-				local-timer-stop;
-				arm,psci-suspend-param = <0x1010000>;
-				entry-latency-us = <500>;
-				exit-latency-us = <1000>;
-				min-residency-us = <2500>;
-			};
-		};
-
-		CPU0:cpu@0 {
-			device_type = "cpu";
-			compatible = "arm,armv8";
-			reg = <0x0 0x0>;
-			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			next-level-cache = <&L2_0>;
-		};
-
-		CPU1:cpu@1 {
-			device_type = "cpu";
-			compatible = "arm,armv8";
-			reg = <0x0 0x1>;
-			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			next-level-cache = <&L2_0>;
-		};
-
-		CPU2:cpu@2 {
-			device_type = "cpu";
-			compatible = "arm,armv8";
-			reg = <0x0 0x2>;
-			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			next-level-cache = <&L2_0>;
-		};
-
-		CPU3:cpu@3 {
-			device_type = "cpu";
-			compatible = "arm,armv8";
-			reg = <0x0 0x3>;
-			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			next-level-cache = <&L2_0>;
-		};
-
-		CPU4:cpu@100 {
-			device_type = "cpu";
-			compatible = "arm,armv8";
-			reg = <0x0 0x100>;
-			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			next-level-cache = <&L2_0>;
-		};
-
-		CPU5:cpu@101 {
-			device_type = "cpu";
-			compatible = "arm,armv8";
-			reg = <0x0 0x101>;
-			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			next-level-cache = <&L2_0>;
-		};
-
-		CPU6:cpu@102 {
-			device_type = "cpu";
-			compatible = "arm,armv8";
-			reg = <0x0 0x102>;
-			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			next-level-cache = <&L2_0>;
-		};
-
-		CPU7:cpu@103 {
-			device_type = "cpu";
-			compatible = "arm,armv8";
-			reg = <0x0 0x103>;
-			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			next-level-cache = <&L2_0>;
-		};
-
-		L2_0: l2-cache0 {
-			compatible = "cache";
-		};
-	};
-
-	memory@80000000 {
-		device_type = "memory";
-		reg = <0x00000000 0x80000000 0 0x7F000000>,
-		      <0x00000008 0x80000000 0 0x80000000>;
-	};
-
-	gic: interrupt-controller@2f000000 {
-		compatible = "arm,gic-v3";
-		#interrupt-cells = <3>;
-		#address-cells = <2>;
-		#size-cells = <2>;
-		ranges;
-		interrupt-controller;
-		reg = <0x0 0x2f000000 0 0x10000>,	// GICD
-		      <0x0 0x2f100000 0 0x200000>,	// GICR
-		      <0x0 0x2c000000 0 0x2000>,	// GICC
-		      <0x0 0x2c010000 0 0x2000>,	// GICH
-		      <0x0 0x2c02f000 0 0x2000>;	// GICV
-		interrupts = <1 9 4>;
-
-		its: its@2f020000 {
-			compatible = "arm,gic-v3-its";
-			msi-controller;
-			reg = <0x0 0x2f020000 0x0 0x20000>; // GITS
-		};
-	};
-
-	timer {
-		compatible = "arm,armv8-timer";
-		interrupts = <1 13 0xff01>,
-			     <1 14 0xff01>,
-			     <1 11 0xff01>,
-			     <1 10 0xff01>;
-		clock-frequency = <100000000>;
-	};
-
-	timer@2a810000 {
-			compatible = "arm,armv7-timer-mem";
-			reg = <0x0 0x2a810000 0x0 0x10000>;
-			clock-frequency = <100000000>;
-			#address-cells = <2>;
-			#size-cells = <2>;
-			ranges;
-			frame@2a830000 {
-				frame-number = <1>;
-				interrupts = <0 26 4>;
-				reg = <0x0 0x2a830000 0x0 0x10000>;
-			};
-	};
-
-	pmu {
-		compatible = "arm,armv8-pmuv3";
-		interrupts = <0 60 4>,
-			     <0 61 4>,
-			     <0 62 4>,
-			     <0 63 4>;
-	};
-
-	smb {
-		compatible = "simple-bus";
-
-		#address-cells = <2>;
-		#size-cells = <1>;
-		ranges = <0 0 0 0x08000000 0x04000000>,
-			 <1 0 0 0x14000000 0x04000000>,
-			 <2 0 0 0x18000000 0x04000000>,
-			 <3 0 0 0x1c000000 0x04000000>,
-			 <4 0 0 0x0c000000 0x04000000>,
-			 <5 0 0 0x10000000 0x04000000>;
-
-		/include/ "rtsm_ve-motherboard.dtsi"
-	};
-
-	panels {
-		panel@0 {
-			compatible	= "panel";
-			mode		= "XVGA";
-			refresh		= <60>;
-			xres		= <1024>;
-			yres		= <768>;
-			pixclock	= <15748>;
-			left_margin	= <152>;
-			right_margin	= <48>;
-			upper_margin	= <23>;
-			lower_margin	= <3>;
-			hsync_len	= <104>;
-			vsync_len	= <4>;
-			sync		= <0>;
-			vmode		= "FB_VMODE_NONINTERLACED";
-			tim2		= "TIM2_BCD", "TIM2_IPC";
-			cntl		= "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
-			caps		= "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
-			bpp		= <16>;
-		};
-	};
-};
+/include/ "fvp-base-gicv3-psci-common.dtsi"
diff --git a/include/drivers/arm/cci.h b/include/drivers/arm/cci.h
index 5ac79c6..1def6a8 100644
--- a/include/drivers/arm/cci.h
+++ b/include/drivers/arm/cci.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -98,14 +98,6 @@
 
 #define SLAVE_IF_UNUSED			-1
 
-#if ARM_CCI_PRODUCT_ID == 400
-	#define CCI_SLAVE_INTERFACE_COUNT	5
-#elif ARM_CCI_PRODUCT_ID == 500
-	#define CCI_SLAVE_INTERFACE_COUNT	7
-#else
-	#error "Invalid CCI product or CCI not supported"
-#endif
-
 #ifndef __ASSEMBLY__
 
 #include <stdint.h>
@@ -114,7 +106,7 @@
 
 /*
  * The ARM CCI driver needs the following:
- * 1. Base address of the CCI-500/CCI-400
+ * 1. Base address of the CCI product
  * 2. An array  of map between AMBA 4 master ids and ACE/ACE lite slave
  *    interfaces.
  * 3. Size of the array.
@@ -122,9 +114,7 @@
  * SLAVE_IF_UNUSED should be used in the map to represent no AMBA 4 master exists
  * for that interface.
  */
-void cci_init(uintptr_t cci_base,
-	const int *map,
-	unsigned int num_cci_masters);
+void cci_init(uintptr_t base, const int *map, unsigned int num_cci_masters);
 
 void cci_enable_snoop_dvm_reqs(unsigned int master_id);
 void cci_disable_snoop_dvm_reqs(unsigned int master_id);
diff --git a/include/drivers/arm/smmu_v3.h b/include/drivers/arm/smmu_v3.h
new file mode 100644
index 0000000..b7efde4
--- /dev/null
+++ b/include/drivers/arm/smmu_v3.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __SMMU_V3_H__
+#define __SMMU_V3_H__
+
+#include <stdint.h>
+
+/* SMMUv3 register offsets from device base */
+#define SMMU_S_IDR1	0x8004
+#define SMMU_S_INIT	0x803c
+
+/* SMMU_S_IDR1 register fields */
+#define SMMU_S_IDR1_SECURE_IMPL_SHIFT	31
+#define SMMU_S_IDR1_SECURE_IMPL_MASK	0x1
+
+/* SMMU_S_INIT register fields */
+#define SMMU_S_INIT_INV_ALL_MASK	0x1
+
+
+int smmuv3_init(uintptr_t smmu_base);
+
+#endif /* __SMMU_V3_H__ */
diff --git a/include/plat/arm/common/arm_config.h b/include/plat/arm/common/arm_config.h
index 2ab7bf2..02e04fd 100644
--- a/include/plat/arm/common/arm_config.h
+++ b/include/plat/arm/common/arm_config.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,14 +7,20 @@
 #define __ARM_CONFIG_H__
 
 #include <stdint.h>
+#include <utils_def.h>
 
 enum arm_config_flags {
 	/* Whether Base memory map is in use */
-	ARM_CONFIG_BASE_MMAP		= 0x1,
-	/* Whether interconnect should be enabled */
-	ARM_CONFIG_HAS_INTERCONNECT	= 0x2,
+	ARM_CONFIG_BASE_MMAP		= BIT(1),
 	/* Whether TZC should be configured */
-	ARM_CONFIG_HAS_TZC		= 0x4
+	ARM_CONFIG_HAS_TZC		= BIT(2),
+	/* FVP model has shifted affinity */
+	ARM_CONFIG_FVP_SHIFTED_AFF	= BIT(3),
+	/* FVP model has SMMUv3 affinity */
+	ARM_CONFIG_FVP_HAS_SMMUV3	= BIT(4),
+	/* FVP model has CCI (400 or 500/550) devices */
+	ARM_CONFIG_FVP_HAS_CCI400	= BIT(5),
+	ARM_CONFIG_FVP_HAS_CCI5XX	= BIT(6),
 };
 
 typedef struct arm_config {
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index 9946fea..302d937 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -16,10 +16,6 @@
 # The Target build architecture. Supported values are: aarch64, aarch32.
 ARCH				:= aarch64
 
-# Determine the version of ARM CCI product used in the platform. The platform
-# port can change this value if needed.
-ARM_CCI_PRODUCT_ID		:= 400
-
 # ARM Architecture major and minor versions: 8.0 by default.
 ARM_ARCH_MAJOR			:= 8
 ARM_ARCH_MINOR			:= 0
diff --git a/plat/arm/board/fvp/aarch32/fvp_helpers.S b/plat/arm/board/fvp/aarch32/fvp_helpers.S
index e80e199..143972d 100644
--- a/plat/arm/board/fvp/aarch32/fvp_helpers.S
+++ b/plat/arm/board/fvp/aarch32/fvp_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -13,6 +13,7 @@
 	.globl	plat_secondary_cold_boot_setup
 	.globl	plat_get_my_entrypoint
 	.globl	plat_is_my_cpu_primary
+	.globl	plat_arm_calc_core_pos
 
 	/* --------------------------------------------------------------------
 	 * void plat_secondary_cold_boot_setup (void);
@@ -95,10 +96,43 @@
 	 */
 func plat_is_my_cpu_primary
 	ldcopr	r0, MPIDR
-	ldr	r1, =(MPIDR_CLUSTER_MASK | MPIDR_CPU_MASK)
+	ldr	r1, =MPIDR_AFFINITY_MASK
 	and	r0, r1
 	cmp	r0, #FVP_PRIMARY_CPU
 	moveq	r0, #1
 	movne	r0, #0
 	bx	lr
 endfunc plat_is_my_cpu_primary
+
+	/* -----------------------------------------------------
+	 * unsigned int plat_arm_calc_core_pos(u_register_t mpidr)
+	 *
+	 * Function to calculate the core position on FVP.
+	 *
+	 * (ClusterId * FVP_MAX_CPUS_PER_CLUSTER) +
+	 * (CPUId * FVP_MAX_PE_PER_CPU) +
+	 * ThreadId
+	 * -----------------------------------------------------
+	 */
+func plat_arm_calc_core_pos
+	mov	r3, r0
+
+	/*
+	 * Check for MT bit in MPIDR. If not set, shift MPIDR to left to make it
+	 * look as if in a multi-threaded implementation
+	 */
+	tst	r0, #MPIDR_MT_MASK
+	lsleq	r3, r0, #MPIDR_AFFINITY_BITS
+
+	/* Extract individual affinity fields from MPIDR */
+	mov	r2, #FVP_MAX_PE_PER_CPU
+	ubfx	r0, r3, #MPIDR_AFF0_SHIFT, #MPIDR_AFFINITY_BITS
+	ubfx	r1, r3, #MPIDR_AFF1_SHIFT, #MPIDR_AFFINITY_BITS
+	mla	r0, r1, r2, r0
+
+	mov	r1, #FVP_MAX_CPUS_PER_CLUSTER
+	ubfx	r2, r3, #MPIDR_AFF2_SHIFT, #MPIDR_AFFINITY_BITS
+	mla	r0, r1, r2, r0
+
+	bx	lr
+endfunc plat_arm_calc_core_pos
diff --git a/plat/arm/board/fvp/aarch64/fvp_helpers.S b/plat/arm/board/fvp/aarch64/fvp_helpers.S
index f4107de..6ea4585 100644
--- a/plat/arm/board/fvp/aarch64/fvp_helpers.S
+++ b/plat/arm/board/fvp/aarch64/fvp_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -16,6 +16,7 @@
 	.globl	plat_secondary_cold_boot_setup
 	.globl	plat_get_my_entrypoint
 	.globl	plat_is_my_cpu_primary
+	.globl	plat_arm_calc_core_pos
 
 	.macro	fvp_choose_gicmmap  param1, param2, x_tmp, w_tmp, res
 	ldr	\x_tmp, =V2M_SYSREGS_BASE + V2M_SYS_ID
@@ -170,8 +171,43 @@
 	 */
 func plat_is_my_cpu_primary
 	mrs	x0, mpidr_el1
-	and	x0, x0, #(MPIDR_CLUSTER_MASK | MPIDR_CPU_MASK)
+	ldr	x1, =MPIDR_AFFINITY_MASK
+	and	x0, x0, x1
 	cmp	x0, #FVP_PRIMARY_CPU
 	cset	w0, eq
 	ret
 endfunc plat_is_my_cpu_primary
+
+	/* -----------------------------------------------------
+	 * unsigned int plat_arm_calc_core_pos(u_register_t mpidr)
+	 *
+	 * Function to calculate the core position on FVP.
+	 *
+	 * (ClusterId * FVP_MAX_CPUS_PER_CLUSTER) +
+	 * (CPUId * FVP_MAX_PE_PER_CPU) +
+	 * ThreadId
+	 * -----------------------------------------------------
+	 */
+func plat_arm_calc_core_pos
+	mov	x3, x0
+
+	/*
+	 * Check for MT bit in MPIDR. If not set, shift MPIDR to left to make it
+	 * look as if in a multi-threaded implementation.
+	 */
+	tst	x0, #MPIDR_MT_MASK
+	lsl	x3, x0, #MPIDR_AFFINITY_BITS
+	csel	x3, x3, x0, eq
+
+	/* Extract individual affinity fields from MPIDR */
+	ubfx	x0, x3, #MPIDR_AFF0_SHIFT, #MPIDR_AFFINITY_BITS
+	ubfx	x1, x3, #MPIDR_AFF1_SHIFT, #MPIDR_AFFINITY_BITS
+	ubfx	x2, x3, #MPIDR_AFF2_SHIFT, #MPIDR_AFFINITY_BITS
+
+	/* Compute linear position */
+	mov	x4, #FVP_MAX_PE_PER_CPU
+	madd	x0, x1, x4, x0
+	mov	x5, #FVP_MAX_CPUS_PER_CLUSTER
+	madd	x0, x2, x5, x0
+	ret
+endfunc plat_arm_calc_core_pos
diff --git a/plat/arm/board/fvp/fvp_bl31_setup.c b/plat/arm/board/fvp/fvp_bl31_setup.c
index 52a4432..181c923 100644
--- a/plat/arm/board/fvp/fvp_bl31_setup.c
+++ b/plat/arm/board/fvp/fvp_bl31_setup.c
@@ -1,10 +1,12 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
+#include <arm_config.h>
 #include <plat_arm.h>
+#include <smmu_v3.h>
 #include "fvp_private.h"
 
 #if LOAD_IMAGE_V2
@@ -34,4 +36,8 @@
 	 * FVP PSCI code will enable coherency for other clusters.
 	 */
 	fvp_interconnect_enable();
+
+	/* On FVP RevC, intialize SMMUv3 */
+	if (arm_config.flags & ARM_CONFIG_FVP_HAS_SMMUV3)
+		smmuv3_init(PLAT_FVP_SMMUV3_BASE);
 }
diff --git a/plat/arm/board/fvp/fvp_common.c b/plat/arm/board/fvp/fvp_common.c
index 2f5d7fc..c1dcc02 100644
--- a/plat/arm/board/fvp/fvp_common.c
+++ b/plat/arm/board/fvp/fvp_common.c
@@ -1,11 +1,13 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
 #include <arm_config.h>
 #include <arm_def.h>
+#include <assert.h>
+#include <cci.h>
 #include <ccn.h>
 #include <debug.h>
 #include <gicv2.h>
@@ -118,6 +120,30 @@
 
 ARM_CASSERT_MMAP
 
+#if FVP_INTERCONNECT_DRIVER != FVP_CCN
+static const int fvp_cci400_map[] = {
+	PLAT_FVP_CCI400_CLUS0_SL_PORT,
+	PLAT_FVP_CCI400_CLUS1_SL_PORT,
+};
+
+static const int fvp_cci5xx_map[] = {
+	PLAT_FVP_CCI5XX_CLUS0_SL_PORT,
+	PLAT_FVP_CCI5XX_CLUS1_SL_PORT,
+};
+
+static unsigned int get_interconnect_master(void)
+{
+	unsigned int master;
+	u_register_t mpidr;
+
+	mpidr = read_mpidr_el1();
+	master = (arm_config.flags & ARM_CONFIG_FVP_SHIFTED_AFF) ?
+		MPIDR_AFFLVL2_VAL(mpidr) : MPIDR_AFFLVL1_VAL(mpidr);
+
+	assert(master < FVP_CLUSTER_COUNT);
+	return master;
+}
+#endif
 
 /*******************************************************************************
  * A single boot loader stack is expected to work on both the Foundation FVP
@@ -182,8 +208,7 @@
 		}
 		break;
 	case HBI_BASE_FVP:
-		arm_config.flags |= ARM_CONFIG_BASE_MMAP |
-			ARM_CONFIG_HAS_INTERCONNECT | ARM_CONFIG_HAS_TZC;
+		arm_config.flags |= (ARM_CONFIG_BASE_MMAP | ARM_CONFIG_HAS_TZC);
 
 		/*
 		 * Check for supported revisions
@@ -191,6 +216,12 @@
 		 */
 		switch (rev) {
 		case REV_BASE_FVP_V0:
+			arm_config.flags |= ARM_CONFIG_FVP_HAS_CCI400;
+			break;
+		case REV_BASE_FVP_REVC:
+			arm_config.flags |= (ARM_CONFIG_FVP_SHIFTED_AFF |
+					ARM_CONFIG_FVP_HAS_SMMUV3 |
+					ARM_CONFIG_FVP_HAS_CCI5XX);
 			break;
 		default:
 			WARN("Unrecognized Base FVP revision %x\n", rev);
@@ -206,26 +237,67 @@
 
 void fvp_interconnect_init(void)
 {
-	if (arm_config.flags & ARM_CONFIG_HAS_INTERCONNECT) {
 #if FVP_INTERCONNECT_DRIVER == FVP_CCN
-		if (ccn_get_part0_id(PLAT_ARM_CCN_BASE) != CCN_502_PART0_ID) {
-			ERROR("Unrecognized CCN variant detected. Only CCN-502"
-					" is supported");
-			panic();
-		}
-#endif
-		plat_arm_interconnect_init();
+	if (ccn_get_part0_id(PLAT_ARM_CCN_BASE) != CCN_502_PART0_ID) {
+		ERROR("Unrecognized CCN variant detected. Only CCN-502"
+				" is supported");
+		panic();
+	}
+
+	plat_arm_interconnect_init();
+#else
+	uintptr_t cci_base = 0;
+	const int *cci_map = 0;
+	unsigned int map_size = 0;
+
+	if (!(arm_config.flags & (ARM_CONFIG_FVP_HAS_CCI400 |
+				ARM_CONFIG_FVP_HAS_CCI5XX))) {
+		return;
+	}
+
+	/* Initialize the right interconnect */
+	if (arm_config.flags & ARM_CONFIG_FVP_HAS_CCI5XX) {
+		cci_base = PLAT_FVP_CCI5XX_BASE;
+		cci_map = fvp_cci5xx_map;
+		map_size = ARRAY_SIZE(fvp_cci5xx_map);
+	} else if (arm_config.flags & ARM_CONFIG_FVP_HAS_CCI400) {
+		cci_base = PLAT_FVP_CCI400_BASE;
+		cci_map = fvp_cci400_map;
+		map_size = ARRAY_SIZE(fvp_cci400_map);
 	}
+
+	assert(cci_base);
+	assert(cci_map);
+	cci_init(cci_base, cci_map, map_size);
+#endif
 }
 
 void fvp_interconnect_enable(void)
 {
-	if (arm_config.flags & ARM_CONFIG_HAS_INTERCONNECT)
-		plat_arm_interconnect_enter_coherency();
+#if FVP_INTERCONNECT_DRIVER == FVP_CCN
+	plat_arm_interconnect_enter_coherency();
+#else
+	unsigned int master;
+
+	if (arm_config.flags & (ARM_CONFIG_FVP_HAS_CCI400 |
+				ARM_CONFIG_FVP_HAS_CCI5XX)) {
+		master = get_interconnect_master();
+		cci_enable_snoop_dvm_reqs(master);
+	}
+#endif
 }
 
 void fvp_interconnect_disable(void)
 {
-	if (arm_config.flags & ARM_CONFIG_HAS_INTERCONNECT)
-		plat_arm_interconnect_exit_coherency();
+#if FVP_INTERCONNECT_DRIVER == FVP_CCN
+	plat_arm_interconnect_exit_coherency();
+#else
+	unsigned int master;
+
+	if (arm_config.flags & (ARM_CONFIG_FVP_HAS_CCI400 |
+				ARM_CONFIG_FVP_HAS_CCI5XX)) {
+		master = get_interconnect_master();
+		cci_disable_snoop_dvm_reqs(master);
+	}
+#endif
 }
diff --git a/plat/arm/board/fvp/fvp_def.h b/plat/arm/board/fvp/fvp_def.h
index d4f9d92..a430bca 100644
--- a/plat/arm/board/fvp/fvp_def.h
+++ b/plat/arm/board/fvp/fvp_def.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -12,6 +12,10 @@
 #endif
 #define FVP_MAX_CPUS_PER_CLUSTER	4
 
+#ifndef FVP_MAX_PE_PER_CPU
+# define FVP_MAX_PE_PER_CPU		1
+#endif
+
 #define FVP_PRIMARY_CPU			0x0
 
 /* Defines for the Interconnect build selection */
@@ -74,6 +78,7 @@
 /* Constants to distinguish FVP type */
 #define HBI_BASE_FVP			0x020
 #define REV_BASE_FVP_V0			0x0
+#define REV_BASE_FVP_REVC		0x2
 
 #define HBI_FOUNDATION_FVP		0x010
 #define REV_FOUNDATION_FVP_V2_0		0x0
diff --git a/plat/arm/board/fvp/fvp_topology.c b/plat/arm/board/fvp/fvp_topology.c
index 055b985..cf1492b 100644
--- a/plat/arm/board/fvp/fvp_topology.c
+++ b/plat/arm/board/fvp/fvp_topology.c
@@ -1,10 +1,11 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
 #include <arch.h>
+#include <arm_config.h>
 #include <cassert.h>
 #include <plat_arm.h>
 #include <platform_def.h>
@@ -55,11 +56,38 @@
  ******************************************************************************/
 int plat_core_pos_by_mpidr(u_register_t mpidr)
 {
-	if (arm_check_mpidr(mpidr) == -1)
+	unsigned int clus_id, cpu_id, thread_id;
+
+	/* Validate affinity fields */
+	if (arm_config.flags & ARM_CONFIG_FVP_SHIFTED_AFF) {
+		thread_id = MPIDR_AFFLVL0_VAL(mpidr);
+		cpu_id = MPIDR_AFFLVL1_VAL(mpidr);
+		clus_id = MPIDR_AFFLVL2_VAL(mpidr);
+	} else {
+		thread_id = 0;
+		cpu_id = MPIDR_AFFLVL0_VAL(mpidr);
+		clus_id = MPIDR_AFFLVL1_VAL(mpidr);
+	}
+
+	if (clus_id >= FVP_CLUSTER_COUNT)
+		return -1;
+	if (cpu_id >= FVP_MAX_CPUS_PER_CLUSTER)
+		return -1;
+	if (thread_id >= FVP_MAX_PE_PER_CPU)
 		return -1;
 
 	if (fvp_pwrc_read_psysr(mpidr) == PSYSR_INVALID)
 		return -1;
 
+	/*
+	 * Core position calculation for FVP platform depends on the MT bit in
+	 * MPIDR. This function cannot assume that the supplied MPIDR has the MT
+	 * bit set even if the implementation has. For example, PSCI clients
+	 * might supply MPIDR values without the MT bit set. Therefore, we
+	 * inject the current PE's MT bit so as to get the calculation correct.
+	 * This of course assumes that none or all CPUs on the platform has MT
+	 * bit set.
+	 */
+	mpidr |= (read_mpidr_el1() & MPIDR_MT_MASK);
 	return plat_arm_calc_core_pos(mpidr);
 }
diff --git a/plat/arm/board/fvp/include/plat_macros.S b/plat/arm/board/fvp/include/plat_macros.S
index d3d70e4..4dcde2d 100644
--- a/plat/arm/board/fvp/include/plat_macros.S
+++ b/plat/arm/board/fvp/include/plat_macros.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,15 +7,13 @@
 #define __PLAT_MACROS_S__
 
 #include <arm_macros.S>
-#include <cci_macros.S>
 #include <v2m_def.h>
 #include "../fvp_def.h"
 
 	/* ---------------------------------------------
 	 * The below required platform porting macro
-	 * prints out relevant GIC and CCI registers
-	 * whenever an unhandled exception is taken in
-	 * BL31.
+	 * prints out relevant GIC registers whenever an
+	 * unhandled exception is taken in BL31.
 	 * Clobbers: x0 - x10, x16, x17, sp
 	 * ---------------------------------------------
 	 */
@@ -40,9 +38,6 @@
 	mov_imm	x16, VE_GICD_BASE
 print_gic_regs:
 	arm_print_gic_regs
-#if FVP_INTERCONNECT_DRIVER == FVP_CCI
-	print_cci_regs
-#endif
 	.endm
 
 #endif /* __PLAT_MACROS_S__ */
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index f13fc8e..bf038e9 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -16,10 +16,13 @@
 #include "../fvp_def.h"
 
 /* Required platform porting definitions */
+#define PLATFORM_CORE_COUNT \
+	(FVP_CLUSTER_COUNT * FVP_MAX_CPUS_PER_CLUSTER * FVP_MAX_PE_PER_CPU)
+
 #define PLAT_NUM_PWR_DOMAINS		(FVP_CLUSTER_COUNT + \
 					PLATFORM_CORE_COUNT)
+
 #define PLAT_MAX_PWR_LVL		ARM_PWR_LVL1
-#define PLATFORM_CORE_COUNT		(FVP_CLUSTER_COUNT * FVP_MAX_CPUS_PER_CLUSTER)
 
 /*
  * Other platform porting definitions are provided by included headers
@@ -65,10 +68,17 @@
 #define PLAT_ARM_TSP_UART_BASE		V2M_IOFPGA_UART2_BASE
 #define PLAT_ARM_TSP_UART_CLK_IN_HZ	V2M_IOFPGA_UART2_CLK_IN_HZ
 
+#define PLAT_FVP_SMMUV3_BASE		0x2b400000
+
 /* CCI related constants */
-#define PLAT_ARM_CCI_BASE		0x2c090000
-#define PLAT_ARM_CCI_CLUSTER0_SL_IFACE_IX	3
-#define PLAT_ARM_CCI_CLUSTER1_SL_IFACE_IX	4
+#define PLAT_FVP_CCI400_BASE		0x2c090000
+#define PLAT_FVP_CCI400_CLUS0_SL_PORT	3
+#define PLAT_FVP_CCI400_CLUS1_SL_PORT	4
+
+/* CCI-500/CCI-550 on Base platform */
+#define PLAT_FVP_CCI5XX_BASE		0x2a000000
+#define PLAT_FVP_CCI5XX_CLUS0_SL_PORT	5
+#define PLAT_FVP_CCI5XX_CLUS1_SL_PORT	6
 
 /* CCN related constants. Only CCN 502 is currently supported */
 #define PLAT_ARM_CCN_BASE		0x2e000000
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index d6e8ced..0b6e1da 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -6,19 +6,28 @@
 
 # Use the GICv3 driver on the FVP by default
 FVP_USE_GIC_DRIVER	:= FVP_GICV3
+
 # Use the SP804 timer instead of the generic one
 FVP_USE_SP804_TIMER	:= 0
 
+# Default cluster count for FVP
+FVP_CLUSTER_COUNT	:= 2
+
+# Default number of threads per CPU on FVP
+FVP_MAX_PE_PER_CPU	:= 1
+
 $(eval $(call assert_boolean,FVP_USE_SP804_TIMER))
 $(eval $(call add_define,FVP_USE_SP804_TIMER))
 
 # The FVP platform depends on this macro to build with correct GIC driver.
 $(eval $(call add_define,FVP_USE_GIC_DRIVER))
 
-# Define default FVP_CLUSTER_COUNT to 2 and pass it into the build system.
-FVP_CLUSTER_COUNT	:= 2
+# Pass FVP_CLUSTER_COUNT to the build system.
 $(eval $(call add_define,FVP_CLUSTER_COUNT))
 
+# Pass FVP_MAX_PE_PER_CPU to the build system.
+$(eval $(call add_define,FVP_MAX_PE_PER_CPU))
+
 # Sanity check the cluster count and if FVP_CLUSTER_COUNT <= 2,
 # choose the CCI driver , else the CCN driver
 ifeq ($(FVP_CLUSTER_COUNT), 0)
@@ -63,8 +72,7 @@
 endif
 
 ifeq (${FVP_INTERCONNECT_DRIVER}, FVP_CCI)
-FVP_INTERCONNECT_SOURCES	:= 	drivers/arm/cci/cci.c		\
-					plat/arm/common/arm_cci.c
+FVP_INTERCONNECT_SOURCES	:= 	drivers/arm/cci/cci.c
 else ifeq (${FVP_INTERCONNECT_DRIVER}, FVP_CCN)
 FVP_INTERCONNECT_SOURCES	:= 	drivers/arm/ccn/ccn.c		\
 					plat/arm/common/arm_ccn.c
@@ -127,7 +135,8 @@
 BL2U_SOURCES		+=	plat/arm/board/fvp/fvp_bl2u_setup.c		\
 				${FVP_SECURITY_SOURCES}
 
-BL31_SOURCES		+=	plat/arm/board/fvp/fvp_bl31_setup.c		\
+BL31_SOURCES		+=	drivers/arm/smmu/smmu_v3.c			\
+				plat/arm/board/fvp/fvp_bl31_setup.c		\
 				plat/arm/board/fvp/fvp_pm.c			\
 				plat/arm/board/fvp/fvp_topology.c		\
 				plat/arm/board/fvp/aarch64/fvp_helpers.S	\
diff --git a/plat/arm/common/arm_gicv3.c b/plat/arm/common/arm_gicv3.c
index 960f691..c9bba09 100644
--- a/plat/arm/common/arm_gicv3.c
+++ b/plat/arm/common/arm_gicv3.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -35,6 +35,26 @@
 	PLAT_ARM_G0_IRQS
 };
 
+/*
+ * MPIDR hashing function for translating MPIDRs read from GICR_TYPER register
+ * to core position.
+ *
+ * Calculating core position is dependent on MPIDR_EL1.MT bit. However, affinity
+ * values read from GICR_TYPER don't have an MT field. To reuse the same
+ * translation used for CPUs, we insert MT bit read from the PE's MPIDR into
+ * that read from GICR_TYPER.
+ *
+ * Assumptions:
+ *
+ *   - All CPUs implemented in the system have MPIDR_EL1.MT bit set;
+ *   - No CPUs implemented in the system use affinity level 3.
+ */
+static unsigned int arm_gicv3_mpidr_hash(u_register_t mpidr)
+{
+	mpidr |= (read_mpidr_el1() & MPIDR_MT_MASK);
+	return plat_arm_calc_core_pos(mpidr);
+}
+
 const gicv3_driver_data_t arm_gic_data = {
 	.gicd_base = PLAT_ARM_GICD_BASE,
 	.gicr_base = PLAT_ARM_GICR_BASE,
@@ -44,7 +64,7 @@
 	.g1s_interrupt_array = g1s_interrupt_array,
 	.rdistif_num = PLATFORM_CORE_COUNT,
 	.rdistif_base_addrs = rdistif_base_addrs,
-	.mpidr_to_core_pos = plat_arm_calc_core_pos
+	.mpidr_to_core_pos = arm_gicv3_mpidr_hash
 };
 
 void plat_arm_gic_driver_init(void)
diff --git a/plat/common/aarch32/platform_mp_stack.S b/plat/common/aarch32/platform_mp_stack.S
index e43047e..6c3d08d 100644
--- a/plat/common/aarch32/platform_mp_stack.S
+++ b/plat/common/aarch32/platform_mp_stack.S
@@ -19,9 +19,9 @@
 	 * -----------------------------------------------------
 	 */
 func plat_get_my_stack
-	mov	r3, lr
+	push	{r4, lr}
 	get_my_mp_stack platform_normal_stacks, PLATFORM_STACK_SIZE
-	bx	r3
+	pop	{r4, pc}
 endfunc	plat_get_my_stack
 
 	/* -----------------------------------------------------
@@ -32,10 +32,10 @@
 	 * -----------------------------------------------------
 	 */
 func plat_set_my_stack
-	mov	r3, lr
+	mov	r4, lr
 	get_my_mp_stack platform_normal_stacks, PLATFORM_STACK_SIZE
 	mov	sp, r0
-	bx	r3
+	bx	r4
 endfunc plat_set_my_stack
 
 	/* -----------------------------------------------------
diff --git a/plat/qemu/platform.mk b/plat/qemu/platform.mk
index cf4f9ca..e6d7014 100644
--- a/plat/qemu/platform.mk
+++ b/plat/qemu/platform.mk
@@ -11,11 +11,23 @@
 				-Iplat/qemu/include			\
 				-Iinclude/common/tbbr
 
+# Use translation tables library v2 by default
+ARM_XLAT_TABLES_LIB_V1		:=	0
+$(eval $(call assert_boolean,ARM_XLAT_TABLES_LIB_V1))
+$(eval $(call add_define,ARM_XLAT_TABLES_LIB_V1))
+
 
 PLAT_BL_COMMON_SOURCES	:=	plat/qemu/qemu_common.c			\
-				drivers/arm/pl011/aarch64/pl011_console.S \
-				lib/xlat_tables/xlat_tables_common.c	\
+				drivers/arm/pl011/aarch64/pl011_console.S
+
+ifeq (${ARM_XLAT_TABLES_LIB_V1}, 1)
+PLAT_BL_COMMON_SOURCES	+=	lib/xlat_tables/xlat_tables_common.c		\
 				lib/xlat_tables/aarch64/xlat_tables.c
+else
+include lib/xlat_tables_v2/xlat_tables.mk
+
+PLAT_BL_COMMON_SOURCES	+=	${XLAT_TABLES_LIB_SRCS}
+endif
 
 BL1_SOURCES		+=	drivers/io/io_semihosting.c		\
 				drivers/io/io_storage.c			\
diff --git a/plat/qemu/qemu_common.c b/plat/qemu/qemu_common.c
index daa9fc1..e34b16f 100644
--- a/plat/qemu/qemu_common.c
+++ b/plat/qemu/qemu_common.c
@@ -7,7 +7,7 @@
 #include <arch_helpers.h>
 #include <bl_common.h>
 #include <platform_def.h>
-#include <xlat_tables.h>
+#include <arm_xlat_tables.h>
 #include "qemu_private.h"
 
 #define MAP_DEVICE0	MAP_REGION_FLAT(DEVICE0_BASE,			\