Merge pull request #1201 from jeenu-arm/sdei-plat-events

ARM platforms: Allow platforms to define SDEI events
diff --git a/Makefile b/Makefile
index 51e622e..1058b39 100644
--- a/Makefile
+++ b/Makefile
@@ -160,6 +160,13 @@
 				-ffreestanding -fno-builtin -Wall -std=gnu99	\
 				-Os -ffunction-sections -fdata-sections
 
+GCC_V_OUTPUT		:=	$(shell $(CC) -v 2>&1)
+PIE_FOUND		:=	$(findstring --enable-default-pie,${GCC_V_OUTPUT})
+
+ifneq ($(PIE_FOUND),)	# findstring expands to the match or to nothing, never "1"
+TF_CFLAGS		+=	-fno-PIE
+endif
+
 TF_LDFLAGS		+=	--fatal-warnings -O1
 TF_LDFLAGS		+=	--gc-sections
 TF_LDFLAGS		+=	$(TF_LDFLAGS_$(ARCH))
diff --git a/common/tf_printf.c b/common/tf_printf.c
index f73842a..d403983 100644
--- a/common/tf_printf.c
+++ b/common/tf_printf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -31,7 +31,8 @@
 		putchar(*str++);
 }
 
-static void unsigned_num_print(unsigned long long int unum, unsigned int radix)
+static void unsigned_num_print(unsigned long long int unum, unsigned int radix,
+			       char padc, int padn)
 {
 	/* Just need enough space to store 64 bit decimal integer */
 	unsigned char num_buf[20];
@@ -45,6 +46,12 @@
 			num_buf[i++] = 'a' + (rem - 0xa);
 	} while (unum /= radix);
 
+	if (padn > 0) {
+		while (i < padn--) {
+			putchar(padc);
+		}
+	}
+
 	while (--i >= 0)
 		putchar(num_buf[i]);
 }
@@ -63,6 +70,9 @@
  * %ll - long long int (64-bit on AArch64)
  * %z - size_t sized integer formats (64 bit on AArch64)
  *
+ * The following padding specifiers are supported by this print
+ * %0NN - Left-pad the number with 0s (NN is a decimal number)
+ *
  * The print exits on all other formats specifiers other than valid
  * combinations of the above specifiers.
  *******************************************************************/
@@ -72,9 +82,12 @@
 	long long int num;
 	unsigned long long int unum;
 	char *str;
+	char padc = 0; /* Padding character */
+	int padn; /* Number of characters to pad */
 
 	while (*fmt) {
 		l_count = 0;
+		padn = 0;
 
 		if (*fmt == '%') {
 			fmt++;
@@ -87,10 +100,11 @@
 				if (num < 0) {
 					putchar('-');
 					unum = (unsigned long long int)-num;
+					padn--;
 				} else
 					unum = (unsigned long long int)num;
 
-				unsigned_num_print(unum, 10);
+				unsigned_num_print(unum, 10, padc, padn);
 				break;
 			case 's':
 				str = va_arg(args, char *);
@@ -98,14 +112,16 @@
 				break;
 			case 'p':
 				unum = (uintptr_t)va_arg(args, void *);
-				if (unum)
+				if (unum) {
 					tf_string_print("0x");
+					padn -= 2;
+				}
 
-				unsigned_num_print(unum, 16);
+				unsigned_num_print(unum, 16, padc, padn);
 				break;
 			case 'x':
 				unum = get_unum_va_args(args, l_count);
-				unsigned_num_print(unum, 16);
+				unsigned_num_print(unum, 16, padc, padn);
 				break;
 			case 'z':
 				if (sizeof(size_t) == 8)
@@ -119,8 +135,21 @@
 				goto loop;
 			case 'u':
 				unum = get_unum_va_args(args, l_count);
-				unsigned_num_print(unum, 10);
+				unsigned_num_print(unum, 10, padc, padn);
 				break;
+			case '0':
+				padc = '0';
+				padn = 0;
+				fmt++;
+
+				while (1) {
+					char ch = *fmt;
+					if (ch < '0' || ch > '9') {
+						goto loop;
+					}
+					padn = (padn * 10) + (ch - '0');
+					fmt++;
+				}
 			default:
 				/* Exit on any other format specifier */
 				return;
diff --git a/docs/diagrams/secure_sw_stack_sp.png b/docs/diagrams/secure_sw_stack_sp.png
new file mode 100644
index 0000000..5cb2ca7
--- /dev/null
+++ b/docs/diagrams/secure_sw_stack_sp.png
Binary files differ
diff --git a/docs/diagrams/secure_sw_stack_tos.png b/docs/diagrams/secure_sw_stack_tos.png
new file mode 100644
index 0000000..1f2d555
--- /dev/null
+++ b/docs/diagrams/secure_sw_stack_tos.png
Binary files differ
diff --git a/docs/secure-partition-manager-design.rst b/docs/secure-partition-manager-design.rst
new file mode 100644
index 0000000..05d4e8b
--- /dev/null
+++ b/docs/secure-partition-manager-design.rst
@@ -0,0 +1,825 @@
+*******************************
+Secure Partition Manager Design
+*******************************
+
+.. section-numbering::
+    :suffix: .
+
+.. contents::
+
+Background
+==========
+
+In some market segments that primarily deal with client-side devices like mobile
+phones, tablets, STBs and embedded devices, a Trusted OS instantiates trusted
+applications to provide security services like DRM, secure payment and
+authentication. The Global Platform TEE Client API specification defines the API
+used by Non-secure world applications to access these services. A Trusted OS
+fulfils the requirements of a security service as described above.
+
+Management services are typically implemented at the highest level of privilege
+in the system (i.e. EL3 in Arm Trusted Firmware). The service requirements are
+fulfilled by the execution environment provided by Arm Trusted Firmware.
+
+The following diagram illustrates the corresponding software stack:
+
+|Image 1|
+
+In other market segments that primarily deal with server-side devices (e.g. data
+centres and enterprise servers) the secure software stack typically does not
+include a Global Platform Trusted OS. Security functions are accessed through
+other interfaces (e.g. ACPI TCG TPM interface, UEFI runtime variable service).
+
+Placement of management and security functions with diverse requirements in a
+privileged Exception Level (i.e. EL3 or S-EL1) makes security auditing of
+firmware more difficult and does not allow isolation of unrelated services from
+each other either.
+
+Introduction
+============
+
+A **Secure Partition** is a software execution environment instantiated in
+S-EL0 that can be used to implement simple management and security services.
+Since S-EL0 is an unprivileged Exception Level, a Secure Partition relies on
+privileged firmware (i.e. Arm Trusted Firmware) to be granted access to system
+and processor resources. Essentially, it is a software sandbox in the Secure
+world that runs under the control of privileged software, provides one or more
+services and accesses the following system resources:
+
+- Memory and device regions in the system address map.
+
+- PE system registers.
+
+- A range of synchronous exceptions (e.g. SMC function identifiers).
+
+Note that currently the Arm Trusted Firmware only supports handling one Secure
+Partition.
+
+A Secure Partition enables Arm Trusted Firmware to implement only the essential
+secure services in EL3 and instantiate the rest in a partition in S-EL0.
+Furthermore, multiple Secure Partitions can be used to isolate unrelated
+services from each other.
+
+The following diagram illustrates the place of a Secure Partition in a typical
+ARMv8-A software stack. One or more Secure Partitions provide secure
+services to software components in the Non-secure world and other Secure
+Partitions.
+
+|Image 2|
+
+The Arm Trusted Firmware build system is responsible for including the Secure
+Partition image in the FIP. During boot, BL2 includes support to authenticate
+and load the Secure Partition image. A BL31 component called **Secure Partition
+Manager (SPM)** is responsible for managing the partition. This is semantically
+similar to a hypervisor managing a virtual machine.
+
+The SPM is responsible for the following actions during boot:
+
+- Allocate resources requested by the Secure Partition.
+
+- Perform architectural and system setup required by the Secure Partition to
+  fulfil a service request.
+
+- Implement a standard interface that is used for initialising a Secure
+  Partition.
+
+The SPM is responsible for the following actions during runtime:
+
+- Implement a standard interface that is used by a Secure Partition to fulfil
+  service requests.
+
+- Implement a standard interface that is used by the Non-secure world for
+  accessing the services exported by a Secure Partition. A service can be
+  invoked through a SMC.
+
+Alternatively, a partition can be viewed as a thread of execution running under
+the control of the SPM. Hence common programming concepts described below are
+applicable to a partition.
+
+Description
+===========
+
+The previous section introduced some general aspects of the software
+architecture of a Secure Partition. This section describes the specific choices
+made in the current implementation of this software architecture. Subsequent
+revisions of the implementation will include a richer set of features that
+enable a more flexible architecture.
+
+Building Arm Trusted Firmware with Secure Partition support
+-----------------------------------------------------------
+
+SPM is supported on the Arm FVP exclusively at the moment. The current
+implementation supports inclusion of only a single Secure Partition in which a
+service always runs to completion (e.g. the requested services cannot be
+preempted to give control back to the Normal world).
+
+It is not currently possible for BL31 to integrate SPM support and a Secure
+Payload Dispatcher (SPD) at the same time; they are mutually exclusive. In the
+SPM bootflow, a Secure Partition image executing at S-EL0 replaces the Secure
+Payload image executing at S-EL1 (e.g. a Trusted OS). Both are referred to as
+BL32.
+
+A working prototype of a SP has been implemented by re-purposing the EDK2 code
+and tools, leveraging the concept of the *Standalone Management Mode (MM)* in
+the UEFI specification (see the PI v1.6 Volume 4: Management Mode Core
+Interface). This will be referred to as the *Standalone MM Secure Partition* in
+the rest of this document.
+
+To enable SPM support in the TF, the source code must be compiled with the build
+flag ``ENABLE_SPM=1``. On Arm platforms the build option ``ARM_BL31_IN_DRAM``
+can be used to select the location of BL31; both SRAM and DRAM are supported.
+Also, the location of the binary that contains the BL32 image
+(``BL32=path/to/image.bin``) must be specified.
+
+First, build the Standalone MM Secure Partition. To build it, refer to the
+`instructions in the EDK2 repository`_.
+
+Then build TF with SPM support and include the Standalone MM Secure Partition
+image in the FIP:
+
+::
+
+    BL32=path/to/standalone/mm/sp BL33=path/to/bl33.bin \
+    make PLAT=fvp ENABLE_SPM=1 fip all
+
+Describing Secure Partition resources
+-------------------------------------
+
+Arm Trusted Firmware exports a porting interface that enables a platform to
+specify the system resources required by the Secure Partition. Some instructions
+are given below. However, this interface is under development and it may change
+as new features are implemented.
+
+- A Secure Partition is considered a BL32 image, so the same defines that apply
+  to BL32 images apply to a Secure Partition: ``BL32_BASE`` and ``BL32_LIMIT``.
+
+- The following defines are needed to allocate space for the translation tables
+  used by the Secure Partition: ``PLAT_SP_IMAGE_MMAP_REGIONS`` and
+  ``PLAT_SP_IMAGE_MAX_XLAT_TABLES``.
+
+- The functions ``plat_get_secure_partition_mmap()`` and
+  ``plat_get_secure_partition_boot_info()`` have to be implemented. The file
+  ``plat/arm/board/fvp/fvp_common.c`` can be used as an example. It uses the
+  defines in ``include/plat/arm/common/arm_spm_def.h``.
+
+  - ``plat_get_secure_partition_mmap()`` returns an array of mmap regions that
+    describe the memory regions that the SPM needs to allocate for a Secure
+    Partition.
+
+  - ``plat_get_secure_partition_boot_info()`` returns a
+    ``secure_partition_boot_info_t`` struct that is populated by the platform
+    with information about the memory map of the Secure Partition.
+
+For an example of all the changes in context, you may refer to commit
+``e29efeb1b4``, in which the port for FVP was introduced.
+
+Accessing Secure Partition services
+-----------------------------------
+
+The `SMC Calling Convention`_ (*ARM DEN 0028B*) describes SMCs as a conduit for
+accessing services implemented in the Secure world. The ``MM_COMMUNICATE``
+interface defined in the `Management Mode Interface Specification`_ (*ARM DEN
+0060A*) is used to invoke a Secure Partition service as a Fast Call.
+
+The mechanism used to identify a service within the partition depends on the
+service implementation. It is assumed that the caller of the service will be
+able to discover this mechanism through standard platform discovery mechanisms
+like ACPI and Device Trees. For example, *Volume 4: Platform Initialisation
+Specification v1.6. Management Mode Core Interface* specifies that a GUID is
+used to identify a management mode service. A client populates the GUID in the
+``EFI_MM_COMMUNICATE_HEADER``. The header is populated in the communication
+buffer shared with the Secure Partition.
+
+A Fast Call appears to be atomic from the perspective of the caller and returns
+when the requested operation has completed. A service invoked through the
+``MM_COMMUNICATE`` SMC will run to completion in the partition on a given CPU.
+The SPM is responsible for guaranteeing this behaviour. This means that there
+can only be a single outstanding Fast Call in a partition on a given CPU.
+
+Exchanging data with the Secure Partition
+-----------------------------------------
+
+The exchange of data between the Non-secure world and the partition takes place
+through a shared memory region. The location of data in the shared memory area
+is passed as a parameter to the ``MM_COMMUNICATE`` SMC. The shared memory area
+is statically allocated by the SPM and is expected to be either implicitly known
+to the Non-secure world or discovered through a platform discovery mechanism
+e.g. ACPI table or device tree. It is possible for the Non-secure world to
+exchange data with a partition only if it has been populated in this shared
+memory area. The shared memory area is implemented as per the guidelines
+specified in Section 3.2.3 of the `Management Mode Interface Specification`_
+(*ARM DEN 0060A*).
+
+The format of data structures used to encapsulate data in the shared memory is
+agreed between the Non-secure world and the Secure Partition. For example, in
+the `Management Mode Interface specification`_ (*ARM DEN 0060A*), Section 4
+describes that the communication buffer shared between the Non-secure world and
+the Management Mode (MM) in the Secure world must be of the type
+``EFI_MM_COMMUNICATE_HEADER``. This data structure is defined in *Volume 4:
+Platform Initialisation Specification v1.6. Management Mode Core Interface*.
+Any caller of a MM service will have to use the ``EFI_MM_COMMUNICATE_HEADER``
+data structure.
+
+Runtime model of the Secure Partition
+=====================================
+
+This section describes how the Secure Partition interfaces with the SPM.
+
+Interface with SPM
+------------------
+
+In order to instantiate one or more secure services in the Secure Partition in
+S-EL0, the SPM should define the following types of interfaces:
+
+- Interfaces that enable access to privileged operations from S-EL0. These
+  operations typically require access to system resources that are either shared
+  amongst multiple software components in the Secure world or cannot be directly
+  accessed from an unprivileged Exception Level.
+
+- Interfaces that establish the control path between the SPM and the Secure
+  Partition.
+
+This section describes the APIs currently exported by the SPM that enable a
+Secure Partition to initialise itself and export its services in S-EL0. These
+interfaces are not accessible from the Non-secure world.
+
+Conduit
+^^^^^^^
+
+The `SMC Calling Convention`_ (*ARM DEN 0028B*) specification describes the SMC
+and HVC conduits for accessing firmware services and their availability
+depending on the implemented Exception levels. In S-EL0, the Supervisor Call
+exception (SVC) is the only architectural mechanism available for unprivileged
+software to make a request for an operation implemented in privileged software.
+Hence, the SVC conduit must be used by the Secure Partition to access interfaces
+implemented by the SPM.
+
+A SVC causes an exception to be taken to S-EL1. Arm Trusted Firmware assumes
+ownership of S-EL1 and installs a simple exception vector table in S-EL1 that
+relays a SVC request from a Secure Partition as a SMC request to the SPM in EL3.
+Upon servicing the SMC request, Arm Trusted Firmware returns control directly to
+S-EL0 through an ERET instruction.
+
+Calling conventions
+^^^^^^^^^^^^^^^^^^^
+
+The `SMC Calling Convention`_ (*ARM DEN 0028B*) specification describes the
+32-bit and 64-bit calling conventions for the SMC and HVC conduits. The SVC
+conduit introduces the concept of SVC32 and SVC64 calling conventions. The SVC32
+and SVC64 calling conventions are equivalent to the 32-bit (SMC32) and the
+64-bit (SMC64) calling conventions respectively.
+
+Communication initiated by SPM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A service request is initiated from the SPM through an exception return
+instruction (ERET) to S-EL0. Later, the Secure Partition issues an SVC
+instruction to signal completion of the request. Some example use cases are
+given below:
+
+- A request to initialise the Secure Partition during system boot.
+
+- A request to handle a runtime service request.
+
+Communication initiated by Secure Partition
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A request is initiated from the Secure Partition by executing a SVC instruction.
+An ERET instruction is used by Arm Trusted Firmware to return to S-EL0 with the
+result of the request.
+
+An example is a request to perform privileged operations on behalf of a
+partition (e.g. management of memory attributes in the translation tables for
+the Secure EL1&0 translation regime).
+
+Interfaces
+^^^^^^^^^^
+
+The current implementation reserves function IDs for Fast Calls in the Standard
+Secure Service calls range (see `SMC Calling Convention`_ (*ARM DEN 0028B*)
+specification) for each API exported by the SPM. This section defines the
+function prototypes for each function ID. The function IDs specify whether one
+or both of the SVC32 and SVC64 calling conventions can be used to invoke the
+corresponding interface.
+
+Secure Partition Event Management
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Secure Partition provides an Event Management interface that is used by the
+SPM to delegate service requests to the Secure Partition. The interface also
+allows the Secure Partition to:
+
+- Register with the SPM a service that it provides.
+- Indicate completion of a service request delegated by the SPM.
+
+Miscellaneous interfaces
+------------------------
+
+``SPM_VERSION_AARCH32``
+^^^^^^^^^^^^^^^^^^^^^^^
+
+- Description
+
+  Returns the version of the interface exported by SPM.
+
+- Parameters
+
+  - **uint32** - Function ID
+
+    - SVC32 Version: **0x84000060**
+
+- Return parameters
+
+  - **int32** - Status
+
+    On success, the format of the value is as follows:
+
+    - Bit [31]: Must be 0
+    - Bits [30:16]: Major Version. Must be 0 for this revision of the SPM
+      interface.
+    - Bits [15:0]: Minor Version. Must be 1 for this revision of the SPM
+      interface.
+
+    On error, the format of the value is as follows:
+
+    - ``NOT_SUPPORTED``: SPM interface is not supported or not available for the
+      client.
+
+- Usage
+
+  This function returns the version of the Secure Partition Manager
+  implementation. The major version is 0 and the minor version is 1. The version
+  number is a 31-bit unsigned integer, with the upper 15 bits denoting the major
+  revision, and the lower 16 bits denoting the minor revision. The following
+  rules apply to the version numbering:
+
+  - Different major revision values indicate possibly incompatible functions.
+
+  - For two revisions, A and B, for which the major revision values are
+    identical, if the minor revision value of revision B is greater than the
+    minor revision value of revision A, then every function in revision A must
+    work in a compatible way with revision B. However, it is possible for
+    revision B to have a higher function count than revision A.
+
+- Implementation responsibilities
+
+  If this function returns a valid version number, all the functions that are
+  described subsequently must be implemented, unless it is explicitly stated
+  that a function is optional.
+
+See `Error Codes`_ for integer values that are associated with each return
+code.
+
+Secure Partition Initialisation
+-------------------------------
+
+The SPM is responsible for initialising the architectural execution context to
+enable initialisation of a service in S-EL0. The responsibilities of the SPM are
+listed below. At the end of initialisation, the partition issues a
+``SP_EVENT_COMPLETE_AARCH64`` call (described later) to signal readiness for
+handling requests for services implemented by the Secure Partition. The
+initialisation event is executed as a Fast Call.
+
+Entry point invocation
+^^^^^^^^^^^^^^^^^^^^^^
+
+The entry point for service requests that should be handled as Fast Calls is
+used as the target of the ERET instruction to start initialisation of the Secure
+Partition.
+
+Architectural Setup
+^^^^^^^^^^^^^^^^^^^
+
+At cold boot, system registers accessible from S-EL0 will be in their reset
+state unless otherwise specified. The SPM will perform the following
+architectural setup to enable execution in S-EL0.
+
+MMU setup
+^^^^^^^^^
+
+The platform port of a Secure Partition specifies to the SPM a list of regions
+that it needs access to and their attributes. The SPM validates this resource
+description and initialises the Secure EL1&0 translation regime as follows.
+
+1. Device regions are mapped with nGnRE attributes and Execute Never
+   instruction access permissions.
+
+2. Code memory regions are mapped with RO data and Executable instruction access
+   permissions.
+
+3. Read Only data memory regions are mapped with RO data and Execute Never
+   instruction access permissions.
+
+4. Read Write data memory regions are mapped with RW data and Execute Never
+   instruction access permissions.
+
+5. If the resource description does not explicitly describe the type of memory
+   regions then all memory regions will be marked with Code memory region
+   attributes.
+
+6. The ``UXN`` and ``PXN`` bits are set for regions that are not executable by
+   S-EL0 or S-EL1.
+
+System Register Setup
+^^^^^^^^^^^^^^^^^^^^^
+
+System registers that influence software execution in S-EL0 are set up by the SPM
+as follows:
+
+1. ``SCTLR_EL1``
+
+   - ``UCI=1``
+   - ``EOE=0``
+   - ``WXN=1``
+   - ``nTWE=1``
+   - ``nTWI=1``
+   - ``UCT=1``
+   - ``DZE=1``
+   - ``I=1``
+   - ``UMA=0``
+   - ``SA0=1``
+   - ``C=1``
+   - ``A=1``
+   - ``M=1``
+
+2. ``CPACR_EL1``
+
+   - ``FPEN=b'11``
+
+3. ``PSTATE``
+
+   - ``D,A,I,F=1``
+   - ``CurrentEL=0`` (EL0)
+   - ``SpSel=0`` (Thread mode)
+   - ``NRW=0`` (AArch64)
+
+General Purpose Register Setup
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+SPM will invoke the entry point of a service by executing an ERET instruction.
+This transition into S-EL0 is special since it is not in response to a previous
+request through a SVC instruction. This is the first entry into S-EL0. The
+general purpose register usage at the time of entry will be as specified in the
+"Return State" column of Table 3-1 in Section 3.1 "Register use in AArch64 SMC
+calls" of the `SMC Calling Convention`_ (*ARM DEN 0028B*) specification. In
+addition, certain other restrictions will be applied as described below.
+
+1. ``SP_EL0``
+
+   A non-zero value will indicate that the SPM has initialised the stack pointer
+   for the current CPU.
+
+   The value will be 0 otherwise.
+
+2. ``X4-X30``
+
+   The values of these registers will be 0.
+
+3. ``X0-X3``
+
+   Parameters passed by the SPM.
+
+   - ``X0``: Virtual address of a buffer shared between EL3 and S-EL0. The
+     buffer will be mapped in the Secure EL1&0 translation regime with read-only
+     memory attributes described earlier.
+
+   - ``X1``: Size of the buffer in bytes.
+
+   - ``X2``: Cookie value (*IMPLEMENTATION DEFINED*).
+
+   - ``X3``: Cookie value (*IMPLEMENTATION DEFINED*).
+
+Runtime Event Delegation
+------------------------
+
+The SPM receives requests for Secure Partition services through a synchronous
+invocation (i.e. a SMC from the Non-secure world). These requests are delegated
+to the partition by programming a return from the last
+``SP_EVENT_COMPLETE_AARCH64`` call received from the partition. The last call
+was made to signal either completion of Secure Partition initialisation or
+completion of a partition service request.
+
+``SP_EVENT_COMPLETE_AARCH64``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Description
+
+  Signal completion of the last SP service request.
+
+- Parameters
+
+  - **uint32** - Function ID
+
+    - SVC64 Version: **0xC4000061**
+
+  - **int32** - Event Status Code
+
+    Zero or a positive value indicates that the event was handled successfully.
+    The values depend upon the original event that was delegated to the Secure
+    partition. They are described as follows.
+
+    - ``SUCCESS`` : Used to indicate that the Secure Partition was initialised
+      or a runtime request was handled successfully.
+
+    - Any other value greater than 0 is used to pass a specific Event Status
+      code in response to a runtime event.
+
+    A negative value indicates an error. The values of Event Status code depend
+    on the original event.
+
+- Return parameters
+
+  - **int32** - Event ID/Return Code
+
+    Zero or a positive value specifies the unique ID of the event being
+    delegated to the partition by the SPM.
+
+    In the current implementation, this parameter contains the function ID of
+    the ``MM_COMMUNICATE`` SMC. This value indicates to the partition that an
+    event has been delegated to it in response to an ``MM_COMMUNICATE`` request
+    from the Non-secure world.
+
+    A negative value indicates an error. The format of the value is as follows:
+
+    - ``NOT_SUPPORTED``: Function was called from the Non-secure world.
+
+    See `Error Codes`_ for integer values that are associated with each return
+    code.
+
+  - **uint32** - Event Context Address
+
+    Address of a buffer shared between the SPM and Secure Partition to pass
+    event specific information. The format of the data populated in the buffer
+    is implementation defined.
+
+    The buffer is mapped in the Secure EL1&0 translation regime with read-only
+    memory attributes described earlier.
+
+    For the SVC64 version, this parameter is a 64-bit Virtual Address (VA).
+
+    For the SVC32 version, this parameter is a 32-bit Virtual Address (VA).
+
+  - **uint32** - Event context size
+
+    Size of the memory starting at the Event Context Address.
+
+  - **uint32/uint64** - Event Cookie
+
+    This is an optional parameter. If unused its value is SBZ.
+
+- Usage
+
+  This function signals to the SPM that the handling of the last event delegated
+  to a partition has completed. The partition is ready to handle its next event.
+  A return from this function is in response to the next event that will be
+  delegated to the partition. The return parameters describe the next event.
+
+- Caller responsibilities
+
+  A Secure Partition must only call ``SP_EVENT_COMPLETE_AARCH64`` to signal
+  completion of a request that was delegated to it by the SPM.
+
+- Callee responsibilities
+
+  When the SPM receives this call from a Secure Partition, the corresponding
+  syndrome information can be used to return control through an ERET
+  instruction, to the instruction immediately after the call in the Secure
+  Partition context. This syndrome information comprises general purpose and
+  system register values when the call was made.
+
+  The SPM must save this syndrome information and use it to delegate the next
+  event to the Secure Partition. The return parameters of this interface must
+  specify the properties of the event and be populated in ``X0-X3/W0-W3``
+  registers.
+
+Secure Partition Memory Management
+----------------------------------
+
+A Secure Partition executes at S-EL0, which is an unprivileged Exception Level.
+The SPM is responsible for enabling access to regions of memory in the system
+address map from a Secure Partition. This is done by mapping these regions in
+the Secure EL1&0 Translation regime with appropriate memory attributes.
+Attributes refer to memory type, permission, cacheability and shareability
+attributes used in the Translation tables. The definitions of these attributes
+and their usage can be found in the `ARMv8 ARM`_ (*ARM DDI 0487*).
+
+All memory required by the Secure Partition is allocated upfront in the SPM,
+even before handing over to the Secure Partition for the first time. The initial
+access permissions of the memory regions are statically provided by the platform
+port and should allow the Secure Partition to run its initialisation code.
+
+However, they might not suit the final needs of the Secure Partition because its
+final memory layout might not be known until the Secure Partition initialises
+itself. As the Secure Partition initialises its runtime environment it might,
+for example, dynamically load some modules. For instance, a Secure Partition
+could implement a loader for a standard executable file format (e.g. a PE-COFF
+loader for loading executable files at runtime). These executable files will be
+a part of the Secure Partition image. The location of various sections in an
+executable file and their permission attributes (e.g. read-write data, read-only
+data and code) will be known only when the file is loaded into memory.
+
+In this case, the Secure Partition needs a way to change the access permissions
+of its memory regions. The SPM provides this feature through the
+``SP_MEMORY_ATTRIBUTES_SET_AARCH64`` SVC interface. This interface is available
+to the Secure Partition during a specific time window: from the first entry into
+the Secure Partition up to the first ``SP_EVENT_COMPLETE_AARCH64`` call that
+signals the Secure Partition has finished its initialisation. Once this is
+complete, the SPM does not allow changes to the memory attributes.
+
+This section describes the standard SVC interface that is implemented by the SPM
+to determine and change permission attributes of memory regions that belong to a
+Secure Partition.
+
+``SP_MEMORY_ATTRIBUTES_GET_AARCH64``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Description
+
+  Request the permission attributes of a memory region from S-EL0.
+
+- Parameters
+
+  - **uint32** - Function ID
+
+    - SVC64 Version: **0xC4000064**
+
+  - **uint64** - Base Address
+
+    This parameter is a 64-bit Virtual Address (VA).
+
+    There are no alignment restrictions on the Base Address. The permission
+    attributes of the translation granule it lies in are returned.
+
+- Return parameters
+
+  - **int32** - Memory Attributes/Return Code
+
+    On success the format of the Return Code is as follows:
+
+    - Bits[1:0] : Data access permission
+
+      - b'00 : No access
+      - b'01 : Read-Write access
+      - b'10 : Reserved
+      - b'11 : Read-only access
+
+    - Bit[2]: Instruction access permission
+
+      - b'0 : Executable
+      - b'1 : Non-executable
+
+    - Bit[30:3] : Reserved. SBZ.
+
+    - Bit[31]   : Must be 0
+
+    On failure the following error codes are returned:
+
+    - ``INVALID_PARAMETERS``: The Secure Partition is not allowed to access the
+      memory region the Base Address lies in.
+
+    - ``NOT_SUPPORTED`` : The SPM does not support retrieval of attributes of
+      any memory page that is accessible by the Secure Partition, or the
+      function was called from the Non-secure world. Also returned if it is
+      used after ``SP_EVENT_COMPLETE_AARCH64``.
+
+    See `Error Codes`_ for integer values that are associated with each return
+    code.
+
+- Usage
+
+  This function is used to request the permission attributes for S-EL0 on a
+  memory region accessible from a Secure Partition. The size of the memory
+  region is equal to the Translation Granule size used in the Secure EL1&0
+  translation regime. Requests to retrieve other memory region attributes are
+  not currently supported.
+
+- Caller responsibilities
+
+  The caller must obtain the Translation Granule Size of the Secure EL1&0
+  translation regime from the SPM through an implementation defined method.
+
+- Callee responsibilities
+
+  The SPM must not return the memory access controls for a page of memory that
+  is not accessible from a Secure Partition.
+
+``SP_MEMORY_ATTRIBUTES_SET_AARCH64``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Description
+
+  Set the permission attributes of a memory region from S-EL0.
+
+- Parameters
+
+  - **uint32** - Function ID
+
+    - SVC64 Version: **0xC4000065**
+
+  - **uint64** - Base Address
+
+    This parameter is a 64-bit Virtual Address (VA).
+
+    The alignment of the Base Address must be greater than or equal to the
+    Translation Granule Size used in the Secure EL1&0 translation regime, i.e.
+    the Base Address must be a multiple of the Translation Granule Size.
+
+  - **uint32** - Page count
+
+    Number of pages starting from the Base Address whose memory attributes
+    should be changed. The page size is equal to the Translation Granule Size.
+
+  - **uint32** - Memory Access Controls
+
+    - Bits[1:0] : Data access permission
+
+      - b'00 : No access
+      - b'01 : Read-Write access
+      - b'10 : Reserved
+      - b'11 : Read-only access
+
+    - Bit[2] : Instruction access permission
+
+      - b'0 : Executable
+      - b'1 : Non-executable
+
+    - Bits[31:3] : Reserved. SBZ.
+
+    A combination of attributes that mark the region with RW and Executable
+    permissions is prohibited. A request to mark a device memory region with
+    Executable permissions is prohibited.
+
+- Return parameters
+
+  - **int32** - Return Code
+
+    - ``SUCCESS``: The Memory Access Controls were changed successfully.
+
+    - ``DENIED``: The SPM is servicing a request to change the attributes of a
+      memory region that overlaps with the region specified in this request.
+
+    - ``INVALID_PARAMETER``: An invalid combination of Memory Access Controls
+      has been specified. The Base Address is not correctly aligned. The Secure
+      Partition is not allowed to access part or all of the memory region
+      specified in the call.
+
+    - ``NO_MEMORY``: The SPM does not have memory resources to change the
+      attributes of the memory region in the translation tables.
+
+    - ``NOT_SUPPORTED``: The SPM does not permit change of attributes of any
+      memory region that is accessible by the Secure Partition. Function was
+      called from the Non-secure world. Also returned if it is used after
+      ``SP_EVENT_COMPLETE_AARCH64``.
+
+    See `Error Codes`_ for integer values that are associated with each return
+    code.
+
+- Usage
+
+  This function is used to change the permission attributes for S-EL0 on a
+  memory region accessible from a Secure Partition. The size of the memory
+  region is equal to the Translation Granule size used in the Secure EL1&0
+  translation regime. Requests to change other memory region attributes are not
+  currently supported.
+
+  This function is only available at boot time. This interface is revoked after
+  the Secure Partition sends the first ``SP_EVENT_COMPLETE_AARCH64`` to signal
+  that it is initialised and ready to receive run-time requests.
+
+- Caller responsibilities
+
+  The caller must obtain the Translation Granule Size of the Secure EL1&0
+  translation regime from the SPM through an implementation defined method.
+
+- Callee responsibilities
+
+  The SPM must preserve the original memory access controls of the region of
+  memory in case of an unsuccessful call.  The SPM must preserve the consistency
+  of the S-EL1 translation regime if this function is called on different PEs
+  concurrently and the memory regions specified overlap.
+
+Error Codes
+-----------
+
+.. csv-table::
+   :header: "Name", "Value"
+
+   ``SUCCESS``,0
+   ``NOT_SUPPORTED``,-1
+   ``INVALID_PARAMETER``,-2
+   ``DENIED``,-3
+   ``NO_MEMORY``,-5
+   ``NOT_PRESENT``,-7
+
+--------------
+
+*Copyright (c) 2017, Arm Limited and Contributors. All rights reserved.*
+
+.. _ARMv8 ARM: https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile
+.. _instructions in the EDK2 repository: https://github.com/tianocore/edk2-staging/blob/AArch64StandaloneMm/HowtoBuild.MD
+.. _Management Mode Interface Specification: http://infocenter.arm.com/help/topic/com.arm.doc.den0060a/DEN0060A_ARM_MM_Interface_Specification.pdf
+.. _SDEI Specification: http://infocenter.arm.com/help/topic/com.arm.doc.den0054a/ARM_DEN0054A_Software_Delegated_Exception_Interface.pdf
+.. _SMC Calling Convention: http://infocenter.arm.com/help/topic/com.arm.doc.den0028b/ARM_DEN0028B_SMC_Calling_Convention.pdf
+
+.. |Image 1| image:: diagrams/secure_sw_stack_tos.png
+.. |Image 2| image:: diagrams/secure_sw_stack_sp.png
diff --git a/docs/spm-user-guide.rst b/docs/spm-user-guide.rst
deleted file mode 100644
index a3b64d9..0000000
--- a/docs/spm-user-guide.rst
+++ /dev/null
@@ -1,59 +0,0 @@
-ARM Trusted Firmware - SPM User Guide
-=====================================
-
-.. section-numbering::
-    :suffix: .
-
-.. contents::
-
-
-This document briefly presents the Secure Partition Management (SPM) support in
-the Arm Trusted Firmware (TF), specifically focusing on how to build Arm TF with
-SPM support.
-
-Overview of the SPM software stack
-----------------------------------
-
-SPM is supported on the Arm FVP exclusively at the moment.
-
-It is not currently possible for BL31 to integrate SPM support and a Secure
-Payload Dispatcher (SPD) at the same time; they are mutually exclusive. In the
-SPM bootflow, a Secure Partition (SP) image executing at Secure-EL0 replaces the
-Secure Payload image executing at Secure-EL1 (e.g. a Trusted OS). Both are
-referred to as BL32.
-
-A working prototype of a SP has been implemented by repurposing the EDK2 code
-and tools, leveraging the concept of the *Standalone Management Mode (MM)* in
-the UEFI specification (see the PI v1.6 Volume 4: Management Mode Core
-Interface). This will be referred to as the *Standalone MM Secure Partition* in
-the rest of this document.
-
-
-Building TF with SPM support
-----------------------------
-
-To enable SPM support in the TF, the source code must be compiled with the build
-flag ``ENABLE_SPM=1``. On Arm platforms the build option ``ARM_BL31_IN_DRAM``
-can be used to select the location of BL31, both SRAM and DRAM are supported.
-
-
-Using the Standalone MM SP
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-First, build the Standalone MM Secure Partition. To build it, refer to the
-`instructions in the EDK2 repository`_.
-
-Then build TF with SPM support and include the Standalone MM Secure Partition
-image in the FIP:
-
-::
-
-    BL32=path/to/standalone/mm/sp BL33=path/to/bl33.bin \
-    make PLAT=fvp ENABLE_SPM=1 fip all
-
-
---------------
-
-*Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.*
-
-.. _instructions in the EDK2 repository: https://github.com/tianocore/edk2-staging/blob/AArch64StandaloneMm/HowtoBuild.MD
diff --git a/drivers/io/io_block.c b/drivers/io/io_block.c
index 128246f..8226554 100644
--- a/drivers/io/io_block.c
+++ b/drivers/io/io_block.c
@@ -167,15 +167,98 @@
 	return 0;
 }
 
+/*
+ * This function allows the caller to read any number of bytes
+ * from any position. It hides from the caller that the low level
+ * driver can only read aligned blocks of data. For this reason
+ * we need to handle the use case where the first byte to be read is not
+ * aligned to start of the block, the last byte to be read is also not
+ * aligned to the end of a block, and there are zero or more blocks-worth
+ * of data in between.
+ *
+ * In such a case we need to read more bytes than requested (i.e. full
+ * blocks) and strip-out the leading bytes (aka skip) and the trailing
+ * bytes (aka padding). See diagram below
+ *
+ * cur->file_pos ------------
+ *                          |
+ * cur->base                |
+ *  |                       |
+ *  v                       v<----  length   ---->
+ *  --------------------------------------------------------------
+ * |           |         block#1    |        |   block#n          |
+ * |  block#0  |            +       |   ...  |     +              |
+ * |           | <- skip -> +       |        |     + <- padding ->|
+ *  ------------------------+----------------------+--------------
+ *             ^                                                  ^
+ *             |                                                  |
+ *             v    iteration#1                iteration#n        v
+ *              --------------------------------------------------
+ *             |                    |        |                    |
+ *             |<----  request ---->|  ...   |<----- request ---->|
+ *             |                    |        |                    |
+ *              --------------------------------------------------
+ *            /                   /          |                    |
+ *           /                   /           |                    |
+ *          /                   /            |                    |
+ *         /                   /             |                    |
+ *        /                   /              |                    |
+ *       /                   /               |                    |
+ *      /                   /                |                    |
+ *     /                   /                 |                    |
+ *    /                   /                  |                    |
+ *   /                   /                   |                    |
+ *  <---- request ------>                    <------ request  ----->
+ *  ---------------------                    -----------------------
+ *  |        |          |                    |          |           |
+ *  |<-skip->|<-nbytes->|           -------->|<-nbytes->|<-padding->|
+ *  |        |          |           |        |          |           |
+ *  ---------------------           |        -----------------------
+ *  ^        \           \          |        |          |
+ *  |         \           \         |        |          |
+ *  |          \           \        |        |          |
+ *  buf->offset \           \   buf->offset  |          |
+ *               \           \               |          |
+ *                \           \              |          |
+ *                 \           \             |          |
+ *                  \           \            |          |
+ *                   \           \           |          |
+ *                    \           \          |          |
+ *                     \           \         |          |
+ *                      --------------------------------
+ *                      |           |        |         |
+ * buffer-------------->|           | ...    |         |
+ *                      |           |        |         |
+ *                      --------------------------------
+ *                      <-count#1->|                   |
+ *                      <----------  count#n   -------->
+ *                      <----------  length  ---------->
+ *
+ * Additionally, the IO driver has an underlying buffer that is at least
+ * one block-size and may be big enough to hold several blocks at once.
+ */
 static int block_read(io_entity_t *entity, uintptr_t buffer, size_t length,
 		      size_t *length_read)
 {
 	block_dev_state_t *cur;
 	io_block_spec_t *buf;
 	io_block_ops_t *ops;
-	size_t aligned_length, skip, count, left, padding, block_size;
 	int lba;
-	int buffer_not_aligned;
+	size_t block_size, left;
+	size_t nbytes;  /* number of bytes read in one iteration */
+	size_t request; /* number of requested bytes in one iteration */
+	size_t count;   /* number of bytes already read */
+	/*
+	 * number of leading bytes from start of the block
+	 * to the first byte to be read
+	 */
+	size_t skip;
+
+	/*
+	 * number of trailing bytes between the last byte
+	 * to be read and the end of the block
+	 */
+	size_t padding;
 
 	assert(entity->info != (uintptr_t)NULL);
 	cur = (block_dev_state_t *)entity->info;
@@ -186,102 +269,107 @@
 	       (length > 0) &&
 	       (ops->read != 0));
 
-	if ((buffer & (block_size - 1)) != 0) {
+	/*
+	 * We don't know the number of bytes that we are going
+	 * to read in every iteration, because it will depend
+	 * on the low level driver.
+	 */
+	count = 0;
+	for (left = length; left > 0; left -= nbytes) {
 		/*
-		 * buffer isn't aligned with block size.
-		 * Block device always relies on DMA operation.
-		 * It's better to make the buffer as block size aligned.
+		 * We must only request operations aligned to the block
+		 * size. Therefore if file_pos is not block-aligned,
+		 * we have to request the operation to start at the
+		 * previous block boundary and skip the leading bytes. And
+		 * similarly, the number of bytes requested must be a
+		 * block size multiple
 		 */
-		buffer_not_aligned = 1;
-	} else {
-		buffer_not_aligned = 0;
-	}
+		skip = cur->file_pos & (block_size - 1);
 
-	skip = cur->file_pos % block_size;
-	aligned_length = ((skip + length) + (block_size - 1)) &
-			 ~(block_size - 1);
-	padding = aligned_length - (skip + length);
-	left = aligned_length;
-	do {
+		/*
+		 * Calculate the block number containing file_pos
+		 * - e.g. block 3.
+		 */
 		lba = (cur->file_pos + cur->base) / block_size;
-		if (left >= buf->length) {
+
+		if (skip + left > buf->length) {
 			/*
-			 * Since left is larger, it's impossible to padding.
-			 *
-			 * If buffer isn't aligned, we need to use aligned
-			 * buffer instead.
+			 * The underlying read buffer is too small to
+			 * read all the required data - limit to just
+			 * fill the buffer, and then read again.
 			 */
-			if (skip || buffer_not_aligned) {
-				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size, we need to use
-				 * block buffer to read block. Since block
-				 * device is always relied on DMA operation.
-				 */
-				count = ops->read(lba, buf->offset,
-						  buf->length);
-			} else {
-				count = ops->read(lba, buffer, buf->length);
-			}
-			assert(count == buf->length);
-			cur->file_pos += count - skip;
-			if (skip || buffer_not_aligned) {
-				/*
-				 * Since there's not aligned block size caused
-				 * by skip or not aligned buffer, block buffer
-				 * is used to store data.
-				 */
-				memcpy((void *)buffer,
-				       (void *)(buf->offset + skip),
-				       count - skip);
-			}
-			left = left - (count - skip);
+			request = buf->length;
 		} else {
-			if (skip || padding || buffer_not_aligned) {
-				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size, we have to read
-				 * full block by block buffer instead.
-				 * The size isn't aligned with block size.
-				 * Use block buffer to avoid overflow.
-				 *
-				 * If buffer isn't aligned, use block buffer
-				 * to avoid DMA error.
-				 */
-				count = ops->read(lba, buf->offset, left);
-			} else
-				count = ops->read(lba, buffer, left);
-			assert(count == left);
-			left = left - (skip + padding);
-			cur->file_pos += left;
-			if (skip || padding || buffer_not_aligned) {
-				/*
-				 * Since there's not aligned block size or
-				 * buffer, block buffer is used to store data.
-				 */
-				memcpy((void *)buffer,
-				       (void *)(buf->offset + skip),
-				       left);
-			}
-			/* It's already the last block operation */
-			left = 0;
+			/*
+			 * The underlying read buffer is big enough to
+			 * read all the required data. Calculate the
+			 * number of bytes to read to align with the
+			 * block size.
+			 */
+			request = skip + left;
+			request = (request + (block_size - 1)) & ~(block_size - 1);
+		}
+		request = ops->read(lba, buf->offset, request);
+
+		if (request <= skip) {
+			/*
+			 * We couldn't read enough bytes to jump over
+			 * the skip bytes, so we would have to read
+			 * the same block again, which would generate
+			 * the same error.
+			 */
+			return -EIO;
 		}
-		skip = cur->file_pos % block_size;
-	} while (left > 0);
-	*length_read = length;
+
+		/*
+		 * Need to remove skip and padding bytes, if any, from
+		 * the read data when copying to the user buffer.
+		 */
+		nbytes = request - skip;
+		padding = (nbytes > left) ? nbytes - left : 0;
+		nbytes -= padding;
+
+		memcpy((void *)(buffer + count),
+		       (void *)(buf->offset + skip),
+		       nbytes);
+
+		cur->file_pos += nbytes;
+		count += nbytes;
+	}
+	assert(count == length);
+	*length_read = count;
 
 	return 0;
 }
 
+/*
+ * This function allows the caller to write any number of bytes
+ * from any position. It hides from the caller that the low level
+ * driver can only write aligned blocks of data.
+ * See comments for block_read for more details.
+ */
 static int block_write(io_entity_t *entity, const uintptr_t buffer,
 		       size_t length, size_t *length_written)
 {
 	block_dev_state_t *cur;
 	io_block_spec_t *buf;
 	io_block_ops_t *ops;
-	size_t aligned_length, skip, count, left, padding, block_size;
 	int lba;
-	int buffer_not_aligned;
+	size_t block_size, left;
+	size_t nbytes;  /* number of bytes written in one iteration */
+	size_t request; /* number of requested bytes in one iteration */
+	size_t count;   /* number of bytes already written */
+	/*
+	 * number of leading bytes from start of the block
+	 * to the first byte to be written
+	 */
+	size_t skip;
+
+	/*
+	 * number of trailing bytes between the last byte
+	 * to be written and the end of the block
+	 */
+	size_t padding;
+	size_t padding;
 
 	assert(entity->info != (uintptr_t)NULL);
 	cur = (block_dev_state_t *)entity->info;
@@ -293,75 +381,107 @@
 	       (ops->read != 0) &&
 	       (ops->write != 0));
 
-	if ((buffer & (block_size - 1)) != 0) {
+	/*
+	 * We don't know the number of bytes that we are going
+	 * to write in every iteration, because it will depend
+	 * on the low level driver.
+	 */
+	count = 0;
+	for (left = length; left > 0; left -= nbytes) {
 		/*
-		 * buffer isn't aligned with block size.
-		 * Block device always relies on DMA operation.
-		 * It's better to make the buffer as block size aligned.
+		 * We must only request operations aligned to the block
+		 * size. Therefore if file_pos is not block-aligned,
+		 * we have to request the operation to start at the
+		 * previous block boundary and skip the leading bytes. And
+		 * similarly, the number of bytes requested must be a
+		 * block size multiple
 		 */
-		buffer_not_aligned = 1;
-	} else {
-		buffer_not_aligned = 0;
-	}
+		skip = cur->file_pos & (block_size - 1);
 
-	skip = cur->file_pos % block_size;
-	aligned_length = ((skip + length) + (block_size - 1)) &
-			 ~(block_size - 1);
-	padding = aligned_length - (skip + length);
-	left = aligned_length;
-	do {
+		/*
+		 * Calculate the block number containing file_pos
+		 * - e.g. block 3.
+		 */
 		lba = (cur->file_pos + cur->base) / block_size;
-		if (left >= buf->length) {
-			/* Since left is larger, it's impossible to padding. */
-			if (skip || buffer_not_aligned) {
-				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size or buffer isn't
-				 * aligned, we need to use block buffer to
-				 * write block.
-				 */
-				count = ops->read(lba, buf->offset,
-						  buf->length);
-				assert(count == buf->length);
-				memcpy((void *)(buf->offset + skip),
-				       (void *)buffer,
-				       count - skip);
-				count = ops->write(lba, buf->offset,
-						   buf->length);
-			} else
-				count = ops->write(lba, buffer, buf->length);
-			assert(count == buf->length);
-			cur->file_pos += count - skip;
-			left = left - (count - skip);
+
+		if (skip + left > buf->length) {
+			/*
+			 * The underlying buffer is too small to hold
+			 * all the data to be written - limit to just
+			 * fill the buffer, and then write again.
+			 */
+			request = buf->length;
 		} else {
-			if (skip || padding || buffer_not_aligned) {
+			/*
+			 * The underlying buffer is big enough to hold
+			 * all the data to be written. Calculate the
+			 * number of bytes to write to align with the
+			 * block size.
+			 */
+			request = skip + left;
+			request = (request + (block_size - 1)) & ~(block_size - 1);
+		}
+
+		/*
+		 * The number of bytes that we are going to write
+		 * from the user buffer will depend on the size
+		 * of the current request.
+		 */
+		nbytes = request - skip;
+		padding = (nbytes > left) ? nbytes - left : 0;
+		nbytes -= padding;
+
+		/*
+		 * If we have skip or padding bytes then we have to preserve
+		 * some content and it means that we have to read before
+		 * writing
+		 */
+		if (skip > 0 || padding > 0) {
+			request = ops->read(lba, buf->offset, request);
+			/*
+			 * The read may return size less than
+			 * requested. Round down to the nearest block
+			 * boundary
+			 */
+			request &= ~(block_size-1);
+			if (request <= skip) {
 				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size, we need to avoid
-				 * poluate data in the beginning. Reading and
-				 * skipping the beginning is the only way.
-				 * The size isn't aligned with block size.
-				 * Use block buffer to avoid overflow.
-				 *
-				 * If buffer isn't aligned, use block buffer
-				 * to avoid DMA error.
+				 * We couldn't read enough bytes to jump over
+				 * the skip bytes, so we would have to read
+				 * the same block again, which would generate
+				 * the same error.
 				 */
-				count = ops->read(lba, buf->offset, left);
-				assert(count == left);
-				memcpy((void *)(buf->offset + skip),
-				       (void *)buffer,
-				       left - skip - padding);
-				count = ops->write(lba, buf->offset, left);
-			} else
-				count = ops->write(lba, buffer, left);
-			assert(count == left);
-			cur->file_pos += left - (skip + padding);
-			/* It's already the last block operation */
-			left = 0;
+				return -EIO;
+			}
+			nbytes = request - skip;
+			padding = (nbytes > left) ? nbytes - left : 0;
+			nbytes -= padding;
 		}
-		skip = cur->file_pos % block_size;
-	} while (left > 0);
-	*length_written = length;
+
+		memcpy((void *)(buf->offset + skip),
+		       (void *)(buffer + count),
+		       nbytes);
+
+		request = ops->write(lba, buf->offset, request);
+		if (request <= skip)
+			return -EIO;
+
+		/*
+		 * And the previous write operation may modify the size
+		 * of the request, so again, we have to calculate the
+		 * number of bytes that we consumed from the user
+		 * buffer
+		 */
+		nbytes = request - skip;
+		padding = (nbytes > left) ? nbytes - left : 0;
+		nbytes -= padding;
+
+		cur->file_pos += nbytes;
+		count += nbytes;
+	}
+	assert(count == length);
+	*length_written = count;
+
 	return 0;
 }
 
diff --git a/plat/hisilicon/poplar/aarch64/platform_common.c b/plat/hisilicon/poplar/aarch64/platform_common.c
index a7dac4f..762bd84 100644
--- a/plat/hisilicon/poplar/aarch64/platform_common.c
+++ b/plat/hisilicon/poplar/aarch64/platform_common.c
@@ -25,9 +25,14 @@
 					DEVICE_SIZE,			\
 					MT_DEVICE | MT_RW | MT_SECURE)
 
+#define MAP_TSP_MEM	MAP_REGION_FLAT(TSP_SEC_MEM_BASE,		\
+					TSP_SEC_MEM_SIZE,		\
+					MT_MEMORY | MT_RW | MT_SECURE)
+
 static const mmap_region_t poplar_mmap[] = {
 	MAP_DDR,
 	MAP_DEVICE,
+	MAP_TSP_MEM,
 	{0}
 };
 
diff --git a/plat/hisilicon/poplar/bl2_plat_setup.c b/plat/hisilicon/poplar/bl2_plat_setup.c
index 1741475..db507c3 100644
--- a/plat/hisilicon/poplar/bl2_plat_setup.c
+++ b/plat/hisilicon/poplar/bl2_plat_setup.c
@@ -29,8 +29,10 @@
 typedef struct bl2_to_bl31_params_mem {
 	bl31_params_t		bl31_params;
 	image_info_t		bl31_image_info;
+	image_info_t		bl32_image_info;
 	image_info_t		bl33_image_info;
 	entry_point_info_t	bl33_ep_info;
+	entry_point_info_t	bl32_ep_info;
 	entry_point_info_t	bl31_ep_info;
 } bl2_to_bl31_params_mem_t;
 
@@ -61,6 +63,16 @@
 	SET_PARAM_HEAD(bl2_to_bl31_params->bl31_image_info,
 		       PARAM_IMAGE_BINARY, VERSION_1, 0);
 
+	/* Fill BL3-2 related information if it exists */
+#ifdef BL32_BASE
+	bl2_to_bl31_params->bl32_ep_info = &bl31_params_mem.bl32_ep_info;
+	SET_PARAM_HEAD(bl2_to_bl31_params->bl32_ep_info, PARAM_EP,
+		VERSION_1, 0);
+	bl2_to_bl31_params->bl32_image_info = &bl31_params_mem.bl32_image_info;
+	SET_PARAM_HEAD(bl2_to_bl31_params->bl32_image_info, PARAM_IMAGE_BINARY,
+		VERSION_1, 0);
+#endif
+
 	/* Fill BL3-3 related information */
 	bl2_to_bl31_params->bl33_ep_info = &bl31_params_mem.bl33_ep_info;
 	SET_PARAM_HEAD(bl2_to_bl31_params->bl33_ep_info,
@@ -89,6 +101,41 @@
 				     DISABLE_ALL_EXCEPTIONS);
 }
 
+/*******************************************************************************
+ * Before calling this function BL32 is loaded in memory and its entrypoint
+ * is set by load_image. This is a placeholder for the platform to change
+ * the entrypoint of BL32 and set SPSR and security state.
+ * On Poplar we only set the security state of the entrypoint
+ ******************************************************************************/
+#ifdef BL32_BASE
+void bl2_plat_set_bl32_ep_info(image_info_t *bl32_image_info,
+					entry_point_info_t *bl32_ep_info)
+{
+	SET_SECURITY_STATE(bl32_ep_info->h.attr, SECURE);
+	/*
+	 * The Secure Payload Dispatcher service is responsible for
+	 * setting the SPSR prior to entry into the BL32 image.
+	 */
+	bl32_ep_info->spsr = 0;
+}
+
+/*******************************************************************************
+ * Populate the extents of memory available for loading BL32
+ ******************************************************************************/
+void bl2_plat_get_bl32_meminfo(meminfo_t *bl32_meminfo)
+{
+	/*
+	 * Populate the extents of memory available for loading BL32.
+	 */
+	bl32_meminfo->total_base = BL32_BASE;
+	bl32_meminfo->free_base = BL32_BASE;
+	bl32_meminfo->total_size =
+			(TSP_SEC_MEM_BASE + TSP_SEC_MEM_SIZE) - BL32_BASE;
+	bl32_meminfo->free_size =
+			(TSP_SEC_MEM_BASE + TSP_SEC_MEM_SIZE) - BL32_BASE;
+}
+#endif /* BL32_BASE */
+
 static uint32_t hisi_get_spsr_for_bl33_entry(void)
 {
 	unsigned long el_status;
@@ -159,5 +206,5 @@
 
 unsigned long plat_get_ns_image_entrypoint(void)
 {
-	return PLAT_ARM_NS_IMAGE_OFFSET;
+	return PLAT_POPLAR_NS_IMAGE_OFFSET;
 }
diff --git a/plat/hisilicon/poplar/bl31_plat_setup.c b/plat/hisilicon/poplar/bl31_plat_setup.c
index b9a0e18..e3a5c50 100644
--- a/plat/hisilicon/poplar/bl31_plat_setup.c
+++ b/plat/hisilicon/poplar/bl31_plat_setup.c
@@ -32,11 +32,31 @@
 #define BL31_COHERENT_RAM_BASE	(unsigned long)(&__COHERENT_RAM_START__)
 #define BL31_COHERENT_RAM_LIMIT	(unsigned long)(&__COHERENT_RAM_END__)
 
+#define TZPC_SEC_ATTR_CTRL_VALUE (0x9DB98D45)
+
+static entry_point_info_t bl32_image_ep_info;
 static entry_point_info_t bl33_image_ep_info;
 
+static void hisi_tzpc_sec_init(void)
+{
+	mmio_write_32(HISI_TZPC_SEC_ATTR_CTRL, TZPC_SEC_ATTR_CTRL_VALUE);
+}
+
 entry_point_info_t *bl31_plat_get_next_image_ep_info(uint32_t type)
 {
-	return &bl33_image_ep_info;
+	entry_point_info_t *next_image_info;
+
+	assert(sec_state_is_valid(type));
+	next_image_info = (type == NON_SECURE)
+			? &bl33_image_ep_info : &bl32_image_ep_info;
+	/*
+	 * None of the images on the ARM development platforms can have 0x0
+	 * as the entrypoint
+	 */
+	if (next_image_info->pc)
+		return next_image_info;
+	else
+		return NULL;
 }
 
 void bl31_early_platform_setup(bl31_params_t *from_bl2,
@@ -47,6 +67,13 @@
 	/* Init console for crash report */
 	plat_crash_console_init();
 
+
+	/*
+	 * Copy BL32 (if populated by BL2) and BL33 entry point information.
+	 * They are stored in Secure RAM, in BL2's address space.
+	 */
+	if (from_bl2->bl32_ep_info)
+		bl32_image_ep_info = *from_bl2->bl32_ep_info;
 	bl33_image_ep_info = *from_bl2->bl33_ep_info;
 }
 
@@ -58,6 +85,9 @@
 	/* Init GIC distributor and CPU interface */
 	plat_arm_gic_driver_init();
 	plat_arm_gic_init();
+
+	/* Init security properties of IP blocks */
+	hisi_tzpc_sec_init();
 }
 
 void bl31_plat_runtime_setup(void)
diff --git a/plat/hisilicon/poplar/include/hi3798cv200.h b/plat/hisilicon/poplar/include/hi3798cv200.h
index 6318b9c..540d0aa 100644
--- a/plat/hisilicon/poplar/include/hi3798cv200.h
+++ b/plat/hisilicon/poplar/include/hi3798cv200.h
@@ -30,7 +30,7 @@
 #define TIMER20_BGLOAD			(SEC_TIMER2_BASE + 0x018)
 
 /* GPIO */
-#define	GPIO_MAX			(12)
+#define	GPIO_MAX			(13)
 #define	GPIO_BASE(x)			(x != 5 ?			\
 					0xf820000 + x * 0x1000 : 0xf8004000)
 
@@ -97,4 +97,7 @@
 /* Watchdog */
 #define HISI_WDG0_BASE			(0xF8A2C000)
 
+#define HISI_TZPC_BASE			(0xF8A80000)
+#define HISI_TZPC_SEC_ATTR_CTRL		(HISI_TZPC_BASE + 0x10)
+
 #endif	/* __HI3798cv200_H__ */
diff --git a/plat/hisilicon/poplar/include/platform_def.h b/plat/hisilicon/poplar/include/platform_def.h
index b7afe82..3d1ad9b 100644
--- a/plat/hisilicon/poplar/include/platform_def.h
+++ b/plat/hisilicon/poplar/include/platform_def.h
@@ -48,11 +48,55 @@
 #define TEE_SEC_MEM_BASE		(0x70000000)
 #define TEE_SEC_MEM_SIZE		(0x10000000)
 
+/* Memory location options for TSP */
+#define POPLAR_SRAM_ID	0
+#define POPLAR_DRAM_ID	1
+
+/*
+ * DDR for OP-TEE (30MB from 0x02200000 - 0x04000000) is divided in several
+ * regions:
+ *   - Secure DDR (default is the top 16MB) used by OP-TEE
+ *   - Non-secure DDR (4MB) reserved for OP-TEE's future use
+ *   - Secure DDR (4MB aligned on 4MB) for OP-TEE's "Secure Data Path" feature
+ *   - Non-secure DDR used by OP-TEE (shared memory and padding) (4MB)
+ *   - Non-secure DDR (2MB) reserved for OP-TEE's future use
+ */
+#define DDR_SEC_SIZE			0x01000000
+#define DDR_SEC_BASE			0x03000000
+
 #define BL_MEM_BASE			(BL1_RO_BASE)
 #define BL_MEM_LIMIT			(BL31_LIMIT)
 #define BL_MEM_SIZE			(BL_MEM_LIMIT - BL_MEM_BASE)
 
+/*
+ * BL3-2 specific defines.
+ */
+
+/*
+ * The TSP currently executes from TZC secured area of DRAM.
+ */
+#define BL32_DRAM_BASE			0x03000000
+#define BL32_DRAM_LIMIT			0x04000000
+
+#if (POPLAR_TSP_RAM_LOCATION_ID == POPLAR_DRAM_ID)
+#define TSP_SEC_MEM_BASE		BL32_DRAM_BASE
+#define TSP_SEC_MEM_SIZE		(BL32_DRAM_LIMIT - BL32_DRAM_BASE)
+#define BL32_BASE			BL32_DRAM_BASE
+#define BL32_LIMIT			BL32_DRAM_LIMIT
+#elif (POPLAR_TSP_RAM_LOCATION_ID == POPLAR_SRAM_ID)
+#error "SRAM storage of TSP payload is currently unsupported"
+#else
+#error "Currently unsupported POPLAR_TSP_RAM_LOCATION_ID value"
+#endif
+
+/* BL32 is mandatory in AArch32 */
+#ifndef AARCH32
+#ifdef SPD_none
+#undef BL32_BASE
+#endif /* SPD_none */
+#endif
+
-#define PLAT_ARM_NS_IMAGE_OFFSET	0x37000000
+#define PLAT_POPLAR_NS_IMAGE_OFFSET	0x37000000
 
 /* Page table and MMU setup constants */
 #define ADDR_SPACE_SIZE			(1ull << 32)
diff --git a/plat/hisilicon/poplar/include/poplar_layout.h b/plat/hisilicon/poplar/include/poplar_layout.h
index 192bcb9..e0b5618 100644
--- a/plat/hisilicon/poplar/include/poplar_layout.h
+++ b/plat/hisilicon/poplar/include/poplar_layout.h
@@ -74,16 +74,16 @@
  * "OFFSET" is an offset to the start of a region relative to the
  * base of the "l-loader" TEXT section (also a multiple of page size).
  */
-#define LLOADER_TEXT_BASE		0x00001000	/* page aligned */
+#define LLOADER_TEXT_BASE		0x02001000	/* page aligned */
 #define BL1_OFFSET			0x0000D000	/* page multiple */
-#define FIP_BASE			0x00040000
+#define FIP_BASE			0x02040000
 
 #define BL1_RO_SIZE			0x00008000	/* page multiple */
 #define BL1_RW_SIZE			0x00008000	/* page multiple */
 #define BL1_SIZE			(BL1_RO_SIZE + BL1_RW_SIZE)
 #define BL2_SIZE			0x0000c000	/* page multiple */
 #define BL31_SIZE			0x00014000
-#define FIP_SIZE			0x00068000
+#define FIP_SIZE			0x000c0000  /* absolute max */
 
      /* BL1_OFFSET */			/* (Defined above) */
 #define BL1_BASE			(LLOADER_TEXT_BASE + BL1_OFFSET)
diff --git a/plat/hisilicon/poplar/plat_storage.c b/plat/hisilicon/poplar/plat_storage.c
index 623a61b..ab94cba 100644
--- a/plat/hisilicon/poplar/plat_storage.c
+++ b/plat/hisilicon/poplar/plat_storage.c
@@ -43,6 +43,10 @@
 	.uuid = UUID_EL3_RUNTIME_FIRMWARE_BL31,
 };
 
+static const io_uuid_spec_t bl32_uuid_spec = {	/* image spec of the BL32_IMAGE_ID policy */
+	.uuid = UUID_SECURE_PAYLOAD_BL32,
+};
+
 static const io_uuid_spec_t bl33_uuid_spec = {
 	.uuid = UUID_NON_TRUSTED_FIRMWARE_BL33,
 };
@@ -69,6 +73,11 @@
 		(uintptr_t)&bl31_uuid_spec,
 		open_fip
 	},
+	[BL32_IMAGE_ID] = {
+		&fip_dev_handle,
+		(uintptr_t)&bl32_uuid_spec,
+		open_fip
+	},
 	[BL33_IMAGE_ID] = {
 		&fip_dev_handle,
 		(uintptr_t)&bl33_uuid_spec,
diff --git a/plat/hisilicon/poplar/platform.mk b/plat/hisilicon/poplar/platform.mk
index 28e0d1f..818e311 100644
--- a/plat/hisilicon/poplar/platform.mk
+++ b/plat/hisilicon/poplar/platform.mk
@@ -4,6 +4,17 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
+# On Poplar, the TSP can execute from the TZC secure area in DRAM ("dram"); "sram" selects POPLAR_SRAM_ID.
+POPLAR_TSP_RAM_LOCATION	:=	dram
+ifeq (${POPLAR_TSP_RAM_LOCATION}, dram)
+  POPLAR_TSP_RAM_LOCATION_ID := POPLAR_DRAM_ID
+else ifeq (${POPLAR_TSP_RAM_LOCATION}, sram)
+  POPLAR_TSP_RAM_LOCATION_ID := POPLAR_SRAM_ID
+else
+  $(error "Currently unsupported POPLAR_TSP_RAM_LOCATION value")
+endif
+$(eval $(call add_define,POPLAR_TSP_RAM_LOCATION_ID))
+
 NEED_BL33			:= yes
 
 COLD_BOOT_SINGLE_CPU		:= 1
diff --git a/services/std_svc/spm/spm_main.c b/services/std_svc/spm/spm_main.c
index 00f3a30..ae71c1d 100644
--- a/services/std_svc/spm/spm_main.c
+++ b/services/std_svc/spm/spm_main.c
@@ -48,7 +48,7 @@
  * 2. Saves the current C runtime state (callee-saved registers) on the stack
  *    frame and saves a reference to this state.
  * 3. Calls el3_exit() so that the EL3 system and general purpose registers
- *    from the sp_ctx->cpu_ctx are used to enter the secure payload image.
+ *    from the sp_ctx->cpu_ctx are used to enter the secure partition image.
  ******************************************************************************/
 static uint64_t spm_synchronous_sp_entry(secure_partition_context_t *sp_ctx_ptr)
 {
@@ -75,7 +75,7 @@
 
 /*******************************************************************************
  * This function takes a Secure partition context pointer and:
- * 1. Saves the S-EL1 system register context tp sp_ctx->cpu_ctx.
+ * 1. Saves the S-EL1 system register context to sp_ctx->cpu_ctx.
  * 2. Restores the current C runtime state (callee saved registers) from the
  *    stack frame using the reference to this state saved in
  *    spm_secure_partition_enter().
@@ -101,7 +101,7 @@
  * This function passes control to the Secure Partition image (BL32) for the
  * first time on the primary cpu after a cold boot. It assumes that a valid
  * secure context has already been created by spm_setup() which can be directly
- * used. This function performs a synchronous entry into the Secure payload.
+ * used. This function performs a synchronous entry into the Secure partition.
  * The SP passes control back to this routine through a SMC.
  ******************************************************************************/
 int32_t spm_init(void)
@@ -126,7 +126,7 @@
 	secure_partition_setup();
 
 	/*
-	 * Arrange for an entry into the secure payload.
+	 * Arrange for an entry into the secure partition.
 	 */
 	sp_init_in_progress = 1;
 	rc = spm_synchronous_sp_entry(&sp_ctx);
@@ -138,9 +138,9 @@
 }
 
 /*******************************************************************************
- * Given a secure payload entrypoint info pointer, entry point PC & pointer to
+ * Given a secure partition entrypoint info pointer, entry point PC & pointer to
  * a context data structure, this function will initialize the SPM context and
- * entry point info for the secure payload
+ * entry point info for the secure partition.
  ******************************************************************************/
 void spm_init_sp_ep_state(struct entry_point_info *sp_ep_info,
 			  uint64_t pc,
@@ -161,7 +161,7 @@
 	SET_PARAM_HEAD(sp_ep_info, PARAM_EP, VERSION_1, ep_attr);
 
 	sp_ep_info->pc = pc;
-	/* The SPM payload runs in S-EL0 */
+	/* The secure partition runs in S-EL0. */
 	sp_ep_info->spsr = SPSR_64(MODE_EL0,
 				   MODE_SP_EL0,
 				   DISABLE_ALL_EXCEPTIONS);
@@ -350,7 +350,7 @@
 
 		switch (smc_fid) {
 
-		case  SPM_VERSION_AARCH32:
+		case SPM_VERSION_AARCH32:
 			SMC_RET1(handle, SPM_VERSION_COMPILED);
 
 		case SP_EVENT_COMPLETE_AARCH64:
@@ -414,12 +414,31 @@
 
 		switch (smc_fid) {
 
-		case  SP_VERSION_AARCH64:
-		case  SP_VERSION_AARCH32:
+		case SP_VERSION_AARCH64:
+		case SP_VERSION_AARCH32:
 			SMC_RET1(handle, SP_VERSION_COMPILED);
 
 		case MM_COMMUNICATE_AARCH32:
 		case MM_COMMUNICATE_AARCH64:
+		{
+			uint64_t mm_cookie = x1;
+			uint64_t comm_buffer_address = x2;
+			uint64_t comm_size_address = x3;
+
+			/* Cookie. Reserved for future use. It must be zero. */
+			if (mm_cookie != 0) {
+				ERROR("MM_COMMUNICATE: cookie is not zero\n");
+				SMC_RET1(handle, SPM_INVALID_PARAMETER);
+			}
+
+			if (comm_buffer_address == 0) {
+				ERROR("MM_COMMUNICATE: comm_buffer_address is zero\n");
+				SMC_RET1(handle, SPM_INVALID_PARAMETER);
+			}
+
+			if (comm_size_address != 0) {
+				VERBOSE("MM_COMMUNICATE: comm_size_address is not 0 as recommended.\n");
+			}
 
 			/* Save the Normal world context */
 			cm_el1_sysregs_context_save(NON_SECURE);
@@ -432,14 +451,9 @@
 			cm_el1_sysregs_context_restore(SECURE);
 			cm_set_next_eret_context(SECURE);
 
-			/* Cookie. Reserved for future use. It must be zero. */
-			assert(x1 == 0);
-
-			if (x3 != 0) {
-				VERBOSE("MM_COMMUNICATE_AARCH32/64: X3 is not 0 as recommended.\n");
-			}
-
-			SMC_RET4(&sp_ctx.cpu_ctx, smc_fid, x1, x2, x3);
+			SMC_RET4(&sp_ctx.cpu_ctx, smc_fid, comm_buffer_address,
+				 comm_size_address, plat_my_core_pos());
+		}
 
 		case SP_MEMORY_ATTRIBUTES_GET_AARCH64:
 		case SP_MEMORY_ATTRIBUTES_SET_AARCH64: