Merge pull request #1196 from antonio-nino-diaz-arm/an/zero-pad

Add support to left-pad with zeroes in tf_printf
diff --git a/Makefile b/Makefile
index 51e622e..1058b39 100644
--- a/Makefile
+++ b/Makefile
@@ -160,6 +160,13 @@
 				-ffreestanding -fno-builtin -Wall -std=gnu99	\
 				-Os -ffunction-sections -fdata-sections
 
+GCC_V_OUTPUT		:=	$(shell $(CC) -v 2>&1)
+PIE_FOUND		:=	$(findstring --enable-default-pie,${GCC_V_OUTPUT})
+
+ifneq ($(PIE_FOUND),)
+TF_CFLAGS		+=	-fno-PIE
+endif
+
 TF_LDFLAGS		+=	--fatal-warnings -O1
 TF_LDFLAGS		+=	--gc-sections
 TF_LDFLAGS		+=	$(TF_LDFLAGS_$(ARCH))
diff --git a/drivers/io/io_block.c b/drivers/io/io_block.c
index 128246f..8226554 100644
--- a/drivers/io/io_block.c
+++ b/drivers/io/io_block.c
@@ -167,15 +167,98 @@
 	return 0;
 }
 
+/*
+ * This function allows the caller to read any number of bytes
+ * from any position. It hides from the caller that the low level
+ * driver can only read aligned blocks of data. For this reason
+ * we need to handle the case where the first byte to be read is not
+ * aligned to the start of a block, the last byte to be read is also not
+ * aligned to the end of a block, and there are zero or more blocks' worth
+ * of data in between.
+ *
+ * In such a case we need to read more bytes than requested (i.e. full
+ * blocks) and strip out the leading bytes (aka skip) and the trailing
+ * bytes (aka padding). See the diagram below.
+ *
+ * cur->file_pos ------------
+ *                          |
+ * cur->base                |
+ *  |                       |
+ *  v                       v<----  length   ---->
+ *  --------------------------------------------------------------
+ * |           |         block#1    |        |   block#n          |
+ * |  block#0  |            +       |   ...  |     +              |
+ * |           | <- skip -> +       |        |     + <- padding ->|
+ *  ------------------------+----------------------+--------------
+ *             ^                                                  ^
+ *             |                                                  |
+ *             v    iteration#1                iteration#n        v
+ *              --------------------------------------------------
+ *             |                    |        |                    |
+ *             |<----  request ---->|  ...   |<----- request ---->|
+ *             |                    |        |                    |
+ *              --------------------------------------------------
+ *            /                   /          |                    |
+ *           /                   /           |                    |
+ *          /                   /            |                    |
+ *         /                   /             |                    |
+ *        /                   /              |                    |
+ *       /                   /               |                    |
+ *      /                   /                |                    |
+ *     /                   /                 |                    |
+ *    /                   /                  |                    |
+ *   /                   /                   |                    |
+ *  <---- request ------>                    <------ request  ----->
+ *  ---------------------                    -----------------------
+ *  |        |          |                    |          |           |
+ *  |<-skip->|<-nbytes->|           -------->|<-nbytes->|<-padding->|
+ *  |        |          |           |        |          |           |
+ *  ---------------------           |        -----------------------
+ *  ^        \           \          |        |          |
+ *  |         \           \         |        |          |
+ *  |          \           \        |        |          |
+ *  buf->offset \           \   buf->offset  |          |
+ *               \           \               |          |
+ *                \           \              |          |
+ *                 \           \             |          |
+ *                  \           \            |          |
+ *                   \           \           |          |
+ *                    \           \          |          |
+ *                     \           \         |          |
+ *                      --------------------------------
+ *                      |           |        |         |
+ * buffer-------------->|           | ...    |         |
+ *                      |           |        |         |
+ *                      --------------------------------
+ *                      <-count#1->|                   |
+ *                      <----------  count#n   -------->
+ *                      <----------  length  ---------->
+ *
+ * Additionally, the IO driver has an underlying buffer that is at least
+ * one block in size and may be big enough to hold the whole request.
+ */
 static int block_read(io_entity_t *entity, uintptr_t buffer, size_t length,
 		      size_t *length_read)
 {
 	block_dev_state_t *cur;
 	io_block_spec_t *buf;
 	io_block_ops_t *ops;
-	size_t aligned_length, skip, count, left, padding, block_size;
 	int lba;
-	int buffer_not_aligned;
+	size_t block_size, left;
+	size_t nbytes;  /* number of bytes read in one iteration */
+	size_t request; /* number of requested bytes in one iteration */
+	size_t count;   /* number of bytes already read */
+	/*
+	 * number of leading bytes from start of the block
+	 * to the first byte to be read
+	 */
+	size_t skip;
+
+	/*
+	 * number of trailing bytes between the last byte
+	 * to be read and the end of the block
+	 */
+	size_t padding;
 
 	assert(entity->info != (uintptr_t)NULL);
 	cur = (block_dev_state_t *)entity->info;
@@ -186,102 +269,107 @@
 	       (length > 0) &&
 	       (ops->read != 0));
 
-	if ((buffer & (block_size - 1)) != 0) {
+	/*
+	 * We don't know the number of bytes that we are going
+	 * to read in every iteration, because it will depend
+	 * on the low level driver.
+	 */
+	count = 0;
+	for (left = length; left > 0; left -= nbytes) {
 		/*
-		 * buffer isn't aligned with block size.
-		 * Block device always relies on DMA operation.
-		 * It's better to make the buffer as block size aligned.
+		 * We must only request operations aligned to the block
+		 * size. Therefore, if file_pos is not block-aligned,
+		 * we have to request the operation to start at the
+		 * previous block boundary and skip the leading bytes.
+		 * Similarly, the number of bytes requested must be a
+		 * multiple of the block size.
 		 */
-		buffer_not_aligned = 1;
-	} else {
-		buffer_not_aligned = 0;
-	}
+		skip = cur->file_pos & (block_size - 1);
 
-	skip = cur->file_pos % block_size;
-	aligned_length = ((skip + length) + (block_size - 1)) &
-			 ~(block_size - 1);
-	padding = aligned_length - (skip + length);
-	left = aligned_length;
-	do {
+		/*
+		 * Calculate the block number containing file_pos
+		 * - e.g. block 3.
+		 */
 		lba = (cur->file_pos + cur->base) / block_size;
-		if (left >= buf->length) {
+
+		if (skip + left > buf->length) {
 			/*
-			 * Since left is larger, it's impossible to padding.
-			 *
-			 * If buffer isn't aligned, we need to use aligned
-			 * buffer instead.
+			 * The underlying read buffer is too small to
+			 * read all the required data - limit to just
+			 * fill the buffer, and then read again.
 			 */
-			if (skip || buffer_not_aligned) {
-				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size, we need to use
-				 * block buffer to read block. Since block
-				 * device is always relied on DMA operation.
-				 */
-				count = ops->read(lba, buf->offset,
-						  buf->length);
-			} else {
-				count = ops->read(lba, buffer, buf->length);
-			}
-			assert(count == buf->length);
-			cur->file_pos += count - skip;
-			if (skip || buffer_not_aligned) {
-				/*
-				 * Since there's not aligned block size caused
-				 * by skip or not aligned buffer, block buffer
-				 * is used to store data.
-				 */
-				memcpy((void *)buffer,
-				       (void *)(buf->offset + skip),
-				       count - skip);
-			}
-			left = left - (count - skip);
+			request = buf->length;
 		} else {
-			if (skip || padding || buffer_not_aligned) {
-				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size, we have to read
-				 * full block by block buffer instead.
-				 * The size isn't aligned with block size.
-				 * Use block buffer to avoid overflow.
-				 *
-				 * If buffer isn't aligned, use block buffer
-				 * to avoid DMA error.
-				 */
-				count = ops->read(lba, buf->offset, left);
-			} else
-				count = ops->read(lba, buffer, left);
-			assert(count == left);
-			left = left - (skip + padding);
-			cur->file_pos += left;
-			if (skip || padding || buffer_not_aligned) {
-				/*
-				 * Since there's not aligned block size or
-				 * buffer, block buffer is used to store data.
-				 */
-				memcpy((void *)buffer,
-				       (void *)(buf->offset + skip),
-				       left);
-			}
-			/* It's already the last block operation */
-			left = 0;
+			/*
+			 * The underlying read buffer is big enough to
+			 * read all the required data. Calculate the
+			 * number of bytes to read to align with the
+			 * block size.
+			 */
+			request = skip + left;
+			request = (request + (block_size - 1)) & ~(block_size - 1);
+		}
+		request = ops->read(lba, buf->offset, request);
+
+		if (request <= skip) {
+			/*
+			 * We couldn't read enough bytes to get past
+			 * the skip bytes; retrying would simply re-read
+			 * the same block and fail in the same way, so
+			 * report an I/O error instead.
+			 */
+			return -EIO;
 		}
-		skip = cur->file_pos % block_size;
-	} while (left > 0);
-	*length_read = length;
+
+		/*
+		 * We need to remove the skip and padding bytes, if any,
+		 * from the read data when copying to the user buffer.
+		 */
+		nbytes = request - skip;
+		padding = (nbytes > left) ? nbytes - left : 0;
+		nbytes -= padding;
+
+		memcpy((void *)(buffer + count),
+		       (void *)(buf->offset + skip),
+		       nbytes);
+
+		cur->file_pos += nbytes;
+		count += nbytes;
+	}
+	assert(count == length);
+	*length_read = count;
 
 	return 0;
 }
 
+/*
+ * This function allows the caller to write any number of bytes
+ * at any position. It hides from the caller that the low level
+ * driver can only write aligned blocks of data.
+ * See the comments above block_read() for more details.
+ */
 static int block_write(io_entity_t *entity, const uintptr_t buffer,
 		       size_t length, size_t *length_written)
 {
 	block_dev_state_t *cur;
 	io_block_spec_t *buf;
 	io_block_ops_t *ops;
-	size_t aligned_length, skip, count, left, padding, block_size;
 	int lba;
-	int buffer_not_aligned;
+	size_t block_size, left;
+	size_t nbytes;  /* number of bytes written in one iteration */
+	size_t request; /* number of requested bytes in one iteration */
+	size_t count;   /* number of bytes already written */
+	/*
+	 * number of leading bytes from the start of the block
+	 * to the first byte to be written
+	 */
+	size_t skip;
+
+	/*
+	 * number of trailing bytes between the last byte
+	 * to be written and the end of the block
+	 */
+	size_t padding;
 
 	assert(entity->info != (uintptr_t)NULL);
 	cur = (block_dev_state_t *)entity->info;
@@ -293,75 +381,107 @@
 	       (ops->read != 0) &&
 	       (ops->write != 0));
 
-	if ((buffer & (block_size - 1)) != 0) {
+	/*
+	 * We don't know the number of bytes that we are going
+	 * to write in every iteration, because it will depend
+	 * on the low level driver.
+	 */
+	count = 0;
+	for (left = length; left > 0; left -= nbytes) {
 		/*
-		 * buffer isn't aligned with block size.
-		 * Block device always relies on DMA operation.
-		 * It's better to make the buffer as block size aligned.
+		 * We must only request operations aligned to the block
+		 * size. Therefore, if file_pos is not block-aligned,
+		 * we have to request the operation to start at the
+		 * previous block boundary and skip the leading bytes.
+		 * Similarly, the number of bytes requested must be a
+		 * multiple of the block size.
 		 */
-		buffer_not_aligned = 1;
-	} else {
-		buffer_not_aligned = 0;
-	}
+		skip = cur->file_pos & (block_size - 1);
 
-	skip = cur->file_pos % block_size;
-	aligned_length = ((skip + length) + (block_size - 1)) &
-			 ~(block_size - 1);
-	padding = aligned_length - (skip + length);
-	left = aligned_length;
-	do {
+		/*
+		 * Calculate the block number containing file_pos
+		 * - e.g. block 3.
+		 */
 		lba = (cur->file_pos + cur->base) / block_size;
-		if (left >= buf->length) {
-			/* Since left is larger, it's impossible to padding. */
-			if (skip || buffer_not_aligned) {
-				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size or buffer isn't
-				 * aligned, we need to use block buffer to
-				 * write block.
-				 */
-				count = ops->read(lba, buf->offset,
-						  buf->length);
-				assert(count == buf->length);
-				memcpy((void *)(buf->offset + skip),
-				       (void *)buffer,
-				       count - skip);
-				count = ops->write(lba, buf->offset,
-						   buf->length);
-			} else
-				count = ops->write(lba, buffer, buf->length);
-			assert(count == buf->length);
-			cur->file_pos += count - skip;
-			left = left - (count - skip);
+
+		if (skip + left > buf->length) {
+			/*
+			 * The underlying buffer is too small to hold
+			 * all the data to be written - limit this
+			 * request to fill the buffer, and write again.
+			 */
+			request = buf->length;
 		} else {
-			if (skip || padding || buffer_not_aligned) {
+			/*
+			 * The underlying buffer is big enough to hold
+			 * all the data to be written. Round the request
+			 * up to the next block boundary so that it is
+			 * a multiple of the block size.
+			 */
+			request = skip + left;
+			request = (request + (block_size - 1)) & ~(block_size - 1);
+		}
+
+		/*
+		 * The number of bytes that we are going to write
+		 * from the user buffer will depend on the size
+		 * of the current request.
+		 */
+		nbytes = request - skip;
+		padding = (nbytes > left) ? nbytes - left : 0;
+		nbytes -= padding;
+
+		/*
+		 * If there are skip or padding bytes then part of the
+		 * block's existing content must be preserved, which
+		 * means we have to read the block before writing.
+		 */
+		if (skip > 0 || padding > 0) {
+			request = ops->read(lba, buf->offset, request);
+			/*
+			 * The read may return fewer bytes than
+			 * requested. Round the result down to the
+			 * nearest block boundary.
+			 */
+			request &= ~(block_size - 1);
+			if (request <= skip) {
 				/*
-				 * The beginning address (file_pos) isn't
-				 * aligned with block size, we need to avoid
-				 * poluate data in the beginning. Reading and
-				 * skipping the beginning is the only way.
-				 * The size isn't aligned with block size.
-				 * Use block buffer to avoid overflow.
-				 *
-				 * If buffer isn't aligned, use block buffer
-				 * to avoid DMA error.
+				 * We couldn't read enough bytes to get
+				 * past the skip bytes; retrying would
+				 * simply re-read the same block and fail
+				 * in the same way, so report an error.
 				 */
-				count = ops->read(lba, buf->offset, left);
-				assert(count == left);
-				memcpy((void *)(buf->offset + skip),
-				       (void *)buffer,
-				       left - skip - padding);
-				count = ops->write(lba, buf->offset, left);
-			} else
-				count = ops->write(lba, buffer, left);
-			assert(count == left);
-			cur->file_pos += left - (skip + padding);
-			/* It's already the last block operation */
-			left = 0;
+				return -EIO;
+			}
+			nbytes = request - skip;
+			padding = (nbytes > left) ? nbytes - left : 0;
+			nbytes -= padding;
 		}
-		skip = cur->file_pos % block_size;
-	} while (left > 0);
-	*length_written = length;
+
+		memcpy((void *)(buf->offset + skip),
+		       (void *)(buffer + count),
+		       nbytes);
+
+		request = ops->write(lba, buf->offset, request);
+		if (request <= skip)
+			return -EIO;
+
+		/*
+		 * The write may also complete fewer bytes than
+		 * requested, so recalculate once more the number
+		 * of bytes that were actually consumed from the
+		 * user buffer.
+		 */
+		nbytes = request - skip;
+		padding = (nbytes > left) ? nbytes - left : 0;
+		nbytes -= padding;
+
+		cur->file_pos += nbytes;
+		count += nbytes;
+	}
+	assert(count == length);
+	*length_written = count;
+
 	return 0;
 }
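
For reference, the per-iteration skip/request/nbytes/padding arithmetic that the reworked block_read()/block_write() loops rely on can be exercised in isolation. The sketch below is not part of the patch: the block size, buffer length, file position and request length are made-up values chosen to hit both the "request capped by the underlying buffer" case and the "request rounded up to a block boundary" case, and the driver is assumed to honour each request in full (the real code uses whatever ops->read()/ops->write() return and bails out with -EIO if that does not even cover the skip bytes).

/*
 * Standalone illustration (hypothetical values) of the per-iteration
 * arithmetic used by block_read()/block_write().
 * Build with e.g. "cc -std=gnu99 demo.c && ./a.out".
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

int main(void)
{
	const size_t block_size = 512;	/* must be a power of two */
	const size_t buf_len    = 2048;	/* underlying driver buffer */
	size_t file_pos = 700;		/* read starts mid-block */
	size_t left     = 3000;		/* bytes requested by the caller */
	size_t count    = 0;		/* bytes delivered so far */

	while (left > 0) {
		/* Leading bytes between the block boundary and file_pos. */
		size_t skip = file_pos & (block_size - 1);

		/* Block-aligned request, capped by the underlying buffer. */
		size_t request;
		if (skip + left > buf_len)
			request = buf_len;
		else
			request = (skip + left + block_size - 1) &
				  ~(block_size - 1);

		/*
		 * Pretend the driver honoured the full request; the real
		 * code uses the value returned by ops->read()/ops->write()
		 * here instead.
		 */
		size_t nbytes  = request - skip;
		size_t padding = (nbytes > left) ? nbytes - left : 0;
		nbytes -= padding;

		printf("skip=%zu request=%zu nbytes=%zu padding=%zu\n",
		       skip, request, nbytes, padding);

		file_pos += nbytes;
		count    += nbytes;
		left     -= nbytes;
	}
	assert(count == 3000);
	return 0;
}

With these numbers the first iteration is capped by the 2 KB buffer (skip=188, request=2048, nbytes=1860, padding=0) and the second is rounded up to a block boundary (skip=0, request=1536, nbytes=1140, padding=396), matching the iteration#1 ... iteration#n split shown in the diagram above.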
 
diff --git a/plat/hisilicon/poplar/aarch64/platform_common.c b/plat/hisilicon/poplar/aarch64/platform_common.c
index a7dac4f..762bd84 100644
--- a/plat/hisilicon/poplar/aarch64/platform_common.c
+++ b/plat/hisilicon/poplar/aarch64/platform_common.c
@@ -25,9 +25,14 @@
 					DEVICE_SIZE,			\
 					MT_DEVICE | MT_RW | MT_SECURE)
 
+#define MAP_TSP_MEM	MAP_REGION_FLAT(TSP_SEC_MEM_BASE,		\
+					TSP_SEC_MEM_SIZE,		\
+					MT_MEMORY | MT_RW | MT_SECURE)
+
 static const mmap_region_t poplar_mmap[] = {
 	MAP_DDR,
 	MAP_DEVICE,
+	MAP_TSP_MEM,
 	{0}
 };
 
diff --git a/plat/hisilicon/poplar/bl2_plat_setup.c b/plat/hisilicon/poplar/bl2_plat_setup.c
index 1741475..db507c3 100644
--- a/plat/hisilicon/poplar/bl2_plat_setup.c
+++ b/plat/hisilicon/poplar/bl2_plat_setup.c
@@ -29,8 +29,10 @@
 typedef struct bl2_to_bl31_params_mem {
 	bl31_params_t		bl31_params;
 	image_info_t		bl31_image_info;
+	image_info_t		bl32_image_info;
 	image_info_t		bl33_image_info;
 	entry_point_info_t	bl33_ep_info;
+	entry_point_info_t	bl32_ep_info;
 	entry_point_info_t	bl31_ep_info;
 } bl2_to_bl31_params_mem_t;
 
@@ -61,6 +63,16 @@
 	SET_PARAM_HEAD(bl2_to_bl31_params->bl31_image_info,
 		       PARAM_IMAGE_BINARY, VERSION_1, 0);
 
+	/* Fill BL3-2 related information if it exists */
+#ifdef BL32_BASE
+	bl2_to_bl31_params->bl32_ep_info = &bl31_params_mem.bl32_ep_info;
+	SET_PARAM_HEAD(bl2_to_bl31_params->bl32_ep_info, PARAM_EP,
+		VERSION_1, 0);
+	bl2_to_bl31_params->bl32_image_info = &bl31_params_mem.bl32_image_info;
+	SET_PARAM_HEAD(bl2_to_bl31_params->bl32_image_info, PARAM_IMAGE_BINARY,
+		VERSION_1, 0);
+#endif
+
 	/* Fill BL3-3 related information */
 	bl2_to_bl31_params->bl33_ep_info = &bl31_params_mem.bl33_ep_info;
 	SET_PARAM_HEAD(bl2_to_bl31_params->bl33_ep_info,
@@ -89,6 +101,41 @@
 				     DISABLE_ALL_EXCEPTIONS);
 }
 
+/*******************************************************************************
+ * Before calling this function, BL32 is loaded in memory and its entrypoint
+ * is set by load_image. This is a placeholder for the platform to change
+ * the entrypoint of BL32 and set SPSR and security state.
+ * On Poplar we only set the security state of the entrypoint.
+ ******************************************************************************/
+#ifdef BL32_BASE
+void bl2_plat_set_bl32_ep_info(image_info_t *bl32_image_info,
+					entry_point_info_t *bl32_ep_info)
+{
+	SET_SECURITY_STATE(bl32_ep_info->h.attr, SECURE);
+	/*
+	 * The Secure Payload Dispatcher service is responsible for
+	 * setting the SPSR prior to entry into the BL32 image.
+	 */
+	bl32_ep_info->spsr = 0;
+}
+
+/*******************************************************************************
+ * Populate the extents of memory available for loading BL32
+ ******************************************************************************/
+void bl2_plat_get_bl32_meminfo(meminfo_t *bl32_meminfo)
+{
+	/*
+	 * Populate the extents of memory available for loading BL32.
+	 */
+	bl32_meminfo->total_base = BL32_BASE;
+	bl32_meminfo->free_base = BL32_BASE;
+	bl32_meminfo->total_size =
+			(TSP_SEC_MEM_BASE + TSP_SEC_MEM_SIZE) - BL32_BASE;
+	bl32_meminfo->free_size =
+			(TSP_SEC_MEM_BASE + TSP_SEC_MEM_SIZE) - BL32_BASE;
+}
+#endif /* BL32_BASE */
+
 static uint32_t hisi_get_spsr_for_bl33_entry(void)
 {
 	unsigned long el_status;
@@ -159,5 +206,5 @@
 
 unsigned long plat_get_ns_image_entrypoint(void)
 {
-	return PLAT_ARM_NS_IMAGE_OFFSET;
+	return PLAT_POPLAR_NS_IMAGE_OFFSET;
 }
diff --git a/plat/hisilicon/poplar/bl31_plat_setup.c b/plat/hisilicon/poplar/bl31_plat_setup.c
index b9a0e18..e3a5c50 100644
--- a/plat/hisilicon/poplar/bl31_plat_setup.c
+++ b/plat/hisilicon/poplar/bl31_plat_setup.c
@@ -32,11 +32,31 @@
 #define BL31_COHERENT_RAM_BASE	(unsigned long)(&__COHERENT_RAM_START__)
 #define BL31_COHERENT_RAM_LIMIT	(unsigned long)(&__COHERENT_RAM_END__)
 
+#define TZPC_SEC_ATTR_CTRL_VALUE (0x9DB98D45)
+
+static entry_point_info_t bl32_image_ep_info;
 static entry_point_info_t bl33_image_ep_info;
 
+static void hisi_tzpc_sec_init(void)
+{
+	mmio_write_32(HISI_TZPC_SEC_ATTR_CTRL, TZPC_SEC_ATTR_CTRL_VALUE);
+}
+
 entry_point_info_t *bl31_plat_get_next_image_ep_info(uint32_t type)
 {
-	return &bl33_image_ep_info;
+	entry_point_info_t *next_image_info;
+
+	assert(sec_state_is_valid(type));
+	next_image_info = (type == NON_SECURE)
+			? &bl33_image_ep_info : &bl32_image_ep_info;
+	/*
+	 * None of the images on this platform can have 0x0
+	 * as the entrypoint.
+	 */
+	if (next_image_info->pc)
+		return next_image_info;
+	else
+		return NULL;
 }
 
 void bl31_early_platform_setup(bl31_params_t *from_bl2,
@@ -47,6 +67,13 @@
 	/* Init console for crash report */
 	plat_crash_console_init();
 
+
+	/*
+	 * Copy BL32 (if populated by BL2) and BL33 entry point information.
+	 * They are stored in Secure RAM, in BL2's address space.
+	 */
+	if (from_bl2->bl32_ep_info)
+		bl32_image_ep_info = *from_bl2->bl32_ep_info;
 	bl33_image_ep_info = *from_bl2->bl33_ep_info;
 }
 
@@ -58,6 +85,9 @@
 	/* Init GIC distributor and CPU interface */
 	plat_arm_gic_driver_init();
 	plat_arm_gic_init();
+
+	/* Init security properties of IP blocks */
+	hisi_tzpc_sec_init();
 }
 
 void bl31_plat_runtime_setup(void)
diff --git a/plat/hisilicon/poplar/include/hi3798cv200.h b/plat/hisilicon/poplar/include/hi3798cv200.h
index 6318b9c..540d0aa 100644
--- a/plat/hisilicon/poplar/include/hi3798cv200.h
+++ b/plat/hisilicon/poplar/include/hi3798cv200.h
@@ -30,7 +30,7 @@
 #define TIMER20_BGLOAD			(SEC_TIMER2_BASE + 0x018)
 
 /* GPIO */
-#define	GPIO_MAX			(12)
+#define	GPIO_MAX			(13)
 #define	GPIO_BASE(x)			(x != 5 ?			\
 					0xf820000 + x * 0x1000 : 0xf8004000)
 
@@ -97,4 +97,7 @@
 /* Watchdog */
 #define HISI_WDG0_BASE			(0xF8A2C000)
 
+#define HISI_TZPC_BASE			(0xF8A80000)
+#define HISI_TZPC_SEC_ATTR_CTRL		(HISI_TZPC_BASE + 0x10)
+
 #endif	/* __HI3798cv200_H__ */
diff --git a/plat/hisilicon/poplar/include/platform_def.h b/plat/hisilicon/poplar/include/platform_def.h
index b7afe82..3d1ad9b 100644
--- a/plat/hisilicon/poplar/include/platform_def.h
+++ b/plat/hisilicon/poplar/include/platform_def.h
@@ -48,11 +48,55 @@
 #define TEE_SEC_MEM_BASE		(0x70000000)
 #define TEE_SEC_MEM_SIZE		(0x10000000)
 
+/* Memory location options for TSP */
+#define POPLAR_SRAM_ID	0
+#define POPLAR_DRAM_ID	1
+
+/*
+ * DDR for OP-TEE (28MB from 0x02200000 -0x04000000) is divided in several
+ * regions:
+ *   - Secure DDR (default is the top 16MB) used by OP-TEE
+ *   - Non-secure DDR (4MB) reserved for OP-TEE's future use
+ *   - Secure DDR (4MB aligned on 4MB) for OP-TEE's "Secure Data Path" feature
+ *   - Non-secure DDR used by OP-TEE (shared memory and padding) (4MB)
+ *   - Non-secure DDR (2MB) reserved for OP-TEE's future use
+ */
+#define DDR_SEC_SIZE			0x01000000
+#define DDR_SEC_BASE			0x03000000
+
 #define BL_MEM_BASE			(BL1_RO_BASE)
 #define BL_MEM_LIMIT			(BL31_LIMIT)
 #define BL_MEM_SIZE			(BL_MEM_LIMIT - BL_MEM_BASE)
 
+/*
+ * BL3-2 specific defines.
+ */
+
+/*
+ * The TSP currently executes from TZC secured area of DRAM.
+ */
+#define BL32_DRAM_BASE			0x03000000
+#define BL32_DRAM_LIMIT			0x04000000
+
+#if (POPLAR_TSP_RAM_LOCATION_ID == POPLAR_DRAM_ID)
+#define TSP_SEC_MEM_BASE		BL32_DRAM_BASE
+#define TSP_SEC_MEM_SIZE		(BL32_DRAM_LIMIT - BL32_DRAM_BASE)
+#define BL32_BASE			BL32_DRAM_BASE
+#define BL32_LIMIT			BL32_DRAM_LIMIT
+#elif (POPLAR_TSP_RAM_LOCATION_ID == POPLAR_SRAM_ID)
+#error "SRAM storage of TSP payload is currently unsupported"
+#else
+#error "Currently unsupported POPLAR_TSP_RAM_LOCATION_ID value"
+#endif
+
+/* BL32 is mandatory in AArch32 */
+#ifndef AARCH32
+#ifdef SPD_none
+#undef BL32_BASE
+#endif /* SPD_none */
+#endif
+
-#define PLAT_ARM_NS_IMAGE_OFFSET	0x37000000
+#define PLAT_POPLAR_NS_IMAGE_OFFSET	0x37000000
 
 /* Page table and MMU setup constants */
 #define ADDR_SPACE_SIZE			(1ull << 32)
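
The arithmetic behind this layout can be checked in isolation; the hypothetical snippet below (not part of the patch) simply mirrors the constants added above and asserts at compile time that the BL32/TSP DRAM window coincides with the secure DDR region reserved for OP-TEE.

/* Hypothetical compile-time check; values mirror platform_def.h above. */
#define DDR_SEC_BASE		0x03000000
#define DDR_SEC_SIZE		0x01000000
#define BL32_DRAM_BASE		0x03000000
#define BL32_DRAM_LIMIT		0x04000000

/* Requires C11 (_Static_assert). */
_Static_assert(BL32_DRAM_BASE == DDR_SEC_BASE,
	       "BL32 must start at the secure DDR base");
_Static_assert(BL32_DRAM_LIMIT == DDR_SEC_BASE + DDR_SEC_SIZE,
	       "BL32 must end at the top of the secure DDR region");

int main(void) { return 0; }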
diff --git a/plat/hisilicon/poplar/include/poplar_layout.h b/plat/hisilicon/poplar/include/poplar_layout.h
index 192bcb9..e0b5618 100644
--- a/plat/hisilicon/poplar/include/poplar_layout.h
+++ b/plat/hisilicon/poplar/include/poplar_layout.h
@@ -74,16 +74,16 @@
  * "OFFSET" is an offset to the start of a region relative to the
  * base of the "l-loader" TEXT section (also a multiple of page size).
  */
-#define LLOADER_TEXT_BASE		0x00001000	/* page aligned */
+#define LLOADER_TEXT_BASE		0x02001000	/* page aligned */
 #define BL1_OFFSET			0x0000D000	/* page multiple */
-#define FIP_BASE			0x00040000
+#define FIP_BASE			0x02040000
 
 #define BL1_RO_SIZE			0x00008000	/* page multiple */
 #define BL1_RW_SIZE			0x00008000	/* page multiple */
 #define BL1_SIZE			(BL1_RO_SIZE + BL1_RW_SIZE)
 #define BL2_SIZE			0x0000c000	/* page multiple */
 #define BL31_SIZE			0x00014000
-#define FIP_SIZE			0x00068000
+#define FIP_SIZE			0x000c0000  /* absolute max */
 
      /* BL1_OFFSET */			/* (Defined above) */
 #define BL1_BASE			(LLOADER_TEXT_BASE + BL1_OFFSET)
diff --git a/plat/hisilicon/poplar/plat_storage.c b/plat/hisilicon/poplar/plat_storage.c
index 623a61b..ab94cba 100644
--- a/plat/hisilicon/poplar/plat_storage.c
+++ b/plat/hisilicon/poplar/plat_storage.c
@@ -43,6 +43,10 @@
 	.uuid = UUID_EL3_RUNTIME_FIRMWARE_BL31,
 };
 
+static const io_uuid_spec_t bl32_uuid_spec = {
+	.uuid = UUID_SECURE_PAYLOAD_BL32,
+};
+
 static const io_uuid_spec_t bl33_uuid_spec = {
 	.uuid = UUID_NON_TRUSTED_FIRMWARE_BL33,
 };
@@ -69,6 +73,11 @@
 		(uintptr_t)&bl31_uuid_spec,
 		open_fip
 	},
+	[BL32_IMAGE_ID] = {
+		&fip_dev_handle,
+		(uintptr_t)&bl32_uuid_spec,
+		open_fip
+	},
 	[BL33_IMAGE_ID] = {
 		&fip_dev_handle,
 		(uintptr_t)&bl33_uuid_spec,
diff --git a/plat/hisilicon/poplar/platform.mk b/plat/hisilicon/poplar/platform.mk
index 28e0d1f..818e311 100644
--- a/plat/hisilicon/poplar/platform.mk
+++ b/plat/hisilicon/poplar/platform.mk
@@ -4,6 +4,17 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
+# On Poplar, the TSP can execute from a TZC-secured area of DRAM.
+POPLAR_TSP_RAM_LOCATION	:=	dram
+ifeq (${POPLAR_TSP_RAM_LOCATION}, dram)
+  POPLAR_TSP_RAM_LOCATION_ID := POPLAR_DRAM_ID
+else ifeq (${POPLAR_TSP_RAM_LOCATION}, sram)
+  POPLAR_TSP_RAM_LOCATION_ID := POPLAR_SRAM_ID
+else
+  $(error "Currently unsupported POPLAR_TSP_RAM_LOCATION value")
+endif
+$(eval $(call add_define,POPLAR_TSP_RAM_LOCATION_ID))
+
 NEED_BL33			:= yes
 
 COLD_BOOT_SINGLE_CPU		:= 1
diff --git a/services/std_svc/spm/spm_main.c b/services/std_svc/spm/spm_main.c
index 00f3a30..ae71c1d 100644
--- a/services/std_svc/spm/spm_main.c
+++ b/services/std_svc/spm/spm_main.c
@@ -48,7 +48,7 @@
  * 2. Saves the current C runtime state (callee-saved registers) on the stack
  *    frame and saves a reference to this state.
  * 3. Calls el3_exit() so that the EL3 system and general purpose registers
- *    from the sp_ctx->cpu_ctx are used to enter the secure payload image.
+ *    from the sp_ctx->cpu_ctx are used to enter the secure partition image.
  ******************************************************************************/
 static uint64_t spm_synchronous_sp_entry(secure_partition_context_t *sp_ctx_ptr)
 {
@@ -75,7 +75,7 @@
 
 /*******************************************************************************
  * This function takes a Secure partition context pointer and:
- * 1. Saves the S-EL1 system register context tp sp_ctx->cpu_ctx.
+ * 1. Saves the S-EL1 system register context to sp_ctx->cpu_ctx.
  * 2. Restores the current C runtime state (callee saved registers) from the
  *    stack frame using the reference to this state saved in
  *    spm_secure_partition_enter().
@@ -101,7 +101,7 @@
  * This function passes control to the Secure Partition image (BL32) for the
  * first time on the primary cpu after a cold boot. It assumes that a valid
  * secure context has already been created by spm_setup() which can be directly
- * used. This function performs a synchronous entry into the Secure payload.
+ * used. This function performs a synchronous entry into the Secure partition.
  * The SP passes control back to this routine through a SMC.
  ******************************************************************************/
 int32_t spm_init(void)
@@ -126,7 +126,7 @@
 	secure_partition_setup();
 
 	/*
-	 * Arrange for an entry into the secure payload.
+	 * Arrange for an entry into the secure partition.
 	 */
 	sp_init_in_progress = 1;
 	rc = spm_synchronous_sp_entry(&sp_ctx);
@@ -138,9 +138,9 @@
 }
 
 /*******************************************************************************
- * Given a secure payload entrypoint info pointer, entry point PC & pointer to
+ * Given a secure partition entrypoint info pointer, entry point PC & pointer to
  * a context data structure, this function will initialize the SPM context and
- * entry point info for the secure payload
+ * entry point info for the secure partition.
  ******************************************************************************/
 void spm_init_sp_ep_state(struct entry_point_info *sp_ep_info,
 			  uint64_t pc,
@@ -161,7 +161,7 @@
 	SET_PARAM_HEAD(sp_ep_info, PARAM_EP, VERSION_1, ep_attr);
 
 	sp_ep_info->pc = pc;
-	/* The SPM payload runs in S-EL0 */
+	/* The secure partition runs in S-EL0. */
 	sp_ep_info->spsr = SPSR_64(MODE_EL0,
 				   MODE_SP_EL0,
 				   DISABLE_ALL_EXCEPTIONS);
@@ -350,7 +350,7 @@
 
 		switch (smc_fid) {
 
-		case  SPM_VERSION_AARCH32:
+		case SPM_VERSION_AARCH32:
 			SMC_RET1(handle, SPM_VERSION_COMPILED);
 
 		case SP_EVENT_COMPLETE_AARCH64:
@@ -414,12 +414,31 @@
 
 		switch (smc_fid) {
 
-		case  SP_VERSION_AARCH64:
-		case  SP_VERSION_AARCH32:
+		case SP_VERSION_AARCH64:
+		case SP_VERSION_AARCH32:
 			SMC_RET1(handle, SP_VERSION_COMPILED);
 
 		case MM_COMMUNICATE_AARCH32:
 		case MM_COMMUNICATE_AARCH64:
+		{
+			uint64_t mm_cookie = x1;
+			uint64_t comm_buffer_address = x2;
+			uint64_t comm_size_address = x3;
+
+			/* Cookie. Reserved for future use. It must be zero. */
+			if (mm_cookie != 0) {
+				ERROR("MM_COMMUNICATE: cookie is not zero\n");
+				SMC_RET1(handle, SPM_INVALID_PARAMETER);
+			}
+
+			if (comm_buffer_address == 0) {
+				ERROR("MM_COMMUNICATE: comm_buffer_address is zero\n");
+				SMC_RET1(handle, SPM_INVALID_PARAMETER);
+			}
+
+			if (comm_size_address != 0) {
+				VERBOSE("MM_COMMUNICATE: comm_size_address is not 0 as recommended.\n");
+			}
 
 			/* Save the Normal world context */
 			cm_el1_sysregs_context_save(NON_SECURE);
@@ -432,14 +451,9 @@
 			cm_el1_sysregs_context_restore(SECURE);
 			cm_set_next_eret_context(SECURE);
 
-			/* Cookie. Reserved for future use. It must be zero. */
-			assert(x1 == 0);
-
-			if (x3 != 0) {
-				VERBOSE("MM_COMMUNICATE_AARCH32/64: X3 is not 0 as recommended.\n");
-			}
-
-			SMC_RET4(&sp_ctx.cpu_ctx, smc_fid, x1, x2, x3);
+			SMC_RET4(&sp_ctx.cpu_ctx, smc_fid, comm_buffer_address,
+				 comm_size_address, plat_my_core_pos());
+		}
 
 		case SP_MEMORY_ATTRIBUTES_GET_AARCH64:
 		case SP_MEMORY_ATTRIBUTES_SET_AARCH64: