feat(rss): add new comms protocols

The current comms protocol (where arguments and return data are embedded
into the MHU message) is now protocol v0. Protocol v1 embeds pointers
into the message, and has the RSS retrieve the data via DMA.

Change-Id: I08d7f09c4eaea673769fde9eee194447a99f1b78
Signed-off-by: Raef Coles <raef.coles@arm.com>
diff --git a/drivers/arm/rss/rss_comms.c b/drivers/arm/rss/rss_comms.c
index 28a4925..0633c61 100644
--- a/drivers/arm/rss/rss_comms.c
+++ b/drivers/arm/rss/rss_comms.c
@@ -10,199 +10,141 @@
 #include <common/debug.h>
 #include <drivers/arm/mhu.h>
 #include <drivers/arm/rss_comms.h>
-#include <initial_attestation.h>
 #include <psa/client.h>
+#include <rss_comms_protocol.h>
 
-#include <platform_def.h>
-
-#define TYPE_OFFSET	U(16)
-#define TYPE_MASK	(0xFFFFUL << TYPE_OFFSET)
-#define IN_LEN_OFFSET	U(8)
-#define IN_LEN_MASK	(0xFFUL << IN_LEN_OFFSET)
-#define OUT_LEN_OFFSET	U(0)
-#define OUT_LEN_MASK	(0xFFUL << OUT_LEN_OFFSET)
-
-#define PARAM_PACK(type, in_len, out_len)			  \
-	(((((uint32_t)type) << TYPE_OFFSET) & TYPE_MASK)	| \
-	 ((((uint32_t)in_len) << IN_LEN_OFFSET) & IN_LEN_MASK)	| \
-	 ((((uint32_t)out_len) << OUT_LEN_OFFSET) & OUT_LEN_MASK))
-
-#define PARAM_UNPACK_IN_LEN(ctrl_param) \
-	((size_t)(((ctrl_param) & IN_LEN_MASK) >> IN_LEN_OFFSET))
-
-/* Message types */
-struct __packed packed_psa_call_t {
-	uint8_t protocol_ver;
-	uint8_t seq_num;
-	uint16_t client_id;
-	psa_handle_t handle;
-	uint32_t ctrl_param; /* type, in_len, out_len */
-	uint16_t io_size[4];
-};
-
-struct __packed packed_psa_reply_t {
-	uint8_t protocol_ver;
-	uint8_t seq_num;
-	uint16_t client_id;
-	int32_t return_val;
-	uint16_t out_size[4];
-};
-
-/*
- * In the current implementation the RoT Service request that requires the
- * biggest message buffer is the RSS_ATTEST_GET_TOKEN. The maximum required
- * buffer size is calculated based on the platform-specific needs of
- * this request.
+/* The message and reply buffers are never in use at the same time, so overlapping them in a
+ * union saves space.
  */
-#define MAX_REQUEST_PAYLOAD_SIZE	(PSA_INITIAL_ATTEST_CHALLENGE_SIZE_64 \
-					 + PLAT_ATTEST_TOKEN_MAX_SIZE)
-
-/* Buffer to store the messages to be sent/received. */
-static uint8_t message_buf[MAX_REQUEST_PAYLOAD_SIZE] __aligned(4);
+union __packed __attribute__((aligned(4))) rss_comms_io_buffer_t {
+	struct serialized_rss_comms_msg_t msg;		/* Request handed to mhu_send_data() */
+	struct serialized_rss_comms_reply_t reply;	/* Response filled by mhu_receive_data() */
+};
 
-static int32_t pack_params(const psa_invec *invecs,
-			   size_t in_len,
-			   uint8_t *buf,
-			   size_t *buf_len)
+static uint8_t select_protocol_version(const psa_invec *in_vec, size_t in_len,
+				       const psa_outvec *out_vec, size_t out_len)
 {
-	uint32_t i;
-	size_t payload_size = 0U;
+	size_t comms_mhu_msg_size;
+	size_t comms_embed_msg_min_size;
+	size_t comms_embed_reply_min_size;
+	size_t in_size_total = 0;
+	size_t out_size_total = 0;
+	size_t i;
 
 	for (i = 0U; i < in_len; ++i) {
-		if (invecs[i].len > *buf_len - payload_size) {
-			return -1;
-		}
-		memcpy(buf + payload_size, invecs[i].base, invecs[i].len);
-		payload_size += invecs[i].len;
+		in_size_total += in_vec[i].len;
 	}
-
-	*buf_len = payload_size;
-	return 0;
-}
-
-static int serialise_message(const struct packed_psa_call_t *msg,
-			     const psa_invec *invecs,
-			     uint8_t *payload_buf,
-			     size_t *payload_len)
-{
-	size_t message_len = 0U;
-	size_t len;
-
-	/* Copy the message header into the payload buffer. */
-	len = sizeof(*msg);
-	if (len > *payload_len) {
-		ERROR("[RSS-COMMS] Message buffer too small.\n");
-		return -1;
-	}
-	memcpy(payload_buf, (const void *)msg, len);
-	message_len += len;
-
-	/* The input data will follow the message header in the payload buffer. */
-	len = *payload_len - message_len;
-	if (pack_params(invecs, PARAM_UNPACK_IN_LEN(msg->ctrl_param),
-			payload_buf + message_len, &len) != 0) {
-		ERROR("[RSS-COMMS] Message buffer too small.\n");
-		return -1;
-	}
-	message_len += len;
-
-	*payload_len = message_len;
-	return 0;
-}
-
-static void unpack_params(const uint8_t *buf,
-			  psa_outvec *outvecs,
-			  size_t out_len)
-{
-	size_t i;
-
 	for (i = 0U; i < out_len; ++i) {
-		memcpy(outvecs[i].base, buf, outvecs[i].len);
-		buf += outvecs[i].len;
+		out_size_total += out_vec[i].len;
 	}
-}
 
-static void deserialise_reply(struct packed_psa_reply_t *reply,
-			      psa_outvec *outvecs,
-			      size_t outlen,
-			      const uint8_t *message,
-			      size_t message_len)
-{
-	uint32_t i;
+	comms_mhu_msg_size = mhu_get_max_message_size();
 
-	memcpy(reply, message, sizeof(*reply));
+	comms_embed_msg_min_size = sizeof(struct serialized_rss_comms_header_t) +
+				   sizeof(struct rss_embed_msg_t) -
+				   RSS_COMMS_PAYLOAD_MAX_SIZE;
 
-	/* Outvecs */
-	for (i = 0U; i < outlen; ++i) {
-		outvecs[i].len = reply->out_size[i];
-	}
+	comms_embed_reply_min_size = sizeof(struct serialized_rss_comms_header_t) +
+				     sizeof(struct rss_embed_reply_t) -
+				     RSS_COMMS_PAYLOAD_MAX_SIZE;
 
-	unpack_params(message + sizeof(*reply), outvecs, outlen);
+	/* Use embed if we can pack into one message and reply, else use
+	 * pointer_access. The underlying MHU transport protocol uses a
+	 * single uint32_t to track the length, so the amount of data that
+	 * can be in a message is 4 bytes less than mhu_get_max_message_size
+	 * reports.
+	 *
+	 * TODO tune this with real performance numbers, it's possible a
+	 * pointer_access message is less performant than multiple embed
+	 * messages due to ATU configuration costs to allow access to the
+	 * pointers.
+	 */
+	if ((comms_embed_msg_min_size + in_size_total > comms_mhu_msg_size - sizeof(uint32_t))
+	 || (comms_embed_reply_min_size + out_size_total > comms_mhu_msg_size - sizeof(uint32_t))) {
+		return RSS_COMMS_PROTOCOL_POINTER_ACCESS;
+	} else {
+		return RSS_COMMS_PROTOCOL_EMBED;
+	}
 }
 
-psa_status_t psa_call(psa_handle_t handle, int32_t type,
-		      const psa_invec *in_vec, size_t in_len,
+psa_status_t psa_call(psa_handle_t handle, int32_t type, const psa_invec *in_vec, size_t in_len,
 		      psa_outvec *out_vec, size_t out_len)
 {
 	enum mhu_error_t err;
-	static uint32_t seq_num = 1U;
-	struct packed_psa_call_t msg = {
-		.protocol_ver = 0U,
-		.seq_num = seq_num,
-		/* No need to distinguish callers (currently concurrent calls are not supported). */
-		.client_id = 1U,
-		.handle = handle,
-		.ctrl_param = PARAM_PACK(type, in_len, out_len),
-	};
-
-	struct packed_psa_reply_t reply = {0};
-	size_t message_size;
-	uint32_t i;
+	/* Static to avoid using huge amounts of stack space (revisit if the lack of
+	 * reentrancy becomes a problem). Declared before reply_size, which takes its sizeof.
+	 */
+	static union rss_comms_io_buffer_t io_buf;
+	psa_status_t status;
+	static uint8_t seq_num = 1U;
+	size_t msg_size;
+	size_t reply_size = sizeof(io_buf.reply);
+	psa_status_t return_val;
+	size_t idx;
 
-	/* Fill msg iovec lengths */
-	for (i = 0U; i < in_len; ++i) {
-		msg.io_size[i] = in_vec[i].len;
+	if (type > INT16_MAX || type < INT16_MIN || in_len > PSA_MAX_IOVEC
+	    || out_len > PSA_MAX_IOVEC) {
+		return PSA_ERROR_INVALID_ARGUMENT;
 	}
-	for (i = 0U; i < out_len; ++i) {
-		msg.io_size[in_len + i] = out_vec[i].len;
+
+	io_buf.msg.header.seq_num = seq_num;
+	/* No need to distinguish callers (currently concurrent calls are not supported). */
+	io_buf.msg.header.client_id = 1U;
+	io_buf.msg.header.protocol_ver = select_protocol_version(in_vec, in_len, out_vec, out_len);
+
+	status = rss_protocol_serialize_msg(handle, type, in_vec, in_len, out_vec,
+					    out_len, &io_buf.msg, &msg_size);
+	if (status != PSA_SUCCESS) {
+		return status;
 	}
 
-	message_size = sizeof(message_buf);
-	if (serialise_message(&msg, in_vec, message_buf, &message_size)) {
-		/* Local buffer is probably too small. */
-		return PSA_ERROR_INSUFFICIENT_MEMORY;
+	VERBOSE("[RSS-COMMS] Sending message\n");
+	VERBOSE("protocol_ver=%u\n", io_buf.msg.header.protocol_ver);
+	VERBOSE("seq_num=%u\n", io_buf.msg.header.seq_num);
+	VERBOSE("client_id=%u\n", io_buf.msg.header.client_id);
+	for (idx = 0; idx < in_len; idx++) {
+		VERBOSE("in_vec[%lu].len=%lu\n", idx, in_vec[idx].len);
+		VERBOSE("in_vec[%lu].buf=%p\n", idx, (void *)in_vec[idx].base);
 	}
 
-	err = mhu_send_data(message_buf, message_size);
+	err = mhu_send_data((uint8_t *)&io_buf.msg, msg_size);
 	if (err != MHU_ERR_NONE) {
 		return PSA_ERROR_COMMUNICATION_FAILURE;
 	}
 
-	message_size = sizeof(message_buf);
 #if DEBUG
 	/*
 	 * Poisoning the message buffer (with a known pattern).
 	 * Helps in detecting hypothetical RSS communication bugs.
 	 */
-	memset(message_buf, 0xA5, message_size);
+	memset(&io_buf.msg, 0xA5, msg_size);
 #endif
-	err = mhu_receive_data(message_buf, &message_size);
+
+	err = mhu_receive_data((uint8_t *)&io_buf.reply, &reply_size);
 	if (err != MHU_ERR_NONE) {
 		return PSA_ERROR_COMMUNICATION_FAILURE;
 	}
 
-	deserialise_reply(&reply, out_vec, out_len, message_buf, message_size);
+	VERBOSE("[RSS-COMMS] Received reply\n");
+	VERBOSE("protocol_ver=%u\n", io_buf.reply.header.protocol_ver);
+	VERBOSE("seq_num=%u\n", io_buf.reply.header.seq_num);
+	VERBOSE("client_id=%u\n", io_buf.reply.header.client_id);
 
-	seq_num++;
+	status = rss_protocol_deserialize_reply(out_vec, out_len, &return_val,
+						&io_buf.reply, reply_size);
+	if (status != PSA_SUCCESS) {
+		return status;
+	}
 
-	VERBOSE("[RSS-COMMS] Received reply\n");
-	VERBOSE("protocol_ver=%d\n", reply.protocol_ver);
-	VERBOSE("seq_num=%d\n", reply.seq_num);
-	VERBOSE("client_id=%d\n", reply.client_id);
-	VERBOSE("return_val=%d\n", reply.return_val);
-	VERBOSE("out_size[0]=%d\n", reply.out_size[0]);
+	VERBOSE("return_val=%d\n", return_val);
+	for (idx = 0U; idx < out_len; idx++) {
+		VERBOSE("out_vec[%lu].len=%lu\n", idx, out_vec[idx].len);
+		VERBOSE("out_vec[%lu].buf=%p\n", idx, (void *)out_vec[idx].base);
+	}
+
+	seq_num++;
 
-	return reply.return_val;
+	return return_val;
 }
 
 int rss_comms_init(uintptr_t mhu_sender_base, uintptr_t mhu_receiver_base)