feat(cm): enhance the cpu_context memory report

Currently, as part of the context_memory report, we explicitly
list the EL3, EL1 and EL2 registers and memory usage per CPU
for each world. The remaining bits in the cpu_context_t structure
are grouped together and listed under the "Other" section.

This patch enhances this part by listing the remaining bits
(GPREGS, PAUTH_REGS) individually, providing a much more
detailed overview of the context memory consumption amongst
the registers.

This patch has been tested on the CI together with the following
tf-a-ci-scripts patch, and the results are summarised accurately.
[https://review.trustedfirmware.org/c/ci/tf-a-ci-scripts/+/28849]

Change-Id: I16f210b605ddd7900600519520accf1ccd057bc7
Signed-off-by: Jayanth Dodderi Chidanand <jayanthdodderi.chidanand@arm.com>
diff --git a/lib/el3_runtime/aarch64/context_debug.c b/lib/el3_runtime/aarch64/context_debug.c
index 0a14e1f..b37bcb7 100644
--- a/lib/el3_runtime/aarch64/context_debug.c
+++ b/lib/el3_runtime/aarch64/context_debug.c
@@ -30,7 +30,7 @@
 
 #define PRINT_MEM_USAGE_SEPARATOR()					\
 	do {								\
-		printf("+-----------+-----------"			\
+		printf("+-----------+-----------+-----------"		\
 		"+-----------+-----------+-----------+\n");		\
 	} while (false)
 
@@ -41,6 +41,11 @@
 		putchar('-');						\
 	}
 
+#define PRINT_SINGLE_MEM_USAGE_SEP_BLOCK()				\
+	do {								\
+		printf("+-----------");					\
+	} while (false)
+
 /********************************************************************************
  * This function prints the allocated memory for a specific security state.
  * Values are grouped by exception level and core. The memory usage for the
@@ -49,74 +54,119 @@
 static size_t report_allocated_memory(unsigned int security_state_idx)
 {
 	size_t core_total = 0U;
+	size_t gp_total = 0U;
 	size_t el3_total = 0U;
+	size_t other_total = 0U;
+	size_t total = 0U;
+	size_t per_world_ctx_size = 0U;
+
 #if CTX_INCLUDE_EL2_REGS
 	size_t el2_total = 0U;
 #else
 	size_t el1_total = 0U;
 #endif /* CTX_INCLUDE_EL2_REGS */
-	size_t other_total = 0U;
-	size_t total = 0U;
-	size_t per_world_ctx_size = 0U;
+
+#if CTX_INCLUDE_PAUTH_REGS
+	size_t pauth_total = 0U;
+	PRINT_SINGLE_MEM_USAGE_SEP_BLOCK();
+#endif
 
 	PRINT_MEM_USAGE_SEPARATOR();
-	printf("|    Core   |    EL3    ");
+
+	printf("|    Core   |     GP    |    EL3    ");
 #if CTX_INCLUDE_EL2_REGS
 	printf("|    EL2    ");
 #else
 	printf("|    EL1    ");
 #endif /* CTX_INCLUDE_EL2_REGS */
+
+#if CTX_INCLUDE_PAUTH_REGS
+	printf("|   PAUTH   ");
+#endif
+
 	printf("|   Other   |   Total   |\n");
 
 	/* Compute memory usage for each core's context */
 	for (unsigned int i = 0U; i < PLATFORM_CORE_COUNT; i++) {
 		size_t size_other = 0U;
 		size_t el3_size = 0U;
+		size_t gp_size = 0U;
 #if CTX_INCLUDE_EL2_REGS
 		size_t el2_size = 0U;
 #else
 		size_t el1_size = 0U;
 #endif /* CTX_INCLUDE_EL2_REGS */
 
+#if CTX_INCLUDE_PAUTH_REGS
+		size_t pauth_size = 0U;
+		PRINT_SINGLE_MEM_USAGE_SEP_BLOCK();
+#endif
+
 		PRINT_MEM_USAGE_SEPARATOR();
+
 		cpu_context_t *ctx = (cpu_context_t *)cm_get_context_by_index(i,
 			security_state_idx);
 		core_total = sizeof(*ctx);
 		el3_size = sizeof(ctx->el3state_ctx);
+		gp_size = sizeof(ctx->gpregs_ctx);
+		size_other = core_total - (el3_size + gp_size);
+		printf("| %9u | %8luB | %8luB ", i, gp_size, el3_size);
+
 #if CTX_INCLUDE_EL2_REGS
 		el2_size = sizeof(ctx->el2_sysregs_ctx);
-#else
-		el1_size = sizeof(ctx->el1_sysregs_ctx);
-#endif /* CTX_INCLUDE_EL2_REGS */
-		size_other = core_total - el3_size;
-		printf("| %9u | %8luB ", i, el3_size);
-#if CTX_INCLUDE_EL2_REGS
 		size_other -= el2_size;
+		el2_total += el2_size;
 		printf("| %8luB ", el2_size);
 #else
+		el1_size = sizeof(ctx->el1_sysregs_ctx);
 		size_other -= el1_size;
+		el1_total += el1_size;
 		printf("| %8luB ", el1_size);
 #endif /* CTX_INCLUDE_EL2_REGS */
+
+#if CTX_INCLUDE_PAUTH_REGS
+		pauth_size = sizeof(ctx->pauth_ctx);
+		size_other -= pauth_size;
+		pauth_total += pauth_size;
+		printf("| %8luB ", pauth_size);
+#endif
 		printf("| %8luB | %8luB |\n", size_other, core_total);
 
+		gp_total += gp_size;
 		el3_total += el3_size;
-#if CTX_INCLUDE_EL2_REGS
-		el2_total += el2_size;
-#else
-		el1_total += el1_size;
-#endif /* CTX_INCLUDE_EL2_REGS */
 		other_total += size_other;
 		total += core_total;
 	}
+
+#if CTX_INCLUDE_PAUTH_REGS
+	PRINT_SINGLE_MEM_USAGE_SEP_BLOCK();
+#endif
+
 	PRINT_MEM_USAGE_SEPARATOR();
+
+#if CTX_INCLUDE_PAUTH_REGS
+	PRINT_SINGLE_MEM_USAGE_SEP_BLOCK();
+#endif
+
 	PRINT_MEM_USAGE_SEPARATOR();
-	printf("|    All    | %8luB ", el3_total);
+
+	printf("|    All    | %8luB | %8luB ", gp_total, el3_total);
+
 #if CTX_INCLUDE_EL2_REGS
 	printf("| %8luB ", el2_total);
 #else
 	printf("| %8luB ", el1_total);
 #endif /* CTX_INCLUDE_EL2_REGS */
+
+#if CTX_INCLUDE_PAUTH_REGS
+	printf("| %8luB ", pauth_total);
+#endif
+
 	printf("| %8luB | %8luB |\n", other_total, total);
+
+#if CTX_INCLUDE_PAUTH_REGS
+	PRINT_SINGLE_MEM_USAGE_SEP_BLOCK();
+#endif
 	PRINT_MEM_USAGE_SEPARATOR();
 	printf("\n");